]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
softfloat: Use pointers with ftype_unpack_canonical
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
d82f3b2d 135 float_raise(float_flag_input_denormal, s); \
a94b7839
EC
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
/*
 * Per (operand count, float width) switch selecting fpclassify() (1) or the
 * float32/64_* softfloat primitives (0) inside the generated hardfloat
 * helpers.  The single-precision variants never use fpclassify(); the
 * double-precision variants use it on x86_64 hosts only.
 */
#define QEMU_HARDFLOAT_1F32_USE_FP 0
#define QEMU_HARDFLOAT_2F32_USE_FP 0
#define QEMU_HARDFLOAT_3F32_USE_FP 0

#ifdef __x86_64__
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
205
/*
 * QEMU_HARDFLOAT_USE_ISINF selects isinf() instead of
 * float{32,64}_is_infinity when !USE_FP.
 * Measured effect: roughly a 6% speedup on x86_64/aarch64, but up to a 50%
 * fp-bench slowdown on power64, so it is enabled on the former two only.
 */
#if !defined(__x86_64__) && !defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF 0
#else
# define QEMU_HARDFLOAT_USE_ISINF 1
#endif
217
/*
 * Hardfloat relies on the inexact flag already being set, so it cannot be
 * used for targets that clear the FP flags before most FP operations, nor
 * when the host compiler is not providing exact IEEE arithmetic
 * (-ffast-math).  In those configurations QEMU_NO_HARDFLOAT is 1.
 *
 * QEMU_SOFTFLOAT_ATTR additionally forbids inlining of the softfloat
 * fallbacks when hardfloat is enabled.
 */
#if defined(__FAST_MATH__)
# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
  IEEE implementation
#endif

#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
a94b7839
EC
342static inline float32
343float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
b240c9c4 345 f32_check_fn pre, f32_check_fn post)
a94b7839
EC
346{
347 union_float32 ua, ub, ur;
348
349 ua.s = xa;
350 ub.s = xb;
351
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
354 }
355
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
359 }
a94b7839
EC
360
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 363 float_raise(float_flag_overflow, s);
b240c9c4
RH
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
a94b7839
EC
366 }
367 return ur.s;
368
369 soft:
370 return soft(ua.s, ub.s, s);
371}
372
373static inline float64
374float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
b240c9c4 376 f64_check_fn pre, f64_check_fn post)
a94b7839
EC
377{
378 union_float64 ua, ub, ur;
379
380 ua.s = xa;
381 ub.s = xb;
382
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
385 }
386
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
390 }
a94b7839
EC
391
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 394 float_raise(float_flag_overflow, s);
b240c9c4
RH
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
a94b7839
EC
397 }
398 return ur.s;
399
400 soft:
401 return soft(ua.s, ub.s, s);
402}
403
d97544c9
AB
404/*----------------------------------------------------------------------------
405| Returns the fraction bits of the single-precision floating-point value `a'.
406*----------------------------------------------------------------------------*/
407
408static inline uint32_t extractFloat32Frac(float32 a)
409{
410 return float32_val(a) & 0x007FFFFF;
411}
412
413/*----------------------------------------------------------------------------
414| Returns the exponent bits of the single-precision floating-point value `a'.
415*----------------------------------------------------------------------------*/
416
417static inline int extractFloat32Exp(float32 a)
418{
419 return (float32_val(a) >> 23) & 0xFF;
420}
421
422/*----------------------------------------------------------------------------
423| Returns the sign bit of the single-precision floating-point value `a'.
424*----------------------------------------------------------------------------*/
425
c120391c 426static inline bool extractFloat32Sign(float32 a)
d97544c9
AB
427{
428 return float32_val(a) >> 31;
429}
430
431/*----------------------------------------------------------------------------
432| Returns the fraction bits of the double-precision floating-point value `a'.
433*----------------------------------------------------------------------------*/
434
435static inline uint64_t extractFloat64Frac(float64 a)
436{
e9321124 437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
d97544c9
AB
438}
439
440/*----------------------------------------------------------------------------
441| Returns the exponent bits of the double-precision floating-point value `a'.
442*----------------------------------------------------------------------------*/
443
444static inline int extractFloat64Exp(float64 a)
445{
446 return (float64_val(a) >> 52) & 0x7FF;
447}
448
449/*----------------------------------------------------------------------------
450| Returns the sign bit of the double-precision floating-point value `a'.
451*----------------------------------------------------------------------------*/
452
c120391c 453static inline bool extractFloat64Sign(float64 a)
d97544c9
AB
454{
455 return float64_val(a) >> 63;
456}
457
a90119b5
AB
/*
 * Classification of a decomposed floating point number.
 *
 * The enumerator order is significant: the two NaN classes come last, so
 * "cls >= float_class_qnan" tests for any NaN.  The type is packed to keep
 * it as small as possible.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    /* Every enumerator from here on is a NaN. */
    float_class_qnan,
    float_class_snan,
} FloatClass;
471
134eda00
RH
472#define float_cmask(bit) (1u << (bit))
473
474enum {
475 float_cmask_zero = float_cmask(float_class_zero),
476 float_cmask_normal = float_cmask(float_class_normal),
477 float_cmask_inf = float_cmask(float_class_inf),
478 float_cmask_qnan = float_cmask(float_class_qnan),
479 float_cmask_snan = float_cmask(float_class_snan),
480
481 float_cmask_infzero = float_cmask_zero | float_cmask_inf,
482 float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
483};
484
485
247d1f21
RH
486/* Simple helpers for checking if, or what kind of, NaN we have */
487static inline __attribute__((unused)) bool is_nan(FloatClass c)
488{
489 return unlikely(c >= float_class_qnan);
490}
491
492static inline __attribute__((unused)) bool is_snan(FloatClass c)
493{
494 return c == float_class_snan;
495}
496
497static inline __attribute__((unused)) bool is_qnan(FloatClass c)
498{
499 return c == float_class_qnan;
500}
501
a90119b5
AB
502/*
503 * Structure holding all of the decomposed parts of a float. The
504 * exponent is unbiased and the fraction is normalized. All
505 * calculations are done with a 64 bit fraction and then rounded as
506 * appropriate for the final format.
507 *
508 * Thanks to the packed FloatClass a decent compiler should be able to
509 * fit the whole structure into registers and avoid using the stack
510 * for parameter passing.
511 */
512
513typedef struct {
514 uint64_t frac;
515 int32_t exp;
516 FloatClass cls;
517 bool sign;
f8155c1d 518} FloatParts64;
a90119b5 519
/*
 * Bit position of the binary point within a canonical 64-bit fraction, and
 * the corresponding implicit (leading one) bit.
 */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;

/*
 * Expand fields based on the size of exponent (E) and fraction (F).
 * Both arguments are fully parenthesised so that expressions, not just
 * literals, may be passed safely.
 */
#define FLOAT_PARAMS(E, F)                                               \
    .exp_size       = (E),                                               \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                             \
    .exp_max        = (1 << (E)) - 1,                                    \
    .frac_size      = (F),                                               \
    .frac_shift     = DECOMPOSED_BINARY_POINT - (F),                     \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - (F)),           \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - (F)) - 1),     \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - (F))) - 1,     \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - (F))) - 1

/* 5 exponent bits, 10 fraction bits. */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* Same field layout as float16, with ARM Alternative Half Precision on. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* 8 exponent bits, 7 fraction bits. */
static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

/* 8 exponent bits, 23 fraction bits. */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* 11 exponent bits, 52 fraction bits. */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};
581
6fff2167 582/* Unpack a float to parts, but do not canonicalize. */
d8fdd172 583static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
6fff2167 584{
d8fdd172
RH
585 const int f_size = fmt->frac_size;
586 const int e_size = fmt->exp_size;
6fff2167 587
d8fdd172 588 *r = (FloatParts64) {
6fff2167 589 .cls = float_class_unclassified,
d8fdd172
RH
590 .sign = extract64(raw, f_size + e_size, 1),
591 .exp = extract64(raw, f_size, e_size),
592 .frac = extract64(raw, 0, f_size)
6fff2167
AB
593 };
594}
595
3dddb203 596static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
6fff2167 597{
3dddb203 598 unpack_raw64(p, &float16_params, f);
6fff2167
AB
599}
600
3dddb203 601static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
8282310d 602{
3dddb203 603 unpack_raw64(p, &bfloat16_params, f);
8282310d
LZ
604}
605
3dddb203 606static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
6fff2167 607{
3dddb203 608 unpack_raw64(p, &float32_params, f);
6fff2167
AB
609}
610
3dddb203 611static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
6fff2167 612{
3dddb203 613 unpack_raw64(p, &float64_params, f);
6fff2167
AB
614}
615
616/* Pack a float from parts, but do not canonicalize. */
9e4af58c 617static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
6fff2167 618{
9e4af58c
RH
619 const int f_size = fmt->frac_size;
620 const int e_size = fmt->exp_size;
621 uint64_t ret;
622
623 ret = (uint64_t)p->sign << (f_size + e_size);
624 ret = deposit64(ret, f_size, e_size, p->exp);
625 ret = deposit64(ret, 0, f_size, p->frac);
626 return ret;
6fff2167
AB
627}
628
71fd178e 629static inline float16 float16_pack_raw(const FloatParts64 *p)
6fff2167 630{
71fd178e 631 return make_float16(pack_raw64(p, &float16_params));
6fff2167
AB
632}
633
71fd178e 634static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
8282310d 635{
71fd178e 636 return pack_raw64(p, &bfloat16_params);
8282310d
LZ
637}
638
71fd178e 639static inline float32 float32_pack_raw(const FloatParts64 *p)
6fff2167 640{
71fd178e 641 return make_float32(pack_raw64(p, &float32_params));
6fff2167
AB
642}
643
71fd178e 644static inline float64 float64_pack_raw(const FloatParts64 *p)
6fff2167 645{
71fd178e 646 return make_float64(pack_raw64(p, &float64_params));
6fff2167
AB
647}
648
0664335a
RH
649/*----------------------------------------------------------------------------
650| Functions and definitions to determine: (1) whether tininess for underflow
651| is detected before or after rounding by default, (2) what (if anything)
652| happens when exceptions are raised, (3) how signaling NaNs are distinguished
653| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
654| are propagated from function inputs to output. These details are target-
655| specific.
656*----------------------------------------------------------------------------*/
139c1837 657#include "softfloat-specialize.c.inc"
0664335a 658
0fc07cad
RH
/* Map the unsuffixed name onto the FloatParts64 implementation. */
#define parts_default_nan  parts64_default_nan
660
6fff2167 661/* Canonicalize EXP and FRAC, setting CLS. */
f8155c1d 662static FloatParts64 sf_canonicalize(FloatParts64 part, const FloatFmt *parm,
f9943c7f 663 float_status *status)
6fff2167 664{
ca3a3d5a 665 if (part.exp == parm->exp_max && !parm->arm_althp) {
6fff2167
AB
666 if (part.frac == 0) {
667 part.cls = float_class_inf;
668 } else {
94933df0 669 part.frac <<= parm->frac_shift;
298b468e
RH
670 part.cls = (parts_is_snan_frac(part.frac, status)
671 ? float_class_snan : float_class_qnan);
6fff2167
AB
672 }
673 } else if (part.exp == 0) {
674 if (likely(part.frac == 0)) {
675 part.cls = float_class_zero;
676 } else if (status->flush_inputs_to_zero) {
677 float_raise(float_flag_input_denormal, status);
678 part.cls = float_class_zero;
679 part.frac = 0;
680 } else {
e99c4373 681 int shift = clz64(part.frac);
6fff2167
AB
682 part.cls = float_class_normal;
683 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
684 part.frac <<= shift;
685 }
686 } else {
687 part.cls = float_class_normal;
688 part.exp -= parm->exp_bias;
689 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
690 }
691 return part;
692}
693
694/* Round and uncanonicalize a floating-point number by parts. There
695 * are FRAC_SHIFT bits that may require rounding at the bottom of the
696 * fraction; these bits will be removed. The exponent will be biased
697 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
698 */
699
f8155c1d 700static FloatParts64 round_canonical(FloatParts64 p, float_status *s,
6fff2167
AB
701 const FloatFmt *parm)
702{
5d64abb3 703 const uint64_t frac_lsb = parm->frac_lsb;
6fff2167
AB
704 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
705 const uint64_t round_mask = parm->round_mask;
706 const uint64_t roundeven_mask = parm->roundeven_mask;
707 const int exp_max = parm->exp_max;
708 const int frac_shift = parm->frac_shift;
709 uint64_t frac, inc;
710 int exp, flags = 0;
711 bool overflow_norm;
712
713 frac = p.frac;
714 exp = p.exp;
715
716 switch (p.cls) {
717 case float_class_normal:
718 switch (s->float_rounding_mode) {
719 case float_round_nearest_even:
720 overflow_norm = false;
721 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
722 break;
723 case float_round_ties_away:
724 overflow_norm = false;
725 inc = frac_lsbm1;
726 break;
727 case float_round_to_zero:
728 overflow_norm = true;
729 inc = 0;
730 break;
731 case float_round_up:
732 inc = p.sign ? 0 : round_mask;
733 overflow_norm = p.sign;
734 break;
735 case float_round_down:
736 inc = p.sign ? round_mask : 0;
737 overflow_norm = !p.sign;
738 break;
5d64abb3
RH
739 case float_round_to_odd:
740 overflow_norm = true;
741 inc = frac & frac_lsb ? 0 : round_mask;
742 break;
6fff2167
AB
743 default:
744 g_assert_not_reached();
745 }
746
747 exp += parm->exp_bias;
748 if (likely(exp > 0)) {
749 if (frac & round_mask) {
750 flags |= float_flag_inexact;
e99c4373
RH
751 if (uadd64_overflow(frac, inc, &frac)) {
752 frac = (frac >> 1) | DECOMPOSED_IMPLICIT_BIT;
6fff2167
AB
753 exp++;
754 }
755 }
756 frac >>= frac_shift;
757
ca3a3d5a
AB
758 if (parm->arm_althp) {
759 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
760 if (unlikely(exp > exp_max)) {
761 /* Overflow. Return the maximum normal. */
762 flags = float_flag_invalid;
763 exp = exp_max;
764 frac = -1;
765 }
766 } else if (unlikely(exp >= exp_max)) {
6fff2167
AB
767 flags |= float_flag_overflow | float_flag_inexact;
768 if (overflow_norm) {
769 exp = exp_max - 1;
770 frac = -1;
771 } else {
772 p.cls = float_class_inf;
773 goto do_inf;
774 }
775 }
776 } else if (s->flush_to_zero) {
777 flags |= float_flag_output_denormal;
778 p.cls = float_class_zero;
779 goto do_zero;
780 } else {
e99c4373
RH
781 bool is_tiny = s->tininess_before_rounding || (exp < 0);
782
783 if (!is_tiny) {
784 uint64_t discard;
785 is_tiny = !uadd64_overflow(frac, inc, &discard);
786 }
6fff2167
AB
787
788 shift64RightJamming(frac, 1 - exp, &frac);
789 if (frac & round_mask) {
790 /* Need to recompute round-to-even. */
5d64abb3
RH
791 switch (s->float_rounding_mode) {
792 case float_round_nearest_even:
6fff2167
AB
793 inc = ((frac & roundeven_mask) != frac_lsbm1
794 ? frac_lsbm1 : 0);
5d64abb3
RH
795 break;
796 case float_round_to_odd:
797 inc = frac & frac_lsb ? 0 : round_mask;
798 break;
3dede407
RH
799 default:
800 break;
6fff2167
AB
801 }
802 flags |= float_flag_inexact;
803 frac += inc;
804 }
805
806 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
807 frac >>= frac_shift;
808
809 if (is_tiny && (flags & float_flag_inexact)) {
810 flags |= float_flag_underflow;
811 }
812 if (exp == 0 && frac == 0) {
813 p.cls = float_class_zero;
814 }
815 }
816 break;
817
818 case float_class_zero:
819 do_zero:
820 exp = 0;
821 frac = 0;
822 break;
823
824 case float_class_inf:
825 do_inf:
ca3a3d5a 826 assert(!parm->arm_althp);
6fff2167
AB
827 exp = exp_max;
828 frac = 0;
829 break;
830
831 case float_class_qnan:
832 case float_class_snan:
ca3a3d5a 833 assert(!parm->arm_althp);
6fff2167 834 exp = exp_max;
94933df0 835 frac >>= parm->frac_shift;
6fff2167
AB
836 break;
837
838 default:
839 g_assert_not_reached();
840 }
841
842 float_raise(flags, s);
843 p.exp = exp;
844 p.frac = frac;
845 return p;
846}
847
f8155c1d 848static FloatParts64 return_nan(FloatParts64 a, float_status *s)
dbe4d53a 849{
57547c60
RH
850 g_assert(is_nan(a.cls));
851 if (is_snan(a.cls)) {
d82f3b2d 852 float_raise(float_flag_invalid, s);
57547c60
RH
853 if (!s->default_nan_mode) {
854 return parts_silence_nan(a, s);
dbe4d53a 855 }
57547c60
RH
856 } else if (!s->default_nan_mode) {
857 return a;
dbe4d53a 858 }
0fc07cad
RH
859 parts_default_nan(&a, s);
860 return a;
dbe4d53a
AB
861}
862
f8155c1d 863static FloatParts64 pick_nan(FloatParts64 a, FloatParts64 b, float_status *s)
6fff2167
AB
864{
865 if (is_snan(a.cls) || is_snan(b.cls)) {
d82f3b2d 866 float_raise(float_flag_invalid, s);
6fff2167
AB
867 }
868
869 if (s->default_nan_mode) {
0fc07cad 870 parts_default_nan(&a, s);
6fff2167 871 } else {
4f251cfd 872 if (pickNaN(a.cls, b.cls,
6fff2167 873 a.frac > b.frac ||
913602e3 874 (a.frac == b.frac && a.sign < b.sign), s)) {
6fff2167
AB
875 a = b;
876 }
0bcfbcbe
RH
877 if (is_snan(a.cls)) {
878 return parts_silence_nan(a, s);
879 }
6fff2167
AB
880 }
881 return a;
882}
883
f8155c1d 884static FloatParts64 pick_nan_muladd(FloatParts64 a, FloatParts64 b, FloatParts64 c,
d446830a
AB
885 bool inf_zero, float_status *s)
886{
1839189b
PM
887 int which;
888
d446830a 889 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
d82f3b2d 890 float_raise(float_flag_invalid, s);
d446830a
AB
891 }
892
3bd2dec1 893 which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
1839189b 894
d446830a 895 if (s->default_nan_mode) {
1839189b
PM
896 /* Note that this check is after pickNaNMulAdd so that function
897 * has an opportunity to set the Invalid flag.
898 */
f7e598e2 899 which = 3;
1839189b 900 }
d446830a 901
1839189b
PM
902 switch (which) {
903 case 0:
904 break;
905 case 1:
906 a = b;
907 break;
908 case 2:
909 a = c;
910 break;
911 case 3:
0fc07cad
RH
912 parts_default_nan(&a, s);
913 break;
1839189b
PM
914 default:
915 g_assert_not_reached();
d446830a 916 }
1839189b 917
0bcfbcbe
RH
918 if (is_snan(a.cls)) {
919 return parts_silence_nan(a, s);
920 }
d446830a
AB
921 return a;
922}
923
aaffb7bf
RH
924/*
925 * Pack/unpack routines with a specific FloatFmt.
926 */
927
98e256fc
RH
928static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
929 float_status *s, const FloatFmt *params)
aaffb7bf 930{
98e256fc
RH
931 float16_unpack_raw(p, f);
932 *p = sf_canonicalize(*p, params, s);
aaffb7bf
RH
933}
934
98e256fc
RH
935static void float16_unpack_canonical(FloatParts64 *p, float16 f,
936 float_status *s)
aaffb7bf 937{
98e256fc 938 float16a_unpack_canonical(p, f, s, &float16_params);
aaffb7bf
RH
939}
940
98e256fc
RH
941static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
942 float_status *s)
aaffb7bf 943{
98e256fc
RH
944 bfloat16_unpack_raw(p, f);
945 *p = sf_canonicalize(*p, &bfloat16_params, s);
aaffb7bf
RH
946}
947
948static float16 float16a_round_pack_canonical(FloatParts64 p, float_status *s,
949 const FloatFmt *params)
950{
71fd178e
RH
951 p = round_canonical(p, s, params);
952 return float16_pack_raw(&p);
aaffb7bf
RH
953}
954
955static float16 float16_round_pack_canonical(FloatParts64 p, float_status *s)
956{
957 return float16a_round_pack_canonical(p, s, &float16_params);
958}
959
960static bfloat16 bfloat16_round_pack_canonical(FloatParts64 p, float_status *s)
961{
71fd178e
RH
962 p = round_canonical(p, s, &bfloat16_params);
963 return bfloat16_pack_raw(&p);
aaffb7bf
RH
964}
965
98e256fc
RH
966static void float32_unpack_canonical(FloatParts64 *p, float32 f,
967 float_status *s)
aaffb7bf 968{
98e256fc
RH
969 float32_unpack_raw(p, f);
970 *p = sf_canonicalize(*p, &float32_params, s);
aaffb7bf
RH
971}
972
973static float32 float32_round_pack_canonical(FloatParts64 p, float_status *s)
974{
71fd178e
RH
975 p = round_canonical(p, s, &float32_params);
976 return float32_pack_raw(&p);
aaffb7bf
RH
977}
978
98e256fc
RH
979static void float64_unpack_canonical(FloatParts64 *p, float64 f,
980 float_status *s)
aaffb7bf 981{
98e256fc
RH
982 float64_unpack_raw(p, f);
983 *p = sf_canonicalize(*p, &float64_params, s);
aaffb7bf
RH
984}
985
986static float64 float64_round_pack_canonical(FloatParts64 p, float_status *s)
987{
71fd178e
RH
988 p = round_canonical(p, s, &float64_params);
989 return float64_pack_raw(&p);
aaffb7bf
RH
990}
991
6fff2167
AB
992/*
993 * Returns the result of adding or subtracting the values of the
994 * floating-point values `a' and `b'. The operation is performed
995 * according to the IEC/IEEE Standard for Binary Floating-Point
996 * Arithmetic.
997 */
998
f8155c1d 999static FloatParts64 addsub_floats(FloatParts64 a, FloatParts64 b, bool subtract,
6fff2167
AB
1000 float_status *s)
1001{
1002 bool a_sign = a.sign;
1003 bool b_sign = b.sign ^ subtract;
1004
1005 if (a_sign != b_sign) {
1006 /* Subtraction */
1007
1008 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1009 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
1010 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
1011 a.frac = a.frac - b.frac;
1012 } else {
1013 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
1014 a.frac = b.frac - a.frac;
1015 a.exp = b.exp;
1016 a_sign ^= 1;
1017 }
1018
1019 if (a.frac == 0) {
1020 a.cls = float_class_zero;
1021 a.sign = s->float_rounding_mode == float_round_down;
1022 } else {
e99c4373 1023 int shift = clz64(a.frac);
6fff2167
AB
1024 a.frac = a.frac << shift;
1025 a.exp = a.exp - shift;
1026 a.sign = a_sign;
1027 }
1028 return a;
1029 }
1030 if (is_nan(a.cls) || is_nan(b.cls)) {
1031 return pick_nan(a, b, s);
1032 }
1033 if (a.cls == float_class_inf) {
1034 if (b.cls == float_class_inf) {
1035 float_raise(float_flag_invalid, s);
0fc07cad 1036 parts_default_nan(&a, s);
6fff2167
AB
1037 }
1038 return a;
1039 }
1040 if (a.cls == float_class_zero && b.cls == float_class_zero) {
1041 a.sign = s->float_rounding_mode == float_round_down;
1042 return a;
1043 }
1044 if (a.cls == float_class_zero || b.cls == float_class_inf) {
1045 b.sign = a_sign ^ 1;
1046 return b;
1047 }
1048 if (b.cls == float_class_zero) {
1049 return a;
1050 }
1051 } else {
1052 /* Addition */
1053 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1054 if (a.exp > b.exp) {
1055 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
1056 } else if (a.exp < b.exp) {
1057 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
1058 a.exp = b.exp;
1059 }
e99c4373
RH
1060
1061 if (uadd64_overflow(a.frac, b.frac, &a.frac)) {
64d450a0 1062 shift64RightJamming(a.frac, 1, &a.frac);
e99c4373 1063 a.frac |= DECOMPOSED_IMPLICIT_BIT;
6fff2167
AB
1064 a.exp += 1;
1065 }
1066 return a;
1067 }
1068 if (is_nan(a.cls) || is_nan(b.cls)) {
1069 return pick_nan(a, b, s);
1070 }
1071 if (a.cls == float_class_inf || b.cls == float_class_zero) {
1072 return a;
1073 }
1074 if (b.cls == float_class_inf || a.cls == float_class_zero) {
1075 b.sign = b_sign;
1076 return b;
1077 }
1078 }
1079 g_assert_not_reached();
1080}
1081
1082/*
1083 * Returns the result of adding or subtracting the floating-point
1084 * values `a' and `b'. The operation is performed according to the
1085 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1086 */
1087
97ff87c0 1088float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
6fff2167 1089{
98e256fc
RH
1090 FloatParts64 pa, pb, pr;
1091
1092 float16_unpack_canonical(&pa, a, status);
1093 float16_unpack_canonical(&pb, b, status);
1094 pr = addsub_floats(pa, pb, false, status);
6fff2167
AB
1095
1096 return float16_round_pack_canonical(pr, status);
1097}
1098
1b615d48
EC
1099float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
1100{
98e256fc
RH
1101 FloatParts64 pa, pb, pr;
1102
1103 float16_unpack_canonical(&pa, a, status);
1104 float16_unpack_canonical(&pb, b, status);
1105 pr = addsub_floats(pa, pb, true, status);
1b615d48
EC
1106
1107 return float16_round_pack_canonical(pr, status);
1108}
1109
1110static float32 QEMU_SOFTFLOAT_ATTR
1111soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
6fff2167 1112{
98e256fc
RH
1113 FloatParts64 pa, pb, pr;
1114
1115 float32_unpack_canonical(&pa, a, status);
1116 float32_unpack_canonical(&pb, b, status);
1117 pr = addsub_floats(pa, pb, subtract, status);
6fff2167
AB
1118
1119 return float32_round_pack_canonical(pr, status);
1120}
1121
1b615d48
EC
1122static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
1123{
1124 return soft_f32_addsub(a, b, false, status);
1125}
1126
1127static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1128{
1129 return soft_f32_addsub(a, b, true, status);
1130}
1131
1132static float64 QEMU_SOFTFLOAT_ATTR
1133soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
6fff2167 1134{
98e256fc
RH
1135 FloatParts64 pa, pb, pr;
1136
1137 float64_unpack_canonical(&pa, a, status);
1138 float64_unpack_canonical(&pb, b, status);
1139 pr = addsub_floats(pa, pb, subtract, status);
6fff2167
AB
1140
1141 return float64_round_pack_canonical(pr, status);
1142}
1143
1b615d48 1144static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1145{
1b615d48
EC
1146 return soft_f64_addsub(a, b, false, status);
1147}
6fff2167 1148
1b615d48
EC
1149static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1150{
1151 return soft_f64_addsub(a, b, true, status);
6fff2167
AB
1152}
1153
/* Host single-precision addition. */
static float hard_f32_add(float a, float b)
{
    float sum = a + b;

    return sum;
}
6fff2167 1158
1b615d48
EC
/* Host single-precision subtraction. */
static float hard_f32_sub(float a, float b)
{
    float diff = a - b;

    return diff;
}
1163
/* Host double-precision addition. */
static double hard_f64_add(double a, double b)
{
    double sum = a + b;

    return sum;
}
6fff2167 1168
1b615d48
EC
/* Host double-precision subtraction. */
static double hard_f64_sub(double a, double b)
{
    double diff = a - b;

    return diff;
}
1173
b240c9c4 1174static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1b615d48
EC
1175{
1176 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1177 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1178 }
1179 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1180}
1181
b240c9c4 1182static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1b615d48
EC
1183{
1184 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1185 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1186 } else {
1187 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1188 }
1189}
1190
1191static float32 float32_addsub(float32 a, float32 b, float_status *s,
1192 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1193{
1194 return float32_gen2(a, b, s, hard, soft,
b240c9c4 1195 f32_is_zon2, f32_addsubmul_post);
1b615d48
EC
1196}
1197
1198static float64 float64_addsub(float64 a, float64 b, float_status *s,
1199 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1200{
1201 return float64_gen2(a, b, s, hard, soft,
b240c9c4 1202 f64_is_zon2, f64_addsubmul_post);
1b615d48
EC
1203}
1204
1205float32 QEMU_FLATTEN
1206float32_add(float32 a, float32 b, float_status *s)
1207{
1208 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1209}
1210
1211float32 QEMU_FLATTEN
1212float32_sub(float32 a, float32 b, float_status *s)
1213{
1214 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1215}
1216
1217float64 QEMU_FLATTEN
1218float64_add(float64 a, float64 b, float_status *s)
1219{
1220 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1221}
1222
1223float64 QEMU_FLATTEN
1224float64_sub(float64 a, float64 b, float_status *s)
1225{
1226 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1227}
1228
8282310d
LZ
1229/*
1230 * Returns the result of adding or subtracting the bfloat16
1231 * values `a' and `b'.
1232 */
1233bfloat16 QEMU_FLATTEN bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1234{
98e256fc
RH
1235 FloatParts64 pa, pb, pr;
1236
1237 bfloat16_unpack_canonical(&pa, a, status);
1238 bfloat16_unpack_canonical(&pb, b, status);
1239 pr = addsub_floats(pa, pb, false, status);
8282310d
LZ
1240
1241 return bfloat16_round_pack_canonical(pr, status);
1242}
1243
1244bfloat16 QEMU_FLATTEN bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1245{
98e256fc
RH
1246 FloatParts64 pa, pb, pr;
1247
1248 bfloat16_unpack_canonical(&pa, a, status);
1249 bfloat16_unpack_canonical(&pb, b, status);
1250 pr = addsub_floats(pa, pb, true, status);
8282310d
LZ
1251
1252 return bfloat16_round_pack_canonical(pr, status);
1253}
1254
74d707e2
AB
1255/*
1256 * Returns the result of multiplying the floating-point values `a' and
1257 * `b'. The operation is performed according to the IEC/IEEE Standard
1258 * for Binary Floating-Point Arithmetic.
1259 */
1260
f8155c1d 1261static FloatParts64 mul_floats(FloatParts64 a, FloatParts64 b, float_status *s)
74d707e2
AB
1262{
1263 bool sign = a.sign ^ b.sign;
1264
1265 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1266 uint64_t hi, lo;
1267 int exp = a.exp + b.exp;
1268
1269 mul64To128(a.frac, b.frac, &hi, &lo);
e99c4373 1270 if (hi & DECOMPOSED_IMPLICIT_BIT) {
74d707e2 1271 exp += 1;
e99c4373
RH
1272 } else {
1273 hi <<= 1;
74d707e2 1274 }
e99c4373 1275 hi |= (lo != 0);
74d707e2
AB
1276
1277 /* Re-use a */
1278 a.exp = exp;
1279 a.sign = sign;
e99c4373 1280 a.frac = hi;
74d707e2
AB
1281 return a;
1282 }
1283 /* handle all the NaN cases */
1284 if (is_nan(a.cls) || is_nan(b.cls)) {
1285 return pick_nan(a, b, s);
1286 }
1287 /* Inf * Zero == NaN */
1288 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
1289 (a.cls == float_class_zero && b.cls == float_class_inf)) {
d82f3b2d 1290 float_raise(float_flag_invalid, s);
0fc07cad
RH
1291 parts_default_nan(&a, s);
1292 return a;
74d707e2
AB
1293 }
1294 /* Multiply by 0 or Inf */
1295 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1296 a.sign = sign;
1297 return a;
1298 }
1299 if (b.cls == float_class_inf || b.cls == float_class_zero) {
1300 b.sign = sign;
1301 return b;
1302 }
1303 g_assert_not_reached();
1304}
1305
97ff87c0 1306float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2 1307{
98e256fc
RH
1308 FloatParts64 pa, pb, pr;
1309
1310 float16_unpack_canonical(&pa, a, status);
1311 float16_unpack_canonical(&pb, b, status);
1312 pr = mul_floats(pa, pb, status);
74d707e2
AB
1313
1314 return float16_round_pack_canonical(pr, status);
1315}
1316
2dfabc86
EC
1317static float32 QEMU_SOFTFLOAT_ATTR
1318soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2 1319{
98e256fc
RH
1320 FloatParts64 pa, pb, pr;
1321
1322 float32_unpack_canonical(&pa, a, status);
1323 float32_unpack_canonical(&pb, b, status);
1324 pr = mul_floats(pa, pb, status);
74d707e2
AB
1325
1326 return float32_round_pack_canonical(pr, status);
1327}
1328
2dfabc86
EC
1329static float64 QEMU_SOFTFLOAT_ATTR
1330soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2 1331{
98e256fc
RH
1332 FloatParts64 pa, pb, pr;
1333
1334 float64_unpack_canonical(&pa, a, status);
1335 float64_unpack_canonical(&pb, b, status);
1336 pr = mul_floats(pa, pb, status);
74d707e2
AB
1337
1338 return float64_round_pack_canonical(pr, status);
1339}
1340
2dfabc86
EC
/* Host single-precision multiplication. */
static float hard_f32_mul(float a, float b)
{
    float prod = a * b;

    return prod;
}
1345
/* Host double-precision multiplication. */
static double hard_f64_mul(double a, double b)
{
    double prod = a * b;

    return prod;
}
1350
2dfabc86
EC
1351float32 QEMU_FLATTEN
1352float32_mul(float32 a, float32 b, float_status *s)
1353{
1354 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
b240c9c4 1355 f32_is_zon2, f32_addsubmul_post);
2dfabc86
EC
1356}
1357
1358float64 QEMU_FLATTEN
1359float64_mul(float64 a, float64 b, float_status *s)
1360{
1361 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
b240c9c4 1362 f64_is_zon2, f64_addsubmul_post);
2dfabc86
EC
1363}
1364
8282310d
LZ
1365/*
1366 * Returns the result of multiplying the bfloat16
1367 * values `a' and `b'.
1368 */
1369
1370bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1371{
98e256fc
RH
1372 FloatParts64 pa, pb, pr;
1373
1374 bfloat16_unpack_canonical(&pa, a, status);
1375 bfloat16_unpack_canonical(&pb, b, status);
1376 pr = mul_floats(pa, pb, status);
8282310d
LZ
1377
1378 return bfloat16_round_pack_canonical(pr, status);
1379}
1380
d446830a
AB
1381/*
1382 * Returns the result of multiplying the floating-point values `a' and
1383 * `b' then adding 'c', with no intermediate rounding step after the
1384 * multiplication. The operation is performed according to the
1385 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
1386 * The flags argument allows the caller to select negation of the
1387 * addend, the intermediate product, or the final result. (The
1388 * difference between this and having the caller do a separate
1389 * negation is that negating externally will flip the sign bit on
1390 * NaNs.)
1391 */
1392
f8155c1d 1393static FloatParts64 muladd_floats(FloatParts64 a, FloatParts64 b, FloatParts64 c,
d446830a
AB
1394 int flags, float_status *s)
1395{
134eda00 1396 bool inf_zero, p_sign;
d446830a
AB
1397 bool sign_flip = flags & float_muladd_negate_result;
1398 FloatClass p_class;
1399 uint64_t hi, lo;
1400 int p_exp;
134eda00
RH
1401 int ab_mask, abc_mask;
1402
1403 ab_mask = float_cmask(a.cls) | float_cmask(b.cls);
1404 abc_mask = float_cmask(c.cls) | ab_mask;
1405 inf_zero = ab_mask == float_cmask_infzero;
d446830a
AB
1406
1407 /* It is implementation-defined whether the cases of (0,inf,qnan)
1408 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
1409 * they return if they do), so we have to hand this information
1410 * off to the target-specific pick-a-NaN routine.
1411 */
134eda00 1412 if (unlikely(abc_mask & float_cmask_anynan)) {
d446830a
AB
1413 return pick_nan_muladd(a, b, c, inf_zero, s);
1414 }
1415
1416 if (inf_zero) {
d82f3b2d 1417 float_raise(float_flag_invalid, s);
0fc07cad
RH
1418 parts_default_nan(&a, s);
1419 return a;
d446830a
AB
1420 }
1421
1422 if (flags & float_muladd_negate_c) {
1423 c.sign ^= 1;
1424 }
1425
1426 p_sign = a.sign ^ b.sign;
1427
1428 if (flags & float_muladd_negate_product) {
1429 p_sign ^= 1;
1430 }
1431
134eda00 1432 if (ab_mask & float_cmask_inf) {
d446830a 1433 p_class = float_class_inf;
134eda00 1434 } else if (ab_mask & float_cmask_zero) {
d446830a
AB
1435 p_class = float_class_zero;
1436 } else {
1437 p_class = float_class_normal;
1438 }
1439
1440 if (c.cls == float_class_inf) {
1441 if (p_class == float_class_inf && p_sign != c.sign) {
d82f3b2d 1442 float_raise(float_flag_invalid, s);
0fc07cad 1443 parts_default_nan(&c, s);
d446830a 1444 } else {
9793c1e2 1445 c.sign ^= sign_flip;
d446830a 1446 }
0fc07cad 1447 return c;
d446830a
AB
1448 }
1449
1450 if (p_class == float_class_inf) {
1451 a.cls = float_class_inf;
1452 a.sign = p_sign ^ sign_flip;
1453 return a;
1454 }
1455
1456 if (p_class == float_class_zero) {
1457 if (c.cls == float_class_zero) {
1458 if (p_sign != c.sign) {
1459 p_sign = s->float_rounding_mode == float_round_down;
1460 }
1461 c.sign = p_sign;
1462 } else if (flags & float_muladd_halve_result) {
1463 c.exp -= 1;
1464 }
1465 c.sign ^= sign_flip;
1466 return c;
1467 }
1468
1469 /* a & b should be normals now... */
1470 assert(a.cls == float_class_normal &&
1471 b.cls == float_class_normal);
1472
1473 p_exp = a.exp + b.exp;
1474
d446830a 1475 mul64To128(a.frac, b.frac, &hi, &lo);
d446830a 1476
e99c4373
RH
1477 /* Renormalize to the msb. */
1478 if (hi & DECOMPOSED_IMPLICIT_BIT) {
d446830a 1479 p_exp += 1;
e99c4373
RH
1480 } else {
1481 shortShift128Left(hi, lo, 1, &hi, &lo);
d446830a
AB
1482 }
1483
1484 /* + add/sub */
e99c4373 1485 if (c.cls != float_class_zero) {
d446830a
AB
1486 int exp_diff = p_exp - c.exp;
1487 if (p_sign == c.sign) {
1488 /* Addition */
1489 if (exp_diff <= 0) {
e99c4373 1490 shift64RightJamming(hi, -exp_diff, &hi);
d446830a 1491 p_exp = c.exp;
e99c4373
RH
1492 if (uadd64_overflow(hi, c.frac, &hi)) {
1493 shift64RightJamming(hi, 1, &hi);
1494 hi |= DECOMPOSED_IMPLICIT_BIT;
1495 p_exp += 1;
1496 }
d446830a 1497 } else {
e99c4373
RH
1498 uint64_t c_hi, c_lo, over;
1499 shift128RightJamming(c.frac, 0, exp_diff, &c_hi, &c_lo);
1500 add192(0, hi, lo, 0, c_hi, c_lo, &over, &hi, &lo);
1501 if (over) {
1502 shift64RightJamming(hi, 1, &hi);
1503 hi |= DECOMPOSED_IMPLICIT_BIT;
1504 p_exp += 1;
1505 }
d446830a 1506 }
d446830a
AB
1507 } else {
1508 /* Subtraction */
e99c4373 1509 uint64_t c_hi = c.frac, c_lo = 0;
d446830a
AB
1510
1511 if (exp_diff <= 0) {
1512 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1513 if (exp_diff == 0
1514 &&
1515 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1516 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1517 } else {
1518 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1519 p_sign ^= 1;
1520 p_exp = c.exp;
1521 }
1522 } else {
1523 shift128RightJamming(c_hi, c_lo,
1524 exp_diff,
1525 &c_hi, &c_lo);
1526 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1527 }
1528
1529 if (hi == 0 && lo == 0) {
1530 a.cls = float_class_zero;
1531 a.sign = s->float_rounding_mode == float_round_down;
1532 a.sign ^= sign_flip;
1533 return a;
1534 } else {
1535 int shift;
1536 if (hi != 0) {
1537 shift = clz64(hi);
1538 } else {
1539 shift = clz64(lo) + 64;
1540 }
1541 /* Normalizing to a binary point of 124 is the
1542 correct adjust for the exponent. However since we're
1543 shifting, we might as well put the binary point back
e99c4373 1544 at 63 where we really want it. Therefore shift as
d446830a
AB
1545 if we're leaving 1 bit at the top of the word, but
1546 adjust the exponent as if we're leaving 3 bits. */
e99c4373
RH
1547 shift128Left(hi, lo, shift, &hi, &lo);
1548 p_exp -= shift;
d446830a
AB
1549 }
1550 }
1551 }
e99c4373 1552 hi |= (lo != 0);
d446830a
AB
1553
1554 if (flags & float_muladd_halve_result) {
1555 p_exp -= 1;
1556 }
1557
1558 /* finally prepare our result */
1559 a.cls = float_class_normal;
1560 a.sign = p_sign ^ sign_flip;
1561 a.exp = p_exp;
e99c4373 1562 a.frac = hi;
d446830a
AB
1563
1564 return a;
1565}
1566
97ff87c0 1567float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
d446830a
AB
1568 int flags, float_status *status)
1569{
98e256fc
RH
1570 FloatParts64 pa, pb, pc, pr;
1571
1572 float16_unpack_canonical(&pa, a, status);
1573 float16_unpack_canonical(&pb, b, status);
1574 float16_unpack_canonical(&pc, c, status);
1575 pr = muladd_floats(pa, pb, pc, flags, status);
d446830a
AB
1576
1577 return float16_round_pack_canonical(pr, status);
1578}
1579
ccf770ba
EC
1580static float32 QEMU_SOFTFLOAT_ATTR
1581soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1582 float_status *status)
d446830a 1583{
98e256fc
RH
1584 FloatParts64 pa, pb, pc, pr;
1585
1586 float32_unpack_canonical(&pa, a, status);
1587 float32_unpack_canonical(&pb, b, status);
1588 float32_unpack_canonical(&pc, c, status);
1589 pr = muladd_floats(pa, pb, pc, flags, status);
d446830a
AB
1590
1591 return float32_round_pack_canonical(pr, status);
1592}
1593
ccf770ba
EC
1594static float64 QEMU_SOFTFLOAT_ATTR
1595soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1596 float_status *status)
d446830a 1597{
98e256fc
RH
1598 FloatParts64 pa, pb, pc, pr;
1599
1600 float64_unpack_canonical(&pa, a, status);
1601 float64_unpack_canonical(&pb, b, status);
1602 float64_unpack_canonical(&pc, c, status);
1603 pr = muladd_floats(pa, pb, pc, flags, status);
d446830a
AB
1604
1605 return float64_round_pack_canonical(pr, status);
1606}
1607
f6b3b108
EC
/*
 * When true, float32_muladd() and float64_muladd() skip the host FMA
 * fast path and use the software implementation.  Where this flag is
 * set is not visible in this part of the file.
 */
static bool force_soft_fma = false;
1609
ccf770ba
EC
1610float32 QEMU_FLATTEN
1611float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1612{
1613 union_float32 ua, ub, uc, ur;
1614
1615 ua.s = xa;
1616 ub.s = xb;
1617 uc.s = xc;
1618
1619 if (unlikely(!can_use_fpu(s))) {
1620 goto soft;
1621 }
1622 if (unlikely(flags & float_muladd_halve_result)) {
1623 goto soft;
1624 }
1625
1626 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1627 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1628 goto soft;
1629 }
f6b3b108
EC
1630
1631 if (unlikely(force_soft_fma)) {
1632 goto soft;
1633 }
1634
ccf770ba
EC
1635 /*
1636 * When (a || b) == 0, there's no need to check for under/over flow,
1637 * since we know the addend is (normal || 0) and the product is 0.
1638 */
1639 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1640 union_float32 up;
1641 bool prod_sign;
1642
1643 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1644 prod_sign ^= !!(flags & float_muladd_negate_product);
1645 up.s = float32_set_sign(float32_zero, prod_sign);
1646
1647 if (flags & float_muladd_negate_c) {
1648 uc.h = -uc.h;
1649 }
1650 ur.h = up.h + uc.h;
1651 } else {
896f51fb
KC
1652 union_float32 ua_orig = ua;
1653 union_float32 uc_orig = uc;
1654
ccf770ba
EC
1655 if (flags & float_muladd_negate_product) {
1656 ua.h = -ua.h;
1657 }
1658 if (flags & float_muladd_negate_c) {
1659 uc.h = -uc.h;
1660 }
1661
1662 ur.h = fmaf(ua.h, ub.h, uc.h);
1663
1664 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 1665 float_raise(float_flag_overflow, s);
ccf770ba 1666 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
896f51fb
KC
1667 ua = ua_orig;
1668 uc = uc_orig;
ccf770ba
EC
1669 goto soft;
1670 }
1671 }
1672 if (flags & float_muladd_negate_result) {
1673 return float32_chs(ur.s);
1674 }
1675 return ur.s;
1676
1677 soft:
1678 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1679}
1680
1681float64 QEMU_FLATTEN
1682float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1683{
1684 union_float64 ua, ub, uc, ur;
1685
1686 ua.s = xa;
1687 ub.s = xb;
1688 uc.s = xc;
1689
1690 if (unlikely(!can_use_fpu(s))) {
1691 goto soft;
1692 }
1693 if (unlikely(flags & float_muladd_halve_result)) {
1694 goto soft;
1695 }
1696
1697 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1698 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1699 goto soft;
1700 }
f6b3b108
EC
1701
1702 if (unlikely(force_soft_fma)) {
1703 goto soft;
1704 }
1705
ccf770ba
EC
1706 /*
1707 * When (a || b) == 0, there's no need to check for under/over flow,
1708 * since we know the addend is (normal || 0) and the product is 0.
1709 */
1710 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1711 union_float64 up;
1712 bool prod_sign;
1713
1714 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1715 prod_sign ^= !!(flags & float_muladd_negate_product);
1716 up.s = float64_set_sign(float64_zero, prod_sign);
1717
1718 if (flags & float_muladd_negate_c) {
1719 uc.h = -uc.h;
1720 }
1721 ur.h = up.h + uc.h;
1722 } else {
896f51fb
KC
1723 union_float64 ua_orig = ua;
1724 union_float64 uc_orig = uc;
1725
ccf770ba
EC
1726 if (flags & float_muladd_negate_product) {
1727 ua.h = -ua.h;
1728 }
1729 if (flags & float_muladd_negate_c) {
1730 uc.h = -uc.h;
1731 }
1732
1733 ur.h = fma(ua.h, ub.h, uc.h);
1734
1735 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 1736 float_raise(float_flag_overflow, s);
ccf770ba 1737 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
1738 ua = ua_orig;
1739 uc = uc_orig;
ccf770ba
EC
1740 goto soft;
1741 }
1742 }
1743 if (flags & float_muladd_negate_result) {
1744 return float64_chs(ur.s);
1745 }
1746 return ur.s;
1747
1748 soft:
1749 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1750}
1751
8282310d
LZ
1752/*
1753 * Returns the result of multiplying the bfloat16 values `a'
1754 * and `b' then adding 'c', with no intermediate rounding step after the
1755 * multiplication.
1756 */
1757
1758bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1759 int flags, float_status *status)
1760{
98e256fc
RH
1761 FloatParts64 pa, pb, pc, pr;
1762
1763 bfloat16_unpack_canonical(&pa, a, status);
1764 bfloat16_unpack_canonical(&pb, b, status);
1765 bfloat16_unpack_canonical(&pc, c, status);
1766 pr = muladd_floats(pa, pb, pc, flags, status);
8282310d
LZ
1767
1768 return bfloat16_round_pack_canonical(pr, status);
1769}
1770
cf07323d
AB
1771/*
1772 * Returns the result of dividing the floating-point value `a' by the
1773 * corresponding value `b'. The operation is performed according to
1774 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1775 */
1776
f8155c1d 1777static FloatParts64 div_floats(FloatParts64 a, FloatParts64 b, float_status *s)
cf07323d
AB
1778{
1779 bool sign = a.sign ^ b.sign;
1780
1781 if (a.cls == float_class_normal && b.cls == float_class_normal) {
5dfbc9e4 1782 uint64_t n0, n1, q, r;
cf07323d 1783 int exp = a.exp - b.exp;
5dfbc9e4
RH
1784
1785 /*
1786 * We want a 2*N / N-bit division to produce exactly an N-bit
1787 * result, so that we do not lose any precision and so that we
1788 * do not have to renormalize afterward. If A.frac < B.frac,
1789 * then division would produce an (N-1)-bit result; shift A left
1790 * by one to produce the an N-bit result, and decrement the
1791 * exponent to match.
1792 *
1793 * The udiv_qrnnd algorithm that we're using requires normalization,
e99c4373 1794 * i.e. the msb of the denominator must be set, which is already true.
5dfbc9e4 1795 */
cf07323d
AB
1796 if (a.frac < b.frac) {
1797 exp -= 1;
5dfbc9e4 1798 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
e99c4373
RH
1799 } else {
1800 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT, &n1, &n0);
cf07323d 1801 }
e99c4373 1802 q = udiv_qrnnd(&r, n1, n0, b.frac);
5dfbc9e4 1803
e99c4373 1804 /* Set lsb if there is a remainder, to set inexact. */
5dfbc9e4 1805 a.frac = q | (r != 0);
cf07323d
AB
1806 a.sign = sign;
1807 a.exp = exp;
1808 return a;
1809 }
1810 /* handle all the NaN cases */
1811 if (is_nan(a.cls) || is_nan(b.cls)) {
1812 return pick_nan(a, b, s);
1813 }
1814 /* 0/0 or Inf/Inf */
1815 if (a.cls == b.cls
1816 &&
1817 (a.cls == float_class_inf || a.cls == float_class_zero)) {
d82f3b2d 1818 float_raise(float_flag_invalid, s);
0fc07cad
RH
1819 parts_default_nan(&a, s);
1820 return a;
cf07323d 1821 }
9cb4e398
AB
1822 /* Inf / x or 0 / x */
1823 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1824 a.sign = sign;
1825 return a;
1826 }
cf07323d
AB
1827 /* Div 0 => Inf */
1828 if (b.cls == float_class_zero) {
d82f3b2d 1829 float_raise(float_flag_divbyzero, s);
cf07323d
AB
1830 a.cls = float_class_inf;
1831 a.sign = sign;
1832 return a;
1833 }
cf07323d
AB
1834 /* Div by Inf */
1835 if (b.cls == float_class_inf) {
1836 a.cls = float_class_zero;
1837 a.sign = sign;
1838 return a;
1839 }
1840 g_assert_not_reached();
1841}
1842
1843float16 float16_div(float16 a, float16 b, float_status *status)
1844{
98e256fc
RH
1845 FloatParts64 pa, pb, pr;
1846
1847 float16_unpack_canonical(&pa, a, status);
1848 float16_unpack_canonical(&pb, b, status);
1849 pr = div_floats(pa, pb, status);
cf07323d
AB
1850
1851 return float16_round_pack_canonical(pr, status);
1852}
1853
4a629561
EC
1854static float32 QEMU_SOFTFLOAT_ATTR
1855soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d 1856{
98e256fc
RH
1857 FloatParts64 pa, pb, pr;
1858
1859 float32_unpack_canonical(&pa, a, status);
1860 float32_unpack_canonical(&pb, b, status);
1861 pr = div_floats(pa, pb, status);
cf07323d
AB
1862
1863 return float32_round_pack_canonical(pr, status);
1864}
1865
4a629561
EC
1866static float64 QEMU_SOFTFLOAT_ATTR
1867soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d 1868{
98e256fc
RH
1869 FloatParts64 pa, pb, pr;
1870
1871 float64_unpack_canonical(&pa, a, status);
1872 float64_unpack_canonical(&pb, b, status);
1873 pr = div_floats(pa, pb, status);
cf07323d
AB
1874
1875 return float64_round_pack_canonical(pr, status);
1876}
1877
4a629561
EC
/* Host single-precision division. */
static float hard_f32_div(float a, float b)
{
    float quot = a / b;

    return quot;
}
1882
/* Host double-precision division. */
static double hard_f64_div(double a, double b)
{
    double quot = a / b;

    return quot;
}
1887
1888static bool f32_div_pre(union_float32 a, union_float32 b)
1889{
1890 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1891 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1892 fpclassify(b.h) == FP_NORMAL;
1893 }
1894 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1895}
1896
1897static bool f64_div_pre(union_float64 a, union_float64 b)
1898{
1899 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1900 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1901 fpclassify(b.h) == FP_NORMAL;
1902 }
1903 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1904}
1905
1906static bool f32_div_post(union_float32 a, union_float32 b)
1907{
1908 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1909 return fpclassify(a.h) != FP_ZERO;
1910 }
1911 return !float32_is_zero(a.s);
1912}
1913
1914static bool f64_div_post(union_float64 a, union_float64 b)
1915{
1916 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1917 return fpclassify(a.h) != FP_ZERO;
1918 }
1919 return !float64_is_zero(a.s);
1920}
1921
1922float32 QEMU_FLATTEN
1923float32_div(float32 a, float32 b, float_status *s)
1924{
1925 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
b240c9c4 1926 f32_div_pre, f32_div_post);
4a629561
EC
1927}
1928
1929float64 QEMU_FLATTEN
1930float64_div(float64 a, float64 b, float_status *s)
1931{
1932 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
b240c9c4 1933 f64_div_pre, f64_div_post);
4a629561
EC
1934}
1935
8282310d
LZ
1936/*
1937 * Returns the result of dividing the bfloat16
1938 * value `a' by the corresponding value `b'.
1939 */
1940
1941bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
1942{
98e256fc
RH
1943 FloatParts64 pa, pb, pr;
1944
1945 bfloat16_unpack_canonical(&pa, a, status);
1946 bfloat16_unpack_canonical(&pb, b, status);
1947 pr = div_floats(pa, pb, status);
8282310d
LZ
1948
1949 return bfloat16_round_pack_canonical(pr, status);
1950}
1951
6fed16b2
AB
1952/*
1953 * Float to Float conversions
1954 *
1955 * Returns the result of converting one float format to another. The
1956 * conversion is performed according to the IEC/IEEE Standard for
1957 * Binary Floating-Point Arithmetic.
1958 *
1959 * The float_to_float helper only needs to take care of raising
1960 * invalid exceptions and handling the conversion on NaNs.
1961 */
1962
f8155c1d 1963static FloatParts64 float_to_float(FloatParts64 a, const FloatFmt *dstf,
6fed16b2
AB
1964 float_status *s)
1965{
1966 if (dstf->arm_althp) {
1967 switch (a.cls) {
1968 case float_class_qnan:
1969 case float_class_snan:
1970 /* There is no NaN in the destination format. Raise Invalid
1971 * and return a zero with the sign of the input NaN.
1972 */
d82f3b2d 1973 float_raise(float_flag_invalid, s);
6fed16b2
AB
1974 a.cls = float_class_zero;
1975 a.frac = 0;
1976 a.exp = 0;
1977 break;
1978
1979 case float_class_inf:
1980 /* There is no Inf in the destination format. Raise Invalid
1981 * and return the maximum normal with the correct sign.
1982 */
d82f3b2d 1983 float_raise(float_flag_invalid, s);
6fed16b2
AB
1984 a.cls = float_class_normal;
1985 a.exp = dstf->exp_max;
1986 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1987 break;
1988
1989 default:
1990 break;
1991 }
1992 } else if (is_nan(a.cls)) {
0d40cd93 1993 return return_nan(a, s);
6fed16b2
AB
1994 }
1995 return a;
1996}
1997
1998float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1999{
2000 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
98e256fc
RH
2001 FloatParts64 pa, pr;
2002
2003 float16a_unpack_canonical(&pa, a, s, fmt16);
2004 pr = float_to_float(pa, &float32_params, s);
6fed16b2
AB
2005 return float32_round_pack_canonical(pr, s);
2006}
2007
2008float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2009{
2010 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
98e256fc
RH
2011 FloatParts64 pa, pr;
2012
2013 float16a_unpack_canonical(&pa, a, s, fmt16);
2014 pr = float_to_float(pa, &float64_params, s);
6fed16b2
AB
2015 return float64_round_pack_canonical(pr, s);
2016}
2017
2018float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2019{
2020 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
98e256fc
RH
2021 FloatParts64 pa, pr;
2022
2023 float32_unpack_canonical(&pa, a, s);
2024 pr = float_to_float(pa, fmt16, s);
6fed16b2
AB
2025 return float16a_round_pack_canonical(pr, s, fmt16);
2026}
2027
21381dcf
MK
2028static float64 QEMU_SOFTFLOAT_ATTR
2029soft_float32_to_float64(float32 a, float_status *s)
6fed16b2 2030{
98e256fc
RH
2031 FloatParts64 pa, pr;
2032
2033 float32_unpack_canonical(&pa, a, s);
2034 pr = float_to_float(pa, &float64_params, s);
6fed16b2
AB
2035 return float64_round_pack_canonical(pr, s);
2036}
2037
21381dcf
MK
2038float64 float32_to_float64(float32 a, float_status *s)
2039{
2040 if (likely(float32_is_normal(a))) {
2041 /* Widening conversion can never produce inexact results. */
2042 union_float32 uf;
2043 union_float64 ud;
2044 uf.s = a;
2045 ud.h = uf.h;
2046 return ud.s;
2047 } else if (float32_is_zero(a)) {
2048 return float64_set_sign(float64_zero, float32_is_neg(a));
2049 } else {
2050 return soft_float32_to_float64(a, s);
2051 }
2052}
2053
6fed16b2
AB
2054float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2055{
2056 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
98e256fc
RH
2057 FloatParts64 pa, pr;
2058
2059 float64_unpack_canonical(&pa, a, s);
2060 pr = float_to_float(pa, fmt16, s);
6fed16b2
AB
2061 return float16a_round_pack_canonical(pr, s, fmt16);
2062}
2063
2064float32 float64_to_float32(float64 a, float_status *s)
2065{
98e256fc
RH
2066 FloatParts64 pa, pr;
2067
2068 float64_unpack_canonical(&pa, a, s);
2069 pr = float_to_float(pa, &float32_params, s);
6fed16b2
AB
2070 return float32_round_pack_canonical(pr, s);
2071}
2072
34f0c0a9
LZ
2073float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2074{
98e256fc
RH
2075 FloatParts64 pa, pr;
2076
2077 bfloat16_unpack_canonical(&pa, a, s);
2078 pr = float_to_float(pa, &float32_params, s);
34f0c0a9
LZ
2079 return float32_round_pack_canonical(pr, s);
2080}
2081
2082float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2083{
98e256fc
RH
2084 FloatParts64 pa, pr;
2085
2086 bfloat16_unpack_canonical(&pa, a, s);
2087 pr = float_to_float(pa, &float64_params, s);
34f0c0a9
LZ
2088 return float64_round_pack_canonical(pr, s);
2089}
2090
2091bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2092{
98e256fc
RH
2093 FloatParts64 pa, pr;
2094
2095 float32_unpack_canonical(&pa, a, s);
2096 pr = float_to_float(pa, &bfloat16_params, s);
34f0c0a9
LZ
2097 return bfloat16_round_pack_canonical(pr, s);
2098}
2099
2100bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2101{
98e256fc
RH
2102 FloatParts64 pa, pr;
2103
2104 float64_unpack_canonical(&pa, a, s);
2105 pr = float_to_float(pa, &bfloat16_params, s);
34f0c0a9
LZ
2106 return bfloat16_round_pack_canonical(pr, s);
2107}
2108
dbe4d53a
AB
2109/*
2110 * Rounds the floating-point value `a' to an integer, and returns the
2111 * result as a floating-point value. The operation is performed
2112 * according to the IEC/IEEE Standard for Binary Floating-Point
2113 * Arithmetic.
2114 */
2115
f8155c1d 2116static FloatParts64 round_to_int(FloatParts64 a, FloatRoundMode rmode,
2f6c74be 2117 int scale, float_status *s)
dbe4d53a 2118{
2f6c74be
RH
2119 switch (a.cls) {
2120 case float_class_qnan:
2121 case float_class_snan:
dbe4d53a 2122 return return_nan(a, s);
dbe4d53a 2123
dbe4d53a
AB
2124 case float_class_zero:
2125 case float_class_inf:
dbe4d53a
AB
2126 /* already "integral" */
2127 break;
2f6c74be 2128
dbe4d53a 2129 case float_class_normal:
2f6c74be
RH
2130 scale = MIN(MAX(scale, -0x10000), 0x10000);
2131 a.exp += scale;
2132
dbe4d53a
AB
2133 if (a.exp >= DECOMPOSED_BINARY_POINT) {
2134 /* already integral */
2135 break;
2136 }
2137 if (a.exp < 0) {
2138 bool one;
2139 /* all fractional */
d82f3b2d 2140 float_raise(float_flag_inexact, s);
2f6c74be 2141 switch (rmode) {
dbe4d53a
AB
2142 case float_round_nearest_even:
2143 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
2144 break;
2145 case float_round_ties_away:
2146 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
2147 break;
2148 case float_round_to_zero:
2149 one = false;
2150 break;
2151 case float_round_up:
2152 one = !a.sign;
2153 break;
2154 case float_round_down:
2155 one = a.sign;
2156 break;
5d64abb3
RH
2157 case float_round_to_odd:
2158 one = true;
2159 break;
dbe4d53a
AB
2160 default:
2161 g_assert_not_reached();
2162 }
2163
2164 if (one) {
2165 a.frac = DECOMPOSED_IMPLICIT_BIT;
2166 a.exp = 0;
2167 } else {
2168 a.cls = float_class_zero;
2169 }
2170 } else {
2171 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
2172 uint64_t frac_lsbm1 = frac_lsb >> 1;
2173 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
2174 uint64_t rnd_mask = rnd_even_mask >> 1;
2175 uint64_t inc;
2176
2f6c74be 2177 switch (rmode) {
dbe4d53a
AB
2178 case float_round_nearest_even:
2179 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
2180 break;
2181 case float_round_ties_away:
2182 inc = frac_lsbm1;
2183 break;
2184 case float_round_to_zero:
2185 inc = 0;
2186 break;
2187 case float_round_up:
2188 inc = a.sign ? 0 : rnd_mask;
2189 break;
2190 case float_round_down:
2191 inc = a.sign ? rnd_mask : 0;
2192 break;
5d64abb3
RH
2193 case float_round_to_odd:
2194 inc = a.frac & frac_lsb ? 0 : rnd_mask;
2195 break;
dbe4d53a
AB
2196 default:
2197 g_assert_not_reached();
2198 }
2199
2200 if (a.frac & rnd_mask) {
d82f3b2d 2201 float_raise(float_flag_inexact, s);
e99c4373 2202 if (uadd64_overflow(a.frac, inc, &a.frac)) {
dbe4d53a 2203 a.frac >>= 1;
e99c4373 2204 a.frac |= DECOMPOSED_IMPLICIT_BIT;
dbe4d53a
AB
2205 a.exp++;
2206 }
e99c4373 2207 a.frac &= ~rnd_mask;
dbe4d53a
AB
2208 }
2209 }
2210 break;
2211 default:
2212 g_assert_not_reached();
2213 }
2214 return a;
2215}
2216
2217float16 float16_round_to_int(float16 a, float_status *s)
2218{
98e256fc
RH
2219 FloatParts64 pa, pr;
2220
2221 float16_unpack_canonical(&pa, a, s);
2222 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2223 return float16_round_pack_canonical(pr, s);
2224}
2225
2226float32 float32_round_to_int(float32 a, float_status *s)
2227{
98e256fc
RH
2228 FloatParts64 pa, pr;
2229
2230 float32_unpack_canonical(&pa, a, s);
2231 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2232 return float32_round_pack_canonical(pr, s);
2233}
2234
2235float64 float64_round_to_int(float64 a, float_status *s)
2236{
98e256fc
RH
2237 FloatParts64 pa, pr;
2238
2239 float64_unpack_canonical(&pa, a, s);
2240 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2241 return float64_round_pack_canonical(pr, s);
2242}
2243
34f0c0a9
LZ
2244/*
2245 * Rounds the bfloat16 value `a' to an integer, and returns the
2246 * result as a bfloat16 value.
2247 */
2248
2249bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2250{
98e256fc
RH
2251 FloatParts64 pa, pr;
2252
2253 bfloat16_unpack_canonical(&pa, a, s);
2254 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
34f0c0a9
LZ
2255 return bfloat16_round_pack_canonical(pr, s);
2256}
2257
ab52f973
AB
2258/*
2259 * Returns the result of converting the floating-point value `a' to
2260 * the two's complement integer format. The conversion is performed
2261 * according to the IEC/IEEE Standard for Binary Floating-Point
2262 * Arithmetic---which means in particular that the conversion is
2263 * rounded according to the current rounding mode. If `a' is a NaN,
2264 * the largest positive integer is returned. Otherwise, if the
2265 * conversion overflows, the largest integer with the same sign as `a'
2266 * is returned.
2267*/
2268
f8155c1d 2269static int64_t round_to_int_and_pack(FloatParts64 in, FloatRoundMode rmode,
3dede407 2270 int scale, int64_t min, int64_t max,
ab52f973
AB
2271 float_status *s)
2272{
2273 uint64_t r;
2274 int orig_flags = get_float_exception_flags(s);
f8155c1d 2275 FloatParts64 p = round_to_int(in, rmode, scale, s);
ab52f973
AB
2276
2277 switch (p.cls) {
2278 case float_class_snan:
2279 case float_class_qnan:
801bc563 2280 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2281 return max;
2282 case float_class_inf:
801bc563 2283 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2284 return p.sign ? min : max;
2285 case float_class_zero:
2286 return 0;
2287 case float_class_normal:
e99c4373 2288 if (p.exp <= DECOMPOSED_BINARY_POINT) {
ab52f973 2289 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
ab52f973
AB
2290 } else {
2291 r = UINT64_MAX;
2292 }
2293 if (p.sign) {
33358375 2294 if (r <= -(uint64_t) min) {
ab52f973
AB
2295 return -r;
2296 } else {
2297 s->float_exception_flags = orig_flags | float_flag_invalid;
2298 return min;
2299 }
2300 } else {
33358375 2301 if (r <= max) {
ab52f973
AB
2302 return r;
2303 } else {
2304 s->float_exception_flags = orig_flags | float_flag_invalid;
2305 return max;
2306 }
2307 }
2308 default:
2309 g_assert_not_reached();
2310 }
2311}
2312
0d93d8ec
FC
2313int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2314 float_status *s)
2315{
98e256fc
RH
2316 FloatParts64 p;
2317
2318 float16_unpack_canonical(&p, a, s);
2319 return round_to_int_and_pack(p, rmode, scale, INT8_MIN, INT8_MAX, s);
0d93d8ec
FC
2320}
2321
3dede407 2322int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2323 float_status *s)
2324{
98e256fc
RH
2325 FloatParts64 p;
2326
2327 float16_unpack_canonical(&p, a, s);
2328 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2329}
2330
3dede407 2331int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2332 float_status *s)
2333{
98e256fc
RH
2334 FloatParts64 p;
2335
2336 float16_unpack_canonical(&p, a, s);
2337 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2338}
2339
3dede407 2340int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2341 float_status *s)
2342{
98e256fc
RH
2343 FloatParts64 p;
2344
2345 float16_unpack_canonical(&p, a, s);
2346 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2347}
2348
3dede407 2349int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2350 float_status *s)
2351{
98e256fc
RH
2352 FloatParts64 p;
2353
2354 float32_unpack_canonical(&p, a, s);
2355 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2356}
2357
3dede407 2358int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2359 float_status *s)
2360{
98e256fc
RH
2361 FloatParts64 p;
2362
2363 float32_unpack_canonical(&p, a, s);
2364 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2365}
2366
3dede407 2367int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2368 float_status *s)
2369{
98e256fc
RH
2370 FloatParts64 p;
2371
2372 float32_unpack_canonical(&p, a, s);
2373 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2374}
2375
3dede407 2376int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2377 float_status *s)
2378{
98e256fc
RH
2379 FloatParts64 p;
2380
2381 float64_unpack_canonical(&p, a, s);
2382 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2383}
2384
3dede407 2385int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2386 float_status *s)
2387{
98e256fc
RH
2388 FloatParts64 p;
2389
2390 float64_unpack_canonical(&p, a, s);
2391 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2392}
2393
3dede407 2394int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2395 float_status *s)
2396{
98e256fc
RH
2397 FloatParts64 p;
2398
2399 float64_unpack_canonical(&p, a, s);
2400 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2401}
2402
0d93d8ec
FC
2403int8_t float16_to_int8(float16 a, float_status *s)
2404{
2405 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2406}
2407
2f6c74be
RH
2408int16_t float16_to_int16(float16 a, float_status *s)
2409{
2410 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2411}
2412
2413int32_t float16_to_int32(float16 a, float_status *s)
2414{
2415 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2416}
2417
2418int64_t float16_to_int64(float16 a, float_status *s)
2419{
2420 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2421}
2422
2423int16_t float32_to_int16(float32 a, float_status *s)
2424{
2425 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2426}
2427
2428int32_t float32_to_int32(float32 a, float_status *s)
2429{
2430 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2431}
2432
2433int64_t float32_to_int64(float32 a, float_status *s)
2434{
2435 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2436}
2437
2438int16_t float64_to_int16(float64 a, float_status *s)
2439{
2440 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2441}
2442
2443int32_t float64_to_int32(float64 a, float_status *s)
2444{
2445 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2446}
2447
2448int64_t float64_to_int64(float64 a, float_status *s)
2449{
2450 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2451}
2452
2453int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2454{
2455 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2456}
2457
2458int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2459{
2460 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2461}
2462
2463int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2464{
2465 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
2466}
2467
2f6c74be
RH
2468int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2469{
2470 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2471}
ab52f973 2472
2f6c74be
RH
2473int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2474{
2475 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2476}
2477
2478int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2479{
2480 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2481}
2482
2483int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2484{
2485 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2486}
ab52f973 2487
2f6c74be
RH
2488int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2489{
2490 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2491}
ab52f973 2492
2f6c74be
RH
2493int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2494{
2495 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2496}
ab52f973 2497
34f0c0a9
LZ
2498/*
2499 * Returns the result of converting the floating-point value `a' to
2500 * the two's complement integer format.
2501 */
2502
2503int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2504 float_status *s)
2505{
98e256fc
RH
2506 FloatParts64 p;
2507
2508 bfloat16_unpack_canonical(&p, a, s);
2509 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
34f0c0a9
LZ
2510}
2511
2512int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2513 float_status *s)
2514{
98e256fc
RH
2515 FloatParts64 p;
2516
2517 bfloat16_unpack_canonical(&p, a, s);
2518 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
34f0c0a9
LZ
2519}
2520
2521int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2522 float_status *s)
2523{
98e256fc
RH
2524 FloatParts64 p;
2525
2526 bfloat16_unpack_canonical(&p, a, s);
2527 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
34f0c0a9
LZ
2528}
2529
2530int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2531{
2532 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2533}
2534
2535int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2536{
2537 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2538}
2539
2540int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2541{
2542 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2543}
2544
2545int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2546{
2547 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2548}
2549
2550int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2551{
2552 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2553}
2554
2555int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2556{
2557 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2558}
2559
ab52f973
AB
/*
 * Returns the result of converting the floating-point value `a' to
 * the unsigned integer format. The conversion is performed according
 * to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode. If `a' is a NaN,
 * the largest unsigned integer is returned. Otherwise, if the
 * conversion overflows, the largest unsigned integer is returned. If
 * `a' is negative, it is rounded first: a nonzero value that rounds
 * to zero returns zero with the inexact exception flag raised, while
 * any other negative value returns zero and raises the invalid
 * exception flag.
 */
2572
f8155c1d 2573static uint64_t round_to_uint_and_pack(FloatParts64 in, FloatRoundMode rmode,
3dede407
RH
2574 int scale, uint64_t max,
2575 float_status *s)
ab52f973
AB
2576{
2577 int orig_flags = get_float_exception_flags(s);
f8155c1d 2578 FloatParts64 p = round_to_int(in, rmode, scale, s);
2f6c74be 2579 uint64_t r;
ab52f973
AB
2580
2581 switch (p.cls) {
2582 case float_class_snan:
2583 case float_class_qnan:
2584 s->float_exception_flags = orig_flags | float_flag_invalid;
2585 return max;
2586 case float_class_inf:
801bc563 2587 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2588 return p.sign ? 0 : max;
2589 case float_class_zero:
2590 return 0;
2591 case float_class_normal:
ab52f973
AB
2592 if (p.sign) {
2593 s->float_exception_flags = orig_flags | float_flag_invalid;
2594 return 0;
2595 }
2596
e99c4373 2597 if (p.exp <= DECOMPOSED_BINARY_POINT) {
ab52f973 2598 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
ab52f973
AB
2599 } else {
2600 s->float_exception_flags = orig_flags | float_flag_invalid;
2601 return max;
2602 }
2603
2604 /* For uint64 this will never trip, but if p.exp is too large
2605 * to shift a decomposed fraction we shall have exited via the
2606 * 3rd leg above.
2607 */
2608 if (r > max) {
2609 s->float_exception_flags = orig_flags | float_flag_invalid;
2610 return max;
ab52f973 2611 }
2f6c74be 2612 return r;
ab52f973
AB
2613 default:
2614 g_assert_not_reached();
2615 }
2616}
2617
0d93d8ec
FC
2618uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2619 float_status *s)
2620{
98e256fc
RH
2621 FloatParts64 p;
2622
2623 float16_unpack_canonical(&p, a, s);
2624 return round_to_uint_and_pack(p, rmode, scale, UINT8_MAX, s);
0d93d8ec
FC
2625}
2626
3dede407 2627uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2628 float_status *s)
2629{
98e256fc
RH
2630 FloatParts64 p;
2631
2632 float16_unpack_canonical(&p, a, s);
2633 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2634}
2635
3dede407 2636uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2637 float_status *s)
2638{
98e256fc
RH
2639 FloatParts64 p;
2640
2641 float16_unpack_canonical(&p, a, s);
2642 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2643}
2644
3dede407 2645uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2646 float_status *s)
2647{
98e256fc
RH
2648 FloatParts64 p;
2649
2650 float16_unpack_canonical(&p, a, s);
2651 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2652}
2653
3dede407 2654uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2655 float_status *s)
2656{
98e256fc
RH
2657 FloatParts64 p;
2658
2659 float32_unpack_canonical(&p, a, s);
2660 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2661}
2662
3dede407 2663uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2664 float_status *s)
2665{
98e256fc
RH
2666 FloatParts64 p;
2667
2668 float32_unpack_canonical(&p, a, s);
2669 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2670}
2671
3dede407 2672uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2673 float_status *s)
2674{
98e256fc
RH
2675 FloatParts64 p;
2676
2677 float32_unpack_canonical(&p, a, s);
2678 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2679}
2680
3dede407 2681uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2682 float_status *s)
2683{
98e256fc
RH
2684 FloatParts64 p;
2685
2686 float64_unpack_canonical(&p, a, s);
2687 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2688}
2689
3dede407 2690uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2691 float_status *s)
2692{
98e256fc
RH
2693 FloatParts64 p;
2694
2695 float64_unpack_canonical(&p, a, s);
2696 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2697}
2698
3dede407 2699uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2700 float_status *s)
2701{
98e256fc
RH
2702 FloatParts64 p;
2703
2704 float64_unpack_canonical(&p, a, s);
2705 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2706}
2707
0d93d8ec
FC
2708uint8_t float16_to_uint8(float16 a, float_status *s)
2709{
2710 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2711}
2712
2f6c74be
RH
2713uint16_t float16_to_uint16(float16 a, float_status *s)
2714{
2715 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2716}
2717
2718uint32_t float16_to_uint32(float16 a, float_status *s)
2719{
2720 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2721}
2722
2723uint64_t float16_to_uint64(float16 a, float_status *s)
2724{
2725 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2726}
2727
2728uint16_t float32_to_uint16(float32 a, float_status *s)
2729{
2730 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2731}
2732
2733uint32_t float32_to_uint32(float32 a, float_status *s)
2734{
2735 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2736}
2737
2738uint64_t float32_to_uint64(float32 a, float_status *s)
2739{
2740 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2741}
2742
2743uint16_t float64_to_uint16(float64 a, float_status *s)
2744{
2745 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2746}
2747
2748uint32_t float64_to_uint32(float64 a, float_status *s)
2749{
2750 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2751}
2752
2753uint64_t float64_to_uint64(float64 a, float_status *s)
2754{
2755 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2756}
2757
2758uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2759{
2760 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2761}
2762
2763uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2764{
2765 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2766}
2767
2768uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2769{
2770 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2771}
2772
2773uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2774{
2775 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2776}
2777
2778uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2779{
2780 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2781}
2782
2783uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2784{
2785 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2786}
2787
2788uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2789{
2790 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2791}
2792
2793uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2794{
2795 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2796}
2797
2798uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2799{
2800 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2801}
ab52f973 2802
34f0c0a9
LZ
2803/*
2804 * Returns the result of converting the bfloat16 value `a' to
2805 * the unsigned integer format.
2806 */
2807
2808uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2809 int scale, float_status *s)
2810{
98e256fc
RH
2811 FloatParts64 p;
2812
2813 bfloat16_unpack_canonical(&p, a, s);
2814 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
34f0c0a9
LZ
2815}
2816
2817uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2818 int scale, float_status *s)
2819{
98e256fc
RH
2820 FloatParts64 p;
2821
2822 bfloat16_unpack_canonical(&p, a, s);
2823 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
34f0c0a9
LZ
2824}
2825
2826uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
2827 int scale, float_status *s)
2828{
98e256fc
RH
2829 FloatParts64 p;
2830
2831 bfloat16_unpack_canonical(&p, a, s);
2832 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
34f0c0a9
LZ
2833}
2834
2835uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
2836{
2837 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2838}
2839
2840uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
2841{
2842 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2843}
2844
2845uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
2846{
2847 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2848}
2849
2850uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
2851{
2852 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2853}
2854
2855uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
2856{
2857 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2858}
2859
2860uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
2861{
2862 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2863}
2864
c02e1fb8
AB
2865/*
2866 * Integer to float conversions
2867 *
2868 * Returns the result of converting the two's complement integer `a'
2869 * to the floating-point format. The conversion is performed according
2870 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2871 */
2872
f8155c1d 2873static FloatParts64 int_to_float(int64_t a, int scale, float_status *status)
c02e1fb8 2874{
f8155c1d 2875 FloatParts64 r = { .sign = false };
2abdfe24 2876
c02e1fb8
AB
2877 if (a == 0) {
2878 r.cls = float_class_zero;
c02e1fb8 2879 } else {
2abdfe24
RH
2880 uint64_t f = a;
2881 int shift;
2882
2883 r.cls = float_class_normal;
c02e1fb8 2884 if (a < 0) {
2abdfe24 2885 f = -f;
c02e1fb8 2886 r.sign = true;
c02e1fb8 2887 }
e99c4373 2888 shift = clz64(f);
2abdfe24
RH
2889 scale = MIN(MAX(scale, -0x10000), 0x10000);
2890
2891 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
e99c4373 2892 r.frac = f << shift;
c02e1fb8
AB
2893 }
2894
2895 return r;
2896}
2897
2abdfe24 2898float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2899{
f8155c1d 2900 FloatParts64 pa = int_to_float(a, scale, status);
c02e1fb8
AB
2901 return float16_round_pack_canonical(pa, status);
2902}
2903
2abdfe24
RH
2904float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
2905{
2906 return int64_to_float16_scalbn(a, scale, status);
2907}
2908
2909float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
2910{
2911 return int64_to_float16_scalbn(a, scale, status);
2912}
2913
2914float16 int64_to_float16(int64_t a, float_status *status)
2915{
2916 return int64_to_float16_scalbn(a, 0, status);
2917}
2918
c02e1fb8
AB
2919float16 int32_to_float16(int32_t a, float_status *status)
2920{
2abdfe24 2921 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2922}
2923
2924float16 int16_to_float16(int16_t a, float_status *status)
2925{
2abdfe24 2926 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2927}
2928
0d93d8ec
FC
2929float16 int8_to_float16(int8_t a, float_status *status)
2930{
2931 return int64_to_float16_scalbn(a, 0, status);
2932}
2933
2abdfe24 2934float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2935{
f8155c1d 2936 FloatParts64 pa = int_to_float(a, scale, status);
c02e1fb8
AB
2937 return float32_round_pack_canonical(pa, status);
2938}
2939
2abdfe24
RH
2940float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
2941{
2942 return int64_to_float32_scalbn(a, scale, status);
2943}
2944
2945float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
2946{
2947 return int64_to_float32_scalbn(a, scale, status);
2948}
2949
2950float32 int64_to_float32(int64_t a, float_status *status)
2951{
2952 return int64_to_float32_scalbn(a, 0, status);
2953}
2954
c02e1fb8
AB
2955float32 int32_to_float32(int32_t a, float_status *status)
2956{
2abdfe24 2957 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2958}
2959
2960float32 int16_to_float32(int16_t a, float_status *status)
2961{
2abdfe24 2962 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2963}
2964
2abdfe24 2965float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2966{
f8155c1d 2967 FloatParts64 pa = int_to_float(a, scale, status);
c02e1fb8
AB
2968 return float64_round_pack_canonical(pa, status);
2969}
2970
2abdfe24
RH
2971float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
2972{
2973 return int64_to_float64_scalbn(a, scale, status);
2974}
2975
2976float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
2977{
2978 return int64_to_float64_scalbn(a, scale, status);
2979}
2980
2981float64 int64_to_float64(int64_t a, float_status *status)
2982{
2983 return int64_to_float64_scalbn(a, 0, status);
2984}
2985
c02e1fb8
AB
2986float64 int32_to_float64(int32_t a, float_status *status)
2987{
2abdfe24 2988 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2989}
2990
2991float64 int16_to_float64(int16_t a, float_status *status)
2992{
2abdfe24 2993 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2994}
2995
34f0c0a9
LZ
2996/*
2997 * Returns the result of converting the two's complement integer `a'
2998 * to the bfloat16 format.
2999 */
3000
3001bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3002{
f8155c1d 3003 FloatParts64 pa = int_to_float(a, scale, status);
34f0c0a9
LZ
3004 return bfloat16_round_pack_canonical(pa, status);
3005}
3006
3007bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3008{
3009 return int64_to_bfloat16_scalbn(a, scale, status);
3010}
3011
3012bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3013{
3014 return int64_to_bfloat16_scalbn(a, scale, status);
3015}
3016
3017bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3018{
3019 return int64_to_bfloat16_scalbn(a, 0, status);
3020}
3021
3022bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3023{
3024 return int64_to_bfloat16_scalbn(a, 0, status);
3025}
3026
3027bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3028{
3029 return int64_to_bfloat16_scalbn(a, 0, status);
3030}
c02e1fb8
AB
3031
3032/*
3033 * Unsigned Integer to float conversions
3034 *
3035 * Returns the result of converting the unsigned integer `a' to the
3036 * floating-point format. The conversion is performed according to the
3037 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3038 */
3039
f8155c1d 3040static FloatParts64 uint_to_float(uint64_t a, int scale, float_status *status)
c02e1fb8 3041{
f8155c1d 3042 FloatParts64 r = { .sign = false };
e99c4373 3043 int shift;
c02e1fb8
AB
3044
3045 if (a == 0) {
3046 r.cls = float_class_zero;
3047 } else {
2abdfe24 3048 scale = MIN(MAX(scale, -0x10000), 0x10000);
e99c4373 3049 shift = clz64(a);
c02e1fb8 3050 r.cls = float_class_normal;
e99c4373
RH
3051 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
3052 r.frac = a << shift;
c02e1fb8
AB
3053 }
3054
3055 return r;
3056}
3057
2abdfe24 3058float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3059{
f8155c1d 3060 FloatParts64 pa = uint_to_float(a, scale, status);
c02e1fb8
AB
3061 return float16_round_pack_canonical(pa, status);
3062}
3063
2abdfe24
RH
3064float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3065{
3066 return uint64_to_float16_scalbn(a, scale, status);
3067}
3068
3069float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3070{
3071 return uint64_to_float16_scalbn(a, scale, status);
3072}
3073
3074float16 uint64_to_float16(uint64_t a, float_status *status)
3075{
3076 return uint64_to_float16_scalbn(a, 0, status);
3077}
3078
c02e1fb8
AB
3079float16 uint32_to_float16(uint32_t a, float_status *status)
3080{
2abdfe24 3081 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3082}
3083
3084float16 uint16_to_float16(uint16_t a, float_status *status)
3085{
2abdfe24 3086 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3087}
3088
0d93d8ec
FC
3089float16 uint8_to_float16(uint8_t a, float_status *status)
3090{
3091 return uint64_to_float16_scalbn(a, 0, status);
3092}
3093
2abdfe24 3094float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3095{
f8155c1d 3096 FloatParts64 pa = uint_to_float(a, scale, status);
c02e1fb8
AB
3097 return float32_round_pack_canonical(pa, status);
3098}
3099
2abdfe24
RH
3100float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3101{
3102 return uint64_to_float32_scalbn(a, scale, status);
3103}
3104
3105float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3106{
3107 return uint64_to_float32_scalbn(a, scale, status);
3108}
3109
3110float32 uint64_to_float32(uint64_t a, float_status *status)
3111{
3112 return uint64_to_float32_scalbn(a, 0, status);
3113}
3114
c02e1fb8
AB
3115float32 uint32_to_float32(uint32_t a, float_status *status)
3116{
2abdfe24 3117 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3118}
3119
3120float32 uint16_to_float32(uint16_t a, float_status *status)
3121{
2abdfe24 3122 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3123}
3124
2abdfe24 3125float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3126{
f8155c1d 3127 FloatParts64 pa = uint_to_float(a, scale, status);
c02e1fb8
AB
3128 return float64_round_pack_canonical(pa, status);
3129}
3130
2abdfe24
RH
3131float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3132{
3133 return uint64_to_float64_scalbn(a, scale, status);
3134}
3135
3136float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3137{
3138 return uint64_to_float64_scalbn(a, scale, status);
3139}
3140
3141float64 uint64_to_float64(uint64_t a, float_status *status)
3142{
3143 return uint64_to_float64_scalbn(a, 0, status);
3144}
3145
c02e1fb8
AB
3146float64 uint32_to_float64(uint32_t a, float_status *status)
3147{
2abdfe24 3148 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3149}
3150
3151float64 uint16_to_float64(uint16_t a, float_status *status)
3152{
2abdfe24 3153 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3154}
3155
34f0c0a9
LZ
3156/*
3157 * Returns the result of converting the unsigned integer `a' to the
3158 * bfloat16 format.
3159 */
3160
3161bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3162{
f8155c1d 3163 FloatParts64 pa = uint_to_float(a, scale, status);
34f0c0a9
LZ
3164 return bfloat16_round_pack_canonical(pa, status);
3165}
3166
3167bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3168{
3169 return uint64_to_bfloat16_scalbn(a, scale, status);
3170}
3171
3172bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3173{
3174 return uint64_to_bfloat16_scalbn(a, scale, status);
3175}
3176
3177bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3178{
3179 return uint64_to_bfloat16_scalbn(a, 0, status);
3180}
3181
3182bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3183{
3184 return uint64_to_bfloat16_scalbn(a, 0, status);
3185}
3186
3187bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3188{
3189 return uint64_to_bfloat16_scalbn(a, 0, status);
3190}
3191
89360067
AB
3192/* Float Min/Max */
3193/* min() and max() functions. These can't be implemented as
3194 * 'compare and pick one input' because that would mishandle
3195 * NaNs and +0 vs -0.
3196 *
3197 * minnum() and maxnum() functions. These are similar to the min()
3198 * and max() functions but if one of the arguments is a QNaN and
3199 * the other is numerical then the numerical argument is returned.
3200 * SNaNs will get quietened before being returned.
3201 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
3202 * and maxNum() operations. min() and max() are the typical min/max
3203 * semantics provided by many CPUs which predate that specification.
3204 *
3205 * minnummag() and maxnummag() functions correspond to minNumMag()
3206 * and maxNumMag() from the IEEE-754 2008.
3207 */
f8155c1d 3208static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
89360067
AB
3209 bool ieee, bool ismag, float_status *s)
3210{
3211 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
3212 if (ieee) {
3213 /* Takes two floating-point values `a' and `b', one of
3214 * which is a NaN, and returns the appropriate NaN
3215 * result. If either `a' or `b' is a signaling NaN,
3216 * the invalid exception is raised.
3217 */
3218 if (is_snan(a.cls) || is_snan(b.cls)) {
3219 return pick_nan(a, b, s);
3220 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
3221 return b;
3222 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
3223 return a;
3224 }
3225 }
3226 return pick_nan(a, b, s);
3227 } else {
3228 int a_exp, b_exp;
89360067
AB
3229
3230 switch (a.cls) {
3231 case float_class_normal:
3232 a_exp = a.exp;
3233 break;
3234 case float_class_inf:
3235 a_exp = INT_MAX;
3236 break;
3237 case float_class_zero:
3238 a_exp = INT_MIN;
3239 break;
3240 default:
3241 g_assert_not_reached();
3242 break;
3243 }
3244 switch (b.cls) {
3245 case float_class_normal:
3246 b_exp = b.exp;
3247 break;
3248 case float_class_inf:
3249 b_exp = INT_MAX;
3250 break;
3251 case float_class_zero:
3252 b_exp = INT_MIN;
3253 break;
3254 default:
3255 g_assert_not_reached();
3256 break;
3257 }
3258
6245327a
EC
3259 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
3260 bool a_less = a_exp < b_exp;
3261 if (a_exp == b_exp) {
3262 a_less = a.frac < b.frac;
3263 }
3264 return a_less ^ ismin ? b : a;
89360067
AB
3265 }
3266
6245327a 3267 if (a.sign == b.sign) {
89360067
AB
3268 bool a_less = a_exp < b_exp;
3269 if (a_exp == b_exp) {
3270 a_less = a.frac < b.frac;
3271 }
6245327a 3272 return a.sign ^ a_less ^ ismin ? b : a;
89360067 3273 } else {
6245327a 3274 return a.sign ^ ismin ? b : a;
89360067
AB
3275 }
3276 }
3277}
3278
3279#define MINMAX(sz, name, ismin, isiee, ismag) \
3280float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
3281 float_status *s) \
3282{ \
98e256fc
RH
3283 FloatParts64 pa, pb, pr; \
3284 float ## sz ## _unpack_canonical(&pa, a, s); \
3285 float ## sz ## _unpack_canonical(&pb, b, s); \
3286 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
89360067
AB
3287 return float ## sz ## _round_pack_canonical(pr, s); \
3288}
3289
3290MINMAX(16, min, true, false, false)
3291MINMAX(16, minnum, true, true, false)
3292MINMAX(16, minnummag, true, true, true)
3293MINMAX(16, max, false, false, false)
3294MINMAX(16, maxnum, false, true, false)
3295MINMAX(16, maxnummag, false, true, true)
3296
3297MINMAX(32, min, true, false, false)
3298MINMAX(32, minnum, true, true, false)
3299MINMAX(32, minnummag, true, true, true)
3300MINMAX(32, max, false, false, false)
3301MINMAX(32, maxnum, false, true, false)
3302MINMAX(32, maxnummag, false, true, true)
3303
3304MINMAX(64, min, true, false, false)
3305MINMAX(64, minnum, true, true, false)
3306MINMAX(64, minnummag, true, true, true)
3307MINMAX(64, max, false, false, false)
3308MINMAX(64, maxnum, false, true, false)
3309MINMAX(64, maxnummag, false, true, true)
3310
3311#undef MINMAX
3312
8282310d
LZ
3313#define BF16_MINMAX(name, ismin, isiee, ismag) \
3314bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \
3315{ \
98e256fc
RH
3316 FloatParts64 pa, pb, pr; \
3317 bfloat16_unpack_canonical(&pa, a, s); \
3318 bfloat16_unpack_canonical(&pb, b, s); \
3319 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
8282310d
LZ
3320 return bfloat16_round_pack_canonical(pr, s); \
3321}
3322
3323BF16_MINMAX(min, true, false, false)
3324BF16_MINMAX(minnum, true, true, false)
3325BF16_MINMAX(minnummag, true, true, true)
3326BF16_MINMAX(max, false, false, false)
3327BF16_MINMAX(maxnum, false, true, false)
3328BF16_MINMAX(maxnummag, false, true, true)
3329
3330#undef BF16_MINMAX
3331
0c4c9092 3332/* Floating point compare */
f8155c1d 3333static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
71bfd65c 3334 float_status *s)
0c4c9092
AB
3335{
3336 if (is_nan(a.cls) || is_nan(b.cls)) {
3337 if (!is_quiet ||
3338 a.cls == float_class_snan ||
3339 b.cls == float_class_snan) {
d82f3b2d 3340 float_raise(float_flag_invalid, s);
0c4c9092
AB
3341 }
3342 return float_relation_unordered;
3343 }
3344
3345 if (a.cls == float_class_zero) {
3346 if (b.cls == float_class_zero) {
3347 return float_relation_equal;
3348 }
3349 return b.sign ? float_relation_greater : float_relation_less;
3350 } else if (b.cls == float_class_zero) {
3351 return a.sign ? float_relation_less : float_relation_greater;
3352 }
3353
3354 /* The only really important thing about infinity is its sign. If
3355 * both are infinities the sign marks the smallest of the two.
3356 */
3357 if (a.cls == float_class_inf) {
3358 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
3359 return float_relation_equal;
3360 }
3361 return a.sign ? float_relation_less : float_relation_greater;
3362 } else if (b.cls == float_class_inf) {
3363 return b.sign ? float_relation_greater : float_relation_less;
3364 }
3365
3366 if (a.sign != b.sign) {
3367 return a.sign ? float_relation_less : float_relation_greater;
3368 }
3369
3370 if (a.exp == b.exp) {
3371 if (a.frac == b.frac) {
3372 return float_relation_equal;
3373 }
3374 if (a.sign) {
3375 return a.frac > b.frac ?
3376 float_relation_less : float_relation_greater;
3377 } else {
3378 return a.frac > b.frac ?
3379 float_relation_greater : float_relation_less;
3380 }
3381 } else {
3382 if (a.sign) {
3383 return a.exp > b.exp ? float_relation_less : float_relation_greater;
3384 } else {
3385 return a.exp > b.exp ? float_relation_greater : float_relation_less;
3386 }
3387 }
3388}
3389
d9fe9db9
EC
3390#define COMPARE(name, attr, sz) \
3391static int attr \
3392name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
0c4c9092 3393{ \
98e256fc
RH
3394 FloatParts64 pa, pb; \
3395 float ## sz ## _unpack_canonical(&pa, a, s); \
3396 float ## sz ## _unpack_canonical(&pb, b, s); \
d9fe9db9 3397 return compare_floats(pa, pb, is_quiet, s); \
0c4c9092
AB
3398}
3399
d9fe9db9
EC
3400COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
3401COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
3402COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
0c4c9092
AB
3403
3404#undef COMPARE
3405
71bfd65c 3406FloatRelation float16_compare(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3407{
3408 return soft_f16_compare(a, b, false, s);
3409}
3410
71bfd65c 3411FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3412{
3413 return soft_f16_compare(a, b, true, s);
3414}
3415
71bfd65c 3416static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3417f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
3418{
3419 union_float32 ua, ub;
3420
3421 ua.s = xa;
3422 ub.s = xb;
3423
3424 if (QEMU_NO_HARDFLOAT) {
3425 goto soft;
3426 }
3427
3428 float32_input_flush2(&ua.s, &ub.s, s);
3429 if (isgreaterequal(ua.h, ub.h)) {
3430 if (isgreater(ua.h, ub.h)) {
3431 return float_relation_greater;
3432 }
3433 return float_relation_equal;
3434 }
3435 if (likely(isless(ua.h, ub.h))) {
3436 return float_relation_less;
3437 }
3438 /* The only condition remaining is unordered.
3439 * Fall through to set flags.
3440 */
3441 soft:
3442 return soft_f32_compare(ua.s, ub.s, is_quiet, s);
3443}
3444
71bfd65c 3445FloatRelation float32_compare(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3446{
3447 return f32_compare(a, b, false, s);
3448}
3449
71bfd65c 3450FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3451{
3452 return f32_compare(a, b, true, s);
3453}
3454
71bfd65c 3455static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3456f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
3457{
3458 union_float64 ua, ub;
3459
3460 ua.s = xa;
3461 ub.s = xb;
3462
3463 if (QEMU_NO_HARDFLOAT) {
3464 goto soft;
3465 }
3466
3467 float64_input_flush2(&ua.s, &ub.s, s);
3468 if (isgreaterequal(ua.h, ub.h)) {
3469 if (isgreater(ua.h, ub.h)) {
3470 return float_relation_greater;
3471 }
3472 return float_relation_equal;
3473 }
3474 if (likely(isless(ua.h, ub.h))) {
3475 return float_relation_less;
3476 }
3477 /* The only condition remaining is unordered.
3478 * Fall through to set flags.
3479 */
3480 soft:
3481 return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3482}
3483
71bfd65c 3484FloatRelation float64_compare(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3485{
3486 return f64_compare(a, b, false, s);
3487}
3488
71bfd65c 3489FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3490{
3491 return f64_compare(a, b, true, s);
3492}
3493
8282310d
LZ
3494static FloatRelation QEMU_FLATTEN
3495soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
3496{
98e256fc
RH
3497 FloatParts64 pa, pb;
3498
3499 bfloat16_unpack_canonical(&pa, a, s);
3500 bfloat16_unpack_canonical(&pb, b, s);
8282310d
LZ
3501 return compare_floats(pa, pb, is_quiet, s);
3502}
3503
3504FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3505{
3506 return soft_bf16_compare(a, b, false, s);
3507}
3508
3509FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3510{
3511 return soft_bf16_compare(a, b, true, s);
3512}
3513
0bfc9f19 3514/* Multiply A by 2 raised to the power N. */
f8155c1d 3515static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
0bfc9f19
AB
3516{
3517 if (unlikely(is_nan(a.cls))) {
3518 return return_nan(a, s);
3519 }
3520 if (a.cls == float_class_normal) {
f8155c1d 3521 /* The largest float type (even though not supported by FloatParts64)
ce8d4082
RH
3522 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
3523 * still allows rounding to infinity, without allowing overflow
f8155c1d 3524 * within the int32_t that backs FloatParts64.exp.
ce8d4082
RH
3525 */
3526 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
3527 a.exp += n;
3528 }
3529 return a;
3530}
3531
3532float16 float16_scalbn(float16 a, int n, float_status *status)
3533{
98e256fc
RH
3534 FloatParts64 pa, pr;
3535
3536 float16_unpack_canonical(&pa, a, status);
3537 pr = scalbn_decomposed(pa, n, status);
0bfc9f19
AB
3538 return float16_round_pack_canonical(pr, status);
3539}
3540
3541float32 float32_scalbn(float32 a, int n, float_status *status)
3542{
98e256fc
RH
3543 FloatParts64 pa, pr;
3544
3545 float32_unpack_canonical(&pa, a, status);
3546 pr = scalbn_decomposed(pa, n, status);
0bfc9f19
AB
3547 return float32_round_pack_canonical(pr, status);
3548}
3549
3550float64 float64_scalbn(float64 a, int n, float_status *status)
3551{
98e256fc
RH
3552 FloatParts64 pa, pr;
3553
3554 float64_unpack_canonical(&pa, a, status);
3555 pr = scalbn_decomposed(pa, n, status);
0bfc9f19
AB
3556 return float64_round_pack_canonical(pr, status);
3557}
3558
8282310d
LZ
3559bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3560{
98e256fc
RH
3561 FloatParts64 pa, pr;
3562
3563 bfloat16_unpack_canonical(&pa, a, status);
3564 pr = scalbn_decomposed(pa, n, status);
8282310d
LZ
3565 return bfloat16_round_pack_canonical(pr, status);
3566}
3567
c13bb2da
AB
3568/*
3569 * Square Root
3570 *
3571 * The old softfloat code did an approximation step before zeroing in
3572 * on the final result. However for simplicity we just compute the
3573 * square root by iterating down from the implicit bit to enough extra
3574 * bits to ensure we get a correctly rounded result.
3575 *
3576 * This does mean however the calculation is slower than before,
3577 * especially for 64 bit floats.
3578 */
3579
f8155c1d 3580static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
c13bb2da
AB
3581{
3582 uint64_t a_frac, r_frac, s_frac;
3583 int bit, last_bit;
3584
3585 if (is_nan(a.cls)) {
3586 return return_nan(a, s);
3587 }
3588 if (a.cls == float_class_zero) {
3589 return a; /* sqrt(+-0) = +-0 */
3590 }
3591 if (a.sign) {
d82f3b2d 3592 float_raise(float_flag_invalid, s);
0fc07cad
RH
3593 parts_default_nan(&a, s);
3594 return a;
c13bb2da
AB
3595 }
3596 if (a.cls == float_class_inf) {
3597 return a; /* sqrt(+inf) = +inf */
3598 }
3599
3600 assert(a.cls == float_class_normal);
3601
3602 /* We need two overflow bits at the top. Adding room for that is a
3603 * right shift. If the exponent is odd, we can discard the low bit
3604 * by multiplying the fraction by 2; that's a left shift. Combine
e99c4373 3605 * those and we shift right by 1 if the exponent is odd, otherwise 2.
c13bb2da 3606 */
e99c4373 3607 a_frac = a.frac >> (2 - (a.exp & 1));
c13bb2da
AB
3608 a.exp >>= 1;
3609
3610 /* Bit-by-bit computation of sqrt. */
3611 r_frac = 0;
3612 s_frac = 0;
3613
3614 /* Iterate from implicit bit down to the 3 extra bits to compute a
e99c4373
RH
3615 * properly rounded result. Remember we've inserted two more bits
3616 * at the top, so these positions are two less.
c13bb2da 3617 */
e99c4373 3618 bit = DECOMPOSED_BINARY_POINT - 2;
c13bb2da
AB
3619 last_bit = MAX(p->frac_shift - 4, 0);
3620 do {
3621 uint64_t q = 1ULL << bit;
3622 uint64_t t_frac = s_frac + q;
3623 if (t_frac <= a_frac) {
3624 s_frac = t_frac + q;
3625 a_frac -= t_frac;
3626 r_frac += q;
3627 }
3628 a_frac <<= 1;
3629 } while (--bit >= last_bit);
3630
3631 /* Undo the right shift done above. If there is any remaining
3632 * fraction, the result is inexact. Set the sticky bit.
3633 */
e99c4373 3634 a.frac = (r_frac << 2) + (a_frac != 0);
c13bb2da
AB
3635
3636 return a;
3637}
3638
97ff87c0 3639float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da 3640{
98e256fc
RH
3641 FloatParts64 pa, pr;
3642
3643 float16_unpack_canonical(&pa, a, status);
3644 pr = sqrt_float(pa, status, &float16_params);
c13bb2da
AB
3645 return float16_round_pack_canonical(pr, status);
3646}
3647
f131bae8
EC
3648static float32 QEMU_SOFTFLOAT_ATTR
3649soft_f32_sqrt(float32 a, float_status *status)
c13bb2da 3650{
98e256fc
RH
3651 FloatParts64 pa, pr;
3652
3653 float32_unpack_canonical(&pa, a, status);
3654 pr = sqrt_float(pa, status, &float32_params);
c13bb2da
AB
3655 return float32_round_pack_canonical(pr, status);
3656}
3657
f131bae8
EC
3658static float64 QEMU_SOFTFLOAT_ATTR
3659soft_f64_sqrt(float64 a, float_status *status)
c13bb2da 3660{
98e256fc
RH
3661 FloatParts64 pa, pr;
3662
3663 float64_unpack_canonical(&pa, a, status);
3664 pr = sqrt_float(pa, status, &float64_params);
c13bb2da
AB
3665 return float64_round_pack_canonical(pr, status);
3666}
3667
f131bae8
EC
3668float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3669{
3670 union_float32 ua, ur;
3671
3672 ua.s = xa;
3673 if (unlikely(!can_use_fpu(s))) {
3674 goto soft;
3675 }
3676
3677 float32_input_flush1(&ua.s, s);
3678 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3679 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3680 fpclassify(ua.h) == FP_ZERO) ||
3681 signbit(ua.h))) {
3682 goto soft;
3683 }
3684 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3685 float32_is_neg(ua.s))) {
3686 goto soft;
3687 }
3688 ur.h = sqrtf(ua.h);
3689 return ur.s;
3690
3691 soft:
3692 return soft_f32_sqrt(ua.s, s);
3693}
3694
3695float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3696{
3697 union_float64 ua, ur;
3698
3699 ua.s = xa;
3700 if (unlikely(!can_use_fpu(s))) {
3701 goto soft;
3702 }
3703
3704 float64_input_flush1(&ua.s, s);
3705 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3706 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3707 fpclassify(ua.h) == FP_ZERO) ||
3708 signbit(ua.h))) {
3709 goto soft;
3710 }
3711 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3712 float64_is_neg(ua.s))) {
3713 goto soft;
3714 }
3715 ur.h = sqrt(ua.h);
3716 return ur.s;
3717
3718 soft:
3719 return soft_f64_sqrt(ua.s, s);
3720}
3721
8282310d
LZ
3722bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3723{
98e256fc
RH
3724 FloatParts64 pa, pr;
3725
3726 bfloat16_unpack_canonical(&pa, a, status);
3727 pr = sqrt_float(pa, status, &bfloat16_params);
8282310d
LZ
3728 return bfloat16_round_pack_canonical(pr, status);
3729}
3730
0218a16e
RH
3731/*----------------------------------------------------------------------------
3732| The pattern for a default generated NaN.
3733*----------------------------------------------------------------------------*/
3734
3735float16 float16_default_nan(float_status *status)
3736{
0fc07cad
RH
3737 FloatParts64 p;
3738
3739 parts_default_nan(&p, status);
0218a16e 3740 p.frac >>= float16_params.frac_shift;
71fd178e 3741 return float16_pack_raw(&p);
0218a16e
RH
3742}
3743
3744float32 float32_default_nan(float_status *status)
3745{
0fc07cad
RH
3746 FloatParts64 p;
3747
3748 parts_default_nan(&p, status);
0218a16e 3749 p.frac >>= float32_params.frac_shift;
71fd178e 3750 return float32_pack_raw(&p);
0218a16e
RH
3751}
3752
3753float64 float64_default_nan(float_status *status)
3754{
0fc07cad
RH
3755 FloatParts64 p;
3756
3757 parts_default_nan(&p, status);
0218a16e 3758 p.frac >>= float64_params.frac_shift;
71fd178e 3759 return float64_pack_raw(&p);
0218a16e
RH
3760}
3761
3762float128 float128_default_nan(float_status *status)
3763{
0fc07cad 3764 FloatParts64 p;
0218a16e
RH
3765 float128 r;
3766
0fc07cad 3767 parts_default_nan(&p, status);
0218a16e
RH
3768 /* Extrapolate from the choices made by parts_default_nan to fill
3769 * in the quad-floating format. If the low bit is set, assume we
3770 * want to set all non-snan bits.
3771 */
3772 r.low = -(p.frac & 1);
3773 r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
e9321124 3774 r.high |= UINT64_C(0x7FFF000000000000);
0218a16e
RH
3775 r.high |= (uint64_t)p.sign << 63;
3776
3777 return r;
3778}
c13bb2da 3779
8282310d
LZ
3780bfloat16 bfloat16_default_nan(float_status *status)
3781{
0fc07cad
RH
3782 FloatParts64 p;
3783
3784 parts_default_nan(&p, status);
8282310d 3785 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3786 return bfloat16_pack_raw(&p);
8282310d
LZ
3787}
3788
158142c2 3789/*----------------------------------------------------------------------------
377ed926
RH
3790| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3791*----------------------------------------------------------------------------*/
3792
3793float16 float16_silence_nan(float16 a, float_status *status)
3794{
3dddb203
RH
3795 FloatParts64 p;
3796
3797 float16_unpack_raw(&p, a);
377ed926
RH
3798 p.frac <<= float16_params.frac_shift;
3799 p = parts_silence_nan(p, status);
3800 p.frac >>= float16_params.frac_shift;
71fd178e 3801 return float16_pack_raw(&p);
377ed926
RH
3802}
3803
3804float32 float32_silence_nan(float32 a, float_status *status)
3805{
3dddb203
RH
3806 FloatParts64 p;
3807
3808 float32_unpack_raw(&p, a);
377ed926
RH
3809 p.frac <<= float32_params.frac_shift;
3810 p = parts_silence_nan(p, status);
3811 p.frac >>= float32_params.frac_shift;
71fd178e 3812 return float32_pack_raw(&p);
377ed926
RH
3813}
3814
3815float64 float64_silence_nan(float64 a, float_status *status)
3816{
3dddb203
RH
3817 FloatParts64 p;
3818
3819 float64_unpack_raw(&p, a);
377ed926
RH
3820 p.frac <<= float64_params.frac_shift;
3821 p = parts_silence_nan(p, status);
3822 p.frac >>= float64_params.frac_shift;
71fd178e 3823 return float64_pack_raw(&p);
377ed926
RH
3824}
3825
8282310d
LZ
3826bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3827{
3dddb203
RH
3828 FloatParts64 p;
3829
3830 bfloat16_unpack_raw(&p, a);
8282310d
LZ
3831 p.frac <<= bfloat16_params.frac_shift;
3832 p = parts_silence_nan(p, status);
3833 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3834 return bfloat16_pack_raw(&p);
8282310d 3835}
e6b405fe
AB
3836
3837/*----------------------------------------------------------------------------
3838| If `a' is denormal and we are in flush-to-zero mode then set the
3839| input-denormal exception and return zero. Otherwise just return the value.
3840*----------------------------------------------------------------------------*/
3841
f8155c1d 3842static bool parts_squash_denormal(FloatParts64 p, float_status *status)
e6b405fe
AB
3843{
3844 if (p.exp == 0 && p.frac != 0) {
3845 float_raise(float_flag_input_denormal, status);
3846 return true;
3847 }
3848
3849 return false;
3850}
3851
3852float16 float16_squash_input_denormal(float16 a, float_status *status)
3853{
3854 if (status->flush_inputs_to_zero) {
3dddb203
RH
3855 FloatParts64 p;
3856
3857 float16_unpack_raw(&p, a);
e6b405fe
AB
3858 if (parts_squash_denormal(p, status)) {
3859 return float16_set_sign(float16_zero, p.sign);
3860 }
3861 }
3862 return a;
3863}
3864
3865float32 float32_squash_input_denormal(float32 a, float_status *status)
3866{
3867 if (status->flush_inputs_to_zero) {
3dddb203
RH
3868 FloatParts64 p;
3869
3870 float32_unpack_raw(&p, a);
e6b405fe
AB
3871 if (parts_squash_denormal(p, status)) {
3872 return float32_set_sign(float32_zero, p.sign);
3873 }
3874 }
3875 return a;
3876}
3877
3878float64 float64_squash_input_denormal(float64 a, float_status *status)
3879{
3880 if (status->flush_inputs_to_zero) {
3dddb203
RH
3881 FloatParts64 p;
3882
3883 float64_unpack_raw(&p, a);
e6b405fe
AB
3884 if (parts_squash_denormal(p, status)) {
3885 return float64_set_sign(float64_zero, p.sign);
3886 }
3887 }
3888 return a;
3889}
3890
8282310d
LZ
3891bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3892{
3893 if (status->flush_inputs_to_zero) {
3dddb203
RH
3894 FloatParts64 p;
3895
3896 bfloat16_unpack_raw(&p, a);
8282310d
LZ
3897 if (parts_squash_denormal(p, status)) {
3898 return bfloat16_set_sign(bfloat16_zero, p.sign);
3899 }
3900 }
3901 return a;
3902}
3903
377ed926 3904/*----------------------------------------------------------------------------
158142c2
FB
3905| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3906| and 7, and returns the properly rounded 32-bit integer corresponding to the
3907| input. If `zSign' is 1, the input is negated before being converted to an
3908| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3909| is simply rounded to an integer, with the inexact exception raised if the
3910| input cannot be represented exactly as an integer. However, if the fixed-
3911| point input is too large, the invalid exception is raised and the largest
3912| positive or negative integer is returned.
3913*----------------------------------------------------------------------------*/
3914
c120391c
RH
3915static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
3916 float_status *status)
158142c2 3917{
8f506c70 3918 int8_t roundingMode;
c120391c 3919 bool roundNearestEven;
8f506c70 3920 int8_t roundIncrement, roundBits;
760e1416 3921 int32_t z;
158142c2 3922
a2f2d288 3923 roundingMode = status->float_rounding_mode;
158142c2 3924 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3925 switch (roundingMode) {
3926 case float_round_nearest_even:
f9288a76 3927 case float_round_ties_away:
dc355b76
PM
3928 roundIncrement = 0x40;
3929 break;
3930 case float_round_to_zero:
3931 roundIncrement = 0;
3932 break;
3933 case float_round_up:
3934 roundIncrement = zSign ? 0 : 0x7f;
3935 break;
3936 case float_round_down:
3937 roundIncrement = zSign ? 0x7f : 0;
3938 break;
5d64abb3
RH
3939 case float_round_to_odd:
3940 roundIncrement = absZ & 0x80 ? 0 : 0x7f;
3941 break;
dc355b76
PM
3942 default:
3943 abort();
158142c2
FB
3944 }
3945 roundBits = absZ & 0x7F;
3946 absZ = ( absZ + roundIncrement )>>7;
40662886
PMD
3947 if (!(roundBits ^ 0x40) && roundNearestEven) {
3948 absZ &= ~1;
3949 }
158142c2
FB
3950 z = absZ;
3951 if ( zSign ) z = - z;
3952 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 3953 float_raise(float_flag_invalid, status);
2c217da0 3954 return zSign ? INT32_MIN : INT32_MAX;
158142c2 3955 }
a2f2d288 3956 if (roundBits) {
d82f3b2d 3957 float_raise(float_flag_inexact, status);
a2f2d288 3958 }
158142c2
FB
3959 return z;
3960
3961}
3962
3963/*----------------------------------------------------------------------------
3964| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3965| `absZ1', with binary point between bits 63 and 64 (between the input words),
3966| and returns the properly rounded 64-bit integer corresponding to the input.
3967| If `zSign' is 1, the input is negated before being converted to an integer.
3968| Ordinarily, the fixed-point input is simply rounded to an integer, with
3969| the inexact exception raised if the input cannot be represented exactly as
3970| an integer. However, if the fixed-point input is too large, the invalid
3971| exception is raised and the largest positive or negative integer is
3972| returned.
3973*----------------------------------------------------------------------------*/
3974
c120391c 3975static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 3976 float_status *status)
158142c2 3977{
8f506c70 3978 int8_t roundingMode;
c120391c 3979 bool roundNearestEven, increment;
760e1416 3980 int64_t z;
158142c2 3981
a2f2d288 3982 roundingMode = status->float_rounding_mode;
158142c2 3983 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3984 switch (roundingMode) {
3985 case float_round_nearest_even:
f9288a76 3986 case float_round_ties_away:
dc355b76
PM
3987 increment = ((int64_t) absZ1 < 0);
3988 break;
3989 case float_round_to_zero:
3990 increment = 0;
3991 break;
3992 case float_round_up:
3993 increment = !zSign && absZ1;
3994 break;
3995 case float_round_down:
3996 increment = zSign && absZ1;
3997 break;
5d64abb3
RH
3998 case float_round_to_odd:
3999 increment = !(absZ0 & 1) && absZ1;
4000 break;
dc355b76
PM
4001 default:
4002 abort();
158142c2
FB
4003 }
4004 if ( increment ) {
4005 ++absZ0;
4006 if ( absZ0 == 0 ) goto overflow;
40662886
PMD
4007 if (!(absZ1 << 1) && roundNearestEven) {
4008 absZ0 &= ~1;
4009 }
158142c2
FB
4010 }
4011 z = absZ0;
4012 if ( zSign ) z = - z;
4013 if ( z && ( ( z < 0 ) ^ zSign ) ) {
4014 overflow:
ff32e16e 4015 float_raise(float_flag_invalid, status);
2c217da0 4016 return zSign ? INT64_MIN : INT64_MAX;
158142c2 4017 }
a2f2d288 4018 if (absZ1) {
d82f3b2d 4019 float_raise(float_flag_inexact, status);
a2f2d288 4020 }
158142c2
FB
4021 return z;
4022
4023}
4024
fb3ea83a
TM
4025/*----------------------------------------------------------------------------
4026| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4027| `absZ1', with binary point between bits 63 and 64 (between the input words),
4028| and returns the properly rounded 64-bit unsigned integer corresponding to the
4029| input. Ordinarily, the fixed-point input is simply rounded to an integer,
4030| with the inexact exception raised if the input cannot be represented exactly
4031| as an integer. However, if the fixed-point input is too large, the invalid
4032| exception is raised and the largest unsigned integer is returned.
4033*----------------------------------------------------------------------------*/
4034
c120391c 4035static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
e5a41ffa 4036 uint64_t absZ1, float_status *status)
fb3ea83a 4037{
8f506c70 4038 int8_t roundingMode;
c120391c 4039 bool roundNearestEven, increment;
fb3ea83a 4040
a2f2d288 4041 roundingMode = status->float_rounding_mode;
fb3ea83a 4042 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
4043 switch (roundingMode) {
4044 case float_round_nearest_even:
f9288a76 4045 case float_round_ties_away:
dc355b76
PM
4046 increment = ((int64_t)absZ1 < 0);
4047 break;
4048 case float_round_to_zero:
4049 increment = 0;
4050 break;
4051 case float_round_up:
4052 increment = !zSign && absZ1;
4053 break;
4054 case float_round_down:
4055 increment = zSign && absZ1;
4056 break;
5d64abb3
RH
4057 case float_round_to_odd:
4058 increment = !(absZ0 & 1) && absZ1;
4059 break;
dc355b76
PM
4060 default:
4061 abort();
fb3ea83a
TM
4062 }
4063 if (increment) {
4064 ++absZ0;
4065 if (absZ0 == 0) {
ff32e16e 4066 float_raise(float_flag_invalid, status);
2c217da0 4067 return UINT64_MAX;
fb3ea83a 4068 }
40662886
PMD
4069 if (!(absZ1 << 1) && roundNearestEven) {
4070 absZ0 &= ~1;
4071 }
fb3ea83a
TM
4072 }
4073
4074 if (zSign && absZ0) {
ff32e16e 4075 float_raise(float_flag_invalid, status);
fb3ea83a
TM
4076 return 0;
4077 }
4078
4079 if (absZ1) {
d82f3b2d 4080 float_raise(float_flag_inexact, status);
fb3ea83a
TM
4081 }
4082 return absZ0;
4083}
4084
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision significand `aSig'.  The
| significand is shifted left until its leading 1 sits in bit 23; the shifted
| value is written to `*zSigPtr' and the matching exponent (1 minus the shift
| distance) to `*zExpPtr'.
*----------------------------------------------------------------------------*/

static void
normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* clz32 counts the zeros above the leading 1; 8 of them are kept. */
    const int8_t leftShift = clz32(aSig) - 8;

    *zSigPtr = aSig << leftShift;
    *zExpPtr = 1 - leftShift;
}
4102
158142c2
FB
4103/*----------------------------------------------------------------------------
4104| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4105| and significand `zSig', and returns the proper single-precision floating-
4106| point value corresponding to the abstract input. Ordinarily, the abstract
4107| value is simply rounded and packed into the single-precision format, with
4108| the inexact exception raised if the abstract input cannot be represented
4109| exactly. However, if the abstract value is too large, the overflow and
4110| inexact exceptions are raised and an infinity or maximal finite value is
4111| returned. If the abstract value is too small, the input value is rounded to
4112| a subnormal number, and the underflow and inexact exceptions are raised if
4113| the abstract input cannot be represented exactly as a subnormal single-
4114| precision floating-point number.
4115| The input significand `zSig' has its binary point between bits 30
4116| and 29, which is 7 bits to the left of the usual location. This shifted
4117| significand must be normalized or smaller. If `zSig' is not normalized,
4118| `zExp' must be 0; in that case, the result returned is a subnormal number,
4119| and it must not require rounding. In the usual case that `zSig' is
4120| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4121| The handling of underflow and overflow follows the IEC/IEEE Standard for
4122| Binary Floating-Point Arithmetic.
4123*----------------------------------------------------------------------------*/
4124
c120391c 4125static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4126 float_status *status)
158142c2 4127{
8f506c70 4128 int8_t roundingMode;
c120391c 4129 bool roundNearestEven;
8f506c70 4130 int8_t roundIncrement, roundBits;
c120391c 4131 bool isTiny;
158142c2 4132
a2f2d288 4133 roundingMode = status->float_rounding_mode;
158142c2 4134 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4135 switch (roundingMode) {
4136 case float_round_nearest_even:
f9288a76 4137 case float_round_ties_away:
dc355b76
PM
4138 roundIncrement = 0x40;
4139 break;
4140 case float_round_to_zero:
4141 roundIncrement = 0;
4142 break;
4143 case float_round_up:
4144 roundIncrement = zSign ? 0 : 0x7f;
4145 break;
4146 case float_round_down:
4147 roundIncrement = zSign ? 0x7f : 0;
4148 break;
5d64abb3
RH
4149 case float_round_to_odd:
4150 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4151 break;
dc355b76
PM
4152 default:
4153 abort();
4154 break;
158142c2
FB
4155 }
4156 roundBits = zSig & 0x7F;
bb98fe42 4157 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
4158 if ( ( 0xFD < zExp )
4159 || ( ( zExp == 0xFD )
bb98fe42 4160 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4161 ) {
5d64abb3
RH
4162 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4163 roundIncrement != 0;
ff32e16e 4164 float_raise(float_flag_overflow | float_flag_inexact, status);
5d64abb3 4165 return packFloat32(zSign, 0xFF, -!overflow_to_inf);
158142c2
FB
4166 }
4167 if ( zExp < 0 ) {
a2f2d288 4168 if (status->flush_to_zero) {
ff32e16e 4169 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4170 return packFloat32(zSign, 0, 0);
4171 }
a828b373
RH
4172 isTiny = status->tininess_before_rounding
4173 || (zExp < -1)
4174 || (zSig + roundIncrement < 0x80000000);
158142c2
FB
4175 shift32RightJamming( zSig, - zExp, &zSig );
4176 zExp = 0;
4177 roundBits = zSig & 0x7F;
ff32e16e
PM
4178 if (isTiny && roundBits) {
4179 float_raise(float_flag_underflow, status);
4180 }
5d64abb3
RH
4181 if (roundingMode == float_round_to_odd) {
4182 /*
4183 * For round-to-odd case, the roundIncrement depends on
4184 * zSig which just changed.
4185 */
4186 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4187 }
158142c2
FB
4188 }
4189 }
a2f2d288 4190 if (roundBits) {
d82f3b2d 4191 float_raise(float_flag_inexact, status);
a2f2d288 4192 }
158142c2 4193 zSig = ( zSig + roundIncrement )>>7;
40662886
PMD
4194 if (!(roundBits ^ 0x40) && roundNearestEven) {
4195 zSig &= ~1;
4196 }
158142c2
FB
4197 if ( zSig == 0 ) zExp = 0;
4198 return packFloat32( zSign, zExp, zSig );
4199
4200}
4201
4202/*----------------------------------------------------------------------------
4203| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4204| and significand `zSig', and returns the proper single-precision floating-
4205| point value corresponding to the abstract input. This routine is just like
4206| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4207| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4208| floating-point exponent.
4209*----------------------------------------------------------------------------*/
4210
4211static float32
c120391c 4212 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4213 float_status *status)
158142c2 4214{
8f506c70 4215 int8_t shiftCount;
158142c2 4216
0019d5c3 4217 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
4218 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4219 status);
158142c2
FB
4220
4221}
4222
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal double-precision significand `aSig'.  The
| significand is shifted left until its leading 1 sits in bit 52; the shifted
| value is written to `*zSigPtr' and the matching exponent (1 minus the shift
| distance) to `*zExpPtr'.
*----------------------------------------------------------------------------*/

static void
normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /* clz64 counts the zeros above the leading 1; 11 of them are kept. */
    const int8_t leftShift = clz64(aSig) - 11;

    *zSigPtr = aSig << leftShift;
    *zExpPtr = 1 - leftShift;
}
4240
4241/*----------------------------------------------------------------------------
4242| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4243| double-precision floating-point value, returning the result. After being
4244| shifted into the proper positions, the three fields are simply added
4245| together to form the result. This means that any integer portion of `zSig'
4246| will be added into the exponent. Since a properly normalized significand
4247| will have an integer portion equal to 1, the `zExp' input should be 1 less
4248| than the desired result exponent whenever `zSig' is a complete, normalized
4249| significand.
4250*----------------------------------------------------------------------------*/
4251
c120391c 4252static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
158142c2
FB
4253{
4254
f090c9d4 4255 return make_float64(
bb98fe42 4256 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
4257
4258}
4259
4260/*----------------------------------------------------------------------------
4261| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4262| and significand `zSig', and returns the proper double-precision floating-
4263| point value corresponding to the abstract input. Ordinarily, the abstract
4264| value is simply rounded and packed into the double-precision format, with
4265| the inexact exception raised if the abstract input cannot be represented
4266| exactly. However, if the abstract value is too large, the overflow and
4267| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
4268| returned. If the abstract value is too small, the input value is rounded to
4269| a subnormal number, and the underflow and inexact exceptions are raised if
4270| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
4271| precision floating-point number.
4272| The input significand `zSig' has its binary point between bits 62
4273| and 61, which is 10 bits to the left of the usual location. This shifted
4274| significand must be normalized or smaller. If `zSig' is not normalized,
4275| `zExp' must be 0; in that case, the result returned is a subnormal number,
4276| and it must not require rounding. In the usual case that `zSig' is
4277| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4278| The handling of underflow and overflow follows the IEC/IEEE Standard for
4279| Binary Floating-Point Arithmetic.
4280*----------------------------------------------------------------------------*/
4281
c120391c 4282static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4283 float_status *status)
158142c2 4284{
8f506c70 4285 int8_t roundingMode;
c120391c 4286 bool roundNearestEven;
0c48262d 4287 int roundIncrement, roundBits;
c120391c 4288 bool isTiny;
158142c2 4289
a2f2d288 4290 roundingMode = status->float_rounding_mode;
158142c2 4291 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4292 switch (roundingMode) {
4293 case float_round_nearest_even:
f9288a76 4294 case float_round_ties_away:
dc355b76
PM
4295 roundIncrement = 0x200;
4296 break;
4297 case float_round_to_zero:
4298 roundIncrement = 0;
4299 break;
4300 case float_round_up:
4301 roundIncrement = zSign ? 0 : 0x3ff;
4302 break;
4303 case float_round_down:
4304 roundIncrement = zSign ? 0x3ff : 0;
4305 break;
9ee6f678
BR
4306 case float_round_to_odd:
4307 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4308 break;
dc355b76
PM
4309 default:
4310 abort();
158142c2
FB
4311 }
4312 roundBits = zSig & 0x3FF;
bb98fe42 4313 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
4314 if ( ( 0x7FD < zExp )
4315 || ( ( zExp == 0x7FD )
bb98fe42 4316 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4317 ) {
9ee6f678
BR
4318 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4319 roundIncrement != 0;
ff32e16e 4320 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 4321 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
4322 }
4323 if ( zExp < 0 ) {
a2f2d288 4324 if (status->flush_to_zero) {
ff32e16e 4325 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4326 return packFloat64(zSign, 0, 0);
4327 }
a828b373
RH
4328 isTiny = status->tininess_before_rounding
4329 || (zExp < -1)
4330 || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
158142c2
FB
4331 shift64RightJamming( zSig, - zExp, &zSig );
4332 zExp = 0;
4333 roundBits = zSig & 0x3FF;
ff32e16e
PM
4334 if (isTiny && roundBits) {
4335 float_raise(float_flag_underflow, status);
4336 }
9ee6f678
BR
4337 if (roundingMode == float_round_to_odd) {
4338 /*
4339 * For round-to-odd case, the roundIncrement depends on
4340 * zSig which just changed.
4341 */
4342 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4343 }
158142c2
FB
4344 }
4345 }
a2f2d288 4346 if (roundBits) {
d82f3b2d 4347 float_raise(float_flag_inexact, status);
a2f2d288 4348 }
158142c2 4349 zSig = ( zSig + roundIncrement )>>10;
40662886
PMD
4350 if (!(roundBits ^ 0x200) && roundNearestEven) {
4351 zSig &= ~1;
4352 }
158142c2
FB
4353 if ( zSig == 0 ) zExp = 0;
4354 return packFloat64( zSign, zExp, zSig );
4355
4356}
4357
4358/*----------------------------------------------------------------------------
4359| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4360| and significand `zSig', and returns the proper double-precision floating-
4361| point value corresponding to the abstract input. This routine is just like
4362| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4363| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4364| floating-point exponent.
4365*----------------------------------------------------------------------------*/
4366
4367static float64
c120391c 4368 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4369 float_status *status)
158142c2 4370{
8f506c70 4371 int8_t shiftCount;
158142c2 4372
0019d5c3 4373 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
4374 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4375 status);
158142c2
FB
4376
4377}
4378
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision significand `aSig'.
| The significand is shifted left until its leading 1 sits in bit 63; the
| shifted value is written to `*zSigPtr' and the matching exponent (1 minus
| the shift distance) to `*zExpPtr'.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    const int8_t leftShift = clz64(aSig);

    *zSigPtr = aSig << leftShift;
    *zExpPtr = 1 - leftShift;
}
4395
4396/*----------------------------------------------------------------------------
4397| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4398| and extended significand formed by the concatenation of `zSig0' and `zSig1',
4399| and returns the proper extended double-precision floating-point value
4400| corresponding to the abstract input. Ordinarily, the abstract value is
4401| rounded and packed into the extended double-precision format, with the
4402| inexact exception raised if the abstract input cannot be represented
4403| exactly. However, if the abstract value is too large, the overflow and
4404| inexact exceptions are raised and an infinity or maximal finite value is
4405| returned. If the abstract value is too small, the input value is rounded to
4406| a subnormal number, and the underflow and inexact exceptions are raised if
4407| the abstract input cannot be represented exactly as a subnormal extended
4408| double-precision floating-point number.
4409| If `roundingPrecision' is 32 or 64, the result is rounded to the same
4410| number of bits as single or double precision, respectively. Otherwise, the
4411| result is rounded to the full precision of the extended double-precision
4412| format.
4413| The input significand must be normalized or smaller. If the input
4414| significand is not normalized, `zExp' must be 0; in that case, the result
4415| returned is a subnormal number, and it must not require rounding. The
4416| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4417| Floating-Point Arithmetic.
4418*----------------------------------------------------------------------------*/
4419
c120391c 4420floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
88857aca
LV
4421 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4422 float_status *status)
158142c2 4423{
8f506c70 4424 int8_t roundingMode;
c120391c 4425 bool roundNearestEven, increment, isTiny;
f42c2224 4426 int64_t roundIncrement, roundMask, roundBits;
158142c2 4427
a2f2d288 4428 roundingMode = status->float_rounding_mode;
158142c2
FB
4429 roundNearestEven = ( roundingMode == float_round_nearest_even );
4430 if ( roundingPrecision == 80 ) goto precision80;
4431 if ( roundingPrecision == 64 ) {
e9321124
AB
4432 roundIncrement = UINT64_C(0x0000000000000400);
4433 roundMask = UINT64_C(0x00000000000007FF);
158142c2
FB
4434 }
4435 else if ( roundingPrecision == 32 ) {
e9321124
AB
4436 roundIncrement = UINT64_C(0x0000008000000000);
4437 roundMask = UINT64_C(0x000000FFFFFFFFFF);
158142c2
FB
4438 }
4439 else {
4440 goto precision80;
4441 }
4442 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
4443 switch (roundingMode) {
4444 case float_round_nearest_even:
f9288a76 4445 case float_round_ties_away:
dc355b76
PM
4446 break;
4447 case float_round_to_zero:
4448 roundIncrement = 0;
4449 break;
4450 case float_round_up:
4451 roundIncrement = zSign ? 0 : roundMask;
4452 break;
4453 case float_round_down:
4454 roundIncrement = zSign ? roundMask : 0;
4455 break;
4456 default:
4457 abort();
158142c2
FB
4458 }
4459 roundBits = zSig0 & roundMask;
bb98fe42 4460 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4461 if ( ( 0x7FFE < zExp )
4462 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4463 ) {
4464 goto overflow;
4465 }
4466 if ( zExp <= 0 ) {
a2f2d288 4467 if (status->flush_to_zero) {
ff32e16e 4468 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4469 return packFloatx80(zSign, 0, 0);
4470 }
a828b373
RH
4471 isTiny = status->tininess_before_rounding
4472 || (zExp < 0 )
4473 || (zSig0 <= zSig0 + roundIncrement);
158142c2
FB
4474 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4475 zExp = 0;
4476 roundBits = zSig0 & roundMask;
ff32e16e
PM
4477 if (isTiny && roundBits) {
4478 float_raise(float_flag_underflow, status);
4479 }
a2f2d288 4480 if (roundBits) {
d82f3b2d 4481 float_raise(float_flag_inexact, status);
a2f2d288 4482 }
158142c2 4483 zSig0 += roundIncrement;
bb98fe42 4484 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4485 roundIncrement = roundMask + 1;
4486 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4487 roundMask |= roundIncrement;
4488 }
4489 zSig0 &= ~ roundMask;
4490 return packFloatx80( zSign, zExp, zSig0 );
4491 }
4492 }
a2f2d288 4493 if (roundBits) {
d82f3b2d 4494 float_raise(float_flag_inexact, status);
a2f2d288 4495 }
158142c2
FB
4496 zSig0 += roundIncrement;
4497 if ( zSig0 < roundIncrement ) {
4498 ++zExp;
e9321124 4499 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4500 }
4501 roundIncrement = roundMask + 1;
4502 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4503 roundMask |= roundIncrement;
4504 }
4505 zSig0 &= ~ roundMask;
4506 if ( zSig0 == 0 ) zExp = 0;
4507 return packFloatx80( zSign, zExp, zSig0 );
4508 precision80:
dc355b76
PM
4509 switch (roundingMode) {
4510 case float_round_nearest_even:
f9288a76 4511 case float_round_ties_away:
dc355b76
PM
4512 increment = ((int64_t)zSig1 < 0);
4513 break;
4514 case float_round_to_zero:
4515 increment = 0;
4516 break;
4517 case float_round_up:
4518 increment = !zSign && zSig1;
4519 break;
4520 case float_round_down:
4521 increment = zSign && zSig1;
4522 break;
4523 default:
4524 abort();
158142c2 4525 }
bb98fe42 4526 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4527 if ( ( 0x7FFE < zExp )
4528 || ( ( zExp == 0x7FFE )
e9321124 4529 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
158142c2
FB
4530 && increment
4531 )
4532 ) {
4533 roundMask = 0;
4534 overflow:
ff32e16e 4535 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4536 if ( ( roundingMode == float_round_to_zero )
4537 || ( zSign && ( roundingMode == float_round_up ) )
4538 || ( ! zSign && ( roundingMode == float_round_down ) )
4539 ) {
4540 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4541 }
0f605c88
LV
4542 return packFloatx80(zSign,
4543 floatx80_infinity_high,
4544 floatx80_infinity_low);
158142c2
FB
4545 }
4546 if ( zExp <= 0 ) {
a828b373
RH
4547 isTiny = status->tininess_before_rounding
4548 || (zExp < 0)
4549 || !increment
4550 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4551 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4552 zExp = 0;
ff32e16e
PM
4553 if (isTiny && zSig1) {
4554 float_raise(float_flag_underflow, status);
4555 }
a2f2d288 4556 if (zSig1) {
d82f3b2d 4557 float_raise(float_flag_inexact, status);
a2f2d288 4558 }
dc355b76
PM
4559 switch (roundingMode) {
4560 case float_round_nearest_even:
f9288a76 4561 case float_round_ties_away:
dc355b76
PM
4562 increment = ((int64_t)zSig1 < 0);
4563 break;
4564 case float_round_to_zero:
4565 increment = 0;
4566 break;
4567 case float_round_up:
4568 increment = !zSign && zSig1;
4569 break;
4570 case float_round_down:
4571 increment = zSign && zSig1;
4572 break;
4573 default:
4574 abort();
158142c2
FB
4575 }
4576 if ( increment ) {
4577 ++zSig0;
40662886
PMD
4578 if (!(zSig1 << 1) && roundNearestEven) {
4579 zSig0 &= ~1;
4580 }
bb98fe42 4581 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4582 }
4583 return packFloatx80( zSign, zExp, zSig0 );
4584 }
4585 }
a2f2d288 4586 if (zSig1) {
d82f3b2d 4587 float_raise(float_flag_inexact, status);
a2f2d288 4588 }
158142c2
FB
4589 if ( increment ) {
4590 ++zSig0;
4591 if ( zSig0 == 0 ) {
4592 ++zExp;
e9321124 4593 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4594 }
4595 else {
40662886
PMD
4596 if (!(zSig1 << 1) && roundNearestEven) {
4597 zSig0 &= ~1;
4598 }
158142c2
FB
4599 }
4600 }
4601 else {
4602 if ( zSig0 == 0 ) zExp = 0;
4603 }
4604 return packFloatx80( zSign, zExp, zSig0 );
4605
4606}
4607
4608/*----------------------------------------------------------------------------
4609| Takes an abstract floating-point value having sign `zSign', exponent
4610| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4611| and returns the proper extended double-precision floating-point value
4612| corresponding to the abstract input. This routine is just like
4613| `roundAndPackFloatx80' except that the input significand does not have to be
4614| normalized.
4615*----------------------------------------------------------------------------*/
4616
88857aca 4617floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
c120391c 4618 bool zSign, int32_t zExp,
88857aca
LV
4619 uint64_t zSig0, uint64_t zSig1,
4620 float_status *status)
158142c2 4621{
8f506c70 4622 int8_t shiftCount;
158142c2
FB
4623
4624 if ( zSig0 == 0 ) {
4625 zSig0 = zSig1;
4626 zSig1 = 0;
4627 zExp -= 64;
4628 }
0019d5c3 4629 shiftCount = clz64(zSig0);
158142c2
FB
4630 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4631 zExp -= shiftCount;
ff32e16e
PM
4632 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4633 zSig0, zSig1, status);
158142c2
FB
4634
4635}
4636
158142c2
FB
4637/*----------------------------------------------------------------------------
4638| Returns the least-significant 64 fraction bits of the quadruple-precision
4639| floating-point value `a'.
4640*----------------------------------------------------------------------------*/
4641
a49db98d 4642static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
4643{
4644
4645 return a.low;
4646
4647}
4648
4649/*----------------------------------------------------------------------------
4650| Returns the most-significant 48 fraction bits of the quadruple-precision
4651| floating-point value `a'.
4652*----------------------------------------------------------------------------*/
4653
a49db98d 4654static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
4655{
4656
e9321124 4657 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
158142c2
FB
4658
4659}
4660
4661/*----------------------------------------------------------------------------
4662| Returns the exponent bits of the quadruple-precision floating-point value
4663| `a'.
4664*----------------------------------------------------------------------------*/
4665
f4014512 4666static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
4667{
4668
4669 return ( a.high>>48 ) & 0x7FFF;
4670
4671}
4672
4673/*----------------------------------------------------------------------------
4674| Returns the sign bit of the quadruple-precision floating-point value `a'.
4675*----------------------------------------------------------------------------*/
4676
c120391c 4677static inline bool extractFloat128Sign(float128 a)
158142c2 4678{
c120391c 4679 return a.high >> 63;
158142c2
FB
4680}
4681
/*----------------------------------------------------------------------------
| Normalizes the subnormal quadruple-precision significand formed by the
| concatenation of `aSig0' and `aSig1'.  The normalized exponent is stored at
| `*zExpPtr', the most significant 49 bits of the normalized significand at
| `*zSig0Ptr', and the least significant 64 bits at `*zSig1Ptr'.
*----------------------------------------------------------------------------*/

static void
normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1,
                           int32_t *zExpPtr,
                           uint64_t *zSig0Ptr, uint64_t *zSig1Ptr)
{
    int8_t shift;

    if (aSig0 != 0) {
        /* Leading 1 is in the upper word: a plain 128-bit left shift. */
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /*
     * Leading 1 is in the lower word.  `shift' is the distance needed to
     * bring it to bit 48 of a single word; the further 64 positions for
     * moving that word into the upper half are accounted for in the
     * exponent below.
     */
    shift = clz64(aSig1) - 15;
    if (shift >= 0) {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    } else {
        /* Leading 1 above bit 48: the word straddles both outputs. */
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    }
    *zExpPtr = -shift - 63;
}
4722
4723/*----------------------------------------------------------------------------
4724| Packs the sign `zSign', the exponent `zExp', and the significand formed
4725| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4726| floating-point value, returning the result. After being shifted into the
4727| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4728| added together to form the most significant 32 bits of the result. This
4729| means that any integer portion of `zSig0' will be added into the exponent.
4730| Since a properly normalized significand will have an integer portion equal
4731| to 1, the `zExp' input should be 1 less than the desired result exponent
4732| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4733| significand.
4734*----------------------------------------------------------------------------*/
4735
a49db98d 4736static inline float128
c120391c 4737packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
158142c2
FB
4738{
4739 float128 z;
4740
4741 z.low = zSig1;
c120391c 4742 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
158142c2 4743 return z;
158142c2
FB
4744}
4745
4746/*----------------------------------------------------------------------------
4747| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4748| and extended significand formed by the concatenation of `zSig0', `zSig1',
4749| and `zSig2', and returns the proper quadruple-precision floating-point value
4750| corresponding to the abstract input. Ordinarily, the abstract value is
4751| simply rounded and packed into the quadruple-precision format, with the
4752| inexact exception raised if the abstract input cannot be represented
4753| exactly. However, if the abstract value is too large, the overflow and
4754| inexact exceptions are raised and an infinity or maximal finite value is
4755| returned. If the abstract value is too small, the input value is rounded to
4756| a subnormal number, and the underflow and inexact exceptions are raised if
4757| the abstract input cannot be represented exactly as a subnormal quadruple-
4758| precision floating-point number.
4759| The input significand must be normalized or smaller. If the input
4760| significand is not normalized, `zExp' must be 0; in that case, the result
4761| returned is a subnormal number, and it must not require rounding. In the
4762| usual case that the input significand is normalized, `zExp' must be 1 less
4763| than the ``true'' floating-point exponent. The handling of underflow and
4764| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4765*----------------------------------------------------------------------------*/
4766
c120391c 4767static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4768 uint64_t zSig0, uint64_t zSig1,
4769 uint64_t zSig2, float_status *status)
158142c2 4770{
8f506c70 4771 int8_t roundingMode;
c120391c 4772 bool roundNearestEven, increment, isTiny;
158142c2 4773
a2f2d288 4774 roundingMode = status->float_rounding_mode;
158142c2 4775 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4776 switch (roundingMode) {
4777 case float_round_nearest_even:
f9288a76 4778 case float_round_ties_away:
dc355b76
PM
4779 increment = ((int64_t)zSig2 < 0);
4780 break;
4781 case float_round_to_zero:
4782 increment = 0;
4783 break;
4784 case float_round_up:
4785 increment = !zSign && zSig2;
4786 break;
4787 case float_round_down:
4788 increment = zSign && zSig2;
4789 break;
9ee6f678
BR
4790 case float_round_to_odd:
4791 increment = !(zSig1 & 0x1) && zSig2;
4792 break;
dc355b76
PM
4793 default:
4794 abort();
158142c2 4795 }
bb98fe42 4796 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
4797 if ( ( 0x7FFD < zExp )
4798 || ( ( zExp == 0x7FFD )
4799 && eq128(
e9321124
AB
4800 UINT64_C(0x0001FFFFFFFFFFFF),
4801 UINT64_C(0xFFFFFFFFFFFFFFFF),
158142c2
FB
4802 zSig0,
4803 zSig1
4804 )
4805 && increment
4806 )
4807 ) {
ff32e16e 4808 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4809 if ( ( roundingMode == float_round_to_zero )
4810 || ( zSign && ( roundingMode == float_round_up ) )
4811 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 4812 || (roundingMode == float_round_to_odd)
158142c2
FB
4813 ) {
4814 return
4815 packFloat128(
4816 zSign,
4817 0x7FFE,
e9321124
AB
4818 UINT64_C(0x0000FFFFFFFFFFFF),
4819 UINT64_C(0xFFFFFFFFFFFFFFFF)
158142c2
FB
4820 );
4821 }
4822 return packFloat128( zSign, 0x7FFF, 0, 0 );
4823 }
4824 if ( zExp < 0 ) {
a2f2d288 4825 if (status->flush_to_zero) {
ff32e16e 4826 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4827 return packFloat128(zSign, 0, 0, 0);
4828 }
a828b373
RH
4829 isTiny = status->tininess_before_rounding
4830 || (zExp < -1)
4831 || !increment
4832 || lt128(zSig0, zSig1,
4833 UINT64_C(0x0001FFFFFFFFFFFF),
4834 UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4835 shift128ExtraRightJamming(
4836 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4837 zExp = 0;
ff32e16e
PM
4838 if (isTiny && zSig2) {
4839 float_raise(float_flag_underflow, status);
4840 }
dc355b76
PM
4841 switch (roundingMode) {
4842 case float_round_nearest_even:
f9288a76 4843 case float_round_ties_away:
dc355b76
PM
4844 increment = ((int64_t)zSig2 < 0);
4845 break;
4846 case float_round_to_zero:
4847 increment = 0;
4848 break;
4849 case float_round_up:
4850 increment = !zSign && zSig2;
4851 break;
4852 case float_round_down:
4853 increment = zSign && zSig2;
4854 break;
9ee6f678
BR
4855 case float_round_to_odd:
4856 increment = !(zSig1 & 0x1) && zSig2;
4857 break;
dc355b76
PM
4858 default:
4859 abort();
158142c2
FB
4860 }
4861 }
4862 }
a2f2d288 4863 if (zSig2) {
d82f3b2d 4864 float_raise(float_flag_inexact, status);
a2f2d288 4865 }
158142c2
FB
4866 if ( increment ) {
4867 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
40662886
PMD
4868 if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4869 zSig1 &= ~1;
4870 }
158142c2
FB
4871 }
4872 else {
4873 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4874 }
4875 return packFloat128( zSign, zExp, zSig0, zSig1 );
4876
4877}
4878
4879/*----------------------------------------------------------------------------
4880| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4881| and significand formed by the concatenation of `zSig0' and `zSig1', and
4882| returns the proper quadruple-precision floating-point value corresponding
4883| to the abstract input. This routine is just like `roundAndPackFloat128'
4884| except that the input significand has fewer bits and does not have to be
4885| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4886| point exponent.
4887*----------------------------------------------------------------------------*/
4888
c120391c 4889static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4890 uint64_t zSig0, uint64_t zSig1,
4891 float_status *status)
158142c2 4892{
8f506c70 4893 int8_t shiftCount;
bb98fe42 4894 uint64_t zSig2;
158142c2
FB
4895
4896 if ( zSig0 == 0 ) {
4897 zSig0 = zSig1;
4898 zSig1 = 0;
4899 zExp -= 64;
4900 }
0019d5c3 4901 shiftCount = clz64(zSig0) - 15;
158142c2
FB
4902 if ( 0 <= shiftCount ) {
4903 zSig2 = 0;
4904 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4905 }
4906 else {
4907 shift128ExtraRightJamming(
4908 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4909 }
4910 zExp -= shiftCount;
ff32e16e 4911 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
4912
4913}
4914
158142c2 4915
158142c2
FB
4916/*----------------------------------------------------------------------------
4917| Returns the result of converting the 32-bit two's complement integer `a'
4918| to the extended double-precision floating-point format. The conversion
4919| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4920| Arithmetic.
4921*----------------------------------------------------------------------------*/
4922
e5a41ffa 4923floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2 4924{
c120391c 4925 bool zSign;
3a87d009 4926 uint32_t absA;
8f506c70 4927 int8_t shiftCount;
bb98fe42 4928 uint64_t zSig;
158142c2
FB
4929
4930 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4931 zSign = ( a < 0 );
4932 absA = zSign ? - a : a;
0019d5c3 4933 shiftCount = clz32(absA) + 32;
158142c2
FB
4934 zSig = absA;
4935 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4936
4937}
4938
158142c2
FB
4939/*----------------------------------------------------------------------------
4940| Returns the result of converting the 32-bit two's complement integer `a' to
4941| the quadruple-precision floating-point format. The conversion is performed
4942| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4943*----------------------------------------------------------------------------*/
4944
e5a41ffa 4945float128 int32_to_float128(int32_t a, float_status *status)
158142c2 4946{
c120391c 4947 bool zSign;
3a87d009 4948 uint32_t absA;
8f506c70 4949 int8_t shiftCount;
bb98fe42 4950 uint64_t zSig0;
158142c2
FB
4951
4952 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4953 zSign = ( a < 0 );
4954 absA = zSign ? - a : a;
0019d5c3 4955 shiftCount = clz32(absA) + 17;
158142c2
FB
4956 zSig0 = absA;
4957 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
4958
4959}
4960
158142c2
FB
4961/*----------------------------------------------------------------------------
4962| Returns the result of converting the 64-bit two's complement integer `a'
4963| to the extended double-precision floating-point format. The conversion
4964| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4965| Arithmetic.
4966*----------------------------------------------------------------------------*/
4967
e5a41ffa 4968floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2 4969{
c120391c 4970 bool zSign;
182f42fd 4971 uint64_t absA;
8f506c70 4972 int8_t shiftCount;
158142c2
FB
4973
4974 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4975 zSign = ( a < 0 );
4976 absA = zSign ? - a : a;
0019d5c3 4977 shiftCount = clz64(absA);
158142c2
FB
4978 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4979
4980}
4981
158142c2
FB
4982/*----------------------------------------------------------------------------
4983| Returns the result of converting the 64-bit two's complement integer `a' to
4984| the quadruple-precision floating-point format. The conversion is performed
4985| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4986*----------------------------------------------------------------------------*/
4987
e5a41ffa 4988float128 int64_to_float128(int64_t a, float_status *status)
158142c2 4989{
c120391c 4990 bool zSign;
182f42fd 4991 uint64_t absA;
8f506c70 4992 int8_t shiftCount;
f4014512 4993 int32_t zExp;
bb98fe42 4994 uint64_t zSig0, zSig1;
158142c2
FB
4995
4996 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4997 zSign = ( a < 0 );
4998 absA = zSign ? - a : a;
0019d5c3 4999 shiftCount = clz64(absA) + 49;
158142c2
FB
5000 zExp = 0x406E - shiftCount;
5001 if ( 64 <= shiftCount ) {
5002 zSig1 = 0;
5003 zSig0 = absA;
5004 shiftCount -= 64;
5005 }
5006 else {
5007 zSig1 = absA;
5008 zSig0 = 0;
5009 }
5010 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5011 return packFloat128( zSign, zExp, zSig0, zSig1 );
5012
5013}
5014
6bb8e0f1
PM
5015/*----------------------------------------------------------------------------
5016| Returns the result of converting the 64-bit unsigned integer `a'
5017| to the quadruple-precision floating-point format. The conversion is performed
5018| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5019*----------------------------------------------------------------------------*/
5020
e5a41ffa 5021float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
5022{
5023 if (a == 0) {
5024 return float128_zero;
5025 }
6603d506 5026 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
5027}
5028
158142c2
FB
5029/*----------------------------------------------------------------------------
5030| Returns the result of converting the single-precision floating-point value
5031| `a' to the extended double-precision floating-point format. The conversion
5032| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5033| Arithmetic.
5034*----------------------------------------------------------------------------*/
5035
e5a41ffa 5036floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2 5037{
c120391c 5038 bool aSign;
0c48262d 5039 int aExp;
bb98fe42 5040 uint32_t aSig;
158142c2 5041
ff32e16e 5042 a = float32_squash_input_denormal(a, status);
158142c2
FB
5043 aSig = extractFloat32Frac( a );
5044 aExp = extractFloat32Exp( a );
5045 aSign = extractFloat32Sign( a );
5046 if ( aExp == 0xFF ) {
ff32e16e 5047 if (aSig) {
7537c2b4
JM
5048 floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
5049 status);
5050 return floatx80_silence_nan(res, status);
ff32e16e 5051 }
0f605c88
LV
5052 return packFloatx80(aSign,
5053 floatx80_infinity_high,
5054 floatx80_infinity_low);
158142c2
FB
5055 }
5056 if ( aExp == 0 ) {
5057 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5058 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5059 }
5060 aSig |= 0x00800000;
bb98fe42 5061 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
5062
5063}
5064
158142c2
FB
5065/*----------------------------------------------------------------------------
5066| Returns the result of converting the single-precision floating-point value
5067| `a' to the quadruple-precision floating-point format. The conversion is
5068| performed according to the IEC/IEEE Standard for Binary Floating-Point
5069| Arithmetic.
5070*----------------------------------------------------------------------------*/
5071
e5a41ffa 5072float128 float32_to_float128(float32 a, float_status *status)
158142c2 5073{
c120391c 5074 bool aSign;
0c48262d 5075 int aExp;
bb98fe42 5076 uint32_t aSig;
158142c2 5077
ff32e16e 5078 a = float32_squash_input_denormal(a, status);
158142c2
FB
5079 aSig = extractFloat32Frac( a );
5080 aExp = extractFloat32Exp( a );
5081 aSign = extractFloat32Sign( a );
5082 if ( aExp == 0xFF ) {
ff32e16e
PM
5083 if (aSig) {
5084 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
5085 }
158142c2
FB
5086 return packFloat128( aSign, 0x7FFF, 0, 0 );
5087 }
5088 if ( aExp == 0 ) {
5089 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5090 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5091 --aExp;
5092 }
bb98fe42 5093 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
5094
5095}
5096
158142c2
FB
5097/*----------------------------------------------------------------------------
5098| Returns the remainder of the single-precision floating-point value `a'
5099| with respect to the corresponding value `b'. The operation is performed
5100| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5101*----------------------------------------------------------------------------*/
5102
e5a41ffa 5103float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 5104{
c120391c 5105 bool aSign, zSign;
0c48262d 5106 int aExp, bExp, expDiff;
bb98fe42
AF
5107 uint32_t aSig, bSig;
5108 uint32_t q;
5109 uint64_t aSig64, bSig64, q64;
5110 uint32_t alternateASig;
5111 int32_t sigMean;
ff32e16e
PM
5112 a = float32_squash_input_denormal(a, status);
5113 b = float32_squash_input_denormal(b, status);
158142c2
FB
5114
5115 aSig = extractFloat32Frac( a );
5116 aExp = extractFloat32Exp( a );
5117 aSign = extractFloat32Sign( a );
5118 bSig = extractFloat32Frac( b );
5119 bExp = extractFloat32Exp( b );
158142c2
FB
5120 if ( aExp == 0xFF ) {
5121 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 5122 return propagateFloat32NaN(a, b, status);
158142c2 5123 }
ff32e16e 5124 float_raise(float_flag_invalid, status);
af39bc8c 5125 return float32_default_nan(status);
158142c2
FB
5126 }
5127 if ( bExp == 0xFF ) {
ff32e16e
PM
5128 if (bSig) {
5129 return propagateFloat32NaN(a, b, status);
5130 }
158142c2
FB
5131 return a;
5132 }
5133 if ( bExp == 0 ) {
5134 if ( bSig == 0 ) {
ff32e16e 5135 float_raise(float_flag_invalid, status);
af39bc8c 5136 return float32_default_nan(status);
158142c2
FB
5137 }
5138 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
5139 }
5140 if ( aExp == 0 ) {
5141 if ( aSig == 0 ) return a;
5142 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5143 }
5144 expDiff = aExp - bExp;
5145 aSig |= 0x00800000;
5146 bSig |= 0x00800000;
5147 if ( expDiff < 32 ) {
5148 aSig <<= 8;
5149 bSig <<= 8;
5150 if ( expDiff < 0 ) {
5151 if ( expDiff < -1 ) return a;
5152 aSig >>= 1;
5153 }
5154 q = ( bSig <= aSig );
5155 if ( q ) aSig -= bSig;
5156 if ( 0 < expDiff ) {
bb98fe42 5157 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
5158 q >>= 32 - expDiff;
5159 bSig >>= 2;
5160 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5161 }
5162 else {
5163 aSig >>= 2;
5164 bSig >>= 2;
5165 }
5166 }
5167 else {
5168 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
5169 aSig64 = ( (uint64_t) aSig )<<40;
5170 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
5171 expDiff -= 64;
5172 while ( 0 < expDiff ) {
5173 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5174 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5175 aSig64 = - ( ( bSig * q64 )<<38 );
5176 expDiff -= 62;
5177 }
5178 expDiff += 64;
5179 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5180 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5181 q = q64>>( 64 - expDiff );
5182 bSig <<= 6;
5183 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5184 }
5185 do {
5186 alternateASig = aSig;
5187 ++q;
5188 aSig -= bSig;
bb98fe42 5189 } while ( 0 <= (int32_t) aSig );
158142c2
FB
5190 sigMean = aSig + alternateASig;
5191 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5192 aSig = alternateASig;
5193 }
bb98fe42 5194 zSign = ( (int32_t) aSig < 0 );
158142c2 5195 if ( zSign ) aSig = - aSig;
ff32e16e 5196 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5197}
5198
369be8f6 5199
158142c2 5200
8229c991
AJ
5201/*----------------------------------------------------------------------------
5202| Returns the binary exponential of the single-precision floating-point value
5203| `a'. The operation is performed according to the IEC/IEEE Standard for
5204| Binary Floating-Point Arithmetic.
5205|
5206| Uses the following identities:
5207|
5208| 1. -------------------------------------------------------------------------
5209|      x    x*ln(2)
5210|     2  = e
5211|
5212| 2. -------------------------------------------------------------------------
5213|                      2     3     4     5           n
5214|      x        x     x     x     x     x           x
5215|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5216|               1!    2!    3!    4!    5!          n!
5217*----------------------------------------------------------------------------*/
5218
5219static const float64 float32_exp2_coefficients[15] =
5220{
d5138cf4
PM
5221 const_float64( 0x3ff0000000000000ll ), /* 1 */
5222 const_float64( 0x3fe0000000000000ll ), /* 2 */
5223 const_float64( 0x3fc5555555555555ll ), /* 3 */
5224 const_float64( 0x3fa5555555555555ll ), /* 4 */
5225 const_float64( 0x3f81111111111111ll ), /* 5 */
5226 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
5227 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
5228 const_float64( 0x3efa01a01a01a01all ), /* 8 */
5229 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
5230 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5231 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5232 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5233 const_float64( 0x3de6124613a86d09ll ), /* 13 */
5234 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5235 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
5236};
5237
e5a41ffa 5238float32 float32_exp2(float32 a, float_status *status)
8229c991 5239{
c120391c 5240 bool aSign;
0c48262d 5241 int aExp;
bb98fe42 5242 uint32_t aSig;
8229c991
AJ
5243 float64 r, x, xn;
5244 int i;
ff32e16e 5245 a = float32_squash_input_denormal(a, status);
8229c991
AJ
5246
5247 aSig = extractFloat32Frac( a );
5248 aExp = extractFloat32Exp( a );
5249 aSign = extractFloat32Sign( a );
5250
5251 if ( aExp == 0xFF) {
ff32e16e
PM
5252 if (aSig) {
5253 return propagateFloat32NaN(a, float32_zero, status);
5254 }
8229c991
AJ
5255 return (aSign) ? float32_zero : a;
5256 }
5257 if (aExp == 0) {
5258 if (aSig == 0) return float32_one;
5259 }
5260
ff32e16e 5261 float_raise(float_flag_inexact, status);
8229c991
AJ
5262
5263 /* ******************************* */
5264 /* using float64 for approximation */
5265 /* ******************************* */
ff32e16e
PM
5266 x = float32_to_float64(a, status);
5267 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
5268
5269 xn = x;
5270 r = float64_one;
5271 for (i = 0 ; i < 15 ; i++) {
5272 float64 f;
5273
ff32e16e
PM
5274 f = float64_mul(xn, float32_exp2_coefficients[i], status);
5275 r = float64_add(r, f, status);
8229c991 5276
ff32e16e 5277 xn = float64_mul(xn, x, status);
8229c991
AJ
5278 }
5279
5280 return float64_to_float32(r, status);
5281}
5282
374dfc33
AJ
5283/*----------------------------------------------------------------------------
5284| Returns the binary log of the single-precision floating-point value `a'.
5285| The operation is performed according to the IEC/IEEE Standard for Binary
5286| Floating-Point Arithmetic.
5287*----------------------------------------------------------------------------*/
e5a41ffa 5288float32 float32_log2(float32 a, float_status *status)
374dfc33 5289{
c120391c 5290 bool aSign, zSign;
0c48262d 5291 int aExp;
bb98fe42 5292 uint32_t aSig, zSig, i;
374dfc33 5293
ff32e16e 5294 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
5295 aSig = extractFloat32Frac( a );
5296 aExp = extractFloat32Exp( a );
5297 aSign = extractFloat32Sign( a );
5298
5299 if ( aExp == 0 ) {
5300 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5301 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5302 }
5303 if ( aSign ) {
ff32e16e 5304 float_raise(float_flag_invalid, status);
af39bc8c 5305 return float32_default_nan(status);
374dfc33
AJ
5306 }
5307 if ( aExp == 0xFF ) {
ff32e16e
PM
5308 if (aSig) {
5309 return propagateFloat32NaN(a, float32_zero, status);
5310 }
374dfc33
AJ
5311 return a;
5312 }
5313
5314 aExp -= 0x7F;
5315 aSig |= 0x00800000;
5316 zSign = aExp < 0;
5317 zSig = aExp << 23;
5318
5319 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 5320 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
5321 if ( aSig & 0x01000000 ) {
5322 aSig >>= 1;
5323 zSig |= i;
5324 }
5325 }
5326
5327 if ( zSign )
5328 zSig = -zSig;
5329
ff32e16e 5330 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
5331}
5332
158142c2 5333/*----------------------------------------------------------------------------
158142c2
FB
5334| Returns the result of converting the double-precision floating-point value
5335| `a' to the extended double-precision floating-point format. The conversion
5336| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5337| Arithmetic.
5338*----------------------------------------------------------------------------*/
5339
e5a41ffa 5340floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2 5341{
c120391c 5342 bool aSign;
0c48262d 5343 int aExp;
bb98fe42 5344 uint64_t aSig;
158142c2 5345
ff32e16e 5346 a = float64_squash_input_denormal(a, status);
158142c2
FB
5347 aSig = extractFloat64Frac( a );
5348 aExp = extractFloat64Exp( a );
5349 aSign = extractFloat64Sign( a );
5350 if ( aExp == 0x7FF ) {
ff32e16e 5351 if (aSig) {
7537c2b4
JM
5352 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5353 status);
5354 return floatx80_silence_nan(res, status);
ff32e16e 5355 }
0f605c88
LV
5356 return packFloatx80(aSign,
5357 floatx80_infinity_high,
5358 floatx80_infinity_low);
158142c2
FB
5359 }
5360 if ( aExp == 0 ) {
5361 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5362 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5363 }
5364 return
5365 packFloatx80(
e9321124 5366 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
158142c2
FB
5367
5368}
5369
158142c2
FB
5370/*----------------------------------------------------------------------------
5371| Returns the result of converting the double-precision floating-point value
5372| `a' to the quadruple-precision floating-point format. The conversion is
5373| performed according to the IEC/IEEE Standard for Binary Floating-Point
5374| Arithmetic.
5375*----------------------------------------------------------------------------*/
5376
e5a41ffa 5377float128 float64_to_float128(float64 a, float_status *status)
158142c2 5378{
c120391c 5379 bool aSign;
0c48262d 5380 int aExp;
bb98fe42 5381 uint64_t aSig, zSig0, zSig1;
158142c2 5382
ff32e16e 5383 a = float64_squash_input_denormal(a, status);
158142c2
FB
5384 aSig = extractFloat64Frac( a );
5385 aExp = extractFloat64Exp( a );
5386 aSign = extractFloat64Sign( a );
5387 if ( aExp == 0x7FF ) {
ff32e16e
PM
5388 if (aSig) {
5389 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
5390 }
158142c2
FB
5391 return packFloat128( aSign, 0x7FFF, 0, 0 );
5392 }
5393 if ( aExp == 0 ) {
5394 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5395 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5396 --aExp;
5397 }
5398 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
5399 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
5400
5401}
5402
158142c2
FB
5403
5404/*----------------------------------------------------------------------------
5405| Returns the remainder of the double-precision floating-point value `a'
5406| with respect to the corresponding value `b'. The operation is performed
5407| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5408*----------------------------------------------------------------------------*/
5409
e5a41ffa 5410float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 5411{
c120391c 5412 bool aSign, zSign;
0c48262d 5413 int aExp, bExp, expDiff;
bb98fe42
AF
5414 uint64_t aSig, bSig;
5415 uint64_t q, alternateASig;
5416 int64_t sigMean;
158142c2 5417
ff32e16e
PM
5418 a = float64_squash_input_denormal(a, status);
5419 b = float64_squash_input_denormal(b, status);
158142c2
FB
5420 aSig = extractFloat64Frac( a );
5421 aExp = extractFloat64Exp( a );
5422 aSign = extractFloat64Sign( a );
5423 bSig = extractFloat64Frac( b );
5424 bExp = extractFloat64Exp( b );
158142c2
FB
5425 if ( aExp == 0x7FF ) {
5426 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 5427 return propagateFloat64NaN(a, b, status);
158142c2 5428 }
ff32e16e 5429 float_raise(float_flag_invalid, status);
af39bc8c 5430 return float64_default_nan(status);
158142c2
FB
5431 }
5432 if ( bExp == 0x7FF ) {
ff32e16e
PM
5433 if (bSig) {
5434 return propagateFloat64NaN(a, b, status);
5435 }
158142c2
FB
5436 return a;
5437 }
5438 if ( bExp == 0 ) {
5439 if ( bSig == 0 ) {
ff32e16e 5440 float_raise(float_flag_invalid, status);
af39bc8c 5441 return float64_default_nan(status);
158142c2
FB
5442 }
5443 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5444 }
5445 if ( aExp == 0 ) {
5446 if ( aSig == 0 ) return a;
5447 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5448 }
5449 expDiff = aExp - bExp;
e9321124
AB
5450 aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5451 bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
158142c2
FB
5452 if ( expDiff < 0 ) {
5453 if ( expDiff < -1 ) return a;
5454 aSig >>= 1;
5455 }
5456 q = ( bSig <= aSig );
5457 if ( q ) aSig -= bSig;
5458 expDiff -= 64;
5459 while ( 0 < expDiff ) {
5460 q = estimateDiv128To64( aSig, 0, bSig );
5461 q = ( 2 < q ) ? q - 2 : 0;
5462 aSig = - ( ( bSig>>2 ) * q );
5463 expDiff -= 62;
5464 }
5465 expDiff += 64;
5466 if ( 0 < expDiff ) {
5467 q = estimateDiv128To64( aSig, 0, bSig );
5468 q = ( 2 < q ) ? q - 2 : 0;
5469 q >>= 64 - expDiff;
5470 bSig >>= 2;
5471 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5472 }
5473 else {
5474 aSig >>= 2;
5475 bSig >>= 2;
5476 }
5477 do {
5478 alternateASig = aSig;
5479 ++q;
5480 aSig -= bSig;
bb98fe42 5481 } while ( 0 <= (int64_t) aSig );
158142c2
FB
5482 sigMean = aSig + alternateASig;
5483 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5484 aSig = alternateASig;
5485 }
bb98fe42 5486 zSign = ( (int64_t) aSig < 0 );
158142c2 5487 if ( zSign ) aSig = - aSig;
ff32e16e 5488 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5489
5490}
5491
374dfc33
AJ
5492/*----------------------------------------------------------------------------
5493| Returns the binary log of the double-precision floating-point value `a'.
5494| The operation is performed according to the IEC/IEEE Standard for Binary
5495| Floating-Point Arithmetic.
5496*----------------------------------------------------------------------------*/
e5a41ffa 5497float64 float64_log2(float64 a, float_status *status)
374dfc33 5498{
c120391c 5499 bool aSign, zSign;
0c48262d 5500 int aExp;
bb98fe42 5501 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 5502 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
5503
5504 aSig = extractFloat64Frac( a );
5505 aExp = extractFloat64Exp( a );
5506 aSign = extractFloat64Sign( a );
5507
5508 if ( aExp == 0 ) {
5509 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5510 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5511 }
5512 if ( aSign ) {
ff32e16e 5513 float_raise(float_flag_invalid, status);
af39bc8c 5514 return float64_default_nan(status);
374dfc33
AJ
5515 }
5516 if ( aExp == 0x7FF ) {
ff32e16e
PM
5517 if (aSig) {
5518 return propagateFloat64NaN(a, float64_zero, status);
5519 }
374dfc33
AJ
5520 return a;
5521 }
5522
5523 aExp -= 0x3FF;
e9321124 5524 aSig |= UINT64_C(0x0010000000000000);
374dfc33 5525 zSign = aExp < 0;
bb98fe42 5526 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
5527 for (i = 1LL << 51; i > 0; i >>= 1) {
5528 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5529 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
e9321124 5530 if ( aSig & UINT64_C(0x0020000000000000) ) {
374dfc33
AJ
5531 aSig >>= 1;
5532 zSig |= i;
5533 }
5534 }
5535
5536 if ( zSign )
5537 zSig = -zSig;
ff32e16e 5538 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
5539}
5540
158142c2
FB
5541/*----------------------------------------------------------------------------
5542| Returns the result of converting the extended double-precision floating-
5543| point value `a' to the 32-bit two's complement integer format. The
5544| conversion is performed according to the IEC/IEEE Standard for Binary
5545| Floating-Point Arithmetic---which means in particular that the conversion
5546| is rounded according to the current rounding mode. If `a' is a NaN, the
5547| largest positive integer is returned. Otherwise, if the conversion
5548| overflows, the largest integer with the same sign as `a' is returned.
5549*----------------------------------------------------------------------------*/
5550
f4014512 5551int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2 5552{
c120391c 5553 bool aSign;
f4014512 5554 int32_t aExp, shiftCount;
bb98fe42 5555 uint64_t aSig;
158142c2 5556
d1eb8f2a
AD
5557 if (floatx80_invalid_encoding(a)) {
5558 float_raise(float_flag_invalid, status);
5559 return 1 << 31;
5560 }
158142c2
FB
5561 aSig = extractFloatx80Frac( a );
5562 aExp = extractFloatx80Exp( a );
5563 aSign = extractFloatx80Sign( a );
bb98fe42 5564 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5565 shiftCount = 0x4037 - aExp;
5566 if ( shiftCount <= 0 ) shiftCount = 1;
5567 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 5568 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
5569
5570}
5571
5572/*----------------------------------------------------------------------------
5573| Returns the result of converting the extended double-precision floating-
5574| point value `a' to the 32-bit two's complement integer format. The
5575| conversion is performed according to the IEC/IEEE Standard for Binary
5576| Floating-Point Arithmetic, except that the conversion is always rounded
5577| toward zero. If `a' is a NaN, the largest positive integer is returned.
5578| Otherwise, if the conversion overflows, the largest integer with the same
5579| sign as `a' is returned.
5580*----------------------------------------------------------------------------*/
5581
f4014512 5582int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2 5583{
c120391c 5584 bool aSign;
f4014512 5585 int32_t aExp, shiftCount;
bb98fe42 5586 uint64_t aSig, savedASig;
b3a6a2e0 5587 int32_t z;
158142c2 5588
d1eb8f2a
AD
5589 if (floatx80_invalid_encoding(a)) {
5590 float_raise(float_flag_invalid, status);
5591 return 1 << 31;
5592 }
158142c2
FB
5593 aSig = extractFloatx80Frac( a );
5594 aExp = extractFloatx80Exp( a );
5595 aSign = extractFloatx80Sign( a );
5596 if ( 0x401E < aExp ) {
bb98fe42 5597 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5598 goto invalid;
5599 }
5600 else if ( aExp < 0x3FFF ) {
a2f2d288 5601 if (aExp || aSig) {
d82f3b2d 5602 float_raise(float_flag_inexact, status);
a2f2d288 5603 }
158142c2
FB
5604 return 0;
5605 }
5606 shiftCount = 0x403E - aExp;
5607 savedASig = aSig;
5608 aSig >>= shiftCount;
5609 z = aSig;
5610 if ( aSign ) z = - z;
5611 if ( ( z < 0 ) ^ aSign ) {
5612 invalid:
ff32e16e 5613 float_raise(float_flag_invalid, status);
bb98fe42 5614 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5615 }
5616 if ( ( aSig<<shiftCount ) != savedASig ) {
d82f3b2d 5617 float_raise(float_flag_inexact, status);
158142c2
FB
5618 }
5619 return z;
5620
5621}
5622
5623/*----------------------------------------------------------------------------
5624| Returns the result of converting the extended double-precision floating-
5625| point value `a' to the 64-bit two's complement integer format. The
5626| conversion is performed according to the IEC/IEEE Standard for Binary
5627| Floating-Point Arithmetic---which means in particular that the conversion
5628| is rounded according to the current rounding mode. If `a' is a NaN,
5629| the largest positive integer is returned. Otherwise, if the conversion
5630| overflows, the largest integer with the same sign as `a' is returned.
5631*----------------------------------------------------------------------------*/
5632
f42c2224 5633int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2 5634{
c120391c 5635 bool aSign;
f4014512 5636 int32_t aExp, shiftCount;
bb98fe42 5637 uint64_t aSig, aSigExtra;
158142c2 5638
d1eb8f2a
AD
5639 if (floatx80_invalid_encoding(a)) {
5640 float_raise(float_flag_invalid, status);
5641 return 1ULL << 63;
5642 }
158142c2
FB
5643 aSig = extractFloatx80Frac( a );
5644 aExp = extractFloatx80Exp( a );
5645 aSign = extractFloatx80Sign( a );
5646 shiftCount = 0x403E - aExp;
5647 if ( shiftCount <= 0 ) {
5648 if ( shiftCount ) {
ff32e16e 5649 float_raise(float_flag_invalid, status);
0f605c88 5650 if (!aSign || floatx80_is_any_nan(a)) {
2c217da0 5651 return INT64_MAX;
158142c2 5652 }
2c217da0 5653 return INT64_MIN;
158142c2
FB
5654 }
5655 aSigExtra = 0;
5656 }
5657 else {
5658 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5659 }
ff32e16e 5660 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
5661
5662}
5663
5664/*----------------------------------------------------------------------------
5665| Returns the result of converting the extended double-precision floating-
5666| point value `a' to the 64-bit two's complement integer format. The
5667| conversion is performed according to the IEC/IEEE Standard for Binary
5668| Floating-Point Arithmetic, except that the conversion is always rounded
5669| toward zero. If `a' is a NaN, the largest positive integer is returned.
5670| Otherwise, if the conversion overflows, the largest integer with the same
5671| sign as `a' is returned.
5672*----------------------------------------------------------------------------*/
5673
f42c2224 5674int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2 5675{
c120391c 5676 bool aSign;
f4014512 5677 int32_t aExp, shiftCount;
bb98fe42 5678 uint64_t aSig;
f42c2224 5679 int64_t z;
158142c2 5680
d1eb8f2a
AD
5681 if (floatx80_invalid_encoding(a)) {
5682 float_raise(float_flag_invalid, status);
5683 return 1ULL << 63;
5684 }
158142c2
FB
5685 aSig = extractFloatx80Frac( a );
5686 aExp = extractFloatx80Exp( a );
5687 aSign = extractFloatx80Sign( a );
5688 shiftCount = aExp - 0x403E;
5689 if ( 0 <= shiftCount ) {
e9321124 5690 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
158142c2 5691 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 5692 float_raise(float_flag_invalid, status);
158142c2 5693 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
2c217da0 5694 return INT64_MAX;
158142c2
FB
5695 }
5696 }
2c217da0 5697 return INT64_MIN;
158142c2
FB
5698 }
5699 else if ( aExp < 0x3FFF ) {
a2f2d288 5700 if (aExp | aSig) {
d82f3b2d 5701 float_raise(float_flag_inexact, status);
a2f2d288 5702 }
158142c2
FB
5703 return 0;
5704 }
5705 z = aSig>>( - shiftCount );
bb98fe42 5706 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
d82f3b2d 5707 float_raise(float_flag_inexact, status);
158142c2
FB
5708 }
5709 if ( aSign ) z = - z;
5710 return z;
5711
5712}
5713
5714/*----------------------------------------------------------------------------
5715| Returns the result of converting the extended double-precision floating-
5716| point value `a' to the single-precision floating-point format. The
5717| conversion is performed according to the IEC/IEEE Standard for Binary
5718| Floating-Point Arithmetic.
5719*----------------------------------------------------------------------------*/
5720
e5a41ffa 5721float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2 5722{
c120391c 5723 bool aSign;
f4014512 5724 int32_t aExp;
bb98fe42 5725 uint64_t aSig;
158142c2 5726
d1eb8f2a
AD
5727 if (floatx80_invalid_encoding(a)) {
5728 float_raise(float_flag_invalid, status);
5729 return float32_default_nan(status);
5730 }
158142c2
FB
5731 aSig = extractFloatx80Frac( a );
5732 aExp = extractFloatx80Exp( a );
5733 aSign = extractFloatx80Sign( a );
5734 if ( aExp == 0x7FFF ) {
bb98fe42 5735 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5736 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5737 status);
5738 return float32_silence_nan(res, status);
158142c2
FB
5739 }
5740 return packFloat32( aSign, 0xFF, 0 );
5741 }
5742 shift64RightJamming( aSig, 33, &aSig );
5743 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5744 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5745
5746}
5747
5748/*----------------------------------------------------------------------------
5749| Returns the result of converting the extended double-precision floating-
5750| point value `a' to the double-precision floating-point format. The
5751| conversion is performed according to the IEC/IEEE Standard for Binary
5752| Floating-Point Arithmetic.
5753*----------------------------------------------------------------------------*/
5754
e5a41ffa 5755float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2 5756{
c120391c 5757 bool aSign;
f4014512 5758 int32_t aExp;
bb98fe42 5759 uint64_t aSig, zSig;
158142c2 5760
d1eb8f2a
AD
5761 if (floatx80_invalid_encoding(a)) {
5762 float_raise(float_flag_invalid, status);
5763 return float64_default_nan(status);
5764 }
158142c2
FB
5765 aSig = extractFloatx80Frac( a );
5766 aExp = extractFloatx80Exp( a );
5767 aSign = extractFloatx80Sign( a );
5768 if ( aExp == 0x7FFF ) {
bb98fe42 5769 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5770 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5771 status);
5772 return float64_silence_nan(res, status);
158142c2
FB
5773 }
5774 return packFloat64( aSign, 0x7FF, 0 );
5775 }
5776 shift64RightJamming( aSig, 1, &zSig );
5777 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5778 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5779
5780}
5781
158142c2
FB
5782/*----------------------------------------------------------------------------
5783| Returns the result of converting the extended double-precision floating-
5784| point value `a' to the quadruple-precision floating-point format. The
5785| conversion is performed according to the IEC/IEEE Standard for Binary
5786| Floating-Point Arithmetic.
5787*----------------------------------------------------------------------------*/
5788
e5a41ffa 5789float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2 5790{
c120391c 5791 bool aSign;
0c48262d 5792 int aExp;
bb98fe42 5793 uint64_t aSig, zSig0, zSig1;
158142c2 5794
d1eb8f2a
AD
5795 if (floatx80_invalid_encoding(a)) {
5796 float_raise(float_flag_invalid, status);
5797 return float128_default_nan(status);
5798 }
158142c2
FB
5799 aSig = extractFloatx80Frac( a );
5800 aExp = extractFloatx80Exp( a );
5801 aSign = extractFloatx80Sign( a );
bb98fe42 5802 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5803 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5804 status);
5805 return float128_silence_nan(res, status);
158142c2
FB
5806 }
5807 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5808 return packFloat128( aSign, aExp, zSig0, zSig1 );
5809
5810}
5811
0f721292
LV
5812/*----------------------------------------------------------------------------
5813| Rounds the extended double-precision floating-point value `a'
5814| to the precision provided by floatx80_rounding_precision and returns the
5815| result as an extended double-precision floating-point value.
5816| The operation is performed according to the IEC/IEEE Standard for Binary
5817| Floating-Point Arithmetic.
5818*----------------------------------------------------------------------------*/
5819
5820floatx80 floatx80_round(floatx80 a, float_status *status)
5821{
5822 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5823 extractFloatx80Sign(a),
5824 extractFloatx80Exp(a),
5825 extractFloatx80Frac(a), 0, status);
5826}
5827
158142c2
FB
5828/*----------------------------------------------------------------------------
5829| Rounds the extended double-precision floating-point value `a' to an integer,
5830| and returns the result as an extended quadruple-precision floating-point
5831| value. The operation is performed according to the IEC/IEEE Standard for
5832| Binary Floating-Point Arithmetic.
5833*----------------------------------------------------------------------------*/
5834
e5a41ffa 5835floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2 5836{
c120391c 5837 bool aSign;
f4014512 5838 int32_t aExp;
bb98fe42 5839 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5840 floatx80 z;
5841
d1eb8f2a
AD
5842 if (floatx80_invalid_encoding(a)) {
5843 float_raise(float_flag_invalid, status);
5844 return floatx80_default_nan(status);
5845 }
158142c2
FB
5846 aExp = extractFloatx80Exp( a );
5847 if ( 0x403E <= aExp ) {
bb98fe42 5848 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5849 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5850 }
5851 return a;
5852 }
5853 if ( aExp < 0x3FFF ) {
5854 if ( ( aExp == 0 )
9ecaf5cc 5855 && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
158142c2
FB
5856 return a;
5857 }
d82f3b2d 5858 float_raise(float_flag_inexact, status);
158142c2 5859 aSign = extractFloatx80Sign( a );
a2f2d288 5860 switch (status->float_rounding_mode) {
158142c2 5861 case float_round_nearest_even:
bb98fe42 5862 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5863 ) {
5864 return
e9321124 5865 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
158142c2
FB
5866 }
5867 break;
f9288a76
PM
5868 case float_round_ties_away:
5869 if (aExp == 0x3FFE) {
e9321124 5870 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
f9288a76
PM
5871 }
5872 break;
158142c2
FB
5873 case float_round_down:
5874 return
5875 aSign ?
e9321124 5876 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
158142c2
FB
5877 : packFloatx80( 0, 0, 0 );
5878 case float_round_up:
5879 return
5880 aSign ? packFloatx80( 1, 0, 0 )
e9321124 5881 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
3dede407
RH
5882
5883 case float_round_to_zero:
5884 break;
5885 default:
5886 g_assert_not_reached();
158142c2
FB
5887 }
5888 return packFloatx80( aSign, 0, 0 );
5889 }
5890 lastBitMask = 1;
5891 lastBitMask <<= 0x403E - aExp;
5892 roundBitsMask = lastBitMask - 1;
5893 z = a;
a2f2d288 5894 switch (status->float_rounding_mode) {
dc355b76 5895 case float_round_nearest_even:
158142c2 5896 z.low += lastBitMask>>1;
dc355b76
PM
5897 if ((z.low & roundBitsMask) == 0) {
5898 z.low &= ~lastBitMask;
5899 }
5900 break;
f9288a76
PM
5901 case float_round_ties_away:
5902 z.low += lastBitMask >> 1;
5903 break;
dc355b76
PM
5904 case float_round_to_zero:
5905 break;
5906 case float_round_up:
5907 if (!extractFloatx80Sign(z)) {
5908 z.low += roundBitsMask;
5909 }
5910 break;
5911 case float_round_down:
5912 if (extractFloatx80Sign(z)) {
158142c2
FB
5913 z.low += roundBitsMask;
5914 }
dc355b76
PM
5915 break;
5916 default:
5917 abort();
158142c2
FB
5918 }
5919 z.low &= ~ roundBitsMask;
5920 if ( z.low == 0 ) {
5921 ++z.high;
e9321124 5922 z.low = UINT64_C(0x8000000000000000);
158142c2 5923 }
a2f2d288 5924 if (z.low != a.low) {
d82f3b2d 5925 float_raise(float_flag_inexact, status);
a2f2d288 5926 }
158142c2
FB
5927 return z;
5928
5929}
5930
5931/*----------------------------------------------------------------------------
5932| Returns the result of adding the absolute values of the extended double-
5933| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5934| negated before being returned. `zSign' is ignored if the result is a NaN.
5935| The addition is performed according to the IEC/IEEE Standard for Binary
5936| Floating-Point Arithmetic.
5937*----------------------------------------------------------------------------*/
5938
c120391c 5939static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5940 float_status *status)
158142c2 5941{
f4014512 5942 int32_t aExp, bExp, zExp;
bb98fe42 5943 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5944 int32_t expDiff;
158142c2
FB
5945
5946 aSig = extractFloatx80Frac( a );
5947 aExp = extractFloatx80Exp( a );
5948 bSig = extractFloatx80Frac( b );
5949 bExp = extractFloatx80Exp( b );
5950 expDiff = aExp - bExp;
5951 if ( 0 < expDiff ) {
5952 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5953 if ((uint64_t)(aSig << 1)) {
5954 return propagateFloatx80NaN(a, b, status);
5955 }
158142c2
FB
5956 return a;
5957 }
5958 if ( bExp == 0 ) --expDiff;
5959 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5960 zExp = aExp;
5961 }
5962 else if ( expDiff < 0 ) {
5963 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5964 if ((uint64_t)(bSig << 1)) {
5965 return propagateFloatx80NaN(a, b, status);
5966 }
0f605c88
LV
5967 return packFloatx80(zSign,
5968 floatx80_infinity_high,
5969 floatx80_infinity_low);
158142c2
FB
5970 }
5971 if ( aExp == 0 ) ++expDiff;
5972 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5973 zExp = bExp;
5974 }
5975 else {
5976 if ( aExp == 0x7FFF ) {
bb98fe42 5977 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5978 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5979 }
5980 return a;
5981 }
5982 zSig1 = 0;
5983 zSig0 = aSig + bSig;
5984 if ( aExp == 0 ) {
41602807
JM
5985 if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
5986 /* At least one of the values is a pseudo-denormal,
5987 * and there is a carry out of the result. */
5988 zExp = 1;
5989 goto shiftRight1;
5990 }
2f311075
RH
5991 if (zSig0 == 0) {
5992 return packFloatx80(zSign, 0, 0);
5993 }
158142c2
FB
5994 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5995 goto roundAndPack;
5996 }
5997 zExp = aExp;
5998 goto shiftRight1;
5999 }
6000 zSig0 = aSig + bSig;
bb98fe42 6001 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
6002 shiftRight1:
6003 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
e9321124 6004 zSig0 |= UINT64_C(0x8000000000000000);
158142c2
FB
6005 ++zExp;
6006 roundAndPack:
a2f2d288 6007 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6008 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6009}
6010
6011/*----------------------------------------------------------------------------
6012| Returns the result of subtracting the absolute values of the extended
6013| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
6014| difference is negated before being returned. `zSign' is ignored if the
6015| result is a NaN. The subtraction is performed according to the IEC/IEEE
6016| Standard for Binary Floating-Point Arithmetic.
6017*----------------------------------------------------------------------------*/
6018
c120391c 6019static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 6020 float_status *status)
158142c2 6021{
f4014512 6022 int32_t aExp, bExp, zExp;
bb98fe42 6023 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 6024 int32_t expDiff;
158142c2
FB
6025
6026 aSig = extractFloatx80Frac( a );
6027 aExp = extractFloatx80Exp( a );
6028 bSig = extractFloatx80Frac( b );
6029 bExp = extractFloatx80Exp( b );
6030 expDiff = aExp - bExp;
6031 if ( 0 < expDiff ) goto aExpBigger;
6032 if ( expDiff < 0 ) goto bExpBigger;
6033 if ( aExp == 0x7FFF ) {
bb98fe42 6034 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 6035 return propagateFloatx80NaN(a, b, status);
158142c2 6036 }
ff32e16e 6037 float_raise(float_flag_invalid, status);
af39bc8c 6038 return floatx80_default_nan(status);
158142c2
FB
6039 }
6040 if ( aExp == 0 ) {
6041 aExp = 1;
6042 bExp = 1;
6043 }
6044 zSig1 = 0;
6045 if ( bSig < aSig ) goto aBigger;
6046 if ( aSig < bSig ) goto bBigger;
a2f2d288 6047 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
6048 bExpBigger:
6049 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6050 if ((uint64_t)(bSig << 1)) {
6051 return propagateFloatx80NaN(a, b, status);
6052 }
0f605c88
LV
6053 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
6054 floatx80_infinity_low);
158142c2
FB
6055 }
6056 if ( aExp == 0 ) ++expDiff;
6057 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
6058 bBigger:
6059 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
6060 zExp = bExp;
6061 zSign ^= 1;
6062 goto normalizeRoundAndPack;
6063 aExpBigger:
6064 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6065 if ((uint64_t)(aSig << 1)) {
6066 return propagateFloatx80NaN(a, b, status);
6067 }
158142c2
FB
6068 return a;
6069 }
6070 if ( bExp == 0 ) --expDiff;
6071 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
6072 aBigger:
6073 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
6074 zExp = aExp;
6075 normalizeRoundAndPack:
a2f2d288 6076 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6077 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6078}
6079
6080/*----------------------------------------------------------------------------
6081| Returns the result of adding the extended double-precision floating-point
6082| values `a' and `b'. The operation is performed according to the IEC/IEEE
6083| Standard for Binary Floating-Point Arithmetic.
6084*----------------------------------------------------------------------------*/
6085
e5a41ffa 6086floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2 6087{
c120391c 6088 bool aSign, bSign;
158142c2 6089
d1eb8f2a
AD
6090 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6091 float_raise(float_flag_invalid, status);
6092 return floatx80_default_nan(status);
6093 }
158142c2
FB
6094 aSign = extractFloatx80Sign( a );
6095 bSign = extractFloatx80Sign( b );
6096 if ( aSign == bSign ) {
ff32e16e 6097 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6098 }
6099 else {
ff32e16e 6100 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6101 }
6102
6103}
6104
6105/*----------------------------------------------------------------------------
6106| Returns the result of subtracting the extended double-precision floating-
6107| point values `a' and `b'. The operation is performed according to the
6108| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6109*----------------------------------------------------------------------------*/
6110
e5a41ffa 6111floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2 6112{
c120391c 6113 bool aSign, bSign;
158142c2 6114
d1eb8f2a
AD
6115 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6116 float_raise(float_flag_invalid, status);
6117 return floatx80_default_nan(status);
6118 }
158142c2
FB
6119 aSign = extractFloatx80Sign( a );
6120 bSign = extractFloatx80Sign( b );
6121 if ( aSign == bSign ) {
ff32e16e 6122 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6123 }
6124 else {
ff32e16e 6125 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6126 }
6127
6128}
6129
6130/*----------------------------------------------------------------------------
6131| Returns the result of multiplying the extended double-precision floating-
6132| point values `a' and `b'. The operation is performed according to the
6133| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6134*----------------------------------------------------------------------------*/
6135
e5a41ffa 6136floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2 6137{
c120391c 6138 bool aSign, bSign, zSign;
f4014512 6139 int32_t aExp, bExp, zExp;
bb98fe42 6140 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 6141
d1eb8f2a
AD
6142 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6143 float_raise(float_flag_invalid, status);
6144 return floatx80_default_nan(status);
6145 }
158142c2
FB
6146 aSig = extractFloatx80Frac( a );
6147 aExp = extractFloatx80Exp( a );
6148 aSign = extractFloatx80Sign( a );
6149 bSig = extractFloatx80Frac( b );
6150 bExp = extractFloatx80Exp( b );
6151 bSign = extractFloatx80Sign( b );
6152 zSign = aSign ^ bSign;
6153 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6154 if ( (uint64_t) ( aSig<<1 )
6155 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6156 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6157 }
6158 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
6159 return packFloatx80(zSign, floatx80_infinity_high,
6160 floatx80_infinity_low);
158142c2
FB
6161 }
6162 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6163 if ((uint64_t)(bSig << 1)) {
6164 return propagateFloatx80NaN(a, b, status);
6165 }
158142c2
FB
6166 if ( ( aExp | aSig ) == 0 ) {
6167 invalid:
ff32e16e 6168 float_raise(float_flag_invalid, status);
af39bc8c 6169 return floatx80_default_nan(status);
158142c2 6170 }
0f605c88
LV
6171 return packFloatx80(zSign, floatx80_infinity_high,
6172 floatx80_infinity_low);
158142c2
FB
6173 }
6174 if ( aExp == 0 ) {
6175 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6176 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6177 }
6178 if ( bExp == 0 ) {
6179 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
6180 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6181 }
6182 zExp = aExp + bExp - 0x3FFE;
6183 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 6184 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
6185 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6186 --zExp;
6187 }
a2f2d288 6188 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6189 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6190}
6191
6192/*----------------------------------------------------------------------------
6193| Returns the result of dividing the extended double-precision floating-point
6194| value `a' by the corresponding value `b'. The operation is performed
6195| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6196*----------------------------------------------------------------------------*/
6197
e5a41ffa 6198floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2 6199{
c120391c 6200 bool aSign, bSign, zSign;
f4014512 6201 int32_t aExp, bExp, zExp;
bb98fe42
AF
6202 uint64_t aSig, bSig, zSig0, zSig1;
6203 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 6204
d1eb8f2a
AD
6205 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6206 float_raise(float_flag_invalid, status);
6207 return floatx80_default_nan(status);
6208 }
158142c2
FB
6209 aSig = extractFloatx80Frac( a );
6210 aExp = extractFloatx80Exp( a );
6211 aSign = extractFloatx80Sign( a );
6212 bSig = extractFloatx80Frac( b );
6213 bExp = extractFloatx80Exp( b );
6214 bSign = extractFloatx80Sign( b );
6215 zSign = aSign ^ bSign;
6216 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6217 if ((uint64_t)(aSig << 1)) {
6218 return propagateFloatx80NaN(a, b, status);
6219 }
158142c2 6220 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6221 if ((uint64_t)(bSig << 1)) {
6222 return propagateFloatx80NaN(a, b, status);
6223 }
158142c2
FB
6224 goto invalid;
6225 }
0f605c88
LV
6226 return packFloatx80(zSign, floatx80_infinity_high,
6227 floatx80_infinity_low);
158142c2
FB
6228 }
6229 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6230 if ((uint64_t)(bSig << 1)) {
6231 return propagateFloatx80NaN(a, b, status);
6232 }
158142c2
FB
6233 return packFloatx80( zSign, 0, 0 );
6234 }
6235 if ( bExp == 0 ) {
6236 if ( bSig == 0 ) {
6237 if ( ( aExp | aSig ) == 0 ) {
6238 invalid:
ff32e16e 6239 float_raise(float_flag_invalid, status);
af39bc8c 6240 return floatx80_default_nan(status);
158142c2 6241 }
ff32e16e 6242 float_raise(float_flag_divbyzero, status);
0f605c88
LV
6243 return packFloatx80(zSign, floatx80_infinity_high,
6244 floatx80_infinity_low);
158142c2
FB
6245 }
6246 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6247 }
6248 if ( aExp == 0 ) {
6249 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6250 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6251 }
6252 zExp = aExp - bExp + 0x3FFE;
6253 rem1 = 0;
6254 if ( bSig <= aSig ) {
6255 shift128Right( aSig, 0, 1, &aSig, &rem1 );
6256 ++zExp;
6257 }
6258 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6259 mul64To128( bSig, zSig0, &term0, &term1 );
6260 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 6261 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6262 --zSig0;
6263 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6264 }
6265 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 6266 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
6267 mul64To128( bSig, zSig1, &term1, &term2 );
6268 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 6269 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6270 --zSig1;
6271 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6272 }
6273 zSig1 |= ( ( rem1 | rem2 ) != 0 );
6274 }
a2f2d288 6275 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6276 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6277}
6278
6279/*----------------------------------------------------------------------------
6280| Returns the remainder of the extended double-precision floating-point value
6281| `a' with respect to the corresponding value `b'. The operation is performed
6b8b0136
JM
6282| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6283| if 'mod' is false; if 'mod' is true, return the remainder based on truncating
445810ec
JM
6284| the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6285| the absolute value of the integer quotient.
158142c2
FB
6286*----------------------------------------------------------------------------*/
6287
445810ec 6288floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
6b8b0136 6289 float_status *status)
158142c2 6290{
c120391c 6291 bool aSign, zSign;
b662495d 6292 int32_t aExp, bExp, expDiff, aExpOrig;
bb98fe42
AF
6293 uint64_t aSig0, aSig1, bSig;
6294 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 6295
445810ec 6296 *quotient = 0;
d1eb8f2a
AD
6297 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6298 float_raise(float_flag_invalid, status);
6299 return floatx80_default_nan(status);
6300 }
158142c2 6301 aSig0 = extractFloatx80Frac( a );
b662495d 6302 aExpOrig = aExp = extractFloatx80Exp( a );
158142c2
FB
6303 aSign = extractFloatx80Sign( a );
6304 bSig = extractFloatx80Frac( b );
6305 bExp = extractFloatx80Exp( b );
158142c2 6306 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6307 if ( (uint64_t) ( aSig0<<1 )
6308 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6309 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6310 }
6311 goto invalid;
6312 }
6313 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6314 if ((uint64_t)(bSig << 1)) {
6315 return propagateFloatx80NaN(a, b, status);
6316 }
b662495d
JM
6317 if (aExp == 0 && aSig0 >> 63) {
6318 /*
6319 * Pseudo-denormal argument must be returned in normalized
6320 * form.
6321 */
6322 return packFloatx80(aSign, 1, aSig0);
6323 }
158142c2
FB
6324 return a;
6325 }
6326 if ( bExp == 0 ) {
6327 if ( bSig == 0 ) {
6328 invalid:
ff32e16e 6329 float_raise(float_flag_invalid, status);
af39bc8c 6330 return floatx80_default_nan(status);
158142c2
FB
6331 }
6332 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6333 }
6334 if ( aExp == 0 ) {
499a2f7b 6335 if ( aSig0 == 0 ) return a;
158142c2
FB
6336 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6337 }
158142c2
FB
6338 zSign = aSign;
6339 expDiff = aExp - bExp;
6340 aSig1 = 0;
6341 if ( expDiff < 0 ) {
b662495d
JM
6342 if ( mod || expDiff < -1 ) {
6343 if (aExp == 1 && aExpOrig == 0) {
6344 /*
6345 * Pseudo-denormal argument must be returned in
6346 * normalized form.
6347 */
6348 return packFloatx80(aSign, aExp, aSig0);
6349 }
6350 return a;
6351 }
158142c2
FB
6352 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6353 expDiff = 0;
6354 }
445810ec 6355 *quotient = q = ( bSig <= aSig0 );
158142c2
FB
6356 if ( q ) aSig0 -= bSig;
6357 expDiff -= 64;
6358 while ( 0 < expDiff ) {
6359 q = estimateDiv128To64( aSig0, aSig1, bSig );
6360 q = ( 2 < q ) ? q - 2 : 0;
6361 mul64To128( bSig, q, &term0, &term1 );
6362 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6363 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6364 expDiff -= 62;
445810ec
JM
6365 *quotient <<= 62;
6366 *quotient += q;
158142c2
FB
6367 }
6368 expDiff += 64;
6369 if ( 0 < expDiff ) {
6370 q = estimateDiv128To64( aSig0, aSig1, bSig );
6371 q = ( 2 < q ) ? q - 2 : 0;
6372 q >>= 64 - expDiff;
6373 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6374 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6375 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6376 while ( le128( term0, term1, aSig0, aSig1 ) ) {
6377 ++q;
6378 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6379 }
445810ec
JM
6380 if (expDiff < 64) {
6381 *quotient <<= expDiff;
6382 } else {
6383 *quotient = 0;
6384 }
6385 *quotient += q;
158142c2
FB
6386 }
6387 else {
6388 term1 = 0;
6389 term0 = bSig;
6390 }
6b8b0136
JM
6391 if (!mod) {
6392 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6393 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6394 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6395 && ( q & 1 ) )
6396 ) {
6397 aSig0 = alternateASig0;
6398 aSig1 = alternateASig1;
6399 zSign = ! zSign;
445810ec 6400 ++*quotient;
6b8b0136 6401 }
158142c2
FB
6402 }
6403 return
6404 normalizeRoundAndPackFloatx80(
ff32e16e 6405 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
6406
6407}
6408
6b8b0136
JM
6409/*----------------------------------------------------------------------------
6410| Returns the remainder of the extended double-precision floating-point value
6411| `a' with respect to the corresponding value `b'. The operation is performed
6412| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6413*----------------------------------------------------------------------------*/
6414
6415floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6416{
445810ec
JM
6417 uint64_t quotient;
6418 return floatx80_modrem(a, b, false, &quotient, status);
6b8b0136
JM
6419}
6420
6421/*----------------------------------------------------------------------------
6422| Returns the remainder of the extended double-precision floating-point value
6423| `a' with respect to the corresponding value `b', with the quotient truncated
6424| toward zero.
6425*----------------------------------------------------------------------------*/
6426
6427floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6428{
445810ec
JM
6429 uint64_t quotient;
6430 return floatx80_modrem(a, b, true, &quotient, status);
6b8b0136
JM
6431}
6432
158142c2
FB
6433/*----------------------------------------------------------------------------
6434| Returns the square root of the extended double-precision floating-point
6435| value `a'. The operation is performed according to the IEC/IEEE Standard
6436| for Binary Floating-Point Arithmetic.
6437*----------------------------------------------------------------------------*/
6438
e5a41ffa 6439floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2 6440{
c120391c 6441 bool aSign;
f4014512 6442 int32_t aExp, zExp;
bb98fe42
AF
6443 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6444 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 6445
d1eb8f2a
AD
6446 if (floatx80_invalid_encoding(a)) {
6447 float_raise(float_flag_invalid, status);
6448 return floatx80_default_nan(status);
6449 }
158142c2
FB
6450 aSig0 = extractFloatx80Frac( a );
6451 aExp = extractFloatx80Exp( a );
6452 aSign = extractFloatx80Sign( a );
6453 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6454 if ((uint64_t)(aSig0 << 1)) {
6455 return propagateFloatx80NaN(a, a, status);
6456 }
158142c2
FB
6457 if ( ! aSign ) return a;
6458 goto invalid;
6459 }
6460 if ( aSign ) {
6461 if ( ( aExp | aSig0 ) == 0 ) return a;
6462 invalid:
ff32e16e 6463 float_raise(float_flag_invalid, status);
af39bc8c 6464 return floatx80_default_nan(status);
158142c2
FB
6465 }
6466 if ( aExp == 0 ) {
6467 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6468 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6469 }
6470 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6471 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6472 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6473 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6474 doubleZSig0 = zSig0<<1;
6475 mul64To128( zSig0, zSig0, &term0, &term1 );
6476 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6477 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6478 --zSig0;
6479 doubleZSig0 -= 2;
6480 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6481 }
6482 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
e9321124 6483 if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
158142c2
FB
6484 if ( zSig1 == 0 ) zSig1 = 1;
6485 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6486 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6487 mul64To128( zSig1, zSig1, &term2, &term3 );
6488 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6489 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6490 --zSig1;
6491 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6492 term3 |= 1;
6493 term2 |= doubleZSig0;
6494 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6495 }
6496 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6497 }
6498 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6499 zSig0 |= doubleZSig0;
a2f2d288
PM
6500 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6501 0, zExp, zSig0, zSig1, status);
158142c2
FB
6502}
6503
6504/*----------------------------------------------------------------------------
158142c2
FB
6505| Returns the result of converting the quadruple-precision floating-point
6506| value `a' to the 32-bit two's complement integer format. The conversion
6507| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6508| Arithmetic---which means in particular that the conversion is rounded
6509| according to the current rounding mode. If `a' is a NaN, the largest
6510| positive integer is returned. Otherwise, if the conversion overflows, the
6511| largest integer with the same sign as `a' is returned.
6512*----------------------------------------------------------------------------*/
6513
f4014512 6514int32_t float128_to_int32(float128 a, float_status *status)
158142c2 6515{
c120391c 6516 bool aSign;
f4014512 6517 int32_t aExp, shiftCount;
bb98fe42 6518 uint64_t aSig0, aSig1;
158142c2
FB
6519
6520 aSig1 = extractFloat128Frac1( a );
6521 aSig0 = extractFloat128Frac0( a );
6522 aExp = extractFloat128Exp( a );
6523 aSign = extractFloat128Sign( a );
6524 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
e9321124 6525 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6526 aSig0 |= ( aSig1 != 0 );
6527 shiftCount = 0x4028 - aExp;
6528 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 6529 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
6530
6531}
6532
6533/*----------------------------------------------------------------------------
6534| Returns the result of converting the quadruple-precision floating-point
6535| value `a' to the 32-bit two's complement integer format. The conversion
6536| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6537| Arithmetic, except that the conversion is always rounded toward zero. If
6538| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
6539| conversion overflows, the largest integer with the same sign as `a' is
6540| returned.
6541*----------------------------------------------------------------------------*/
6542
f4014512 6543int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2 6544{
c120391c 6545 bool aSign;
f4014512 6546 int32_t aExp, shiftCount;
bb98fe42 6547 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 6548 int32_t z;
158142c2
FB
6549
6550 aSig1 = extractFloat128Frac1( a );
6551 aSig0 = extractFloat128Frac0( a );
6552 aExp = extractFloat128Exp( a );
6553 aSign = extractFloat128Sign( a );
6554 aSig0 |= ( aSig1 != 0 );
6555 if ( 0x401E < aExp ) {
6556 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
6557 goto invalid;
6558 }
6559 else if ( aExp < 0x3FFF ) {
a2f2d288 6560 if (aExp || aSig0) {
d82f3b2d 6561 float_raise(float_flag_inexact, status);
a2f2d288 6562 }
158142c2
FB
6563 return 0;
6564 }
e9321124 6565 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6566 shiftCount = 0x402F - aExp;
6567 savedASig = aSig0;
6568 aSig0 >>= shiftCount;
6569 z = aSig0;
6570 if ( aSign ) z = - z;
6571 if ( ( z < 0 ) ^ aSign ) {
6572 invalid:
ff32e16e 6573 float_raise(float_flag_invalid, status);
2c217da0 6574 return aSign ? INT32_MIN : INT32_MAX;
158142c2
FB
6575 }
6576 if ( ( aSig0<<shiftCount ) != savedASig ) {
d82f3b2d 6577 float_raise(float_flag_inexact, status);
158142c2
FB
6578 }
6579 return z;
6580
6581}
6582
6583/*----------------------------------------------------------------------------
6584| Returns the result of converting the quadruple-precision floating-point
6585| value `a' to the 64-bit two's complement integer format. The conversion
6586| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6587| Arithmetic---which means in particular that the conversion is rounded
6588| according to the current rounding mode. If `a' is a NaN, the largest
6589| positive integer is returned. Otherwise, if the conversion overflows, the
6590| largest integer with the same sign as `a' is returned.
6591*----------------------------------------------------------------------------*/
6592
f42c2224 6593int64_t float128_to_int64(float128 a, float_status *status)
158142c2 6594{
c120391c 6595 bool aSign;
f4014512 6596 int32_t aExp, shiftCount;
bb98fe42 6597 uint64_t aSig0, aSig1;
158142c2
FB
6598
6599 aSig1 = extractFloat128Frac1( a );
6600 aSig0 = extractFloat128Frac0( a );
6601 aExp = extractFloat128Exp( a );
6602 aSign = extractFloat128Sign( a );
e9321124 6603 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6604 shiftCount = 0x402F - aExp;
6605 if ( shiftCount <= 0 ) {
6606 if ( 0x403E < aExp ) {
ff32e16e 6607 float_raise(float_flag_invalid, status);
158142c2
FB
6608 if ( ! aSign
6609 || ( ( aExp == 0x7FFF )
e9321124 6610 && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
158142c2
FB
6611 )
6612 ) {
2c217da0 6613 return INT64_MAX;
158142c2 6614 }
2c217da0 6615 return INT64_MIN;
158142c2
FB
6616 }
6617 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6618 }
6619 else {
6620 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6621 }
ff32e16e 6622 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
6623
6624}
6625
6626/*----------------------------------------------------------------------------
6627| Returns the result of converting the quadruple-precision floating-point
6628| value `a' to the 64-bit two's complement integer format. The conversion
6629| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6630| Arithmetic, except that the conversion is always rounded toward zero.
6631| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
6632| the conversion overflows, the largest integer with the same sign as `a' is
6633| returned.
6634*----------------------------------------------------------------------------*/
6635
f42c2224 6636int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2 6637{
c120391c 6638 bool aSign;
f4014512 6639 int32_t aExp, shiftCount;
bb98fe42 6640 uint64_t aSig0, aSig1;
f42c2224 6641 int64_t z;
158142c2
FB
6642
6643 aSig1 = extractFloat128Frac1( a );
6644 aSig0 = extractFloat128Frac0( a );
6645 aExp = extractFloat128Exp( a );
6646 aSign = extractFloat128Sign( a );
e9321124 6647 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6648 shiftCount = aExp - 0x402F;
6649 if ( 0 < shiftCount ) {
6650 if ( 0x403E <= aExp ) {
e9321124
AB
6651 aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
6652 if ( ( a.high == UINT64_C(0xC03E000000000000) )
6653 && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
a2f2d288 6654 if (aSig1) {
d82f3b2d 6655 float_raise(float_flag_inexact, status);
a2f2d288 6656 }
158142c2
FB
6657 }
6658 else {
ff32e16e 6659 float_raise(float_flag_invalid, status);
158142c2 6660 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
2c217da0 6661 return INT64_MAX;
158142c2
FB
6662 }
6663 }
2c217da0 6664 return INT64_MIN;
158142c2
FB
6665 }
6666 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 6667 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
d82f3b2d 6668 float_raise(float_flag_inexact, status);
158142c2
FB
6669 }
6670 }
6671 else {
6672 if ( aExp < 0x3FFF ) {
6673 if ( aExp | aSig0 | aSig1 ) {
d82f3b2d 6674 float_raise(float_flag_inexact, status);
158142c2
FB
6675 }
6676 return 0;
6677 }
6678 z = aSig0>>( - shiftCount );
6679 if ( aSig1
bb98fe42 6680 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
d82f3b2d 6681 float_raise(float_flag_inexact, status);
158142c2
FB
6682 }
6683 }
6684 if ( aSign ) z = - z;
6685 return z;
6686
6687}
6688
2e6d8568
BR
6689/*----------------------------------------------------------------------------
6690| Returns the result of converting the quadruple-precision floating-point value
6691| `a' to the 64-bit unsigned integer format. The conversion is
6692| performed according to the IEC/IEEE Standard for Binary Floating-Point
6693| Arithmetic---which means in particular that the conversion is rounded
6694| according to the current rounding mode. If `a' is a NaN, the largest
6695| positive integer is returned. If the conversion overflows, the
6696| largest unsigned integer is returned. If 'a' is negative, the value is
6697| rounded and zero is returned; negative values that do not round to zero
6698| will raise the inexact exception.
6699*----------------------------------------------------------------------------*/
6700
6701uint64_t float128_to_uint64(float128 a, float_status *status)
6702{
c120391c 6703 bool aSign;
2e6d8568
BR
6704 int aExp;
6705 int shiftCount;
6706 uint64_t aSig0, aSig1;
6707
6708 aSig0 = extractFloat128Frac0(a);
6709 aSig1 = extractFloat128Frac1(a);
6710 aExp = extractFloat128Exp(a);
6711 aSign = extractFloat128Sign(a);
6712 if (aSign && (aExp > 0x3FFE)) {
6713 float_raise(float_flag_invalid, status);
6714 if (float128_is_any_nan(a)) {
2c217da0 6715 return UINT64_MAX;
2e6d8568
BR
6716 } else {
6717 return 0;
6718 }
6719 }
6720 if (aExp) {
2c217da0 6721 aSig0 |= UINT64_C(0x0001000000000000);
2e6d8568
BR
6722 }
6723 shiftCount = 0x402F - aExp;
6724 if (shiftCount <= 0) {
6725 if (0x403E < aExp) {
6726 float_raise(float_flag_invalid, status);
2c217da0 6727 return UINT64_MAX;
2e6d8568
BR
6728 }
6729 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6730 } else {
6731 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6732 }
6733 return roundAndPackUint64(aSign, aSig0, aSig1, status);
6734}
6735
6736uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6737{
6738 uint64_t v;
6739 signed char current_rounding_mode = status->float_rounding_mode;
6740
6741 set_float_rounding_mode(float_round_to_zero, status);
6742 v = float128_to_uint64(a, status);
6743 set_float_rounding_mode(current_rounding_mode, status);
6744
6745 return v;
6746}
6747
158142c2
FB
6748/*----------------------------------------------------------------------------
6749| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
6750| value `a' to the 32-bit unsigned integer format. The conversion
6751| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6752| Arithmetic except that the conversion is always rounded toward zero.
6753| If `a' is a NaN, the largest positive integer is returned. Otherwise,
6754| if the conversion overflows, the largest unsigned integer is returned.
6755| If 'a' is negative, the value is rounded and zero is returned; negative
6756| values that do not round to zero will raise the inexact exception.
6757*----------------------------------------------------------------------------*/
6758
6759uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6760{
6761 uint64_t v;
6762 uint32_t res;
6763 int old_exc_flags = get_float_exception_flags(status);
6764
6765 v = float128_to_uint64_round_to_zero(a, status);
6766 if (v > 0xffffffff) {
6767 res = 0xffffffff;
6768 } else {
6769 return v;
6770 }
6771 set_float_exception_flags(old_exc_flags, status);
e45de992
DH
6772 float_raise(float_flag_invalid, status);
6773 return res;
6774}
6775
6776/*----------------------------------------------------------------------------
6777| Returns the result of converting the quadruple-precision floating-point value
6778| `a' to the 32-bit unsigned integer format. The conversion is
6779| performed according to the IEC/IEEE Standard for Binary Floating-Point
6780| Arithmetic---which means in particular that the conversion is rounded
6781| according to the current rounding mode. If `a' is a NaN, the largest
6782| positive integer is returned. If the conversion overflows, the
6783| largest unsigned integer is returned. If 'a' is negative, the value is
6784| rounded and zero is returned; negative values that do not round to zero
6785| will raise the inexact exception.
6786*----------------------------------------------------------------------------*/
6787
6788uint32_t float128_to_uint32(float128 a, float_status *status)
6789{
6790 uint64_t v;
6791 uint32_t res;
6792 int old_exc_flags = get_float_exception_flags(status);
6793
6794 v = float128_to_uint64(a, status);
6795 if (v > 0xffffffff) {
6796 res = 0xffffffff;
6797 } else {
6798 return v;
6799 }
6800 set_float_exception_flags(old_exc_flags, status);
fd425037
BR
6801 float_raise(float_flag_invalid, status);
6802 return res;
6803}
6804
6805/*----------------------------------------------------------------------------
6806| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
6807| value `a' to the single-precision floating-point format. The conversion
6808| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6809| Arithmetic.
6810*----------------------------------------------------------------------------*/
6811
e5a41ffa 6812float32 float128_to_float32(float128 a, float_status *status)
158142c2 6813{
c120391c 6814 bool aSign;
f4014512 6815 int32_t aExp;
bb98fe42
AF
6816 uint64_t aSig0, aSig1;
6817 uint32_t zSig;
158142c2
FB
6818
6819 aSig1 = extractFloat128Frac1( a );
6820 aSig0 = extractFloat128Frac0( a );
6821 aExp = extractFloat128Exp( a );
6822 aSign = extractFloat128Sign( a );
6823 if ( aExp == 0x7FFF ) {
6824 if ( aSig0 | aSig1 ) {
ff32e16e 6825 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
6826 }
6827 return packFloat32( aSign, 0xFF, 0 );
6828 }
6829 aSig0 |= ( aSig1 != 0 );
6830 shift64RightJamming( aSig0, 18, &aSig0 );
6831 zSig = aSig0;
6832 if ( aExp || zSig ) {
6833 zSig |= 0x40000000;
6834 aExp -= 0x3F81;
6835 }
ff32e16e 6836 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
6837
6838}
6839
6840/*----------------------------------------------------------------------------
6841| Returns the result of converting the quadruple-precision floating-point
6842| value `a' to the double-precision floating-point format. The conversion
6843| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6844| Arithmetic.
6845*----------------------------------------------------------------------------*/
6846
e5a41ffa 6847float64 float128_to_float64(float128 a, float_status *status)
158142c2 6848{
c120391c 6849 bool aSign;
f4014512 6850 int32_t aExp;
bb98fe42 6851 uint64_t aSig0, aSig1;
158142c2
FB
6852
6853 aSig1 = extractFloat128Frac1( a );
6854 aSig0 = extractFloat128Frac0( a );
6855 aExp = extractFloat128Exp( a );
6856 aSign = extractFloat128Sign( a );
6857 if ( aExp == 0x7FFF ) {
6858 if ( aSig0 | aSig1 ) {
ff32e16e 6859 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
6860 }
6861 return packFloat64( aSign, 0x7FF, 0 );
6862 }
6863 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6864 aSig0 |= ( aSig1 != 0 );
6865 if ( aExp || aSig0 ) {
e9321124 6866 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
6867 aExp -= 0x3C01;
6868 }
ff32e16e 6869 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
6870
6871}
6872
158142c2
FB
6873/*----------------------------------------------------------------------------
6874| Returns the result of converting the quadruple-precision floating-point
6875| value `a' to the extended double-precision floating-point format. The
6876| conversion is performed according to the IEC/IEEE Standard for Binary
6877| Floating-Point Arithmetic.
6878*----------------------------------------------------------------------------*/
6879
e5a41ffa 6880floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2 6881{
c120391c 6882 bool aSign;
f4014512 6883 int32_t aExp;
bb98fe42 6884 uint64_t aSig0, aSig1;
158142c2
FB
6885
6886 aSig1 = extractFloat128Frac1( a );
6887 aSig0 = extractFloat128Frac0( a );
6888 aExp = extractFloat128Exp( a );
6889 aSign = extractFloat128Sign( a );
6890 if ( aExp == 0x7FFF ) {
6891 if ( aSig0 | aSig1 ) {
7537c2b4
JM
6892 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6893 status);
6894 return floatx80_silence_nan(res, status);
158142c2 6895 }
0f605c88
LV
6896 return packFloatx80(aSign, floatx80_infinity_high,
6897 floatx80_infinity_low);
158142c2
FB
6898 }
6899 if ( aExp == 0 ) {
6900 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6901 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6902 }
6903 else {
e9321124 6904 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6905 }
6906 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6907 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6908
6909}
6910
158142c2
FB
6911/*----------------------------------------------------------------------------
6912| Rounds the quadruple-precision floating-point value `a' to an integer, and
6913| returns the result as a quadruple-precision floating-point value. The
6914| operation is performed according to the IEC/IEEE Standard for Binary
6915| Floating-Point Arithmetic.
6916*----------------------------------------------------------------------------*/
6917
e5a41ffa 6918float128 float128_round_to_int(float128 a, float_status *status)
158142c2 6919{
c120391c 6920 bool aSign;
f4014512 6921 int32_t aExp;
bb98fe42 6922 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
6923 float128 z;
6924
6925 aExp = extractFloat128Exp( a );
6926 if ( 0x402F <= aExp ) {
6927 if ( 0x406F <= aExp ) {
6928 if ( ( aExp == 0x7FFF )
6929 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6930 ) {
ff32e16e 6931 return propagateFloat128NaN(a, a, status);
158142c2
FB
6932 }
6933 return a;
6934 }
6935 lastBitMask = 1;
6936 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6937 roundBitsMask = lastBitMask - 1;
6938 z = a;
a2f2d288 6939 switch (status->float_rounding_mode) {
dc355b76 6940 case float_round_nearest_even:
158142c2
FB
6941 if ( lastBitMask ) {
6942 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6943 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6944 }
6945 else {
bb98fe42 6946 if ( (int64_t) z.low < 0 ) {
158142c2 6947 ++z.high;
bb98fe42 6948 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
6949 }
6950 }
dc355b76 6951 break;
f9288a76
PM
6952 case float_round_ties_away:
6953 if (lastBitMask) {
6954 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6955 } else {
6956 if ((int64_t) z.low < 0) {
6957 ++z.high;
6958 }
6959 }
6960 break;
dc355b76
PM
6961 case float_round_to_zero:
6962 break;
6963 case float_round_up:
6964 if (!extractFloat128Sign(z)) {
6965 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6966 }
6967 break;
6968 case float_round_down:
6969 if (extractFloat128Sign(z)) {
6970 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 6971 }
dc355b76 6972 break;
5d64abb3
RH
6973 case float_round_to_odd:
6974 /*
6975 * Note that if lastBitMask == 0, the last bit is the lsb
6976 * of high, and roundBitsMask == -1.
6977 */
6978 if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
6979 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6980 }
6981 break;
dc355b76
PM
6982 default:
6983 abort();
158142c2
FB
6984 }
6985 z.low &= ~ roundBitsMask;
6986 }
6987 else {
6988 if ( aExp < 0x3FFF ) {
bb98fe42 6989 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
d82f3b2d 6990 float_raise(float_flag_inexact, status);
158142c2 6991 aSign = extractFloat128Sign( a );
a2f2d288 6992 switch (status->float_rounding_mode) {
5d64abb3 6993 case float_round_nearest_even:
158142c2
FB
6994 if ( ( aExp == 0x3FFE )
6995 && ( extractFloat128Frac0( a )
6996 | extractFloat128Frac1( a ) )
6997 ) {
6998 return packFloat128( aSign, 0x3FFF, 0, 0 );
6999 }
7000 break;
f9288a76
PM
7001 case float_round_ties_away:
7002 if (aExp == 0x3FFE) {
7003 return packFloat128(aSign, 0x3FFF, 0, 0);
7004 }
7005 break;
5d64abb3 7006 case float_round_down:
158142c2
FB
7007 return
7008 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
7009 : packFloat128( 0, 0, 0, 0 );
5d64abb3 7010 case float_round_up:
158142c2
FB
7011 return
7012 aSign ? packFloat128( 1, 0, 0, 0 )
7013 : packFloat128( 0, 0x3FFF, 0, 0 );
5d64abb3
RH
7014
7015 case float_round_to_odd:
7016 return packFloat128(aSign, 0x3FFF, 0, 0);
3dede407
RH
7017
7018 case float_round_to_zero:
7019 break;
158142c2
FB
7020 }
7021 return packFloat128( aSign, 0, 0, 0 );
7022 }
7023 lastBitMask = 1;
7024 lastBitMask <<= 0x402F - aExp;
7025 roundBitsMask = lastBitMask - 1;
7026 z.low = 0;
7027 z.high = a.high;
a2f2d288 7028 switch (status->float_rounding_mode) {
dc355b76 7029 case float_round_nearest_even:
158142c2
FB
7030 z.high += lastBitMask>>1;
7031 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
7032 z.high &= ~ lastBitMask;
7033 }
dc355b76 7034 break;
f9288a76
PM
7035 case float_round_ties_away:
7036 z.high += lastBitMask>>1;
7037 break;
dc355b76
PM
7038 case float_round_to_zero:
7039 break;
7040 case float_round_up:
7041 if (!extractFloat128Sign(z)) {
158142c2
FB
7042 z.high |= ( a.low != 0 );
7043 z.high += roundBitsMask;
7044 }
dc355b76
PM
7045 break;
7046 case float_round_down:
7047 if (extractFloat128Sign(z)) {
7048 z.high |= (a.low != 0);
7049 z.high += roundBitsMask;
7050 }
7051 break;
5d64abb3
RH
7052 case float_round_to_odd:
7053 if ((z.high & lastBitMask) == 0) {
7054 z.high |= (a.low != 0);
7055 z.high += roundBitsMask;
7056 }
7057 break;
dc355b76
PM
7058 default:
7059 abort();
158142c2
FB
7060 }
7061 z.high &= ~ roundBitsMask;
7062 }
7063 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
d82f3b2d 7064 float_raise(float_flag_inexact, status);
158142c2
FB
7065 }
7066 return z;
7067
7068}
7069
7070/*----------------------------------------------------------------------------
7071| Returns the result of adding the absolute values of the quadruple-precision
7072| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
7073| before being returned. `zSign' is ignored if the result is a NaN.
7074| The addition is performed according to the IEC/IEEE Standard for Binary
7075| Floating-Point Arithmetic.
7076*----------------------------------------------------------------------------*/
7077
c120391c 7078static float128 addFloat128Sigs(float128 a, float128 b, bool zSign,
e5a41ffa 7079 float_status *status)
158142c2 7080{
f4014512 7081 int32_t aExp, bExp, zExp;
bb98fe42 7082 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 7083 int32_t expDiff;
158142c2
FB
7084
7085 aSig1 = extractFloat128Frac1( a );
7086 aSig0 = extractFloat128Frac0( a );
7087 aExp = extractFloat128Exp( a );
7088 bSig1 = extractFloat128Frac1( b );
7089 bSig0 = extractFloat128Frac0( b );
7090 bExp = extractFloat128Exp( b );
7091 expDiff = aExp - bExp;
7092 if ( 0 < expDiff ) {
7093 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7094 if (aSig0 | aSig1) {
7095 return propagateFloat128NaN(a, b, status);
7096 }
158142c2
FB
7097 return a;
7098 }
7099 if ( bExp == 0 ) {
7100 --expDiff;
7101 }
7102 else {
e9321124 7103 bSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7104 }
7105 shift128ExtraRightJamming(
7106 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
7107 zExp = aExp;
7108 }
7109 else if ( expDiff < 0 ) {
7110 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7111 if (bSig0 | bSig1) {
7112 return propagateFloat128NaN(a, b, status);
7113 }
158142c2
FB
7114 return packFloat128( zSign, 0x7FFF, 0, 0 );
7115 }
7116 if ( aExp == 0 ) {
7117 ++expDiff;
7118 }
7119 else {
e9321124 7120 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7121 }
7122 shift128ExtraRightJamming(
7123 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
7124 zExp = bExp;
7125 }
7126 else {
7127 if ( aExp == 0x7FFF ) {
7128 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 7129 return propagateFloat128NaN(a, b, status);
158142c2
FB
7130 }
7131 return a;
7132 }
7133 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
fe76d976 7134 if ( aExp == 0 ) {
a2f2d288 7135 if (status->flush_to_zero) {
e6afc87f 7136 if (zSig0 | zSig1) {
ff32e16e 7137 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
7138 }
7139 return packFloat128(zSign, 0, 0, 0);
7140 }
fe76d976
PB
7141 return packFloat128( zSign, 0, zSig0, zSig1 );
7142 }
158142c2 7143 zSig2 = 0;
e9321124 7144 zSig0 |= UINT64_C(0x0002000000000000);
158142c2
FB
7145 zExp = aExp;
7146 goto shiftRight1;
7147 }
e9321124 7148 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7149 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7150 --zExp;
e9321124 7151 if ( zSig0 < UINT64_C(0x0002000000000000) ) goto roundAndPack;
158142c2
FB
7152 ++zExp;
7153 shiftRight1:
7154 shift128ExtraRightJamming(
7155 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
7156 roundAndPack:
ff32e16e 7157 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7158
7159}
7160
7161/*----------------------------------------------------------------------------
7162| Returns the result of subtracting the absolute values of the quadruple-
7163| precision floating-point values `a' and `b'. If `zSign' is 1, the
7164| difference is negated before being returned. `zSign' is ignored if the
7165| result is a NaN. The subtraction is performed according to the IEC/IEEE
7166| Standard for Binary Floating-Point Arithmetic.
7167*----------------------------------------------------------------------------*/
7168
c120391c 7169static float128 subFloat128Sigs(float128 a, float128 b, bool zSign,
e5a41ffa 7170 float_status *status)
158142c2 7171{
f4014512 7172 int32_t aExp, bExp, zExp;
bb98fe42 7173 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 7174 int32_t expDiff;
158142c2
FB
7175
7176 aSig1 = extractFloat128Frac1( a );
7177 aSig0 = extractFloat128Frac0( a );
7178 aExp = extractFloat128Exp( a );
7179 bSig1 = extractFloat128Frac1( b );
7180 bSig0 = extractFloat128Frac0( b );
7181 bExp = extractFloat128Exp( b );
7182 expDiff = aExp - bExp;
7183 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
7184 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
7185 if ( 0 < expDiff ) goto aExpBigger;
7186 if ( expDiff < 0 ) goto bExpBigger;
7187 if ( aExp == 0x7FFF ) {
7188 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 7189 return propagateFloat128NaN(a, b, status);
158142c2 7190 }
ff32e16e 7191 float_raise(float_flag_invalid, status);
af39bc8c 7192 return float128_default_nan(status);
158142c2
FB
7193 }
7194 if ( aExp == 0 ) {
7195 aExp = 1;
7196 bExp = 1;
7197 }
7198 if ( bSig0 < aSig0 ) goto aBigger;
7199 if ( aSig0 < bSig0 ) goto bBigger;
7200 if ( bSig1 < aSig1 ) goto aBigger;
7201 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
7202 return packFloat128(status->float_rounding_mode == float_round_down,
7203 0, 0, 0);
158142c2
FB
7204 bExpBigger:
7205 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7206 if (bSig0 | bSig1) {
7207 return propagateFloat128NaN(a, b, status);
7208 }
158142c2
FB
7209 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
7210 }
7211 if ( aExp == 0 ) {
7212 ++expDiff;
7213 }
7214 else {
e9321124 7215 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7216 }
7217 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
e9321124 7218 bSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7219 bBigger:
7220 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
7221 zExp = bExp;
7222 zSign ^= 1;
7223 goto normalizeRoundAndPack;
7224 aExpBigger:
7225 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7226 if (aSig0 | aSig1) {
7227 return propagateFloat128NaN(a, b, status);
7228 }
158142c2
FB
7229 return a;
7230 }
7231 if ( bExp == 0 ) {
7232 --expDiff;
7233 }
7234 else {
e9321124 7235 bSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7236 }
7237 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
e9321124 7238 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7239 aBigger:
7240 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7241 zExp = aExp;
7242 normalizeRoundAndPack:
7243 --zExp;
ff32e16e
PM
7244 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
7245 status);
158142c2
FB
7246
7247}
7248
7249/*----------------------------------------------------------------------------
7250| Returns the result of adding the quadruple-precision floating-point values
7251| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
7252| for Binary Floating-Point Arithmetic.
7253*----------------------------------------------------------------------------*/
7254
e5a41ffa 7255float128 float128_add(float128 a, float128 b, float_status *status)
158142c2 7256{
c120391c 7257 bool aSign, bSign;
158142c2
FB
7258
7259 aSign = extractFloat128Sign( a );
7260 bSign = extractFloat128Sign( b );
7261 if ( aSign == bSign ) {
ff32e16e 7262 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7263 }
7264 else {
ff32e16e 7265 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7266 }
7267
7268}
7269
7270/*----------------------------------------------------------------------------
7271| Returns the result of subtracting the quadruple-precision floating-point
7272| values `a' and `b'. The operation is performed according to the IEC/IEEE
7273| Standard for Binary Floating-Point Arithmetic.
7274*----------------------------------------------------------------------------*/
7275
e5a41ffa 7276float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2 7277{
c120391c 7278 bool aSign, bSign;
158142c2
FB
7279
7280 aSign = extractFloat128Sign( a );
7281 bSign = extractFloat128Sign( b );
7282 if ( aSign == bSign ) {
ff32e16e 7283 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7284 }
7285 else {
ff32e16e 7286 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7287 }
7288
7289}
7290
7291/*----------------------------------------------------------------------------
7292| Returns the result of multiplying the quadruple-precision floating-point
7293| values `a' and `b'. The operation is performed according to the IEC/IEEE
7294| Standard for Binary Floating-Point Arithmetic.
7295*----------------------------------------------------------------------------*/
7296
e5a41ffa 7297float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2 7298{
c120391c 7299 bool aSign, bSign, zSign;
f4014512 7300 int32_t aExp, bExp, zExp;
bb98fe42 7301 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
7302
7303 aSig1 = extractFloat128Frac1( a );
7304 aSig0 = extractFloat128Frac0( a );
7305 aExp = extractFloat128Exp( a );
7306 aSign = extractFloat128Sign( a );
7307 bSig1 = extractFloat128Frac1( b );
7308 bSig0 = extractFloat128Frac0( b );
7309 bExp = extractFloat128Exp( b );
7310 bSign = extractFloat128Sign( b );
7311 zSign = aSign ^ bSign;
7312 if ( aExp == 0x7FFF ) {
7313 if ( ( aSig0 | aSig1 )
7314 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7315 return propagateFloat128NaN(a, b, status);
158142c2
FB
7316 }
7317 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
7318 return packFloat128( zSign, 0x7FFF, 0, 0 );
7319 }
7320 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7321 if (bSig0 | bSig1) {
7322 return propagateFloat128NaN(a, b, status);
7323 }
158142c2
FB
7324 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7325 invalid:
ff32e16e 7326 float_raise(float_flag_invalid, status);
af39bc8c 7327 return float128_default_nan(status);
158142c2
FB
7328 }
7329 return packFloat128( zSign, 0x7FFF, 0, 0 );
7330 }
7331 if ( aExp == 0 ) {
7332 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7333 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7334 }
7335 if ( bExp == 0 ) {
7336 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7337 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7338 }
7339 zExp = aExp + bExp - 0x4000;
e9321124 7340 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7341 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
7342 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
7343 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
7344 zSig2 |= ( zSig3 != 0 );
e9321124 7345 if (UINT64_C( 0x0002000000000000) <= zSig0 ) {
158142c2
FB
7346 shift128ExtraRightJamming(
7347 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
7348 ++zExp;
7349 }
ff32e16e 7350 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7351
7352}
7353
7354/*----------------------------------------------------------------------------
7355| Returns the result of dividing the quadruple-precision floating-point value
7356| `a' by the corresponding value `b'. The operation is performed according to
7357| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7358*----------------------------------------------------------------------------*/
7359
/*
 * float128_div: divide `a' by `b' and return the rounded quotient.
 * Exceptional conditions are reported through `status' via float_raise().
 * The quotient is built from two 64-bit digit estimates, each corrected
 * against an exactly computed remainder, then rounded by
 * roundAndPackFloat128().
 */
e5a41ffa 7360float128 float128_div(float128 a, float128 b, float_status *status)
158142c2 7361{
c120391c 7362 bool aSign, bSign, zSign;
f4014512 7363 int32_t aExp, bExp, zExp;
bb98fe42
AF
7364 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
7365 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7366
/* Split both operands into fraction words, exponent and sign. */
7367 aSig1 = extractFloat128Frac1( a );
7368 aSig0 = extractFloat128Frac0( a );
7369 aExp = extractFloat128Exp( a );
7370 aSign = extractFloat128Sign( a );
7371 bSig1 = extractFloat128Frac1( b );
7372 bSig0 = extractFloat128Frac0( b );
7373 bExp = extractFloat128Exp( b );
7374 bSign = extractFloat128Sign( b );
/* The result sign is the XOR of the operand signs. */
7375 zSign = aSign ^ bSign;
/*
 * `a' has exponent 0x7FFF: a non-zero fraction is a NaN to propagate.
 * Otherwise `a' is an infinity; dividing it by another infinity is
 * invalid, dividing it by anything else gives a signed infinity.
 */
7376 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7377 if (aSig0 | aSig1) {
7378 return propagateFloat128NaN(a, b, status);
7379 }
158142c2 7380 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7381 if (bSig0 | bSig1) {
7382 return propagateFloat128NaN(a, b, status);
7383 }
158142c2
FB
7384 goto invalid;
7385 }
7386 return packFloat128( zSign, 0x7FFF, 0, 0 );
7387 }
/*
 * `b' has exponent 0x7FFF (and `a' does not): propagate a NaN, otherwise
 * the result is packed with zero exponent and zero fraction (signed zero).
 */
7388 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7389 if (bSig0 | bSig1) {
7390 return propagateFloat128NaN(a, b, status);
7391 }
158142c2
FB
7392 return packFloat128( zSign, 0, 0, 0 );
7393 }
/*
 * `b' has a zero exponent: if its fraction is also zero the divisor is
 * zero -- 0/0 raises invalid, anything else raises divide-by-zero and
 * returns a signed infinity.  A non-zero fraction is a subnormal that is
 * normalized before use.
 */
7394 if ( bExp == 0 ) {
7395 if ( ( bSig0 | bSig1 ) == 0 ) {
7396 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
/* Shared exit for the invalid cases (also reached by goto from above). */
7397 invalid:
ff32e16e 7398 float_raise(float_flag_invalid, status);
af39bc8c 7399 return float128_default_nan(status);
158142c2 7400 }
ff32e16e 7401 float_raise(float_flag_divbyzero, status);
158142c2
FB
7402 return packFloat128( zSign, 0x7FFF, 0, 0 );
7403 }
7404 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7405 }
/* A zero dividend gives a signed zero; a subnormal one is normalized. */
7406 if ( aExp == 0 ) {
7407 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7408 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7409 }
7410 zExp = aExp - bExp + 0x3FFD;
/* Set the implicit leading bit on both significands and shift them left by 15. */
7411 shortShift128Left(
e9321124 7412 aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1 );
158142c2 7413 shortShift128Left(
e9321124 7414 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
/* If the divisor significand is <= the dividend's, halve the dividend and bump the exponent. */
7415 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
7416 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
7417 ++zExp;
7418 }
/*
 * First 64-bit quotient word: take an estimate, compute the remainder
 * a - b * zSig0, and decrement the estimate (adding b back) while the
 * remainder is negative.
 */
7419 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
7420 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
7421 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 7422 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7423 --zSig0;
7424 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
7425 }
/*
 * Second quotient word, estimated from the remainder.  The exact
 * correction and sticky bit are only computed when the low 14 bits of
 * the estimate are <= 4 -- presumably the only case in which the
 * estimate's error could affect rounding (TODO confirm against the
 * error bound of estimateDiv128To64).
 */
7426 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
7427 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
7428 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
7429 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7430 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7431 --zSig1;
7432 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
7433 }
/* Any non-zero final remainder is recorded as a sticky bit in the quotient. */
7434 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7435 }
/* Shift the quotient right by 15 (jamming lost bits into zSig2), then round and pack. */
7436 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 7437 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7438
7439}
7440
7441/*----------------------------------------------------------------------------
7442| Returns the remainder of the quadruple-precision floating-point value `a'
7443| with respect to the corresponding value `b'. The operation is performed
7444| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7445*----------------------------------------------------------------------------*/
7446
/*
 * float128_rem: remainder of `a' with respect to `b'.
 * The significand of `a' is reduced against that of `b' in steps, a
 * quotient counter `q' is maintained, and the final candidate remainder
 * is chosen between the last non-negative value and the first negative
 * one.  Exceptional conditions are reported through `status'.
 */
e5a41ffa 7447float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 7448{
c120391c 7449 bool aSign, zSign;
f4014512 7450 int32_t aExp, bExp, expDiff;
bb98fe42
AF
7451 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
7452 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
7453 int64_t sigMean0;
158142c2
FB
7454
/* Unpack the operands; the sign of `b' is never read by this function. */
7455 aSig1 = extractFloat128Frac1( a );
7456 aSig0 = extractFloat128Frac0( a );
7457 aExp = extractFloat128Exp( a );
7458 aSign = extractFloat128Sign( a );
7459 bSig1 = extractFloat128Frac1( b );
7460 bSig0 = extractFloat128Frac0( b );
7461 bExp = extractFloat128Exp( b );
158142c2
FB
/*
 * `a' has exponent 0x7FFF: propagate if either operand is a NaN,
 * otherwise `a' is an infinity and the operation is invalid.
 */
7462 if ( aExp == 0x7FFF ) {
7463 if ( ( aSig0 | aSig1 )
7464 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7465 return propagateFloat128NaN(a, b, status);
158142c2
FB
7466 }
7467 goto invalid;
7468 }
/* `b' has exponent 0x7FFF: propagate a NaN, otherwise return `a' unchanged. */
7469 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7470 if (bSig0 | bSig1) {
7471 return propagateFloat128NaN(a, b, status);
7472 }
158142c2
FB
7473 return a;
7474 }
/* A zero `b' is invalid; a subnormal `b' is normalized. */
7475 if ( bExp == 0 ) {
7476 if ( ( bSig0 | bSig1 ) == 0 ) {
/* Shared exit for the invalid cases (also reached by goto from above). */
7477 invalid:
ff32e16e 7478 float_raise(float_flag_invalid, status);
af39bc8c 7479 return float128_default_nan(status);
158142c2
FB
7480 }
7481 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7482 }
/* A zero `a' is returned as is; a subnormal `a' is normalized. */
7483 if ( aExp == 0 ) {
7484 if ( ( aSig0 | aSig1 ) == 0 ) return a;
7485 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7486 }
7487 expDiff = aExp - bExp;
/* When a's exponent is more than one below b's, `a' itself is returned. */
7488 if ( expDiff < -1 ) return a;
/*
 * Set the implicit leading bit on both significands.  `b' is shifted
 * left by 15; `a' by 15, or by 14 when expDiff is negative (i.e. -1).
 */
7489 shortShift128Left(
e9321124 7490 aSig0 | UINT64_C(0x0001000000000000),
158142c2
FB
7491 aSig1,
7492 15 - ( expDiff < 0 ),
7493 &aSig0,
7494 &aSig1
7495 );
7496 shortShift128Left(
e9321124 7497 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
/* Initial quotient bit: 1 (and one subtraction of b) when b <= a. */
7498 q = le128( bSig0, bSig1, aSig0, aSig1 );
7499 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7500 expDiff -= 64;
/*
 * Main reduction loop: each pass consumes 61 bits of exponent
 * difference.  The quotient estimate is lowered by 4 (clamped at 0) --
 * presumably so that it never exceeds the true quotient and the partial
 * remainder stays non-negative (TODO confirm against the error bound of
 * estimateDiv128To64).
 */
7501 while ( 0 < expDiff ) {
7502 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7503 q = ( 4 < q ) ? q - 4 : 0;
7504 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7505 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
7506 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
7507 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
7508 expDiff -= 61;
7509 }
/*
 * Final partial step.  When more than -64 remains, one more (lowered)
 * estimate is taken and scaled down by -expDiff bits; `b' is shifted
 * right by 12 and `a' is realigned by expDiff + 52 before b * q is
 * subtracted.  Otherwise both significands are simply shifted right
 * by 12.
 */
7510 if ( -64 < expDiff ) {
7511 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7512 q = ( 4 < q ) ? q - 4 : 0;
7513 q >>= - expDiff;
7514 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7515 expDiff += 52;
7516 if ( expDiff < 0 ) {
7517 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7518 }
7519 else {
7520 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
7521 }
7522 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7523 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
7524 }
7525 else {
7526 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
7527 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7528 }
/*
 * Keep subtracting `b', counting in q, until the remainder goes
 * negative.  The alternate pair holds the last value seen before the
 * final subtraction.
 */
7529 do {
7530 alternateASig0 = aSig0;
7531 alternateASig1 = aSig1;
7532 ++q;
7533 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 7534 } while ( 0 <= (int64_t) aSig0 );
/*
 * Choose between the negative remainder and the preceding non-negative
 * one by the sign of their sum: a negative sum selects the alternate
 * value, and an exact zero sum (a tie) selects it only when q is odd.
 */
158142c2 7535 add128(
bb98fe42 7536 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
7537 if ( ( sigMean0 < 0 )
7538 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
7539 aSig0 = alternateASig0;
7540 aSig1 = alternateASig1;
7541 }
/* Take the magnitude of the chosen remainder; its sign is folded into the result sign. */
bb98fe42 7542 zSign = ( (int64_t) aSig0 < 0 );
158142c2 7543 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
7544 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
7545 status);
158142c2
FB
7546}
7547
7548/*----------------------------------------------------------------------------
7549| Returns the square root of the quadruple-precision floating-point value `a'.
7550| The operation is performed according to the IEC/IEEE Standard for Binary
7551| Floating-Point Arithmetic.
7552*----------------------------------------------------------------------------*/
7553
/*
 * float128_sqrt: square root of `a'.
 * The root is built from a 32-bit estimate refined into two 64-bit
 * result words, each corrected against an exactly computed remainder,
 * then rounded by roundAndPackFloat128() with a positive sign.
 */
e5a41ffa 7554float128 float128_sqrt(float128 a, float_status *status)
158142c2 7555{
c120391c 7556 bool aSign;
f4014512 7557 int32_t aExp, zExp;
bb98fe42
AF
7558 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
7559 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7560
7561 aSig1 = extractFloat128Frac1( a );
7562 aSig0 = extractFloat128Frac0( a );
7563 aExp = extractFloat128Exp( a );
7564 aSign = extractFloat128Sign( a );
/*
 * Exponent 0x7FFF: a NaN is propagated, a positive infinity is returned
 * unchanged, and a negative infinity is invalid.
 */
7565 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7566 if (aSig0 | aSig1) {
7567 return propagateFloat128NaN(a, a, status);
7568 }
158142c2
FB
7569 if ( ! aSign ) return a;
7570 goto invalid;
7571 }
/* Negative input: a negative zero is returned as is, anything else is invalid. */
7572 if ( aSign ) {
7573 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
7574 invalid:
ff32e16e 7575 float_raise(float_flag_invalid, status);
af39bc8c 7576 return float128_default_nan(status);
158142c2
FB
7577 }
/* Positive zero yields a zero packed with sign 0; subnormals are normalized. */
7578 if ( aExp == 0 ) {
7579 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
7580 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7581 }
/* Halve the offset exponent (0x3FFF removed) and re-offset by 0x3FFE. */
7582 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
/* Set the implicit leading bit of the significand. */
e9321124 7583 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
/*
 * Initial estimate from the top significand bits, then a refinement
 * using a 128-by-64 division estimate.  The left shift of the
 * significand is 13, or 12 when the exponent's low bit is set.
 */
7584 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
7585 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
7586 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
7587 doubleZSig0 = zSig0<<1;
/*
 * Remainder a - zSig0^2; while it is negative, decrement zSig0 and add
 * back (2 * zSig0 + 1) for the new zSig0.
 */
7588 mul64To128( zSig0, zSig0, &term0, &term1 );
7589 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 7590 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7591 --zSig0;
7592 doubleZSig0 -= 2;
7593 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
7594 }
/*
 * Second result word, estimated as remainder / (2 * zSig0).  The exact
 * correction is only performed when the low 13 bits of the estimate are
 * <= 5 -- presumably the only case in which the estimate's error could
 * affect rounding (TODO confirm).
 */
7595 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
7596 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
7597 if ( zSig1 == 0 ) zSig1 = 1;
/* Subtract 2 * zSig0 * zSig1 and zSig1^2 from the remainder. */
7598 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
7599 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
7600 mul64To128( zSig1, zSig1, &term2, &term3 );
7601 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7602 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7603 --zSig1;
7604 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
7605 term3 |= 1;
7606 term2 |= doubleZSig0;
7607 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
7608 }
/* Any non-zero final remainder is recorded as a sticky bit. */
7609 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7610 }
/* Shift the root right by 14 (jamming lost bits into zSig2), then round and pack. */
7611 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 7612 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7613
7614}
7615
71bfd65c
RH
7616static inline FloatRelation
7617floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
7618 float_status *status)
f6714d36 7619{
c120391c 7620 bool aSign, bSign;
f6714d36 7621
d1eb8f2a
AD
7622 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7623 float_raise(float_flag_invalid, status);
7624 return float_relation_unordered;
7625 }
f6714d36
AJ
7626 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7627 ( extractFloatx80Frac( a )<<1 ) ) ||
7628 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7629 ( extractFloatx80Frac( b )<<1 ) )) {
7630 if (!is_quiet ||
af39bc8c
AM
7631 floatx80_is_signaling_nan(a, status) ||
7632 floatx80_is_signaling_nan(b, status)) {
ff32e16e 7633 float_raise(float_flag_invalid, status);
f6714d36
AJ
7634 }
7635 return float_relation_unordered;
7636 }
7637 aSign = extractFloatx80Sign( a );
7638 bSign = extractFloatx80Sign( b );
7639 if ( aSign != bSign ) {
7640
7641 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7642 ( ( a.low | b.low ) == 0 ) ) {
7643 /* zero case */
7644 return float_relation_equal;
7645 } else {
7646 return 1 - (2 * aSign);
7647 }
7648 } else {
be53fa78
JM
7649 /* Normalize pseudo-denormals before comparison. */
7650 if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
7651 ++a.high;
7652 }
7653 if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
7654 ++b.high;
7655 }
f6714d36
AJ
7656 if (a.low == b.low && a.high == b.high) {
7657 return float_relation_equal;
7658 } else {
7659 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7660 }
7661 }
7662}
7663
71bfd65c 7664FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 7665{
ff32e16e 7666 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
7667}
7668
71bfd65c
RH
7669FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
7670 float_status *status)
f6714d36 7671{
ff32e16e 7672 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
7673}
7674
71bfd65c
RH
7675static inline FloatRelation
7676float128_compare_internal(float128 a, float128 b, bool is_quiet,
7677 float_status *status)
1f587329 7678{
c120391c 7679 bool aSign, bSign;
1f587329
BS
7680
7681 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7682 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7683 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7684 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7685 if (!is_quiet ||
af39bc8c
AM
7686 float128_is_signaling_nan(a, status) ||
7687 float128_is_signaling_nan(b, status)) {
ff32e16e 7688 float_raise(float_flag_invalid, status);
1f587329
BS
7689 }
7690 return float_relation_unordered;
7691 }
7692 aSign = extractFloat128Sign( a );
7693 bSign = extractFloat128Sign( b );
7694 if ( aSign != bSign ) {
7695 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7696 /* zero case */
7697 return float_relation_equal;
7698 } else {
7699 return 1 - (2 * aSign);
7700 }
7701 } else {
7702 if (a.low == b.low && a.high == b.high) {
7703 return float_relation_equal;
7704 } else {
7705 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7706 }
7707 }
7708}
7709
71bfd65c 7710FloatRelation float128_compare(float128 a, float128 b, float_status *status)
1f587329 7711{
ff32e16e 7712 return float128_compare_internal(a, b, 0, status);
1f587329
BS
7713}
7714
71bfd65c
RH
7715FloatRelation float128_compare_quiet(float128 a, float128 b,
7716 float_status *status)
1f587329 7717{
ff32e16e 7718 return float128_compare_internal(a, b, 1, status);
1f587329
BS
7719}
7720
e5a41ffa 7721floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb 7722{
c120391c 7723 bool aSign;
326b9e98 7724 int32_t aExp;
bb98fe42 7725 uint64_t aSig;
9ee6e8bb 7726
d1eb8f2a
AD
7727 if (floatx80_invalid_encoding(a)) {
7728 float_raise(float_flag_invalid, status);
7729 return floatx80_default_nan(status);
7730 }
9ee6e8bb
PB
7731 aSig = extractFloatx80Frac( a );
7732 aExp = extractFloatx80Exp( a );
7733 aSign = extractFloatx80Sign( a );
7734
326b9e98
AJ
7735 if ( aExp == 0x7FFF ) {
7736 if ( aSig<<1 ) {
ff32e16e 7737 return propagateFloatx80NaN(a, a, status);
326b9e98 7738 }
9ee6e8bb
PB
7739 return a;
7740 }
326b9e98 7741
3c85c37f
PM
7742 if (aExp == 0) {
7743 if (aSig == 0) {
7744 return a;
7745 }
7746 aExp++;
7747 }
69397542 7748
326b9e98
AJ
7749 if (n > 0x10000) {
7750 n = 0x10000;
7751 } else if (n < -0x10000) {
7752 n = -0x10000;
7753 }
7754
9ee6e8bb 7755 aExp += n;
a2f2d288
PM
7756 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7757 aSign, aExp, aSig, 0, status);
9ee6e8bb 7758}
9ee6e8bb 7759
e5a41ffa 7760float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb 7761{
c120391c 7762 bool aSign;
326b9e98 7763 int32_t aExp;
bb98fe42 7764 uint64_t aSig0, aSig1;
9ee6e8bb
PB
7765
7766 aSig1 = extractFloat128Frac1( a );
7767 aSig0 = extractFloat128Frac0( a );
7768 aExp = extractFloat128Exp( a );
7769 aSign = extractFloat128Sign( a );
7770 if ( aExp == 0x7FFF ) {
326b9e98 7771 if ( aSig0 | aSig1 ) {
ff32e16e 7772 return propagateFloat128NaN(a, a, status);
326b9e98 7773 }
9ee6e8bb
PB
7774 return a;
7775 }
3c85c37f 7776 if (aExp != 0) {
e9321124 7777 aSig0 |= UINT64_C(0x0001000000000000);
3c85c37f 7778 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 7779 return a;
3c85c37f
PM
7780 } else {
7781 aExp++;
7782 }
69397542 7783
326b9e98
AJ
7784 if (n > 0x10000) {
7785 n = 0x10000;
7786 } else if (n < -0x10000) {
7787 n = -0x10000;
7788 }
7789
69397542
PB
7790 aExp += n - 1;
7791 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 7792 , status);
9ee6e8bb
PB
7793
7794}
f6b3b108
EC
7795
7796static void __attribute__((constructor)) softfloat_init(void)
7797{
7798 union_float64 ua, ub, uc, ur;
7799
7800 if (QEMU_NO_HARDFLOAT) {
7801 return;
7802 }
7803 /*
7804 * Test that the host's FMA is not obviously broken. For example,
7805 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
7806 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
7807 */
7808 ua.s = 0x0020000000000001ULL;
7809 ub.s = 0x3ca0000000000000ULL;
7810 uc.s = 0x0020000000000000ULL;
7811 ur.h = fma(ua.h, ub.h, uc.h);
7812 if (ur.s != 0x0020000000000001ULL) {
7813 force_soft_fma = true;
7814 }
7815}