]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
softfloat: Tidy mul128By64To192
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is neither fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
d82f3b2d 135 float_raise(float_flag_input_denormal, s); \
a94b7839
EC
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
185/*
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
189 */
190#if defined(__x86_64__)
191# define QEMU_HARDFLOAT_1F32_USE_FP 0
192# define QEMU_HARDFLOAT_1F64_USE_FP 1
193# define QEMU_HARDFLOAT_2F32_USE_FP 0
194# define QEMU_HARDFLOAT_2F64_USE_FP 1
195# define QEMU_HARDFLOAT_3F32_USE_FP 0
196# define QEMU_HARDFLOAT_3F64_USE_FP 1
197#else
198# define QEMU_HARDFLOAT_1F32_USE_FP 0
199# define QEMU_HARDFLOAT_1F64_USE_FP 0
200# define QEMU_HARDFLOAT_2F32_USE_FP 0
201# define QEMU_HARDFLOAT_2F64_USE_FP 0
202# define QEMU_HARDFLOAT_3F32_USE_FP 0
203# define QEMU_HARDFLOAT_3F64_USE_FP 0
204#endif
205
206/*
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211 */
212#if defined(__x86_64__) || defined(__aarch64__)
213# define QEMU_HARDFLOAT_USE_ISINF 1
214#else
215# define QEMU_HARDFLOAT_USE_ISINF 0
216#endif
217
218/*
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
221 * already set.
222 */
223#if defined(TARGET_PPC) || defined(__FAST_MATH__)
224# if defined(__FAST_MATH__)
225# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226 IEEE implementation
227# endif
228# define QEMU_NO_HARDFLOAT 1
229# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230#else
231# define QEMU_NO_HARDFLOAT 0
232# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
a94b7839
EC
342static inline float32
343float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
b240c9c4 345 f32_check_fn pre, f32_check_fn post)
a94b7839
EC
346{
347 union_float32 ua, ub, ur;
348
349 ua.s = xa;
350 ub.s = xb;
351
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
354 }
355
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
359 }
a94b7839
EC
360
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 363 float_raise(float_flag_overflow, s);
b240c9c4
RH
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
a94b7839
EC
366 }
367 return ur.s;
368
369 soft:
370 return soft(ua.s, ub.s, s);
371}
372
373static inline float64
374float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
b240c9c4 376 f64_check_fn pre, f64_check_fn post)
a94b7839
EC
377{
378 union_float64 ua, ub, ur;
379
380 ua.s = xa;
381 ub.s = xb;
382
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
385 }
386
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
390 }
a94b7839
EC
391
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 394 float_raise(float_flag_overflow, s);
b240c9c4
RH
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
a94b7839
EC
397 }
398 return ur.s;
399
400 soft:
401 return soft(ua.s, ub.s, s);
402}
403
d97544c9
AB
404/*----------------------------------------------------------------------------
405| Returns the fraction bits of the single-precision floating-point value `a'.
406*----------------------------------------------------------------------------*/
407
408static inline uint32_t extractFloat32Frac(float32 a)
409{
410 return float32_val(a) & 0x007FFFFF;
411}
412
413/*----------------------------------------------------------------------------
414| Returns the exponent bits of the single-precision floating-point value `a'.
415*----------------------------------------------------------------------------*/
416
417static inline int extractFloat32Exp(float32 a)
418{
419 return (float32_val(a) >> 23) & 0xFF;
420}
421
422/*----------------------------------------------------------------------------
423| Returns the sign bit of the single-precision floating-point value `a'.
424*----------------------------------------------------------------------------*/
425
c120391c 426static inline bool extractFloat32Sign(float32 a)
d97544c9
AB
427{
428 return float32_val(a) >> 31;
429}
430
431/*----------------------------------------------------------------------------
432| Returns the fraction bits of the double-precision floating-point value `a'.
433*----------------------------------------------------------------------------*/
434
435static inline uint64_t extractFloat64Frac(float64 a)
436{
e9321124 437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
d97544c9
AB
438}
439
440/*----------------------------------------------------------------------------
441| Returns the exponent bits of the double-precision floating-point value `a'.
442*----------------------------------------------------------------------------*/
443
444static inline int extractFloat64Exp(float64 a)
445{
446 return (float64_val(a) >> 52) & 0x7FF;
447}
448
449/*----------------------------------------------------------------------------
450| Returns the sign bit of the double-precision floating-point value `a'.
451*----------------------------------------------------------------------------*/
452
c120391c 453static inline bool extractFloat64Sign(float64 a)
d97544c9
AB
454{
455 return float64_val(a) >> 63;
456}
457
a90119b5
AB
458/*
459 * Classify a floating point number. Everything above float_class_qnan
460 * is a NaN so cls >= float_class_qnan is any NaN.
461 */
462
463typedef enum __attribute__ ((__packed__)) {
464 float_class_unclassified,
465 float_class_zero,
466 float_class_normal,
467 float_class_inf,
468 float_class_qnan, /* all NaNs from here */
469 float_class_snan,
a90119b5
AB
470} FloatClass;
471
134eda00
RH
472#define float_cmask(bit) (1u << (bit))
473
474enum {
475 float_cmask_zero = float_cmask(float_class_zero),
476 float_cmask_normal = float_cmask(float_class_normal),
477 float_cmask_inf = float_cmask(float_class_inf),
478 float_cmask_qnan = float_cmask(float_class_qnan),
479 float_cmask_snan = float_cmask(float_class_snan),
480
481 float_cmask_infzero = float_cmask_zero | float_cmask_inf,
482 float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
483};
484
485
247d1f21
RH
486/* Simple helpers for checking if, or what kind of, NaN we have */
487static inline __attribute__((unused)) bool is_nan(FloatClass c)
488{
489 return unlikely(c >= float_class_qnan);
490}
491
492static inline __attribute__((unused)) bool is_snan(FloatClass c)
493{
494 return c == float_class_snan;
495}
496
497static inline __attribute__((unused)) bool is_qnan(FloatClass c)
498{
499 return c == float_class_qnan;
500}
501
a90119b5 502/*
0018b1f4
RH
503 * Structure holding all of the decomposed parts of a float.
504 * The exponent is unbiased and the fraction is normalized.
a90119b5 505 *
0018b1f4
RH
506 * The fraction words are stored in big-endian word ordering,
507 * so that truncation from a larger format to a smaller format
508 * can be done simply by ignoring subsequent elements.
a90119b5
AB
509 */
510
511typedef struct {
a90119b5
AB
512 FloatClass cls;
513 bool sign;
4109b9ea
RH
514 int32_t exp;
515 union {
516 /* Routines that know the structure may reference the singular name. */
517 uint64_t frac;
518 /*
519 * Routines expanded with multiple structures reference "hi" and "lo"
520 * depending on the operation. In FloatParts64, "hi" and "lo" are
521 * both the same word and aliased here.
522 */
523 uint64_t frac_hi;
524 uint64_t frac_lo;
525 };
f8155c1d 526} FloatParts64;
a90119b5 527
0018b1f4
RH
528typedef struct {
529 FloatClass cls;
530 bool sign;
531 int32_t exp;
532 uint64_t frac_hi;
533 uint64_t frac_lo;
534} FloatParts128;
535
aca84527
RH
536typedef struct {
537 FloatClass cls;
538 bool sign;
539 int32_t exp;
540 uint64_t frac_hi;
541 uint64_t frac_hm; /* high-middle */
542 uint64_t frac_lm; /* low-middle */
543 uint64_t frac_lo;
544} FloatParts256;
545
0018b1f4 546/* These apply to the most significant word of each FloatPartsN. */
e99c4373 547#define DECOMPOSED_BINARY_POINT 63
a90119b5 548#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
a90119b5
AB
549
550/* Structure holding all of the relevant parameters for a format.
551 * exp_size: the size of the exponent field
552 * exp_bias: the offset applied to the exponent field
553 * exp_max: the maximum normalised exponent
554 * frac_size: the size of the fraction field
555 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
556 * The following are computed based the size of fraction
557 * frac_lsb: least significant bit of fraction
ca3a3d5a 558 * frac_lsbm1: the bit below the least significant bit (for rounding)
a90119b5 559 * round_mask/roundeven_mask: masks used for rounding
ca3a3d5a
AB
560 * The following optional modifiers are available:
561 * arm_althp: handle ARM Alternative Half Precision
a90119b5
AB
562 */
563typedef struct {
564 int exp_size;
565 int exp_bias;
566 int exp_max;
567 int frac_size;
568 int frac_shift;
569 uint64_t frac_lsb;
570 uint64_t frac_lsbm1;
571 uint64_t round_mask;
572 uint64_t roundeven_mask;
ca3a3d5a 573 bool arm_althp;
a90119b5
AB
574} FloatFmt;
575
576/* Expand fields based on the size of exponent and fraction */
577#define FLOAT_PARAMS(E, F) \
578 .exp_size = E, \
579 .exp_bias = ((1 << E) - 1) >> 1, \
580 .exp_max = (1 << E) - 1, \
581 .frac_size = F, \
0018b1f4
RH
582 .frac_shift = (-F - 1) & 63, \
583 .frac_lsb = 1ull << ((-F - 1) & 63), \
584 .frac_lsbm1 = 1ull << ((-F - 2) & 63), \
585 .round_mask = (1ull << ((-F - 1) & 63)) - 1, \
586 .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1
a90119b5
AB
587
588static const FloatFmt float16_params = {
589 FLOAT_PARAMS(5, 10)
590};
591
6fed16b2
AB
592static const FloatFmt float16_params_ahp = {
593 FLOAT_PARAMS(5, 10),
594 .arm_althp = true
595};
596
8282310d
LZ
597static const FloatFmt bfloat16_params = {
598 FLOAT_PARAMS(8, 7)
599};
600
a90119b5
AB
601static const FloatFmt float32_params = {
602 FLOAT_PARAMS(8, 23)
603};
604
605static const FloatFmt float64_params = {
606 FLOAT_PARAMS(11, 52)
607};
608
0018b1f4
RH
609static const FloatFmt float128_params = {
610 FLOAT_PARAMS(15, 112)
611};
612
6fff2167 613/* Unpack a float to parts, but do not canonicalize. */
d8fdd172 614static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
6fff2167 615{
d8fdd172
RH
616 const int f_size = fmt->frac_size;
617 const int e_size = fmt->exp_size;
6fff2167 618
d8fdd172 619 *r = (FloatParts64) {
6fff2167 620 .cls = float_class_unclassified,
d8fdd172
RH
621 .sign = extract64(raw, f_size + e_size, 1),
622 .exp = extract64(raw, f_size, e_size),
623 .frac = extract64(raw, 0, f_size)
6fff2167
AB
624 };
625}
626
3dddb203 627static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
6fff2167 628{
3dddb203 629 unpack_raw64(p, &float16_params, f);
6fff2167
AB
630}
631
3dddb203 632static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
8282310d 633{
3dddb203 634 unpack_raw64(p, &bfloat16_params, f);
8282310d
LZ
635}
636
3dddb203 637static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
6fff2167 638{
3dddb203 639 unpack_raw64(p, &float32_params, f);
6fff2167
AB
640}
641
3dddb203 642static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
6fff2167 643{
3dddb203 644 unpack_raw64(p, &float64_params, f);
6fff2167
AB
645}
646
0018b1f4
RH
647static void float128_unpack_raw(FloatParts128 *p, float128 f)
648{
649 const int f_size = float128_params.frac_size - 64;
650 const int e_size = float128_params.exp_size;
651
652 *p = (FloatParts128) {
653 .cls = float_class_unclassified,
654 .sign = extract64(f.high, f_size + e_size, 1),
655 .exp = extract64(f.high, f_size, e_size),
656 .frac_hi = extract64(f.high, 0, f_size),
657 .frac_lo = f.low,
658 };
659}
660
6fff2167 661/* Pack a float from parts, but do not canonicalize. */
9e4af58c 662static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
6fff2167 663{
9e4af58c
RH
664 const int f_size = fmt->frac_size;
665 const int e_size = fmt->exp_size;
666 uint64_t ret;
667
668 ret = (uint64_t)p->sign << (f_size + e_size);
669 ret = deposit64(ret, f_size, e_size, p->exp);
670 ret = deposit64(ret, 0, f_size, p->frac);
671 return ret;
6fff2167
AB
672}
673
71fd178e 674static inline float16 float16_pack_raw(const FloatParts64 *p)
6fff2167 675{
71fd178e 676 return make_float16(pack_raw64(p, &float16_params));
6fff2167
AB
677}
678
71fd178e 679static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
8282310d 680{
71fd178e 681 return pack_raw64(p, &bfloat16_params);
8282310d
LZ
682}
683
71fd178e 684static inline float32 float32_pack_raw(const FloatParts64 *p)
6fff2167 685{
71fd178e 686 return make_float32(pack_raw64(p, &float32_params));
6fff2167
AB
687}
688
71fd178e 689static inline float64 float64_pack_raw(const FloatParts64 *p)
6fff2167 690{
71fd178e 691 return make_float64(pack_raw64(p, &float64_params));
6fff2167
AB
692}
693
0018b1f4
RH
694static float128 float128_pack_raw(const FloatParts128 *p)
695{
696 const int f_size = float128_params.frac_size - 64;
697 const int e_size = float128_params.exp_size;
698 uint64_t hi;
699
700 hi = (uint64_t)p->sign << (f_size + e_size);
701 hi = deposit64(hi, f_size, e_size, p->exp);
702 hi = deposit64(hi, 0, f_size, p->frac_hi);
703 return make_float128(hi, p->frac_lo);
704}
705
0664335a
RH
706/*----------------------------------------------------------------------------
707| Functions and definitions to determine: (1) whether tininess for underflow
708| is detected before or after rounding by default, (2) what (if anything)
709| happens when exceptions are raised, (3) how signaling NaNs are distinguished
710| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
711| are propagated from function inputs to output. These details are target-
712| specific.
713*----------------------------------------------------------------------------*/
139c1837 714#include "softfloat-specialize.c.inc"
0664335a 715
0018b1f4
RH
716#define PARTS_GENERIC_64_128(NAME, P) \
717 QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
718
dedd123c
RH
719#define PARTS_GENERIC_64_128_256(NAME, P) \
720 QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
721 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
722
e9034ea8 723#define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
0018b1f4
RH
724#define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
725
7c45bad8
RH
726static void parts64_return_nan(FloatParts64 *a, float_status *s);
727static void parts128_return_nan(FloatParts128 *a, float_status *s);
728
729#define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
0018b1f4 730
22c355f4
RH
731static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
732 float_status *s);
733static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
734 float_status *s);
735
736#define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
737
979582d0
RH
738static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
739 FloatParts64 *c, float_status *s,
740 int ab_mask, int abc_mask);
741static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
742 FloatParts128 *b,
743 FloatParts128 *c,
744 float_status *s,
745 int ab_mask, int abc_mask);
746
747#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
748 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
749
d46975bc
RH
750static void parts64_canonicalize(FloatParts64 *p, float_status *status,
751 const FloatFmt *fmt);
752static void parts128_canonicalize(FloatParts128 *p, float_status *status,
753 const FloatFmt *fmt);
754
755#define parts_canonicalize(A, S, F) \
756 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
757
ee6959f2
RH
758static void parts64_uncanon(FloatParts64 *p, float_status *status,
759 const FloatFmt *fmt);
760static void parts128_uncanon(FloatParts128 *p, float_status *status,
761 const FloatFmt *fmt);
762
763#define parts_uncanon(A, S, F) \
764 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
765
da10a907
RH
766static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
767static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 768static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
769
770#define parts_add_normal(A, B) \
dedd123c 771 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
da10a907
RH
772
773static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
774static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 775static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
776
777#define parts_sub_normal(A, B) \
dedd123c 778 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
da10a907
RH
779
780static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
781 float_status *s, bool subtract);
782static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
783 float_status *s, bool subtract);
784
785#define parts_addsub(A, B, S, Z) \
786 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
787
aca84527
RH
788static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
789 float_status *s);
790static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
791 float_status *s);
792
793#define parts_mul(A, B, S) \
794 PARTS_GENERIC_64_128(mul, A)(A, B, S)
795
dedd123c
RH
796static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
797 FloatParts64 *c, int flags,
798 float_status *s);
799static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
800 FloatParts128 *c, int flags,
801 float_status *s);
802
803#define parts_muladd(A, B, C, Z, S) \
804 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
805
0018b1f4
RH
806/*
807 * Helper functions for softfloat-parts.c.inc, per-size operations.
808 */
809
22c355f4
RH
810#define FRAC_GENERIC_64_128(NAME, P) \
811 QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
812
dedd123c
RH
813#define FRAC_GENERIC_64_128_256(NAME, P) \
814 QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
815 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
816
da10a907
RH
817static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
818{
819 return uadd64_overflow(a->frac, b->frac, &r->frac);
820}
821
822static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
823{
824 bool c = 0;
825 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
826 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
827 return c;
828}
829
dedd123c
RH
830static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
831{
832 bool c = 0;
833 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
834 r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
835 r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
836 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
837 return c;
838}
839
840#define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
da10a907 841
ee6959f2
RH
842static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
843{
844 return uadd64_overflow(a->frac, c, &r->frac);
845}
846
847static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
848{
849 c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
850 return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
851}
852
853#define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
854
855static void frac64_allones(FloatParts64 *a)
856{
857 a->frac = -1;
858}
859
860static void frac128_allones(FloatParts128 *a)
861{
862 a->frac_hi = a->frac_lo = -1;
863}
864
865#define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
866
22c355f4
RH
867static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
868{
869 return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
870}
871
872static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
873{
874 uint64_t ta = a->frac_hi, tb = b->frac_hi;
875 if (ta == tb) {
876 ta = a->frac_lo, tb = b->frac_lo;
877 if (ta == tb) {
878 return 0;
879 }
880 }
881 return ta < tb ? -1 : 1;
882}
883
/* Size-generic fraction compare; implementation chosen from the type of LHS. */
#define frac_cmp(LHS, RHS) FRAC_GENERIC_64_128(cmp, LHS)(LHS, RHS)
885
d46975bc 886static void frac64_clear(FloatParts64 *a)
0018b1f4 887{
d46975bc
RH
888 a->frac = 0;
889}
890
891static void frac128_clear(FloatParts128 *a)
892{
893 a->frac_hi = a->frac_lo = 0;
0018b1f4
RH
894}
895
/* Size-generic "zero the fraction". */
#define frac_clear(PARTS) FRAC_GENERIC_64_128(clear, PARTS)(PARTS)
0018b1f4 897
d46975bc 898static bool frac64_eqz(FloatParts64 *a)
0018b1f4 899{
d46975bc
RH
900 return a->frac == 0;
901}
902
903static bool frac128_eqz(FloatParts128 *a)
904{
905 return (a->frac_hi | a->frac_lo) == 0;
0018b1f4
RH
906}
907
/* Size-generic "fraction equals zero" test. */
#define frac_eqz(PARTS) FRAC_GENERIC_64_128(eqz, PARTS)(PARTS)
0fc07cad 909
aca84527
RH
910static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
911{
912 mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
913}
914
915static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
916{
917 mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
918 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
919}
920
/*
 * Size-generic widening multiply.  Note the dispatch is on the type of
 * the first source operand, not on the (wider) destination.
 */
#define frac_mulw(DST, LHS, RHS) \
    FRAC_GENERIC_64_128(mulw, LHS)(DST, LHS, RHS)
922
da10a907
RH
923static void frac64_neg(FloatParts64 *a)
924{
925 a->frac = -a->frac;
926}
927
928static void frac128_neg(FloatParts128 *a)
929{
930 bool c = 0;
931 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
932 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
933}
934
dedd123c
RH
935static void frac256_neg(FloatParts256 *a)
936{
937 bool c = 0;
938 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
939 a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
940 a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
941 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
942}
943
/* Size-generic in-place fraction negation. */
#define frac_neg(PARTS) FRAC_GENERIC_64_128_256(neg, PARTS)(PARTS)
da10a907 945
d46975bc 946static int frac64_normalize(FloatParts64 *a)
6fff2167 947{
d46975bc
RH
948 if (a->frac) {
949 int shift = clz64(a->frac);
950 a->frac <<= shift;
951 return shift;
952 }
953 return 64;
954}
955
956static int frac128_normalize(FloatParts128 *a)
957{
958 if (a->frac_hi) {
959 int shl = clz64(a->frac_hi);
960 if (shl) {
961 int shr = 64 - shl;
962 a->frac_hi = (a->frac_hi << shl) | (a->frac_lo >> shr);
963 a->frac_lo = (a->frac_lo << shl);
6fff2167 964 }
d46975bc
RH
965 return shl;
966 } else if (a->frac_lo) {
967 int shl = clz64(a->frac_lo);
968 a->frac_hi = (a->frac_lo << shl);
969 a->frac_lo = 0;
970 return shl + 64;
6fff2167 971 }
d46975bc 972 return 128;
6fff2167
AB
973}
974
dedd123c
RH
975static int frac256_normalize(FloatParts256 *a)
976{
977 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
978 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
979 int ret, shl, shr;
980
981 if (likely(a0)) {
982 shl = clz64(a0);
983 if (shl == 0) {
984 return 0;
985 }
986 ret = shl;
987 } else {
988 if (a1) {
989 ret = 64;
990 a0 = a1, a1 = a2, a2 = a3, a3 = 0;
991 } else if (a2) {
992 ret = 128;
993 a0 = a2, a1 = a3, a2 = 0, a3 = 0;
994 } else if (a3) {
995 ret = 192;
996 a0 = a3, a1 = 0, a2 = 0, a3 = 0;
997 } else {
998 ret = 256;
999 a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1000 goto done;
1001 }
1002 shl = clz64(a0);
1003 if (shl == 0) {
1004 goto done;
1005 }
1006 ret += shl;
1007 }
1008
1009 shr = -shl & 63;
1010 a0 = (a0 << shl) | (a1 >> shr);
1011 a1 = (a1 << shl) | (a2 >> shr);
1012 a2 = (a2 << shl) | (a3 >> shr);
1013 a3 = (a3 << shl);
1014
1015 done:
1016 a->frac_hi = a0;
1017 a->frac_hm = a1;
1018 a->frac_lm = a2;
1019 a->frac_lo = a3;
1020 return ret;
1021}
1022
/* Size-generic fraction normalization; returns the shift count. */
#define frac_normalize(PARTS) \
    FRAC_GENERIC_64_128_256(normalize, PARTS)(PARTS)
d46975bc
RH
1024
1025static void frac64_shl(FloatParts64 *a, int c)
1026{
1027 a->frac <<= c;
1028}
1029
1030static void frac128_shl(FloatParts128 *a, int c)
1031{
1032 shift128Left(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
1033}
1034
/* Size-generic left shift of the fraction. */
#define frac_shl(PARTS, COUNT) \
    FRAC_GENERIC_64_128(shl, PARTS)(PARTS, COUNT)
1036
1037static void frac64_shr(FloatParts64 *a, int c)
1038{
1039 a->frac >>= c;
1040}
1041
1042static void frac128_shr(FloatParts128 *a, int c)
1043{
1044 shift128Right(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
1045}
1046
/* Size-generic right shift of the fraction. */
#define frac_shr(PARTS, COUNT) \
    FRAC_GENERIC_64_128(shr, PARTS)(PARTS, COUNT)
1048
ee6959f2 1049static void frac64_shrjam(FloatParts64 *a, int c)
6fff2167 1050{
ee6959f2
RH
1051 shift64RightJamming(a->frac, c, &a->frac);
1052}
6fff2167 1053
ee6959f2
RH
1054static void frac128_shrjam(FloatParts128 *a, int c)
1055{
1056 shift128RightJamming(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
6fff2167
AB
1057}
1058
dedd123c
RH
1059static void frac256_shrjam(FloatParts256 *a, int c)
1060{
1061 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1062 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1063 uint64_t sticky = 0;
1064 int invc;
1065
1066 if (unlikely(c == 0)) {
1067 return;
1068 } else if (likely(c < 64)) {
1069 /* nothing */
1070 } else if (likely(c < 256)) {
1071 if (unlikely(c & 128)) {
1072 sticky |= a2 | a3;
1073 a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1074 }
1075 if (unlikely(c & 64)) {
1076 sticky |= a3;
1077 a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1078 }
1079 c &= 63;
1080 if (c == 0) {
1081 goto done;
1082 }
1083 } else {
1084 sticky = a0 | a1 | a2 | a3;
1085 a0 = a1 = a2 = a3 = 0;
1086 goto done;
1087 }
1088
1089 invc = -c & 63;
1090 sticky |= a3 << invc;
1091 a3 = (a3 >> c) | (a2 << invc);
1092 a2 = (a2 >> c) | (a1 << invc);
1093 a1 = (a1 >> c) | (a0 << invc);
1094 a0 = (a0 >> c);
1095
1096 done:
1097 a->frac_lo = a3 | (sticky != 0);
1098 a->frac_lm = a2;
1099 a->frac_hm = a1;
1100 a->frac_hi = a0;
1101}
1102
/* Size-generic jamming right shift of the fraction. */
#define frac_shrjam(PARTS, COUNT) \
    FRAC_GENERIC_64_128_256(shrjam, PARTS)(PARTS, COUNT)
d446830a 1104
da10a907
RH
1105static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1106{
1107 return usub64_overflow(a->frac, b->frac, &r->frac);
1108}
7c45bad8 1109
da10a907
RH
1110static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1111{
1112 bool c = 0;
1113 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1114 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1115 return c;
1116}
1117
dedd123c
RH
1118static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1119{
1120 bool c = 0;
1121 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1122 r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1123 r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1124 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1125 return c;
1126}
1127
/* Size-generic subtract; the implementation is chosen from the type of DST. */
#define frac_sub(DST, LHS, RHS) \
    FRAC_GENERIC_64_128_256(sub, DST)(DST, LHS, RHS)
da10a907 1129
aca84527
RH
1130static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1131{
1132 r->frac = a->frac_hi | (a->frac_lo != 0);
1133}
1134
1135static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1136{
1137 r->frac_hi = a->frac_hi;
1138 r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1139}
1140
/* Size-generic narrowing with jam; dispatch is on the (narrow) DST type. */
#define frac_truncjam(DST, SRC) \
    FRAC_GENERIC_64_128(truncjam, DST)(DST, SRC)
1142
dedd123c
RH
1143static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1144{
1145 r->frac_hi = a->frac;
1146 r->frac_lo = 0;
1147}
1148
1149static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1150{
1151 r->frac_hi = a->frac_hi;
1152 r->frac_hm = a->frac_lo;
1153 r->frac_lm = 0;
1154 r->frac_lo = 0;
1155}
1156
/* Size-generic widening; dispatch is on the (narrow) SRC type. */
#define frac_widen(DST, SRC) FRAC_GENERIC_64_128(widen, SRC)(DST, SRC)
1158
da10a907
RH
1159#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1160#define FloatPartsN glue(FloatParts,N)
aca84527 1161#define FloatPartsW glue(FloatParts,W)
da10a907
RH
1162
1163#define N 64
aca84527 1164#define W 128
da10a907
RH
1165
1166#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1167#include "softfloat-parts.c.inc"
1168
da10a907 1169#undef N
aca84527 1170#undef W
da10a907 1171#define N 128
aca84527 1172#define W 256
7c45bad8 1173
da10a907 1174#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1175#include "softfloat-parts.c.inc"
1176
dedd123c
RH
1177#undef N
1178#undef W
1179#define N 256
1180
1181#include "softfloat-parts-addsub.c.inc"
1182
da10a907 1183#undef N
aca84527 1184#undef W
7c45bad8
RH
1185#undef partsN
1186#undef FloatPartsN
aca84527 1187#undef FloatPartsW
7c45bad8 1188
aaffb7bf
RH
1189/*
1190 * Pack/unpack routines with a specific FloatFmt.
1191 */
1192
98e256fc
RH
1193static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1194 float_status *s, const FloatFmt *params)
aaffb7bf 1195{
98e256fc 1196 float16_unpack_raw(p, f);
d46975bc 1197 parts_canonicalize(p, s, params);
aaffb7bf
RH
1198}
1199
98e256fc
RH
1200static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1201 float_status *s)
aaffb7bf 1202{
98e256fc 1203 float16a_unpack_canonical(p, f, s, &float16_params);
aaffb7bf
RH
1204}
1205
98e256fc
RH
1206static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1207 float_status *s)
aaffb7bf 1208{
98e256fc 1209 bfloat16_unpack_raw(p, f);
d46975bc 1210 parts_canonicalize(p, s, &bfloat16_params);
aaffb7bf
RH
1211}
1212
e293e927
RH
1213static float16 float16a_round_pack_canonical(FloatParts64 *p,
1214 float_status *s,
aaffb7bf
RH
1215 const FloatFmt *params)
1216{
ee6959f2 1217 parts_uncanon(p, s, params);
e293e927 1218 return float16_pack_raw(p);
aaffb7bf
RH
1219}
1220
e293e927
RH
1221static float16 float16_round_pack_canonical(FloatParts64 *p,
1222 float_status *s)
aaffb7bf
RH
1223{
1224 return float16a_round_pack_canonical(p, s, &float16_params);
1225}
1226
e293e927
RH
1227static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1228 float_status *s)
aaffb7bf 1229{
ee6959f2 1230 parts_uncanon(p, s, &bfloat16_params);
e293e927 1231 return bfloat16_pack_raw(p);
aaffb7bf
RH
1232}
1233
98e256fc
RH
1234static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1235 float_status *s)
aaffb7bf 1236{
98e256fc 1237 float32_unpack_raw(p, f);
d46975bc 1238 parts_canonicalize(p, s, &float32_params);
aaffb7bf
RH
1239}
1240
e293e927
RH
1241static float32 float32_round_pack_canonical(FloatParts64 *p,
1242 float_status *s)
aaffb7bf 1243{
ee6959f2 1244 parts_uncanon(p, s, &float32_params);
e293e927 1245 return float32_pack_raw(p);
aaffb7bf
RH
1246}
1247
98e256fc
RH
1248static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1249 float_status *s)
aaffb7bf 1250{
98e256fc 1251 float64_unpack_raw(p, f);
d46975bc 1252 parts_canonicalize(p, s, &float64_params);
aaffb7bf
RH
1253}
1254
e293e927
RH
1255static float64 float64_round_pack_canonical(FloatParts64 *p,
1256 float_status *s)
aaffb7bf 1257{
ee6959f2 1258 parts_uncanon(p, s, &float64_params);
e293e927 1259 return float64_pack_raw(p);
aaffb7bf
RH
1260}
1261
3ff49e56
RH
1262static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1263 float_status *s)
1264{
1265 float128_unpack_raw(p, f);
1266 parts_canonicalize(p, s, &float128_params);
1267}
1268
1269static float128 float128_round_pack_canonical(FloatParts128 *p,
1270 float_status *s)
1271{
1272 parts_uncanon(p, s, &float128_params);
1273 return float128_pack_raw(p);
1274}
1275
6fff2167 1276/*
da10a907 1277 * Addition and subtraction
6fff2167
AB
1278 */
1279
da10a907
RH
1280static float16 QEMU_FLATTEN
1281float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
6fff2167 1282{
da10a907 1283 FloatParts64 pa, pb, *pr;
98e256fc
RH
1284
1285 float16_unpack_canonical(&pa, a, status);
1286 float16_unpack_canonical(&pb, b, status);
da10a907 1287 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1288
da10a907 1289 return float16_round_pack_canonical(pr, status);
6fff2167
AB
1290}
1291
da10a907 1292float16 float16_add(float16 a, float16 b, float_status *status)
1b615d48 1293{
da10a907
RH
1294 return float16_addsub(a, b, status, false);
1295}
1b615d48 1296
da10a907
RH
1297float16 float16_sub(float16 a, float16 b, float_status *status)
1298{
1299 return float16_addsub(a, b, status, true);
1b615d48
EC
1300}
1301
1302static float32 QEMU_SOFTFLOAT_ATTR
da10a907 1303soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
6fff2167 1304{
da10a907 1305 FloatParts64 pa, pb, *pr;
98e256fc
RH
1306
1307 float32_unpack_canonical(&pa, a, status);
1308 float32_unpack_canonical(&pb, b, status);
da10a907 1309 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1310
da10a907 1311 return float32_round_pack_canonical(pr, status);
6fff2167
AB
1312}
1313
da10a907 1314static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1b615d48 1315{
da10a907 1316 return soft_f32_addsub(a, b, status, false);
1b615d48
EC
1317}
1318
da10a907 1319static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1b615d48 1320{
da10a907 1321 return soft_f32_addsub(a, b, status, true);
1b615d48
EC
1322}
1323
1324static float64 QEMU_SOFTFLOAT_ATTR
da10a907 1325soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
6fff2167 1326{
da10a907 1327 FloatParts64 pa, pb, *pr;
98e256fc
RH
1328
1329 float64_unpack_canonical(&pa, a, status);
1330 float64_unpack_canonical(&pb, b, status);
da10a907 1331 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1332
da10a907 1333 return float64_round_pack_canonical(pr, status);
6fff2167
AB
1334}
1335
da10a907 1336static float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1337{
da10a907 1338 return soft_f64_addsub(a, b, status, false);
1b615d48 1339}
6fff2167 1340
da10a907 1341static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1b615d48 1342{
da10a907 1343 return soft_f64_addsub(a, b, status, true);
6fff2167
AB
1344}
1345
/* Host single-precision addition. */
static float hard_f32_add(float a, float b)
{
    return a + b;
}
6fff2167 1350
1b615d48
EC
/* Host single-precision subtraction. */
static float hard_f32_sub(float a, float b)
{
    return a - b;
}
1355
/* Host double-precision addition. */
static double hard_f64_add(double a, double b)
{
    return a + b;
}
6fff2167 1360
1b615d48
EC
/* Host double-precision subtraction. */
static double hard_f64_sub(double a, double b)
{
    return a - b;
}
1365
b240c9c4 1366static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1b615d48
EC
1367{
1368 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1369 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1370 }
1371 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1372}
1373
b240c9c4 1374static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1b615d48
EC
1375{
1376 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1377 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1378 } else {
1379 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1380 }
1381}
1382
1383static float32 float32_addsub(float32 a, float32 b, float_status *s,
1384 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1385{
1386 return float32_gen2(a, b, s, hard, soft,
b240c9c4 1387 f32_is_zon2, f32_addsubmul_post);
1b615d48
EC
1388}
1389
1390static float64 float64_addsub(float64 a, float64 b, float_status *s,
1391 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1392{
1393 return float64_gen2(a, b, s, hard, soft,
b240c9c4 1394 f64_is_zon2, f64_addsubmul_post);
1b615d48
EC
1395}
1396
1397float32 QEMU_FLATTEN
1398float32_add(float32 a, float32 b, float_status *s)
1399{
1400 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1401}
1402
1403float32 QEMU_FLATTEN
1404float32_sub(float32 a, float32 b, float_status *s)
1405{
1406 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1407}
1408
1409float64 QEMU_FLATTEN
1410float64_add(float64 a, float64 b, float_status *s)
1411{
1412 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1413}
1414
1415float64 QEMU_FLATTEN
1416float64_sub(float64 a, float64 b, float_status *s)
1417{
1418 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1419}
1420
da10a907
RH
1421static bfloat16 QEMU_FLATTEN
1422bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
8282310d 1423{
da10a907 1424 FloatParts64 pa, pb, *pr;
98e256fc
RH
1425
1426 bfloat16_unpack_canonical(&pa, a, status);
1427 bfloat16_unpack_canonical(&pb, b, status);
da10a907 1428 pr = parts_addsub(&pa, &pb, status, subtract);
8282310d 1429
da10a907 1430 return bfloat16_round_pack_canonical(pr, status);
8282310d
LZ
1431}
1432
da10a907 1433bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
8282310d 1434{
da10a907
RH
1435 return bfloat16_addsub(a, b, status, false);
1436}
8282310d 1437
da10a907
RH
1438bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1439{
1440 return bfloat16_addsub(a, b, status, true);
8282310d
LZ
1441}
1442
3ff49e56
RH
1443static float128 QEMU_FLATTEN
1444float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1445{
1446 FloatParts128 pa, pb, *pr;
1447
1448 float128_unpack_canonical(&pa, a, status);
1449 float128_unpack_canonical(&pb, b, status);
1450 pr = parts_addsub(&pa, &pb, status, subtract);
1451
1452 return float128_round_pack_canonical(pr, status);
1453}
1454
1455float128 float128_add(float128 a, float128 b, float_status *status)
1456{
1457 return float128_addsub(a, b, status, false);
1458}
1459
1460float128 float128_sub(float128 a, float128 b, float_status *status)
1461{
1462 return float128_addsub(a, b, status, true);
1463}
1464
74d707e2 1465/*
aca84527 1466 * Multiplication
74d707e2
AB
1467 */
1468
97ff87c0 1469float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2 1470{
aca84527 1471 FloatParts64 pa, pb, *pr;
98e256fc
RH
1472
1473 float16_unpack_canonical(&pa, a, status);
1474 float16_unpack_canonical(&pb, b, status);
aca84527 1475 pr = parts_mul(&pa, &pb, status);
74d707e2 1476
aca84527 1477 return float16_round_pack_canonical(pr, status);
74d707e2
AB
1478}
1479
2dfabc86
EC
1480static float32 QEMU_SOFTFLOAT_ATTR
1481soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2 1482{
aca84527 1483 FloatParts64 pa, pb, *pr;
98e256fc
RH
1484
1485 float32_unpack_canonical(&pa, a, status);
1486 float32_unpack_canonical(&pb, b, status);
aca84527 1487 pr = parts_mul(&pa, &pb, status);
74d707e2 1488
aca84527 1489 return float32_round_pack_canonical(pr, status);
74d707e2
AB
1490}
1491
2dfabc86
EC
1492static float64 QEMU_SOFTFLOAT_ATTR
1493soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2 1494{
aca84527 1495 FloatParts64 pa, pb, *pr;
98e256fc
RH
1496
1497 float64_unpack_canonical(&pa, a, status);
1498 float64_unpack_canonical(&pb, b, status);
aca84527 1499 pr = parts_mul(&pa, &pb, status);
74d707e2 1500
aca84527 1501 return float64_round_pack_canonical(pr, status);
74d707e2
AB
1502}
1503
2dfabc86
EC
/* Host single-precision multiplication. */
static float hard_f32_mul(float a, float b)
{
    return a * b;
}
1508
/* Host double-precision multiplication. */
static double hard_f64_mul(double a, double b)
{
    return a * b;
}
1513
2dfabc86
EC
1514float32 QEMU_FLATTEN
1515float32_mul(float32 a, float32 b, float_status *s)
1516{
1517 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
b240c9c4 1518 f32_is_zon2, f32_addsubmul_post);
2dfabc86
EC
1519}
1520
1521float64 QEMU_FLATTEN
1522float64_mul(float64 a, float64 b, float_status *s)
1523{
1524 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
b240c9c4 1525 f64_is_zon2, f64_addsubmul_post);
2dfabc86
EC
1526}
1527
aca84527
RH
1528bfloat16 QEMU_FLATTEN
1529bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
8282310d 1530{
aca84527 1531 FloatParts64 pa, pb, *pr;
98e256fc
RH
1532
1533 bfloat16_unpack_canonical(&pa, a, status);
1534 bfloat16_unpack_canonical(&pb, b, status);
aca84527 1535 pr = parts_mul(&pa, &pb, status);
8282310d 1536
aca84527
RH
1537 return bfloat16_round_pack_canonical(pr, status);
1538}
1539
1540float128 QEMU_FLATTEN
1541float128_mul(float128 a, float128 b, float_status *status)
1542{
1543 FloatParts128 pa, pb, *pr;
1544
1545 float128_unpack_canonical(&pa, a, status);
1546 float128_unpack_canonical(&pb, b, status);
1547 pr = parts_mul(&pa, &pb, status);
1548
1549 return float128_round_pack_canonical(pr, status);
8282310d
LZ
1550}
1551
d446830a 1552/*
dedd123c 1553 * Fused multiply-add
d446830a
AB
1554 */
1555
97ff87c0 1556float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
dedd123c 1557 int flags, float_status *status)
d446830a 1558{
dedd123c 1559 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1560
1561 float16_unpack_canonical(&pa, a, status);
1562 float16_unpack_canonical(&pb, b, status);
1563 float16_unpack_canonical(&pc, c, status);
dedd123c 1564 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1565
dedd123c 1566 return float16_round_pack_canonical(pr, status);
d446830a
AB
1567}
1568
ccf770ba
EC
1569static float32 QEMU_SOFTFLOAT_ATTR
1570soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1571 float_status *status)
d446830a 1572{
dedd123c 1573 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1574
1575 float32_unpack_canonical(&pa, a, status);
1576 float32_unpack_canonical(&pb, b, status);
1577 float32_unpack_canonical(&pc, c, status);
dedd123c 1578 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1579
dedd123c 1580 return float32_round_pack_canonical(pr, status);
d446830a
AB
1581}
1582
ccf770ba
EC
1583static float64 QEMU_SOFTFLOAT_ATTR
1584soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1585 float_status *status)
d446830a 1586{
dedd123c 1587 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1588
1589 float64_unpack_canonical(&pa, a, status);
1590 float64_unpack_canonical(&pb, b, status);
1591 float64_unpack_canonical(&pc, c, status);
dedd123c 1592 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1593
dedd123c 1594 return float64_round_pack_canonical(pr, status);
d446830a
AB
1595}
1596
f6b3b108
EC
/*
 * When true, float32_muladd() and float64_muladd() skip the host fma
 * fast path and always use the softfloat implementation.
 */
static bool force_soft_fma;
1598
ccf770ba
EC
1599float32 QEMU_FLATTEN
1600float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1601{
1602 union_float32 ua, ub, uc, ur;
1603
1604 ua.s = xa;
1605 ub.s = xb;
1606 uc.s = xc;
1607
1608 if (unlikely(!can_use_fpu(s))) {
1609 goto soft;
1610 }
1611 if (unlikely(flags & float_muladd_halve_result)) {
1612 goto soft;
1613 }
1614
1615 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1616 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1617 goto soft;
1618 }
f6b3b108
EC
1619
1620 if (unlikely(force_soft_fma)) {
1621 goto soft;
1622 }
1623
ccf770ba
EC
1624 /*
1625 * When (a || b) == 0, there's no need to check for under/over flow,
1626 * since we know the addend is (normal || 0) and the product is 0.
1627 */
1628 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1629 union_float32 up;
1630 bool prod_sign;
1631
1632 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1633 prod_sign ^= !!(flags & float_muladd_negate_product);
1634 up.s = float32_set_sign(float32_zero, prod_sign);
1635
1636 if (flags & float_muladd_negate_c) {
1637 uc.h = -uc.h;
1638 }
1639 ur.h = up.h + uc.h;
1640 } else {
896f51fb
KC
1641 union_float32 ua_orig = ua;
1642 union_float32 uc_orig = uc;
1643
ccf770ba
EC
1644 if (flags & float_muladd_negate_product) {
1645 ua.h = -ua.h;
1646 }
1647 if (flags & float_muladd_negate_c) {
1648 uc.h = -uc.h;
1649 }
1650
1651 ur.h = fmaf(ua.h, ub.h, uc.h);
1652
1653 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 1654 float_raise(float_flag_overflow, s);
ccf770ba 1655 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
896f51fb
KC
1656 ua = ua_orig;
1657 uc = uc_orig;
ccf770ba
EC
1658 goto soft;
1659 }
1660 }
1661 if (flags & float_muladd_negate_result) {
1662 return float32_chs(ur.s);
1663 }
1664 return ur.s;
1665
1666 soft:
1667 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1668}
1669
1670float64 QEMU_FLATTEN
1671float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1672{
1673 union_float64 ua, ub, uc, ur;
1674
1675 ua.s = xa;
1676 ub.s = xb;
1677 uc.s = xc;
1678
1679 if (unlikely(!can_use_fpu(s))) {
1680 goto soft;
1681 }
1682 if (unlikely(flags & float_muladd_halve_result)) {
1683 goto soft;
1684 }
1685
1686 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1687 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1688 goto soft;
1689 }
f6b3b108
EC
1690
1691 if (unlikely(force_soft_fma)) {
1692 goto soft;
1693 }
1694
ccf770ba
EC
1695 /*
1696 * When (a || b) == 0, there's no need to check for under/over flow,
1697 * since we know the addend is (normal || 0) and the product is 0.
1698 */
1699 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1700 union_float64 up;
1701 bool prod_sign;
1702
1703 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1704 prod_sign ^= !!(flags & float_muladd_negate_product);
1705 up.s = float64_set_sign(float64_zero, prod_sign);
1706
1707 if (flags & float_muladd_negate_c) {
1708 uc.h = -uc.h;
1709 }
1710 ur.h = up.h + uc.h;
1711 } else {
896f51fb
KC
1712 union_float64 ua_orig = ua;
1713 union_float64 uc_orig = uc;
1714
ccf770ba
EC
1715 if (flags & float_muladd_negate_product) {
1716 ua.h = -ua.h;
1717 }
1718 if (flags & float_muladd_negate_c) {
1719 uc.h = -uc.h;
1720 }
1721
1722 ur.h = fma(ua.h, ub.h, uc.h);
1723
1724 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 1725 float_raise(float_flag_overflow, s);
ccf770ba 1726 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
1727 ua = ua_orig;
1728 uc = uc_orig;
ccf770ba
EC
1729 goto soft;
1730 }
1731 }
1732 if (flags & float_muladd_negate_result) {
1733 return float64_chs(ur.s);
1734 }
1735 return ur.s;
1736
1737 soft:
1738 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1739}
1740
8282310d
LZ
1741bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1742 int flags, float_status *status)
1743{
dedd123c 1744 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1745
1746 bfloat16_unpack_canonical(&pa, a, status);
1747 bfloat16_unpack_canonical(&pb, b, status);
1748 bfloat16_unpack_canonical(&pc, c, status);
dedd123c
RH
1749 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1750
1751 return bfloat16_round_pack_canonical(pr, status);
1752}
8282310d 1753
dedd123c
RH
1754float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
1755 int flags, float_status *status)
1756{
1757 FloatParts128 pa, pb, pc, *pr;
1758
1759 float128_unpack_canonical(&pa, a, status);
1760 float128_unpack_canonical(&pb, b, status);
1761 float128_unpack_canonical(&pc, c, status);
1762 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1763
1764 return float128_round_pack_canonical(pr, status);
8282310d
LZ
1765}
1766
cf07323d
AB
1767/*
1768 * Returns the result of dividing the floating-point value `a' by the
1769 * corresponding value `b'. The operation is performed according to
1770 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1771 */
1772
f8155c1d 1773static FloatParts64 div_floats(FloatParts64 a, FloatParts64 b, float_status *s)
cf07323d
AB
1774{
1775 bool sign = a.sign ^ b.sign;
1776
1777 if (a.cls == float_class_normal && b.cls == float_class_normal) {
5dfbc9e4 1778 uint64_t n0, n1, q, r;
cf07323d 1779 int exp = a.exp - b.exp;
5dfbc9e4
RH
1780
1781 /*
1782 * We want a 2*N / N-bit division to produce exactly an N-bit
1783 * result, so that we do not lose any precision and so that we
1784 * do not have to renormalize afterward. If A.frac < B.frac,
1785 * then division would produce an (N-1)-bit result; shift A left
1786 * by one to produce the an N-bit result, and decrement the
1787 * exponent to match.
1788 *
1789 * The udiv_qrnnd algorithm that we're using requires normalization,
e99c4373 1790 * i.e. the msb of the denominator must be set, which is already true.
5dfbc9e4 1791 */
cf07323d
AB
1792 if (a.frac < b.frac) {
1793 exp -= 1;
5dfbc9e4 1794 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
e99c4373
RH
1795 } else {
1796 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT, &n1, &n0);
cf07323d 1797 }
e99c4373 1798 q = udiv_qrnnd(&r, n1, n0, b.frac);
5dfbc9e4 1799
e99c4373 1800 /* Set lsb if there is a remainder, to set inexact. */
5dfbc9e4 1801 a.frac = q | (r != 0);
cf07323d
AB
1802 a.sign = sign;
1803 a.exp = exp;
1804 return a;
1805 }
1806 /* handle all the NaN cases */
1807 if (is_nan(a.cls) || is_nan(b.cls)) {
22c355f4 1808 return *parts_pick_nan(&a, &b, s);
cf07323d
AB
1809 }
1810 /* 0/0 or Inf/Inf */
1811 if (a.cls == b.cls
1812 &&
1813 (a.cls == float_class_inf || a.cls == float_class_zero)) {
d82f3b2d 1814 float_raise(float_flag_invalid, s);
0fc07cad
RH
1815 parts_default_nan(&a, s);
1816 return a;
cf07323d 1817 }
9cb4e398
AB
1818 /* Inf / x or 0 / x */
1819 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1820 a.sign = sign;
1821 return a;
1822 }
cf07323d
AB
1823 /* Div 0 => Inf */
1824 if (b.cls == float_class_zero) {
d82f3b2d 1825 float_raise(float_flag_divbyzero, s);
cf07323d
AB
1826 a.cls = float_class_inf;
1827 a.sign = sign;
1828 return a;
1829 }
cf07323d
AB
1830 /* Div by Inf */
1831 if (b.cls == float_class_inf) {
1832 a.cls = float_class_zero;
1833 a.sign = sign;
1834 return a;
1835 }
1836 g_assert_not_reached();
1837}
1838
1839float16 float16_div(float16 a, float16 b, float_status *status)
1840{
98e256fc
RH
1841 FloatParts64 pa, pb, pr;
1842
1843 float16_unpack_canonical(&pa, a, status);
1844 float16_unpack_canonical(&pb, b, status);
1845 pr = div_floats(pa, pb, status);
cf07323d 1846
e293e927 1847 return float16_round_pack_canonical(&pr, status);
cf07323d
AB
1848}
1849
4a629561
EC
1850static float32 QEMU_SOFTFLOAT_ATTR
1851soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d 1852{
98e256fc
RH
1853 FloatParts64 pa, pb, pr;
1854
1855 float32_unpack_canonical(&pa, a, status);
1856 float32_unpack_canonical(&pb, b, status);
1857 pr = div_floats(pa, pb, status);
cf07323d 1858
e293e927 1859 return float32_round_pack_canonical(&pr, status);
cf07323d
AB
1860}
1861
4a629561
EC
1862static float64 QEMU_SOFTFLOAT_ATTR
1863soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d 1864{
98e256fc
RH
1865 FloatParts64 pa, pb, pr;
1866
1867 float64_unpack_canonical(&pa, a, status);
1868 float64_unpack_canonical(&pb, b, status);
1869 pr = div_floats(pa, pb, status);
cf07323d 1870
e293e927 1871 return float64_round_pack_canonical(&pr, status);
cf07323d
AB
1872}
1873
4a629561
EC
/* Host single-precision division. */
static float hard_f32_div(float a, float b)
{
    return a / b;
}
1878
/* Host double-precision division. */
static double hard_f64_div(double a, double b)
{
    return a / b;
}
1883
1884static bool f32_div_pre(union_float32 a, union_float32 b)
1885{
1886 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1887 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1888 fpclassify(b.h) == FP_NORMAL;
1889 }
1890 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1891}
1892
1893static bool f64_div_pre(union_float64 a, union_float64 b)
1894{
1895 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1896 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1897 fpclassify(b.h) == FP_NORMAL;
1898 }
1899 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1900}
1901
1902static bool f32_div_post(union_float32 a, union_float32 b)
1903{
1904 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1905 return fpclassify(a.h) != FP_ZERO;
1906 }
1907 return !float32_is_zero(a.s);
1908}
1909
1910static bool f64_div_post(union_float64 a, union_float64 b)
1911{
1912 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1913 return fpclassify(a.h) != FP_ZERO;
1914 }
1915 return !float64_is_zero(a.s);
1916}
1917
1918float32 QEMU_FLATTEN
1919float32_div(float32 a, float32 b, float_status *s)
1920{
1921 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
b240c9c4 1922 f32_div_pre, f32_div_post);
4a629561
EC
1923}
1924
1925float64 QEMU_FLATTEN
1926float64_div(float64 a, float64 b, float_status *s)
1927{
1928 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
b240c9c4 1929 f64_div_pre, f64_div_post);
4a629561
EC
1930}
1931
8282310d
LZ
1932/*
1933 * Returns the result of dividing the bfloat16
1934 * value `a' by the corresponding value `b'.
1935 */
1936
1937bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
1938{
98e256fc
RH
1939 FloatParts64 pa, pb, pr;
1940
1941 bfloat16_unpack_canonical(&pa, a, status);
1942 bfloat16_unpack_canonical(&pb, b, status);
1943 pr = div_floats(pa, pb, status);
8282310d 1944
e293e927 1945 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
1946}
1947
6fed16b2
AB
1948/*
1949 * Float to Float conversions
1950 *
1951 * Returns the result of converting one float format to another. The
1952 * conversion is performed according to the IEC/IEEE Standard for
1953 * Binary Floating-Point Arithmetic.
1954 *
1955 * The float_to_float helper only needs to take care of raising
1956 * invalid exceptions and handling the conversion on NaNs.
1957 */
1958
f8155c1d 1959static FloatParts64 float_to_float(FloatParts64 a, const FloatFmt *dstf,
6fed16b2
AB
1960 float_status *s)
1961{
1962 if (dstf->arm_althp) {
1963 switch (a.cls) {
1964 case float_class_qnan:
1965 case float_class_snan:
1966 /* There is no NaN in the destination format. Raise Invalid
1967 * and return a zero with the sign of the input NaN.
1968 */
d82f3b2d 1969 float_raise(float_flag_invalid, s);
6fed16b2
AB
1970 a.cls = float_class_zero;
1971 a.frac = 0;
1972 a.exp = 0;
1973 break;
1974
1975 case float_class_inf:
1976 /* There is no Inf in the destination format. Raise Invalid
1977 * and return the maximum normal with the correct sign.
1978 */
d82f3b2d 1979 float_raise(float_flag_invalid, s);
6fed16b2
AB
1980 a.cls = float_class_normal;
1981 a.exp = dstf->exp_max;
1982 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1983 break;
1984
1985 default:
1986 break;
1987 }
1988 } else if (is_nan(a.cls)) {
7c45bad8 1989 parts_return_nan(&a, s);
6fed16b2
AB
1990 }
1991 return a;
1992}
1993
1994float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1995{
1996 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
98e256fc
RH
1997 FloatParts64 pa, pr;
1998
1999 float16a_unpack_canonical(&pa, a, s, fmt16);
2000 pr = float_to_float(pa, &float32_params, s);
e293e927 2001 return float32_round_pack_canonical(&pr, s);
6fed16b2
AB
2002}
2003
2004float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2005{
2006 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
98e256fc
RH
2007 FloatParts64 pa, pr;
2008
2009 float16a_unpack_canonical(&pa, a, s, fmt16);
2010 pr = float_to_float(pa, &float64_params, s);
e293e927 2011 return float64_round_pack_canonical(&pr, s);
6fed16b2
AB
2012}
2013
2014float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2015{
2016 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
98e256fc
RH
2017 FloatParts64 pa, pr;
2018
2019 float32_unpack_canonical(&pa, a, s);
2020 pr = float_to_float(pa, fmt16, s);
e293e927 2021 return float16a_round_pack_canonical(&pr, s, fmt16);
6fed16b2
AB
2022}
2023
21381dcf
MK
2024static float64 QEMU_SOFTFLOAT_ATTR
2025soft_float32_to_float64(float32 a, float_status *s)
6fed16b2 2026{
98e256fc
RH
2027 FloatParts64 pa, pr;
2028
2029 float32_unpack_canonical(&pa, a, s);
2030 pr = float_to_float(pa, &float64_params, s);
e293e927 2031 return float64_round_pack_canonical(&pr, s);
6fed16b2
AB
2032}
2033
21381dcf
MK
2034float64 float32_to_float64(float32 a, float_status *s)
2035{
2036 if (likely(float32_is_normal(a))) {
2037 /* Widening conversion can never produce inexact results. */
2038 union_float32 uf;
2039 union_float64 ud;
2040 uf.s = a;
2041 ud.h = uf.h;
2042 return ud.s;
2043 } else if (float32_is_zero(a)) {
2044 return float64_set_sign(float64_zero, float32_is_neg(a));
2045 } else {
2046 return soft_float32_to_float64(a, s);
2047 }
2048}
2049
6fed16b2
AB
2050float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2051{
2052 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
98e256fc
RH
2053 FloatParts64 pa, pr;
2054
2055 float64_unpack_canonical(&pa, a, s);
2056 pr = float_to_float(pa, fmt16, s);
e293e927 2057 return float16a_round_pack_canonical(&pr, s, fmt16);
6fed16b2
AB
2058}
2059
2060float32 float64_to_float32(float64 a, float_status *s)
2061{
98e256fc
RH
2062 FloatParts64 pa, pr;
2063
2064 float64_unpack_canonical(&pa, a, s);
2065 pr = float_to_float(pa, &float32_params, s);
e293e927 2066 return float32_round_pack_canonical(&pr, s);
6fed16b2
AB
2067}
2068
34f0c0a9
LZ
2069float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2070{
98e256fc
RH
2071 FloatParts64 pa, pr;
2072
2073 bfloat16_unpack_canonical(&pa, a, s);
2074 pr = float_to_float(pa, &float32_params, s);
e293e927 2075 return float32_round_pack_canonical(&pr, s);
34f0c0a9
LZ
2076}
2077
2078float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2079{
98e256fc
RH
2080 FloatParts64 pa, pr;
2081
2082 bfloat16_unpack_canonical(&pa, a, s);
2083 pr = float_to_float(pa, &float64_params, s);
e293e927 2084 return float64_round_pack_canonical(&pr, s);
34f0c0a9
LZ
2085}
2086
2087bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2088{
98e256fc
RH
2089 FloatParts64 pa, pr;
2090
2091 float32_unpack_canonical(&pa, a, s);
2092 pr = float_to_float(pa, &bfloat16_params, s);
e293e927 2093 return bfloat16_round_pack_canonical(&pr, s);
34f0c0a9
LZ
2094}
2095
2096bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2097{
98e256fc
RH
2098 FloatParts64 pa, pr;
2099
2100 float64_unpack_canonical(&pa, a, s);
2101 pr = float_to_float(pa, &bfloat16_params, s);
e293e927 2102 return bfloat16_round_pack_canonical(&pr, s);
34f0c0a9
LZ
2103}
2104
dbe4d53a
AB
2105/*
2106 * Rounds the floating-point value `a' to an integer, and returns the
2107 * result as a floating-point value. The operation is performed
2108 * according to the IEC/IEEE Standard for Binary Floating-Point
2109 * Arithmetic.
2110 */
2111
f8155c1d 2112static FloatParts64 round_to_int(FloatParts64 a, FloatRoundMode rmode,
2f6c74be 2113 int scale, float_status *s)
dbe4d53a 2114{
2f6c74be
RH
2115 switch (a.cls) {
2116 case float_class_qnan:
2117 case float_class_snan:
7c45bad8
RH
2118 parts_return_nan(&a, s);
2119 break;
dbe4d53a 2120
dbe4d53a
AB
2121 case float_class_zero:
2122 case float_class_inf:
dbe4d53a
AB
2123 /* already "integral" */
2124 break;
2f6c74be 2125
dbe4d53a 2126 case float_class_normal:
2f6c74be
RH
2127 scale = MIN(MAX(scale, -0x10000), 0x10000);
2128 a.exp += scale;
2129
dbe4d53a
AB
2130 if (a.exp >= DECOMPOSED_BINARY_POINT) {
2131 /* already integral */
2132 break;
2133 }
2134 if (a.exp < 0) {
2135 bool one;
2136 /* all fractional */
d82f3b2d 2137 float_raise(float_flag_inexact, s);
2f6c74be 2138 switch (rmode) {
dbe4d53a
AB
2139 case float_round_nearest_even:
2140 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
2141 break;
2142 case float_round_ties_away:
2143 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
2144 break;
2145 case float_round_to_zero:
2146 one = false;
2147 break;
2148 case float_round_up:
2149 one = !a.sign;
2150 break;
2151 case float_round_down:
2152 one = a.sign;
2153 break;
5d64abb3
RH
2154 case float_round_to_odd:
2155 one = true;
2156 break;
dbe4d53a
AB
2157 default:
2158 g_assert_not_reached();
2159 }
2160
2161 if (one) {
2162 a.frac = DECOMPOSED_IMPLICIT_BIT;
2163 a.exp = 0;
2164 } else {
2165 a.cls = float_class_zero;
2166 }
2167 } else {
2168 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
2169 uint64_t frac_lsbm1 = frac_lsb >> 1;
2170 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
2171 uint64_t rnd_mask = rnd_even_mask >> 1;
2172 uint64_t inc;
2173
2f6c74be 2174 switch (rmode) {
dbe4d53a
AB
2175 case float_round_nearest_even:
2176 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
2177 break;
2178 case float_round_ties_away:
2179 inc = frac_lsbm1;
2180 break;
2181 case float_round_to_zero:
2182 inc = 0;
2183 break;
2184 case float_round_up:
2185 inc = a.sign ? 0 : rnd_mask;
2186 break;
2187 case float_round_down:
2188 inc = a.sign ? rnd_mask : 0;
2189 break;
5d64abb3
RH
2190 case float_round_to_odd:
2191 inc = a.frac & frac_lsb ? 0 : rnd_mask;
2192 break;
dbe4d53a
AB
2193 default:
2194 g_assert_not_reached();
2195 }
2196
2197 if (a.frac & rnd_mask) {
d82f3b2d 2198 float_raise(float_flag_inexact, s);
e99c4373 2199 if (uadd64_overflow(a.frac, inc, &a.frac)) {
dbe4d53a 2200 a.frac >>= 1;
e99c4373 2201 a.frac |= DECOMPOSED_IMPLICIT_BIT;
dbe4d53a
AB
2202 a.exp++;
2203 }
e99c4373 2204 a.frac &= ~rnd_mask;
dbe4d53a
AB
2205 }
2206 }
2207 break;
2208 default:
2209 g_assert_not_reached();
2210 }
2211 return a;
2212}
2213
2214float16 float16_round_to_int(float16 a, float_status *s)
2215{
98e256fc
RH
2216 FloatParts64 pa, pr;
2217
2218 float16_unpack_canonical(&pa, a, s);
2219 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
e293e927 2220 return float16_round_pack_canonical(&pr, s);
dbe4d53a
AB
2221}
2222
2223float32 float32_round_to_int(float32 a, float_status *s)
2224{
98e256fc
RH
2225 FloatParts64 pa, pr;
2226
2227 float32_unpack_canonical(&pa, a, s);
2228 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
e293e927 2229 return float32_round_pack_canonical(&pr, s);
dbe4d53a
AB
2230}
2231
2232float64 float64_round_to_int(float64 a, float_status *s)
2233{
98e256fc
RH
2234 FloatParts64 pa, pr;
2235
2236 float64_unpack_canonical(&pa, a, s);
2237 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
e293e927 2238 return float64_round_pack_canonical(&pr, s);
dbe4d53a
AB
2239}
2240
34f0c0a9
LZ
2241/*
2242 * Rounds the bfloat16 value `a' to an integer, and returns the
2243 * result as a bfloat16 value.
2244 */
2245
2246bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2247{
98e256fc
RH
2248 FloatParts64 pa, pr;
2249
2250 bfloat16_unpack_canonical(&pa, a, s);
2251 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
e293e927 2252 return bfloat16_round_pack_canonical(&pr, s);
34f0c0a9
LZ
2253}
2254
ab52f973
AB
2255/*
2256 * Returns the result of converting the floating-point value `a' to
2257 * the two's complement integer format. The conversion is performed
2258 * according to the IEC/IEEE Standard for Binary Floating-Point
2259 * Arithmetic---which means in particular that the conversion is
2260 * rounded according to the current rounding mode. If `a' is a NaN,
2261 * the largest positive integer is returned. Otherwise, if the
2262 * conversion overflows, the largest integer with the same sign as `a'
2263 * is returned.
2264*/
2265
f8155c1d 2266static int64_t round_to_int_and_pack(FloatParts64 in, FloatRoundMode rmode,
3dede407 2267 int scale, int64_t min, int64_t max,
ab52f973
AB
2268 float_status *s)
2269{
2270 uint64_t r;
2271 int orig_flags = get_float_exception_flags(s);
f8155c1d 2272 FloatParts64 p = round_to_int(in, rmode, scale, s);
ab52f973
AB
2273
2274 switch (p.cls) {
2275 case float_class_snan:
2276 case float_class_qnan:
801bc563 2277 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2278 return max;
2279 case float_class_inf:
801bc563 2280 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2281 return p.sign ? min : max;
2282 case float_class_zero:
2283 return 0;
2284 case float_class_normal:
e99c4373 2285 if (p.exp <= DECOMPOSED_BINARY_POINT) {
ab52f973 2286 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
ab52f973
AB
2287 } else {
2288 r = UINT64_MAX;
2289 }
2290 if (p.sign) {
33358375 2291 if (r <= -(uint64_t) min) {
ab52f973
AB
2292 return -r;
2293 } else {
2294 s->float_exception_flags = orig_flags | float_flag_invalid;
2295 return min;
2296 }
2297 } else {
33358375 2298 if (r <= max) {
ab52f973
AB
2299 return r;
2300 } else {
2301 s->float_exception_flags = orig_flags | float_flag_invalid;
2302 return max;
2303 }
2304 }
2305 default:
2306 g_assert_not_reached();
2307 }
2308}
2309
0d93d8ec
FC
2310int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2311 float_status *s)
2312{
98e256fc
RH
2313 FloatParts64 p;
2314
2315 float16_unpack_canonical(&p, a, s);
2316 return round_to_int_and_pack(p, rmode, scale, INT8_MIN, INT8_MAX, s);
0d93d8ec
FC
2317}
2318
3dede407 2319int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2320 float_status *s)
2321{
98e256fc
RH
2322 FloatParts64 p;
2323
2324 float16_unpack_canonical(&p, a, s);
2325 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2326}
2327
3dede407 2328int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2329 float_status *s)
2330{
98e256fc
RH
2331 FloatParts64 p;
2332
2333 float16_unpack_canonical(&p, a, s);
2334 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2335}
2336
3dede407 2337int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2338 float_status *s)
2339{
98e256fc
RH
2340 FloatParts64 p;
2341
2342 float16_unpack_canonical(&p, a, s);
2343 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2344}
2345
3dede407 2346int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2347 float_status *s)
2348{
98e256fc
RH
2349 FloatParts64 p;
2350
2351 float32_unpack_canonical(&p, a, s);
2352 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2353}
2354
3dede407 2355int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2356 float_status *s)
2357{
98e256fc
RH
2358 FloatParts64 p;
2359
2360 float32_unpack_canonical(&p, a, s);
2361 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2362}
2363
3dede407 2364int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2365 float_status *s)
2366{
98e256fc
RH
2367 FloatParts64 p;
2368
2369 float32_unpack_canonical(&p, a, s);
2370 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2371}
2372
3dede407 2373int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2374 float_status *s)
2375{
98e256fc
RH
2376 FloatParts64 p;
2377
2378 float64_unpack_canonical(&p, a, s);
2379 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2380}
2381
3dede407 2382int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2383 float_status *s)
2384{
98e256fc
RH
2385 FloatParts64 p;
2386
2387 float64_unpack_canonical(&p, a, s);
2388 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2389}
2390
3dede407 2391int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2392 float_status *s)
2393{
98e256fc
RH
2394 FloatParts64 p;
2395
2396 float64_unpack_canonical(&p, a, s);
2397 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2398}
2399
0d93d8ec
FC
2400int8_t float16_to_int8(float16 a, float_status *s)
2401{
2402 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2403}
2404
2f6c74be
RH
2405int16_t float16_to_int16(float16 a, float_status *s)
2406{
2407 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2408}
2409
2410int32_t float16_to_int32(float16 a, float_status *s)
2411{
2412 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2413}
2414
2415int64_t float16_to_int64(float16 a, float_status *s)
2416{
2417 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2418}
2419
2420int16_t float32_to_int16(float32 a, float_status *s)
2421{
2422 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2423}
2424
2425int32_t float32_to_int32(float32 a, float_status *s)
2426{
2427 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2428}
2429
2430int64_t float32_to_int64(float32 a, float_status *s)
2431{
2432 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2433}
2434
2435int16_t float64_to_int16(float64 a, float_status *s)
2436{
2437 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2438}
2439
2440int32_t float64_to_int32(float64 a, float_status *s)
2441{
2442 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2443}
2444
2445int64_t float64_to_int64(float64 a, float_status *s)
2446{
2447 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2448}
2449
2450int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2451{
2452 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2453}
2454
2455int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2456{
2457 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2458}
2459
2460int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2461{
2462 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
2463}
2464
2f6c74be
RH
2465int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2466{
2467 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2468}
ab52f973 2469
2f6c74be
RH
2470int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2471{
2472 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2473}
2474
2475int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2476{
2477 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2478}
2479
2480int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2481{
2482 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2483}
ab52f973 2484
2f6c74be
RH
2485int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2486{
2487 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2488}
ab52f973 2489
2f6c74be
RH
2490int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2491{
2492 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2493}
ab52f973 2494
34f0c0a9
LZ
2495/*
2496 * Returns the result of converting the floating-point value `a' to
2497 * the two's complement integer format.
2498 */
2499
2500int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2501 float_status *s)
2502{
98e256fc
RH
2503 FloatParts64 p;
2504
2505 bfloat16_unpack_canonical(&p, a, s);
2506 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
34f0c0a9
LZ
2507}
2508
2509int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2510 float_status *s)
2511{
98e256fc
RH
2512 FloatParts64 p;
2513
2514 bfloat16_unpack_canonical(&p, a, s);
2515 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
34f0c0a9
LZ
2516}
2517
2518int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2519 float_status *s)
2520{
98e256fc
RH
2521 FloatParts64 p;
2522
2523 bfloat16_unpack_canonical(&p, a, s);
2524 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
34f0c0a9
LZ
2525}
2526
2527int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2528{
2529 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2530}
2531
2532int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2533{
2534 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2535}
2536
2537int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2538{
2539 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2540}
2541
2542int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2543{
2544 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2545}
2546
2547int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2548{
2549 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2550}
2551
2552int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2553{
2554 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2555}
2556
ab52f973
AB
2557/*
2558 * Returns the result of converting the floating-point value `a' to
2559 * the unsigned integer format. The conversion is performed according
2560 * to the IEC/IEEE Standard for Binary Floating-Point
2561 * Arithmetic---which means in particular that the conversion is
2562 * rounded according to the current rounding mode. If `a' is a NaN,
2563 * the largest unsigned integer is returned. Otherwise, if the
2564 * conversion overflows, the largest unsigned integer is returned. If
2565 * the 'a' is negative, the result is rounded and zero is returned;
2566 * values that do not round to zero will raise the inexact exception
2567 * flag.
2568 */
2569
f8155c1d 2570static uint64_t round_to_uint_and_pack(FloatParts64 in, FloatRoundMode rmode,
3dede407
RH
2571 int scale, uint64_t max,
2572 float_status *s)
ab52f973
AB
2573{
2574 int orig_flags = get_float_exception_flags(s);
f8155c1d 2575 FloatParts64 p = round_to_int(in, rmode, scale, s);
2f6c74be 2576 uint64_t r;
ab52f973
AB
2577
2578 switch (p.cls) {
2579 case float_class_snan:
2580 case float_class_qnan:
2581 s->float_exception_flags = orig_flags | float_flag_invalid;
2582 return max;
2583 case float_class_inf:
801bc563 2584 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2585 return p.sign ? 0 : max;
2586 case float_class_zero:
2587 return 0;
2588 case float_class_normal:
ab52f973
AB
2589 if (p.sign) {
2590 s->float_exception_flags = orig_flags | float_flag_invalid;
2591 return 0;
2592 }
2593
e99c4373 2594 if (p.exp <= DECOMPOSED_BINARY_POINT) {
ab52f973 2595 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
ab52f973
AB
2596 } else {
2597 s->float_exception_flags = orig_flags | float_flag_invalid;
2598 return max;
2599 }
2600
2601 /* For uint64 this will never trip, but if p.exp is too large
2602 * to shift a decomposed fraction we shall have exited via the
2603 * 3rd leg above.
2604 */
2605 if (r > max) {
2606 s->float_exception_flags = orig_flags | float_flag_invalid;
2607 return max;
ab52f973 2608 }
2f6c74be 2609 return r;
ab52f973
AB
2610 default:
2611 g_assert_not_reached();
2612 }
2613}
2614
0d93d8ec
FC
2615uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2616 float_status *s)
2617{
98e256fc
RH
2618 FloatParts64 p;
2619
2620 float16_unpack_canonical(&p, a, s);
2621 return round_to_uint_and_pack(p, rmode, scale, UINT8_MAX, s);
0d93d8ec
FC
2622}
2623
3dede407 2624uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2625 float_status *s)
2626{
98e256fc
RH
2627 FloatParts64 p;
2628
2629 float16_unpack_canonical(&p, a, s);
2630 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2631}
2632
3dede407 2633uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2634 float_status *s)
2635{
98e256fc
RH
2636 FloatParts64 p;
2637
2638 float16_unpack_canonical(&p, a, s);
2639 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2640}
2641
3dede407 2642uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2643 float_status *s)
2644{
98e256fc
RH
2645 FloatParts64 p;
2646
2647 float16_unpack_canonical(&p, a, s);
2648 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2649}
2650
3dede407 2651uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2652 float_status *s)
2653{
98e256fc
RH
2654 FloatParts64 p;
2655
2656 float32_unpack_canonical(&p, a, s);
2657 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2658}
2659
3dede407 2660uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2661 float_status *s)
2662{
98e256fc
RH
2663 FloatParts64 p;
2664
2665 float32_unpack_canonical(&p, a, s);
2666 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2667}
2668
3dede407 2669uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2670 float_status *s)
2671{
98e256fc
RH
2672 FloatParts64 p;
2673
2674 float32_unpack_canonical(&p, a, s);
2675 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2676}
2677
3dede407 2678uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2679 float_status *s)
2680{
98e256fc
RH
2681 FloatParts64 p;
2682
2683 float64_unpack_canonical(&p, a, s);
2684 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2685}
2686
3dede407 2687uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2688 float_status *s)
2689{
98e256fc
RH
2690 FloatParts64 p;
2691
2692 float64_unpack_canonical(&p, a, s);
2693 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2694}
2695
3dede407 2696uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2697 float_status *s)
2698{
98e256fc
RH
2699 FloatParts64 p;
2700
2701 float64_unpack_canonical(&p, a, s);
2702 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2703}
2704
0d93d8ec
FC
2705uint8_t float16_to_uint8(float16 a, float_status *s)
2706{
2707 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2708}
2709
2f6c74be
RH
2710uint16_t float16_to_uint16(float16 a, float_status *s)
2711{
2712 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2713}
2714
2715uint32_t float16_to_uint32(float16 a, float_status *s)
2716{
2717 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2718}
2719
2720uint64_t float16_to_uint64(float16 a, float_status *s)
2721{
2722 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2723}
2724
2725uint16_t float32_to_uint16(float32 a, float_status *s)
2726{
2727 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2728}
2729
2730uint32_t float32_to_uint32(float32 a, float_status *s)
2731{
2732 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2733}
2734
2735uint64_t float32_to_uint64(float32 a, float_status *s)
2736{
2737 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2738}
2739
2740uint16_t float64_to_uint16(float64 a, float_status *s)
2741{
2742 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2743}
2744
2745uint32_t float64_to_uint32(float64 a, float_status *s)
2746{
2747 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2748}
2749
2750uint64_t float64_to_uint64(float64 a, float_status *s)
2751{
2752 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2753}
2754
2755uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2756{
2757 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2758}
2759
2760uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2761{
2762 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2763}
2764
2765uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2766{
2767 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2768}
2769
2770uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2771{
2772 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2773}
2774
2775uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2776{
2777 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2778}
2779
2780uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2781{
2782 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2783}
2784
2785uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2786{
2787 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2788}
2789
2790uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2791{
2792 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2793}
2794
2795uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2796{
2797 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2798}
ab52f973 2799
34f0c0a9
LZ
2800/*
2801 * Returns the result of converting the bfloat16 value `a' to
2802 * the unsigned integer format.
2803 */
2804
2805uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2806 int scale, float_status *s)
2807{
98e256fc
RH
2808 FloatParts64 p;
2809
2810 bfloat16_unpack_canonical(&p, a, s);
2811 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
34f0c0a9
LZ
2812}
2813
2814uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2815 int scale, float_status *s)
2816{
98e256fc
RH
2817 FloatParts64 p;
2818
2819 bfloat16_unpack_canonical(&p, a, s);
2820 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
34f0c0a9
LZ
2821}
2822
2823uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
2824 int scale, float_status *s)
2825{
98e256fc
RH
2826 FloatParts64 p;
2827
2828 bfloat16_unpack_canonical(&p, a, s);
2829 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
34f0c0a9
LZ
2830}
2831
2832uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
2833{
2834 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2835}
2836
2837uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
2838{
2839 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2840}
2841
2842uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
2843{
2844 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2845}
2846
2847uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
2848{
2849 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2850}
2851
2852uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
2853{
2854 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2855}
2856
2857uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
2858{
2859 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2860}
2861
c02e1fb8
AB
2862/*
2863 * Integer to float conversions
2864 *
2865 * Returns the result of converting the two's complement integer `a'
2866 * to the floating-point format. The conversion is performed according
2867 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2868 */
2869
f8155c1d 2870static FloatParts64 int_to_float(int64_t a, int scale, float_status *status)
c02e1fb8 2871{
f8155c1d 2872 FloatParts64 r = { .sign = false };
2abdfe24 2873
c02e1fb8
AB
2874 if (a == 0) {
2875 r.cls = float_class_zero;
c02e1fb8 2876 } else {
2abdfe24
RH
2877 uint64_t f = a;
2878 int shift;
2879
2880 r.cls = float_class_normal;
c02e1fb8 2881 if (a < 0) {
2abdfe24 2882 f = -f;
c02e1fb8 2883 r.sign = true;
c02e1fb8 2884 }
e99c4373 2885 shift = clz64(f);
2abdfe24
RH
2886 scale = MIN(MAX(scale, -0x10000), 0x10000);
2887
2888 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
e99c4373 2889 r.frac = f << shift;
c02e1fb8
AB
2890 }
2891
2892 return r;
2893}
2894
2abdfe24 2895float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2896{
f8155c1d 2897 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 2898 return float16_round_pack_canonical(&pa, status);
c02e1fb8
AB
2899}
2900
2abdfe24
RH
2901float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
2902{
2903 return int64_to_float16_scalbn(a, scale, status);
2904}
2905
2906float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
2907{
2908 return int64_to_float16_scalbn(a, scale, status);
2909}
2910
2911float16 int64_to_float16(int64_t a, float_status *status)
2912{
2913 return int64_to_float16_scalbn(a, 0, status);
2914}
2915
c02e1fb8
AB
2916float16 int32_to_float16(int32_t a, float_status *status)
2917{
2abdfe24 2918 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2919}
2920
2921float16 int16_to_float16(int16_t a, float_status *status)
2922{
2abdfe24 2923 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2924}
2925
0d93d8ec
FC
2926float16 int8_to_float16(int8_t a, float_status *status)
2927{
2928 return int64_to_float16_scalbn(a, 0, status);
2929}
2930
2abdfe24 2931float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2932{
f8155c1d 2933 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 2934 return float32_round_pack_canonical(&pa, status);
c02e1fb8
AB
2935}
2936
2abdfe24
RH
2937float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
2938{
2939 return int64_to_float32_scalbn(a, scale, status);
2940}
2941
2942float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
2943{
2944 return int64_to_float32_scalbn(a, scale, status);
2945}
2946
2947float32 int64_to_float32(int64_t a, float_status *status)
2948{
2949 return int64_to_float32_scalbn(a, 0, status);
2950}
2951
c02e1fb8
AB
2952float32 int32_to_float32(int32_t a, float_status *status)
2953{
2abdfe24 2954 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2955}
2956
2957float32 int16_to_float32(int16_t a, float_status *status)
2958{
2abdfe24 2959 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2960}
2961
2abdfe24 2962float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2963{
f8155c1d 2964 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 2965 return float64_round_pack_canonical(&pa, status);
c02e1fb8
AB
2966}
2967
2abdfe24
RH
2968float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
2969{
2970 return int64_to_float64_scalbn(a, scale, status);
2971}
2972
2973float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
2974{
2975 return int64_to_float64_scalbn(a, scale, status);
2976}
2977
2978float64 int64_to_float64(int64_t a, float_status *status)
2979{
2980 return int64_to_float64_scalbn(a, 0, status);
2981}
2982
c02e1fb8
AB
2983float64 int32_to_float64(int32_t a, float_status *status)
2984{
2abdfe24 2985 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2986}
2987
2988float64 int16_to_float64(int16_t a, float_status *status)
2989{
2abdfe24 2990 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2991}
2992
34f0c0a9
LZ
2993/*
2994 * Returns the result of converting the two's complement integer `a'
2995 * to the bfloat16 format.
2996 */
2997
2998bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
2999{
f8155c1d 3000 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 3001 return bfloat16_round_pack_canonical(&pa, status);
34f0c0a9
LZ
3002}
3003
3004bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3005{
3006 return int64_to_bfloat16_scalbn(a, scale, status);
3007}
3008
3009bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3010{
3011 return int64_to_bfloat16_scalbn(a, scale, status);
3012}
3013
3014bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3015{
3016 return int64_to_bfloat16_scalbn(a, 0, status);
3017}
3018
3019bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3020{
3021 return int64_to_bfloat16_scalbn(a, 0, status);
3022}
3023
3024bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3025{
3026 return int64_to_bfloat16_scalbn(a, 0, status);
3027}
c02e1fb8
AB
3028
3029/*
3030 * Unsigned Integer to float conversions
3031 *
3032 * Returns the result of converting the unsigned integer `a' to the
3033 * floating-point format. The conversion is performed according to the
3034 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3035 */
3036
f8155c1d 3037static FloatParts64 uint_to_float(uint64_t a, int scale, float_status *status)
c02e1fb8 3038{
f8155c1d 3039 FloatParts64 r = { .sign = false };
e99c4373 3040 int shift;
c02e1fb8
AB
3041
3042 if (a == 0) {
3043 r.cls = float_class_zero;
3044 } else {
2abdfe24 3045 scale = MIN(MAX(scale, -0x10000), 0x10000);
e99c4373 3046 shift = clz64(a);
c02e1fb8 3047 r.cls = float_class_normal;
e99c4373
RH
3048 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
3049 r.frac = a << shift;
c02e1fb8
AB
3050 }
3051
3052 return r;
3053}
3054
2abdfe24 3055float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3056{
f8155c1d 3057 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3058 return float16_round_pack_canonical(&pa, status);
c02e1fb8
AB
3059}
3060
2abdfe24
RH
3061float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3062{
3063 return uint64_to_float16_scalbn(a, scale, status);
3064}
3065
3066float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3067{
3068 return uint64_to_float16_scalbn(a, scale, status);
3069}
3070
3071float16 uint64_to_float16(uint64_t a, float_status *status)
3072{
3073 return uint64_to_float16_scalbn(a, 0, status);
3074}
3075
c02e1fb8
AB
3076float16 uint32_to_float16(uint32_t a, float_status *status)
3077{
2abdfe24 3078 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3079}
3080
3081float16 uint16_to_float16(uint16_t a, float_status *status)
3082{
2abdfe24 3083 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3084}
3085
0d93d8ec
FC
3086float16 uint8_to_float16(uint8_t a, float_status *status)
3087{
3088 return uint64_to_float16_scalbn(a, 0, status);
3089}
3090
2abdfe24 3091float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3092{
f8155c1d 3093 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3094 return float32_round_pack_canonical(&pa, status);
c02e1fb8
AB
3095}
3096
2abdfe24
RH
3097float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3098{
3099 return uint64_to_float32_scalbn(a, scale, status);
3100}
3101
3102float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3103{
3104 return uint64_to_float32_scalbn(a, scale, status);
3105}
3106
3107float32 uint64_to_float32(uint64_t a, float_status *status)
3108{
3109 return uint64_to_float32_scalbn(a, 0, status);
3110}
3111
c02e1fb8
AB
3112float32 uint32_to_float32(uint32_t a, float_status *status)
3113{
2abdfe24 3114 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3115}
3116
3117float32 uint16_to_float32(uint16_t a, float_status *status)
3118{
2abdfe24 3119 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3120}
3121
2abdfe24 3122float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3123{
f8155c1d 3124 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3125 return float64_round_pack_canonical(&pa, status);
c02e1fb8
AB
3126}
3127
2abdfe24
RH
3128float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3129{
3130 return uint64_to_float64_scalbn(a, scale, status);
3131}
3132
3133float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3134{
3135 return uint64_to_float64_scalbn(a, scale, status);
3136}
3137
3138float64 uint64_to_float64(uint64_t a, float_status *status)
3139{
3140 return uint64_to_float64_scalbn(a, 0, status);
3141}
3142
c02e1fb8
AB
3143float64 uint32_to_float64(uint32_t a, float_status *status)
3144{
2abdfe24 3145 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3146}
3147
3148float64 uint16_to_float64(uint16_t a, float_status *status)
3149{
2abdfe24 3150 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3151}
3152
34f0c0a9
LZ
3153/*
3154 * Returns the result of converting the unsigned integer `a' to the
3155 * bfloat16 format.
3156 */
3157
3158bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3159{
f8155c1d 3160 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3161 return bfloat16_round_pack_canonical(&pa, status);
34f0c0a9
LZ
3162}
3163
3164bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3165{
3166 return uint64_to_bfloat16_scalbn(a, scale, status);
3167}
3168
3169bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3170{
3171 return uint64_to_bfloat16_scalbn(a, scale, status);
3172}
3173
3174bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3175{
3176 return uint64_to_bfloat16_scalbn(a, 0, status);
3177}
3178
3179bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3180{
3181 return uint64_to_bfloat16_scalbn(a, 0, status);
3182}
3183
3184bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3185{
3186 return uint64_to_bfloat16_scalbn(a, 0, status);
3187}
3188
89360067
AB
3189/* Float Min/Max */
3190/* min() and max() functions. These can't be implemented as
3191 * 'compare and pick one input' because that would mishandle
3192 * NaNs and +0 vs -0.
3193 *
3194 * minnum() and maxnum() functions. These are similar to the min()
3195 * and max() functions but if one of the arguments is a QNaN and
3196 * the other is numerical then the numerical argument is returned.
3197 * SNaNs will get quietened before being returned.
3198 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
3199 * and maxNum() operations. min() and max() are the typical min/max
3200 * semantics provided by many CPUs which predate that specification.
3201 *
3202 * minnummag() and maxnummag() functions correspond to minNumMag()
3203 * and maxNumMag() from the IEEE-754 2008.
3204 */
f8155c1d 3205static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
89360067
AB
3206 bool ieee, bool ismag, float_status *s)
3207{
3208 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
3209 if (ieee) {
3210 /* Takes two floating-point values `a' and `b', one of
3211 * which is a NaN, and returns the appropriate NaN
3212 * result. If either `a' or `b' is a signaling NaN,
3213 * the invalid exception is raised.
3214 */
3215 if (is_snan(a.cls) || is_snan(b.cls)) {
22c355f4 3216 return *parts_pick_nan(&a, &b, s);
89360067
AB
3217 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
3218 return b;
3219 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
3220 return a;
3221 }
3222 }
22c355f4 3223 return *parts_pick_nan(&a, &b, s);
89360067
AB
3224 } else {
3225 int a_exp, b_exp;
89360067
AB
3226
3227 switch (a.cls) {
3228 case float_class_normal:
3229 a_exp = a.exp;
3230 break;
3231 case float_class_inf:
3232 a_exp = INT_MAX;
3233 break;
3234 case float_class_zero:
3235 a_exp = INT_MIN;
3236 break;
3237 default:
3238 g_assert_not_reached();
3239 break;
3240 }
3241 switch (b.cls) {
3242 case float_class_normal:
3243 b_exp = b.exp;
3244 break;
3245 case float_class_inf:
3246 b_exp = INT_MAX;
3247 break;
3248 case float_class_zero:
3249 b_exp = INT_MIN;
3250 break;
3251 default:
3252 g_assert_not_reached();
3253 break;
3254 }
3255
6245327a
EC
3256 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
3257 bool a_less = a_exp < b_exp;
3258 if (a_exp == b_exp) {
3259 a_less = a.frac < b.frac;
3260 }
3261 return a_less ^ ismin ? b : a;
89360067
AB
3262 }
3263
6245327a 3264 if (a.sign == b.sign) {
89360067
AB
3265 bool a_less = a_exp < b_exp;
3266 if (a_exp == b_exp) {
3267 a_less = a.frac < b.frac;
3268 }
6245327a 3269 return a.sign ^ a_less ^ ismin ? b : a;
89360067 3270 } else {
6245327a 3271 return a.sign ^ ismin ? b : a;
89360067
AB
3272 }
3273 }
3274}
3275
3276#define MINMAX(sz, name, ismin, isiee, ismag) \
3277float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
3278 float_status *s) \
3279{ \
98e256fc
RH
3280 FloatParts64 pa, pb, pr; \
3281 float ## sz ## _unpack_canonical(&pa, a, s); \
3282 float ## sz ## _unpack_canonical(&pb, b, s); \
3283 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
e293e927 3284 return float ## sz ## _round_pack_canonical(&pr, s); \
89360067
AB
3285}
3286
3287MINMAX(16, min, true, false, false)
3288MINMAX(16, minnum, true, true, false)
3289MINMAX(16, minnummag, true, true, true)
3290MINMAX(16, max, false, false, false)
3291MINMAX(16, maxnum, false, true, false)
3292MINMAX(16, maxnummag, false, true, true)
3293
3294MINMAX(32, min, true, false, false)
3295MINMAX(32, minnum, true, true, false)
3296MINMAX(32, minnummag, true, true, true)
3297MINMAX(32, max, false, false, false)
3298MINMAX(32, maxnum, false, true, false)
3299MINMAX(32, maxnummag, false, true, true)
3300
3301MINMAX(64, min, true, false, false)
3302MINMAX(64, minnum, true, true, false)
3303MINMAX(64, minnummag, true, true, true)
3304MINMAX(64, max, false, false, false)
3305MINMAX(64, maxnum, false, true, false)
3306MINMAX(64, maxnummag, false, true, true)
3307
3308#undef MINMAX
3309
8282310d
LZ
3310#define BF16_MINMAX(name, ismin, isiee, ismag) \
3311bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \
3312{ \
98e256fc
RH
3313 FloatParts64 pa, pb, pr; \
3314 bfloat16_unpack_canonical(&pa, a, s); \
3315 bfloat16_unpack_canonical(&pb, b, s); \
3316 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
e293e927 3317 return bfloat16_round_pack_canonical(&pr, s); \
8282310d
LZ
3318}
3319
3320BF16_MINMAX(min, true, false, false)
3321BF16_MINMAX(minnum, true, true, false)
3322BF16_MINMAX(minnummag, true, true, true)
3323BF16_MINMAX(max, false, false, false)
3324BF16_MINMAX(maxnum, false, true, false)
3325BF16_MINMAX(maxnummag, false, true, true)
3326
3327#undef BF16_MINMAX
3328
0c4c9092 3329/* Floating point compare */
f8155c1d 3330static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
71bfd65c 3331 float_status *s)
0c4c9092
AB
3332{
3333 if (is_nan(a.cls) || is_nan(b.cls)) {
3334 if (!is_quiet ||
3335 a.cls == float_class_snan ||
3336 b.cls == float_class_snan) {
d82f3b2d 3337 float_raise(float_flag_invalid, s);
0c4c9092
AB
3338 }
3339 return float_relation_unordered;
3340 }
3341
3342 if (a.cls == float_class_zero) {
3343 if (b.cls == float_class_zero) {
3344 return float_relation_equal;
3345 }
3346 return b.sign ? float_relation_greater : float_relation_less;
3347 } else if (b.cls == float_class_zero) {
3348 return a.sign ? float_relation_less : float_relation_greater;
3349 }
3350
3351 /* The only really important thing about infinity is its sign. If
3352 * both are infinities the sign marks the smallest of the two.
3353 */
3354 if (a.cls == float_class_inf) {
3355 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
3356 return float_relation_equal;
3357 }
3358 return a.sign ? float_relation_less : float_relation_greater;
3359 } else if (b.cls == float_class_inf) {
3360 return b.sign ? float_relation_greater : float_relation_less;
3361 }
3362
3363 if (a.sign != b.sign) {
3364 return a.sign ? float_relation_less : float_relation_greater;
3365 }
3366
3367 if (a.exp == b.exp) {
3368 if (a.frac == b.frac) {
3369 return float_relation_equal;
3370 }
3371 if (a.sign) {
3372 return a.frac > b.frac ?
3373 float_relation_less : float_relation_greater;
3374 } else {
3375 return a.frac > b.frac ?
3376 float_relation_greater : float_relation_less;
3377 }
3378 } else {
3379 if (a.sign) {
3380 return a.exp > b.exp ? float_relation_less : float_relation_greater;
3381 } else {
3382 return a.exp > b.exp ? float_relation_greater : float_relation_less;
3383 }
3384 }
3385}
3386
d9fe9db9
EC
3387#define COMPARE(name, attr, sz) \
3388static int attr \
3389name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
0c4c9092 3390{ \
98e256fc
RH
3391 FloatParts64 pa, pb; \
3392 float ## sz ## _unpack_canonical(&pa, a, s); \
3393 float ## sz ## _unpack_canonical(&pb, b, s); \
d9fe9db9 3394 return compare_floats(pa, pb, is_quiet, s); \
0c4c9092
AB
3395}
3396
d9fe9db9
EC
3397COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
3398COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
3399COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
0c4c9092
AB
3400
3401#undef COMPARE
3402
71bfd65c 3403FloatRelation float16_compare(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3404{
3405 return soft_f16_compare(a, b, false, s);
3406}
3407
71bfd65c 3408FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3409{
3410 return soft_f16_compare(a, b, true, s);
3411}
3412
71bfd65c 3413static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3414f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
3415{
3416 union_float32 ua, ub;
3417
3418 ua.s = xa;
3419 ub.s = xb;
3420
3421 if (QEMU_NO_HARDFLOAT) {
3422 goto soft;
3423 }
3424
3425 float32_input_flush2(&ua.s, &ub.s, s);
3426 if (isgreaterequal(ua.h, ub.h)) {
3427 if (isgreater(ua.h, ub.h)) {
3428 return float_relation_greater;
3429 }
3430 return float_relation_equal;
3431 }
3432 if (likely(isless(ua.h, ub.h))) {
3433 return float_relation_less;
3434 }
3435 /* The only condition remaining is unordered.
3436 * Fall through to set flags.
3437 */
3438 soft:
3439 return soft_f32_compare(ua.s, ub.s, is_quiet, s);
3440}
3441
71bfd65c 3442FloatRelation float32_compare(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3443{
3444 return f32_compare(a, b, false, s);
3445}
3446
71bfd65c 3447FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3448{
3449 return f32_compare(a, b, true, s);
3450}
3451
71bfd65c 3452static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3453f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
3454{
3455 union_float64 ua, ub;
3456
3457 ua.s = xa;
3458 ub.s = xb;
3459
3460 if (QEMU_NO_HARDFLOAT) {
3461 goto soft;
3462 }
3463
3464 float64_input_flush2(&ua.s, &ub.s, s);
3465 if (isgreaterequal(ua.h, ub.h)) {
3466 if (isgreater(ua.h, ub.h)) {
3467 return float_relation_greater;
3468 }
3469 return float_relation_equal;
3470 }
3471 if (likely(isless(ua.h, ub.h))) {
3472 return float_relation_less;
3473 }
3474 /* The only condition remaining is unordered.
3475 * Fall through to set flags.
3476 */
3477 soft:
3478 return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3479}
3480
71bfd65c 3481FloatRelation float64_compare(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3482{
3483 return f64_compare(a, b, false, s);
3484}
3485
71bfd65c 3486FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3487{
3488 return f64_compare(a, b, true, s);
3489}
3490
8282310d
LZ
3491static FloatRelation QEMU_FLATTEN
3492soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
3493{
98e256fc
RH
3494 FloatParts64 pa, pb;
3495
3496 bfloat16_unpack_canonical(&pa, a, s);
3497 bfloat16_unpack_canonical(&pb, b, s);
8282310d
LZ
3498 return compare_floats(pa, pb, is_quiet, s);
3499}
3500
3501FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3502{
3503 return soft_bf16_compare(a, b, false, s);
3504}
3505
3506FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3507{
3508 return soft_bf16_compare(a, b, true, s);
3509}
3510
0bfc9f19 3511/* Multiply A by 2 raised to the power N. */
f8155c1d 3512static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
0bfc9f19
AB
3513{
3514 if (unlikely(is_nan(a.cls))) {
7c45bad8 3515 parts_return_nan(&a, s);
0bfc9f19
AB
3516 }
3517 if (a.cls == float_class_normal) {
f8155c1d 3518 /* The largest float type (even though not supported by FloatParts64)
ce8d4082
RH
3519 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
3520 * still allows rounding to infinity, without allowing overflow
f8155c1d 3521 * within the int32_t that backs FloatParts64.exp.
ce8d4082
RH
3522 */
3523 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
3524 a.exp += n;
3525 }
3526 return a;
3527}
3528
3529float16 float16_scalbn(float16 a, int n, float_status *status)
3530{
98e256fc
RH
3531 FloatParts64 pa, pr;
3532
3533 float16_unpack_canonical(&pa, a, status);
3534 pr = scalbn_decomposed(pa, n, status);
e293e927 3535 return float16_round_pack_canonical(&pr, status);
0bfc9f19
AB
3536}
3537
3538float32 float32_scalbn(float32 a, int n, float_status *status)
3539{
98e256fc
RH
3540 FloatParts64 pa, pr;
3541
3542 float32_unpack_canonical(&pa, a, status);
3543 pr = scalbn_decomposed(pa, n, status);
e293e927 3544 return float32_round_pack_canonical(&pr, status);
0bfc9f19
AB
3545}
3546
3547float64 float64_scalbn(float64 a, int n, float_status *status)
3548{
98e256fc
RH
3549 FloatParts64 pa, pr;
3550
3551 float64_unpack_canonical(&pa, a, status);
3552 pr = scalbn_decomposed(pa, n, status);
e293e927 3553 return float64_round_pack_canonical(&pr, status);
0bfc9f19
AB
3554}
3555
8282310d
LZ
3556bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3557{
98e256fc
RH
3558 FloatParts64 pa, pr;
3559
3560 bfloat16_unpack_canonical(&pa, a, status);
3561 pr = scalbn_decomposed(pa, n, status);
e293e927 3562 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
3563}
3564
c13bb2da
AB
3565/*
3566 * Square Root
3567 *
3568 * The old softfloat code did an approximation step before zeroing in
3569 * on the final result. However for simplicity we just compute the
3570 * square root by iterating down from the implicit bit to enough extra
3571 * bits to ensure we get a correctly rounded result.
3572 *
3573 * This does mean however the calculation is slower than before,
3574 * especially for 64 bit floats.
3575 */
3576
f8155c1d 3577static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
c13bb2da
AB
3578{
3579 uint64_t a_frac, r_frac, s_frac;
3580 int bit, last_bit;
3581
3582 if (is_nan(a.cls)) {
7c45bad8
RH
3583 parts_return_nan(&a, s);
3584 return a;
c13bb2da
AB
3585 }
3586 if (a.cls == float_class_zero) {
3587 return a; /* sqrt(+-0) = +-0 */
3588 }
3589 if (a.sign) {
d82f3b2d 3590 float_raise(float_flag_invalid, s);
0fc07cad
RH
3591 parts_default_nan(&a, s);
3592 return a;
c13bb2da
AB
3593 }
3594 if (a.cls == float_class_inf) {
3595 return a; /* sqrt(+inf) = +inf */
3596 }
3597
3598 assert(a.cls == float_class_normal);
3599
3600 /* We need two overflow bits at the top. Adding room for that is a
3601 * right shift. If the exponent is odd, we can discard the low bit
3602 * by multiplying the fraction by 2; that's a left shift. Combine
e99c4373 3603 * those and we shift right by 1 if the exponent is odd, otherwise 2.
c13bb2da 3604 */
e99c4373 3605 a_frac = a.frac >> (2 - (a.exp & 1));
c13bb2da
AB
3606 a.exp >>= 1;
3607
3608 /* Bit-by-bit computation of sqrt. */
3609 r_frac = 0;
3610 s_frac = 0;
3611
3612 /* Iterate from implicit bit down to the 3 extra bits to compute a
e99c4373
RH
3613 * properly rounded result. Remember we've inserted two more bits
3614 * at the top, so these positions are two less.
c13bb2da 3615 */
e99c4373 3616 bit = DECOMPOSED_BINARY_POINT - 2;
c13bb2da
AB
3617 last_bit = MAX(p->frac_shift - 4, 0);
3618 do {
3619 uint64_t q = 1ULL << bit;
3620 uint64_t t_frac = s_frac + q;
3621 if (t_frac <= a_frac) {
3622 s_frac = t_frac + q;
3623 a_frac -= t_frac;
3624 r_frac += q;
3625 }
3626 a_frac <<= 1;
3627 } while (--bit >= last_bit);
3628
3629 /* Undo the right shift done above. If there is any remaining
3630 * fraction, the result is inexact. Set the sticky bit.
3631 */
e99c4373 3632 a.frac = (r_frac << 2) + (a_frac != 0);
c13bb2da
AB
3633
3634 return a;
3635}
3636
97ff87c0 3637float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da 3638{
98e256fc
RH
3639 FloatParts64 pa, pr;
3640
3641 float16_unpack_canonical(&pa, a, status);
3642 pr = sqrt_float(pa, status, &float16_params);
e293e927 3643 return float16_round_pack_canonical(&pr, status);
c13bb2da
AB
3644}
3645
f131bae8
EC
3646static float32 QEMU_SOFTFLOAT_ATTR
3647soft_f32_sqrt(float32 a, float_status *status)
c13bb2da 3648{
98e256fc
RH
3649 FloatParts64 pa, pr;
3650
3651 float32_unpack_canonical(&pa, a, status);
3652 pr = sqrt_float(pa, status, &float32_params);
e293e927 3653 return float32_round_pack_canonical(&pr, status);
c13bb2da
AB
3654}
3655
f131bae8
EC
3656static float64 QEMU_SOFTFLOAT_ATTR
3657soft_f64_sqrt(float64 a, float_status *status)
c13bb2da 3658{
98e256fc
RH
3659 FloatParts64 pa, pr;
3660
3661 float64_unpack_canonical(&pa, a, status);
3662 pr = sqrt_float(pa, status, &float64_params);
e293e927 3663 return float64_round_pack_canonical(&pr, status);
c13bb2da
AB
3664}
3665
f131bae8
EC
3666float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3667{
3668 union_float32 ua, ur;
3669
3670 ua.s = xa;
3671 if (unlikely(!can_use_fpu(s))) {
3672 goto soft;
3673 }
3674
3675 float32_input_flush1(&ua.s, s);
3676 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3677 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3678 fpclassify(ua.h) == FP_ZERO) ||
3679 signbit(ua.h))) {
3680 goto soft;
3681 }
3682 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3683 float32_is_neg(ua.s))) {
3684 goto soft;
3685 }
3686 ur.h = sqrtf(ua.h);
3687 return ur.s;
3688
3689 soft:
3690 return soft_f32_sqrt(ua.s, s);
3691}
3692
3693float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3694{
3695 union_float64 ua, ur;
3696
3697 ua.s = xa;
3698 if (unlikely(!can_use_fpu(s))) {
3699 goto soft;
3700 }
3701
3702 float64_input_flush1(&ua.s, s);
3703 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3704 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3705 fpclassify(ua.h) == FP_ZERO) ||
3706 signbit(ua.h))) {
3707 goto soft;
3708 }
3709 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3710 float64_is_neg(ua.s))) {
3711 goto soft;
3712 }
3713 ur.h = sqrt(ua.h);
3714 return ur.s;
3715
3716 soft:
3717 return soft_f64_sqrt(ua.s, s);
3718}
3719
8282310d
LZ
3720bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3721{
98e256fc
RH
3722 FloatParts64 pa, pr;
3723
3724 bfloat16_unpack_canonical(&pa, a, status);
3725 pr = sqrt_float(pa, status, &bfloat16_params);
e293e927 3726 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
3727}
3728
0218a16e
RH
3729/*----------------------------------------------------------------------------
3730| The pattern for a default generated NaN.
3731*----------------------------------------------------------------------------*/
3732
3733float16 float16_default_nan(float_status *status)
3734{
0fc07cad
RH
3735 FloatParts64 p;
3736
3737 parts_default_nan(&p, status);
0218a16e 3738 p.frac >>= float16_params.frac_shift;
71fd178e 3739 return float16_pack_raw(&p);
0218a16e
RH
3740}
3741
3742float32 float32_default_nan(float_status *status)
3743{
0fc07cad
RH
3744 FloatParts64 p;
3745
3746 parts_default_nan(&p, status);
0218a16e 3747 p.frac >>= float32_params.frac_shift;
71fd178e 3748 return float32_pack_raw(&p);
0218a16e
RH
3749}
3750
3751float64 float64_default_nan(float_status *status)
3752{
0fc07cad
RH
3753 FloatParts64 p;
3754
3755 parts_default_nan(&p, status);
0218a16e 3756 p.frac >>= float64_params.frac_shift;
71fd178e 3757 return float64_pack_raw(&p);
0218a16e
RH
3758}
3759
3760float128 float128_default_nan(float_status *status)
3761{
e9034ea8 3762 FloatParts128 p;
0218a16e 3763
0fc07cad 3764 parts_default_nan(&p, status);
e9034ea8
RH
3765 frac_shr(&p, float128_params.frac_shift);
3766 return float128_pack_raw(&p);
0218a16e 3767}
c13bb2da 3768
8282310d
LZ
3769bfloat16 bfloat16_default_nan(float_status *status)
3770{
0fc07cad
RH
3771 FloatParts64 p;
3772
3773 parts_default_nan(&p, status);
8282310d 3774 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3775 return bfloat16_pack_raw(&p);
8282310d
LZ
3776}
3777
158142c2 3778/*----------------------------------------------------------------------------
377ed926
RH
3779| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3780*----------------------------------------------------------------------------*/
3781
3782float16 float16_silence_nan(float16 a, float_status *status)
3783{
3dddb203
RH
3784 FloatParts64 p;
3785
3786 float16_unpack_raw(&p, a);
377ed926 3787 p.frac <<= float16_params.frac_shift;
92ff426d 3788 parts_silence_nan(&p, status);
377ed926 3789 p.frac >>= float16_params.frac_shift;
71fd178e 3790 return float16_pack_raw(&p);
377ed926
RH
3791}
3792
3793float32 float32_silence_nan(float32 a, float_status *status)
3794{
3dddb203
RH
3795 FloatParts64 p;
3796
3797 float32_unpack_raw(&p, a);
377ed926 3798 p.frac <<= float32_params.frac_shift;
92ff426d 3799 parts_silence_nan(&p, status);
377ed926 3800 p.frac >>= float32_params.frac_shift;
71fd178e 3801 return float32_pack_raw(&p);
377ed926
RH
3802}
3803
3804float64 float64_silence_nan(float64 a, float_status *status)
3805{
3dddb203
RH
3806 FloatParts64 p;
3807
3808 float64_unpack_raw(&p, a);
377ed926 3809 p.frac <<= float64_params.frac_shift;
92ff426d 3810 parts_silence_nan(&p, status);
377ed926 3811 p.frac >>= float64_params.frac_shift;
71fd178e 3812 return float64_pack_raw(&p);
377ed926
RH
3813}
3814
8282310d
LZ
3815bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3816{
3dddb203
RH
3817 FloatParts64 p;
3818
3819 bfloat16_unpack_raw(&p, a);
8282310d 3820 p.frac <<= bfloat16_params.frac_shift;
92ff426d 3821 parts_silence_nan(&p, status);
8282310d 3822 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3823 return bfloat16_pack_raw(&p);
8282310d 3824}
e6b405fe 3825
0018b1f4
RH
3826float128 float128_silence_nan(float128 a, float_status *status)
3827{
3828 FloatParts128 p;
3829
3830 float128_unpack_raw(&p, a);
3831 frac_shl(&p, float128_params.frac_shift);
3832 parts_silence_nan(&p, status);
3833 frac_shr(&p, float128_params.frac_shift);
3834 return float128_pack_raw(&p);
3835}
3836
e6b405fe
AB
3837/*----------------------------------------------------------------------------
3838| If `a' is denormal and we are in flush-to-zero mode then set the
3839| input-denormal exception and return zero. Otherwise just return the value.
3840*----------------------------------------------------------------------------*/
3841
f8155c1d 3842static bool parts_squash_denormal(FloatParts64 p, float_status *status)
e6b405fe
AB
3843{
3844 if (p.exp == 0 && p.frac != 0) {
3845 float_raise(float_flag_input_denormal, status);
3846 return true;
3847 }
3848
3849 return false;
3850}
3851
3852float16 float16_squash_input_denormal(float16 a, float_status *status)
3853{
3854 if (status->flush_inputs_to_zero) {
3dddb203
RH
3855 FloatParts64 p;
3856
3857 float16_unpack_raw(&p, a);
e6b405fe
AB
3858 if (parts_squash_denormal(p, status)) {
3859 return float16_set_sign(float16_zero, p.sign);
3860 }
3861 }
3862 return a;
3863}
3864
3865float32 float32_squash_input_denormal(float32 a, float_status *status)
3866{
3867 if (status->flush_inputs_to_zero) {
3dddb203
RH
3868 FloatParts64 p;
3869
3870 float32_unpack_raw(&p, a);
e6b405fe
AB
3871 if (parts_squash_denormal(p, status)) {
3872 return float32_set_sign(float32_zero, p.sign);
3873 }
3874 }
3875 return a;
3876}
3877
3878float64 float64_squash_input_denormal(float64 a, float_status *status)
3879{
3880 if (status->flush_inputs_to_zero) {
3dddb203
RH
3881 FloatParts64 p;
3882
3883 float64_unpack_raw(&p, a);
e6b405fe
AB
3884 if (parts_squash_denormal(p, status)) {
3885 return float64_set_sign(float64_zero, p.sign);
3886 }
3887 }
3888 return a;
3889}
3890
8282310d
LZ
3891bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3892{
3893 if (status->flush_inputs_to_zero) {
3dddb203
RH
3894 FloatParts64 p;
3895
3896 bfloat16_unpack_raw(&p, a);
8282310d
LZ
3897 if (parts_squash_denormal(p, status)) {
3898 return bfloat16_set_sign(bfloat16_zero, p.sign);
3899 }
3900 }
3901 return a;
3902}
3903
377ed926 3904/*----------------------------------------------------------------------------
158142c2
FB
3905| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3906| and 7, and returns the properly rounded 32-bit integer corresponding to the
3907| input. If `zSign' is 1, the input is negated before being converted to an
3908| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3909| is simply rounded to an integer, with the inexact exception raised if the
3910| input cannot be represented exactly as an integer. However, if the fixed-
3911| point input is too large, the invalid exception is raised and the largest
3912| positive or negative integer is returned.
3913*----------------------------------------------------------------------------*/
3914
c120391c
RH
3915static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
3916 float_status *status)
158142c2 3917{
8f506c70 3918 int8_t roundingMode;
c120391c 3919 bool roundNearestEven;
8f506c70 3920 int8_t roundIncrement, roundBits;
760e1416 3921 int32_t z;
158142c2 3922
a2f2d288 3923 roundingMode = status->float_rounding_mode;
158142c2 3924 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3925 switch (roundingMode) {
3926 case float_round_nearest_even:
f9288a76 3927 case float_round_ties_away:
dc355b76
PM
3928 roundIncrement = 0x40;
3929 break;
3930 case float_round_to_zero:
3931 roundIncrement = 0;
3932 break;
3933 case float_round_up:
3934 roundIncrement = zSign ? 0 : 0x7f;
3935 break;
3936 case float_round_down:
3937 roundIncrement = zSign ? 0x7f : 0;
3938 break;
5d64abb3
RH
3939 case float_round_to_odd:
3940 roundIncrement = absZ & 0x80 ? 0 : 0x7f;
3941 break;
dc355b76
PM
3942 default:
3943 abort();
158142c2
FB
3944 }
3945 roundBits = absZ & 0x7F;
3946 absZ = ( absZ + roundIncrement )>>7;
40662886
PMD
3947 if (!(roundBits ^ 0x40) && roundNearestEven) {
3948 absZ &= ~1;
3949 }
158142c2
FB
3950 z = absZ;
3951 if ( zSign ) z = - z;
3952 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 3953 float_raise(float_flag_invalid, status);
2c217da0 3954 return zSign ? INT32_MIN : INT32_MAX;
158142c2 3955 }
a2f2d288 3956 if (roundBits) {
d82f3b2d 3957 float_raise(float_flag_inexact, status);
a2f2d288 3958 }
158142c2
FB
3959 return z;
3960
3961}
3962
3963/*----------------------------------------------------------------------------
3964| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3965| `absZ1', with binary point between bits 63 and 64 (between the input words),
3966| and returns the properly rounded 64-bit integer corresponding to the input.
3967| If `zSign' is 1, the input is negated before being converted to an integer.
3968| Ordinarily, the fixed-point input is simply rounded to an integer, with
3969| the inexact exception raised if the input cannot be represented exactly as
3970| an integer. However, if the fixed-point input is too large, the invalid
3971| exception is raised and the largest positive or negative integer is
3972| returned.
3973*----------------------------------------------------------------------------*/
3974
c120391c 3975static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 3976 float_status *status)
158142c2 3977{
8f506c70 3978 int8_t roundingMode;
c120391c 3979 bool roundNearestEven, increment;
760e1416 3980 int64_t z;
158142c2 3981
a2f2d288 3982 roundingMode = status->float_rounding_mode;
158142c2 3983 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3984 switch (roundingMode) {
3985 case float_round_nearest_even:
f9288a76 3986 case float_round_ties_away:
dc355b76
PM
3987 increment = ((int64_t) absZ1 < 0);
3988 break;
3989 case float_round_to_zero:
3990 increment = 0;
3991 break;
3992 case float_round_up:
3993 increment = !zSign && absZ1;
3994 break;
3995 case float_round_down:
3996 increment = zSign && absZ1;
3997 break;
5d64abb3
RH
3998 case float_round_to_odd:
3999 increment = !(absZ0 & 1) && absZ1;
4000 break;
dc355b76
PM
4001 default:
4002 abort();
158142c2
FB
4003 }
4004 if ( increment ) {
4005 ++absZ0;
4006 if ( absZ0 == 0 ) goto overflow;
40662886
PMD
4007 if (!(absZ1 << 1) && roundNearestEven) {
4008 absZ0 &= ~1;
4009 }
158142c2
FB
4010 }
4011 z = absZ0;
4012 if ( zSign ) z = - z;
4013 if ( z && ( ( z < 0 ) ^ zSign ) ) {
4014 overflow:
ff32e16e 4015 float_raise(float_flag_invalid, status);
2c217da0 4016 return zSign ? INT64_MIN : INT64_MAX;
158142c2 4017 }
a2f2d288 4018 if (absZ1) {
d82f3b2d 4019 float_raise(float_flag_inexact, status);
a2f2d288 4020 }
158142c2
FB
4021 return z;
4022
4023}
4024
fb3ea83a
TM
4025/*----------------------------------------------------------------------------
4026| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4027| `absZ1', with binary point between bits 63 and 64 (between the input words),
4028| and returns the properly rounded 64-bit unsigned integer corresponding to the
4029| input. Ordinarily, the fixed-point input is simply rounded to an integer,
4030| with the inexact exception raised if the input cannot be represented exactly
4031| as an integer. However, if the fixed-point input is too large, the invalid
4032| exception is raised and the largest unsigned integer is returned.
4033*----------------------------------------------------------------------------*/
4034
c120391c 4035static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
e5a41ffa 4036 uint64_t absZ1, float_status *status)
fb3ea83a 4037{
8f506c70 4038 int8_t roundingMode;
c120391c 4039 bool roundNearestEven, increment;
fb3ea83a 4040
a2f2d288 4041 roundingMode = status->float_rounding_mode;
fb3ea83a 4042 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
4043 switch (roundingMode) {
4044 case float_round_nearest_even:
f9288a76 4045 case float_round_ties_away:
dc355b76
PM
4046 increment = ((int64_t)absZ1 < 0);
4047 break;
4048 case float_round_to_zero:
4049 increment = 0;
4050 break;
4051 case float_round_up:
4052 increment = !zSign && absZ1;
4053 break;
4054 case float_round_down:
4055 increment = zSign && absZ1;
4056 break;
5d64abb3
RH
4057 case float_round_to_odd:
4058 increment = !(absZ0 & 1) && absZ1;
4059 break;
dc355b76
PM
4060 default:
4061 abort();
fb3ea83a
TM
4062 }
4063 if (increment) {
4064 ++absZ0;
4065 if (absZ0 == 0) {
ff32e16e 4066 float_raise(float_flag_invalid, status);
2c217da0 4067 return UINT64_MAX;
fb3ea83a 4068 }
40662886
PMD
4069 if (!(absZ1 << 1) && roundNearestEven) {
4070 absZ0 &= ~1;
4071 }
fb3ea83a
TM
4072 }
4073
4074 if (zSign && absZ0) {
ff32e16e 4075 float_raise(float_flag_invalid, status);
fb3ea83a
TM
4076 return 0;
4077 }
4078
4079 if (absZ1) {
d82f3b2d 4080 float_raise(float_flag_inexact, status);
fb3ea83a
TM
4081 }
4082 return absZ0;
4083}
4084
/*----------------------------------------------------------------------------
| Normalizes the denormalized single-precision significand `aSig'.  The
| significand is shifted left by (clz32(aSig) - 8) and written to `*zSigPtr';
| the matching exponent, 1 minus that shift, is written to `*zExpPtr'.
| Assuming clz32 returns the number of leading zero bits, the leading set bit
| of the result ends up at bit 23.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    const int8_t shift = clz32(aSig) - 8;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4102
158142c2
FB
4103/*----------------------------------------------------------------------------
4104| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4105| and significand `zSig', and returns the proper single-precision floating-
4106| point value corresponding to the abstract input. Ordinarily, the abstract
4107| value is simply rounded and packed into the single-precision format, with
4108| the inexact exception raised if the abstract input cannot be represented
4109| exactly. However, if the abstract value is too large, the overflow and
4110| inexact exceptions are raised and an infinity or maximal finite value is
4111| returned. If the abstract value is too small, the input value is rounded to
4112| a subnormal number, and the underflow and inexact exceptions are raised if
4113| the abstract input cannot be represented exactly as a subnormal single-
4114| precision floating-point number.
4115| The input significand `zSig' has its binary point between bits 30
4116| and 29, which is 7 bits to the left of the usual location. This shifted
4117| significand must be normalized or smaller. If `zSig' is not normalized,
4118| `zExp' must be 0; in that case, the result returned is a subnormal number,
4119| and it must not require rounding. In the usual case that `zSig' is
4120| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4121| The handling of underflow and overflow follows the IEC/IEEE Standard for
4122| Binary Floating-Point Arithmetic.
4123*----------------------------------------------------------------------------*/
4124
c120391c 4125static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4126 float_status *status)
158142c2 4127{
8f506c70 4128 int8_t roundingMode;
c120391c 4129 bool roundNearestEven;
8f506c70 4130 int8_t roundIncrement, roundBits;
c120391c 4131 bool isTiny;
158142c2 4132
a2f2d288 4133 roundingMode = status->float_rounding_mode;
158142c2 4134 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4135 switch (roundingMode) {
4136 case float_round_nearest_even:
f9288a76 4137 case float_round_ties_away:
dc355b76
PM
4138 roundIncrement = 0x40;
4139 break;
4140 case float_round_to_zero:
4141 roundIncrement = 0;
4142 break;
4143 case float_round_up:
4144 roundIncrement = zSign ? 0 : 0x7f;
4145 break;
4146 case float_round_down:
4147 roundIncrement = zSign ? 0x7f : 0;
4148 break;
5d64abb3
RH
4149 case float_round_to_odd:
4150 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4151 break;
dc355b76
PM
4152 default:
4153 abort();
4154 break;
158142c2
FB
4155 }
4156 roundBits = zSig & 0x7F;
bb98fe42 4157 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
4158 if ( ( 0xFD < zExp )
4159 || ( ( zExp == 0xFD )
bb98fe42 4160 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4161 ) {
5d64abb3
RH
4162 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4163 roundIncrement != 0;
ff32e16e 4164 float_raise(float_flag_overflow | float_flag_inexact, status);
5d64abb3 4165 return packFloat32(zSign, 0xFF, -!overflow_to_inf);
158142c2
FB
4166 }
4167 if ( zExp < 0 ) {
a2f2d288 4168 if (status->flush_to_zero) {
ff32e16e 4169 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4170 return packFloat32(zSign, 0, 0);
4171 }
a828b373
RH
4172 isTiny = status->tininess_before_rounding
4173 || (zExp < -1)
4174 || (zSig + roundIncrement < 0x80000000);
158142c2
FB
4175 shift32RightJamming( zSig, - zExp, &zSig );
4176 zExp = 0;
4177 roundBits = zSig & 0x7F;
ff32e16e
PM
4178 if (isTiny && roundBits) {
4179 float_raise(float_flag_underflow, status);
4180 }
5d64abb3
RH
4181 if (roundingMode == float_round_to_odd) {
4182 /*
4183 * For round-to-odd case, the roundIncrement depends on
4184 * zSig which just changed.
4185 */
4186 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4187 }
158142c2
FB
4188 }
4189 }
a2f2d288 4190 if (roundBits) {
d82f3b2d 4191 float_raise(float_flag_inexact, status);
a2f2d288 4192 }
158142c2 4193 zSig = ( zSig + roundIncrement )>>7;
40662886
PMD
4194 if (!(roundBits ^ 0x40) && roundNearestEven) {
4195 zSig &= ~1;
4196 }
158142c2
FB
4197 if ( zSig == 0 ) zExp = 0;
4198 return packFloat32( zSign, zExp, zSig );
4199
4200}
4201
4202/*----------------------------------------------------------------------------
4203| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4204| and significand `zSig', and returns the proper single-precision floating-
4205| point value corresponding to the abstract input. This routine is just like
4206| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4207| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4208| floating-point exponent.
4209*----------------------------------------------------------------------------*/
4210
4211static float32
c120391c 4212 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4213 float_status *status)
158142c2 4214{
8f506c70 4215 int8_t shiftCount;
158142c2 4216
0019d5c3 4217 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
4218 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4219 status);
158142c2
FB
4220
4221}
4222
/*----------------------------------------------------------------------------
| Normalizes the denormalized double-precision significand `aSig'.  The
| significand is shifted left by (clz64(aSig) - 11) and written to
| `*zSigPtr'; the matching exponent, 1 minus that shift, is written to
| `*zExpPtr'.  Assuming clz64 returns the number of leading zero bits, the
| leading set bit of the result ends up at bit 52.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    const int8_t shift = clz64(aSig) - 11;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4240
4241/*----------------------------------------------------------------------------
4242| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4243| double-precision floating-point value, returning the result. After being
4244| shifted into the proper positions, the three fields are simply added
4245| together to form the result. This means that any integer portion of `zSig'
4246| will be added into the exponent. Since a properly normalized significand
4247| will have an integer portion equal to 1, the `zExp' input should be 1 less
4248| than the desired result exponent whenever `zSig' is a complete, normalized
4249| significand.
4250*----------------------------------------------------------------------------*/
4251
c120391c 4252static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
158142c2
FB
4253{
4254
f090c9d4 4255 return make_float64(
bb98fe42 4256 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
4257
4258}
4259
4260/*----------------------------------------------------------------------------
4261| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4262| and significand `zSig', and returns the proper double-precision floating-
4263| point value corresponding to the abstract input. Ordinarily, the abstract
4264| value is simply rounded and packed into the double-precision format, with
4265| the inexact exception raised if the abstract input cannot be represented
4266| exactly. However, if the abstract value is too large, the overflow and
4267| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
4268| returned. If the abstract value is too small, the input value is rounded to
4269| a subnormal number, and the underflow and inexact exceptions are raised if
4270| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
4271| precision floating-point number.
4272| The input significand `zSig' has its binary point between bits 62
4273| and 61, which is 10 bits to the left of the usual location. This shifted
4274| significand must be normalized or smaller. If `zSig' is not normalized,
4275| `zExp' must be 0; in that case, the result returned is a subnormal number,
4276| and it must not require rounding. In the usual case that `zSig' is
4277| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4278| The handling of underflow and overflow follows the IEC/IEEE Standard for
4279| Binary Floating-Point Arithmetic.
4280*----------------------------------------------------------------------------*/
4281
c120391c 4282static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4283 float_status *status)
158142c2 4284{
8f506c70 4285 int8_t roundingMode;
c120391c 4286 bool roundNearestEven;
0c48262d 4287 int roundIncrement, roundBits;
c120391c 4288 bool isTiny;
158142c2 4289
a2f2d288 4290 roundingMode = status->float_rounding_mode;
158142c2 4291 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4292 switch (roundingMode) {
4293 case float_round_nearest_even:
f9288a76 4294 case float_round_ties_away:
dc355b76
PM
4295 roundIncrement = 0x200;
4296 break;
4297 case float_round_to_zero:
4298 roundIncrement = 0;
4299 break;
4300 case float_round_up:
4301 roundIncrement = zSign ? 0 : 0x3ff;
4302 break;
4303 case float_round_down:
4304 roundIncrement = zSign ? 0x3ff : 0;
4305 break;
9ee6f678
BR
4306 case float_round_to_odd:
4307 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4308 break;
dc355b76
PM
4309 default:
4310 abort();
158142c2
FB
4311 }
4312 roundBits = zSig & 0x3FF;
bb98fe42 4313 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
4314 if ( ( 0x7FD < zExp )
4315 || ( ( zExp == 0x7FD )
bb98fe42 4316 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4317 ) {
9ee6f678
BR
4318 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4319 roundIncrement != 0;
ff32e16e 4320 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 4321 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
4322 }
4323 if ( zExp < 0 ) {
a2f2d288 4324 if (status->flush_to_zero) {
ff32e16e 4325 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4326 return packFloat64(zSign, 0, 0);
4327 }
a828b373
RH
4328 isTiny = status->tininess_before_rounding
4329 || (zExp < -1)
4330 || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
158142c2
FB
4331 shift64RightJamming( zSig, - zExp, &zSig );
4332 zExp = 0;
4333 roundBits = zSig & 0x3FF;
ff32e16e
PM
4334 if (isTiny && roundBits) {
4335 float_raise(float_flag_underflow, status);
4336 }
9ee6f678
BR
4337 if (roundingMode == float_round_to_odd) {
4338 /*
4339 * For round-to-odd case, the roundIncrement depends on
4340 * zSig which just changed.
4341 */
4342 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4343 }
158142c2
FB
4344 }
4345 }
a2f2d288 4346 if (roundBits) {
d82f3b2d 4347 float_raise(float_flag_inexact, status);
a2f2d288 4348 }
158142c2 4349 zSig = ( zSig + roundIncrement )>>10;
40662886
PMD
4350 if (!(roundBits ^ 0x200) && roundNearestEven) {
4351 zSig &= ~1;
4352 }
158142c2
FB
4353 if ( zSig == 0 ) zExp = 0;
4354 return packFloat64( zSign, zExp, zSig );
4355
4356}
4357
4358/*----------------------------------------------------------------------------
4359| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4360| and significand `zSig', and returns the proper double-precision floating-
4361| point value corresponding to the abstract input. This routine is just like
4362| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4363| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4364| floating-point exponent.
4365*----------------------------------------------------------------------------*/
4366
4367static float64
c120391c 4368 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4369 float_status *status)
158142c2 4370{
8f506c70 4371 int8_t shiftCount;
158142c2 4372
0019d5c3 4373 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
4374 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4375 status);
158142c2
FB
4376
4377}
4378
/*----------------------------------------------------------------------------
| Normalizes the denormalized extended double-precision significand `aSig'.
| The significand is shifted left by clz64(aSig) and written to `*zSigPtr';
| the matching exponent, 1 minus that shift, is written to `*zExpPtr'.
| Assuming clz64 returns the number of leading zero bits, the leading set bit
| of the result ends up at bit 63.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    const int8_t shift = clz64(aSig);

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4395
4396/*----------------------------------------------------------------------------
4397| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4398| and extended significand formed by the concatenation of `zSig0' and `zSig1',
4399| and returns the proper extended double-precision floating-point value
4400| corresponding to the abstract input. Ordinarily, the abstract value is
4401| rounded and packed into the extended double-precision format, with the
4402| inexact exception raised if the abstract input cannot be represented
4403| exactly. However, if the abstract value is too large, the overflow and
4404| inexact exceptions are raised and an infinity or maximal finite value is
4405| returned. If the abstract value is too small, the input value is rounded to
4406| a subnormal number, and the underflow and inexact exceptions are raised if
4407| the abstract input cannot be represented exactly as a subnormal extended
4408| double-precision floating-point number.
4409| If `roundingPrecision' is 32 or 64, the result is rounded to the same
4410| number of bits as single or double precision, respectively. Otherwise, the
4411| result is rounded to the full precision of the extended double-precision
4412| format.
4413| The input significand must be normalized or smaller. If the input
4414| significand is not normalized, `zExp' must be 0; in that case, the result
4415| returned is a subnormal number, and it must not require rounding. The
4416| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4417| Floating-Point Arithmetic.
4418*----------------------------------------------------------------------------*/
4419
c120391c 4420floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
88857aca
LV
4421 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4422 float_status *status)
158142c2 4423{
8f506c70 4424 int8_t roundingMode;
c120391c 4425 bool roundNearestEven, increment, isTiny;
f42c2224 4426 int64_t roundIncrement, roundMask, roundBits;
158142c2 4427
a2f2d288 4428 roundingMode = status->float_rounding_mode;
158142c2
FB
4429 roundNearestEven = ( roundingMode == float_round_nearest_even );
4430 if ( roundingPrecision == 80 ) goto precision80;
4431 if ( roundingPrecision == 64 ) {
e9321124
AB
4432 roundIncrement = UINT64_C(0x0000000000000400);
4433 roundMask = UINT64_C(0x00000000000007FF);
158142c2
FB
4434 }
4435 else if ( roundingPrecision == 32 ) {
e9321124
AB
4436 roundIncrement = UINT64_C(0x0000008000000000);
4437 roundMask = UINT64_C(0x000000FFFFFFFFFF);
158142c2
FB
4438 }
4439 else {
4440 goto precision80;
4441 }
4442 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
4443 switch (roundingMode) {
4444 case float_round_nearest_even:
f9288a76 4445 case float_round_ties_away:
dc355b76
PM
4446 break;
4447 case float_round_to_zero:
4448 roundIncrement = 0;
4449 break;
4450 case float_round_up:
4451 roundIncrement = zSign ? 0 : roundMask;
4452 break;
4453 case float_round_down:
4454 roundIncrement = zSign ? roundMask : 0;
4455 break;
4456 default:
4457 abort();
158142c2
FB
4458 }
4459 roundBits = zSig0 & roundMask;
bb98fe42 4460 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4461 if ( ( 0x7FFE < zExp )
4462 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4463 ) {
4464 goto overflow;
4465 }
4466 if ( zExp <= 0 ) {
a2f2d288 4467 if (status->flush_to_zero) {
ff32e16e 4468 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4469 return packFloatx80(zSign, 0, 0);
4470 }
a828b373
RH
4471 isTiny = status->tininess_before_rounding
4472 || (zExp < 0 )
4473 || (zSig0 <= zSig0 + roundIncrement);
158142c2
FB
4474 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4475 zExp = 0;
4476 roundBits = zSig0 & roundMask;
ff32e16e
PM
4477 if (isTiny && roundBits) {
4478 float_raise(float_flag_underflow, status);
4479 }
a2f2d288 4480 if (roundBits) {
d82f3b2d 4481 float_raise(float_flag_inexact, status);
a2f2d288 4482 }
158142c2 4483 zSig0 += roundIncrement;
bb98fe42 4484 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4485 roundIncrement = roundMask + 1;
4486 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4487 roundMask |= roundIncrement;
4488 }
4489 zSig0 &= ~ roundMask;
4490 return packFloatx80( zSign, zExp, zSig0 );
4491 }
4492 }
a2f2d288 4493 if (roundBits) {
d82f3b2d 4494 float_raise(float_flag_inexact, status);
a2f2d288 4495 }
158142c2
FB
4496 zSig0 += roundIncrement;
4497 if ( zSig0 < roundIncrement ) {
4498 ++zExp;
e9321124 4499 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4500 }
4501 roundIncrement = roundMask + 1;
4502 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4503 roundMask |= roundIncrement;
4504 }
4505 zSig0 &= ~ roundMask;
4506 if ( zSig0 == 0 ) zExp = 0;
4507 return packFloatx80( zSign, zExp, zSig0 );
4508 precision80:
dc355b76
PM
4509 switch (roundingMode) {
4510 case float_round_nearest_even:
f9288a76 4511 case float_round_ties_away:
dc355b76
PM
4512 increment = ((int64_t)zSig1 < 0);
4513 break;
4514 case float_round_to_zero:
4515 increment = 0;
4516 break;
4517 case float_round_up:
4518 increment = !zSign && zSig1;
4519 break;
4520 case float_round_down:
4521 increment = zSign && zSig1;
4522 break;
4523 default:
4524 abort();
158142c2 4525 }
bb98fe42 4526 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4527 if ( ( 0x7FFE < zExp )
4528 || ( ( zExp == 0x7FFE )
e9321124 4529 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
158142c2
FB
4530 && increment
4531 )
4532 ) {
4533 roundMask = 0;
4534 overflow:
ff32e16e 4535 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4536 if ( ( roundingMode == float_round_to_zero )
4537 || ( zSign && ( roundingMode == float_round_up ) )
4538 || ( ! zSign && ( roundingMode == float_round_down ) )
4539 ) {
4540 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4541 }
0f605c88
LV
4542 return packFloatx80(zSign,
4543 floatx80_infinity_high,
4544 floatx80_infinity_low);
158142c2
FB
4545 }
4546 if ( zExp <= 0 ) {
a828b373
RH
4547 isTiny = status->tininess_before_rounding
4548 || (zExp < 0)
4549 || !increment
4550 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4551 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4552 zExp = 0;
ff32e16e
PM
4553 if (isTiny && zSig1) {
4554 float_raise(float_flag_underflow, status);
4555 }
a2f2d288 4556 if (zSig1) {
d82f3b2d 4557 float_raise(float_flag_inexact, status);
a2f2d288 4558 }
dc355b76
PM
4559 switch (roundingMode) {
4560 case float_round_nearest_even:
f9288a76 4561 case float_round_ties_away:
dc355b76
PM
4562 increment = ((int64_t)zSig1 < 0);
4563 break;
4564 case float_round_to_zero:
4565 increment = 0;
4566 break;
4567 case float_round_up:
4568 increment = !zSign && zSig1;
4569 break;
4570 case float_round_down:
4571 increment = zSign && zSig1;
4572 break;
4573 default:
4574 abort();
158142c2
FB
4575 }
4576 if ( increment ) {
4577 ++zSig0;
40662886
PMD
4578 if (!(zSig1 << 1) && roundNearestEven) {
4579 zSig0 &= ~1;
4580 }
bb98fe42 4581 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4582 }
4583 return packFloatx80( zSign, zExp, zSig0 );
4584 }
4585 }
a2f2d288 4586 if (zSig1) {
d82f3b2d 4587 float_raise(float_flag_inexact, status);
a2f2d288 4588 }
158142c2
FB
4589 if ( increment ) {
4590 ++zSig0;
4591 if ( zSig0 == 0 ) {
4592 ++zExp;
e9321124 4593 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4594 }
4595 else {
40662886
PMD
4596 if (!(zSig1 << 1) && roundNearestEven) {
4597 zSig0 &= ~1;
4598 }
158142c2
FB
4599 }
4600 }
4601 else {
4602 if ( zSig0 == 0 ) zExp = 0;
4603 }
4604 return packFloatx80( zSign, zExp, zSig0 );
4605
4606}
4607
4608/*----------------------------------------------------------------------------
4609| Takes an abstract floating-point value having sign `zSign', exponent
4610| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4611| and returns the proper extended double-precision floating-point value
4612| corresponding to the abstract input. This routine is just like
4613| `roundAndPackFloatx80' except that the input significand does not have to be
4614| normalized.
4615*----------------------------------------------------------------------------*/
4616
88857aca 4617floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
c120391c 4618 bool zSign, int32_t zExp,
88857aca
LV
4619 uint64_t zSig0, uint64_t zSig1,
4620 float_status *status)
158142c2 4621{
8f506c70 4622 int8_t shiftCount;
158142c2
FB
4623
4624 if ( zSig0 == 0 ) {
4625 zSig0 = zSig1;
4626 zSig1 = 0;
4627 zExp -= 64;
4628 }
0019d5c3 4629 shiftCount = clz64(zSig0);
158142c2
FB
4630 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4631 zExp -= shiftCount;
ff32e16e
PM
4632 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4633 zSig0, zSig1, status);
158142c2
FB
4634
4635}
4636
158142c2
FB
4637/*----------------------------------------------------------------------------
4638| Returns the least-significant 64 fraction bits of the quadruple-precision
4639| floating-point value `a'.
4640*----------------------------------------------------------------------------*/
4641
a49db98d 4642static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
4643{
4644
4645 return a.low;
4646
4647}
4648
4649/*----------------------------------------------------------------------------
4650| Returns the most-significant 48 fraction bits of the quadruple-precision
4651| floating-point value `a'.
4652*----------------------------------------------------------------------------*/
4653
a49db98d 4654static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
4655{
4656
e9321124 4657 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
158142c2
FB
4658
4659}
4660
4661/*----------------------------------------------------------------------------
4662| Returns the exponent bits of the quadruple-precision floating-point value
4663| `a'.
4664*----------------------------------------------------------------------------*/
4665
f4014512 4666static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
4667{
4668
4669 return ( a.high>>48 ) & 0x7FFF;
4670
4671}
4672
4673/*----------------------------------------------------------------------------
4674| Returns the sign bit of the quadruple-precision floating-point value `a'.
4675*----------------------------------------------------------------------------*/
4676
c120391c 4677static inline bool extractFloat128Sign(float128 a)
158142c2 4678{
c120391c 4679 return a.high >> 63;
158142c2
FB
4680}
4681
/*----------------------------------------------------------------------------
| Normalizes the denormalized quadruple-precision significand formed by
| `aSig0' (high) and `aSig1' (low).  The normalized exponent is written to
| `*zExpPtr', the most-significant 49 bits of the normalized significand to
| `*zSig0Ptr' and the least-significant 64 bits to `*zSig1Ptr'.
|
| The shift amount is (clz64 of the first non-zero word) - 15.  When `aSig0'
| is zero that amount may be negative, in which case `aSig1' is split across
| the two output words instead of being shifted left as a whole.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shift;

    if (aSig0 != 0) {
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /* Only the low word carries bits. */
    shift = clz64(aSig1) - 15;
    if (shift >= 0) {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    } else {
        /* Top bits go to the high word, the rest stays in the low word. */
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    }
    *zExpPtr = -shift - 63;
}
4722
4723/*----------------------------------------------------------------------------
4724| Packs the sign `zSign', the exponent `zExp', and the significand formed
4725| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4726| floating-point value, returning the result. After being shifted into the
4727| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4728| added together to form the most significant 32 bits of the result. This
4729| means that any integer portion of `zSig0' will be added into the exponent.
4730| Since a properly normalized significand will have an integer portion equal
4731| to 1, the `zExp' input should be 1 less than the desired result exponent
4732| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4733| significand.
4734*----------------------------------------------------------------------------*/
4735
a49db98d 4736static inline float128
c120391c 4737packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
158142c2
FB
4738{
4739 float128 z;
4740
4741 z.low = zSig1;
c120391c 4742 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
158142c2 4743 return z;
158142c2
FB
4744}
4745
4746/*----------------------------------------------------------------------------
4747| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4748| and extended significand formed by the concatenation of `zSig0', `zSig1',
4749| and `zSig2', and returns the proper quadruple-precision floating-point value
4750| corresponding to the abstract input. Ordinarily, the abstract value is
4751| simply rounded and packed into the quadruple-precision format, with the
4752| inexact exception raised if the abstract input cannot be represented
4753| exactly. However, if the abstract value is too large, the overflow and
4754| inexact exceptions are raised and an infinity or maximal finite value is
4755| returned. If the abstract value is too small, the input value is rounded to
4756| a subnormal number, and the underflow and inexact exceptions are raised if
4757| the abstract input cannot be represented exactly as a subnormal quadruple-
4758| precision floating-point number.
4759| The input significand must be normalized or smaller. If the input
4760| significand is not normalized, `zExp' must be 0; in that case, the result
4761| returned is a subnormal number, and it must not require rounding. In the
4762| usual case that the input significand is normalized, `zExp' must be 1 less
4763| than the ``true'' floating-point exponent. The handling of underflow and
4764| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4765*----------------------------------------------------------------------------*/
4766
c120391c 4767static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4768 uint64_t zSig0, uint64_t zSig1,
4769 uint64_t zSig2, float_status *status)
158142c2 4770{
8f506c70 4771 int8_t roundingMode;
c120391c 4772 bool roundNearestEven, increment, isTiny;
158142c2 4773
a2f2d288 4774 roundingMode = status->float_rounding_mode;
158142c2 4775 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4776 switch (roundingMode) {
4777 case float_round_nearest_even:
f9288a76 4778 case float_round_ties_away:
dc355b76
PM
4779 increment = ((int64_t)zSig2 < 0);
4780 break;
4781 case float_round_to_zero:
4782 increment = 0;
4783 break;
4784 case float_round_up:
4785 increment = !zSign && zSig2;
4786 break;
4787 case float_round_down:
4788 increment = zSign && zSig2;
4789 break;
9ee6f678
BR
4790 case float_round_to_odd:
4791 increment = !(zSig1 & 0x1) && zSig2;
4792 break;
dc355b76
PM
4793 default:
4794 abort();
158142c2 4795 }
bb98fe42 4796 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
4797 if ( ( 0x7FFD < zExp )
4798 || ( ( zExp == 0x7FFD )
4799 && eq128(
e9321124
AB
4800 UINT64_C(0x0001FFFFFFFFFFFF),
4801 UINT64_C(0xFFFFFFFFFFFFFFFF),
158142c2
FB
4802 zSig0,
4803 zSig1
4804 )
4805 && increment
4806 )
4807 ) {
ff32e16e 4808 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4809 if ( ( roundingMode == float_round_to_zero )
4810 || ( zSign && ( roundingMode == float_round_up ) )
4811 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 4812 || (roundingMode == float_round_to_odd)
158142c2
FB
4813 ) {
4814 return
4815 packFloat128(
4816 zSign,
4817 0x7FFE,
e9321124
AB
4818 UINT64_C(0x0000FFFFFFFFFFFF),
4819 UINT64_C(0xFFFFFFFFFFFFFFFF)
158142c2
FB
4820 );
4821 }
4822 return packFloat128( zSign, 0x7FFF, 0, 0 );
4823 }
4824 if ( zExp < 0 ) {
a2f2d288 4825 if (status->flush_to_zero) {
ff32e16e 4826 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4827 return packFloat128(zSign, 0, 0, 0);
4828 }
a828b373
RH
4829 isTiny = status->tininess_before_rounding
4830 || (zExp < -1)
4831 || !increment
4832 || lt128(zSig0, zSig1,
4833 UINT64_C(0x0001FFFFFFFFFFFF),
4834 UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4835 shift128ExtraRightJamming(
4836 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4837 zExp = 0;
ff32e16e
PM
4838 if (isTiny && zSig2) {
4839 float_raise(float_flag_underflow, status);
4840 }
dc355b76
PM
4841 switch (roundingMode) {
4842 case float_round_nearest_even:
f9288a76 4843 case float_round_ties_away:
dc355b76
PM
4844 increment = ((int64_t)zSig2 < 0);
4845 break;
4846 case float_round_to_zero:
4847 increment = 0;
4848 break;
4849 case float_round_up:
4850 increment = !zSign && zSig2;
4851 break;
4852 case float_round_down:
4853 increment = zSign && zSig2;
4854 break;
9ee6f678
BR
4855 case float_round_to_odd:
4856 increment = !(zSig1 & 0x1) && zSig2;
4857 break;
dc355b76
PM
4858 default:
4859 abort();
158142c2
FB
4860 }
4861 }
4862 }
a2f2d288 4863 if (zSig2) {
d82f3b2d 4864 float_raise(float_flag_inexact, status);
a2f2d288 4865 }
158142c2
FB
4866 if ( increment ) {
4867 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
40662886
PMD
4868 if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4869 zSig1 &= ~1;
4870 }
158142c2
FB
4871 }
4872 else {
4873 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4874 }
4875 return packFloat128( zSign, zExp, zSig0, zSig1 );
4876
4877}
4878
4879/*----------------------------------------------------------------------------
4880| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4881| and significand formed by the concatenation of `zSig0' and `zSig1', and
4882| returns the proper quadruple-precision floating-point value corresponding
4883| to the abstract input. This routine is just like `roundAndPackFloat128'
4884| except that the input significand has fewer bits and does not have to be
4885| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4886| point exponent.
4887*----------------------------------------------------------------------------*/
4888
c120391c 4889static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4890 uint64_t zSig0, uint64_t zSig1,
4891 float_status *status)
158142c2 4892{
8f506c70 4893 int8_t shiftCount;
bb98fe42 4894 uint64_t zSig2;
158142c2
FB
4895
4896 if ( zSig0 == 0 ) {
4897 zSig0 = zSig1;
4898 zSig1 = 0;
4899 zExp -= 64;
4900 }
0019d5c3 4901 shiftCount = clz64(zSig0) - 15;
158142c2
FB
4902 if ( 0 <= shiftCount ) {
4903 zSig2 = 0;
4904 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4905 }
4906 else {
4907 shift128ExtraRightJamming(
4908 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4909 }
4910 zExp -= shiftCount;
ff32e16e 4911 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
4912
4913}
4914
158142c2 4915
158142c2
FB
4916/*----------------------------------------------------------------------------
4917| Returns the result of converting the 32-bit two's complement integer `a'
4918| to the extended double-precision floating-point format. The conversion
4919| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4920| Arithmetic.
4921*----------------------------------------------------------------------------*/
4922
e5a41ffa 4923floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2 4924{
c120391c 4925 bool zSign;
3a87d009 4926 uint32_t absA;
8f506c70 4927 int8_t shiftCount;
bb98fe42 4928 uint64_t zSig;
158142c2
FB
4929
4930 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4931 zSign = ( a < 0 );
4932 absA = zSign ? - a : a;
0019d5c3 4933 shiftCount = clz32(absA) + 32;
158142c2
FB
4934 zSig = absA;
4935 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4936
4937}
4938
158142c2
FB
4939/*----------------------------------------------------------------------------
4940| Returns the result of converting the 32-bit two's complement integer `a' to
4941| the quadruple-precision floating-point format. The conversion is performed
4942| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4943*----------------------------------------------------------------------------*/
4944
e5a41ffa 4945float128 int32_to_float128(int32_t a, float_status *status)
158142c2 4946{
c120391c 4947 bool zSign;
3a87d009 4948 uint32_t absA;
8f506c70 4949 int8_t shiftCount;
bb98fe42 4950 uint64_t zSig0;
158142c2
FB
4951
4952 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4953 zSign = ( a < 0 );
4954 absA = zSign ? - a : a;
0019d5c3 4955 shiftCount = clz32(absA) + 17;
158142c2
FB
4956 zSig0 = absA;
4957 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
4958
4959}
4960
158142c2
FB
4961/*----------------------------------------------------------------------------
4962| Returns the result of converting the 64-bit two's complement integer `a'
4963| to the extended double-precision floating-point format. The conversion
4964| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4965| Arithmetic.
4966*----------------------------------------------------------------------------*/
4967
e5a41ffa 4968floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2 4969{
c120391c 4970 bool zSign;
182f42fd 4971 uint64_t absA;
8f506c70 4972 int8_t shiftCount;
158142c2
FB
4973
4974 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4975 zSign = ( a < 0 );
4976 absA = zSign ? - a : a;
0019d5c3 4977 shiftCount = clz64(absA);
158142c2
FB
4978 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4979
4980}
4981
158142c2
FB
4982/*----------------------------------------------------------------------------
4983| Returns the result of converting the 64-bit two's complement integer `a' to
4984| the quadruple-precision floating-point format. The conversion is performed
4985| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4986*----------------------------------------------------------------------------*/
4987
e5a41ffa 4988float128 int64_to_float128(int64_t a, float_status *status)
158142c2 4989{
c120391c 4990 bool zSign;
182f42fd 4991 uint64_t absA;
8f506c70 4992 int8_t shiftCount;
f4014512 4993 int32_t zExp;
bb98fe42 4994 uint64_t zSig0, zSig1;
158142c2
FB
4995
4996 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4997 zSign = ( a < 0 );
4998 absA = zSign ? - a : a;
0019d5c3 4999 shiftCount = clz64(absA) + 49;
158142c2
FB
5000 zExp = 0x406E - shiftCount;
5001 if ( 64 <= shiftCount ) {
5002 zSig1 = 0;
5003 zSig0 = absA;
5004 shiftCount -= 64;
5005 }
5006 else {
5007 zSig1 = absA;
5008 zSig0 = 0;
5009 }
5010 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5011 return packFloat128( zSign, zExp, zSig0, zSig1 );
5012
5013}
5014
6bb8e0f1
PM
5015/*----------------------------------------------------------------------------
5016| Returns the result of converting the 64-bit unsigned integer `a'
5017| to the quadruple-precision floating-point format. The conversion is performed
5018| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5019*----------------------------------------------------------------------------*/
5020
e5a41ffa 5021float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
5022{
5023 if (a == 0) {
5024 return float128_zero;
5025 }
6603d506 5026 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
5027}
5028
158142c2
FB
5029/*----------------------------------------------------------------------------
5030| Returns the result of converting the single-precision floating-point value
5031| `a' to the extended double-precision floating-point format. The conversion
5032| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5033| Arithmetic.
5034*----------------------------------------------------------------------------*/
5035
e5a41ffa 5036floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2 5037{
c120391c 5038 bool aSign;
0c48262d 5039 int aExp;
bb98fe42 5040 uint32_t aSig;
158142c2 5041
ff32e16e 5042 a = float32_squash_input_denormal(a, status);
158142c2
FB
5043 aSig = extractFloat32Frac( a );
5044 aExp = extractFloat32Exp( a );
5045 aSign = extractFloat32Sign( a );
5046 if ( aExp == 0xFF ) {
ff32e16e 5047 if (aSig) {
7537c2b4
JM
5048 floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
5049 status);
5050 return floatx80_silence_nan(res, status);
ff32e16e 5051 }
0f605c88
LV
5052 return packFloatx80(aSign,
5053 floatx80_infinity_high,
5054 floatx80_infinity_low);
158142c2
FB
5055 }
5056 if ( aExp == 0 ) {
5057 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5058 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5059 }
5060 aSig |= 0x00800000;
bb98fe42 5061 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
5062
5063}
5064
158142c2
FB
5065/*----------------------------------------------------------------------------
5066| Returns the result of converting the single-precision floating-point value
5067| `a' to the double-precision floating-point format. The conversion is
5068| performed according to the IEC/IEEE Standard for Binary Floating-Point
5069| Arithmetic.
5070*----------------------------------------------------------------------------*/
5071
e5a41ffa 5072float128 float32_to_float128(float32 a, float_status *status)
158142c2 5073{
c120391c 5074 bool aSign;
0c48262d 5075 int aExp;
bb98fe42 5076 uint32_t aSig;
158142c2 5077
ff32e16e 5078 a = float32_squash_input_denormal(a, status);
158142c2
FB
5079 aSig = extractFloat32Frac( a );
5080 aExp = extractFloat32Exp( a );
5081 aSign = extractFloat32Sign( a );
5082 if ( aExp == 0xFF ) {
ff32e16e
PM
5083 if (aSig) {
5084 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
5085 }
158142c2
FB
5086 return packFloat128( aSign, 0x7FFF, 0, 0 );
5087 }
5088 if ( aExp == 0 ) {
5089 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5090 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5091 --aExp;
5092 }
bb98fe42 5093 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
5094
5095}
5096
158142c2
FB
5097/*----------------------------------------------------------------------------
5098| Returns the remainder of the single-precision floating-point value `a'
5099| with respect to the corresponding value `b'. The operation is performed
5100| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5101*----------------------------------------------------------------------------*/
5102
e5a41ffa 5103float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 5104{
c120391c 5105 bool aSign, zSign;
0c48262d 5106 int aExp, bExp, expDiff;
bb98fe42
AF
5107 uint32_t aSig, bSig;
5108 uint32_t q;
5109 uint64_t aSig64, bSig64, q64;
5110 uint32_t alternateASig;
5111 int32_t sigMean;
ff32e16e
PM
5112 a = float32_squash_input_denormal(a, status);
5113 b = float32_squash_input_denormal(b, status);
158142c2
FB
5114
5115 aSig = extractFloat32Frac( a );
5116 aExp = extractFloat32Exp( a );
5117 aSign = extractFloat32Sign( a );
5118 bSig = extractFloat32Frac( b );
5119 bExp = extractFloat32Exp( b );
158142c2
FB
5120 if ( aExp == 0xFF ) {
5121 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 5122 return propagateFloat32NaN(a, b, status);
158142c2 5123 }
ff32e16e 5124 float_raise(float_flag_invalid, status);
af39bc8c 5125 return float32_default_nan(status);
158142c2
FB
5126 }
5127 if ( bExp == 0xFF ) {
ff32e16e
PM
5128 if (bSig) {
5129 return propagateFloat32NaN(a, b, status);
5130 }
158142c2
FB
5131 return a;
5132 }
5133 if ( bExp == 0 ) {
5134 if ( bSig == 0 ) {
ff32e16e 5135 float_raise(float_flag_invalid, status);
af39bc8c 5136 return float32_default_nan(status);
158142c2
FB
5137 }
5138 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
5139 }
5140 if ( aExp == 0 ) {
5141 if ( aSig == 0 ) return a;
5142 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5143 }
5144 expDiff = aExp - bExp;
5145 aSig |= 0x00800000;
5146 bSig |= 0x00800000;
5147 if ( expDiff < 32 ) {
5148 aSig <<= 8;
5149 bSig <<= 8;
5150 if ( expDiff < 0 ) {
5151 if ( expDiff < -1 ) return a;
5152 aSig >>= 1;
5153 }
5154 q = ( bSig <= aSig );
5155 if ( q ) aSig -= bSig;
5156 if ( 0 < expDiff ) {
bb98fe42 5157 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
5158 q >>= 32 - expDiff;
5159 bSig >>= 2;
5160 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5161 }
5162 else {
5163 aSig >>= 2;
5164 bSig >>= 2;
5165 }
5166 }
5167 else {
5168 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
5169 aSig64 = ( (uint64_t) aSig )<<40;
5170 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
5171 expDiff -= 64;
5172 while ( 0 < expDiff ) {
5173 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5174 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5175 aSig64 = - ( ( bSig * q64 )<<38 );
5176 expDiff -= 62;
5177 }
5178 expDiff += 64;
5179 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5180 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5181 q = q64>>( 64 - expDiff );
5182 bSig <<= 6;
5183 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5184 }
5185 do {
5186 alternateASig = aSig;
5187 ++q;
5188 aSig -= bSig;
bb98fe42 5189 } while ( 0 <= (int32_t) aSig );
158142c2
FB
5190 sigMean = aSig + alternateASig;
5191 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5192 aSig = alternateASig;
5193 }
bb98fe42 5194 zSign = ( (int32_t) aSig < 0 );
158142c2 5195 if ( zSign ) aSig = - aSig;
ff32e16e 5196 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5197}
5198
369be8f6 5199
158142c2 5200
8229c991
AJ
5201/*----------------------------------------------------------------------------
5202| Returns the binary exponential of the single-precision floating-point value
5203| `a'. The operation is performed according to the IEC/IEEE Standard for
5204| Binary Floating-Point Arithmetic.
5205|
5206| Uses the following identities:
5207|
5208| 1. -------------------------------------------------------------------------
5209| x x*ln(2)
5210| 2 = e
5211|
5212| 2. -------------------------------------------------------------------------
5213| 2 3 4 5 n
5214| x x x x x x x
5215| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5216| 1! 2! 3! 4! 5! n!
5217*----------------------------------------------------------------------------*/
5218
5219static const float64 float32_exp2_coefficients[15] =
5220{
d5138cf4
PM
5221 const_float64( 0x3ff0000000000000ll ), /* 1 */
5222 const_float64( 0x3fe0000000000000ll ), /* 2 */
5223 const_float64( 0x3fc5555555555555ll ), /* 3 */
5224 const_float64( 0x3fa5555555555555ll ), /* 4 */
5225 const_float64( 0x3f81111111111111ll ), /* 5 */
5226 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
5227 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
5228 const_float64( 0x3efa01a01a01a01all ), /* 8 */
5229 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
5230 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5231 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5232 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5233 const_float64( 0x3de6124613a86d09ll ), /* 13 */
5234 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5235 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
5236};
5237
e5a41ffa 5238float32 float32_exp2(float32 a, float_status *status)
8229c991 5239{
c120391c 5240 bool aSign;
0c48262d 5241 int aExp;
bb98fe42 5242 uint32_t aSig;
8229c991
AJ
5243 float64 r, x, xn;
5244 int i;
ff32e16e 5245 a = float32_squash_input_denormal(a, status);
8229c991
AJ
5246
5247 aSig = extractFloat32Frac( a );
5248 aExp = extractFloat32Exp( a );
5249 aSign = extractFloat32Sign( a );
5250
5251 if ( aExp == 0xFF) {
ff32e16e
PM
5252 if (aSig) {
5253 return propagateFloat32NaN(a, float32_zero, status);
5254 }
8229c991
AJ
5255 return (aSign) ? float32_zero : a;
5256 }
5257 if (aExp == 0) {
5258 if (aSig == 0) return float32_one;
5259 }
5260
ff32e16e 5261 float_raise(float_flag_inexact, status);
8229c991
AJ
5262
5263 /* ******************************* */
5264 /* using float64 for approximation */
5265 /* ******************************* */
ff32e16e
PM
5266 x = float32_to_float64(a, status);
5267 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
5268
5269 xn = x;
5270 r = float64_one;
5271 for (i = 0 ; i < 15 ; i++) {
5272 float64 f;
5273
ff32e16e
PM
5274 f = float64_mul(xn, float32_exp2_coefficients[i], status);
5275 r = float64_add(r, f, status);
8229c991 5276
ff32e16e 5277 xn = float64_mul(xn, x, status);
8229c991
AJ
5278 }
5279
5280 return float64_to_float32(r, status);
5281}
5282
374dfc33
AJ
5283/*----------------------------------------------------------------------------
5284| Returns the binary log of the single-precision floating-point value `a'.
5285| The operation is performed according to the IEC/IEEE Standard for Binary
5286| Floating-Point Arithmetic.
5287*----------------------------------------------------------------------------*/
e5a41ffa 5288float32 float32_log2(float32 a, float_status *status)
374dfc33 5289{
c120391c 5290 bool aSign, zSign;
0c48262d 5291 int aExp;
bb98fe42 5292 uint32_t aSig, zSig, i;
374dfc33 5293
ff32e16e 5294 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
5295 aSig = extractFloat32Frac( a );
5296 aExp = extractFloat32Exp( a );
5297 aSign = extractFloat32Sign( a );
5298
5299 if ( aExp == 0 ) {
5300 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5301 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5302 }
5303 if ( aSign ) {
ff32e16e 5304 float_raise(float_flag_invalid, status);
af39bc8c 5305 return float32_default_nan(status);
374dfc33
AJ
5306 }
5307 if ( aExp == 0xFF ) {
ff32e16e
PM
5308 if (aSig) {
5309 return propagateFloat32NaN(a, float32_zero, status);
5310 }
374dfc33
AJ
5311 return a;
5312 }
5313
5314 aExp -= 0x7F;
5315 aSig |= 0x00800000;
5316 zSign = aExp < 0;
5317 zSig = aExp << 23;
5318
5319 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 5320 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
5321 if ( aSig & 0x01000000 ) {
5322 aSig >>= 1;
5323 zSig |= i;
5324 }
5325 }
5326
5327 if ( zSign )
5328 zSig = -zSig;
5329
ff32e16e 5330 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
5331}
5332
158142c2 5333/*----------------------------------------------------------------------------
158142c2
FB
5334| Returns the result of converting the double-precision floating-point value
5335| `a' to the extended double-precision floating-point format. The conversion
5336| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5337| Arithmetic.
5338*----------------------------------------------------------------------------*/
5339
e5a41ffa 5340floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2 5341{
c120391c 5342 bool aSign;
0c48262d 5343 int aExp;
bb98fe42 5344 uint64_t aSig;
158142c2 5345
ff32e16e 5346 a = float64_squash_input_denormal(a, status);
158142c2
FB
5347 aSig = extractFloat64Frac( a );
5348 aExp = extractFloat64Exp( a );
5349 aSign = extractFloat64Sign( a );
5350 if ( aExp == 0x7FF ) {
ff32e16e 5351 if (aSig) {
7537c2b4
JM
5352 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5353 status);
5354 return floatx80_silence_nan(res, status);
ff32e16e 5355 }
0f605c88
LV
5356 return packFloatx80(aSign,
5357 floatx80_infinity_high,
5358 floatx80_infinity_low);
158142c2
FB
5359 }
5360 if ( aExp == 0 ) {
5361 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5362 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5363 }
5364 return
5365 packFloatx80(
e9321124 5366 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
158142c2
FB
5367
5368}
5369
158142c2
FB
5370/*----------------------------------------------------------------------------
5371| Returns the result of converting the double-precision floating-point value
5372| `a' to the quadruple-precision floating-point format. The conversion is
5373| performed according to the IEC/IEEE Standard for Binary Floating-Point
5374| Arithmetic.
5375*----------------------------------------------------------------------------*/
5376
e5a41ffa 5377float128 float64_to_float128(float64 a, float_status *status)
158142c2 5378{
c120391c 5379 bool aSign;
0c48262d 5380 int aExp;
bb98fe42 5381 uint64_t aSig, zSig0, zSig1;
158142c2 5382
ff32e16e 5383 a = float64_squash_input_denormal(a, status);
158142c2
FB
5384 aSig = extractFloat64Frac( a );
5385 aExp = extractFloat64Exp( a );
5386 aSign = extractFloat64Sign( a );
5387 if ( aExp == 0x7FF ) {
ff32e16e
PM
5388 if (aSig) {
5389 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
5390 }
158142c2
FB
5391 return packFloat128( aSign, 0x7FFF, 0, 0 );
5392 }
5393 if ( aExp == 0 ) {
5394 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5395 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5396 --aExp;
5397 }
5398 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
5399 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
5400
5401}
5402
158142c2
FB
5403
5404/*----------------------------------------------------------------------------
5405| Returns the remainder of the double-precision floating-point value `a'
5406| with respect to the corresponding value `b'. The operation is performed
5407| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5408*----------------------------------------------------------------------------*/
5409
e5a41ffa 5410float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 5411{
c120391c 5412 bool aSign, zSign;
0c48262d 5413 int aExp, bExp, expDiff;
bb98fe42
AF
5414 uint64_t aSig, bSig;
5415 uint64_t q, alternateASig;
5416 int64_t sigMean;
158142c2 5417
ff32e16e
PM
5418 a = float64_squash_input_denormal(a, status);
5419 b = float64_squash_input_denormal(b, status);
158142c2
FB
5420 aSig = extractFloat64Frac( a );
5421 aExp = extractFloat64Exp( a );
5422 aSign = extractFloat64Sign( a );
5423 bSig = extractFloat64Frac( b );
5424 bExp = extractFloat64Exp( b );
158142c2
FB
5425 if ( aExp == 0x7FF ) {
5426 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 5427 return propagateFloat64NaN(a, b, status);
158142c2 5428 }
ff32e16e 5429 float_raise(float_flag_invalid, status);
af39bc8c 5430 return float64_default_nan(status);
158142c2
FB
5431 }
5432 if ( bExp == 0x7FF ) {
ff32e16e
PM
5433 if (bSig) {
5434 return propagateFloat64NaN(a, b, status);
5435 }
158142c2
FB
5436 return a;
5437 }
5438 if ( bExp == 0 ) {
5439 if ( bSig == 0 ) {
ff32e16e 5440 float_raise(float_flag_invalid, status);
af39bc8c 5441 return float64_default_nan(status);
158142c2
FB
5442 }
5443 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5444 }
5445 if ( aExp == 0 ) {
5446 if ( aSig == 0 ) return a;
5447 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5448 }
5449 expDiff = aExp - bExp;
e9321124
AB
5450 aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5451 bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
158142c2
FB
5452 if ( expDiff < 0 ) {
5453 if ( expDiff < -1 ) return a;
5454 aSig >>= 1;
5455 }
5456 q = ( bSig <= aSig );
5457 if ( q ) aSig -= bSig;
5458 expDiff -= 64;
5459 while ( 0 < expDiff ) {
5460 q = estimateDiv128To64( aSig, 0, bSig );
5461 q = ( 2 < q ) ? q - 2 : 0;
5462 aSig = - ( ( bSig>>2 ) * q );
5463 expDiff -= 62;
5464 }
5465 expDiff += 64;
5466 if ( 0 < expDiff ) {
5467 q = estimateDiv128To64( aSig, 0, bSig );
5468 q = ( 2 < q ) ? q - 2 : 0;
5469 q >>= 64 - expDiff;
5470 bSig >>= 2;
5471 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5472 }
5473 else {
5474 aSig >>= 2;
5475 bSig >>= 2;
5476 }
5477 do {
5478 alternateASig = aSig;
5479 ++q;
5480 aSig -= bSig;
bb98fe42 5481 } while ( 0 <= (int64_t) aSig );
158142c2
FB
5482 sigMean = aSig + alternateASig;
5483 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5484 aSig = alternateASig;
5485 }
bb98fe42 5486 zSign = ( (int64_t) aSig < 0 );
158142c2 5487 if ( zSign ) aSig = - aSig;
ff32e16e 5488 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5489
5490}
5491
374dfc33
AJ
5492/*----------------------------------------------------------------------------
5493| Returns the binary log of the double-precision floating-point value `a'.
5494| The operation is performed according to the IEC/IEEE Standard for Binary
5495| Floating-Point Arithmetic.
5496*----------------------------------------------------------------------------*/
e5a41ffa 5497float64 float64_log2(float64 a, float_status *status)
374dfc33 5498{
c120391c 5499 bool aSign, zSign;
0c48262d 5500 int aExp;
bb98fe42 5501 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 5502 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
5503
5504 aSig = extractFloat64Frac( a );
5505 aExp = extractFloat64Exp( a );
5506 aSign = extractFloat64Sign( a );
5507
5508 if ( aExp == 0 ) {
5509 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5510 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5511 }
5512 if ( aSign ) {
ff32e16e 5513 float_raise(float_flag_invalid, status);
af39bc8c 5514 return float64_default_nan(status);
374dfc33
AJ
5515 }
5516 if ( aExp == 0x7FF ) {
ff32e16e
PM
5517 if (aSig) {
5518 return propagateFloat64NaN(a, float64_zero, status);
5519 }
374dfc33
AJ
5520 return a;
5521 }
5522
5523 aExp -= 0x3FF;
e9321124 5524 aSig |= UINT64_C(0x0010000000000000);
374dfc33 5525 zSign = aExp < 0;
bb98fe42 5526 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
5527 for (i = 1LL << 51; i > 0; i >>= 1) {
5528 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5529 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
e9321124 5530 if ( aSig & UINT64_C(0x0020000000000000) ) {
374dfc33
AJ
5531 aSig >>= 1;
5532 zSig |= i;
5533 }
5534 }
5535
5536 if ( zSign )
5537 zSig = -zSig;
ff32e16e 5538 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
5539}
5540
158142c2
FB
5541/*----------------------------------------------------------------------------
5542| Returns the result of converting the extended double-precision floating-
5543| point value `a' to the 32-bit two's complement integer format. The
5544| conversion is performed according to the IEC/IEEE Standard for Binary
5545| Floating-Point Arithmetic---which means in particular that the conversion
5546| is rounded according to the current rounding mode. If `a' is a NaN, the
5547| largest positive integer is returned. Otherwise, if the conversion
5548| overflows, the largest integer with the same sign as `a' is returned.
5549*----------------------------------------------------------------------------*/
5550
f4014512 5551int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2 5552{
c120391c 5553 bool aSign;
f4014512 5554 int32_t aExp, shiftCount;
bb98fe42 5555 uint64_t aSig;
158142c2 5556
d1eb8f2a
AD
5557 if (floatx80_invalid_encoding(a)) {
5558 float_raise(float_flag_invalid, status);
5559 return 1 << 31;
5560 }
158142c2
FB
5561 aSig = extractFloatx80Frac( a );
5562 aExp = extractFloatx80Exp( a );
5563 aSign = extractFloatx80Sign( a );
bb98fe42 5564 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5565 shiftCount = 0x4037 - aExp;
5566 if ( shiftCount <= 0 ) shiftCount = 1;
5567 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 5568 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
5569
5570}
5571
5572/*----------------------------------------------------------------------------
5573| Returns the result of converting the extended double-precision floating-
5574| point value `a' to the 32-bit two's complement integer format. The
5575| conversion is performed according to the IEC/IEEE Standard for Binary
5576| Floating-Point Arithmetic, except that the conversion is always rounded
5577| toward zero. If `a' is a NaN, the largest positive integer is returned.
5578| Otherwise, if the conversion overflows, the largest integer with the same
5579| sign as `a' is returned.
5580*----------------------------------------------------------------------------*/
5581
f4014512 5582int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2 5583{
c120391c 5584 bool aSign;
f4014512 5585 int32_t aExp, shiftCount;
bb98fe42 5586 uint64_t aSig, savedASig;
b3a6a2e0 5587 int32_t z;
158142c2 5588
d1eb8f2a
AD
5589 if (floatx80_invalid_encoding(a)) {
5590 float_raise(float_flag_invalid, status);
5591 return 1 << 31;
5592 }
158142c2
FB
5593 aSig = extractFloatx80Frac( a );
5594 aExp = extractFloatx80Exp( a );
5595 aSign = extractFloatx80Sign( a );
5596 if ( 0x401E < aExp ) {
bb98fe42 5597 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5598 goto invalid;
5599 }
5600 else if ( aExp < 0x3FFF ) {
a2f2d288 5601 if (aExp || aSig) {
d82f3b2d 5602 float_raise(float_flag_inexact, status);
a2f2d288 5603 }
158142c2
FB
5604 return 0;
5605 }
5606 shiftCount = 0x403E - aExp;
5607 savedASig = aSig;
5608 aSig >>= shiftCount;
5609 z = aSig;
5610 if ( aSign ) z = - z;
5611 if ( ( z < 0 ) ^ aSign ) {
5612 invalid:
ff32e16e 5613 float_raise(float_flag_invalid, status);
bb98fe42 5614 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5615 }
5616 if ( ( aSig<<shiftCount ) != savedASig ) {
d82f3b2d 5617 float_raise(float_flag_inexact, status);
158142c2
FB
5618 }
5619 return z;
5620
5621}
5622
5623/*----------------------------------------------------------------------------
5624| Returns the result of converting the extended double-precision floating-
5625| point value `a' to the 64-bit two's complement integer format. The
5626| conversion is performed according to the IEC/IEEE Standard for Binary
5627| Floating-Point Arithmetic---which means in particular that the conversion
5628| is rounded according to the current rounding mode. If `a' is a NaN,
5629| the largest positive integer is returned. Otherwise, if the conversion
5630| overflows, the largest integer with the same sign as `a' is returned.
5631*----------------------------------------------------------------------------*/
5632
f42c2224 5633int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2 5634{
c120391c 5635 bool aSign;
f4014512 5636 int32_t aExp, shiftCount;
bb98fe42 5637 uint64_t aSig, aSigExtra;
158142c2 5638
d1eb8f2a
AD
5639 if (floatx80_invalid_encoding(a)) {
5640 float_raise(float_flag_invalid, status);
5641 return 1ULL << 63;
5642 }
158142c2
FB
5643 aSig = extractFloatx80Frac( a );
5644 aExp = extractFloatx80Exp( a );
5645 aSign = extractFloatx80Sign( a );
5646 shiftCount = 0x403E - aExp;
5647 if ( shiftCount <= 0 ) {
5648 if ( shiftCount ) {
ff32e16e 5649 float_raise(float_flag_invalid, status);
0f605c88 5650 if (!aSign || floatx80_is_any_nan(a)) {
2c217da0 5651 return INT64_MAX;
158142c2 5652 }
2c217da0 5653 return INT64_MIN;
158142c2
FB
5654 }
5655 aSigExtra = 0;
5656 }
5657 else {
5658 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5659 }
ff32e16e 5660 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
5661
5662}
5663
5664/*----------------------------------------------------------------------------
5665| Returns the result of converting the extended double-precision floating-
5666| point value `a' to the 64-bit two's complement integer format. The
5667| conversion is performed according to the IEC/IEEE Standard for Binary
5668| Floating-Point Arithmetic, except that the conversion is always rounded
5669| toward zero. If `a' is a NaN, the largest positive integer is returned.
5670| Otherwise, if the conversion overflows, the largest integer with the same
5671| sign as `a' is returned.
5672*----------------------------------------------------------------------------*/
5673
f42c2224 5674int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2 5675{
c120391c 5676 bool aSign;
f4014512 5677 int32_t aExp, shiftCount;
bb98fe42 5678 uint64_t aSig;
f42c2224 5679 int64_t z;
158142c2 5680
d1eb8f2a
AD
5681 if (floatx80_invalid_encoding(a)) {
5682 float_raise(float_flag_invalid, status);
5683 return 1ULL << 63;
5684 }
158142c2
FB
5685 aSig = extractFloatx80Frac( a );
5686 aExp = extractFloatx80Exp( a );
5687 aSign = extractFloatx80Sign( a );
5688 shiftCount = aExp - 0x403E;
5689 if ( 0 <= shiftCount ) {
e9321124 5690 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
158142c2 5691 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 5692 float_raise(float_flag_invalid, status);
158142c2 5693 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
2c217da0 5694 return INT64_MAX;
158142c2
FB
5695 }
5696 }
2c217da0 5697 return INT64_MIN;
158142c2
FB
5698 }
5699 else if ( aExp < 0x3FFF ) {
a2f2d288 5700 if (aExp | aSig) {
d82f3b2d 5701 float_raise(float_flag_inexact, status);
a2f2d288 5702 }
158142c2
FB
5703 return 0;
5704 }
5705 z = aSig>>( - shiftCount );
bb98fe42 5706 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
d82f3b2d 5707 float_raise(float_flag_inexact, status);
158142c2
FB
5708 }
5709 if ( aSign ) z = - z;
5710 return z;
5711
5712}
5713
5714/*----------------------------------------------------------------------------
5715| Returns the result of converting the extended double-precision floating-
5716| point value `a' to the single-precision floating-point format. The
5717| conversion is performed according to the IEC/IEEE Standard for Binary
5718| Floating-Point Arithmetic.
5719*----------------------------------------------------------------------------*/
5720
e5a41ffa 5721float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2 5722{
c120391c 5723 bool aSign;
f4014512 5724 int32_t aExp;
bb98fe42 5725 uint64_t aSig;
158142c2 5726
d1eb8f2a
AD
5727 if (floatx80_invalid_encoding(a)) {
5728 float_raise(float_flag_invalid, status);
5729 return float32_default_nan(status);
5730 }
158142c2
FB
5731 aSig = extractFloatx80Frac( a );
5732 aExp = extractFloatx80Exp( a );
5733 aSign = extractFloatx80Sign( a );
5734 if ( aExp == 0x7FFF ) {
bb98fe42 5735 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5736 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5737 status);
5738 return float32_silence_nan(res, status);
158142c2
FB
5739 }
5740 return packFloat32( aSign, 0xFF, 0 );
5741 }
5742 shift64RightJamming( aSig, 33, &aSig );
5743 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5744 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5745
5746}
5747
5748/*----------------------------------------------------------------------------
5749| Returns the result of converting the extended double-precision floating-
5750| point value `a' to the double-precision floating-point format. The
5751| conversion is performed according to the IEC/IEEE Standard for Binary
5752| Floating-Point Arithmetic.
5753*----------------------------------------------------------------------------*/
5754
e5a41ffa 5755float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2 5756{
c120391c 5757 bool aSign;
f4014512 5758 int32_t aExp;
bb98fe42 5759 uint64_t aSig, zSig;
158142c2 5760
d1eb8f2a
AD
5761 if (floatx80_invalid_encoding(a)) {
5762 float_raise(float_flag_invalid, status);
5763 return float64_default_nan(status);
5764 }
158142c2
FB
5765 aSig = extractFloatx80Frac( a );
5766 aExp = extractFloatx80Exp( a );
5767 aSign = extractFloatx80Sign( a );
5768 if ( aExp == 0x7FFF ) {
bb98fe42 5769 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5770 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5771 status);
5772 return float64_silence_nan(res, status);
158142c2
FB
5773 }
5774 return packFloat64( aSign, 0x7FF, 0 );
5775 }
5776 shift64RightJamming( aSig, 1, &zSig );
5777 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5778 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5779
5780}
5781
158142c2
FB
5782/*----------------------------------------------------------------------------
5783| Returns the result of converting the extended double-precision floating-
5784| point value `a' to the quadruple-precision floating-point format. The
5785| conversion is performed according to the IEC/IEEE Standard for Binary
5786| Floating-Point Arithmetic.
5787*----------------------------------------------------------------------------*/
5788
e5a41ffa 5789float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2 5790{
c120391c 5791 bool aSign;
0c48262d 5792 int aExp;
bb98fe42 5793 uint64_t aSig, zSig0, zSig1;
158142c2 5794
d1eb8f2a
AD
5795 if (floatx80_invalid_encoding(a)) {
5796 float_raise(float_flag_invalid, status);
5797 return float128_default_nan(status);
5798 }
158142c2
FB
5799 aSig = extractFloatx80Frac( a );
5800 aExp = extractFloatx80Exp( a );
5801 aSign = extractFloatx80Sign( a );
bb98fe42 5802 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5803 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5804 status);
5805 return float128_silence_nan(res, status);
158142c2
FB
5806 }
5807 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5808 return packFloat128( aSign, aExp, zSig0, zSig1 );
5809
5810}
5811
0f721292
LV
5812/*----------------------------------------------------------------------------
5813| Rounds the extended double-precision floating-point value `a'
5814| to the precision provided by floatx80_rounding_precision and returns the
5815| result as an extended double-precision floating-point value.
5816| The operation is performed according to the IEC/IEEE Standard for Binary
5817| Floating-Point Arithmetic.
5818*----------------------------------------------------------------------------*/
5819
5820floatx80 floatx80_round(floatx80 a, float_status *status)
5821{
5822 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5823 extractFloatx80Sign(a),
5824 extractFloatx80Exp(a),
5825 extractFloatx80Frac(a), 0, status);
5826}
5827
158142c2
FB
5828/*----------------------------------------------------------------------------
5829| Rounds the extended double-precision floating-point value `a' to an integer,
5830| and returns the result as an extended quadruple-precision floating-point
5831| value. The operation is performed according to the IEC/IEEE Standard for
5832| Binary Floating-Point Arithmetic.
5833*----------------------------------------------------------------------------*/
5834
e5a41ffa 5835floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2 5836{
c120391c 5837 bool aSign;
f4014512 5838 int32_t aExp;
bb98fe42 5839 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5840 floatx80 z;
5841
d1eb8f2a
AD
5842 if (floatx80_invalid_encoding(a)) {
5843 float_raise(float_flag_invalid, status);
5844 return floatx80_default_nan(status);
5845 }
158142c2
FB
5846 aExp = extractFloatx80Exp( a );
5847 if ( 0x403E <= aExp ) {
bb98fe42 5848 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5849 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5850 }
5851 return a;
5852 }
5853 if ( aExp < 0x3FFF ) {
5854 if ( ( aExp == 0 )
9ecaf5cc 5855 && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
158142c2
FB
5856 return a;
5857 }
d82f3b2d 5858 float_raise(float_flag_inexact, status);
158142c2 5859 aSign = extractFloatx80Sign( a );
a2f2d288 5860 switch (status->float_rounding_mode) {
158142c2 5861 case float_round_nearest_even:
bb98fe42 5862 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5863 ) {
5864 return
e9321124 5865 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
158142c2
FB
5866 }
5867 break;
f9288a76
PM
5868 case float_round_ties_away:
5869 if (aExp == 0x3FFE) {
e9321124 5870 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
f9288a76
PM
5871 }
5872 break;
158142c2
FB
5873 case float_round_down:
5874 return
5875 aSign ?
e9321124 5876 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
158142c2
FB
5877 : packFloatx80( 0, 0, 0 );
5878 case float_round_up:
5879 return
5880 aSign ? packFloatx80( 1, 0, 0 )
e9321124 5881 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
3dede407
RH
5882
5883 case float_round_to_zero:
5884 break;
5885 default:
5886 g_assert_not_reached();
158142c2
FB
5887 }
5888 return packFloatx80( aSign, 0, 0 );
5889 }
5890 lastBitMask = 1;
5891 lastBitMask <<= 0x403E - aExp;
5892 roundBitsMask = lastBitMask - 1;
5893 z = a;
a2f2d288 5894 switch (status->float_rounding_mode) {
dc355b76 5895 case float_round_nearest_even:
158142c2 5896 z.low += lastBitMask>>1;
dc355b76
PM
5897 if ((z.low & roundBitsMask) == 0) {
5898 z.low &= ~lastBitMask;
5899 }
5900 break;
f9288a76
PM
5901 case float_round_ties_away:
5902 z.low += lastBitMask >> 1;
5903 break;
dc355b76
PM
5904 case float_round_to_zero:
5905 break;
5906 case float_round_up:
5907 if (!extractFloatx80Sign(z)) {
5908 z.low += roundBitsMask;
5909 }
5910 break;
5911 case float_round_down:
5912 if (extractFloatx80Sign(z)) {
158142c2
FB
5913 z.low += roundBitsMask;
5914 }
dc355b76
PM
5915 break;
5916 default:
5917 abort();
158142c2
FB
5918 }
5919 z.low &= ~ roundBitsMask;
5920 if ( z.low == 0 ) {
5921 ++z.high;
e9321124 5922 z.low = UINT64_C(0x8000000000000000);
158142c2 5923 }
a2f2d288 5924 if (z.low != a.low) {
d82f3b2d 5925 float_raise(float_flag_inexact, status);
a2f2d288 5926 }
158142c2
FB
5927 return z;
5928
5929}
5930
5931/*----------------------------------------------------------------------------
5932| Returns the result of adding the absolute values of the extended double-
5933| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5934| negated before being returned. `zSign' is ignored if the result is a NaN.
5935| The addition is performed according to the IEC/IEEE Standard for Binary
5936| Floating-Point Arithmetic.
5937*----------------------------------------------------------------------------*/
5938
c120391c 5939static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5940 float_status *status)
158142c2 5941{
f4014512 5942 int32_t aExp, bExp, zExp;
bb98fe42 5943 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5944 int32_t expDiff;
158142c2
FB
5945
5946 aSig = extractFloatx80Frac( a );
5947 aExp = extractFloatx80Exp( a );
5948 bSig = extractFloatx80Frac( b );
5949 bExp = extractFloatx80Exp( b );
5950 expDiff = aExp - bExp;
5951 if ( 0 < expDiff ) {
5952 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5953 if ((uint64_t)(aSig << 1)) {
5954 return propagateFloatx80NaN(a, b, status);
5955 }
158142c2
FB
5956 return a;
5957 }
5958 if ( bExp == 0 ) --expDiff;
5959 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5960 zExp = aExp;
5961 }
5962 else if ( expDiff < 0 ) {
5963 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5964 if ((uint64_t)(bSig << 1)) {
5965 return propagateFloatx80NaN(a, b, status);
5966 }
0f605c88
LV
5967 return packFloatx80(zSign,
5968 floatx80_infinity_high,
5969 floatx80_infinity_low);
158142c2
FB
5970 }
5971 if ( aExp == 0 ) ++expDiff;
5972 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5973 zExp = bExp;
5974 }
5975 else {
5976 if ( aExp == 0x7FFF ) {
bb98fe42 5977 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5978 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5979 }
5980 return a;
5981 }
5982 zSig1 = 0;
5983 zSig0 = aSig + bSig;
5984 if ( aExp == 0 ) {
41602807
JM
5985 if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
5986 /* At least one of the values is a pseudo-denormal,
5987 * and there is a carry out of the result. */
5988 zExp = 1;
5989 goto shiftRight1;
5990 }
2f311075
RH
5991 if (zSig0 == 0) {
5992 return packFloatx80(zSign, 0, 0);
5993 }
158142c2
FB
5994 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5995 goto roundAndPack;
5996 }
5997 zExp = aExp;
5998 goto shiftRight1;
5999 }
6000 zSig0 = aSig + bSig;
bb98fe42 6001 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
6002 shiftRight1:
6003 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
e9321124 6004 zSig0 |= UINT64_C(0x8000000000000000);
158142c2
FB
6005 ++zExp;
6006 roundAndPack:
a2f2d288 6007 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6008 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6009}
6010
6011/*----------------------------------------------------------------------------
6012| Returns the result of subtracting the absolute values of the extended
6013| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
6014| difference is negated before being returned. `zSign' is ignored if the
6015| result is a NaN. The subtraction is performed according to the IEC/IEEE
6016| Standard for Binary Floating-Point Arithmetic.
6017*----------------------------------------------------------------------------*/
6018
c120391c 6019static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 6020 float_status *status)
158142c2 6021{
f4014512 6022 int32_t aExp, bExp, zExp;
bb98fe42 6023 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 6024 int32_t expDiff;
158142c2
FB
6025
6026 aSig = extractFloatx80Frac( a );
6027 aExp = extractFloatx80Exp( a );
6028 bSig = extractFloatx80Frac( b );
6029 bExp = extractFloatx80Exp( b );
6030 expDiff = aExp - bExp;
6031 if ( 0 < expDiff ) goto aExpBigger;
6032 if ( expDiff < 0 ) goto bExpBigger;
6033 if ( aExp == 0x7FFF ) {
bb98fe42 6034 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 6035 return propagateFloatx80NaN(a, b, status);
158142c2 6036 }
ff32e16e 6037 float_raise(float_flag_invalid, status);
af39bc8c 6038 return floatx80_default_nan(status);
158142c2
FB
6039 }
6040 if ( aExp == 0 ) {
6041 aExp = 1;
6042 bExp = 1;
6043 }
6044 zSig1 = 0;
6045 if ( bSig < aSig ) goto aBigger;
6046 if ( aSig < bSig ) goto bBigger;
a2f2d288 6047 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
6048 bExpBigger:
6049 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6050 if ((uint64_t)(bSig << 1)) {
6051 return propagateFloatx80NaN(a, b, status);
6052 }
0f605c88
LV
6053 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
6054 floatx80_infinity_low);
158142c2
FB
6055 }
6056 if ( aExp == 0 ) ++expDiff;
6057 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
6058 bBigger:
6059 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
6060 zExp = bExp;
6061 zSign ^= 1;
6062 goto normalizeRoundAndPack;
6063 aExpBigger:
6064 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6065 if ((uint64_t)(aSig << 1)) {
6066 return propagateFloatx80NaN(a, b, status);
6067 }
158142c2
FB
6068 return a;
6069 }
6070 if ( bExp == 0 ) --expDiff;
6071 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
6072 aBigger:
6073 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
6074 zExp = aExp;
6075 normalizeRoundAndPack:
a2f2d288 6076 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6077 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6078}
6079
6080/*----------------------------------------------------------------------------
6081| Returns the result of adding the extended double-precision floating-point
6082| values `a' and `b'. The operation is performed according to the IEC/IEEE
6083| Standard for Binary Floating-Point Arithmetic.
6084*----------------------------------------------------------------------------*/
6085
e5a41ffa 6086floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2 6087{
c120391c 6088 bool aSign, bSign;
158142c2 6089
d1eb8f2a
AD
6090 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6091 float_raise(float_flag_invalid, status);
6092 return floatx80_default_nan(status);
6093 }
158142c2
FB
6094 aSign = extractFloatx80Sign( a );
6095 bSign = extractFloatx80Sign( b );
6096 if ( aSign == bSign ) {
ff32e16e 6097 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6098 }
6099 else {
ff32e16e 6100 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6101 }
6102
6103}
6104
6105/*----------------------------------------------------------------------------
6106| Returns the result of subtracting the extended double-precision floating-
6107| point values `a' and `b'. The operation is performed according to the
6108| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6109*----------------------------------------------------------------------------*/
6110
e5a41ffa 6111floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2 6112{
c120391c 6113 bool aSign, bSign;
158142c2 6114
d1eb8f2a
AD
6115 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6116 float_raise(float_flag_invalid, status);
6117 return floatx80_default_nan(status);
6118 }
158142c2
FB
6119 aSign = extractFloatx80Sign( a );
6120 bSign = extractFloatx80Sign( b );
6121 if ( aSign == bSign ) {
ff32e16e 6122 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6123 }
6124 else {
ff32e16e 6125 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6126 }
6127
6128}
6129
6130/*----------------------------------------------------------------------------
6131| Returns the result of multiplying the extended double-precision floating-
6132| point values `a' and `b'. The operation is performed according to the
6133| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6134*----------------------------------------------------------------------------*/
6135
e5a41ffa 6136floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2 6137{
c120391c 6138 bool aSign, bSign, zSign;
f4014512 6139 int32_t aExp, bExp, zExp;
bb98fe42 6140 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 6141
d1eb8f2a
AD
6142 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6143 float_raise(float_flag_invalid, status);
6144 return floatx80_default_nan(status);
6145 }
158142c2
FB
6146 aSig = extractFloatx80Frac( a );
6147 aExp = extractFloatx80Exp( a );
6148 aSign = extractFloatx80Sign( a );
6149 bSig = extractFloatx80Frac( b );
6150 bExp = extractFloatx80Exp( b );
6151 bSign = extractFloatx80Sign( b );
6152 zSign = aSign ^ bSign;
6153 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6154 if ( (uint64_t) ( aSig<<1 )
6155 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6156 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6157 }
6158 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
6159 return packFloatx80(zSign, floatx80_infinity_high,
6160 floatx80_infinity_low);
158142c2
FB
6161 }
6162 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6163 if ((uint64_t)(bSig << 1)) {
6164 return propagateFloatx80NaN(a, b, status);
6165 }
158142c2
FB
6166 if ( ( aExp | aSig ) == 0 ) {
6167 invalid:
ff32e16e 6168 float_raise(float_flag_invalid, status);
af39bc8c 6169 return floatx80_default_nan(status);
158142c2 6170 }
0f605c88
LV
6171 return packFloatx80(zSign, floatx80_infinity_high,
6172 floatx80_infinity_low);
158142c2
FB
6173 }
6174 if ( aExp == 0 ) {
6175 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6176 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6177 }
6178 if ( bExp == 0 ) {
6179 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
6180 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6181 }
6182 zExp = aExp + bExp - 0x3FFE;
6183 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 6184 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
6185 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6186 --zExp;
6187 }
a2f2d288 6188 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6189 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6190}
6191
6192/*----------------------------------------------------------------------------
6193| Returns the result of dividing the extended double-precision floating-point
6194| value `a' by the corresponding value `b'. The operation is performed
6195| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6196*----------------------------------------------------------------------------*/
6197
e5a41ffa 6198floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2 6199{
c120391c 6200 bool aSign, bSign, zSign;
f4014512 6201 int32_t aExp, bExp, zExp;
bb98fe42
AF
6202 uint64_t aSig, bSig, zSig0, zSig1;
6203 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 6204
d1eb8f2a
AD
6205 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6206 float_raise(float_flag_invalid, status);
6207 return floatx80_default_nan(status);
6208 }
158142c2
FB
6209 aSig = extractFloatx80Frac( a );
6210 aExp = extractFloatx80Exp( a );
6211 aSign = extractFloatx80Sign( a );
6212 bSig = extractFloatx80Frac( b );
6213 bExp = extractFloatx80Exp( b );
6214 bSign = extractFloatx80Sign( b );
6215 zSign = aSign ^ bSign;
6216 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6217 if ((uint64_t)(aSig << 1)) {
6218 return propagateFloatx80NaN(a, b, status);
6219 }
158142c2 6220 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6221 if ((uint64_t)(bSig << 1)) {
6222 return propagateFloatx80NaN(a, b, status);
6223 }
158142c2
FB
6224 goto invalid;
6225 }
0f605c88
LV
6226 return packFloatx80(zSign, floatx80_infinity_high,
6227 floatx80_infinity_low);
158142c2
FB
6228 }
6229 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6230 if ((uint64_t)(bSig << 1)) {
6231 return propagateFloatx80NaN(a, b, status);
6232 }
158142c2
FB
6233 return packFloatx80( zSign, 0, 0 );
6234 }
6235 if ( bExp == 0 ) {
6236 if ( bSig == 0 ) {
6237 if ( ( aExp | aSig ) == 0 ) {
6238 invalid:
ff32e16e 6239 float_raise(float_flag_invalid, status);
af39bc8c 6240 return floatx80_default_nan(status);
158142c2 6241 }
ff32e16e 6242 float_raise(float_flag_divbyzero, status);
0f605c88
LV
6243 return packFloatx80(zSign, floatx80_infinity_high,
6244 floatx80_infinity_low);
158142c2
FB
6245 }
6246 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6247 }
6248 if ( aExp == 0 ) {
6249 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6250 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6251 }
6252 zExp = aExp - bExp + 0x3FFE;
6253 rem1 = 0;
6254 if ( bSig <= aSig ) {
6255 shift128Right( aSig, 0, 1, &aSig, &rem1 );
6256 ++zExp;
6257 }
6258 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6259 mul64To128( bSig, zSig0, &term0, &term1 );
6260 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 6261 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6262 --zSig0;
6263 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6264 }
6265 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 6266 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
6267 mul64To128( bSig, zSig1, &term1, &term2 );
6268 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 6269 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6270 --zSig1;
6271 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6272 }
6273 zSig1 |= ( ( rem1 | rem2 ) != 0 );
6274 }
a2f2d288 6275 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6276 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6277}
6278
6279/*----------------------------------------------------------------------------
6280| Returns the remainder of the extended double-precision floating-point value
6281| `a' with respect to the corresponding value `b'. The operation is performed
6b8b0136
JM
6282| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6283| if 'mod' is false; if 'mod' is true, return the remainder based on truncating
445810ec
JM
6284| the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6285| the absolute value of the integer quotient.
158142c2
FB
6286*----------------------------------------------------------------------------*/
6287
445810ec 6288floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
6b8b0136 6289 float_status *status)
158142c2 6290{
c120391c 6291 bool aSign, zSign;
b662495d 6292 int32_t aExp, bExp, expDiff, aExpOrig;
bb98fe42
AF
6293 uint64_t aSig0, aSig1, bSig;
6294 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 6295
445810ec 6296 *quotient = 0;
d1eb8f2a
AD
6297 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6298 float_raise(float_flag_invalid, status);
6299 return floatx80_default_nan(status);
6300 }
158142c2 6301 aSig0 = extractFloatx80Frac( a );
b662495d 6302 aExpOrig = aExp = extractFloatx80Exp( a );
158142c2
FB
6303 aSign = extractFloatx80Sign( a );
6304 bSig = extractFloatx80Frac( b );
6305 bExp = extractFloatx80Exp( b );
158142c2 6306 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6307 if ( (uint64_t) ( aSig0<<1 )
6308 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6309 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6310 }
6311 goto invalid;
6312 }
6313 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6314 if ((uint64_t)(bSig << 1)) {
6315 return propagateFloatx80NaN(a, b, status);
6316 }
b662495d
JM
6317 if (aExp == 0 && aSig0 >> 63) {
6318 /*
6319 * Pseudo-denormal argument must be returned in normalized
6320 * form.
6321 */
6322 return packFloatx80(aSign, 1, aSig0);
6323 }
158142c2
FB
6324 return a;
6325 }
6326 if ( bExp == 0 ) {
6327 if ( bSig == 0 ) {
6328 invalid:
ff32e16e 6329 float_raise(float_flag_invalid, status);
af39bc8c 6330 return floatx80_default_nan(status);
158142c2
FB
6331 }
6332 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6333 }
6334 if ( aExp == 0 ) {
499a2f7b 6335 if ( aSig0 == 0 ) return a;
158142c2
FB
6336 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6337 }
158142c2
FB
6338 zSign = aSign;
6339 expDiff = aExp - bExp;
6340 aSig1 = 0;
6341 if ( expDiff < 0 ) {
b662495d
JM
6342 if ( mod || expDiff < -1 ) {
6343 if (aExp == 1 && aExpOrig == 0) {
6344 /*
6345 * Pseudo-denormal argument must be returned in
6346 * normalized form.
6347 */
6348 return packFloatx80(aSign, aExp, aSig0);
6349 }
6350 return a;
6351 }
158142c2
FB
6352 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6353 expDiff = 0;
6354 }
445810ec 6355 *quotient = q = ( bSig <= aSig0 );
158142c2
FB
6356 if ( q ) aSig0 -= bSig;
6357 expDiff -= 64;
6358 while ( 0 < expDiff ) {
6359 q = estimateDiv128To64( aSig0, aSig1, bSig );
6360 q = ( 2 < q ) ? q - 2 : 0;
6361 mul64To128( bSig, q, &term0, &term1 );
6362 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6363 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6364 expDiff -= 62;
445810ec
JM
6365 *quotient <<= 62;
6366 *quotient += q;
158142c2
FB
6367 }
6368 expDiff += 64;
6369 if ( 0 < expDiff ) {
6370 q = estimateDiv128To64( aSig0, aSig1, bSig );
6371 q = ( 2 < q ) ? q - 2 : 0;
6372 q >>= 64 - expDiff;
6373 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6374 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6375 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6376 while ( le128( term0, term1, aSig0, aSig1 ) ) {
6377 ++q;
6378 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6379 }
445810ec
JM
6380 if (expDiff < 64) {
6381 *quotient <<= expDiff;
6382 } else {
6383 *quotient = 0;
6384 }
6385 *quotient += q;
158142c2
FB
6386 }
6387 else {
6388 term1 = 0;
6389 term0 = bSig;
6390 }
6b8b0136
JM
6391 if (!mod) {
6392 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6393 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6394 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6395 && ( q & 1 ) )
6396 ) {
6397 aSig0 = alternateASig0;
6398 aSig1 = alternateASig1;
6399 zSign = ! zSign;
445810ec 6400 ++*quotient;
6b8b0136 6401 }
158142c2
FB
6402 }
6403 return
6404 normalizeRoundAndPackFloatx80(
ff32e16e 6405 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
6406
6407}
6408
6b8b0136
JM
6409/*----------------------------------------------------------------------------
6410| Returns the remainder of the extended double-precision floating-point value
6411| `a' with respect to the corresponding value `b'. The operation is performed
6412| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6413*----------------------------------------------------------------------------*/
6414
6415floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6416{
445810ec
JM
6417 uint64_t quotient;
6418 return floatx80_modrem(a, b, false, &quotient, status);
6b8b0136
JM
6419}
6420
6421/*----------------------------------------------------------------------------
6422| Returns the remainder of the extended double-precision floating-point value
6423| `a' with respect to the corresponding value `b', with the quotient truncated
6424| toward zero.
6425*----------------------------------------------------------------------------*/
6426
6427floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6428{
445810ec
JM
6429 uint64_t quotient;
6430 return floatx80_modrem(a, b, true, &quotient, status);
6b8b0136
JM
6431}
6432
158142c2
FB
6433/*----------------------------------------------------------------------------
6434| Returns the square root of the extended double-precision floating-point
6435| value `a'. The operation is performed according to the IEC/IEEE Standard
6436| for Binary Floating-Point Arithmetic.
6437*----------------------------------------------------------------------------*/
6438
e5a41ffa 6439floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2 6440{
/*
 * Overview: special inputs are dispatched first (invalid encoding, NaN,
 * infinities, negative values, zero, subnormals).  The root of the remaining
 * positive finite value is built as two 64-bit pieces (zSig0, then zSig1),
 * each obtained from an estimate that is corrected against an exactly
 * computed remainder, and finally rounded with roundAndPackFloatx80().
 */
c120391c 6441 bool aSign;
f4014512 6442 int32_t aExp, zExp;
bb98fe42
AF
6443 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6444 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 6445
/* Encodings rejected by floatx80_invalid_encoding(): invalid + default NaN. */
d1eb8f2a
AD
6446 if (floatx80_invalid_encoding(a)) {
6447 float_raise(float_flag_invalid, status);
6448 return floatx80_default_nan(status);
6449 }
158142c2
FB
6450 aSig0 = extractFloatx80Frac( a );
6451 aExp = extractFloatx80Exp( a );
6452 aSign = extractFloatx80Sign( a );
/*
 * Maximum exponent: any significand bit below the top one set means NaN,
 * which is propagated.  Otherwise the value is an infinity: the positive one
 * is returned unchanged, the negative one is an invalid operation.
 */
6453 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6454 if ((uint64_t)(aSig0 << 1)) {
6455 return propagateFloatx80NaN(a, a, status);
6456 }
158142c2
FB
6457 if ( ! aSign ) return a;
6458 goto invalid;
6459 }
/*
 * Negative input: a value with zero exponent and zero significand is
 * returned as-is; every other negative value raises invalid and yields the
 * default NaN.  The `invalid' label is also the target for -infinity above.
 */
6460 if ( aSign ) {
6461 if ( ( aExp | aSig0 ) == 0 ) return a;
6462 invalid:
ff32e16e 6463 float_raise(float_flag_invalid, status);
af39bc8c 6464 return floatx80_default_nan(status);
158142c2
FB
6465 }
/* Zero exponent: zero significand gives +0, otherwise normalize first. */
6466 if ( aExp == 0 ) {
6467 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6468 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6469 }
/* Result exponent: half of the unbiased exponent, re-biased by 0x3FFF. */
6470 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
/*
 * First estimate of the high root word, seeded from the top 32 bits of the
 * significand.  The operand is shifted right by 2 or 3 depending on the
 * parity of aExp before the estimate is refined.
 */
6471 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6472 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6473 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6474 doubleZSig0 = zSig0<<1;
/* rem = a - zSig0^2; step zSig0 down while that remainder is negative. */
6475 mul64To128( zSig0, zSig0, &term0, &term1 );
6476 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6477 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6478 --zSig0;
6479 doubleZSig0 -= 2;
6480 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6481 }
/*
 * Estimate the low root word from the remainder.  The exact remainder is
 * only computed when the low 62 bits of the estimate are <= 5; in that case
 * zSig1 is corrected downward and a sticky bit is OR-ed in if anything is
 * left over.
 */
6482 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
e9321124 6483 if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
158142c2
FB
6484 if ( zSig1 == 0 ) zSig1 = 1;
6485 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6486 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6487 mul64To128( zSig1, zSig1, &term2, &term3 );
6488 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6489 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6490 --zSig1;
6491 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6492 term3 |= 1;
6493 term2 |= doubleZSig0;
6494 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6495 }
6496 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6497 }
/*
 * Assemble the 128-bit significand: zSig1 shifted left by one, with
 * doubleZSig0 OR-ed into the high word.  The result is always positive
 * (sign argument 0) and is rounded to status->floatx80_rounding_precision.
 */
6498 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6499 zSig0 |= doubleZSig0;
a2f2d288
PM
6500 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6501 0, zExp, zSig0, zSig1, status);
158142c2
FB
6502}
6503
6504/*----------------------------------------------------------------------------
158142c2
FB
6505| Returns the result of converting the quadruple-precision floating-point
6506| value `a' to the 32-bit two's complement integer format. The conversion
6507| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6508| Arithmetic---which means in particular that the conversion is rounded
6509| according to the current rounding mode. If `a' is a NaN, the largest
6510| positive integer is returned. Otherwise, if the conversion overflows, the
6511| largest integer with the same sign as `a' is returned.
6512*----------------------------------------------------------------------------*/
6513
f4014512 6514int32_t float128_to_int32(float128 a, float_status *status)
158142c2 6515{
c120391c 6516 bool aSign;
f4014512 6517 int32_t aExp, shiftCount;
bb98fe42 6518 uint64_t aSig0, aSig1;
158142c2
FB
6519
6520 aSig1 = extractFloat128Frac1( a );
6521 aSig0 = extractFloat128Frac0( a );
6522 aExp = extractFloat128Exp( a );
6523 aSign = extractFloat128Sign( a );
6524 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
e9321124 6525 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6526 aSig0 |= ( aSig1 != 0 );
6527 shiftCount = 0x4028 - aExp;
6528 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 6529 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
6530
6531}
6532
6533/*----------------------------------------------------------------------------
6534| Returns the result of converting the quadruple-precision floating-point
6535| value `a' to the 32-bit two's complement integer format. The conversion
6536| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6537| Arithmetic, except that the conversion is always rounded toward zero. If
6538| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
6539| conversion overflows, the largest integer with the same sign as `a' is
6540| returned.
6541*----------------------------------------------------------------------------*/
6542
f4014512 6543int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2 6544{
c120391c 6545 bool aSign;
f4014512 6546 int32_t aExp, shiftCount;
bb98fe42 6547 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 6548 int32_t z;
158142c2
FB
6549
6550 aSig1 = extractFloat128Frac1( a );
6551 aSig0 = extractFloat128Frac0( a );
6552 aExp = extractFloat128Exp( a );
6553 aSign = extractFloat128Sign( a );
6554 aSig0 |= ( aSig1 != 0 );
6555 if ( 0x401E < aExp ) {
6556 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
6557 goto invalid;
6558 }
6559 else if ( aExp < 0x3FFF ) {
a2f2d288 6560 if (aExp || aSig0) {
d82f3b2d 6561 float_raise(float_flag_inexact, status);
a2f2d288 6562 }
158142c2
FB
6563 return 0;
6564 }
e9321124 6565 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6566 shiftCount = 0x402F - aExp;
6567 savedASig = aSig0;
6568 aSig0 >>= shiftCount;
6569 z = aSig0;
6570 if ( aSign ) z = - z;
6571 if ( ( z < 0 ) ^ aSign ) {
6572 invalid:
ff32e16e 6573 float_raise(float_flag_invalid, status);
2c217da0 6574 return aSign ? INT32_MIN : INT32_MAX;
158142c2
FB
6575 }
6576 if ( ( aSig0<<shiftCount ) != savedASig ) {
d82f3b2d 6577 float_raise(float_flag_inexact, status);
158142c2
FB
6578 }
6579 return z;
6580
6581}
6582
6583/*----------------------------------------------------------------------------
6584| Returns the result of converting the quadruple-precision floating-point
6585| value `a' to the 64-bit two's complement integer format. The conversion
6586| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6587| Arithmetic---which means in particular that the conversion is rounded
6588| according to the current rounding mode. If `a' is a NaN, the largest
6589| positive integer is returned. Otherwise, if the conversion overflows, the
6590| largest integer with the same sign as `a' is returned.
6591*----------------------------------------------------------------------------*/
6592
f42c2224 6593int64_t float128_to_int64(float128 a, float_status *status)
158142c2 6594{
c120391c 6595 bool aSign;
f4014512 6596 int32_t aExp, shiftCount;
bb98fe42 6597 uint64_t aSig0, aSig1;
158142c2
FB
6598
6599 aSig1 = extractFloat128Frac1( a );
6600 aSig0 = extractFloat128Frac0( a );
6601 aExp = extractFloat128Exp( a );
6602 aSign = extractFloat128Sign( a );
e9321124 6603 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6604 shiftCount = 0x402F - aExp;
6605 if ( shiftCount <= 0 ) {
6606 if ( 0x403E < aExp ) {
ff32e16e 6607 float_raise(float_flag_invalid, status);
158142c2
FB
6608 if ( ! aSign
6609 || ( ( aExp == 0x7FFF )
e9321124 6610 && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
158142c2
FB
6611 )
6612 ) {
2c217da0 6613 return INT64_MAX;
158142c2 6614 }
2c217da0 6615 return INT64_MIN;
158142c2
FB
6616 }
6617 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6618 }
6619 else {
6620 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6621 }
ff32e16e 6622 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
6623
6624}
6625
6626/*----------------------------------------------------------------------------
6627| Returns the result of converting the quadruple-precision floating-point
6628| value `a' to the 64-bit two's complement integer format. The conversion
6629| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6630| Arithmetic, except that the conversion is always rounded toward zero.
6631| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
6632| the conversion overflows, the largest integer with the same sign as `a' is
6633| returned.
6634*----------------------------------------------------------------------------*/
6635
f42c2224 6636int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2 6637{
c120391c 6638 bool aSign;
f4014512 6639 int32_t aExp, shiftCount;
bb98fe42 6640 uint64_t aSig0, aSig1;
f42c2224 6641 int64_t z;
158142c2
FB
6642
6643 aSig1 = extractFloat128Frac1( a );
6644 aSig0 = extractFloat128Frac0( a );
6645 aExp = extractFloat128Exp( a );
6646 aSign = extractFloat128Sign( a );
e9321124 6647 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6648 shiftCount = aExp - 0x402F;
6649 if ( 0 < shiftCount ) {
6650 if ( 0x403E <= aExp ) {
e9321124
AB
6651 aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
6652 if ( ( a.high == UINT64_C(0xC03E000000000000) )
6653 && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
a2f2d288 6654 if (aSig1) {
d82f3b2d 6655 float_raise(float_flag_inexact, status);
a2f2d288 6656 }
158142c2
FB
6657 }
6658 else {
ff32e16e 6659 float_raise(float_flag_invalid, status);
158142c2 6660 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
2c217da0 6661 return INT64_MAX;
158142c2
FB
6662 }
6663 }
2c217da0 6664 return INT64_MIN;
158142c2
FB
6665 }
6666 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 6667 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
d82f3b2d 6668 float_raise(float_flag_inexact, status);
158142c2
FB
6669 }
6670 }
6671 else {
6672 if ( aExp < 0x3FFF ) {
6673 if ( aExp | aSig0 | aSig1 ) {
d82f3b2d 6674 float_raise(float_flag_inexact, status);
158142c2
FB
6675 }
6676 return 0;
6677 }
6678 z = aSig0>>( - shiftCount );
6679 if ( aSig1
bb98fe42 6680 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
d82f3b2d 6681 float_raise(float_flag_inexact, status);
158142c2
FB
6682 }
6683 }
6684 if ( aSign ) z = - z;
6685 return z;
6686
6687}
6688
2e6d8568
BR
6689/*----------------------------------------------------------------------------
6690| Returns the result of converting the quadruple-precision floating-point value
6691| `a' to the 64-bit unsigned integer format. The conversion is
6692| performed according to the IEC/IEEE Standard for Binary Floating-Point
6693| Arithmetic---which means in particular that the conversion is rounded
6694| according to the current rounding mode. If `a' is a NaN, the largest
6695| positive integer is returned. If the conversion overflows, the
6696| largest unsigned integer is returned. If 'a' is negative, the value is
6697| rounded and zero is returned; negative values that do not round to zero
6698| will raise the inexact exception.
6699*----------------------------------------------------------------------------*/
6700
6701uint64_t float128_to_uint64(float128 a, float_status *status)
6702{
c120391c 6703 bool aSign;
2e6d8568
BR
6704 int aExp;
6705 int shiftCount;
6706 uint64_t aSig0, aSig1;
6707
6708 aSig0 = extractFloat128Frac0(a);
6709 aSig1 = extractFloat128Frac1(a);
6710 aExp = extractFloat128Exp(a);
6711 aSign = extractFloat128Sign(a);
6712 if (aSign && (aExp > 0x3FFE)) {
6713 float_raise(float_flag_invalid, status);
6714 if (float128_is_any_nan(a)) {
2c217da0 6715 return UINT64_MAX;
2e6d8568
BR
6716 } else {
6717 return 0;
6718 }
6719 }
6720 if (aExp) {
2c217da0 6721 aSig0 |= UINT64_C(0x0001000000000000);
2e6d8568
BR
6722 }
6723 shiftCount = 0x402F - aExp;
6724 if (shiftCount <= 0) {
6725 if (0x403E < aExp) {
6726 float_raise(float_flag_invalid, status);
2c217da0 6727 return UINT64_MAX;
2e6d8568
BR
6728 }
6729 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6730 } else {
6731 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6732 }
6733 return roundAndPackUint64(aSign, aSig0, aSig1, status);
6734}
6735
6736uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6737{
6738 uint64_t v;
6739 signed char current_rounding_mode = status->float_rounding_mode;
6740
6741 set_float_rounding_mode(float_round_to_zero, status);
6742 v = float128_to_uint64(a, status);
6743 set_float_rounding_mode(current_rounding_mode, status);
6744
6745 return v;
6746}
6747
158142c2
FB
6748/*----------------------------------------------------------------------------
6749| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
6750| value `a' to the 32-bit unsigned integer format. The conversion
6751| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6752| Arithmetic except that the conversion is always rounded toward zero.
6753| If `a' is a NaN, the largest positive integer is returned. Otherwise,
6754| if the conversion overflows, the largest unsigned integer is returned.
6755| If 'a' is negative, the value is rounded and zero is returned; negative
6756| values that do not round to zero will raise the inexact exception.
6757*----------------------------------------------------------------------------*/
6758
6759uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6760{
6761 uint64_t v;
6762 uint32_t res;
6763 int old_exc_flags = get_float_exception_flags(status);
6764
6765 v = float128_to_uint64_round_to_zero(a, status);
6766 if (v > 0xffffffff) {
6767 res = 0xffffffff;
6768 } else {
6769 return v;
6770 }
6771 set_float_exception_flags(old_exc_flags, status);
e45de992
DH
6772 float_raise(float_flag_invalid, status);
6773 return res;
6774}
6775
6776/*----------------------------------------------------------------------------
6777| Returns the result of converting the quadruple-precision floating-point value
6778| `a' to the 32-bit unsigned integer format. The conversion is
6779| performed according to the IEC/IEEE Standard for Binary Floating-Point
6780| Arithmetic---which means in particular that the conversion is rounded
6781| according to the current rounding mode. If `a' is a NaN, the largest
6782| positive integer is returned. If the conversion overflows, the
6783| largest unsigned integer is returned. If 'a' is negative, the value is
6784| rounded and zero is returned; negative values that do not round to zero
6785| will raise the inexact exception.
6786*----------------------------------------------------------------------------*/
6787
6788uint32_t float128_to_uint32(float128 a, float_status *status)
6789{
6790 uint64_t v;
6791 uint32_t res;
6792 int old_exc_flags = get_float_exception_flags(status);
6793
6794 v = float128_to_uint64(a, status);
6795 if (v > 0xffffffff) {
6796 res = 0xffffffff;
6797 } else {
6798 return v;
6799 }
6800 set_float_exception_flags(old_exc_flags, status);
fd425037
BR
6801 float_raise(float_flag_invalid, status);
6802 return res;
6803}
6804
6805/*----------------------------------------------------------------------------
6806| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
6807| value `a' to the single-precision floating-point format. The conversion
6808| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6809| Arithmetic.
6810*----------------------------------------------------------------------------*/
6811
e5a41ffa 6812float32 float128_to_float32(float128 a, float_status *status)
158142c2 6813{
c120391c 6814 bool aSign;
f4014512 6815 int32_t aExp;
bb98fe42
AF
6816 uint64_t aSig0, aSig1;
6817 uint32_t zSig;
158142c2
FB
6818
6819 aSig1 = extractFloat128Frac1( a );
6820 aSig0 = extractFloat128Frac0( a );
6821 aExp = extractFloat128Exp( a );
6822 aSign = extractFloat128Sign( a );
6823 if ( aExp == 0x7FFF ) {
6824 if ( aSig0 | aSig1 ) {
ff32e16e 6825 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
6826 }
6827 return packFloat32( aSign, 0xFF, 0 );
6828 }
6829 aSig0 |= ( aSig1 != 0 );
6830 shift64RightJamming( aSig0, 18, &aSig0 );
6831 zSig = aSig0;
6832 if ( aExp || zSig ) {
6833 zSig |= 0x40000000;
6834 aExp -= 0x3F81;
6835 }
ff32e16e 6836 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
6837
6838}
6839
6840/*----------------------------------------------------------------------------
6841| Returns the result of converting the quadruple-precision floating-point
6842| value `a' to the double-precision floating-point format. The conversion
6843| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6844| Arithmetic.
6845*----------------------------------------------------------------------------*/
6846
e5a41ffa 6847float64 float128_to_float64(float128 a, float_status *status)
158142c2 6848{
c120391c 6849 bool aSign;
f4014512 6850 int32_t aExp;
bb98fe42 6851 uint64_t aSig0, aSig1;
158142c2
FB
6852
6853 aSig1 = extractFloat128Frac1( a );
6854 aSig0 = extractFloat128Frac0( a );
6855 aExp = extractFloat128Exp( a );
6856 aSign = extractFloat128Sign( a );
6857 if ( aExp == 0x7FFF ) {
6858 if ( aSig0 | aSig1 ) {
ff32e16e 6859 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
6860 }
6861 return packFloat64( aSign, 0x7FF, 0 );
6862 }
6863 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6864 aSig0 |= ( aSig1 != 0 );
6865 if ( aExp || aSig0 ) {
e9321124 6866 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
6867 aExp -= 0x3C01;
6868 }
ff32e16e 6869 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
6870
6871}
6872
158142c2
FB
6873/*----------------------------------------------------------------------------
6874| Returns the result of converting the quadruple-precision floating-point
6875| value `a' to the extended double-precision floating-point format. The
6876| conversion is performed according to the IEC/IEEE Standard for Binary
6877| Floating-Point Arithmetic.
6878*----------------------------------------------------------------------------*/
6879
e5a41ffa 6880floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2 6881{
c120391c 6882 bool aSign;
f4014512 6883 int32_t aExp;
bb98fe42 6884 uint64_t aSig0, aSig1;
158142c2
FB
6885
6886 aSig1 = extractFloat128Frac1( a );
6887 aSig0 = extractFloat128Frac0( a );
6888 aExp = extractFloat128Exp( a );
6889 aSign = extractFloat128Sign( a );
6890 if ( aExp == 0x7FFF ) {
6891 if ( aSig0 | aSig1 ) {
7537c2b4
JM
6892 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6893 status);
6894 return floatx80_silence_nan(res, status);
158142c2 6895 }
0f605c88
LV
6896 return packFloatx80(aSign, floatx80_infinity_high,
6897 floatx80_infinity_low);
158142c2
FB
6898 }
6899 if ( aExp == 0 ) {
6900 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6901 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6902 }
6903 else {
e9321124 6904 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6905 }
6906 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6907 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6908
6909}
6910
158142c2
FB
6911/*----------------------------------------------------------------------------
6912| Rounds the quadruple-precision floating-point value `a' to an integer, and
6913| returns the result as a quadruple-precision floating-point value. The
6914| operation is performed according to the IEC/IEEE Standard for Binary
6915| Floating-Point Arithmetic.
6916*----------------------------------------------------------------------------*/
6917
e5a41ffa 6918float128 float128_round_to_int(float128 a, float_status *status)
158142c2 6919{
/*
 * Overview: the rounding mode is read from status->float_rounding_mode.
 * Three exponent ranges are handled separately:
 *   - aExp >= 0x402F: the rounding position is in the low word (z.low), or
 *     exactly at the lsb of the high word when lastBitMask is 0;
 *   - aExp <  0x3FFF: the result is a signed zero or has exponent 0x3FFF and
 *     a zero fraction;
 *   - otherwise: the rounding position is in the high word and the low word
 *     of the result is cleared.
 * The inexact flag is raised whenever the returned value differs from `a'.
 */
c120391c 6920 bool aSign;
f4014512 6921 int32_t aExp;
bb98fe42 6922 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
6923 float128 z;
6924
6925 aExp = extractFloat128Exp( a );
6926 if ( 0x402F <= aExp ) {
/*
 * At or above 0x406F the value is returned unchanged, except that a NaN
 * (maximum exponent with a non-zero fraction) is passed through
 * propagateFloat128NaN().
 */
6927 if ( 0x406F <= aExp ) {
6928 if ( ( aExp == 0x7FFF )
6929 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6930 ) {
ff32e16e 6931 return propagateFloat128NaN(a, a, status);
158142c2
FB
6932 }
6933 return a;
6934 }
/*
 * lastBitMask selects the lowest bit kept in z.low.  The shift is done in
 * two steps so that a total shift of 64 (aExp == 0x402F) produces 0 instead
 * of an out-of-range shift; roundBitsMask is then all ones.
 */
6935 lastBitMask = 1;
6936 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6937 roundBitsMask = lastBitMask - 1;
6938 z = a;
a2f2d288 6939 switch (status->float_rounding_mode) {
dc355b76 6940 case float_round_nearest_even:
158142c2
FB
/* Add half of the last place; on an exact tie clear the last kept bit. */
6941 if ( lastBitMask ) {
6942 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6943 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6944 }
6945 else {
/* Last kept bit is the lsb of z.high; the top bit of z.low is the half. */
bb98fe42 6946 if ( (int64_t) z.low < 0 ) {
158142c2 6947 ++z.high;
bb98fe42 6948 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
6949 }
6950 }
dc355b76 6951 break;
f9288a76
PM
6952 case float_round_ties_away:
/* Same as nearest-even but without the tie correction. */
6953 if (lastBitMask) {
6954 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6955 } else {
6956 if ((int64_t) z.low < 0) {
6957 ++z.high;
6958 }
6959 }
6960 break;
dc355b76
PM
6961 case float_round_to_zero:
6962 break;
6963 case float_round_up:
/* Only positive values move away from zero. */
6964 if (!extractFloat128Sign(z)) {
6965 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6966 }
6967 break;
6968 case float_round_down:
/* Only negative values move away from zero. */
6969 if (extractFloat128Sign(z)) {
6970 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 6971 }
dc355b76 6972 break;
5d64abb3
RH
6973 case float_round_to_odd:
6974 /*
6975 * Note that if lastBitMask == 0, the last bit is the lsb
6976 * of high, and roundBitsMask == -1.
6977 */
6978 if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
6979 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6980 }
6981 break;
dc355b76
PM
6982 default:
6983 abort();
158142c2
FB
6984 }
/* Discard everything below the last kept bit. */
6985 z.low &= ~ roundBitsMask;
6986 }
6987 else {
6988 if ( aExp < 0x3FFF ) {
/*
 * Exponent below 0x3FFF.  A zero of either sign is returned unchanged; any
 * other value is inexact and becomes either a signed zero or a value with
 * exponent 0x3FFF and zero fraction, depending on the rounding mode.
 */
bb98fe42 6989 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
d82f3b2d 6990 float_raise(float_flag_inexact, status);
158142c2 6991 aSign = extractFloat128Sign( a );
/*
 * NOTE(review): unlike the other two switches in this function, this one
 * has no default case; a rounding mode not listed here falls through to the
 * signed-zero return below.
 */
a2f2d288 6992 switch (status->float_rounding_mode) {
5d64abb3 6993 case float_round_nearest_even:
158142c2
FB
/* Exponent 0x3FFE with a non-zero fraction rounds away from zero. */
6994 if ( ( aExp == 0x3FFE )
6995 && ( extractFloat128Frac0( a )
6996 | extractFloat128Frac1( a ) )
6997 ) {
6998 return packFloat128( aSign, 0x3FFF, 0, 0 );
6999 }
7000 break;
f9288a76
PM
7001 case float_round_ties_away:
/* Here exponent 0x3FFE alone is enough, whatever the fraction. */
7002 if (aExp == 0x3FFE) {
7003 return packFloat128(aSign, 0x3FFF, 0, 0);
7004 }
7005 break;
5d64abb3 7006 case float_round_down:
158142c2
FB
7007 return
7008 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
7009 : packFloat128( 0, 0, 0, 0 );
5d64abb3 7010 case float_round_up:
158142c2
FB
7011 return
7012 aSign ? packFloat128( 1, 0, 0, 0 )
7013 : packFloat128( 0, 0x3FFF, 0, 0 );
5d64abb3
RH
7014
7015 case float_round_to_odd:
7016 return packFloat128(aSign, 0x3FFF, 0, 0);
3dede407
RH
7017
7018 case float_round_to_zero:
7019 break;
158142c2
FB
7020 }
7021 return packFloat128( aSign, 0, 0, 0 );
7022 }
/*
 * Remaining range: the last kept bit lies in the high word.  The low word of
 * the result is zero, and a non-zero a.low only contributes as a sticky
 * indication in the directed and odd rounding cases.
 */
7023 lastBitMask = 1;
7024 lastBitMask <<= 0x402F - aExp;
7025 roundBitsMask = lastBitMask - 1;
7026 z.low = 0;
7027 z.high = a.high;
a2f2d288 7028 switch (status->float_rounding_mode) {
dc355b76 7029 case float_round_nearest_even:
158142c2
FB
/* Add half of the last place; an exact tie also requires a.low == 0. */
7030 z.high += lastBitMask>>1;
7031 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
7032 z.high &= ~ lastBitMask;
7033 }
dc355b76 7034 break;
f9288a76
PM
7035 case float_round_ties_away:
7036 z.high += lastBitMask>>1;
7037 break;
dc355b76
PM
7038 case float_round_to_zero:
7039 break;
7040 case float_round_up:
7041 if (!extractFloat128Sign(z)) {
158142c2
FB
7042 z.high |= ( a.low != 0 );
7043 z.high += roundBitsMask;
7044 }
dc355b76
PM
7045 break;
7046 case float_round_down:
7047 if (extractFloat128Sign(z)) {
7048 z.high |= (a.low != 0);
7049 z.high += roundBitsMask;
7050 }
7051 break;
5d64abb3
RH
7052 case float_round_to_odd:
7053 if ((z.high & lastBitMask) == 0) {
7054 z.high |= (a.low != 0);
7055 z.high += roundBitsMask;
7056 }
7057 break;
dc355b76
PM
7058 default:
7059 abort();
158142c2
FB
7060 }
/* Discard everything below the last kept bit. */
7061 z.high &= ~ roundBitsMask;
7062 }
/* Any change to either word means bits were rounded away. */
7063 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
d82f3b2d 7064 float_raise(float_flag_inexact, status);
158142c2
FB
7065 }
7066 return z;
7067
7068}
7069
158142c2
FB
7070/*----------------------------------------------------------------------------
7071| Returns the result of dividing the quadruple-precision floating-point value
7072| `a' by the corresponding value `b'. The operation is performed according to
7073| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7074*----------------------------------------------------------------------------*/
7075
e5a41ffa 7076float128 float128_div(float128 a, float128 b, float_status *status)
158142c2 7077{
/*
 * Overview: NaN, infinity and zero operands are dispatched first.  For the
 * remaining operands the quotient significand is produced as two 64-bit
 * words (zSig0, zSig1), each taken from estimateDiv128To64() and then
 * corrected downward against an exactly computed remainder, and the result
 * is rounded by roundAndPackFloat128().
 */
c120391c 7078 bool aSign, bSign, zSign;
f4014512 7079 int32_t aExp, bExp, zExp;
bb98fe42
AF
7080 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
7081 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7082
7083 aSig1 = extractFloat128Frac1( a );
7084 aSig0 = extractFloat128Frac0( a );
7085 aExp = extractFloat128Exp( a );
7086 aSign = extractFloat128Sign( a );
7087 bSig1 = extractFloat128Frac1( b );
7088 bSig0 = extractFloat128Frac0( b );
7089 bExp = extractFloat128Exp( b );
7090 bSign = extractFloat128Sign( b );
/* The result sign is the XOR of the operand signs. */
7091 zSign = aSign ^ bSign;
/*
 * a is NaN or infinity: NaNs in either operand are propagated,
 * infinity / infinity is invalid, infinity / finite is a signed infinity.
 */
7092 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7093 if (aSig0 | aSig1) {
7094 return propagateFloat128NaN(a, b, status);
7095 }
158142c2 7096 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7097 if (bSig0 | bSig1) {
7098 return propagateFloat128NaN(a, b, status);
7099 }
158142c2
FB
7100 goto invalid;
7101 }
7102 return packFloat128( zSign, 0x7FFF, 0, 0 );
7103 }
/* b is NaN (propagate) or infinity (finite / infinity is a signed zero). */
7104 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7105 if (bSig0 | bSig1) {
7106 return propagateFloat128NaN(a, b, status);
7107 }
158142c2
FB
7108 return packFloat128( zSign, 0, 0, 0 );
7109 }
/*
 * b has a zero exponent.  A zero divisor gives invalid for 0 / 0 and
 * otherwise raises divbyzero and returns a signed infinity; a subnormal
 * divisor is normalized.
 */
7110 if ( bExp == 0 ) {
7111 if ( ( bSig0 | bSig1 ) == 0 ) {
7112 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7113 invalid:
ff32e16e 7114 float_raise(float_flag_invalid, status);
af39bc8c 7115 return float128_default_nan(status);
158142c2 7116 }
ff32e16e 7117 float_raise(float_flag_divbyzero, status);
158142c2
FB
7118 return packFloat128( zSign, 0x7FFF, 0, 0 );
7119 }
7120 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7121 }
/* a has a zero exponent: a zero dividend gives a signed zero. */
7122 if ( aExp == 0 ) {
7123 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7124 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7125 }
7126 zExp = aExp - bExp + 0x3FFD;
/* Set the implicit bit and left-align both significands by 15 bits. */
7127 shortShift128Left(
e9321124 7128 aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1 );
158142c2 7129 shortShift128Left(
e9321124 7130 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
/*
 * If the divisor significand does not exceed the dividend significand,
 * halve the dividend and compensate in the exponent.
 */
7131 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
7132 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
7133 ++zExp;
7134 }
/* High quotient word: estimate, then decrement while the remainder is < 0. */
7135 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
7136 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
7137 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 7138 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7139 --zSig0;
7140 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
7141 }
/*
 * Low quotient word.  The exact remainder is only computed when the low 14
 * bits of the estimate are <= 4; in that case zSig1 is corrected and a
 * sticky bit is OR-ed in if the remainder is non-zero.
 */
7142 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
7143 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
7144 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
7145 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7146 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7147 --zSig1;
7148 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
7149 }
7150 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7151 }
/* Shift the quotient right by 15; shifted-out bits are collected in zSig2. */
7152 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 7153 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7154
7155}
7156
7157/*----------------------------------------------------------------------------
7158| Returns the remainder of the quadruple-precision floating-point value `a'
7159| with respect to the corresponding value `b'. The operation is performed
7160| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7161*----------------------------------------------------------------------------*/
7162
e5a41ffa 7163float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 7164{
/*
 * Overview: NaN, infinity and zero operands are dispatched first.  The
 * dividend significand is then reduced against the divisor significand in
 * chunks, using deliberately low quotient estimates, and a final
 * subtract-until-negative loop yields two candidate remainders (one
 * non-negative, one negative) from which the result is chosen.
 */
c120391c 7165 bool aSign, zSign;
f4014512 7166 int32_t aExp, bExp, expDiff;
bb98fe42
AF
7167 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
7168 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
7169 int64_t sigMean0;
158142c2
FB
7170
7171 aSig1 = extractFloat128Frac1( a );
7172 aSig0 = extractFloat128Frac0( a );
7173 aExp = extractFloat128Exp( a );
7174 aSign = extractFloat128Sign( a );
7175 bSig1 = extractFloat128Frac1( b );
7176 bSig0 = extractFloat128Frac0( b );
7177 bExp = extractFloat128Exp( b );
158142c2
FB
/* a is NaN or infinity: propagate any NaN, otherwise invalid. */
7178 if ( aExp == 0x7FFF ) {
7179 if ( ( aSig0 | aSig1 )
7180 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7181 return propagateFloat128NaN(a, b, status);
158142c2
FB
7182 }
7183 goto invalid;
7184 }
/* b is NaN (propagate) or infinity (a is returned unchanged). */
7185 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7186 if (bSig0 | bSig1) {
7187 return propagateFloat128NaN(a, b, status);
7188 }
158142c2
FB
7189 return a;
7190 }
/* b zero is invalid; a subnormal b is normalized. */
7191 if ( bExp == 0 ) {
7192 if ( ( bSig0 | bSig1 ) == 0 ) {
7193 invalid:
ff32e16e 7194 float_raise(float_flag_invalid, status);
af39bc8c 7195 return float128_default_nan(status);
158142c2
FB
7196 }
7197 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7198 }
/* a zero is returned unchanged; a subnormal a is normalized. */
7199 if ( aExp == 0 ) {
7200 if ( ( aSig0 | aSig1 ) == 0 ) return a;
7201 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7202 }
/* When a's exponent is more than one below b's, a is returned unchanged. */
7203 expDiff = aExp - bExp;
7204 if ( expDiff < -1 ) return a;
/*
 * Set the implicit bits and left-align both significands; a is aligned one
 * bit less when expDiff is -1.
 */
7205 shortShift128Left(
e9321124 7206 aSig0 | UINT64_C(0x0001000000000000),
158142c2
FB
7207 aSig1,
7208 15 - ( expDiff < 0 ),
7209 &aSig0,
7210 &aSig1
7211 );
7212 shortShift128Left(
e9321124 7213 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
/* First quotient bit: subtract b once if it does not exceed a. */
7214 q = le128( bSig0, bSig1, aSig0, aSig1 );
7215 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7216 expDiff -= 64;
/*
 * Main reduction: each pass lowers the quotient estimate by 4 (clamped at
 * 0), subtracts q * b from a shifted left by 61 bits, and consumes 61 bits
 * of the exponent difference.
 */
7217 while ( 0 < expDiff ) {
7218 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7219 q = ( 4 < q ) ? q - 4 : 0;
7220 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7221 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
7222 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
7223 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
7224 expDiff -= 61;
7225 }
/*
 * Final partial step: when quotient bits remain (expDiff > -64) take one
 * more reduced estimate scaled down by -expDiff bits; otherwise just move
 * both significands down by 12 bits.  Either way b ends up shifted right by
 * 12 for the loop below.
 */
7226 if ( -64 < expDiff ) {
7227 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7228 q = ( 4 < q ) ? q - 4 : 0;
7229 q >>= - expDiff;
7230 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7231 expDiff += 52;
7232 if ( expDiff < 0 ) {
7233 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7234 }
7235 else {
7236 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
7237 }
7238 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7239 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
7240 }
7241 else {
7242 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
7243 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7244 }
/*
 * Subtract b until the remainder turns negative.  On exit aSig holds the
 * first negative remainder and alternateASig the last non-negative one; q
 * counts the subtractions.
 */
7245 do {
7246 alternateASig0 = aSig0;
7247 alternateASig1 = aSig1;
7248 ++q;
7249 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 7250 } while ( 0 <= (int64_t) aSig0 );
/*
 * sigMean is the sum of the two candidates.  The non-negative candidate is
 * selected when that sum is negative, or when the sum is zero and q is odd;
 * otherwise the negative candidate is kept.
 */
158142c2 7251 add128(
bb98fe42 7252 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
7253 if ( ( sigMean0 < 0 )
7254 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
7255 aSig0 = alternateASig0;
7256 aSig1 = alternateASig1;
7257 }
/* A negative remainder is negated and flips the sign of the result. */
bb98fe42 7258 zSign = ( (int64_t) aSig0 < 0 );
158142c2 7259 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
/* The result is packed with exponent bExp - 4 and sign aSign ^ zSign. */
7260 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
7261 status);
158142c2
FB
7262}
7263
7264/*----------------------------------------------------------------------------
7265| Returns the square root of the quadruple-precision floating-point value `a'.
7266| The operation is performed according to the IEC/IEEE Standard for Binary
7267| Floating-Point Arithmetic.
7268*----------------------------------------------------------------------------*/
7269
e5a41ffa 7270float128 float128_sqrt(float128 a, float_status *status)
158142c2 7271{
c120391c 7272 bool aSign;
f4014512 7273 int32_t aExp, zExp;
bb98fe42
AF
7274 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
7275 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7276
7277 aSig1 = extractFloat128Frac1( a );
7278 aSig0 = extractFloat128Frac0( a );
7279 aExp = extractFloat128Exp( a );
7280 aSign = extractFloat128Sign( a );
7281 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7282 if (aSig0 | aSig1) {
7283 return propagateFloat128NaN(a, a, status);
7284 }
158142c2
FB
7285 if ( ! aSign ) return a;
7286 goto invalid;
7287 }
7288 if ( aSign ) {
7289 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
7290 invalid:
ff32e16e 7291 float_raise(float_flag_invalid, status);
af39bc8c 7292 return float128_default_nan(status);
158142c2
FB
7293 }
7294 if ( aExp == 0 ) {
7295 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
7296 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7297 }
7298 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
e9321124 7299 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7300 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
7301 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
7302 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
7303 doubleZSig0 = zSig0<<1;
7304 mul64To128( zSig0, zSig0, &term0, &term1 );
7305 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 7306 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7307 --zSig0;
7308 doubleZSig0 -= 2;
7309 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
7310 }
7311 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
7312 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
7313 if ( zSig1 == 0 ) zSig1 = 1;
7314 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
7315 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
7316 mul64To128( zSig1, zSig1, &term2, &term3 );
7317 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7318 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7319 --zSig1;
7320 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
7321 term3 |= 1;
7322 term2 |= doubleZSig0;
7323 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
7324 }
7325 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7326 }
7327 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 7328 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7329
7330}
7331
71bfd65c
RH
7332static inline FloatRelation
7333floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
7334 float_status *status)
f6714d36 7335{
c120391c 7336 bool aSign, bSign;
f6714d36 7337
d1eb8f2a
AD
7338 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7339 float_raise(float_flag_invalid, status);
7340 return float_relation_unordered;
7341 }
f6714d36
AJ
7342 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7343 ( extractFloatx80Frac( a )<<1 ) ) ||
7344 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7345 ( extractFloatx80Frac( b )<<1 ) )) {
7346 if (!is_quiet ||
af39bc8c
AM
7347 floatx80_is_signaling_nan(a, status) ||
7348 floatx80_is_signaling_nan(b, status)) {
ff32e16e 7349 float_raise(float_flag_invalid, status);
f6714d36
AJ
7350 }
7351 return float_relation_unordered;
7352 }
7353 aSign = extractFloatx80Sign( a );
7354 bSign = extractFloatx80Sign( b );
7355 if ( aSign != bSign ) {
7356
7357 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7358 ( ( a.low | b.low ) == 0 ) ) {
7359 /* zero case */
7360 return float_relation_equal;
7361 } else {
7362 return 1 - (2 * aSign);
7363 }
7364 } else {
be53fa78
JM
7365 /* Normalize pseudo-denormals before comparison. */
7366 if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
7367 ++a.high;
7368 }
7369 if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
7370 ++b.high;
7371 }
f6714d36
AJ
7372 if (a.low == b.low && a.high == b.high) {
7373 return float_relation_equal;
7374 } else {
7375 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7376 }
7377 }
7378}
7379
71bfd65c 7380FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 7381{
ff32e16e 7382 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
7383}
7384
71bfd65c
RH
7385FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
7386 float_status *status)
f6714d36 7387{
ff32e16e 7388 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
7389}
7390
71bfd65c
RH
7391static inline FloatRelation
7392float128_compare_internal(float128 a, float128 b, bool is_quiet,
7393 float_status *status)
1f587329 7394{
c120391c 7395 bool aSign, bSign;
1f587329
BS
7396
7397 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7398 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7399 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7400 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7401 if (!is_quiet ||
af39bc8c
AM
7402 float128_is_signaling_nan(a, status) ||
7403 float128_is_signaling_nan(b, status)) {
ff32e16e 7404 float_raise(float_flag_invalid, status);
1f587329
BS
7405 }
7406 return float_relation_unordered;
7407 }
7408 aSign = extractFloat128Sign( a );
7409 bSign = extractFloat128Sign( b );
7410 if ( aSign != bSign ) {
7411 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7412 /* zero case */
7413 return float_relation_equal;
7414 } else {
7415 return 1 - (2 * aSign);
7416 }
7417 } else {
7418 if (a.low == b.low && a.high == b.high) {
7419 return float_relation_equal;
7420 } else {
7421 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7422 }
7423 }
7424}
7425
71bfd65c 7426FloatRelation float128_compare(float128 a, float128 b, float_status *status)
1f587329 7427{
ff32e16e 7428 return float128_compare_internal(a, b, 0, status);
1f587329
BS
7429}
7430
71bfd65c
RH
7431FloatRelation float128_compare_quiet(float128 a, float128 b,
7432 float_status *status)
1f587329 7433{
ff32e16e 7434 return float128_compare_internal(a, b, 1, status);
1f587329
BS
7435}
7436
e5a41ffa 7437floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb 7438{
c120391c 7439 bool aSign;
326b9e98 7440 int32_t aExp;
bb98fe42 7441 uint64_t aSig;
9ee6e8bb 7442
d1eb8f2a
AD
7443 if (floatx80_invalid_encoding(a)) {
7444 float_raise(float_flag_invalid, status);
7445 return floatx80_default_nan(status);
7446 }
9ee6e8bb
PB
7447 aSig = extractFloatx80Frac( a );
7448 aExp = extractFloatx80Exp( a );
7449 aSign = extractFloatx80Sign( a );
7450
326b9e98
AJ
7451 if ( aExp == 0x7FFF ) {
7452 if ( aSig<<1 ) {
ff32e16e 7453 return propagateFloatx80NaN(a, a, status);
326b9e98 7454 }
9ee6e8bb
PB
7455 return a;
7456 }
326b9e98 7457
3c85c37f
PM
7458 if (aExp == 0) {
7459 if (aSig == 0) {
7460 return a;
7461 }
7462 aExp++;
7463 }
69397542 7464
326b9e98
AJ
7465 if (n > 0x10000) {
7466 n = 0x10000;
7467 } else if (n < -0x10000) {
7468 n = -0x10000;
7469 }
7470
9ee6e8bb 7471 aExp += n;
a2f2d288
PM
7472 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7473 aSign, aExp, aSig, 0, status);
9ee6e8bb 7474}
9ee6e8bb 7475
e5a41ffa 7476float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb 7477{
c120391c 7478 bool aSign;
326b9e98 7479 int32_t aExp;
bb98fe42 7480 uint64_t aSig0, aSig1;
9ee6e8bb
PB
7481
7482 aSig1 = extractFloat128Frac1( a );
7483 aSig0 = extractFloat128Frac0( a );
7484 aExp = extractFloat128Exp( a );
7485 aSign = extractFloat128Sign( a );
7486 if ( aExp == 0x7FFF ) {
326b9e98 7487 if ( aSig0 | aSig1 ) {
ff32e16e 7488 return propagateFloat128NaN(a, a, status);
326b9e98 7489 }
9ee6e8bb
PB
7490 return a;
7491 }
3c85c37f 7492 if (aExp != 0) {
e9321124 7493 aSig0 |= UINT64_C(0x0001000000000000);
3c85c37f 7494 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 7495 return a;
3c85c37f
PM
7496 } else {
7497 aExp++;
7498 }
69397542 7499
326b9e98
AJ
7500 if (n > 0x10000) {
7501 n = 0x10000;
7502 } else if (n < -0x10000) {
7503 n = -0x10000;
7504 }
7505
69397542
PB
7506 aExp += n - 1;
7507 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 7508 , status);
9ee6e8bb
PB
7509
7510}
f6b3b108
EC
7511
7512static void __attribute__((constructor)) softfloat_init(void)
7513{
7514 union_float64 ua, ub, uc, ur;
7515
7516 if (QEMU_NO_HARDFLOAT) {
7517 return;
7518 }
7519 /*
7520 * Test that the host's FMA is not obviously broken. For example,
7521 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
7522 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
7523 */
7524 ua.s = 0x0020000000000001ULL;
7525 ub.s = 0x3ca0000000000000ULL;
7526 uc.s = 0x0020000000000000ULL;
7527 ur.h = fma(ua.h, ub.h, uc.h);
7528 if (ur.s != 0x0020000000000001ULL) {
7529 force_soft_fma = true;
7530 }
7531}