]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
softfloat: Move minmax_flags to softfloat-parts.c.inc
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.c.inc) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is neither fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
d82f3b2d 135 float_raise(float_flag_input_denormal, s); \
a94b7839
EC
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
185/*
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
189 */
/* Naming: QEMU_HARDFLOAT_<number of inputs>F<float size>_USE_FP. */
#if defined(__x86_64__)
/* On x86_64 hosts only the 64-bit variants use fpclassify(). */
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
/* Everywhere else every variant uses the float32/64_* primitives. */
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
205
206/*
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211 */
/* 1: f32_is_inf/f64_is_inf call isinf(); 0: they call float{32,64}_is_infinity. */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif
217
218/*
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
221 * already set.
222 */
/*
 * QEMU_NO_HARDFLOAT is 1 for TARGET_PPC builds and for builds with
 * -ffast-math (which also emit a compile-time warning), 0 otherwise.
 * QEMU_SOFTFLOAT_ATTR additionally carries noinline when hardfloat is enabled.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
a94b7839
EC
342static inline float32
343float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
b240c9c4 345 f32_check_fn pre, f32_check_fn post)
a94b7839
EC
346{
347 union_float32 ua, ub, ur;
348
349 ua.s = xa;
350 ub.s = xb;
351
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
354 }
355
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
359 }
a94b7839
EC
360
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 363 float_raise(float_flag_overflow, s);
b240c9c4
RH
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
a94b7839
EC
366 }
367 return ur.s;
368
369 soft:
370 return soft(ua.s, ub.s, s);
371}
372
373static inline float64
374float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
b240c9c4 376 f64_check_fn pre, f64_check_fn post)
a94b7839
EC
377{
378 union_float64 ua, ub, ur;
379
380 ua.s = xa;
381 ub.s = xb;
382
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
385 }
386
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
390 }
a94b7839
EC
391
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 394 float_raise(float_flag_overflow, s);
b240c9c4
RH
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
a94b7839
EC
397 }
398 return ur.s;
399
400 soft:
401 return soft(ua.s, ub.s, s);
402}
403
d97544c9
AB
404/*----------------------------------------------------------------------------
405| Returns the fraction bits of the single-precision floating-point value `a'.
406*----------------------------------------------------------------------------*/
407
408static inline uint32_t extractFloat32Frac(float32 a)
409{
410 return float32_val(a) & 0x007FFFFF;
411}
412
413/*----------------------------------------------------------------------------
414| Returns the exponent bits of the single-precision floating-point value `a'.
415*----------------------------------------------------------------------------*/
416
417static inline int extractFloat32Exp(float32 a)
418{
419 return (float32_val(a) >> 23) & 0xFF;
420}
421
422/*----------------------------------------------------------------------------
423| Returns the sign bit of the single-precision floating-point value `a'.
424*----------------------------------------------------------------------------*/
425
c120391c 426static inline bool extractFloat32Sign(float32 a)
d97544c9
AB
427{
428 return float32_val(a) >> 31;
429}
430
431/*----------------------------------------------------------------------------
432| Returns the fraction bits of the double-precision floating-point value `a'.
433*----------------------------------------------------------------------------*/
434
435static inline uint64_t extractFloat64Frac(float64 a)
436{
e9321124 437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
d97544c9
AB
438}
439
440/*----------------------------------------------------------------------------
441| Returns the exponent bits of the double-precision floating-point value `a'.
442*----------------------------------------------------------------------------*/
443
444static inline int extractFloat64Exp(float64 a)
445{
446 return (float64_val(a) >> 52) & 0x7FF;
447}
448
449/*----------------------------------------------------------------------------
450| Returns the sign bit of the double-precision floating-point value `a'.
451*----------------------------------------------------------------------------*/
452
c120391c 453static inline bool extractFloat64Sign(float64 a)
d97544c9
AB
454{
455 return float64_val(a) >> 63;
456}
457
a90119b5
AB
/*
 * Classify a floating point number.  The NaN classes are ordered last, so
 * cls >= float_class_qnan is true for any NaN and false for everything else.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
471
134eda00
RH
472#define float_cmask(bit) (1u << (bit))
473
474enum {
475 float_cmask_zero = float_cmask(float_class_zero),
476 float_cmask_normal = float_cmask(float_class_normal),
477 float_cmask_inf = float_cmask(float_class_inf),
478 float_cmask_qnan = float_cmask(float_class_qnan),
479 float_cmask_snan = float_cmask(float_class_snan),
480
481 float_cmask_infzero = float_cmask_zero | float_cmask_inf,
482 float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
483};
484
e1c4667a
RH
/* Flags for parts_minmax.  The values are distinct bits and may be OR-ed. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
};
134eda00 494
247d1f21
RH
495/* Simple helpers for checking if, or what kind of, NaN we have */
496static inline __attribute__((unused)) bool is_nan(FloatClass c)
497{
498 return unlikely(c >= float_class_qnan);
499}
500
501static inline __attribute__((unused)) bool is_snan(FloatClass c)
502{
503 return c == float_class_snan;
504}
505
506static inline __attribute__((unused)) bool is_qnan(FloatClass c)
507{
508 return c == float_class_qnan;
509}
510
a90119b5 511/*
0018b1f4
RH
512 * Structure holding all of the decomposed parts of a float.
513 * The exponent is unbiased and the fraction is normalized.
a90119b5 514 *
0018b1f4
RH
515 * The fraction words are stored in big-endian word ordering,
516 * so that truncation from a larger format to a smaller format
517 * can be done simply by ignoring subsequent elements.
a90119b5
AB
518 */
519
520typedef struct {
a90119b5
AB
521 FloatClass cls;
522 bool sign;
4109b9ea
RH
523 int32_t exp;
524 union {
525 /* Routines that know the structure may reference the singular name. */
526 uint64_t frac;
527 /*
528 * Routines expanded with multiple structures reference "hi" and "lo"
529 * depending on the operation. In FloatParts64, "hi" and "lo" are
530 * both the same word and aliased here.
531 */
532 uint64_t frac_hi;
533 uint64_t frac_lo;
534 };
f8155c1d 535} FloatParts64;
a90119b5 536
0018b1f4
RH
537typedef struct {
538 FloatClass cls;
539 bool sign;
540 int32_t exp;
541 uint64_t frac_hi;
542 uint64_t frac_lo;
543} FloatParts128;
544
aca84527
RH
545typedef struct {
546 FloatClass cls;
547 bool sign;
548 int32_t exp;
549 uint64_t frac_hi;
550 uint64_t frac_hm; /* high-middle */
551 uint64_t frac_lm; /* low-middle */
552 uint64_t frac_lo;
553} FloatParts256;
554
/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
a90119b5
AB
558
559/* Structure holding all of the relevant parameters for a format.
560 * exp_size: the size of the exponent field
561 * exp_bias: the offset applied to the exponent field
562 * exp_max: the maximum normalised exponent
563 * frac_size: the size of the fraction field
564 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
565 * The following are computed based on the size of the fraction
566 * frac_lsb: least significant bit of fraction
ca3a3d5a 567 * frac_lsbm1: the bit below the least significant bit (for rounding)
a90119b5 568 * round_mask/roundeven_mask: masks used for rounding
ca3a3d5a
AB
569 * The following optional modifiers are available:
570 * arm_althp: handle ARM Alternative Half Precision
a90119b5
AB
571 */
typedef struct {
    int exp_size;            /* size of the exponent field */
    int exp_bias;            /* offset applied to the exponent field */
    int exp_max;             /* maximum normalised exponent */
    int frac_size;           /* size of the fraction field */
    int frac_shift;          /* shift to normalise the fraction */
    uint64_t frac_lsb;       /* least significant bit of fraction */
    uint64_t frac_lsbm1;     /* the bit below frac_lsb (for rounding) */
    uint64_t round_mask;     /* mask used for rounding */
    uint64_t roundeven_mask; /* mask used for rounding */
    bool arm_althp;          /* handle ARM Alternative Half Precision */
} FloatFmt;
584
/*
 * Expand the FloatFmt fields that follow from the size of the exponent (E)
 * and of the fraction (F).  Expands to a list of designated initializers
 * with no trailing comma, so further fields may be appended after it.
 *
 * Both arguments are parenthesized on every use so that expressions such
 * as FLOAT_PARAMS(8, 20 + 3) expand with the intended precedence.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = (E),                                           \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                         \
    .exp_max        = (1 << (E)) - 1,                                \
    .frac_size      = (F),                                           \
    .frac_shift     = (-(F) - 1) & 63,                               \
    .frac_lsb       = 1ull << ((-(F) - 1) & 63),                     \
    .frac_lsbm1     = 1ull << ((-(F) - 2) & 63),                     \
    .round_mask     = (1ull << ((-(F) - 1) & 63)) - 1,               \
    .roundeven_mask = (2ull << ((-(F) - 1) & 63)) - 1
a90119b5
AB
596
597static const FloatFmt float16_params = {
598 FLOAT_PARAMS(5, 10)
599};
600
6fed16b2
AB
601static const FloatFmt float16_params_ahp = {
602 FLOAT_PARAMS(5, 10),
603 .arm_althp = true
604};
605
8282310d
LZ
606static const FloatFmt bfloat16_params = {
607 FLOAT_PARAMS(8, 7)
608};
609
a90119b5
AB
610static const FloatFmt float32_params = {
611 FLOAT_PARAMS(8, 23)
612};
613
614static const FloatFmt float64_params = {
615 FLOAT_PARAMS(11, 52)
616};
617
0018b1f4
RH
618static const FloatFmt float128_params = {
619 FLOAT_PARAMS(15, 112)
620};
621
6fff2167 622/* Unpack a float to parts, but do not canonicalize. */
d8fdd172 623static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
6fff2167 624{
d8fdd172
RH
625 const int f_size = fmt->frac_size;
626 const int e_size = fmt->exp_size;
6fff2167 627
d8fdd172 628 *r = (FloatParts64) {
6fff2167 629 .cls = float_class_unclassified,
d8fdd172
RH
630 .sign = extract64(raw, f_size + e_size, 1),
631 .exp = extract64(raw, f_size, e_size),
632 .frac = extract64(raw, 0, f_size)
6fff2167
AB
633 };
634}
635
3dddb203 636static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
6fff2167 637{
3dddb203 638 unpack_raw64(p, &float16_params, f);
6fff2167
AB
639}
640
3dddb203 641static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
8282310d 642{
3dddb203 643 unpack_raw64(p, &bfloat16_params, f);
8282310d
LZ
644}
645
3dddb203 646static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
6fff2167 647{
3dddb203 648 unpack_raw64(p, &float32_params, f);
6fff2167
AB
649}
650
3dddb203 651static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
6fff2167 652{
3dddb203 653 unpack_raw64(p, &float64_params, f);
6fff2167
AB
654}
655
0018b1f4
RH
656static void float128_unpack_raw(FloatParts128 *p, float128 f)
657{
658 const int f_size = float128_params.frac_size - 64;
659 const int e_size = float128_params.exp_size;
660
661 *p = (FloatParts128) {
662 .cls = float_class_unclassified,
663 .sign = extract64(f.high, f_size + e_size, 1),
664 .exp = extract64(f.high, f_size, e_size),
665 .frac_hi = extract64(f.high, 0, f_size),
666 .frac_lo = f.low,
667 };
668}
669
6fff2167 670/* Pack a float from parts, but do not canonicalize. */
9e4af58c 671static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
6fff2167 672{
9e4af58c
RH
673 const int f_size = fmt->frac_size;
674 const int e_size = fmt->exp_size;
675 uint64_t ret;
676
677 ret = (uint64_t)p->sign << (f_size + e_size);
678 ret = deposit64(ret, f_size, e_size, p->exp);
679 ret = deposit64(ret, 0, f_size, p->frac);
680 return ret;
6fff2167
AB
681}
682
71fd178e 683static inline float16 float16_pack_raw(const FloatParts64 *p)
6fff2167 684{
71fd178e 685 return make_float16(pack_raw64(p, &float16_params));
6fff2167
AB
686}
687
71fd178e 688static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
8282310d 689{
71fd178e 690 return pack_raw64(p, &bfloat16_params);
8282310d
LZ
691}
692
71fd178e 693static inline float32 float32_pack_raw(const FloatParts64 *p)
6fff2167 694{
71fd178e 695 return make_float32(pack_raw64(p, &float32_params));
6fff2167
AB
696}
697
71fd178e 698static inline float64 float64_pack_raw(const FloatParts64 *p)
6fff2167 699{
71fd178e 700 return make_float64(pack_raw64(p, &float64_params));
6fff2167
AB
701}
702
0018b1f4
RH
703static float128 float128_pack_raw(const FloatParts128 *p)
704{
705 const int f_size = float128_params.frac_size - 64;
706 const int e_size = float128_params.exp_size;
707 uint64_t hi;
708
709 hi = (uint64_t)p->sign << (f_size + e_size);
710 hi = deposit64(hi, f_size, e_size, p->exp);
711 hi = deposit64(hi, 0, f_size, p->frac_hi);
712 return make_float128(hi, p->frac_lo);
713}
714
0664335a
RH
715/*----------------------------------------------------------------------------
716| Functions and definitions to determine: (1) whether tininess for underflow
717| is detected before or after rounding by default, (2) what (if anything)
718| happens when exceptions are raised, (3) how signaling NaNs are distinguished
719| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
720| are propagated from function inputs to output. These details are target-
721| specific.
722*----------------------------------------------------------------------------*/
139c1837 723#include "softfloat-specialize.c.inc"
0664335a 724
0018b1f4
RH
725#define PARTS_GENERIC_64_128(NAME, P) \
726 QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
727
dedd123c
RH
728#define PARTS_GENERIC_64_128_256(NAME, P) \
729 QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
730 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
731
e9034ea8 732#define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
0018b1f4
RH
733#define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
734
7c45bad8
RH
735static void parts64_return_nan(FloatParts64 *a, float_status *s);
736static void parts128_return_nan(FloatParts128 *a, float_status *s);
737
738#define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
0018b1f4 739
22c355f4
RH
740static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
741 float_status *s);
742static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
743 float_status *s);
744
745#define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
746
979582d0
RH
747static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
748 FloatParts64 *c, float_status *s,
749 int ab_mask, int abc_mask);
750static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
751 FloatParts128 *b,
752 FloatParts128 *c,
753 float_status *s,
754 int ab_mask, int abc_mask);
755
756#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
757 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
758
d46975bc
RH
759static void parts64_canonicalize(FloatParts64 *p, float_status *status,
760 const FloatFmt *fmt);
761static void parts128_canonicalize(FloatParts128 *p, float_status *status,
762 const FloatFmt *fmt);
763
764#define parts_canonicalize(A, S, F) \
765 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
766
ee6959f2
RH
767static void parts64_uncanon(FloatParts64 *p, float_status *status,
768 const FloatFmt *fmt);
769static void parts128_uncanon(FloatParts128 *p, float_status *status,
770 const FloatFmt *fmt);
771
772#define parts_uncanon(A, S, F) \
773 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
774
da10a907
RH
775static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
776static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 777static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
778
779#define parts_add_normal(A, B) \
dedd123c 780 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
da10a907
RH
781
782static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
783static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 784static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
785
786#define parts_sub_normal(A, B) \
dedd123c 787 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
da10a907
RH
788
789static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
790 float_status *s, bool subtract);
791static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
792 float_status *s, bool subtract);
793
794#define parts_addsub(A, B, S, Z) \
795 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
796
aca84527
RH
797static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
798 float_status *s);
799static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
800 float_status *s);
801
802#define parts_mul(A, B, S) \
803 PARTS_GENERIC_64_128(mul, A)(A, B, S)
804
dedd123c
RH
805static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
806 FloatParts64 *c, int flags,
807 float_status *s);
808static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
809 FloatParts128 *c, int flags,
810 float_status *s);
811
812#define parts_muladd(A, B, C, Z, S) \
813 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
814
ec961b81
RH
815static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
816 float_status *s);
817static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
818 float_status *s);
819
820#define parts_div(A, B, S) \
821 PARTS_GENERIC_64_128(div, A)(A, B, S)
822
afc34931
RH
823static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
824 int scale, int frac_size);
825static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
826 int scale, int frac_size);
827
828#define parts_round_to_int_normal(A, R, C, F) \
829 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
830
831static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
832 int scale, float_status *s,
833 const FloatFmt *fmt);
834static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
835 int scale, float_status *s,
836 const FloatFmt *fmt);
837
838#define parts_round_to_int(A, R, C, S, F) \
839 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
840
463b3f0d
RH
841static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
842 int scale, int64_t min, int64_t max,
843 float_status *s);
844static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
845 int scale, int64_t min, int64_t max,
846 float_status *s);
847
848#define parts_float_to_sint(P, R, Z, MN, MX, S) \
849 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
850
4ab4aef0
RH
851static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
852 int scale, uint64_t max,
853 float_status *s);
854static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
855 int scale, uint64_t max,
856 float_status *s);
857
858#define parts_float_to_uint(P, R, Z, M, S) \
859 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
860
e3689519
RH
861static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
862 int scale, float_status *s);
863static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
864 int scale, float_status *s);
865
866#define parts_sint_to_float(P, I, Z, S) \
867 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
868
37c954a1
RH
869static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
870 int scale, float_status *s);
871static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
872 int scale, float_status *s);
873
874#define parts_uint_to_float(P, I, Z, S) \
875 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
876
e1c4667a
RH
877static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
878 float_status *s, int flags);
879static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
880 float_status *s, int flags);
881
882#define parts_minmax(A, B, S, F) \
883 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
884
0018b1f4
RH
885/*
886 * Helper functions for softfloat-parts.c.inc, per-size operations.
887 */
888
22c355f4
RH
/*
 * Pick the per-size frac helper named NAME from the pointer type of P.
 * A FloatParts128 * argument maps to frac128_NAME and the fallback is
 * frac64_NAME.  (QEMU_GENERIC is defined elsewhere; presumably it is a
 * _Generic-style type selector.)
 */
#define FRAC_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)

/* As above, with an additional FloatParts256 * -> frac256_NAME mapping. */
#define FRAC_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
                 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
895
da10a907
RH
896static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
897{
898 return uadd64_overflow(a->frac, b->frac, &r->frac);
899}
900
901static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
902{
903 bool c = 0;
904 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
905 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
906 return c;
907}
908
dedd123c
RH
909static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
910{
911 bool c = 0;
912 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
913 r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
914 r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
915 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
916 return c;
917}
918
919#define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
da10a907 920
ee6959f2
RH
921static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
922{
923 return uadd64_overflow(a->frac, c, &r->frac);
924}
925
926static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
927{
928 c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
929 return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
930}
931
932#define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
933
934static void frac64_allones(FloatParts64 *a)
935{
936 a->frac = -1;
937}
938
939static void frac128_allones(FloatParts128 *a)
940{
941 a->frac_hi = a->frac_lo = -1;
942}
943
944#define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
945
22c355f4
RH
946static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
947{
948 return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
949}
950
951static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
952{
953 uint64_t ta = a->frac_hi, tb = b->frac_hi;
954 if (ta == tb) {
955 ta = a->frac_lo, tb = b->frac_lo;
956 if (ta == tb) {
957 return 0;
958 }
959 }
960 return ta < tb ? -1 : 1;
961}
962
963#define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
964
d46975bc 965static void frac64_clear(FloatParts64 *a)
0018b1f4 966{
d46975bc
RH
967 a->frac = 0;
968}
969
970static void frac128_clear(FloatParts128 *a)
971{
972 a->frac_hi = a->frac_lo = 0;
0018b1f4
RH
973}
974
d46975bc 975#define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
0018b1f4 976
ec961b81
RH
977static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
978{
979 uint64_t n1, n0, r, q;
980 bool ret;
981
982 /*
983 * We want a 2*N / N-bit division to produce exactly an N-bit
984 * result, so that we do not lose any precision and so that we
985 * do not have to renormalize afterward. If A.frac < B.frac,
986 * then division would produce an (N-1)-bit result; shift A left
987 * by one to produce the an N-bit result, and return true to
988 * decrement the exponent to match.
989 *
990 * The udiv_qrnnd algorithm that we're using requires normalization,
991 * i.e. the msb of the denominator must be set, which is already true.
992 */
993 ret = a->frac < b->frac;
994 if (ret) {
995 n0 = a->frac;
996 n1 = 0;
997 } else {
998 n0 = a->frac >> 1;
999 n1 = a->frac << 63;
1000 }
1001 q = udiv_qrnnd(&r, n0, n1, b->frac);
1002
1003 /* Set lsb if there is a remainder, to set inexact. */
1004 a->frac = q | (r != 0);
1005
1006 return ret;
1007}
1008
1009static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1010{
1011 uint64_t q0, q1, a0, a1, b0, b1;
1012 uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1013 bool ret = false;
1014
1015 a0 = a->frac_hi, a1 = a->frac_lo;
1016 b0 = b->frac_hi, b1 = b->frac_lo;
1017
1018 ret = lt128(a0, a1, b0, b1);
1019 if (!ret) {
1020 a1 = shr_double(a0, a1, 1);
1021 a0 = a0 >> 1;
1022 }
1023
1024 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1025 q0 = estimateDiv128To64(a0, a1, b0);
1026
1027 /*
1028 * Estimate is high because B1 was not included (unless B1 == 0).
1029 * Reduce quotient and increase remainder until remainder is non-negative.
1030 * This loop will execute 0 to 2 times.
1031 */
1032 mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1033 sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1034 while (r0 != 0) {
1035 q0--;
1036 add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1037 }
1038
1039 /* Repeat using the remainder, producing a second word of quotient. */
1040 q1 = estimateDiv128To64(r1, r2, b0);
1041 mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1042 sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1043 while (r1 != 0) {
1044 q1--;
1045 add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1046 }
1047
1048 /* Any remainder indicates inexact; set sticky bit. */
1049 q1 |= (r2 | r3) != 0;
1050
1051 a->frac_hi = q0;
1052 a->frac_lo = q1;
1053 return ret;
1054}
1055
1056#define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1057
d46975bc 1058static bool frac64_eqz(FloatParts64 *a)
0018b1f4 1059{
d46975bc
RH
1060 return a->frac == 0;
1061}
1062
1063static bool frac128_eqz(FloatParts128 *a)
1064{
1065 return (a->frac_hi | a->frac_lo) == 0;
0018b1f4
RH
1066}
1067
d46975bc 1068#define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
0fc07cad 1069
aca84527
RH
1070static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1071{
1072 mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1073}
1074
1075static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1076{
1077 mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1078 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1079}
1080
1081#define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1082
da10a907
RH
1083static void frac64_neg(FloatParts64 *a)
1084{
1085 a->frac = -a->frac;
1086}
1087
1088static void frac128_neg(FloatParts128 *a)
1089{
1090 bool c = 0;
1091 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1092 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1093}
1094
dedd123c
RH
1095static void frac256_neg(FloatParts256 *a)
1096{
1097 bool c = 0;
1098 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1099 a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1100 a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1101 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1102}
1103
1104#define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
da10a907 1105
d46975bc 1106static int frac64_normalize(FloatParts64 *a)
6fff2167 1107{
d46975bc
RH
1108 if (a->frac) {
1109 int shift = clz64(a->frac);
1110 a->frac <<= shift;
1111 return shift;
1112 }
1113 return 64;
1114}
1115
1116static int frac128_normalize(FloatParts128 *a)
1117{
1118 if (a->frac_hi) {
1119 int shl = clz64(a->frac_hi);
463e45dc
RH
1120 a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1121 a->frac_lo <<= shl;
d46975bc
RH
1122 return shl;
1123 } else if (a->frac_lo) {
1124 int shl = clz64(a->frac_lo);
463e45dc 1125 a->frac_hi = a->frac_lo << shl;
d46975bc
RH
1126 a->frac_lo = 0;
1127 return shl + 64;
6fff2167 1128 }
d46975bc 1129 return 128;
6fff2167
AB
1130}
1131
dedd123c
RH
1132static int frac256_normalize(FloatParts256 *a)
1133{
1134 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1135 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
463e45dc 1136 int ret, shl;
dedd123c
RH
1137
1138 if (likely(a0)) {
1139 shl = clz64(a0);
1140 if (shl == 0) {
1141 return 0;
1142 }
1143 ret = shl;
1144 } else {
1145 if (a1) {
1146 ret = 64;
1147 a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1148 } else if (a2) {
1149 ret = 128;
1150 a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1151 } else if (a3) {
1152 ret = 192;
1153 a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1154 } else {
1155 ret = 256;
1156 a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1157 goto done;
1158 }
1159 shl = clz64(a0);
1160 if (shl == 0) {
1161 goto done;
1162 }
1163 ret += shl;
1164 }
1165
463e45dc
RH
1166 a0 = shl_double(a0, a1, shl);
1167 a1 = shl_double(a1, a2, shl);
1168 a2 = shl_double(a2, a3, shl);
1169 a3 <<= shl;
dedd123c
RH
1170
1171 done:
1172 a->frac_hi = a0;
1173 a->frac_hm = a1;
1174 a->frac_lm = a2;
1175 a->frac_lo = a3;
1176 return ret;
1177}
1178
1179#define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
d46975bc
RH
1180
1181static void frac64_shl(FloatParts64 *a, int c)
1182{
1183 a->frac <<= c;
1184}
1185
1186static void frac128_shl(FloatParts128 *a, int c)
1187{
463e45dc
RH
1188 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1189
1190 if (c & 64) {
1191 a0 = a1, a1 = 0;
1192 }
1193
1194 c &= 63;
1195 if (c) {
1196 a0 = shl_double(a0, a1, c);
1197 a1 = a1 << c;
1198 }
1199
1200 a->frac_hi = a0;
1201 a->frac_lo = a1;
d46975bc
RH
1202}
1203
1204#define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1205
1206static void frac64_shr(FloatParts64 *a, int c)
1207{
1208 a->frac >>= c;
1209}
1210
1211static void frac128_shr(FloatParts128 *a, int c)
1212{
463e45dc
RH
1213 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1214
1215 if (c & 64) {
1216 a1 = a0, a0 = 0;
1217 }
1218
1219 c &= 63;
1220 if (c) {
1221 a1 = shr_double(a0, a1, c);
1222 a0 = a0 >> c;
1223 }
1224
1225 a->frac_hi = a0;
1226 a->frac_lo = a1;
d46975bc
RH
1227}
1228
1229#define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1230
ee6959f2 1231static void frac64_shrjam(FloatParts64 *a, int c)
6fff2167 1232{
463e45dc
RH
1233 uint64_t a0 = a->frac;
1234
1235 if (likely(c != 0)) {
1236 if (likely(c < 64)) {
1237 a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1238 } else {
1239 a0 = a0 != 0;
1240 }
1241 a->frac = a0;
1242 }
ee6959f2 1243}
6fff2167 1244
ee6959f2
RH
1245static void frac128_shrjam(FloatParts128 *a, int c)
1246{
463e45dc
RH
1247 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1248 uint64_t sticky = 0;
1249
1250 if (unlikely(c == 0)) {
1251 return;
1252 } else if (likely(c < 64)) {
1253 /* nothing */
1254 } else if (likely(c < 128)) {
1255 sticky = a1;
1256 a1 = a0;
1257 a0 = 0;
1258 c &= 63;
1259 if (c == 0) {
1260 goto done;
1261 }
1262 } else {
1263 sticky = a0 | a1;
1264 a0 = a1 = 0;
1265 goto done;
1266 }
1267
1268 sticky |= shr_double(a1, 0, c);
1269 a1 = shr_double(a0, a1, c);
1270 a0 = a0 >> c;
1271
1272 done:
1273 a->frac_lo = a1 | (sticky != 0);
1274 a->frac_hi = a0;
6fff2167
AB
1275}
1276
dedd123c
RH
1277static void frac256_shrjam(FloatParts256 *a, int c)
1278{
1279 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1280 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1281 uint64_t sticky = 0;
dedd123c
RH
1282
1283 if (unlikely(c == 0)) {
1284 return;
1285 } else if (likely(c < 64)) {
1286 /* nothing */
1287 } else if (likely(c < 256)) {
1288 if (unlikely(c & 128)) {
1289 sticky |= a2 | a3;
1290 a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1291 }
1292 if (unlikely(c & 64)) {
1293 sticky |= a3;
1294 a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1295 }
1296 c &= 63;
1297 if (c == 0) {
1298 goto done;
1299 }
1300 } else {
1301 sticky = a0 | a1 | a2 | a3;
1302 a0 = a1 = a2 = a3 = 0;
1303 goto done;
1304 }
1305
463e45dc
RH
1306 sticky |= shr_double(a3, 0, c);
1307 a3 = shr_double(a2, a3, c);
1308 a2 = shr_double(a1, a2, c);
1309 a1 = shr_double(a0, a1, c);
1310 a0 = a0 >> c;
dedd123c
RH
1311
1312 done:
1313 a->frac_lo = a3 | (sticky != 0);
1314 a->frac_lm = a2;
1315 a->frac_hm = a1;
1316 a->frac_hi = a0;
1317}
1318
1319#define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
d446830a 1320
da10a907
RH
1321static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1322{
1323 return usub64_overflow(a->frac, b->frac, &r->frac);
1324}
7c45bad8 1325
da10a907
RH
1326static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1327{
1328 bool c = 0;
1329 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1330 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1331 return c;
1332}
1333
dedd123c
RH
1334static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1335{
1336 bool c = 0;
1337 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1338 r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1339 r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1340 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1341 return c;
1342}
1343
1344#define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
da10a907 1345
aca84527
RH
1346static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1347{
1348 r->frac = a->frac_hi | (a->frac_lo != 0);
1349}
1350
1351static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1352{
1353 r->frac_hi = a->frac_hi;
1354 r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1355}
1356
1357#define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1358
dedd123c
RH
1359static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1360{
1361 r->frac_hi = a->frac;
1362 r->frac_lo = 0;
1363}
1364
1365static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1366{
1367 r->frac_hi = a->frac_hi;
1368 r->frac_hm = a->frac_lo;
1369 r->frac_lm = 0;
1370 r->frac_lo = 0;
1371}
1372
1373#define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1374
da10a907
RH
1375#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1376#define FloatPartsN glue(FloatParts,N)
aca84527 1377#define FloatPartsW glue(FloatParts,W)
da10a907
RH
1378
1379#define N 64
aca84527 1380#define W 128
da10a907
RH
1381
1382#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1383#include "softfloat-parts.c.inc"
1384
da10a907 1385#undef N
aca84527 1386#undef W
da10a907 1387#define N 128
aca84527 1388#define W 256
7c45bad8 1389
da10a907 1390#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1391#include "softfloat-parts.c.inc"
1392
dedd123c
RH
1393#undef N
1394#undef W
1395#define N 256
1396
1397#include "softfloat-parts-addsub.c.inc"
1398
da10a907 1399#undef N
aca84527 1400#undef W
7c45bad8
RH
1401#undef partsN
1402#undef FloatPartsN
aca84527 1403#undef FloatPartsW
7c45bad8 1404
aaffb7bf
RH
1405/*
1406 * Pack/unpack routines with a specific FloatFmt.
1407 */
1408
98e256fc
RH
1409static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1410 float_status *s, const FloatFmt *params)
aaffb7bf 1411{
98e256fc 1412 float16_unpack_raw(p, f);
d46975bc 1413 parts_canonicalize(p, s, params);
aaffb7bf
RH
1414}
1415
98e256fc
RH
1416static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1417 float_status *s)
aaffb7bf 1418{
98e256fc 1419 float16a_unpack_canonical(p, f, s, &float16_params);
aaffb7bf
RH
1420}
1421
98e256fc
RH
1422static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1423 float_status *s)
aaffb7bf 1424{
98e256fc 1425 bfloat16_unpack_raw(p, f);
d46975bc 1426 parts_canonicalize(p, s, &bfloat16_params);
aaffb7bf
RH
1427}
1428
e293e927
RH
1429static float16 float16a_round_pack_canonical(FloatParts64 *p,
1430 float_status *s,
aaffb7bf
RH
1431 const FloatFmt *params)
1432{
ee6959f2 1433 parts_uncanon(p, s, params);
e293e927 1434 return float16_pack_raw(p);
aaffb7bf
RH
1435}
1436
e293e927
RH
1437static float16 float16_round_pack_canonical(FloatParts64 *p,
1438 float_status *s)
aaffb7bf
RH
1439{
1440 return float16a_round_pack_canonical(p, s, &float16_params);
1441}
1442
e293e927
RH
1443static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1444 float_status *s)
aaffb7bf 1445{
ee6959f2 1446 parts_uncanon(p, s, &bfloat16_params);
e293e927 1447 return bfloat16_pack_raw(p);
aaffb7bf
RH
1448}
1449
98e256fc
RH
1450static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1451 float_status *s)
aaffb7bf 1452{
98e256fc 1453 float32_unpack_raw(p, f);
d46975bc 1454 parts_canonicalize(p, s, &float32_params);
aaffb7bf
RH
1455}
1456
e293e927
RH
1457static float32 float32_round_pack_canonical(FloatParts64 *p,
1458 float_status *s)
aaffb7bf 1459{
ee6959f2 1460 parts_uncanon(p, s, &float32_params);
e293e927 1461 return float32_pack_raw(p);
aaffb7bf
RH
1462}
1463
98e256fc
RH
1464static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1465 float_status *s)
aaffb7bf 1466{
98e256fc 1467 float64_unpack_raw(p, f);
d46975bc 1468 parts_canonicalize(p, s, &float64_params);
aaffb7bf
RH
1469}
1470
e293e927
RH
1471static float64 float64_round_pack_canonical(FloatParts64 *p,
1472 float_status *s)
aaffb7bf 1473{
ee6959f2 1474 parts_uncanon(p, s, &float64_params);
e293e927 1475 return float64_pack_raw(p);
aaffb7bf
RH
1476}
1477
3ff49e56
RH
1478static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1479 float_status *s)
1480{
1481 float128_unpack_raw(p, f);
1482 parts_canonicalize(p, s, &float128_params);
1483}
1484
1485static float128 float128_round_pack_canonical(FloatParts128 *p,
1486 float_status *s)
1487{
1488 parts_uncanon(p, s, &float128_params);
1489 return float128_pack_raw(p);
1490}
1491
6fff2167 1492/*
da10a907 1493 * Addition and subtraction
6fff2167
AB
1494 */
1495
da10a907
RH
1496static float16 QEMU_FLATTEN
1497float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
6fff2167 1498{
da10a907 1499 FloatParts64 pa, pb, *pr;
98e256fc
RH
1500
1501 float16_unpack_canonical(&pa, a, status);
1502 float16_unpack_canonical(&pb, b, status);
da10a907 1503 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1504
da10a907 1505 return float16_round_pack_canonical(pr, status);
6fff2167
AB
1506}
1507
da10a907 1508float16 float16_add(float16 a, float16 b, float_status *status)
1b615d48 1509{
da10a907
RH
1510 return float16_addsub(a, b, status, false);
1511}
1b615d48 1512
da10a907
RH
1513float16 float16_sub(float16 a, float16 b, float_status *status)
1514{
1515 return float16_addsub(a, b, status, true);
1b615d48
EC
1516}
1517
1518static float32 QEMU_SOFTFLOAT_ATTR
da10a907 1519soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
6fff2167 1520{
da10a907 1521 FloatParts64 pa, pb, *pr;
98e256fc
RH
1522
1523 float32_unpack_canonical(&pa, a, status);
1524 float32_unpack_canonical(&pb, b, status);
da10a907 1525 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1526
da10a907 1527 return float32_round_pack_canonical(pr, status);
6fff2167
AB
1528}
1529
da10a907 1530static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1b615d48 1531{
da10a907 1532 return soft_f32_addsub(a, b, status, false);
1b615d48
EC
1533}
1534
da10a907 1535static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1b615d48 1536{
da10a907 1537 return soft_f32_addsub(a, b, status, true);
1b615d48
EC
1538}
1539
1540static float64 QEMU_SOFTFLOAT_ATTR
da10a907 1541soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
6fff2167 1542{
da10a907 1543 FloatParts64 pa, pb, *pr;
98e256fc
RH
1544
1545 float64_unpack_canonical(&pa, a, status);
1546 float64_unpack_canonical(&pb, b, status);
da10a907 1547 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1548
da10a907 1549 return float64_round_pack_canonical(pr, status);
6fff2167
AB
1550}
1551
da10a907 1552static float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1553{
da10a907 1554 return soft_f64_addsub(a, b, status, false);
1b615d48 1555}
6fff2167 1556
da10a907 1557static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1b615d48 1558{
da10a907 1559 return soft_f64_addsub(a, b, status, true);
6fff2167
AB
1560}
1561
/*
 * Host-FPU ("hard") add/sub primitives.  These operate on native C
 * float/double values and are handed to float32_addsub/float64_addsub
 * as the hardware implementation.
 */
static float hard_f32_add(float a, float b)
{
    return a + b;
}

static float hard_f32_sub(float a, float b)
{
    return a - b;
}

static double hard_f64_add(double a, double b)
{
    return a + b;
}

static double hard_f64_sub(double a, double b)
{
    return a - b;
}
1581
b240c9c4 1582static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1b615d48
EC
1583{
1584 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1585 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1586 }
1587 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1588}
1589
b240c9c4 1590static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1b615d48
EC
1591{
1592 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1593 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1594 } else {
1595 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1596 }
1597}
1598
1599static float32 float32_addsub(float32 a, float32 b, float_status *s,
1600 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1601{
1602 return float32_gen2(a, b, s, hard, soft,
b240c9c4 1603 f32_is_zon2, f32_addsubmul_post);
1b615d48
EC
1604}
1605
1606static float64 float64_addsub(float64 a, float64 b, float_status *s,
1607 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1608{
1609 return float64_gen2(a, b, s, hard, soft,
b240c9c4 1610 f64_is_zon2, f64_addsubmul_post);
1b615d48
EC
1611}
1612
1613float32 QEMU_FLATTEN
1614float32_add(float32 a, float32 b, float_status *s)
1615{
1616 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1617}
1618
1619float32 QEMU_FLATTEN
1620float32_sub(float32 a, float32 b, float_status *s)
1621{
1622 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1623}
1624
1625float64 QEMU_FLATTEN
1626float64_add(float64 a, float64 b, float_status *s)
1627{
1628 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1629}
1630
1631float64 QEMU_FLATTEN
1632float64_sub(float64 a, float64 b, float_status *s)
1633{
1634 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1635}
1636
da10a907
RH
1637static bfloat16 QEMU_FLATTEN
1638bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
8282310d 1639{
da10a907 1640 FloatParts64 pa, pb, *pr;
98e256fc
RH
1641
1642 bfloat16_unpack_canonical(&pa, a, status);
1643 bfloat16_unpack_canonical(&pb, b, status);
da10a907 1644 pr = parts_addsub(&pa, &pb, status, subtract);
8282310d 1645
da10a907 1646 return bfloat16_round_pack_canonical(pr, status);
8282310d
LZ
1647}
1648
da10a907 1649bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
8282310d 1650{
da10a907
RH
1651 return bfloat16_addsub(a, b, status, false);
1652}
8282310d 1653
da10a907
RH
1654bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1655{
1656 return bfloat16_addsub(a, b, status, true);
8282310d
LZ
1657}
1658
3ff49e56
RH
1659static float128 QEMU_FLATTEN
1660float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1661{
1662 FloatParts128 pa, pb, *pr;
1663
1664 float128_unpack_canonical(&pa, a, status);
1665 float128_unpack_canonical(&pb, b, status);
1666 pr = parts_addsub(&pa, &pb, status, subtract);
1667
1668 return float128_round_pack_canonical(pr, status);
1669}
1670
1671float128 float128_add(float128 a, float128 b, float_status *status)
1672{
1673 return float128_addsub(a, b, status, false);
1674}
1675
1676float128 float128_sub(float128 a, float128 b, float_status *status)
1677{
1678 return float128_addsub(a, b, status, true);
1679}
1680
74d707e2 1681/*
aca84527 1682 * Multiplication
74d707e2
AB
1683 */
1684
97ff87c0 1685float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2 1686{
aca84527 1687 FloatParts64 pa, pb, *pr;
98e256fc
RH
1688
1689 float16_unpack_canonical(&pa, a, status);
1690 float16_unpack_canonical(&pb, b, status);
aca84527 1691 pr = parts_mul(&pa, &pb, status);
74d707e2 1692
aca84527 1693 return float16_round_pack_canonical(pr, status);
74d707e2
AB
1694}
1695
2dfabc86
EC
1696static float32 QEMU_SOFTFLOAT_ATTR
1697soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2 1698{
aca84527 1699 FloatParts64 pa, pb, *pr;
98e256fc
RH
1700
1701 float32_unpack_canonical(&pa, a, status);
1702 float32_unpack_canonical(&pb, b, status);
aca84527 1703 pr = parts_mul(&pa, &pb, status);
74d707e2 1704
aca84527 1705 return float32_round_pack_canonical(pr, status);
74d707e2
AB
1706}
1707
2dfabc86
EC
1708static float64 QEMU_SOFTFLOAT_ATTR
1709soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2 1710{
aca84527 1711 FloatParts64 pa, pb, *pr;
98e256fc
RH
1712
1713 float64_unpack_canonical(&pa, a, status);
1714 float64_unpack_canonical(&pb, b, status);
aca84527 1715 pr = parts_mul(&pa, &pb, status);
74d707e2 1716
aca84527 1717 return float64_round_pack_canonical(pr, status);
74d707e2
AB
1718}
1719
2dfabc86
EC
/*
 * Host-FPU ("hard") multiply primitives on native C float/double,
 * handed to float32_gen2/float64_gen2 as the hardware implementation.
 */
static float hard_f32_mul(float a, float b)
{
    return a * b;
}

static double hard_f64_mul(double a, double b)
{
    return a * b;
}
1729
2dfabc86
EC
1730float32 QEMU_FLATTEN
1731float32_mul(float32 a, float32 b, float_status *s)
1732{
1733 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
b240c9c4 1734 f32_is_zon2, f32_addsubmul_post);
2dfabc86
EC
1735}
1736
1737float64 QEMU_FLATTEN
1738float64_mul(float64 a, float64 b, float_status *s)
1739{
1740 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
b240c9c4 1741 f64_is_zon2, f64_addsubmul_post);
2dfabc86
EC
1742}
1743
aca84527
RH
1744bfloat16 QEMU_FLATTEN
1745bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
8282310d 1746{
aca84527 1747 FloatParts64 pa, pb, *pr;
98e256fc
RH
1748
1749 bfloat16_unpack_canonical(&pa, a, status);
1750 bfloat16_unpack_canonical(&pb, b, status);
aca84527 1751 pr = parts_mul(&pa, &pb, status);
8282310d 1752
aca84527
RH
1753 return bfloat16_round_pack_canonical(pr, status);
1754}
1755
1756float128 QEMU_FLATTEN
1757float128_mul(float128 a, float128 b, float_status *status)
1758{
1759 FloatParts128 pa, pb, *pr;
1760
1761 float128_unpack_canonical(&pa, a, status);
1762 float128_unpack_canonical(&pb, b, status);
1763 pr = parts_mul(&pa, &pb, status);
1764
1765 return float128_round_pack_canonical(pr, status);
8282310d
LZ
1766}
1767
d446830a 1768/*
dedd123c 1769 * Fused multiply-add
d446830a
AB
1770 */
1771
97ff87c0 1772float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
dedd123c 1773 int flags, float_status *status)
d446830a 1774{
dedd123c 1775 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1776
1777 float16_unpack_canonical(&pa, a, status);
1778 float16_unpack_canonical(&pb, b, status);
1779 float16_unpack_canonical(&pc, c, status);
dedd123c 1780 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1781
dedd123c 1782 return float16_round_pack_canonical(pr, status);
d446830a
AB
1783}
1784
ccf770ba
EC
1785static float32 QEMU_SOFTFLOAT_ATTR
1786soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1787 float_status *status)
d446830a 1788{
dedd123c 1789 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1790
1791 float32_unpack_canonical(&pa, a, status);
1792 float32_unpack_canonical(&pb, b, status);
1793 float32_unpack_canonical(&pc, c, status);
dedd123c 1794 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1795
dedd123c 1796 return float32_round_pack_canonical(pr, status);
d446830a
AB
1797}
1798
ccf770ba
EC
1799static float64 QEMU_SOFTFLOAT_ATTR
1800soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1801 float_status *status)
d446830a 1802{
dedd123c 1803 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1804
1805 float64_unpack_canonical(&pa, a, status);
1806 float64_unpack_canonical(&pb, b, status);
1807 float64_unpack_canonical(&pc, c, status);
dedd123c 1808 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1809
dedd123c 1810 return float64_round_pack_canonical(pr, status);
d446830a
AB
1811}
1812
f6b3b108
EC
/*
 * When true, float32_muladd and float64_muladd skip the host fma path
 * and use the softfloat implementation.  Zero-initialized; where it is
 * set is not visible in this part of the file.
 */
static bool force_soft_fma;
1814
ccf770ba
EC
1815float32 QEMU_FLATTEN
1816float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1817{
1818 union_float32 ua, ub, uc, ur;
1819
1820 ua.s = xa;
1821 ub.s = xb;
1822 uc.s = xc;
1823
1824 if (unlikely(!can_use_fpu(s))) {
1825 goto soft;
1826 }
1827 if (unlikely(flags & float_muladd_halve_result)) {
1828 goto soft;
1829 }
1830
1831 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1832 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1833 goto soft;
1834 }
f6b3b108
EC
1835
1836 if (unlikely(force_soft_fma)) {
1837 goto soft;
1838 }
1839
ccf770ba
EC
1840 /*
1841 * When (a || b) == 0, there's no need to check for under/over flow,
1842 * since we know the addend is (normal || 0) and the product is 0.
1843 */
1844 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1845 union_float32 up;
1846 bool prod_sign;
1847
1848 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1849 prod_sign ^= !!(flags & float_muladd_negate_product);
1850 up.s = float32_set_sign(float32_zero, prod_sign);
1851
1852 if (flags & float_muladd_negate_c) {
1853 uc.h = -uc.h;
1854 }
1855 ur.h = up.h + uc.h;
1856 } else {
896f51fb
KC
1857 union_float32 ua_orig = ua;
1858 union_float32 uc_orig = uc;
1859
ccf770ba
EC
1860 if (flags & float_muladd_negate_product) {
1861 ua.h = -ua.h;
1862 }
1863 if (flags & float_muladd_negate_c) {
1864 uc.h = -uc.h;
1865 }
1866
1867 ur.h = fmaf(ua.h, ub.h, uc.h);
1868
1869 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 1870 float_raise(float_flag_overflow, s);
ccf770ba 1871 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
896f51fb
KC
1872 ua = ua_orig;
1873 uc = uc_orig;
ccf770ba
EC
1874 goto soft;
1875 }
1876 }
1877 if (flags & float_muladd_negate_result) {
1878 return float32_chs(ur.s);
1879 }
1880 return ur.s;
1881
1882 soft:
1883 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1884}
1885
1886float64 QEMU_FLATTEN
1887float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1888{
1889 union_float64 ua, ub, uc, ur;
1890
1891 ua.s = xa;
1892 ub.s = xb;
1893 uc.s = xc;
1894
1895 if (unlikely(!can_use_fpu(s))) {
1896 goto soft;
1897 }
1898 if (unlikely(flags & float_muladd_halve_result)) {
1899 goto soft;
1900 }
1901
1902 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1903 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1904 goto soft;
1905 }
f6b3b108
EC
1906
1907 if (unlikely(force_soft_fma)) {
1908 goto soft;
1909 }
1910
ccf770ba
EC
1911 /*
1912 * When (a || b) == 0, there's no need to check for under/over flow,
1913 * since we know the addend is (normal || 0) and the product is 0.
1914 */
1915 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1916 union_float64 up;
1917 bool prod_sign;
1918
1919 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1920 prod_sign ^= !!(flags & float_muladd_negate_product);
1921 up.s = float64_set_sign(float64_zero, prod_sign);
1922
1923 if (flags & float_muladd_negate_c) {
1924 uc.h = -uc.h;
1925 }
1926 ur.h = up.h + uc.h;
1927 } else {
896f51fb
KC
1928 union_float64 ua_orig = ua;
1929 union_float64 uc_orig = uc;
1930
ccf770ba
EC
1931 if (flags & float_muladd_negate_product) {
1932 ua.h = -ua.h;
1933 }
1934 if (flags & float_muladd_negate_c) {
1935 uc.h = -uc.h;
1936 }
1937
1938 ur.h = fma(ua.h, ub.h, uc.h);
1939
1940 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 1941 float_raise(float_flag_overflow, s);
ccf770ba 1942 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
1943 ua = ua_orig;
1944 uc = uc_orig;
ccf770ba
EC
1945 goto soft;
1946 }
1947 }
1948 if (flags & float_muladd_negate_result) {
1949 return float64_chs(ur.s);
1950 }
1951 return ur.s;
1952
1953 soft:
1954 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1955}
1956
8282310d
LZ
1957bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1958 int flags, float_status *status)
1959{
dedd123c 1960 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1961
1962 bfloat16_unpack_canonical(&pa, a, status);
1963 bfloat16_unpack_canonical(&pb, b, status);
1964 bfloat16_unpack_canonical(&pc, c, status);
dedd123c
RH
1965 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1966
1967 return bfloat16_round_pack_canonical(pr, status);
1968}
8282310d 1969
dedd123c
RH
1970float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
1971 int flags, float_status *status)
1972{
1973 FloatParts128 pa, pb, pc, *pr;
1974
1975 float128_unpack_canonical(&pa, a, status);
1976 float128_unpack_canonical(&pb, b, status);
1977 float128_unpack_canonical(&pc, c, status);
1978 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1979
1980 return float128_round_pack_canonical(pr, status);
8282310d
LZ
1981}
1982
cf07323d 1983/*
ec961b81 1984 * Division
cf07323d
AB
1985 */
1986
cf07323d
AB
1987float16 float16_div(float16 a, float16 b, float_status *status)
1988{
ec961b81 1989 FloatParts64 pa, pb, *pr;
98e256fc
RH
1990
1991 float16_unpack_canonical(&pa, a, status);
1992 float16_unpack_canonical(&pb, b, status);
ec961b81 1993 pr = parts_div(&pa, &pb, status);
cf07323d 1994
ec961b81 1995 return float16_round_pack_canonical(pr, status);
cf07323d
AB
1996}
1997
4a629561
EC
1998static float32 QEMU_SOFTFLOAT_ATTR
1999soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d 2000{
ec961b81 2001 FloatParts64 pa, pb, *pr;
98e256fc
RH
2002
2003 float32_unpack_canonical(&pa, a, status);
2004 float32_unpack_canonical(&pb, b, status);
ec961b81 2005 pr = parts_div(&pa, &pb, status);
cf07323d 2006
ec961b81 2007 return float32_round_pack_canonical(pr, status);
cf07323d
AB
2008}
2009
4a629561
EC
2010static float64 QEMU_SOFTFLOAT_ATTR
2011soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d 2012{
ec961b81 2013 FloatParts64 pa, pb, *pr;
98e256fc
RH
2014
2015 float64_unpack_canonical(&pa, a, status);
2016 float64_unpack_canonical(&pb, b, status);
ec961b81 2017 pr = parts_div(&pa, &pb, status);
cf07323d 2018
ec961b81 2019 return float64_round_pack_canonical(pr, status);
cf07323d
AB
2020}
2021
4a629561
EC
/* Host single-precision division: returns a / b. */
static float hard_f32_div(float a, float b)
{
    const float quotient = a / b;

    return quotient;
}
2026
/* Host double-precision division: returns a / b. */
static double hard_f64_div(double a, double b)
{
    const double quotient = a / b;

    return quotient;
}
2031
2032static bool f32_div_pre(union_float32 a, union_float32 b)
2033{
2034 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2035 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2036 fpclassify(b.h) == FP_NORMAL;
2037 }
2038 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2039}
2040
2041static bool f64_div_pre(union_float64 a, union_float64 b)
2042{
2043 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2044 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2045 fpclassify(b.h) == FP_NORMAL;
2046 }
2047 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2048}
2049
2050static bool f32_div_post(union_float32 a, union_float32 b)
2051{
2052 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2053 return fpclassify(a.h) != FP_ZERO;
2054 }
2055 return !float32_is_zero(a.s);
2056}
2057
2058static bool f64_div_post(union_float64 a, union_float64 b)
2059{
2060 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2061 return fpclassify(a.h) != FP_ZERO;
2062 }
2063 return !float64_is_zero(a.s);
2064}
2065
2066float32 QEMU_FLATTEN
2067float32_div(float32 a, float32 b, float_status *s)
2068{
2069 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
b240c9c4 2070 f32_div_pre, f32_div_post);
4a629561
EC
2071}
2072
2073float64 QEMU_FLATTEN
2074float64_div(float64 a, float64 b, float_status *s)
2075{
2076 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
b240c9c4 2077 f64_div_pre, f64_div_post);
4a629561
EC
2078}
2079
ec961b81
RH
2080bfloat16 QEMU_FLATTEN
2081bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
8282310d 2082{
ec961b81 2083 FloatParts64 pa, pb, *pr;
98e256fc
RH
2084
2085 bfloat16_unpack_canonical(&pa, a, status);
2086 bfloat16_unpack_canonical(&pb, b, status);
ec961b81 2087 pr = parts_div(&pa, &pb, status);
8282310d 2088
ec961b81
RH
2089 return bfloat16_round_pack_canonical(pr, status);
2090}
2091
2092float128 QEMU_FLATTEN
2093float128_div(float128 a, float128 b, float_status *status)
2094{
2095 FloatParts128 pa, pb, *pr;
2096
2097 float128_unpack_canonical(&pa, a, status);
2098 float128_unpack_canonical(&pb, b, status);
2099 pr = parts_div(&pa, &pb, status);
2100
2101 return float128_round_pack_canonical(pr, status);
8282310d
LZ
2102}
2103
6fed16b2
AB
2104/*
2105 * Float to Float conversions
2106 *
2107 * Returns the result of converting one float format to another. The
2108 * conversion is performed according to the IEC/IEEE Standard for
2109 * Binary Floating-Point Arithmetic.
2110 *
c3f1875e
RH
2111 * Usually this only needs to take care of raising invalid exceptions
2112 * and handling the conversion on NaNs.
6fed16b2
AB
2113 */
2114
c3f1875e
RH
2115static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2116{
2117 switch (a->cls) {
2118 case float_class_qnan:
2119 case float_class_snan:
2120 /*
2121 * There is no NaN in the destination format. Raise Invalid
2122 * and return a zero with the sign of the input NaN.
2123 */
2124 float_raise(float_flag_invalid, s);
2125 a->cls = float_class_zero;
2126 break;
2127
2128 case float_class_inf:
2129 /*
2130 * There is no Inf in the destination format. Raise Invalid
2131 * and return the maximum normal with the correct sign.
2132 */
2133 float_raise(float_flag_invalid, s);
2134 a->cls = float_class_normal;
2135 a->exp = float16_params_ahp.exp_max;
2136 a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2137 float16_params_ahp.frac_size + 1);
2138 break;
2139
2140 case float_class_normal:
2141 case float_class_zero:
2142 break;
2143
2144 default:
2145 g_assert_not_reached();
2146 }
2147}
2148
2149static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2150{
2151 if (is_nan(a->cls)) {
2152 parts_return_nan(a, s);
6fed16b2 2153 }
6fed16b2
AB
2154}
2155
c3f1875e
RH
2156static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2157{
2158 if (is_nan(a->cls)) {
2159 parts_return_nan(a, s);
2160 }
2161}
2162
/*
 * Generic front end for the parts64/parts128 float_to_float helpers.
 * NOTE(review): PARTS_GENERIC_64_128 is defined elsewhere; presumably it
 * picks the variant from the type of PARTS -- confirm.
 */
#define parts_float_to_float(PARTS, STATUS) \
    PARTS_GENERIC_64_128(float_to_float, PARTS)(PARTS, STATUS)
2165
9882ccaf
RH
2166static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2167 float_status *s)
2168{
2169 a->cls = b->cls;
2170 a->sign = b->sign;
2171 a->exp = b->exp;
2172
2173 if (a->cls == float_class_normal) {
2174 frac_truncjam(a, b);
2175 } else if (is_nan(a->cls)) {
2176 /* Discard the low bits of the NaN. */
2177 a->frac = b->frac_hi;
2178 parts_return_nan(a, s);
2179 }
2180}
2181
2182static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2183 float_status *s)
2184{
2185 a->cls = b->cls;
2186 a->sign = b->sign;
2187 a->exp = b->exp;
2188 frac_widen(a, b);
2189
2190 if (is_nan(a->cls)) {
2191 parts_return_nan(a, s);
2192 }
2193}
2194
6fed16b2
AB
2195float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2196{
2197 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2198 FloatParts64 p;
98e256fc 2199
c3f1875e
RH
2200 float16a_unpack_canonical(&p, a, s, fmt16);
2201 parts_float_to_float(&p, s);
2202 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2203}
2204
2205float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2206{
2207 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2208 FloatParts64 p;
98e256fc 2209
c3f1875e
RH
2210 float16a_unpack_canonical(&p, a, s, fmt16);
2211 parts_float_to_float(&p, s);
2212 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2213}
2214
2215float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2216{
c3f1875e
RH
2217 FloatParts64 p;
2218 const FloatFmt *fmt;
98e256fc 2219
c3f1875e
RH
2220 float32_unpack_canonical(&p, a, s);
2221 if (ieee) {
2222 parts_float_to_float(&p, s);
2223 fmt = &float16_params;
2224 } else {
2225 parts_float_to_ahp(&p, s);
2226 fmt = &float16_params_ahp;
2227 }
2228 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2229}
2230
21381dcf
MK
2231static float64 QEMU_SOFTFLOAT_ATTR
2232soft_float32_to_float64(float32 a, float_status *s)
6fed16b2 2233{
c3f1875e 2234 FloatParts64 p;
98e256fc 2235
c3f1875e
RH
2236 float32_unpack_canonical(&p, a, s);
2237 parts_float_to_float(&p, s);
2238 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2239}
2240
21381dcf
MK
2241float64 float32_to_float64(float32 a, float_status *s)
2242{
2243 if (likely(float32_is_normal(a))) {
2244 /* Widening conversion can never produce inexact results. */
2245 union_float32 uf;
2246 union_float64 ud;
2247 uf.s = a;
2248 ud.h = uf.h;
2249 return ud.s;
2250 } else if (float32_is_zero(a)) {
2251 return float64_set_sign(float64_zero, float32_is_neg(a));
2252 } else {
2253 return soft_float32_to_float64(a, s);
2254 }
2255}
2256
6fed16b2
AB
2257float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2258{
c3f1875e
RH
2259 FloatParts64 p;
2260 const FloatFmt *fmt;
98e256fc 2261
c3f1875e
RH
2262 float64_unpack_canonical(&p, a, s);
2263 if (ieee) {
2264 parts_float_to_float(&p, s);
2265 fmt = &float16_params;
2266 } else {
2267 parts_float_to_ahp(&p, s);
2268 fmt = &float16_params_ahp;
2269 }
2270 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2271}
2272
2273float32 float64_to_float32(float64 a, float_status *s)
2274{
c3f1875e 2275 FloatParts64 p;
98e256fc 2276
c3f1875e
RH
2277 float64_unpack_canonical(&p, a, s);
2278 parts_float_to_float(&p, s);
2279 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2280}
2281
34f0c0a9
LZ
2282float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2283{
c3f1875e 2284 FloatParts64 p;
98e256fc 2285
c3f1875e
RH
2286 bfloat16_unpack_canonical(&p, a, s);
2287 parts_float_to_float(&p, s);
2288 return float32_round_pack_canonical(&p, s);
34f0c0a9
LZ
2289}
2290
2291float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2292{
c3f1875e 2293 FloatParts64 p;
98e256fc 2294
c3f1875e
RH
2295 bfloat16_unpack_canonical(&p, a, s);
2296 parts_float_to_float(&p, s);
2297 return float64_round_pack_canonical(&p, s);
34f0c0a9
LZ
2298}
2299
2300bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2301{
c3f1875e 2302 FloatParts64 p;
98e256fc 2303
c3f1875e
RH
2304 float32_unpack_canonical(&p, a, s);
2305 parts_float_to_float(&p, s);
2306 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2307}
2308
2309bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2310{
c3f1875e 2311 FloatParts64 p;
98e256fc 2312
c3f1875e
RH
2313 float64_unpack_canonical(&p, a, s);
2314 parts_float_to_float(&p, s);
2315 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2316}
2317
9882ccaf
RH
2318float32 float128_to_float32(float128 a, float_status *s)
2319{
2320 FloatParts64 p64;
2321 FloatParts128 p128;
2322
2323 float128_unpack_canonical(&p128, a, s);
2324 parts_float_to_float_narrow(&p64, &p128, s);
2325 return float32_round_pack_canonical(&p64, s);
2326}
2327
2328float64 float128_to_float64(float128 a, float_status *s)
2329{
2330 FloatParts64 p64;
2331 FloatParts128 p128;
2332
2333 float128_unpack_canonical(&p128, a, s);
2334 parts_float_to_float_narrow(&p64, &p128, s);
2335 return float64_round_pack_canonical(&p64, s);
2336}
2337
2338float128 float32_to_float128(float32 a, float_status *s)
2339{
2340 FloatParts64 p64;
2341 FloatParts128 p128;
2342
2343 float32_unpack_canonical(&p64, a, s);
2344 parts_float_to_float_widen(&p128, &p64, s);
2345 return float128_round_pack_canonical(&p128, s);
2346}
2347
2348float128 float64_to_float128(float64 a, float_status *s)
2349{
2350 FloatParts64 p64;
2351 FloatParts128 p128;
2352
2353 float64_unpack_canonical(&p64, a, s);
2354 parts_float_to_float_widen(&p128, &p64, s);
2355 return float128_round_pack_canonical(&p128, s);
2356}
2357
dbe4d53a 2358/*
afc34931 2359 * Round to integral value
dbe4d53a
AB
2360 */
2361
dbe4d53a
AB
2362float16 float16_round_to_int(float16 a, float_status *s)
2363{
afc34931 2364 FloatParts64 p;
98e256fc 2365
afc34931
RH
2366 float16_unpack_canonical(&p, a, s);
2367 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2368 return float16_round_pack_canonical(&p, s);
dbe4d53a
AB
2369}
2370
2371float32 float32_round_to_int(float32 a, float_status *s)
2372{
afc34931 2373 FloatParts64 p;
98e256fc 2374
afc34931
RH
2375 float32_unpack_canonical(&p, a, s);
2376 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2377 return float32_round_pack_canonical(&p, s);
dbe4d53a
AB
2378}
2379
2380float64 float64_round_to_int(float64 a, float_status *s)
2381{
afc34931 2382 FloatParts64 p;
98e256fc 2383
afc34931
RH
2384 float64_unpack_canonical(&p, a, s);
2385 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2386 return float64_round_pack_canonical(&p, s);
dbe4d53a
AB
2387}
2388
34f0c0a9
LZ
2389bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2390{
afc34931 2391 FloatParts64 p;
98e256fc 2392
afc34931
RH
2393 bfloat16_unpack_canonical(&p, a, s);
2394 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2395 return bfloat16_round_pack_canonical(&p, s);
2396}
2397
2398float128 float128_round_to_int(float128 a, float_status *s)
2399{
2400 FloatParts128 p;
2401
2402 float128_unpack_canonical(&p, a, s);
2403 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
2404 return float128_round_pack_canonical(&p, s);
34f0c0a9
LZ
2405}
2406
ab52f973 2407/*
463b3f0d
RH
2408 * Floating-point to signed integer conversions
2409 */
ab52f973 2410
0d93d8ec
FC
2411int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2412 float_status *s)
2413{
98e256fc
RH
2414 FloatParts64 p;
2415
2416 float16_unpack_canonical(&p, a, s);
463b3f0d 2417 return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
0d93d8ec
FC
2418}
2419
3dede407 2420int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2421 float_status *s)
2422{
98e256fc
RH
2423 FloatParts64 p;
2424
2425 float16_unpack_canonical(&p, a, s);
463b3f0d 2426 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2427}
2428
3dede407 2429int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2430 float_status *s)
2431{
98e256fc
RH
2432 FloatParts64 p;
2433
2434 float16_unpack_canonical(&p, a, s);
463b3f0d 2435 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2436}
2437
3dede407 2438int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2439 float_status *s)
2440{
98e256fc
RH
2441 FloatParts64 p;
2442
2443 float16_unpack_canonical(&p, a, s);
463b3f0d 2444 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2445}
2446
3dede407 2447int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2448 float_status *s)
2449{
98e256fc
RH
2450 FloatParts64 p;
2451
2452 float32_unpack_canonical(&p, a, s);
463b3f0d 2453 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2454}
2455
3dede407 2456int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2457 float_status *s)
2458{
98e256fc
RH
2459 FloatParts64 p;
2460
2461 float32_unpack_canonical(&p, a, s);
463b3f0d 2462 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2463}
2464
3dede407 2465int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2466 float_status *s)
2467{
98e256fc
RH
2468 FloatParts64 p;
2469
2470 float32_unpack_canonical(&p, a, s);
463b3f0d 2471 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2472}
2473
3dede407 2474int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2475 float_status *s)
2476{
98e256fc
RH
2477 FloatParts64 p;
2478
2479 float64_unpack_canonical(&p, a, s);
463b3f0d 2480 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2481}
2482
3dede407 2483int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2484 float_status *s)
2485{
98e256fc
RH
2486 FloatParts64 p;
2487
2488 float64_unpack_canonical(&p, a, s);
463b3f0d 2489 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2490}
2491
3dede407 2492int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2493 float_status *s)
2494{
98e256fc
RH
2495 FloatParts64 p;
2496
2497 float64_unpack_canonical(&p, a, s);
463b3f0d
RH
2498 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2499}
2500
2501int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2502 float_status *s)
2503{
2504 FloatParts64 p;
2505
2506 bfloat16_unpack_canonical(&p, a, s);
2507 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2508}
2509
2510int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2511 float_status *s)
2512{
2513 FloatParts64 p;
2514
2515 bfloat16_unpack_canonical(&p, a, s);
2516 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2517}
2518
2519int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2520 float_status *s)
2521{
2522 FloatParts64 p;
2523
2524 bfloat16_unpack_canonical(&p, a, s);
2525 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2526}
2527
2528static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
2529 int scale, float_status *s)
2530{
2531 FloatParts128 p;
2532
2533 float128_unpack_canonical(&p, a, s);
2534 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2535}
2536
2537static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
2538 int scale, float_status *s)
2539{
2540 FloatParts128 p;
2541
2542 float128_unpack_canonical(&p, a, s);
2543 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2544}
2545
0d93d8ec
FC
2546int8_t float16_to_int8(float16 a, float_status *s)
2547{
2548 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2549}
2550
2f6c74be
RH
2551int16_t float16_to_int16(float16 a, float_status *s)
2552{
2553 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2554}
2555
2556int32_t float16_to_int32(float16 a, float_status *s)
2557{
2558 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2559}
2560
2561int64_t float16_to_int64(float16 a, float_status *s)
2562{
2563 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2564}
2565
2566int16_t float32_to_int16(float32 a, float_status *s)
2567{
2568 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2569}
2570
2571int32_t float32_to_int32(float32 a, float_status *s)
2572{
2573 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2574}
2575
2576int64_t float32_to_int64(float32 a, float_status *s)
2577{
2578 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2579}
2580
2581int16_t float64_to_int16(float64 a, float_status *s)
2582{
2583 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2584}
2585
2586int32_t float64_to_int32(float64 a, float_status *s)
2587{
2588 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2589}
2590
2591int64_t float64_to_int64(float64 a, float_status *s)
2592{
2593 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2594}
2595
463b3f0d
RH
2596int32_t float128_to_int32(float128 a, float_status *s)
2597{
2598 return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2599}
2600
2601int64_t float128_to_int64(float128 a, float_status *s)
2602{
2603 return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2604}
2605
2f6c74be
RH
2606int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2607{
2608 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2609}
2610
2611int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2612{
2613 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2614}
2615
2616int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2617{
2618 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
2619}
2620
2f6c74be
RH
2621int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2622{
2623 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2624}
ab52f973 2625
2f6c74be
RH
2626int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2627{
2628 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2629}
2630
2631int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2632{
2633 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2634}
2635
2636int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2637{
2638 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2639}
ab52f973 2640
2f6c74be
RH
2641int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2642{
2643 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2644}
ab52f973 2645
2f6c74be
RH
2646int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2647{
2648 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2649}
ab52f973 2650
463b3f0d 2651int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
34f0c0a9 2652{
463b3f0d 2653 return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2654}
2655
463b3f0d 2656int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
34f0c0a9 2657{
463b3f0d 2658 return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2659}
2660
2661int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2662{
2663 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2664}
2665
2666int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2667{
2668 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2669}
2670
2671int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2672{
2673 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2674}
2675
2676int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2677{
2678 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2679}
2680
2681int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2682{
2683 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2684}
2685
2686int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2687{
2688 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2689}
2690
ab52f973 2691/*
4ab4aef0 2692 * Floating-point to unsigned integer conversions
ab52f973
AB
2693 */
2694
0d93d8ec
FC
2695uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2696 float_status *s)
2697{
98e256fc
RH
2698 FloatParts64 p;
2699
2700 float16_unpack_canonical(&p, a, s);
4ab4aef0 2701 return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
0d93d8ec
FC
2702}
2703
3dede407 2704uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2705 float_status *s)
2706{
98e256fc
RH
2707 FloatParts64 p;
2708
2709 float16_unpack_canonical(&p, a, s);
4ab4aef0 2710 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2711}
2712
3dede407 2713uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2714 float_status *s)
2715{
98e256fc
RH
2716 FloatParts64 p;
2717
2718 float16_unpack_canonical(&p, a, s);
4ab4aef0 2719 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2720}
2721
3dede407 2722uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2723 float_status *s)
2724{
98e256fc
RH
2725 FloatParts64 p;
2726
2727 float16_unpack_canonical(&p, a, s);
4ab4aef0 2728 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2729}
2730
3dede407 2731uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2732 float_status *s)
2733{
98e256fc
RH
2734 FloatParts64 p;
2735
2736 float32_unpack_canonical(&p, a, s);
4ab4aef0 2737 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2738}
2739
3dede407 2740uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2741 float_status *s)
2742{
98e256fc
RH
2743 FloatParts64 p;
2744
2745 float32_unpack_canonical(&p, a, s);
4ab4aef0 2746 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2747}
2748
3dede407 2749uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2750 float_status *s)
2751{
98e256fc
RH
2752 FloatParts64 p;
2753
2754 float32_unpack_canonical(&p, a, s);
4ab4aef0 2755 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2756}
2757
3dede407 2758uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2759 float_status *s)
2760{
98e256fc
RH
2761 FloatParts64 p;
2762
2763 float64_unpack_canonical(&p, a, s);
4ab4aef0 2764 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2765}
2766
3dede407 2767uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2768 float_status *s)
2769{
98e256fc
RH
2770 FloatParts64 p;
2771
2772 float64_unpack_canonical(&p, a, s);
4ab4aef0 2773 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2774}
2775
3dede407 2776uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2777 float_status *s)
2778{
98e256fc
RH
2779 FloatParts64 p;
2780
2781 float64_unpack_canonical(&p, a, s);
4ab4aef0
RH
2782 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2783}
2784
2785uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2786 int scale, float_status *s)
2787{
2788 FloatParts64 p;
2789
2790 bfloat16_unpack_canonical(&p, a, s);
2791 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2792}
2793
2794uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2795 int scale, float_status *s)
2796{
2797 FloatParts64 p;
2798
2799 bfloat16_unpack_canonical(&p, a, s);
2800 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2801}
2802
2803uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
2804 int scale, float_status *s)
2805{
2806 FloatParts64 p;
2807
2808 bfloat16_unpack_canonical(&p, a, s);
2809 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2810}
2811
2812static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
2813 int scale, float_status *s)
2814{
2815 FloatParts128 p;
2816
2817 float128_unpack_canonical(&p, a, s);
2818 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2819}
2820
2821static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
2822 int scale, float_status *s)
2823{
2824 FloatParts128 p;
2825
2826 float128_unpack_canonical(&p, a, s);
2827 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2828}
2829
0d93d8ec
FC
2830uint8_t float16_to_uint8(float16 a, float_status *s)
2831{
2832 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2833}
2834
2f6c74be
RH
2835uint16_t float16_to_uint16(float16 a, float_status *s)
2836{
2837 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2838}
2839
2840uint32_t float16_to_uint32(float16 a, float_status *s)
2841{
2842 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2843}
2844
2845uint64_t float16_to_uint64(float16 a, float_status *s)
2846{
2847 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2848}
2849
2850uint16_t float32_to_uint16(float32 a, float_status *s)
2851{
2852 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2853}
2854
2855uint32_t float32_to_uint32(float32 a, float_status *s)
2856{
2857 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2858}
2859
2860uint64_t float32_to_uint64(float32 a, float_status *s)
2861{
2862 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2863}
2864
2865uint16_t float64_to_uint16(float64 a, float_status *s)
2866{
2867 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2868}
2869
2870uint32_t float64_to_uint32(float64 a, float_status *s)
2871{
2872 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2873}
2874
2875uint64_t float64_to_uint64(float64 a, float_status *s)
2876{
2877 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2878}
2879
4ab4aef0
RH
2880uint32_t float128_to_uint32(float128 a, float_status *s)
2881{
2882 return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2883}
2884
2885uint64_t float128_to_uint64(float128 a, float_status *s)
2886{
2887 return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2888}
2889
2f6c74be
RH
2890uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2891{
2892 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2893}
2894
2895uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2896{
2897 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2898}
2899
2900uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2901{
2902 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2903}
2904
2905uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2906{
2907 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2908}
2909
2910uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2911{
2912 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2913}
2914
2915uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2916{
2917 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2918}
2919
2920uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2921{
2922 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2923}
2924
2925uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2926{
2927 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2928}
2929
2930uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2931{
2932 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2933}
ab52f973 2934
4ab4aef0 2935uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
34f0c0a9 2936{
4ab4aef0 2937 return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2938}
2939
4ab4aef0 2940uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
34f0c0a9 2941{
4ab4aef0 2942 return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2943}
2944
2945uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
2946{
2947 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2948}
2949
2950uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
2951{
2952 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2953}
2954
2955uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
2956{
2957 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2958}
2959
2960uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
2961{
2962 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2963}
2964
2965uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
2966{
2967 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2968}
2969
2970uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
2971{
2972 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2973}
2974
c02e1fb8 2975/*
e3689519 2976 * Signed integer to floating-point conversions
c02e1fb8
AB
2977 */
2978
2abdfe24 2979float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2980{
e3689519
RH
2981 FloatParts64 p;
2982
2983 parts_sint_to_float(&p, a, scale, status);
2984 return float16_round_pack_canonical(&p, status);
c02e1fb8
AB
2985}
2986
2abdfe24
RH
2987float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
2988{
2989 return int64_to_float16_scalbn(a, scale, status);
2990}
2991
2992float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
2993{
2994 return int64_to_float16_scalbn(a, scale, status);
2995}
2996
2997float16 int64_to_float16(int64_t a, float_status *status)
2998{
2999 return int64_to_float16_scalbn(a, 0, status);
3000}
3001
c02e1fb8
AB
3002float16 int32_to_float16(int32_t a, float_status *status)
3003{
2abdfe24 3004 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3005}
3006
3007float16 int16_to_float16(int16_t a, float_status *status)
3008{
2abdfe24 3009 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3010}
3011
0d93d8ec
FC
3012float16 int8_to_float16(int8_t a, float_status *status)
3013{
3014 return int64_to_float16_scalbn(a, 0, status);
3015}
3016
2abdfe24 3017float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3018{
e3689519
RH
3019 FloatParts64 p;
3020
3021 parts64_sint_to_float(&p, a, scale, status);
3022 return float32_round_pack_canonical(&p, status);
c02e1fb8
AB
3023}
3024
2abdfe24
RH
3025float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3026{
3027 return int64_to_float32_scalbn(a, scale, status);
3028}
3029
3030float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3031{
3032 return int64_to_float32_scalbn(a, scale, status);
3033}
3034
3035float32 int64_to_float32(int64_t a, float_status *status)
3036{
3037 return int64_to_float32_scalbn(a, 0, status);
3038}
3039
c02e1fb8
AB
3040float32 int32_to_float32(int32_t a, float_status *status)
3041{
2abdfe24 3042 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3043}
3044
3045float32 int16_to_float32(int16_t a, float_status *status)
3046{
2abdfe24 3047 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3048}
3049
2abdfe24 3050float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3051{
e3689519
RH
3052 FloatParts64 p;
3053
3054 parts_sint_to_float(&p, a, scale, status);
3055 return float64_round_pack_canonical(&p, status);
c02e1fb8
AB
3056}
3057
2abdfe24
RH
3058float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3059{
3060 return int64_to_float64_scalbn(a, scale, status);
3061}
3062
3063float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3064{
3065 return int64_to_float64_scalbn(a, scale, status);
3066}
3067
3068float64 int64_to_float64(int64_t a, float_status *status)
3069{
3070 return int64_to_float64_scalbn(a, 0, status);
3071}
3072
c02e1fb8
AB
3073float64 int32_to_float64(int32_t a, float_status *status)
3074{
2abdfe24 3075 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3076}
3077
3078float64 int16_to_float64(int16_t a, float_status *status)
3079{
2abdfe24 3080 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3081}
3082
34f0c0a9
LZ
3083bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3084{
e3689519
RH
3085 FloatParts64 p;
3086
3087 parts_sint_to_float(&p, a, scale, status);
3088 return bfloat16_round_pack_canonical(&p, status);
34f0c0a9
LZ
3089}
3090
3091bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3092{
3093 return int64_to_bfloat16_scalbn(a, scale, status);
3094}
3095
3096bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3097{
3098 return int64_to_bfloat16_scalbn(a, scale, status);
3099}
3100
3101bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3102{
3103 return int64_to_bfloat16_scalbn(a, 0, status);
3104}
3105
3106bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3107{
3108 return int64_to_bfloat16_scalbn(a, 0, status);
3109}
3110
3111bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3112{
3113 return int64_to_bfloat16_scalbn(a, 0, status);
3114}
c02e1fb8 3115
e3689519
RH
3116float128 int64_to_float128(int64_t a, float_status *status)
3117{
3118 FloatParts128 p;
3119
3120 parts_sint_to_float(&p, a, 0, status);
3121 return float128_round_pack_canonical(&p, status);
3122}
3123
3124float128 int32_to_float128(int32_t a, float_status *status)
3125{
3126 return int64_to_float128(a, status);
3127}
3128
c02e1fb8 3129/*
37c954a1 3130 * Unsigned Integer to floating-point conversions
c02e1fb8
AB
3131 */
3132
2abdfe24 3133float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3134{
37c954a1
RH
3135 FloatParts64 p;
3136
3137 parts_uint_to_float(&p, a, scale, status);
3138 return float16_round_pack_canonical(&p, status);
c02e1fb8
AB
3139}
3140
2abdfe24
RH
3141float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3142{
3143 return uint64_to_float16_scalbn(a, scale, status);
3144}
3145
3146float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3147{
3148 return uint64_to_float16_scalbn(a, scale, status);
3149}
3150
3151float16 uint64_to_float16(uint64_t a, float_status *status)
3152{
3153 return uint64_to_float16_scalbn(a, 0, status);
3154}
3155
c02e1fb8
AB
3156float16 uint32_to_float16(uint32_t a, float_status *status)
3157{
2abdfe24 3158 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3159}
3160
3161float16 uint16_to_float16(uint16_t a, float_status *status)
3162{
2abdfe24 3163 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3164}
3165
0d93d8ec
FC
3166float16 uint8_to_float16(uint8_t a, float_status *status)
3167{
3168 return uint64_to_float16_scalbn(a, 0, status);
3169}
3170
2abdfe24 3171float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3172{
37c954a1
RH
3173 FloatParts64 p;
3174
3175 parts_uint_to_float(&p, a, scale, status);
3176 return float32_round_pack_canonical(&p, status);
c02e1fb8
AB
3177}
3178
2abdfe24
RH
3179float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3180{
3181 return uint64_to_float32_scalbn(a, scale, status);
3182}
3183
3184float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3185{
3186 return uint64_to_float32_scalbn(a, scale, status);
3187}
3188
3189float32 uint64_to_float32(uint64_t a, float_status *status)
3190{
3191 return uint64_to_float32_scalbn(a, 0, status);
3192}
3193
c02e1fb8
AB
3194float32 uint32_to_float32(uint32_t a, float_status *status)
3195{
2abdfe24 3196 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3197}
3198
3199float32 uint16_to_float32(uint16_t a, float_status *status)
3200{
2abdfe24 3201 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3202}
3203
2abdfe24 3204float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3205{
37c954a1
RH
3206 FloatParts64 p;
3207
3208 parts_uint_to_float(&p, a, scale, status);
3209 return float64_round_pack_canonical(&p, status);
c02e1fb8
AB
3210}
3211
2abdfe24
RH
3212float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3213{
3214 return uint64_to_float64_scalbn(a, scale, status);
3215}
3216
3217float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3218{
3219 return uint64_to_float64_scalbn(a, scale, status);
3220}
3221
3222float64 uint64_to_float64(uint64_t a, float_status *status)
3223{
3224 return uint64_to_float64_scalbn(a, 0, status);
3225}
3226
c02e1fb8
AB
3227float64 uint32_to_float64(uint32_t a, float_status *status)
3228{
2abdfe24 3229 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3230}
3231
3232float64 uint16_to_float64(uint16_t a, float_status *status)
3233{
2abdfe24 3234 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3235}
3236
34f0c0a9
LZ
3237bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3238{
37c954a1
RH
3239 FloatParts64 p;
3240
3241 parts_uint_to_float(&p, a, scale, status);
3242 return bfloat16_round_pack_canonical(&p, status);
34f0c0a9
LZ
3243}
3244
3245bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3246{
3247 return uint64_to_bfloat16_scalbn(a, scale, status);
3248}
3249
3250bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3251{
3252 return uint64_to_bfloat16_scalbn(a, scale, status);
3253}
3254
3255bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3256{
3257 return uint64_to_bfloat16_scalbn(a, 0, status);
3258}
3259
3260bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3261{
3262 return uint64_to_bfloat16_scalbn(a, 0, status);
3263}
3264
3265bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3266{
3267 return uint64_to_bfloat16_scalbn(a, 0, status);
3268}
3269
37c954a1
RH
3270float128 uint64_to_float128(uint64_t a, float_status *status)
3271{
3272 FloatParts128 p;
3273
3274 parts_uint_to_float(&p, a, 0, status);
3275 return float128_round_pack_canonical(&p, status);
3276}
3277
e1c4667a
RH
3278/*
3279 * Minimum and maximum
89360067 3280 */
89360067 3281
e1c4667a
RH
3282static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
3283{
3284 FloatParts64 pa, pb, *pr;
89360067 3285
e1c4667a
RH
3286 float16_unpack_canonical(&pa, a, s);
3287 float16_unpack_canonical(&pb, b, s);
3288 pr = parts_minmax(&pa, &pb, s, flags);
3289
3290 return float16_round_pack_canonical(pr, s);
89360067
AB
3291}
3292
e1c4667a
RH
3293static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
3294 float_status *s, int flags)
3295{
3296 FloatParts64 pa, pb, *pr;
3297
3298 bfloat16_unpack_canonical(&pa, a, s);
3299 bfloat16_unpack_canonical(&pb, b, s);
3300 pr = parts_minmax(&pa, &pb, s, flags);
3301
3302 return bfloat16_round_pack_canonical(pr, s);
3303}
3304
3305static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
3306{
3307 FloatParts64 pa, pb, *pr;
3308
3309 float32_unpack_canonical(&pa, a, s);
3310 float32_unpack_canonical(&pb, b, s);
3311 pr = parts_minmax(&pa, &pb, s, flags);
3312
3313 return float32_round_pack_canonical(pr, s);
3314}
3315
3316static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
3317{
3318 FloatParts64 pa, pb, *pr;
3319
3320 float64_unpack_canonical(&pa, a, s);
3321 float64_unpack_canonical(&pb, b, s);
3322 pr = parts_minmax(&pa, &pb, s, flags);
3323
3324 return float64_round_pack_canonical(pr, s);
3325}
3326
3327#define MINMAX_1(type, name, flags) \
3328 type type##_##name(type a, type b, float_status *s) \
3329 { return type##_minmax(a, b, s, flags); }
3330
3331#define MINMAX_2(type) \
3332 MINMAX_1(type, max, 0) \
3333 MINMAX_1(type, maxnum, minmax_isnum) \
3334 MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
3335 MINMAX_1(type, min, minmax_ismin) \
3336 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
3337 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3338
3339MINMAX_2(float16)
3340MINMAX_2(bfloat16)
3341MINMAX_2(float32)
3342MINMAX_2(float64)
3343
3344#undef MINMAX_1
3345#undef MINMAX_2
8282310d 3346
0c4c9092 3347/* Floating point compare */
f8155c1d 3348static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
71bfd65c 3349 float_status *s)
0c4c9092
AB
3350{
3351 if (is_nan(a.cls) || is_nan(b.cls)) {
3352 if (!is_quiet ||
3353 a.cls == float_class_snan ||
3354 b.cls == float_class_snan) {
d82f3b2d 3355 float_raise(float_flag_invalid, s);
0c4c9092
AB
3356 }
3357 return float_relation_unordered;
3358 }
3359
3360 if (a.cls == float_class_zero) {
3361 if (b.cls == float_class_zero) {
3362 return float_relation_equal;
3363 }
3364 return b.sign ? float_relation_greater : float_relation_less;
3365 } else if (b.cls == float_class_zero) {
3366 return a.sign ? float_relation_less : float_relation_greater;
3367 }
3368
3369 /* The only really important thing about infinity is its sign. If
3370 * both are infinities the sign marks the smallest of the two.
3371 */
3372 if (a.cls == float_class_inf) {
3373 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
3374 return float_relation_equal;
3375 }
3376 return a.sign ? float_relation_less : float_relation_greater;
3377 } else if (b.cls == float_class_inf) {
3378 return b.sign ? float_relation_greater : float_relation_less;
3379 }
3380
3381 if (a.sign != b.sign) {
3382 return a.sign ? float_relation_less : float_relation_greater;
3383 }
3384
3385 if (a.exp == b.exp) {
3386 if (a.frac == b.frac) {
3387 return float_relation_equal;
3388 }
3389 if (a.sign) {
3390 return a.frac > b.frac ?
3391 float_relation_less : float_relation_greater;
3392 } else {
3393 return a.frac > b.frac ?
3394 float_relation_greater : float_relation_less;
3395 }
3396 } else {
3397 if (a.sign) {
3398 return a.exp > b.exp ? float_relation_less : float_relation_greater;
3399 } else {
3400 return a.exp > b.exp ? float_relation_greater : float_relation_less;
3401 }
3402 }
3403}
3404
d9fe9db9
EC
3405#define COMPARE(name, attr, sz) \
3406static int attr \
3407name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
0c4c9092 3408{ \
98e256fc
RH
3409 FloatParts64 pa, pb; \
3410 float ## sz ## _unpack_canonical(&pa, a, s); \
3411 float ## sz ## _unpack_canonical(&pb, b, s); \
d9fe9db9 3412 return compare_floats(pa, pb, is_quiet, s); \
0c4c9092
AB
3413}
3414
d9fe9db9
EC
3415COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
3416COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
3417COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
0c4c9092
AB
3418
3419#undef COMPARE
3420
71bfd65c 3421FloatRelation float16_compare(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3422{
3423 return soft_f16_compare(a, b, false, s);
3424}
3425
71bfd65c 3426FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3427{
3428 return soft_f16_compare(a, b, true, s);
3429}
3430
71bfd65c 3431static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3432f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
3433{
3434 union_float32 ua, ub;
3435
3436 ua.s = xa;
3437 ub.s = xb;
3438
3439 if (QEMU_NO_HARDFLOAT) {
3440 goto soft;
3441 }
3442
3443 float32_input_flush2(&ua.s, &ub.s, s);
3444 if (isgreaterequal(ua.h, ub.h)) {
3445 if (isgreater(ua.h, ub.h)) {
3446 return float_relation_greater;
3447 }
3448 return float_relation_equal;
3449 }
3450 if (likely(isless(ua.h, ub.h))) {
3451 return float_relation_less;
3452 }
3453 /* The only condition remaining is unordered.
3454 * Fall through to set flags.
3455 */
3456 soft:
3457 return soft_f32_compare(ua.s, ub.s, is_quiet, s);
3458}
3459
71bfd65c 3460FloatRelation float32_compare(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3461{
3462 return f32_compare(a, b, false, s);
3463}
3464
71bfd65c 3465FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3466{
3467 return f32_compare(a, b, true, s);
3468}
3469
71bfd65c 3470static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3471f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
3472{
3473 union_float64 ua, ub;
3474
3475 ua.s = xa;
3476 ub.s = xb;
3477
3478 if (QEMU_NO_HARDFLOAT) {
3479 goto soft;
3480 }
3481
3482 float64_input_flush2(&ua.s, &ub.s, s);
3483 if (isgreaterequal(ua.h, ub.h)) {
3484 if (isgreater(ua.h, ub.h)) {
3485 return float_relation_greater;
3486 }
3487 return float_relation_equal;
3488 }
3489 if (likely(isless(ua.h, ub.h))) {
3490 return float_relation_less;
3491 }
3492 /* The only condition remaining is unordered.
3493 * Fall through to set flags.
3494 */
3495 soft:
3496 return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3497}
3498
71bfd65c 3499FloatRelation float64_compare(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3500{
3501 return f64_compare(a, b, false, s);
3502}
3503
71bfd65c 3504FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3505{
3506 return f64_compare(a, b, true, s);
3507}
3508
8282310d
LZ
3509static FloatRelation QEMU_FLATTEN
3510soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
3511{
98e256fc
RH
3512 FloatParts64 pa, pb;
3513
3514 bfloat16_unpack_canonical(&pa, a, s);
3515 bfloat16_unpack_canonical(&pb, b, s);
8282310d
LZ
3516 return compare_floats(pa, pb, is_quiet, s);
3517}
3518
3519FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3520{
3521 return soft_bf16_compare(a, b, false, s);
3522}
3523
3524FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3525{
3526 return soft_bf16_compare(a, b, true, s);
3527}
3528
0bfc9f19 3529/* Multiply A by 2 raised to the power N. */
f8155c1d 3530static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
0bfc9f19
AB
3531{
3532 if (unlikely(is_nan(a.cls))) {
7c45bad8 3533 parts_return_nan(&a, s);
0bfc9f19
AB
3534 }
3535 if (a.cls == float_class_normal) {
f8155c1d 3536 /* The largest float type (even though not supported by FloatParts64)
ce8d4082
RH
3537 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
3538 * still allows rounding to infinity, without allowing overflow
f8155c1d 3539 * within the int32_t that backs FloatParts64.exp.
ce8d4082
RH
3540 */
3541 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
3542 a.exp += n;
3543 }
3544 return a;
3545}
3546
3547float16 float16_scalbn(float16 a, int n, float_status *status)
3548{
98e256fc
RH
3549 FloatParts64 pa, pr;
3550
3551 float16_unpack_canonical(&pa, a, status);
3552 pr = scalbn_decomposed(pa, n, status);
e293e927 3553 return float16_round_pack_canonical(&pr, status);
0bfc9f19
AB
3554}
3555
3556float32 float32_scalbn(float32 a, int n, float_status *status)
3557{
98e256fc
RH
3558 FloatParts64 pa, pr;
3559
3560 float32_unpack_canonical(&pa, a, status);
3561 pr = scalbn_decomposed(pa, n, status);
e293e927 3562 return float32_round_pack_canonical(&pr, status);
0bfc9f19
AB
3563}
3564
3565float64 float64_scalbn(float64 a, int n, float_status *status)
3566{
98e256fc
RH
3567 FloatParts64 pa, pr;
3568
3569 float64_unpack_canonical(&pa, a, status);
3570 pr = scalbn_decomposed(pa, n, status);
e293e927 3571 return float64_round_pack_canonical(&pr, status);
0bfc9f19
AB
3572}
3573
8282310d
LZ
3574bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3575{
98e256fc
RH
3576 FloatParts64 pa, pr;
3577
3578 bfloat16_unpack_canonical(&pa, a, status);
3579 pr = scalbn_decomposed(pa, n, status);
e293e927 3580 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
3581}
3582
c13bb2da
AB
/*
 * Square Root
 *
 * The old softfloat code did an approximation step before zeroing in
 * on the final result. However, for simplicity we just compute the
 * square root by iterating down from the implicit bit to enough extra
 * bits to ensure we get a correctly rounded result.
 *
 * This does mean, however, that the calculation is slower than before,
 * especially for 64 bit floats.
 */
3594
f8155c1d 3595static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
c13bb2da
AB
3596{
3597 uint64_t a_frac, r_frac, s_frac;
3598 int bit, last_bit;
3599
3600 if (is_nan(a.cls)) {
7c45bad8
RH
3601 parts_return_nan(&a, s);
3602 return a;
c13bb2da
AB
3603 }
3604 if (a.cls == float_class_zero) {
3605 return a; /* sqrt(+-0) = +-0 */
3606 }
3607 if (a.sign) {
d82f3b2d 3608 float_raise(float_flag_invalid, s);
0fc07cad
RH
3609 parts_default_nan(&a, s);
3610 return a;
c13bb2da
AB
3611 }
3612 if (a.cls == float_class_inf) {
3613 return a; /* sqrt(+inf) = +inf */
3614 }
3615
3616 assert(a.cls == float_class_normal);
3617
3618 /* We need two overflow bits at the top. Adding room for that is a
3619 * right shift. If the exponent is odd, we can discard the low bit
3620 * by multiplying the fraction by 2; that's a left shift. Combine
e99c4373 3621 * those and we shift right by 1 if the exponent is odd, otherwise 2.
c13bb2da 3622 */
e99c4373 3623 a_frac = a.frac >> (2 - (a.exp & 1));
c13bb2da
AB
3624 a.exp >>= 1;
3625
3626 /* Bit-by-bit computation of sqrt. */
3627 r_frac = 0;
3628 s_frac = 0;
3629
3630 /* Iterate from implicit bit down to the 3 extra bits to compute a
e99c4373
RH
3631 * properly rounded result. Remember we've inserted two more bits
3632 * at the top, so these positions are two less.
c13bb2da 3633 */
e99c4373 3634 bit = DECOMPOSED_BINARY_POINT - 2;
c13bb2da
AB
3635 last_bit = MAX(p->frac_shift - 4, 0);
3636 do {
3637 uint64_t q = 1ULL << bit;
3638 uint64_t t_frac = s_frac + q;
3639 if (t_frac <= a_frac) {
3640 s_frac = t_frac + q;
3641 a_frac -= t_frac;
3642 r_frac += q;
3643 }
3644 a_frac <<= 1;
3645 } while (--bit >= last_bit);
3646
3647 /* Undo the right shift done above. If there is any remaining
3648 * fraction, the result is inexact. Set the sticky bit.
3649 */
e99c4373 3650 a.frac = (r_frac << 2) + (a_frac != 0);
c13bb2da
AB
3651
3652 return a;
3653}
3654
97ff87c0 3655float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da 3656{
98e256fc
RH
3657 FloatParts64 pa, pr;
3658
3659 float16_unpack_canonical(&pa, a, status);
3660 pr = sqrt_float(pa, status, &float16_params);
e293e927 3661 return float16_round_pack_canonical(&pr, status);
c13bb2da
AB
3662}
3663
f131bae8
EC
3664static float32 QEMU_SOFTFLOAT_ATTR
3665soft_f32_sqrt(float32 a, float_status *status)
c13bb2da 3666{
98e256fc
RH
3667 FloatParts64 pa, pr;
3668
3669 float32_unpack_canonical(&pa, a, status);
3670 pr = sqrt_float(pa, status, &float32_params);
e293e927 3671 return float32_round_pack_canonical(&pr, status);
c13bb2da
AB
3672}
3673
f131bae8
EC
3674static float64 QEMU_SOFTFLOAT_ATTR
3675soft_f64_sqrt(float64 a, float_status *status)
c13bb2da 3676{
98e256fc
RH
3677 FloatParts64 pa, pr;
3678
3679 float64_unpack_canonical(&pa, a, status);
3680 pr = sqrt_float(pa, status, &float64_params);
e293e927 3681 return float64_round_pack_canonical(&pr, status);
c13bb2da
AB
3682}
3683
f131bae8
EC
3684float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3685{
3686 union_float32 ua, ur;
3687
3688 ua.s = xa;
3689 if (unlikely(!can_use_fpu(s))) {
3690 goto soft;
3691 }
3692
3693 float32_input_flush1(&ua.s, s);
3694 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3695 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3696 fpclassify(ua.h) == FP_ZERO) ||
3697 signbit(ua.h))) {
3698 goto soft;
3699 }
3700 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3701 float32_is_neg(ua.s))) {
3702 goto soft;
3703 }
3704 ur.h = sqrtf(ua.h);
3705 return ur.s;
3706
3707 soft:
3708 return soft_f32_sqrt(ua.s, s);
3709}
3710
3711float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3712{
3713 union_float64 ua, ur;
3714
3715 ua.s = xa;
3716 if (unlikely(!can_use_fpu(s))) {
3717 goto soft;
3718 }
3719
3720 float64_input_flush1(&ua.s, s);
3721 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3722 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3723 fpclassify(ua.h) == FP_ZERO) ||
3724 signbit(ua.h))) {
3725 goto soft;
3726 }
3727 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3728 float64_is_neg(ua.s))) {
3729 goto soft;
3730 }
3731 ur.h = sqrt(ua.h);
3732 return ur.s;
3733
3734 soft:
3735 return soft_f64_sqrt(ua.s, s);
3736}
3737
8282310d
LZ
3738bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3739{
98e256fc
RH
3740 FloatParts64 pa, pr;
3741
3742 bfloat16_unpack_canonical(&pa, a, status);
3743 pr = sqrt_float(pa, status, &bfloat16_params);
e293e927 3744 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
3745}
3746
0218a16e
RH
3747/*----------------------------------------------------------------------------
3748| The pattern for a default generated NaN.
3749*----------------------------------------------------------------------------*/
3750
3751float16 float16_default_nan(float_status *status)
3752{
0fc07cad
RH
3753 FloatParts64 p;
3754
3755 parts_default_nan(&p, status);
0218a16e 3756 p.frac >>= float16_params.frac_shift;
71fd178e 3757 return float16_pack_raw(&p);
0218a16e
RH
3758}
3759
3760float32 float32_default_nan(float_status *status)
3761{
0fc07cad
RH
3762 FloatParts64 p;
3763
3764 parts_default_nan(&p, status);
0218a16e 3765 p.frac >>= float32_params.frac_shift;
71fd178e 3766 return float32_pack_raw(&p);
0218a16e
RH
3767}
3768
3769float64 float64_default_nan(float_status *status)
3770{
0fc07cad
RH
3771 FloatParts64 p;
3772
3773 parts_default_nan(&p, status);
0218a16e 3774 p.frac >>= float64_params.frac_shift;
71fd178e 3775 return float64_pack_raw(&p);
0218a16e
RH
3776}
3777
3778float128 float128_default_nan(float_status *status)
3779{
e9034ea8 3780 FloatParts128 p;
0218a16e 3781
0fc07cad 3782 parts_default_nan(&p, status);
e9034ea8
RH
3783 frac_shr(&p, float128_params.frac_shift);
3784 return float128_pack_raw(&p);
0218a16e 3785}
c13bb2da 3786
8282310d
LZ
3787bfloat16 bfloat16_default_nan(float_status *status)
3788{
0fc07cad
RH
3789 FloatParts64 p;
3790
3791 parts_default_nan(&p, status);
8282310d 3792 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3793 return bfloat16_pack_raw(&p);
8282310d
LZ
3794}
3795
158142c2 3796/*----------------------------------------------------------------------------
377ed926
RH
3797| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3798*----------------------------------------------------------------------------*/
3799
3800float16 float16_silence_nan(float16 a, float_status *status)
3801{
3dddb203
RH
3802 FloatParts64 p;
3803
3804 float16_unpack_raw(&p, a);
377ed926 3805 p.frac <<= float16_params.frac_shift;
92ff426d 3806 parts_silence_nan(&p, status);
377ed926 3807 p.frac >>= float16_params.frac_shift;
71fd178e 3808 return float16_pack_raw(&p);
377ed926
RH
3809}
3810
3811float32 float32_silence_nan(float32 a, float_status *status)
3812{
3dddb203
RH
3813 FloatParts64 p;
3814
3815 float32_unpack_raw(&p, a);
377ed926 3816 p.frac <<= float32_params.frac_shift;
92ff426d 3817 parts_silence_nan(&p, status);
377ed926 3818 p.frac >>= float32_params.frac_shift;
71fd178e 3819 return float32_pack_raw(&p);
377ed926
RH
3820}
3821
3822float64 float64_silence_nan(float64 a, float_status *status)
3823{
3dddb203
RH
3824 FloatParts64 p;
3825
3826 float64_unpack_raw(&p, a);
377ed926 3827 p.frac <<= float64_params.frac_shift;
92ff426d 3828 parts_silence_nan(&p, status);
377ed926 3829 p.frac >>= float64_params.frac_shift;
71fd178e 3830 return float64_pack_raw(&p);
377ed926
RH
3831}
3832
8282310d
LZ
3833bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3834{
3dddb203
RH
3835 FloatParts64 p;
3836
3837 bfloat16_unpack_raw(&p, a);
8282310d 3838 p.frac <<= bfloat16_params.frac_shift;
92ff426d 3839 parts_silence_nan(&p, status);
8282310d 3840 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3841 return bfloat16_pack_raw(&p);
8282310d 3842}
e6b405fe 3843
0018b1f4
RH
3844float128 float128_silence_nan(float128 a, float_status *status)
3845{
3846 FloatParts128 p;
3847
3848 float128_unpack_raw(&p, a);
3849 frac_shl(&p, float128_params.frac_shift);
3850 parts_silence_nan(&p, status);
3851 frac_shr(&p, float128_params.frac_shift);
3852 return float128_pack_raw(&p);
3853}
3854
e6b405fe
AB
3855/*----------------------------------------------------------------------------
3856| If `a' is denormal and we are in flush-to-zero mode then set the
3857| input-denormal exception and return zero. Otherwise just return the value.
3858*----------------------------------------------------------------------------*/
3859
f8155c1d 3860static bool parts_squash_denormal(FloatParts64 p, float_status *status)
e6b405fe
AB
3861{
3862 if (p.exp == 0 && p.frac != 0) {
3863 float_raise(float_flag_input_denormal, status);
3864 return true;
3865 }
3866
3867 return false;
3868}
3869
3870float16 float16_squash_input_denormal(float16 a, float_status *status)
3871{
3872 if (status->flush_inputs_to_zero) {
3dddb203
RH
3873 FloatParts64 p;
3874
3875 float16_unpack_raw(&p, a);
e6b405fe
AB
3876 if (parts_squash_denormal(p, status)) {
3877 return float16_set_sign(float16_zero, p.sign);
3878 }
3879 }
3880 return a;
3881}
3882
3883float32 float32_squash_input_denormal(float32 a, float_status *status)
3884{
3885 if (status->flush_inputs_to_zero) {
3dddb203
RH
3886 FloatParts64 p;
3887
3888 float32_unpack_raw(&p, a);
e6b405fe
AB
3889 if (parts_squash_denormal(p, status)) {
3890 return float32_set_sign(float32_zero, p.sign);
3891 }
3892 }
3893 return a;
3894}
3895
3896float64 float64_squash_input_denormal(float64 a, float_status *status)
3897{
3898 if (status->flush_inputs_to_zero) {
3dddb203
RH
3899 FloatParts64 p;
3900
3901 float64_unpack_raw(&p, a);
e6b405fe
AB
3902 if (parts_squash_denormal(p, status)) {
3903 return float64_set_sign(float64_zero, p.sign);
3904 }
3905 }
3906 return a;
3907}
3908
8282310d
LZ
3909bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3910{
3911 if (status->flush_inputs_to_zero) {
3dddb203
RH
3912 FloatParts64 p;
3913
3914 bfloat16_unpack_raw(&p, a);
8282310d
LZ
3915 if (parts_squash_denormal(p, status)) {
3916 return bfloat16_set_sign(bfloat16_zero, p.sign);
3917 }
3918 }
3919 return a;
3920}
3921
377ed926 3922/*----------------------------------------------------------------------------
158142c2
FB
3923| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3924| and 7, and returns the properly rounded 32-bit integer corresponding to the
3925| input. If `zSign' is 1, the input is negated before being converted to an
3926| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3927| is simply rounded to an integer, with the inexact exception raised if the
3928| input cannot be represented exactly as an integer. However, if the fixed-
3929| point input is too large, the invalid exception is raised and the largest
3930| positive or negative integer is returned.
3931*----------------------------------------------------------------------------*/
3932
c120391c
RH
3933static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
3934 float_status *status)
158142c2 3935{
8f506c70 3936 int8_t roundingMode;
c120391c 3937 bool roundNearestEven;
8f506c70 3938 int8_t roundIncrement, roundBits;
760e1416 3939 int32_t z;
158142c2 3940
a2f2d288 3941 roundingMode = status->float_rounding_mode;
158142c2 3942 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3943 switch (roundingMode) {
3944 case float_round_nearest_even:
f9288a76 3945 case float_round_ties_away:
dc355b76
PM
3946 roundIncrement = 0x40;
3947 break;
3948 case float_round_to_zero:
3949 roundIncrement = 0;
3950 break;
3951 case float_round_up:
3952 roundIncrement = zSign ? 0 : 0x7f;
3953 break;
3954 case float_round_down:
3955 roundIncrement = zSign ? 0x7f : 0;
3956 break;
5d64abb3
RH
3957 case float_round_to_odd:
3958 roundIncrement = absZ & 0x80 ? 0 : 0x7f;
3959 break;
dc355b76
PM
3960 default:
3961 abort();
158142c2
FB
3962 }
3963 roundBits = absZ & 0x7F;
3964 absZ = ( absZ + roundIncrement )>>7;
40662886
PMD
3965 if (!(roundBits ^ 0x40) && roundNearestEven) {
3966 absZ &= ~1;
3967 }
158142c2
FB
3968 z = absZ;
3969 if ( zSign ) z = - z;
3970 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 3971 float_raise(float_flag_invalid, status);
2c217da0 3972 return zSign ? INT32_MIN : INT32_MAX;
158142c2 3973 }
a2f2d288 3974 if (roundBits) {
d82f3b2d 3975 float_raise(float_flag_inexact, status);
a2f2d288 3976 }
158142c2
FB
3977 return z;
3978
3979}
3980
3981/*----------------------------------------------------------------------------
3982| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3983| `absZ1', with binary point between bits 63 and 64 (between the input words),
3984| and returns the properly rounded 64-bit integer corresponding to the input.
3985| If `zSign' is 1, the input is negated before being converted to an integer.
3986| Ordinarily, the fixed-point input is simply rounded to an integer, with
3987| the inexact exception raised if the input cannot be represented exactly as
3988| an integer. However, if the fixed-point input is too large, the invalid
3989| exception is raised and the largest positive or negative integer is
3990| returned.
3991*----------------------------------------------------------------------------*/
3992
c120391c 3993static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 3994 float_status *status)
158142c2 3995{
8f506c70 3996 int8_t roundingMode;
c120391c 3997 bool roundNearestEven, increment;
760e1416 3998 int64_t z;
158142c2 3999
a2f2d288 4000 roundingMode = status->float_rounding_mode;
158142c2 4001 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4002 switch (roundingMode) {
4003 case float_round_nearest_even:
f9288a76 4004 case float_round_ties_away:
dc355b76
PM
4005 increment = ((int64_t) absZ1 < 0);
4006 break;
4007 case float_round_to_zero:
4008 increment = 0;
4009 break;
4010 case float_round_up:
4011 increment = !zSign && absZ1;
4012 break;
4013 case float_round_down:
4014 increment = zSign && absZ1;
4015 break;
5d64abb3
RH
4016 case float_round_to_odd:
4017 increment = !(absZ0 & 1) && absZ1;
4018 break;
dc355b76
PM
4019 default:
4020 abort();
158142c2
FB
4021 }
4022 if ( increment ) {
4023 ++absZ0;
4024 if ( absZ0 == 0 ) goto overflow;
40662886
PMD
4025 if (!(absZ1 << 1) && roundNearestEven) {
4026 absZ0 &= ~1;
4027 }
158142c2
FB
4028 }
4029 z = absZ0;
4030 if ( zSign ) z = - z;
4031 if ( z && ( ( z < 0 ) ^ zSign ) ) {
4032 overflow:
ff32e16e 4033 float_raise(float_flag_invalid, status);
2c217da0 4034 return zSign ? INT64_MIN : INT64_MAX;
158142c2 4035 }
a2f2d288 4036 if (absZ1) {
d82f3b2d 4037 float_raise(float_flag_inexact, status);
a2f2d288 4038 }
158142c2
FB
4039 return z;
4040
4041}
4042
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision significand `aSig'.  The
| significand is shifted left by `clz32(aSig) - 8' bits (which, given that
| clz32 counts leading zero bits, places its leading one at bit 23) and stored
| through `zSigPtr'; the matching exponent, 1 minus the shift distance, is
| stored through `zExpPtr'.
*----------------------------------------------------------------------------*/

static void
normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    const int8_t leftShift = clz32(aSig) - 8;

    *zSigPtr = aSig << leftShift;
    *zExpPtr = 1 - leftShift;
}
4060
158142c2
FB
4061/*----------------------------------------------------------------------------
4062| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4063| and significand `zSig', and returns the proper single-precision floating-
4064| point value corresponding to the abstract input. Ordinarily, the abstract
4065| value is simply rounded and packed into the single-precision format, with
4066| the inexact exception raised if the abstract input cannot be represented
4067| exactly. However, if the abstract value is too large, the overflow and
4068| inexact exceptions are raised and an infinity or maximal finite value is
4069| returned. If the abstract value is too small, the input value is rounded to
4070| a subnormal number, and the underflow and inexact exceptions are raised if
4071| the abstract input cannot be represented exactly as a subnormal single-
4072| precision floating-point number.
4073| The input significand `zSig' has its binary point between bits 30
4074| and 29, which is 7 bits to the left of the usual location. This shifted
4075| significand must be normalized or smaller. If `zSig' is not normalized,
4076| `zExp' must be 0; in that case, the result returned is a subnormal number,
4077| and it must not require rounding. In the usual case that `zSig' is
4078| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4079| The handling of underflow and overflow follows the IEC/IEEE Standard for
4080| Binary Floating-Point Arithmetic.
4081*----------------------------------------------------------------------------*/
4082
c120391c 4083static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4084 float_status *status)
158142c2 4085{
8f506c70 4086 int8_t roundingMode;
c120391c 4087 bool roundNearestEven;
8f506c70 4088 int8_t roundIncrement, roundBits;
c120391c 4089 bool isTiny;
158142c2 4090
a2f2d288 4091 roundingMode = status->float_rounding_mode;
158142c2 4092 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4093 switch (roundingMode) {
4094 case float_round_nearest_even:
f9288a76 4095 case float_round_ties_away:
dc355b76
PM
4096 roundIncrement = 0x40;
4097 break;
4098 case float_round_to_zero:
4099 roundIncrement = 0;
4100 break;
4101 case float_round_up:
4102 roundIncrement = zSign ? 0 : 0x7f;
4103 break;
4104 case float_round_down:
4105 roundIncrement = zSign ? 0x7f : 0;
4106 break;
5d64abb3
RH
4107 case float_round_to_odd:
4108 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4109 break;
dc355b76
PM
4110 default:
4111 abort();
4112 break;
158142c2
FB
4113 }
4114 roundBits = zSig & 0x7F;
bb98fe42 4115 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
4116 if ( ( 0xFD < zExp )
4117 || ( ( zExp == 0xFD )
bb98fe42 4118 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4119 ) {
5d64abb3
RH
4120 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4121 roundIncrement != 0;
ff32e16e 4122 float_raise(float_flag_overflow | float_flag_inexact, status);
5d64abb3 4123 return packFloat32(zSign, 0xFF, -!overflow_to_inf);
158142c2
FB
4124 }
4125 if ( zExp < 0 ) {
a2f2d288 4126 if (status->flush_to_zero) {
ff32e16e 4127 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4128 return packFloat32(zSign, 0, 0);
4129 }
a828b373
RH
4130 isTiny = status->tininess_before_rounding
4131 || (zExp < -1)
4132 || (zSig + roundIncrement < 0x80000000);
158142c2
FB
4133 shift32RightJamming( zSig, - zExp, &zSig );
4134 zExp = 0;
4135 roundBits = zSig & 0x7F;
ff32e16e
PM
4136 if (isTiny && roundBits) {
4137 float_raise(float_flag_underflow, status);
4138 }
5d64abb3
RH
4139 if (roundingMode == float_round_to_odd) {
4140 /*
4141 * For round-to-odd case, the roundIncrement depends on
4142 * zSig which just changed.
4143 */
4144 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4145 }
158142c2
FB
4146 }
4147 }
a2f2d288 4148 if (roundBits) {
d82f3b2d 4149 float_raise(float_flag_inexact, status);
a2f2d288 4150 }
158142c2 4151 zSig = ( zSig + roundIncrement )>>7;
40662886
PMD
4152 if (!(roundBits ^ 0x40) && roundNearestEven) {
4153 zSig &= ~1;
4154 }
158142c2
FB
4155 if ( zSig == 0 ) zExp = 0;
4156 return packFloat32( zSign, zExp, zSig );
4157
4158}
4159
4160/*----------------------------------------------------------------------------
4161| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4162| and significand `zSig', and returns the proper single-precision floating-
4163| point value corresponding to the abstract input. This routine is just like
4164| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4165| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4166| floating-point exponent.
4167*----------------------------------------------------------------------------*/
4168
4169static float32
c120391c 4170 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4171 float_status *status)
158142c2 4172{
8f506c70 4173 int8_t shiftCount;
158142c2 4174
0019d5c3 4175 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
4176 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4177 status);
158142c2
FB
4178
4179}
4180
/*----------------------------------------------------------------------------
| Normalizes the subnormal double-precision significand `aSig'.  The
| significand is shifted left by `clz64(aSig) - 11' bits (which, given that
| clz64 counts leading zero bits, places its leading one at bit 52) and stored
| through `zSigPtr'; the matching exponent, 1 minus the shift distance, is
| stored through `zExpPtr'.
*----------------------------------------------------------------------------*/

static void
normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    const int8_t leftShift = clz64(aSig) - 11;

    *zSigPtr = aSig << leftShift;
    *zExpPtr = 1 - leftShift;
}
4198
4199/*----------------------------------------------------------------------------
4200| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4201| double-precision floating-point value, returning the result. After being
4202| shifted into the proper positions, the three fields are simply added
4203| together to form the result. This means that any integer portion of `zSig'
4204| will be added into the exponent. Since a properly normalized significand
4205| will have an integer portion equal to 1, the `zExp' input should be 1 less
4206| than the desired result exponent whenever `zSig' is a complete, normalized
4207| significand.
4208*----------------------------------------------------------------------------*/
4209
c120391c 4210static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
158142c2
FB
4211{
4212
f090c9d4 4213 return make_float64(
bb98fe42 4214 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
4215
4216}
4217
4218/*----------------------------------------------------------------------------
4219| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4220| and significand `zSig', and returns the proper double-precision floating-
4221| point value corresponding to the abstract input. Ordinarily, the abstract
4222| value is simply rounded and packed into the double-precision format, with
4223| the inexact exception raised if the abstract input cannot be represented
4224| exactly. However, if the abstract value is too large, the overflow and
4225| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
4226| returned. If the abstract value is too small, the input value is rounded to
4227| a subnormal number, and the underflow and inexact exceptions are raised if
4228| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
4229| precision floating-point number.
4230| The input significand `zSig' has its binary point between bits 62
4231| and 61, which is 10 bits to the left of the usual location. This shifted
4232| significand must be normalized or smaller. If `zSig' is not normalized,
4233| `zExp' must be 0; in that case, the result returned is a subnormal number,
4234| and it must not require rounding. In the usual case that `zSig' is
4235| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4236| The handling of underflow and overflow follows the IEC/IEEE Standard for
4237| Binary Floating-Point Arithmetic.
4238*----------------------------------------------------------------------------*/
4239
c120391c 4240static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4241 float_status *status)
158142c2 4242{
8f506c70 4243 int8_t roundingMode;
c120391c 4244 bool roundNearestEven;
0c48262d 4245 int roundIncrement, roundBits;
c120391c 4246 bool isTiny;
158142c2 4247
a2f2d288 4248 roundingMode = status->float_rounding_mode;
158142c2 4249 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4250 switch (roundingMode) {
4251 case float_round_nearest_even:
f9288a76 4252 case float_round_ties_away:
dc355b76
PM
4253 roundIncrement = 0x200;
4254 break;
4255 case float_round_to_zero:
4256 roundIncrement = 0;
4257 break;
4258 case float_round_up:
4259 roundIncrement = zSign ? 0 : 0x3ff;
4260 break;
4261 case float_round_down:
4262 roundIncrement = zSign ? 0x3ff : 0;
4263 break;
9ee6f678
BR
4264 case float_round_to_odd:
4265 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4266 break;
dc355b76
PM
4267 default:
4268 abort();
158142c2
FB
4269 }
4270 roundBits = zSig & 0x3FF;
bb98fe42 4271 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
4272 if ( ( 0x7FD < zExp )
4273 || ( ( zExp == 0x7FD )
bb98fe42 4274 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4275 ) {
9ee6f678
BR
4276 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4277 roundIncrement != 0;
ff32e16e 4278 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 4279 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
4280 }
4281 if ( zExp < 0 ) {
a2f2d288 4282 if (status->flush_to_zero) {
ff32e16e 4283 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4284 return packFloat64(zSign, 0, 0);
4285 }
a828b373
RH
4286 isTiny = status->tininess_before_rounding
4287 || (zExp < -1)
4288 || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
158142c2
FB
4289 shift64RightJamming( zSig, - zExp, &zSig );
4290 zExp = 0;
4291 roundBits = zSig & 0x3FF;
ff32e16e
PM
4292 if (isTiny && roundBits) {
4293 float_raise(float_flag_underflow, status);
4294 }
9ee6f678
BR
4295 if (roundingMode == float_round_to_odd) {
4296 /*
4297 * For round-to-odd case, the roundIncrement depends on
4298 * zSig which just changed.
4299 */
4300 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4301 }
158142c2
FB
4302 }
4303 }
a2f2d288 4304 if (roundBits) {
d82f3b2d 4305 float_raise(float_flag_inexact, status);
a2f2d288 4306 }
158142c2 4307 zSig = ( zSig + roundIncrement )>>10;
40662886
PMD
4308 if (!(roundBits ^ 0x200) && roundNearestEven) {
4309 zSig &= ~1;
4310 }
158142c2
FB
4311 if ( zSig == 0 ) zExp = 0;
4312 return packFloat64( zSign, zExp, zSig );
4313
4314}
4315
4316/*----------------------------------------------------------------------------
4317| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4318| and significand `zSig', and returns the proper double-precision floating-
4319| point value corresponding to the abstract input. This routine is just like
4320| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4321| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4322| floating-point exponent.
4323*----------------------------------------------------------------------------*/
4324
4325static float64
c120391c 4326 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4327 float_status *status)
158142c2 4328{
8f506c70 4329 int8_t shiftCount;
158142c2 4330
0019d5c3 4331 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
4332 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4333 status);
158142c2
FB
4334
4335}
4336
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision significand `aSig'.
| The significand is shifted left by `clz64(aSig)' bits and stored through
| `zSigPtr'; the matching exponent, 1 minus the shift distance, is stored
| through `zExpPtr'.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    const int8_t leftShift = clz64(aSig);

    *zSigPtr = aSig << leftShift;
    *zExpPtr = 1 - leftShift;
}
4353
4354/*----------------------------------------------------------------------------
4355| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4356| and extended significand formed by the concatenation of `zSig0' and `zSig1',
4357| and returns the proper extended double-precision floating-point value
4358| corresponding to the abstract input. Ordinarily, the abstract value is
4359| rounded and packed into the extended double-precision format, with the
4360| inexact exception raised if the abstract input cannot be represented
4361| exactly. However, if the abstract value is too large, the overflow and
4362| inexact exceptions are raised and an infinity or maximal finite value is
4363| returned. If the abstract value is too small, the input value is rounded to
4364| a subnormal number, and the underflow and inexact exceptions are raised if
4365| the abstract input cannot be represented exactly as a subnormal extended
4366| double-precision floating-point number.
4367| If `roundingPrecision' is 32 or 64, the result is rounded to the same
4368| number of bits as single or double precision, respectively. Otherwise, the
4369| result is rounded to the full precision of the extended double-precision
4370| format.
4371| The input significand must be normalized or smaller. If the input
4372| significand is not normalized, `zExp' must be 0; in that case, the result
4373| returned is a subnormal number, and it must not require rounding. The
4374| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4375| Floating-Point Arithmetic.
4376*----------------------------------------------------------------------------*/
4377
c120391c 4378floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
88857aca
LV
4379 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4380 float_status *status)
158142c2 4381{
8f506c70 4382 int8_t roundingMode;
c120391c 4383 bool roundNearestEven, increment, isTiny;
f42c2224 4384 int64_t roundIncrement, roundMask, roundBits;
158142c2 4385
a2f2d288 4386 roundingMode = status->float_rounding_mode;
158142c2
FB
4387 roundNearestEven = ( roundingMode == float_round_nearest_even );
4388 if ( roundingPrecision == 80 ) goto precision80;
4389 if ( roundingPrecision == 64 ) {
e9321124
AB
4390 roundIncrement = UINT64_C(0x0000000000000400);
4391 roundMask = UINT64_C(0x00000000000007FF);
158142c2
FB
4392 }
4393 else if ( roundingPrecision == 32 ) {
e9321124
AB
4394 roundIncrement = UINT64_C(0x0000008000000000);
4395 roundMask = UINT64_C(0x000000FFFFFFFFFF);
158142c2
FB
4396 }
4397 else {
4398 goto precision80;
4399 }
4400 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
4401 switch (roundingMode) {
4402 case float_round_nearest_even:
f9288a76 4403 case float_round_ties_away:
dc355b76
PM
4404 break;
4405 case float_round_to_zero:
4406 roundIncrement = 0;
4407 break;
4408 case float_round_up:
4409 roundIncrement = zSign ? 0 : roundMask;
4410 break;
4411 case float_round_down:
4412 roundIncrement = zSign ? roundMask : 0;
4413 break;
4414 default:
4415 abort();
158142c2
FB
4416 }
4417 roundBits = zSig0 & roundMask;
bb98fe42 4418 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4419 if ( ( 0x7FFE < zExp )
4420 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4421 ) {
4422 goto overflow;
4423 }
4424 if ( zExp <= 0 ) {
a2f2d288 4425 if (status->flush_to_zero) {
ff32e16e 4426 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4427 return packFloatx80(zSign, 0, 0);
4428 }
a828b373
RH
4429 isTiny = status->tininess_before_rounding
4430 || (zExp < 0 )
4431 || (zSig0 <= zSig0 + roundIncrement);
158142c2
FB
4432 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4433 zExp = 0;
4434 roundBits = zSig0 & roundMask;
ff32e16e
PM
4435 if (isTiny && roundBits) {
4436 float_raise(float_flag_underflow, status);
4437 }
a2f2d288 4438 if (roundBits) {
d82f3b2d 4439 float_raise(float_flag_inexact, status);
a2f2d288 4440 }
158142c2 4441 zSig0 += roundIncrement;
bb98fe42 4442 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4443 roundIncrement = roundMask + 1;
4444 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4445 roundMask |= roundIncrement;
4446 }
4447 zSig0 &= ~ roundMask;
4448 return packFloatx80( zSign, zExp, zSig0 );
4449 }
4450 }
a2f2d288 4451 if (roundBits) {
d82f3b2d 4452 float_raise(float_flag_inexact, status);
a2f2d288 4453 }
158142c2
FB
4454 zSig0 += roundIncrement;
4455 if ( zSig0 < roundIncrement ) {
4456 ++zExp;
e9321124 4457 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4458 }
4459 roundIncrement = roundMask + 1;
4460 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4461 roundMask |= roundIncrement;
4462 }
4463 zSig0 &= ~ roundMask;
4464 if ( zSig0 == 0 ) zExp = 0;
4465 return packFloatx80( zSign, zExp, zSig0 );
4466 precision80:
dc355b76
PM
4467 switch (roundingMode) {
4468 case float_round_nearest_even:
f9288a76 4469 case float_round_ties_away:
dc355b76
PM
4470 increment = ((int64_t)zSig1 < 0);
4471 break;
4472 case float_round_to_zero:
4473 increment = 0;
4474 break;
4475 case float_round_up:
4476 increment = !zSign && zSig1;
4477 break;
4478 case float_round_down:
4479 increment = zSign && zSig1;
4480 break;
4481 default:
4482 abort();
158142c2 4483 }
bb98fe42 4484 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4485 if ( ( 0x7FFE < zExp )
4486 || ( ( zExp == 0x7FFE )
e9321124 4487 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
158142c2
FB
4488 && increment
4489 )
4490 ) {
4491 roundMask = 0;
4492 overflow:
ff32e16e 4493 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4494 if ( ( roundingMode == float_round_to_zero )
4495 || ( zSign && ( roundingMode == float_round_up ) )
4496 || ( ! zSign && ( roundingMode == float_round_down ) )
4497 ) {
4498 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4499 }
0f605c88
LV
4500 return packFloatx80(zSign,
4501 floatx80_infinity_high,
4502 floatx80_infinity_low);
158142c2
FB
4503 }
4504 if ( zExp <= 0 ) {
a828b373
RH
4505 isTiny = status->tininess_before_rounding
4506 || (zExp < 0)
4507 || !increment
4508 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4509 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4510 zExp = 0;
ff32e16e
PM
4511 if (isTiny && zSig1) {
4512 float_raise(float_flag_underflow, status);
4513 }
a2f2d288 4514 if (zSig1) {
d82f3b2d 4515 float_raise(float_flag_inexact, status);
a2f2d288 4516 }
dc355b76
PM
4517 switch (roundingMode) {
4518 case float_round_nearest_even:
f9288a76 4519 case float_round_ties_away:
dc355b76
PM
4520 increment = ((int64_t)zSig1 < 0);
4521 break;
4522 case float_round_to_zero:
4523 increment = 0;
4524 break;
4525 case float_round_up:
4526 increment = !zSign && zSig1;
4527 break;
4528 case float_round_down:
4529 increment = zSign && zSig1;
4530 break;
4531 default:
4532 abort();
158142c2
FB
4533 }
4534 if ( increment ) {
4535 ++zSig0;
40662886
PMD
4536 if (!(zSig1 << 1) && roundNearestEven) {
4537 zSig0 &= ~1;
4538 }
bb98fe42 4539 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4540 }
4541 return packFloatx80( zSign, zExp, zSig0 );
4542 }
4543 }
a2f2d288 4544 if (zSig1) {
d82f3b2d 4545 float_raise(float_flag_inexact, status);
a2f2d288 4546 }
158142c2
FB
4547 if ( increment ) {
4548 ++zSig0;
4549 if ( zSig0 == 0 ) {
4550 ++zExp;
e9321124 4551 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4552 }
4553 else {
40662886
PMD
4554 if (!(zSig1 << 1) && roundNearestEven) {
4555 zSig0 &= ~1;
4556 }
158142c2
FB
4557 }
4558 }
4559 else {
4560 if ( zSig0 == 0 ) zExp = 0;
4561 }
4562 return packFloatx80( zSign, zExp, zSig0 );
4563
4564}
4565
4566/*----------------------------------------------------------------------------
4567| Takes an abstract floating-point value having sign `zSign', exponent
4568| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4569| and returns the proper extended double-precision floating-point value
4570| corresponding to the abstract input. This routine is just like
4571| `roundAndPackFloatx80' except that the input significand does not have to be
4572| normalized.
4573*----------------------------------------------------------------------------*/
4574
88857aca 4575floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
c120391c 4576 bool zSign, int32_t zExp,
88857aca
LV
4577 uint64_t zSig0, uint64_t zSig1,
4578 float_status *status)
158142c2 4579{
8f506c70 4580 int8_t shiftCount;
158142c2
FB
4581
4582 if ( zSig0 == 0 ) {
4583 zSig0 = zSig1;
4584 zSig1 = 0;
4585 zExp -= 64;
4586 }
0019d5c3 4587 shiftCount = clz64(zSig0);
158142c2
FB
4588 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4589 zExp -= shiftCount;
ff32e16e
PM
4590 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4591 zSig0, zSig1, status);
158142c2
FB
4592
4593}
4594
158142c2
FB
4595/*----------------------------------------------------------------------------
4596| Returns the least-significant 64 fraction bits of the quadruple-precision
4597| floating-point value `a'.
4598*----------------------------------------------------------------------------*/
4599
a49db98d 4600static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
4601{
4602
4603 return a.low;
4604
4605}
4606
4607/*----------------------------------------------------------------------------
4608| Returns the most-significant 48 fraction bits of the quadruple-precision
4609| floating-point value `a'.
4610*----------------------------------------------------------------------------*/
4611
a49db98d 4612static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
4613{
4614
e9321124 4615 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
158142c2
FB
4616
4617}
4618
4619/*----------------------------------------------------------------------------
4620| Returns the exponent bits of the quadruple-precision floating-point value
4621| `a'.
4622*----------------------------------------------------------------------------*/
4623
f4014512 4624static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
4625{
4626
4627 return ( a.high>>48 ) & 0x7FFF;
4628
4629}
4630
4631/*----------------------------------------------------------------------------
4632| Returns the sign bit of the quadruple-precision floating-point value `a'.
4633*----------------------------------------------------------------------------*/
4634
c120391c 4635static inline bool extractFloat128Sign(float128 a)
158142c2 4636{
c120391c 4637 return a.high >> 63;
158142c2
FB
4638}
4639
/*----------------------------------------------------------------------------
| Normalizes the subnormal quadruple-precision significand formed by the
| concatenation of `aSig0' and `aSig1'.  The normalized exponent is stored
| through `zExpPtr'.  The most significant 49 bits of the normalized
| significand are stored through `zSig0Ptr' and the least significant 64 bits
| through `zSig1Ptr'.
*----------------------------------------------------------------------------*/

static void
normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1,
                           int32_t *zExpPtr,
                           uint64_t *zSig0Ptr, uint64_t *zSig1Ptr)
{
    int8_t shift;

    if (aSig0 != 0) {
        /* Leading one is in the high word: shift the 128-bit pair left. */
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /* High word empty: every significant bit comes from aSig1. */
    shift = clz64(aSig1) - 15;
    if (shift >= 0) {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    } else {
        /*
         * The leading one sits above bit 48 of aSig1, so the value spans
         * both output words: move the top down into the high word and
         * push the remaining low bits up into the low word.
         */
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    }
    *zExpPtr = -shift - 63;
}
4680
4681/*----------------------------------------------------------------------------
4682| Packs the sign `zSign', the exponent `zExp', and the significand formed
4683| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4684| floating-point value, returning the result. After being shifted into the
4685| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4686| added together to form the most significant 32 bits of the result. This
4687| means that any integer portion of `zSig0' will be added into the exponent.
4688| Since a properly normalized significand will have an integer portion equal
4689| to 1, the `zExp' input should be 1 less than the desired result exponent
4690| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4691| significand.
4692*----------------------------------------------------------------------------*/
4693
a49db98d 4694static inline float128
c120391c 4695packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
158142c2
FB
4696{
4697 float128 z;
4698
4699 z.low = zSig1;
c120391c 4700 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
158142c2 4701 return z;
158142c2
FB
4702}
4703
4704/*----------------------------------------------------------------------------
4705| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4706| and extended significand formed by the concatenation of `zSig0', `zSig1',
4707| and `zSig2', and returns the proper quadruple-precision floating-point value
4708| corresponding to the abstract input. Ordinarily, the abstract value is
4709| simply rounded and packed into the quadruple-precision format, with the
4710| inexact exception raised if the abstract input cannot be represented
4711| exactly. However, if the abstract value is too large, the overflow and
4712| inexact exceptions are raised and an infinity or maximal finite value is
4713| returned. If the abstract value is too small, the input value is rounded to
4714| a subnormal number, and the underflow and inexact exceptions are raised if
4715| the abstract input cannot be represented exactly as a subnormal quadruple-
4716| precision floating-point number.
4717| The input significand must be normalized or smaller. If the input
4718| significand is not normalized, `zExp' must be 0; in that case, the result
4719| returned is a subnormal number, and it must not require rounding. In the
4720| usual case that the input significand is normalized, `zExp' must be 1 less
4721| than the ``true'' floating-point exponent. The handling of underflow and
4722| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4723*----------------------------------------------------------------------------*/
4724
c120391c 4725static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4726 uint64_t zSig0, uint64_t zSig1,
4727 uint64_t zSig2, float_status *status)
158142c2 4728{
8f506c70 4729 int8_t roundingMode;
c120391c 4730 bool roundNearestEven, increment, isTiny;
158142c2 4731
a2f2d288 4732 roundingMode = status->float_rounding_mode;
158142c2 4733 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4734 switch (roundingMode) {
4735 case float_round_nearest_even:
f9288a76 4736 case float_round_ties_away:
dc355b76
PM
4737 increment = ((int64_t)zSig2 < 0);
4738 break;
4739 case float_round_to_zero:
4740 increment = 0;
4741 break;
4742 case float_round_up:
4743 increment = !zSign && zSig2;
4744 break;
4745 case float_round_down:
4746 increment = zSign && zSig2;
4747 break;
9ee6f678
BR
4748 case float_round_to_odd:
4749 increment = !(zSig1 & 0x1) && zSig2;
4750 break;
dc355b76
PM
4751 default:
4752 abort();
158142c2 4753 }
bb98fe42 4754 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
4755 if ( ( 0x7FFD < zExp )
4756 || ( ( zExp == 0x7FFD )
4757 && eq128(
e9321124
AB
4758 UINT64_C(0x0001FFFFFFFFFFFF),
4759 UINT64_C(0xFFFFFFFFFFFFFFFF),
158142c2
FB
4760 zSig0,
4761 zSig1
4762 )
4763 && increment
4764 )
4765 ) {
ff32e16e 4766 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4767 if ( ( roundingMode == float_round_to_zero )
4768 || ( zSign && ( roundingMode == float_round_up ) )
4769 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 4770 || (roundingMode == float_round_to_odd)
158142c2
FB
4771 ) {
4772 return
4773 packFloat128(
4774 zSign,
4775 0x7FFE,
e9321124
AB
4776 UINT64_C(0x0000FFFFFFFFFFFF),
4777 UINT64_C(0xFFFFFFFFFFFFFFFF)
158142c2
FB
4778 );
4779 }
4780 return packFloat128( zSign, 0x7FFF, 0, 0 );
4781 }
4782 if ( zExp < 0 ) {
a2f2d288 4783 if (status->flush_to_zero) {
ff32e16e 4784 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4785 return packFloat128(zSign, 0, 0, 0);
4786 }
a828b373
RH
4787 isTiny = status->tininess_before_rounding
4788 || (zExp < -1)
4789 || !increment
4790 || lt128(zSig0, zSig1,
4791 UINT64_C(0x0001FFFFFFFFFFFF),
4792 UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4793 shift128ExtraRightJamming(
4794 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4795 zExp = 0;
ff32e16e
PM
4796 if (isTiny && zSig2) {
4797 float_raise(float_flag_underflow, status);
4798 }
dc355b76
PM
4799 switch (roundingMode) {
4800 case float_round_nearest_even:
f9288a76 4801 case float_round_ties_away:
dc355b76
PM
4802 increment = ((int64_t)zSig2 < 0);
4803 break;
4804 case float_round_to_zero:
4805 increment = 0;
4806 break;
4807 case float_round_up:
4808 increment = !zSign && zSig2;
4809 break;
4810 case float_round_down:
4811 increment = zSign && zSig2;
4812 break;
9ee6f678
BR
4813 case float_round_to_odd:
4814 increment = !(zSig1 & 0x1) && zSig2;
4815 break;
dc355b76
PM
4816 default:
4817 abort();
158142c2
FB
4818 }
4819 }
4820 }
a2f2d288 4821 if (zSig2) {
d82f3b2d 4822 float_raise(float_flag_inexact, status);
a2f2d288 4823 }
158142c2
FB
4824 if ( increment ) {
4825 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
40662886
PMD
4826 if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4827 zSig1 &= ~1;
4828 }
158142c2
FB
4829 }
4830 else {
4831 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4832 }
4833 return packFloat128( zSign, zExp, zSig0, zSig1 );
4834
4835}
4836
4837/*----------------------------------------------------------------------------
4838| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4839| and significand formed by the concatenation of `zSig0' and `zSig1', and
4840| returns the proper quadruple-precision floating-point value corresponding
4841| to the abstract input. This routine is just like `roundAndPackFloat128'
4842| except that the input significand has fewer bits and does not have to be
4843| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4844| point exponent.
4845*----------------------------------------------------------------------------*/
4846
c120391c 4847static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4848 uint64_t zSig0, uint64_t zSig1,
4849 float_status *status)
158142c2 4850{
8f506c70 4851 int8_t shiftCount;
bb98fe42 4852 uint64_t zSig2;
158142c2
FB
4853
4854 if ( zSig0 == 0 ) {
4855 zSig0 = zSig1;
4856 zSig1 = 0;
4857 zExp -= 64;
4858 }
0019d5c3 4859 shiftCount = clz64(zSig0) - 15;
158142c2
FB
4860 if ( 0 <= shiftCount ) {
4861 zSig2 = 0;
4862 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4863 }
4864 else {
4865 shift128ExtraRightJamming(
4866 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4867 }
4868 zExp -= shiftCount;
ff32e16e 4869 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
4870
4871}
4872
158142c2 4873
158142c2
FB
4874/*----------------------------------------------------------------------------
4875| Returns the result of converting the 32-bit two's complement integer `a'
4876| to the extended double-precision floating-point format. The conversion
4877| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4878| Arithmetic.
4879*----------------------------------------------------------------------------*/
4880
e5a41ffa 4881floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2 4882{
c120391c 4883 bool zSign;
3a87d009 4884 uint32_t absA;
8f506c70 4885 int8_t shiftCount;
bb98fe42 4886 uint64_t zSig;
158142c2
FB
4887
4888 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4889 zSign = ( a < 0 );
4890 absA = zSign ? - a : a;
0019d5c3 4891 shiftCount = clz32(absA) + 32;
158142c2
FB
4892 zSig = absA;
4893 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4894
4895}
4896
158142c2
FB
4897/*----------------------------------------------------------------------------
4898| Returns the result of converting the 64-bit two's complement integer `a'
4899| to the extended double-precision floating-point format. The conversion
4900| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4901| Arithmetic.
4902*----------------------------------------------------------------------------*/
4903
e5a41ffa 4904floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2 4905{
c120391c 4906 bool zSign;
182f42fd 4907 uint64_t absA;
8f506c70 4908 int8_t shiftCount;
158142c2
FB
4909
4910 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4911 zSign = ( a < 0 );
4912 absA = zSign ? - a : a;
0019d5c3 4913 shiftCount = clz64(absA);
158142c2
FB
4914 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4915
4916}
4917
158142c2
FB
4918/*----------------------------------------------------------------------------
4919| Returns the result of converting the single-precision floating-point value
4920| `a' to the extended double-precision floating-point format. The conversion
4921| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4922| Arithmetic.
4923*----------------------------------------------------------------------------*/
4924
e5a41ffa 4925floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2 4926{
c120391c 4927 bool aSign;
0c48262d 4928 int aExp;
bb98fe42 4929 uint32_t aSig;
158142c2 4930
ff32e16e 4931 a = float32_squash_input_denormal(a, status);
158142c2
FB
4932 aSig = extractFloat32Frac( a );
4933 aExp = extractFloat32Exp( a );
4934 aSign = extractFloat32Sign( a );
4935 if ( aExp == 0xFF ) {
ff32e16e 4936 if (aSig) {
7537c2b4
JM
4937 floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
4938 status);
4939 return floatx80_silence_nan(res, status);
ff32e16e 4940 }
0f605c88
LV
4941 return packFloatx80(aSign,
4942 floatx80_infinity_high,
4943 floatx80_infinity_low);
158142c2
FB
4944 }
4945 if ( aExp == 0 ) {
4946 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4947 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4948 }
4949 aSig |= 0x00800000;
bb98fe42 4950 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
4951
4952}
4953
158142c2
FB
4954/*----------------------------------------------------------------------------
4955| Returns the remainder of the single-precision floating-point value `a'
4956| with respect to the corresponding value `b'. The operation is performed
4957| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4958*----------------------------------------------------------------------------*/
4959
e5a41ffa 4960float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 4961{
c120391c 4962 bool aSign, zSign;
0c48262d 4963 int aExp, bExp, expDiff;
bb98fe42
AF
4964 uint32_t aSig, bSig;
4965 uint32_t q;
4966 uint64_t aSig64, bSig64, q64;
4967 uint32_t alternateASig;
4968 int32_t sigMean;
ff32e16e
PM
4969 a = float32_squash_input_denormal(a, status);
4970 b = float32_squash_input_denormal(b, status);
158142c2
FB
4971
4972 aSig = extractFloat32Frac( a );
4973 aExp = extractFloat32Exp( a );
4974 aSign = extractFloat32Sign( a );
4975 bSig = extractFloat32Frac( b );
4976 bExp = extractFloat32Exp( b );
158142c2
FB
4977 if ( aExp == 0xFF ) {
4978 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 4979 return propagateFloat32NaN(a, b, status);
158142c2 4980 }
ff32e16e 4981 float_raise(float_flag_invalid, status);
af39bc8c 4982 return float32_default_nan(status);
158142c2
FB
4983 }
4984 if ( bExp == 0xFF ) {
ff32e16e
PM
4985 if (bSig) {
4986 return propagateFloat32NaN(a, b, status);
4987 }
158142c2
FB
4988 return a;
4989 }
4990 if ( bExp == 0 ) {
4991 if ( bSig == 0 ) {
ff32e16e 4992 float_raise(float_flag_invalid, status);
af39bc8c 4993 return float32_default_nan(status);
158142c2
FB
4994 }
4995 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
4996 }
4997 if ( aExp == 0 ) {
4998 if ( aSig == 0 ) return a;
4999 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5000 }
5001 expDiff = aExp - bExp;
5002 aSig |= 0x00800000;
5003 bSig |= 0x00800000;
5004 if ( expDiff < 32 ) {
5005 aSig <<= 8;
5006 bSig <<= 8;
5007 if ( expDiff < 0 ) {
5008 if ( expDiff < -1 ) return a;
5009 aSig >>= 1;
5010 }
5011 q = ( bSig <= aSig );
5012 if ( q ) aSig -= bSig;
5013 if ( 0 < expDiff ) {
bb98fe42 5014 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
5015 q >>= 32 - expDiff;
5016 bSig >>= 2;
5017 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5018 }
5019 else {
5020 aSig >>= 2;
5021 bSig >>= 2;
5022 }
5023 }
5024 else {
5025 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
5026 aSig64 = ( (uint64_t) aSig )<<40;
5027 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
5028 expDiff -= 64;
5029 while ( 0 < expDiff ) {
5030 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5031 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5032 aSig64 = - ( ( bSig * q64 )<<38 );
5033 expDiff -= 62;
5034 }
5035 expDiff += 64;
5036 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5037 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5038 q = q64>>( 64 - expDiff );
5039 bSig <<= 6;
5040 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5041 }
5042 do {
5043 alternateASig = aSig;
5044 ++q;
5045 aSig -= bSig;
bb98fe42 5046 } while ( 0 <= (int32_t) aSig );
158142c2
FB
5047 sigMean = aSig + alternateASig;
5048 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5049 aSig = alternateASig;
5050 }
bb98fe42 5051 zSign = ( (int32_t) aSig < 0 );
158142c2 5052 if ( zSign ) aSig = - aSig;
ff32e16e 5053 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5054}
5055
369be8f6 5056
158142c2 5057
8229c991
AJ
5058/*----------------------------------------------------------------------------
5059| Returns the binary exponential of the single-precision floating-point value
5060| `a'. The operation is performed according to the IEC/IEEE Standard for
5061| Binary Floating-Point Arithmetic.
5062|
5063| Uses the following identities:
5064|
5065| 1. -------------------------------------------------------------------------
5066| x x*ln(2)
5067| 2 = e
5068|
5069| 2. -------------------------------------------------------------------------
5070| 2 3 4 5 n
5071| x x x x x x x
5072| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5073| 1! 2! 3! 4! 5! n!
5074*----------------------------------------------------------------------------*/
5075
5076static const float64 float32_exp2_coefficients[15] =
5077{
d5138cf4
PM
5078 const_float64( 0x3ff0000000000000ll ), /* 1 */
5079 const_float64( 0x3fe0000000000000ll ), /* 2 */
5080 const_float64( 0x3fc5555555555555ll ), /* 3 */
5081 const_float64( 0x3fa5555555555555ll ), /* 4 */
5082 const_float64( 0x3f81111111111111ll ), /* 5 */
5083 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
5084 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
5085 const_float64( 0x3efa01a01a01a01all ), /* 8 */
5086 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
5087 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5088 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5089 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5090 const_float64( 0x3de6124613a86d09ll ), /* 13 */
5091 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5092 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
5093};
5094
e5a41ffa 5095float32 float32_exp2(float32 a, float_status *status)
8229c991 5096{
c120391c 5097 bool aSign;
0c48262d 5098 int aExp;
bb98fe42 5099 uint32_t aSig;
8229c991
AJ
5100 float64 r, x, xn;
5101 int i;
ff32e16e 5102 a = float32_squash_input_denormal(a, status);
8229c991
AJ
5103
5104 aSig = extractFloat32Frac( a );
5105 aExp = extractFloat32Exp( a );
5106 aSign = extractFloat32Sign( a );
5107
5108 if ( aExp == 0xFF) {
ff32e16e
PM
5109 if (aSig) {
5110 return propagateFloat32NaN(a, float32_zero, status);
5111 }
8229c991
AJ
5112 return (aSign) ? float32_zero : a;
5113 }
5114 if (aExp == 0) {
5115 if (aSig == 0) return float32_one;
5116 }
5117
ff32e16e 5118 float_raise(float_flag_inexact, status);
8229c991
AJ
5119
5120 /* ******************************* */
5121 /* using float64 for approximation */
5122 /* ******************************* */
ff32e16e
PM
5123 x = float32_to_float64(a, status);
5124 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
5125
5126 xn = x;
5127 r = float64_one;
5128 for (i = 0 ; i < 15 ; i++) {
5129 float64 f;
5130
ff32e16e
PM
5131 f = float64_mul(xn, float32_exp2_coefficients[i], status);
5132 r = float64_add(r, f, status);
8229c991 5133
ff32e16e 5134 xn = float64_mul(xn, x, status);
8229c991
AJ
5135 }
5136
5137 return float64_to_float32(r, status);
5138}
5139
374dfc33
AJ
5140/*----------------------------------------------------------------------------
5141| Returns the binary log of the single-precision floating-point value `a'.
5142| The operation is performed according to the IEC/IEEE Standard for Binary
5143| Floating-Point Arithmetic.
5144*----------------------------------------------------------------------------*/
e5a41ffa 5145float32 float32_log2(float32 a, float_status *status)
374dfc33 5146{
c120391c 5147 bool aSign, zSign;
0c48262d 5148 int aExp;
bb98fe42 5149 uint32_t aSig, zSig, i;
374dfc33 5150
ff32e16e 5151 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
5152 aSig = extractFloat32Frac( a );
5153 aExp = extractFloat32Exp( a );
5154 aSign = extractFloat32Sign( a );
5155
5156 if ( aExp == 0 ) {
5157 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5158 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5159 }
5160 if ( aSign ) {
ff32e16e 5161 float_raise(float_flag_invalid, status);
af39bc8c 5162 return float32_default_nan(status);
374dfc33
AJ
5163 }
5164 if ( aExp == 0xFF ) {
ff32e16e
PM
5165 if (aSig) {
5166 return propagateFloat32NaN(a, float32_zero, status);
5167 }
374dfc33
AJ
5168 return a;
5169 }
5170
5171 aExp -= 0x7F;
5172 aSig |= 0x00800000;
5173 zSign = aExp < 0;
5174 zSig = aExp << 23;
5175
5176 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 5177 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
5178 if ( aSig & 0x01000000 ) {
5179 aSig >>= 1;
5180 zSig |= i;
5181 }
5182 }
5183
5184 if ( zSign )
5185 zSig = -zSig;
5186
ff32e16e 5187 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
5188}
5189
158142c2 5190/*----------------------------------------------------------------------------
158142c2
FB
5191| Returns the result of converting the double-precision floating-point value
5192| `a' to the extended double-precision floating-point format. The conversion
5193| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5194| Arithmetic.
5195*----------------------------------------------------------------------------*/
5196
e5a41ffa 5197floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2 5198{
c120391c 5199 bool aSign;
0c48262d 5200 int aExp;
bb98fe42 5201 uint64_t aSig;
158142c2 5202
ff32e16e 5203 a = float64_squash_input_denormal(a, status);
158142c2
FB
5204 aSig = extractFloat64Frac( a );
5205 aExp = extractFloat64Exp( a );
5206 aSign = extractFloat64Sign( a );
5207 if ( aExp == 0x7FF ) {
ff32e16e 5208 if (aSig) {
7537c2b4
JM
5209 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5210 status);
5211 return floatx80_silence_nan(res, status);
ff32e16e 5212 }
0f605c88
LV
5213 return packFloatx80(aSign,
5214 floatx80_infinity_high,
5215 floatx80_infinity_low);
158142c2
FB
5216 }
5217 if ( aExp == 0 ) {
5218 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5219 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5220 }
5221 return
5222 packFloatx80(
e9321124 5223 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
158142c2
FB
5224
5225}
5226
158142c2
FB
5227/*----------------------------------------------------------------------------
5228| Returns the remainder of the double-precision floating-point value `a'
5229| with respect to the corresponding value `b'. The operation is performed
5230| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5231*----------------------------------------------------------------------------*/
5232
e5a41ffa 5233float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 5234{
c120391c 5235 bool aSign, zSign;
0c48262d 5236 int aExp, bExp, expDiff;
bb98fe42
AF
5237 uint64_t aSig, bSig;
5238 uint64_t q, alternateASig;
5239 int64_t sigMean;
158142c2 5240
ff32e16e
PM
5241 a = float64_squash_input_denormal(a, status);
5242 b = float64_squash_input_denormal(b, status);
158142c2
FB
5243 aSig = extractFloat64Frac( a );
5244 aExp = extractFloat64Exp( a );
5245 aSign = extractFloat64Sign( a );
5246 bSig = extractFloat64Frac( b );
5247 bExp = extractFloat64Exp( b );
158142c2
FB
5248 if ( aExp == 0x7FF ) {
5249 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 5250 return propagateFloat64NaN(a, b, status);
158142c2 5251 }
ff32e16e 5252 float_raise(float_flag_invalid, status);
af39bc8c 5253 return float64_default_nan(status);
158142c2
FB
5254 }
5255 if ( bExp == 0x7FF ) {
ff32e16e
PM
5256 if (bSig) {
5257 return propagateFloat64NaN(a, b, status);
5258 }
158142c2
FB
5259 return a;
5260 }
5261 if ( bExp == 0 ) {
5262 if ( bSig == 0 ) {
ff32e16e 5263 float_raise(float_flag_invalid, status);
af39bc8c 5264 return float64_default_nan(status);
158142c2
FB
5265 }
5266 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5267 }
5268 if ( aExp == 0 ) {
5269 if ( aSig == 0 ) return a;
5270 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5271 }
5272 expDiff = aExp - bExp;
e9321124
AB
5273 aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5274 bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
158142c2
FB
5275 if ( expDiff < 0 ) {
5276 if ( expDiff < -1 ) return a;
5277 aSig >>= 1;
5278 }
5279 q = ( bSig <= aSig );
5280 if ( q ) aSig -= bSig;
5281 expDiff -= 64;
5282 while ( 0 < expDiff ) {
5283 q = estimateDiv128To64( aSig, 0, bSig );
5284 q = ( 2 < q ) ? q - 2 : 0;
5285 aSig = - ( ( bSig>>2 ) * q );
5286 expDiff -= 62;
5287 }
5288 expDiff += 64;
5289 if ( 0 < expDiff ) {
5290 q = estimateDiv128To64( aSig, 0, bSig );
5291 q = ( 2 < q ) ? q - 2 : 0;
5292 q >>= 64 - expDiff;
5293 bSig >>= 2;
5294 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5295 }
5296 else {
5297 aSig >>= 2;
5298 bSig >>= 2;
5299 }
5300 do {
5301 alternateASig = aSig;
5302 ++q;
5303 aSig -= bSig;
bb98fe42 5304 } while ( 0 <= (int64_t) aSig );
158142c2
FB
5305 sigMean = aSig + alternateASig;
5306 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5307 aSig = alternateASig;
5308 }
bb98fe42 5309 zSign = ( (int64_t) aSig < 0 );
158142c2 5310 if ( zSign ) aSig = - aSig;
ff32e16e 5311 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5312
5313}
5314
374dfc33
AJ
5315/*----------------------------------------------------------------------------
5316| Returns the binary log of the double-precision floating-point value `a'.
5317| The operation is performed according to the IEC/IEEE Standard for Binary
5318| Floating-Point Arithmetic.
5319*----------------------------------------------------------------------------*/
e5a41ffa 5320float64 float64_log2(float64 a, float_status *status)
374dfc33 5321{
c120391c 5322 bool aSign, zSign;
0c48262d 5323 int aExp;
bb98fe42 5324 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 5325 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
5326
5327 aSig = extractFloat64Frac( a );
5328 aExp = extractFloat64Exp( a );
5329 aSign = extractFloat64Sign( a );
5330
5331 if ( aExp == 0 ) {
5332 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5333 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5334 }
5335 if ( aSign ) {
ff32e16e 5336 float_raise(float_flag_invalid, status);
af39bc8c 5337 return float64_default_nan(status);
374dfc33
AJ
5338 }
5339 if ( aExp == 0x7FF ) {
ff32e16e
PM
5340 if (aSig) {
5341 return propagateFloat64NaN(a, float64_zero, status);
5342 }
374dfc33
AJ
5343 return a;
5344 }
5345
5346 aExp -= 0x3FF;
e9321124 5347 aSig |= UINT64_C(0x0010000000000000);
374dfc33 5348 zSign = aExp < 0;
bb98fe42 5349 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
5350 for (i = 1LL << 51; i > 0; i >>= 1) {
5351 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5352 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
e9321124 5353 if ( aSig & UINT64_C(0x0020000000000000) ) {
374dfc33
AJ
5354 aSig >>= 1;
5355 zSig |= i;
5356 }
5357 }
5358
5359 if ( zSign )
5360 zSig = -zSig;
ff32e16e 5361 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
5362}
5363
158142c2
FB
5364/*----------------------------------------------------------------------------
5365| Returns the result of converting the extended double-precision floating-
5366| point value `a' to the 32-bit two's complement integer format. The
5367| conversion is performed according to the IEC/IEEE Standard for Binary
5368| Floating-Point Arithmetic---which means in particular that the conversion
5369| is rounded according to the current rounding mode. If `a' is a NaN, the
5370| largest positive integer is returned. Otherwise, if the conversion
5371| overflows, the largest integer with the same sign as `a' is returned.
5372*----------------------------------------------------------------------------*/
5373
f4014512 5374int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2 5375{
c120391c 5376 bool aSign;
f4014512 5377 int32_t aExp, shiftCount;
bb98fe42 5378 uint64_t aSig;
158142c2 5379
d1eb8f2a
AD
5380 if (floatx80_invalid_encoding(a)) {
5381 float_raise(float_flag_invalid, status);
5382 return 1 << 31;
5383 }
158142c2
FB
5384 aSig = extractFloatx80Frac( a );
5385 aExp = extractFloatx80Exp( a );
5386 aSign = extractFloatx80Sign( a );
bb98fe42 5387 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5388 shiftCount = 0x4037 - aExp;
5389 if ( shiftCount <= 0 ) shiftCount = 1;
5390 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 5391 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
5392
5393}
5394
5395/*----------------------------------------------------------------------------
5396| Returns the result of converting the extended double-precision floating-
5397| point value `a' to the 32-bit two's complement integer format. The
5398| conversion is performed according to the IEC/IEEE Standard for Binary
5399| Floating-Point Arithmetic, except that the conversion is always rounded
5400| toward zero. If `a' is a NaN, the largest positive integer is returned.
5401| Otherwise, if the conversion overflows, the largest integer with the same
5402| sign as `a' is returned.
5403*----------------------------------------------------------------------------*/
5404
f4014512 5405int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2 5406{
c120391c 5407 bool aSign;
f4014512 5408 int32_t aExp, shiftCount;
bb98fe42 5409 uint64_t aSig, savedASig;
b3a6a2e0 5410 int32_t z;
158142c2 5411
d1eb8f2a
AD
5412 if (floatx80_invalid_encoding(a)) {
5413 float_raise(float_flag_invalid, status);
5414 return 1 << 31;
5415 }
158142c2
FB
5416 aSig = extractFloatx80Frac( a );
5417 aExp = extractFloatx80Exp( a );
5418 aSign = extractFloatx80Sign( a );
5419 if ( 0x401E < aExp ) {
bb98fe42 5420 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5421 goto invalid;
5422 }
5423 else if ( aExp < 0x3FFF ) {
a2f2d288 5424 if (aExp || aSig) {
d82f3b2d 5425 float_raise(float_flag_inexact, status);
a2f2d288 5426 }
158142c2
FB
5427 return 0;
5428 }
5429 shiftCount = 0x403E - aExp;
5430 savedASig = aSig;
5431 aSig >>= shiftCount;
5432 z = aSig;
5433 if ( aSign ) z = - z;
5434 if ( ( z < 0 ) ^ aSign ) {
5435 invalid:
ff32e16e 5436 float_raise(float_flag_invalid, status);
bb98fe42 5437 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5438 }
5439 if ( ( aSig<<shiftCount ) != savedASig ) {
d82f3b2d 5440 float_raise(float_flag_inexact, status);
158142c2
FB
5441 }
5442 return z;
5443
5444}
5445
5446/*----------------------------------------------------------------------------
5447| Returns the result of converting the extended double-precision floating-
5448| point value `a' to the 64-bit two's complement integer format. The
5449| conversion is performed according to the IEC/IEEE Standard for Binary
5450| Floating-Point Arithmetic---which means in particular that the conversion
5451| is rounded according to the current rounding mode. If `a' is a NaN,
5452| the largest positive integer is returned. Otherwise, if the conversion
5453| overflows, the largest integer with the same sign as `a' is returned.
5454*----------------------------------------------------------------------------*/
5455
f42c2224 5456int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2 5457{
c120391c 5458 bool aSign;
f4014512 5459 int32_t aExp, shiftCount;
bb98fe42 5460 uint64_t aSig, aSigExtra;
158142c2 5461
d1eb8f2a
AD
5462 if (floatx80_invalid_encoding(a)) {
5463 float_raise(float_flag_invalid, status);
5464 return 1ULL << 63;
5465 }
158142c2
FB
5466 aSig = extractFloatx80Frac( a );
5467 aExp = extractFloatx80Exp( a );
5468 aSign = extractFloatx80Sign( a );
5469 shiftCount = 0x403E - aExp;
5470 if ( shiftCount <= 0 ) {
5471 if ( shiftCount ) {
ff32e16e 5472 float_raise(float_flag_invalid, status);
0f605c88 5473 if (!aSign || floatx80_is_any_nan(a)) {
2c217da0 5474 return INT64_MAX;
158142c2 5475 }
2c217da0 5476 return INT64_MIN;
158142c2
FB
5477 }
5478 aSigExtra = 0;
5479 }
5480 else {
5481 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5482 }
ff32e16e 5483 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
5484
5485}
5486
5487/*----------------------------------------------------------------------------
5488| Returns the result of converting the extended double-precision floating-
5489| point value `a' to the 64-bit two's complement integer format. The
5490| conversion is performed according to the IEC/IEEE Standard for Binary
5491| Floating-Point Arithmetic, except that the conversion is always rounded
5492| toward zero. If `a' is a NaN, the largest positive integer is returned.
5493| Otherwise, if the conversion overflows, the largest integer with the same
5494| sign as `a' is returned.
5495*----------------------------------------------------------------------------*/
5496
f42c2224 5497int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2 5498{
c120391c 5499 bool aSign;
f4014512 5500 int32_t aExp, shiftCount;
bb98fe42 5501 uint64_t aSig;
f42c2224 5502 int64_t z;
158142c2 5503
d1eb8f2a
AD
5504 if (floatx80_invalid_encoding(a)) {
5505 float_raise(float_flag_invalid, status);
5506 return 1ULL << 63;
5507 }
158142c2
FB
5508 aSig = extractFloatx80Frac( a );
5509 aExp = extractFloatx80Exp( a );
5510 aSign = extractFloatx80Sign( a );
5511 shiftCount = aExp - 0x403E;
5512 if ( 0 <= shiftCount ) {
e9321124 5513 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
158142c2 5514 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 5515 float_raise(float_flag_invalid, status);
158142c2 5516 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
2c217da0 5517 return INT64_MAX;
158142c2
FB
5518 }
5519 }
2c217da0 5520 return INT64_MIN;
158142c2
FB
5521 }
5522 else if ( aExp < 0x3FFF ) {
a2f2d288 5523 if (aExp | aSig) {
d82f3b2d 5524 float_raise(float_flag_inexact, status);
a2f2d288 5525 }
158142c2
FB
5526 return 0;
5527 }
5528 z = aSig>>( - shiftCount );
bb98fe42 5529 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
d82f3b2d 5530 float_raise(float_flag_inexact, status);
158142c2
FB
5531 }
5532 if ( aSign ) z = - z;
5533 return z;
5534
5535}
5536
5537/*----------------------------------------------------------------------------
5538| Returns the result of converting the extended double-precision floating-
5539| point value `a' to the single-precision floating-point format. The
5540| conversion is performed according to the IEC/IEEE Standard for Binary
5541| Floating-Point Arithmetic.
5542*----------------------------------------------------------------------------*/
5543
e5a41ffa 5544float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2 5545{
c120391c 5546 bool aSign;
f4014512 5547 int32_t aExp;
bb98fe42 5548 uint64_t aSig;
158142c2 5549
d1eb8f2a
AD
5550 if (floatx80_invalid_encoding(a)) {
5551 float_raise(float_flag_invalid, status);
5552 return float32_default_nan(status);
5553 }
158142c2
FB
5554 aSig = extractFloatx80Frac( a );
5555 aExp = extractFloatx80Exp( a );
5556 aSign = extractFloatx80Sign( a );
5557 if ( aExp == 0x7FFF ) {
bb98fe42 5558 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5559 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5560 status);
5561 return float32_silence_nan(res, status);
158142c2
FB
5562 }
5563 return packFloat32( aSign, 0xFF, 0 );
5564 }
5565 shift64RightJamming( aSig, 33, &aSig );
5566 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5567 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5568
5569}
5570
5571/*----------------------------------------------------------------------------
5572| Returns the result of converting the extended double-precision floating-
5573| point value `a' to the double-precision floating-point format. The
5574| conversion is performed according to the IEC/IEEE Standard for Binary
5575| Floating-Point Arithmetic.
5576*----------------------------------------------------------------------------*/
5577
e5a41ffa 5578float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2 5579{
c120391c 5580 bool aSign;
f4014512 5581 int32_t aExp;
bb98fe42 5582 uint64_t aSig, zSig;
158142c2 5583
d1eb8f2a
AD
5584 if (floatx80_invalid_encoding(a)) {
5585 float_raise(float_flag_invalid, status);
5586 return float64_default_nan(status);
5587 }
158142c2
FB
5588 aSig = extractFloatx80Frac( a );
5589 aExp = extractFloatx80Exp( a );
5590 aSign = extractFloatx80Sign( a );
5591 if ( aExp == 0x7FFF ) {
bb98fe42 5592 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5593 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5594 status);
5595 return float64_silence_nan(res, status);
158142c2
FB
5596 }
5597 return packFloat64( aSign, 0x7FF, 0 );
5598 }
5599 shift64RightJamming( aSig, 1, &zSig );
5600 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5601 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5602
5603}
5604
158142c2
FB
5605/*----------------------------------------------------------------------------
5606| Returns the result of converting the extended double-precision floating-
5607| point value `a' to the quadruple-precision floating-point format. The
5608| conversion is performed according to the IEC/IEEE Standard for Binary
5609| Floating-Point Arithmetic.
5610*----------------------------------------------------------------------------*/
5611
e5a41ffa 5612float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2 5613{
c120391c 5614 bool aSign;
0c48262d 5615 int aExp;
bb98fe42 5616 uint64_t aSig, zSig0, zSig1;
158142c2 5617
d1eb8f2a
AD
5618 if (floatx80_invalid_encoding(a)) {
5619 float_raise(float_flag_invalid, status);
5620 return float128_default_nan(status);
5621 }
158142c2
FB
5622 aSig = extractFloatx80Frac( a );
5623 aExp = extractFloatx80Exp( a );
5624 aSign = extractFloatx80Sign( a );
bb98fe42 5625 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5626 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5627 status);
5628 return float128_silence_nan(res, status);
158142c2
FB
5629 }
5630 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5631 return packFloat128( aSign, aExp, zSig0, zSig1 );
5632
5633}
5634
0f721292
LV
5635/*----------------------------------------------------------------------------
5636| Rounds the extended double-precision floating-point value `a'
5637| to the precision provided by floatx80_rounding_precision and returns the
5638| result as an extended double-precision floating-point value.
5639| The operation is performed according to the IEC/IEEE Standard for Binary
5640| Floating-Point Arithmetic.
5641*----------------------------------------------------------------------------*/
5642
5643floatx80 floatx80_round(floatx80 a, float_status *status)
5644{
5645 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5646 extractFloatx80Sign(a),
5647 extractFloatx80Exp(a),
5648 extractFloatx80Frac(a), 0, status);
5649}
5650
158142c2
FB
5651/*----------------------------------------------------------------------------
5652| Rounds the extended double-precision floating-point value `a' to an integer,
5653| and returns the result as an extended quadruple-precision floating-point
5654| value. The operation is performed according to the IEC/IEEE Standard for
5655| Binary Floating-Point Arithmetic.
5656*----------------------------------------------------------------------------*/
5657
e5a41ffa 5658floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2 5659{
c120391c 5660 bool aSign;
f4014512 5661 int32_t aExp;
bb98fe42 5662 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5663 floatx80 z;
5664
d1eb8f2a
AD
5665 if (floatx80_invalid_encoding(a)) {
5666 float_raise(float_flag_invalid, status);
5667 return floatx80_default_nan(status);
5668 }
158142c2
FB
5669 aExp = extractFloatx80Exp( a );
5670 if ( 0x403E <= aExp ) {
bb98fe42 5671 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5672 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5673 }
5674 return a;
5675 }
5676 if ( aExp < 0x3FFF ) {
5677 if ( ( aExp == 0 )
9ecaf5cc 5678 && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
158142c2
FB
5679 return a;
5680 }
d82f3b2d 5681 float_raise(float_flag_inexact, status);
158142c2 5682 aSign = extractFloatx80Sign( a );
a2f2d288 5683 switch (status->float_rounding_mode) {
158142c2 5684 case float_round_nearest_even:
bb98fe42 5685 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5686 ) {
5687 return
e9321124 5688 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
158142c2
FB
5689 }
5690 break;
f9288a76
PM
5691 case float_round_ties_away:
5692 if (aExp == 0x3FFE) {
e9321124 5693 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
f9288a76
PM
5694 }
5695 break;
158142c2
FB
5696 case float_round_down:
5697 return
5698 aSign ?
e9321124 5699 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
158142c2
FB
5700 : packFloatx80( 0, 0, 0 );
5701 case float_round_up:
5702 return
5703 aSign ? packFloatx80( 1, 0, 0 )
e9321124 5704 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
3dede407
RH
5705
5706 case float_round_to_zero:
5707 break;
5708 default:
5709 g_assert_not_reached();
158142c2
FB
5710 }
5711 return packFloatx80( aSign, 0, 0 );
5712 }
5713 lastBitMask = 1;
5714 lastBitMask <<= 0x403E - aExp;
5715 roundBitsMask = lastBitMask - 1;
5716 z = a;
a2f2d288 5717 switch (status->float_rounding_mode) {
dc355b76 5718 case float_round_nearest_even:
158142c2 5719 z.low += lastBitMask>>1;
dc355b76
PM
5720 if ((z.low & roundBitsMask) == 0) {
5721 z.low &= ~lastBitMask;
5722 }
5723 break;
f9288a76
PM
5724 case float_round_ties_away:
5725 z.low += lastBitMask >> 1;
5726 break;
dc355b76
PM
5727 case float_round_to_zero:
5728 break;
5729 case float_round_up:
5730 if (!extractFloatx80Sign(z)) {
5731 z.low += roundBitsMask;
5732 }
5733 break;
5734 case float_round_down:
5735 if (extractFloatx80Sign(z)) {
158142c2
FB
5736 z.low += roundBitsMask;
5737 }
dc355b76
PM
5738 break;
5739 default:
5740 abort();
158142c2
FB
5741 }
5742 z.low &= ~ roundBitsMask;
5743 if ( z.low == 0 ) {
5744 ++z.high;
e9321124 5745 z.low = UINT64_C(0x8000000000000000);
158142c2 5746 }
a2f2d288 5747 if (z.low != a.low) {
d82f3b2d 5748 float_raise(float_flag_inexact, status);
a2f2d288 5749 }
158142c2
FB
5750 return z;
5751
5752}
5753
5754/*----------------------------------------------------------------------------
5755| Returns the result of adding the absolute values of the extended double-
5756| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5757| negated before being returned. `zSign' is ignored if the result is a NaN.
5758| The addition is performed according to the IEC/IEEE Standard for Binary
5759| Floating-Point Arithmetic.
5760*----------------------------------------------------------------------------*/
5761
c120391c 5762static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5763 float_status *status)
158142c2 5764{
/*
 * Adds the magnitudes of a and b; zSign is the sign given to the result.
 * The two-word sum zSig0:zSig1 and its exponent zExp are handed to
 * roundAndPackFloatx80() at the end, using the precision configured in
 * status->floatx80_rounding_precision.
 */
f4014512 5765 int32_t aExp, bExp, zExp;
bb98fe42 5766 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5767 int32_t expDiff;
158142c2
FB
5768
5769 aSig = extractFloatx80Frac( a );
5770 aExp = extractFloatx80Exp( a );
5771 bSig = extractFloatx80Frac( b );
5772 bExp = extractFloatx80Exp( b );
/* The sign of expDiff selects one of three cases below. */
5773 expDiff = aExp - bExp;
/* Case 1: a has the larger exponent field. */
5774 if ( 0 < expDiff ) {
5775 if ( aExp == 0x7FFF ) {
ff32e16e
PM
/* Bits below the explicit integer bit are set: a is a NaN. */
5776 if ((uint64_t)(aSig << 1)) {
5777 return propagateFloatx80NaN(a, b, status);
5778 }
158142c2
FB
/* a is an infinity and is returned unchanged. */
5779 return a;
5780 }
/*
 * b has a zero exponent field, so shift by one place fewer
 * (subFloatx80Sigs likewise treats a zero exponent field as 1).
 */
5781 if ( bExp == 0 ) --expDiff;
/* Shift b's significand right by expDiff; zSig1 receives the extra word. */
5782 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5783 zExp = aExp;
5784 }
/* Case 2: b has the larger exponent field (mirror image of case 1). */
5785 else if ( expDiff < 0 ) {
5786 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5787 if ((uint64_t)(bSig << 1)) {
5788 return propagateFloatx80NaN(a, b, status);
5789 }
0f605c88
LV
/* b is an infinity: return an infinity carrying the requested sign. */
5790 return packFloatx80(zSign,
5791 floatx80_infinity_high,
5792 floatx80_infinity_low);
158142c2
FB
5793 }
5794 if ( aExp == 0 ) ++expDiff;
5795 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5796 zExp = bExp;
5797 }
/* Case 3: equal exponent fields, so no alignment shift is needed. */
5798 else {
5799 if ( aExp == 0x7FFF ) {
/* Either operand having fraction bits set makes this a NaN case. */
bb98fe42 5800 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5801 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5802 }
5803 return a;
5804 }
5805 zSig1 = 0;
5806 zSig0 = aSig + bSig;
5807 if ( aExp == 0 ) {
41602807
JM
/* zSig0 < aSig detects unsigned wrap-around of the 64-bit sum. */
5808 if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
5809 /* At least one of the values is a pseudo-denormal,
5810 * and there is a carry out of the result. */
5811 zExp = 1;
5812 goto shiftRight1;
5813 }
2f311075
RH
/* Both significands were zero: return a zero with the requested sign. */
5814 if (zSig0 == 0) {
5815 return packFloatx80(zSign, 0, 0);
5816 }
158142c2
FB
5817 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5818 goto roundAndPack;
5819 }
5820 zExp = aExp;
5821 goto shiftRight1;
5822 }
/* Cases 1 and 2 continue here with the operands aligned. */
5823 zSig0 = aSig + bSig;
/* Top bit of the sum still set: skip the one-bit shift below. */
bb98fe42 5824 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
5825 shiftRight1:
/* Shift the sum right one place, set the top bit, and bump the exponent. */
5826 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
e9321124 5827 zSig0 |= UINT64_C(0x8000000000000000);
158142c2
FB
5828 ++zExp;
5829 roundAndPack:
a2f2d288 5830 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5831 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5832}
5833
5834/*----------------------------------------------------------------------------
5835| Returns the result of subtracting the absolute values of the extended
5836| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5837| difference is negated before being returned. `zSign' is ignored if the
5838| result is a NaN. The subtraction is performed according to the IEC/IEEE
5839| Standard for Binary Floating-Point Arithmetic.
5840*----------------------------------------------------------------------------*/
5841
c120391c 5842static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5843 float_status *status)
158142c2 5844{
/*
 * Subtracts the magnitude of b from that of a.  zSign is the sign used when
 * a's magnitude is the larger; it is flipped when b's is.  The 128-bit
 * difference zSig0:zSig1 is finished by normalizeRoundAndPackFloatx80().
 */
f4014512 5845 int32_t aExp, bExp, zExp;
bb98fe42 5846 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5847 int32_t expDiff;
158142c2
FB
5848
5849 aSig = extractFloatx80Frac( a );
5850 aExp = extractFloatx80Exp( a );
5851 bSig = extractFloatx80Frac( b );
5852 bExp = extractFloatx80Exp( b );
5853 expDiff = aExp - bExp;
5854 if ( 0 < expDiff ) goto aExpBigger;
5855 if ( expDiff < 0 ) goto bExpBigger;
/* Equal exponent fields from here down to the bExpBigger label. */
5856 if ( aExp == 0x7FFF ) {
bb98fe42 5857 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5858 return propagateFloatx80NaN(a, b, status);
158142c2 5859 }
/* Neither is a NaN, so both are infinities: invalid operation. */
ff32e16e 5860 float_raise(float_flag_invalid, status);
af39bc8c 5861 return floatx80_default_nan(status);
158142c2
FB
5862 }
/* A zero exponent field is treated as exponent 1 for both operands. */
5863 if ( aExp == 0 ) {
5864 aExp = 1;
5865 bExp = 1;
5866 }
5867 zSig1 = 0;
5868 if ( bSig < aSig ) goto aBigger;
5869 if ( aSig < bSig ) goto bBigger;
/* Equal magnitudes: the zero result is negative only when rounding down. */
a2f2d288 5870 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
5871 bExpBigger:
5872 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5873 if ((uint64_t)(bSig << 1)) {
5874 return propagateFloatx80NaN(a, b, status);
5875 }
0f605c88
LV
/* b is an infinity: the result is an infinity with the opposite sign. */
5876 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5877 floatx80_infinity_low);
158142c2
FB
5878 }
/* a has a zero exponent field, so the shift distance is one place smaller. */
5879 if ( aExp == 0 ) ++expDiff;
5880 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5881 bBigger:
/* b's magnitude is larger: compute b - a and flip the result sign. */
5882 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5883 zExp = bExp;
5884 zSign ^= 1;
5885 goto normalizeRoundAndPack;
5886 aExpBigger:
5887 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5888 if ((uint64_t)(aSig << 1)) {
5889 return propagateFloatx80NaN(a, b, status);
5890 }
158142c2
FB
/* a is an infinity and is returned unchanged. */
5891 return a;
5892 }
5893 if ( bExp == 0 ) --expDiff;
5894 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5895 aBigger:
/* a's magnitude is larger: compute a - b and keep zSign. */
5896 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5897 zExp = aExp;
5898 normalizeRoundAndPack:
a2f2d288 5899 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5900 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5901}
5902
5903/*----------------------------------------------------------------------------
5904| Returns the result of adding the extended double-precision floating-point
5905| values `a' and `b'. The operation is performed according to the IEC/IEEE
5906| Standard for Binary Floating-Point Arithmetic.
5907*----------------------------------------------------------------------------*/
5908
e5a41ffa 5909floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2 5910{
c120391c 5911 bool aSign, bSign;
158142c2 5912
d1eb8f2a
AD
5913 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5914 float_raise(float_flag_invalid, status);
5915 return floatx80_default_nan(status);
5916 }
158142c2
FB
5917 aSign = extractFloatx80Sign( a );
5918 bSign = extractFloatx80Sign( b );
5919 if ( aSign == bSign ) {
ff32e16e 5920 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5921 }
5922 else {
ff32e16e 5923 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5924 }
5925
5926}
5927
5928/*----------------------------------------------------------------------------
5929| Returns the result of subtracting the extended double-precision floating-
5930| point values `a' and `b'. The operation is performed according to the
5931| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5932*----------------------------------------------------------------------------*/
5933
e5a41ffa 5934floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2 5935{
c120391c 5936 bool aSign, bSign;
158142c2 5937
d1eb8f2a
AD
5938 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5939 float_raise(float_flag_invalid, status);
5940 return floatx80_default_nan(status);
5941 }
158142c2
FB
5942 aSign = extractFloatx80Sign( a );
5943 bSign = extractFloatx80Sign( b );
5944 if ( aSign == bSign ) {
ff32e16e 5945 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5946 }
5947 else {
ff32e16e 5948 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5949 }
5950
5951}
5952
5953/*----------------------------------------------------------------------------
5954| Returns the result of multiplying the extended double-precision floating-
5955| point values `a' and `b'. The operation is performed according to the
5956| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5957*----------------------------------------------------------------------------*/
5958
e5a41ffa 5959floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2 5960{
c120391c 5961 bool aSign, bSign, zSign;
f4014512 5962 int32_t aExp, bExp, zExp;
bb98fe42 5963 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 5964
d1eb8f2a
AD
5965 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5966 float_raise(float_flag_invalid, status);
5967 return floatx80_default_nan(status);
5968 }
158142c2
FB
5969 aSig = extractFloatx80Frac( a );
5970 aExp = extractFloatx80Exp( a );
5971 aSign = extractFloatx80Sign( a );
5972 bSig = extractFloatx80Frac( b );
5973 bExp = extractFloatx80Exp( b );
5974 bSign = extractFloatx80Sign( b );
5975 zSign = aSign ^ bSign;
5976 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5977 if ( (uint64_t) ( aSig<<1 )
5978 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5979 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5980 }
5981 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
5982 return packFloatx80(zSign, floatx80_infinity_high,
5983 floatx80_infinity_low);
158142c2
FB
5984 }
5985 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5986 if ((uint64_t)(bSig << 1)) {
5987 return propagateFloatx80NaN(a, b, status);
5988 }
158142c2
FB
5989 if ( ( aExp | aSig ) == 0 ) {
5990 invalid:
ff32e16e 5991 float_raise(float_flag_invalid, status);
af39bc8c 5992 return floatx80_default_nan(status);
158142c2 5993 }
0f605c88
LV
5994 return packFloatx80(zSign, floatx80_infinity_high,
5995 floatx80_infinity_low);
158142c2
FB
5996 }
5997 if ( aExp == 0 ) {
5998 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5999 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6000 }
6001 if ( bExp == 0 ) {
6002 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
6003 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6004 }
6005 zExp = aExp + bExp - 0x3FFE;
6006 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 6007 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
6008 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6009 --zExp;
6010 }
a2f2d288 6011 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6012 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6013}
6014
6015/*----------------------------------------------------------------------------
6016| Returns the result of dividing the extended double-precision floating-point
6017| value `a' by the corresponding value `b'. The operation is performed
6018| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6019*----------------------------------------------------------------------------*/
6020
e5a41ffa 6021floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2 6022{
/*
 * Computes a / b.  Special operands are handled first; the general case
 * then builds a two-word quotient zSig0:zSig1 from two estimate-and-correct
 * steps and rounds it with roundAndPackFloatx80().
 */
c120391c 6023 bool aSign, bSign, zSign;
f4014512 6024 int32_t aExp, bExp, zExp;
bb98fe42
AF
6025 uint64_t aSig, bSig, zSig0, zSig1;
6026 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 6027
d1eb8f2a
AD
/* An invalid encoding in either operand gives the default NaN. */
6028 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6029 float_raise(float_flag_invalid, status);
6030 return floatx80_default_nan(status);
6031 }
158142c2
FB
6032 aSig = extractFloatx80Frac( a );
6033 aExp = extractFloatx80Exp( a );
6034 aSign = extractFloatx80Sign( a );
6035 bSig = extractFloatx80Frac( b );
6036 bExp = extractFloatx80Exp( b );
6037 bSign = extractFloatx80Sign( b );
/* The quotient is negative exactly when the operand signs differ. */
6038 zSign = aSign ^ bSign;
/* a is a NaN or an infinity. */
6039 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6040 if ((uint64_t)(aSig << 1)) {
6041 return propagateFloatx80NaN(a, b, status);
6042 }
158142c2 6043 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6044 if ((uint64_t)(bSig << 1)) {
6045 return propagateFloatx80NaN(a, b, status);
6046 }
158142c2
FB
/* infinity / infinity is an invalid operation. */
6047 goto invalid;
6048 }
0f605c88
LV
/* infinity / finite gives a signed infinity. */
6049 return packFloatx80(zSign, floatx80_infinity_high,
6050 floatx80_infinity_low);
158142c2
FB
6051 }
/* b is a NaN or an infinity; a is finite. */
6052 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6053 if ((uint64_t)(bSig << 1)) {
6054 return propagateFloatx80NaN(a, b, status);
6055 }
158142c2
FB
/* finite / infinity gives a signed zero. */
6056 return packFloatx80( zSign, 0, 0 );
6057 }
6058 if ( bExp == 0 ) {
6059 if ( bSig == 0 ) {
/* Divisor is zero: 0/0 is invalid, anything else is divide-by-zero. */
6060 if ( ( aExp | aSig ) == 0 ) {
6061 invalid:
ff32e16e 6062 float_raise(float_flag_invalid, status);
af39bc8c 6063 return floatx80_default_nan(status);
158142c2 6064 }
ff32e16e 6065 float_raise(float_flag_divbyzero, status);
0f605c88
LV
6066 return packFloatx80(zSign, floatx80_infinity_high,
6067 floatx80_infinity_low);
158142c2
FB
6068 }
6069 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6070 }
6071 if ( aExp == 0 ) {
/* Zero dividend with a nonzero divisor gives a signed zero. */
6072 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6073 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6074 }
6075 zExp = aExp - bExp + 0x3FFE;
6076 rem1 = 0;
/* Halve the dividend when it is not smaller than the divisor. */
6077 if ( bSig <= aSig ) {
6078 shift128Right( aSig, 0, 1, &aSig, &rem1 );
6079 ++zExp;
6080 }
/* High quotient word: estimate, then step down while the remainder is negative. */
6081 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6082 mul64To128( bSig, zSig0, &term0, &term1 );
6083 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 6084 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6085 --zSig0;
6086 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6087 }
/* Low quotient word: the exact correction runs only for small estimates. */
6088 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 6089 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
6090 mul64To128( bSig, zSig1, &term1, &term2 );
6091 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 6092 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6093 --zSig1;
6094 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6095 }
/* Fold any nonzero remainder into the lowest bit as a sticky bit. */
6096 zSig1 |= ( ( rem1 | rem2 ) != 0 );
6097 }
a2f2d288 6098 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6099 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6100}
6101
6102/*----------------------------------------------------------------------------
6103| Returns the remainder of the extended double-precision floating-point value
6104| `a' with respect to the corresponding value `b'. The operation is performed
6b8b0136
JM
6105| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6106| if 'mod' is false; if 'mod' is true, return the remainder based on truncating
445810ec
JM
6107| the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6108| the absolute value of the integer quotient.
158142c2
FB
6109*----------------------------------------------------------------------------*/
6110
445810ec 6111floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
6b8b0136 6112 float_status *status)
158142c2 6113{
/*
 * Shared implementation behind floatx80_rem() (mod == false) and
 * floatx80_mod() (mod == true).  *quotient is written on every path; it is
 * left at 0 on the early-return paths below.
 */
c120391c 6114 bool aSign, zSign;
b662495d 6115 int32_t aExp, bExp, expDiff, aExpOrig;
bb98fe42
AF
6116 uint64_t aSig0, aSig1, bSig;
6117 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 6118
445810ec 6119 *quotient = 0;
d1eb8f2a
AD
6120 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6121 float_raise(float_flag_invalid, status);
6122 return floatx80_default_nan(status);
6123 }
158142c2 6124 aSig0 = extractFloatx80Frac( a );
/* aExpOrig keeps the raw exponent field; aExp may change when normalizing. */
b662495d 6125 aExpOrig = aExp = extractFloatx80Exp( a );
158142c2
FB
6126 aSign = extractFloatx80Sign( a );
6127 bSig = extractFloatx80Frac( b );
6128 bExp = extractFloatx80Exp( b );
158142c2 6129 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6130 if ( (uint64_t) ( aSig0<<1 )
6131 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6132 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6133 }
/* a is an infinity: invalid operation. */
6134 goto invalid;
6135 }
6136 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6137 if ((uint64_t)(bSig << 1)) {
6138 return propagateFloatx80NaN(a, b, status);
6139 }
b662495d
JM
/* b is an infinity: the result is a itself. */
6140 if (aExp == 0 && aSig0 >> 63) {
6141 /*
6142 * Pseudo-denormal argument must be returned in normalized
6143 * form.
6144 */
6145 return packFloatx80(aSign, 1, aSig0);
6146 }
158142c2
FB
6147 return a;
6148 }
6149 if ( bExp == 0 ) {
/* A zero divisor is an invalid operation. */
6150 if ( bSig == 0 ) {
6151 invalid:
ff32e16e 6152 float_raise(float_flag_invalid, status);
af39bc8c 6153 return floatx80_default_nan(status);
158142c2
FB
6154 }
6155 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6156 }
6157 if ( aExp == 0 ) {
/* A zero dividend is returned unchanged. */
499a2f7b 6158 if ( aSig0 == 0 ) return a;
158142c2
FB
6159 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6160 }
158142c2
FB
6161 zSign = aSign;
6162 expDiff = aExp - bExp;
6163 aSig1 = 0;
6164 if ( expDiff < 0 ) {
b662495d
JM
/*
 * a's exponent is below b's.  For mod, or when it is more than one
 * below, a is returned as the result.
 */
6165 if ( mod || expDiff < -1 ) {
6166 if (aExp == 1 && aExpOrig == 0) {
6167 /*
6168 * Pseudo-denormal argument must be returned in
6169 * normalized form.
6170 */
6171 return packFloatx80(aSign, aExp, aSig0);
6172 }
6173 return a;
6174 }
158142c2
FB
/* Exponent exactly one below b's: halve a and continue with expDiff 0. */
6175 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6176 expDiff = 0;
6177 }
/* First quotient bit: subtract b once when it fits. */
445810ec 6178 *quotient = q = ( bSig <= aSig0 );
158142c2
FB
6179 if ( q ) aSig0 -= bSig;
6180 expDiff -= 64;
/* Main reduction loop: each pass accounts for 62 quotient bits. */
6181 while ( 0 < expDiff ) {
6182 q = estimateDiv128To64( aSig0, aSig1, bSig );
/* Use the estimate reduced by 2, clamped at 0. */
6183 q = ( 2 < q ) ? q - 2 : 0;
6184 mul64To128( bSig, q, &term0, &term1 );
6185 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6186 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6187 expDiff -= 62;
445810ec
JM
/* Accumulate the quotient; high bits shifted out are discarded. */
6188 *quotient <<= 62;
6189 *quotient += q;
158142c2
FB
6190 }
6191 expDiff += 64;
/* Final partial step for the remaining expDiff quotient bits. */
6192 if ( 0 < expDiff ) {
6193 q = estimateDiv128To64( aSig0, aSig1, bSig );
6194 q = ( 2 < q ) ? q - 2 : 0;
6195 q >>= 64 - expDiff;
6196 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6197 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6198 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
/* Subtract the shifted divisor until the remainder is smaller than it. */
6199 while ( le128( term0, term1, aSig0, aSig1 ) ) {
6200 ++q;
6201 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6202 }
445810ec
JM
/* A shift by 64 is handled separately by clearing the value. */
6203 if (expDiff < 64) {
6204 *quotient <<= expDiff;
6205 } else {
6206 *quotient = 0;
6207 }
6208 *quotient += q;
158142c2
FB
6209 }
6210 else {
/* No partial step: term0:term1 is just the divisor for the test below. */
6211 term1 = 0;
6212 term0 = bSig;
6213 }
6b8b0136
JM
/*
 * Remainder (not mod) only: switch to (divisor - remainder) with the
 * opposite sign when that is smaller, or on a tie when q is odd, and
 * increment the quotient to match.
 */
6214 if (!mod) {
6215 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6216 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6217 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6218 && ( q & 1 ) )
6219 ) {
6220 aSig0 = alternateASig0;
6221 aSig1 = alternateASig1;
6222 zSign = ! zSign;
445810ec 6223 ++*quotient;
6b8b0136 6224 }
158142c2
FB
6225 }
/* Pack with a fixed rounding precision of 80, not the configured one. */
6226 return
6227 normalizeRoundAndPackFloatx80(
ff32e16e 6228 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
6229
6230}
6231
6b8b0136
JM
6232/*----------------------------------------------------------------------------
6233| Returns the remainder of the extended double-precision floating-point value
6234| `a' with respect to the corresponding value `b'. The operation is performed
6235| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6236*----------------------------------------------------------------------------*/
6237
6238floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6239{
445810ec
JM
6240 uint64_t quotient;
6241 return floatx80_modrem(a, b, false, &quotient, status);
6b8b0136
JM
6242}
6243
6244/*----------------------------------------------------------------------------
6245| Returns the remainder of the extended double-precision floating-point value
6246| `a' with respect to the corresponding value `b', with the quotient truncated
6247| toward zero.
6248*----------------------------------------------------------------------------*/
6249
6250floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6251{
445810ec
JM
6252 uint64_t quotient;
6253 return floatx80_modrem(a, b, true, &quotient, status);
6b8b0136
JM
6254}
6255
158142c2
FB
6256/*----------------------------------------------------------------------------
6257| Returns the square root of the extended double-precision floating-point
6258| value `a'. The operation is performed according to the IEC/IEEE Standard
6259| for Binary Floating-Point Arithmetic.
6260*----------------------------------------------------------------------------*/
6261
e5a41ffa 6262floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2 6263{
/*
 * Computes sqrt(a).  Special operands are handled first; the general case
 * builds the root in two estimate-and-correct steps (zSig0, then zSig1) and
 * rounds the combined significand with roundAndPackFloatx80().
 */
c120391c 6264 bool aSign;
f4014512 6265 int32_t aExp, zExp;
bb98fe42
AF
6266 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6267 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 6268
d1eb8f2a
AD
6269 if (floatx80_invalid_encoding(a)) {
6270 float_raise(float_flag_invalid, status);
6271 return floatx80_default_nan(status);
6272 }
158142c2
FB
6273 aSig0 = extractFloatx80Frac( a );
6274 aExp = extractFloatx80Exp( a );
6275 aSign = extractFloatx80Sign( a );
6276 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6277 if ((uint64_t)(aSig0 << 1)) {
6278 return propagateFloatx80NaN(a, a, status);
6279 }
158142c2
FB
/* +infinity is returned unchanged; -infinity is invalid. */
6280 if ( ! aSign ) return a;
6281 goto invalid;
6282 }
6283 if ( aSign ) {
/* Negative zero is returned unchanged; other negative inputs are invalid. */
6284 if ( ( aExp | aSig0 ) == 0 ) return a;
6285 invalid:
ff32e16e 6286 float_raise(float_flag_invalid, status);
af39bc8c 6287 return floatx80_default_nan(status);
158142c2
FB
6288 }
6289 if ( aExp == 0 ) {
6290 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6291 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6292 }
/* Halve the unbiased exponent and re-apply the 0x3FFF bias. */
6293 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
/* Initial estimate from the top 32 significand bits, refined by a division. */
6294 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6295 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6296 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6297 doubleZSig0 = zSig0<<1;
6298 mul64To128( zSig0, zSig0, &term0, &term1 );
6299 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
/* Step zSig0 down while a - zSig0^2 is negative. */
bb98fe42 6300 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6301 --zSig0;
6302 doubleZSig0 -= 2;
6303 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6304 }
/* Low word of the root, estimated from the remainder. */
6305 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
/* The exact correction runs only when the low 62 bits of the estimate are <= 5. */
e9321124 6306 if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
158142c2
FB
6307 if ( zSig1 == 0 ) zSig1 = 1;
6308 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6309 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6310 mul64To128( zSig1, zSig1, &term2, &term3 );
6311 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6312 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6313 --zSig1;
6314 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6315 term3 |= 1;
6316 term2 |= doubleZSig0;
6317 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6318 }
/* Fold any nonzero remainder into the lowest bit as a sticky bit. */
6319 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6320 }
/* Combine the two root words into the significand pair that gets rounded. */
6321 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6322 zSig0 |= doubleZSig0;
a2f2d288
PM
6323 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6324 0, zExp, zSig0, zSig1, status);
158142c2
FB
6325}
6326
158142c2
FB
6327/*----------------------------------------------------------------------------
6328| Returns the result of converting the quadruple-precision floating-point
6329| value `a' to the extended double-precision floating-point format. The
6330| conversion is performed according to the IEC/IEEE Standard for Binary
6331| Floating-Point Arithmetic.
6332*----------------------------------------------------------------------------*/
6333
e5a41ffa 6334floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2 6335{
c120391c 6336 bool aSign;
f4014512 6337 int32_t aExp;
bb98fe42 6338 uint64_t aSig0, aSig1;
158142c2
FB
6339
6340 aSig1 = extractFloat128Frac1( a );
6341 aSig0 = extractFloat128Frac0( a );
6342 aExp = extractFloat128Exp( a );
6343 aSign = extractFloat128Sign( a );
6344 if ( aExp == 0x7FFF ) {
6345 if ( aSig0 | aSig1 ) {
7537c2b4
JM
6346 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6347 status);
6348 return floatx80_silence_nan(res, status);
158142c2 6349 }
0f605c88
LV
6350 return packFloatx80(aSign, floatx80_infinity_high,
6351 floatx80_infinity_low);
158142c2
FB
6352 }
6353 if ( aExp == 0 ) {
6354 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6355 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6356 }
6357 else {
e9321124 6358 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6359 }
6360 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6361 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6362
6363}
6364
158142c2
FB
6365/*----------------------------------------------------------------------------
6366| Returns the remainder of the quadruple-precision floating-point value `a'
6367| with respect to the corresponding value `b'. The operation is performed
6368| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6369*----------------------------------------------------------------------------*/
6370
e5a41ffa 6371float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 6372{
/*
 * Computes the remainder of a with respect to b.  Special operands are
 * handled first; the general case reduces a's significand against b's in
 * 61-bit quotient steps, then picks between the last two candidate
 * remainders.
 */
c120391c 6373 bool aSign, zSign;
f4014512 6374 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6375 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6376 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6377 int64_t sigMean0;
158142c2
FB
6378
6379 aSig1 = extractFloat128Frac1( a );
6380 aSig0 = extractFloat128Frac0( a );
6381 aExp = extractFloat128Exp( a );
6382 aSign = extractFloat128Sign( a );
6383 bSig1 = extractFloat128Frac1( b );
6384 bSig0 = extractFloat128Frac0( b );
6385 bExp = extractFloat128Exp( b );
158142c2
FB
6386 if ( aExp == 0x7FFF ) {
6387 if ( ( aSig0 | aSig1 )
6388 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6389 return propagateFloat128NaN(a, b, status);
158142c2
FB
6390 }
/* a is an infinity: invalid operation. */
6391 goto invalid;
6392 }
6393 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6394 if (bSig0 | bSig1) {
6395 return propagateFloat128NaN(a, b, status);
6396 }
158142c2
FB
/* b is an infinity: the remainder is a itself. */
6397 return a;
6398 }
6399 if ( bExp == 0 ) {
/* A zero divisor is an invalid operation. */
6400 if ( ( bSig0 | bSig1 ) == 0 ) {
6401 invalid:
ff32e16e 6402 float_raise(float_flag_invalid, status);
af39bc8c 6403 return float128_default_nan(status);
158142c2
FB
6404 }
6405 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6406 }
6407 if ( aExp == 0 ) {
/* A zero dividend is returned unchanged. */
6408 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6409 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6410 }
6411 expDiff = aExp - bExp;
/* a's exponent is more than one below b's: a is the result. */
6412 if ( expDiff < -1 ) return a;
/*
 * Set bit 48 of each high word and shift both significands left;
 * a is shifted one place less when expDiff is -1.
 */
6413 shortShift128Left(
e9321124 6414 aSig0 | UINT64_C(0x0001000000000000),
158142c2
FB
6415 aSig1,
6416 15 - ( expDiff < 0 ),
6417 &aSig0,
6418 &aSig1
6419 );
6420 shortShift128Left(
e9321124 6421 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
/* First quotient bit: subtract b once when it fits. */
6422 q = le128( bSig0, bSig1, aSig0, aSig1 );
6423 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6424 expDiff -= 64;
/* Main reduction loop: each pass accounts for 61 quotient bits. */
6425 while ( 0 < expDiff ) {
6426 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
/* Use the estimate reduced by 4, clamped at 0. */
6427 q = ( 4 < q ) ? q - 4 : 0;
6428 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6429 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6430 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6431 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6432 expDiff -= 61;
6433 }
/* Final partial step when quotient bits remain. */
6434 if ( -64 < expDiff ) {
6435 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6436 q = ( 4 < q ) ? q - 4 : 0;
6437 q >>= - expDiff;
6438 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6439 expDiff += 52;
6440 if ( expDiff < 0 ) {
6441 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6442 }
6443 else {
6444 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6445 }
6446 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6447 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6448 }
6449 else {
/* No partial step: shift both significands right by 12 and continue. */
6450 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6451 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6452 }
/*
 * Subtract b until the remainder goes negative; alternateASig holds the
 * value from just before the final subtraction.
 */
6453 do {
6454 alternateASig0 = aSig0;
6455 alternateASig1 = aSig1;
6456 ++q;
6457 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 6458 } while ( 0 <= (int64_t) aSig0 );
/*
 * The sign of (negative remainder + previous remainder) decides which one
 * is kept; on an exact tie the previous one is kept when q is odd.
 */
158142c2 6459 add128(
bb98fe42 6460 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
6461 if ( ( sigMean0 < 0 )
6462 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6463 aSig0 = alternateASig0;
6464 aSig1 = alternateASig1;
6465 }
/* A negative remainder is negated and flips the result sign relative to a. */
bb98fe42 6466 zSign = ( (int64_t) aSig0 < 0 );
158142c2 6467 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
6468 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6469 status);
158142c2
FB
6470}
6471
6472/*----------------------------------------------------------------------------
6473| Returns the square root of the quadruple-precision floating-point value `a'.
6474| The operation is performed according to the IEC/IEEE Standard for Binary
6475| Floating-Point Arithmetic.
6476*----------------------------------------------------------------------------*/
6477
e5a41ffa 6478float128 float128_sqrt(float128 a, float_status *status)
158142c2 6479{
c120391c 6480 bool aSign;
f4014512 6481 int32_t aExp, zExp;
bb98fe42
AF
6482 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6483 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6484
6485 aSig1 = extractFloat128Frac1( a );
6486 aSig0 = extractFloat128Frac0( a );
6487 aExp = extractFloat128Exp( a );
6488 aSign = extractFloat128Sign( a );
6489 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6490 if (aSig0 | aSig1) {
6491 return propagateFloat128NaN(a, a, status);
6492 }
158142c2
FB
6493 if ( ! aSign ) return a;
6494 goto invalid;
6495 }
6496 if ( aSign ) {
6497 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6498 invalid:
ff32e16e 6499 float_raise(float_flag_invalid, status);
af39bc8c 6500 return float128_default_nan(status);
158142c2
FB
6501 }
6502 if ( aExp == 0 ) {
6503 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6504 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6505 }
6506 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
e9321124 6507 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6508 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6509 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6510 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6511 doubleZSig0 = zSig0<<1;
6512 mul64To128( zSig0, zSig0, &term0, &term1 );
6513 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6514 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6515 --zSig0;
6516 doubleZSig0 -= 2;
6517 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6518 }
6519 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6520 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6521 if ( zSig1 == 0 ) zSig1 = 1;
6522 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6523 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6524 mul64To128( zSig1, zSig1, &term2, &term3 );
6525 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6526 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6527 --zSig1;
6528 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6529 term3 |= 1;
6530 term2 |= doubleZSig0;
6531 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6532 }
6533 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6534 }
6535 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 6536 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6537
6538}
6539
71bfd65c
RH
6540static inline FloatRelation
6541floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
6542 float_status *status)
f6714d36 6543{
c120391c 6544 bool aSign, bSign;
f6714d36 6545
d1eb8f2a
AD
6546 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6547 float_raise(float_flag_invalid, status);
6548 return float_relation_unordered;
6549 }
f6714d36
AJ
6550 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6551 ( extractFloatx80Frac( a )<<1 ) ) ||
6552 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6553 ( extractFloatx80Frac( b )<<1 ) )) {
6554 if (!is_quiet ||
af39bc8c
AM
6555 floatx80_is_signaling_nan(a, status) ||
6556 floatx80_is_signaling_nan(b, status)) {
ff32e16e 6557 float_raise(float_flag_invalid, status);
f6714d36
AJ
6558 }
6559 return float_relation_unordered;
6560 }
6561 aSign = extractFloatx80Sign( a );
6562 bSign = extractFloatx80Sign( b );
6563 if ( aSign != bSign ) {
6564
6565 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6566 ( ( a.low | b.low ) == 0 ) ) {
6567 /* zero case */
6568 return float_relation_equal;
6569 } else {
6570 return 1 - (2 * aSign);
6571 }
6572 } else {
be53fa78
JM
6573 /* Normalize pseudo-denormals before comparison. */
6574 if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
6575 ++a.high;
6576 }
6577 if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
6578 ++b.high;
6579 }
f6714d36
AJ
6580 if (a.low == b.low && a.high == b.high) {
6581 return float_relation_equal;
6582 } else {
6583 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6584 }
6585 }
6586}
6587
71bfd65c 6588FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 6589{
ff32e16e 6590 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
6591}
6592
71bfd65c
RH
6593FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
6594 float_status *status)
f6714d36 6595{
ff32e16e 6596 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
6597}
6598
71bfd65c
RH
6599static inline FloatRelation
6600float128_compare_internal(float128 a, float128 b, bool is_quiet,
6601 float_status *status)
1f587329 6602{
c120391c 6603 bool aSign, bSign;
1f587329
BS
6604
6605 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
6606 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
6607 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
6608 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
6609 if (!is_quiet ||
af39bc8c
AM
6610 float128_is_signaling_nan(a, status) ||
6611 float128_is_signaling_nan(b, status)) {
ff32e16e 6612 float_raise(float_flag_invalid, status);
1f587329
BS
6613 }
6614 return float_relation_unordered;
6615 }
6616 aSign = extractFloat128Sign( a );
6617 bSign = extractFloat128Sign( b );
6618 if ( aSign != bSign ) {
6619 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
6620 /* zero case */
6621 return float_relation_equal;
6622 } else {
6623 return 1 - (2 * aSign);
6624 }
6625 } else {
6626 if (a.low == b.low && a.high == b.high) {
6627 return float_relation_equal;
6628 } else {
6629 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6630 }
6631 }
6632}
6633
71bfd65c 6634FloatRelation float128_compare(float128 a, float128 b, float_status *status)
1f587329 6635{
ff32e16e 6636 return float128_compare_internal(a, b, 0, status);
1f587329
BS
6637}
6638
71bfd65c
RH
6639FloatRelation float128_compare_quiet(float128 a, float128 b,
6640 float_status *status)
1f587329 6641{
ff32e16e 6642 return float128_compare_internal(a, b, 1, status);
1f587329
BS
6643}
6644
e5a41ffa 6645floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb 6646{
c120391c 6647 bool aSign;
326b9e98 6648 int32_t aExp;
bb98fe42 6649 uint64_t aSig;
9ee6e8bb 6650
d1eb8f2a
AD
6651 if (floatx80_invalid_encoding(a)) {
6652 float_raise(float_flag_invalid, status);
6653 return floatx80_default_nan(status);
6654 }
9ee6e8bb
PB
6655 aSig = extractFloatx80Frac( a );
6656 aExp = extractFloatx80Exp( a );
6657 aSign = extractFloatx80Sign( a );
6658
326b9e98
AJ
6659 if ( aExp == 0x7FFF ) {
6660 if ( aSig<<1 ) {
ff32e16e 6661 return propagateFloatx80NaN(a, a, status);
326b9e98 6662 }
9ee6e8bb
PB
6663 return a;
6664 }
326b9e98 6665
3c85c37f
PM
6666 if (aExp == 0) {
6667 if (aSig == 0) {
6668 return a;
6669 }
6670 aExp++;
6671 }
69397542 6672
326b9e98
AJ
6673 if (n > 0x10000) {
6674 n = 0x10000;
6675 } else if (n < -0x10000) {
6676 n = -0x10000;
6677 }
6678
9ee6e8bb 6679 aExp += n;
a2f2d288
PM
6680 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
6681 aSign, aExp, aSig, 0, status);
9ee6e8bb 6682}
9ee6e8bb 6683
e5a41ffa 6684float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb 6685{
c120391c 6686 bool aSign;
326b9e98 6687 int32_t aExp;
bb98fe42 6688 uint64_t aSig0, aSig1;
9ee6e8bb
PB
6689
6690 aSig1 = extractFloat128Frac1( a );
6691 aSig0 = extractFloat128Frac0( a );
6692 aExp = extractFloat128Exp( a );
6693 aSign = extractFloat128Sign( a );
6694 if ( aExp == 0x7FFF ) {
326b9e98 6695 if ( aSig0 | aSig1 ) {
ff32e16e 6696 return propagateFloat128NaN(a, a, status);
326b9e98 6697 }
9ee6e8bb
PB
6698 return a;
6699 }
3c85c37f 6700 if (aExp != 0) {
e9321124 6701 aSig0 |= UINT64_C(0x0001000000000000);
3c85c37f 6702 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 6703 return a;
3c85c37f
PM
6704 } else {
6705 aExp++;
6706 }
69397542 6707
326b9e98
AJ
6708 if (n > 0x10000) {
6709 n = 0x10000;
6710 } else if (n < -0x10000) {
6711 n = -0x10000;
6712 }
6713
69397542
PB
6714 aExp += n - 1;
6715 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 6716 , status);
9ee6e8bb
PB
6717
6718}
f6b3b108
EC
6719
6720static void __attribute__((constructor)) softfloat_init(void)
6721{
6722 union_float64 ua, ub, uc, ur;
6723
6724 if (QEMU_NO_HARDFLOAT) {
6725 return;
6726 }
6727 /*
6728 * Test that the host's FMA is not obviously broken. For example,
6729 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6730 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6731 */
6732 ua.s = 0x0020000000000001ULL;
6733 ub.s = 0x3ca0000000000000ULL;
6734 uc.s = 0x0020000000000000ULL;
6735 ur.h = fma(ua.h, ub.h, uc.h);
6736 if (ur.s != 0x0020000000000001ULL) {
6737 force_soft_fma = true;
6738 }
6739}