]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
softfloat: Use pointers with ftype_round_pack_canonical
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is neither fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
d82f3b2d 135 float_raise(float_flag_input_denormal, s); \
a94b7839
EC
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
185/*
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
189 */
/* The float32 checks never use fpclassify(), whatever the host. */
#define QEMU_HARDFLOAT_1F32_USE_FP 0
#define QEMU_HARDFLOAT_2F32_USE_FP 0
#define QEMU_HARDFLOAT_3F32_USE_FP 0

/* The float64 checks use fpclassify() on x86_64 hosts only. */
#ifdef __x86_64__
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
205
206/*
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211 */
/* isinf() is selected on x86_64 and aarch64 hosts only. */
#if !defined(__x86_64__) && !defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF 0
#else
# define QEMU_HARDFLOAT_USE_ISINF 1
#endif
217
218/*
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
221 * already set.
222 */
/* Building with -ffast-math disables hardfloat; say so at compile time. */
#if defined(__FAST_MATH__)
# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
  IEEE implementation
#endif

/*
 * QEMU_NO_HARDFLOAT is 1 for PPC targets and -ffast-math builds.
 * QEMU_SOFTFLOAT_ATTR additionally forbids inlining of the soft-float
 * entry points when hardfloat is available.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
a94b7839
EC
342static inline float32
343float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
b240c9c4 345 f32_check_fn pre, f32_check_fn post)
a94b7839
EC
346{
347 union_float32 ua, ub, ur;
348
349 ua.s = xa;
350 ub.s = xb;
351
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
354 }
355
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
359 }
a94b7839
EC
360
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 363 float_raise(float_flag_overflow, s);
b240c9c4
RH
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
a94b7839
EC
366 }
367 return ur.s;
368
369 soft:
370 return soft(ua.s, ub.s, s);
371}
372
373static inline float64
374float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
b240c9c4 376 f64_check_fn pre, f64_check_fn post)
a94b7839
EC
377{
378 union_float64 ua, ub, ur;
379
380 ua.s = xa;
381 ub.s = xb;
382
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
385 }
386
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
390 }
a94b7839
EC
391
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 394 float_raise(float_flag_overflow, s);
b240c9c4
RH
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
a94b7839
EC
397 }
398 return ur.s;
399
400 soft:
401 return soft(ua.s, ub.s, s);
402}
403
d97544c9
AB
404/*----------------------------------------------------------------------------
405| Returns the fraction bits of the single-precision floating-point value `a'.
406*----------------------------------------------------------------------------*/
407
408static inline uint32_t extractFloat32Frac(float32 a)
409{
410 return float32_val(a) & 0x007FFFFF;
411}
412
413/*----------------------------------------------------------------------------
414| Returns the exponent bits of the single-precision floating-point value `a'.
415*----------------------------------------------------------------------------*/
416
417static inline int extractFloat32Exp(float32 a)
418{
419 return (float32_val(a) >> 23) & 0xFF;
420}
421
422/*----------------------------------------------------------------------------
423| Returns the sign bit of the single-precision floating-point value `a'.
424*----------------------------------------------------------------------------*/
425
c120391c 426static inline bool extractFloat32Sign(float32 a)
d97544c9
AB
427{
428 return float32_val(a) >> 31;
429}
430
431/*----------------------------------------------------------------------------
432| Returns the fraction bits of the double-precision floating-point value `a'.
433*----------------------------------------------------------------------------*/
434
435static inline uint64_t extractFloat64Frac(float64 a)
436{
e9321124 437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
d97544c9
AB
438}
439
440/*----------------------------------------------------------------------------
441| Returns the exponent bits of the double-precision floating-point value `a'.
442*----------------------------------------------------------------------------*/
443
444static inline int extractFloat64Exp(float64 a)
445{
446 return (float64_val(a) >> 52) & 0x7FF;
447}
448
449/*----------------------------------------------------------------------------
450| Returns the sign bit of the double-precision floating-point value `a'.
451*----------------------------------------------------------------------------*/
452
c120391c 453static inline bool extractFloat64Sign(float64 a)
d97544c9
AB
454{
455 return float64_val(a) >> 63;
456}
457
/*
 * Classification of a floating point number.  The NaN classes are kept
 * at the top of the range so that "cls >= float_class_qnan" matches any
 * NaN; do not reorder the enumerators.
 */
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified = 0,
    float_class_zero         = 1,
    float_class_normal       = 2,
    float_class_inf          = 3,
    float_class_qnan         = 4,  /* all NaNs from here */
    float_class_snan         = 5,
} FloatClass;
471
134eda00
RH
472#define float_cmask(bit) (1u << (bit))
473
474enum {
475 float_cmask_zero = float_cmask(float_class_zero),
476 float_cmask_normal = float_cmask(float_class_normal),
477 float_cmask_inf = float_cmask(float_class_inf),
478 float_cmask_qnan = float_cmask(float_class_qnan),
479 float_cmask_snan = float_cmask(float_class_snan),
480
481 float_cmask_infzero = float_cmask_zero | float_cmask_inf,
482 float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
483};
484
485
247d1f21
RH
486/* Simple helpers for checking if, or what kind of, NaN we have */
487static inline __attribute__((unused)) bool is_nan(FloatClass c)
488{
489 return unlikely(c >= float_class_qnan);
490}
491
492static inline __attribute__((unused)) bool is_snan(FloatClass c)
493{
494 return c == float_class_snan;
495}
496
497static inline __attribute__((unused)) bool is_qnan(FloatClass c)
498{
499 return c == float_class_qnan;
500}
501
a90119b5
AB
502/*
503 * Structure holding all of the decomposed parts of a float. The
504 * exponent is unbiased and the fraction is normalized. All
505 * calculations are done with a 64 bit fraction and then rounded as
506 * appropriate for the final format.
507 *
508 * Thanks to the packed FloatClass a decent compiler should be able to
509 * fit the whole structure into registers and avoid using the stack
510 * for parameter passing.
511 */
512
513typedef struct {
514 uint64_t frac;
515 int32_t exp;
516 FloatClass cls;
517 bool sign;
f8155c1d 518} FloatParts64;
a90119b5 519
/* Bit position of the binary point (and implicit bit) in a canonical fraction. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
a90119b5
AB
522
/*
 * All of the relevant parameters for a format.  The first group describes
 * the format itself; the second group is computed based on the size of
 * the fraction; arm_althp is an optional modifier.
 */
typedef struct {
    int exp_size;             /* size of the exponent field */
    int exp_bias;             /* offset applied to the exponent field */
    int exp_max;              /* maximum normalised exponent */
    int frac_size;            /* size of the fraction field */
    int frac_shift;           /* shift to normalise the fraction with
                                 DECOMPOSED_BINARY_POINT */
    uint64_t frac_lsb;        /* least significant bit of fraction */
    uint64_t frac_lsbm1;      /* the bit below the lsb (for rounding) */
    uint64_t round_mask;      /* mask used for rounding */
    uint64_t roundeven_mask;  /* mask used for round-to-even */
    bool arm_althp;           /* handle ARM Alternative Half Precision */
} FloatFmt;
548
/*
 * Expand the FloatFmt designated initializers for a format with an E-bit
 * exponent field and an F-bit fraction field.
 *
 * E and F are parenthesized at every use so that an expression argument
 * (e.g. "5 + 5") cannot be re-associated by the surrounding "-" and "<<"
 * operators.
 */
#define FLOAT_PARAMS(E, F)                                                  \
    .exp_size       = (E),                                                  \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                                \
    .exp_max        = (1 << (E)) - 1,                                       \
    .frac_size      = (F),                                                  \
    .frac_shift     = DECOMPOSED_BINARY_POINT - (F),                        \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - (F)),              \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - (F)) - 1),        \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - (F))) - 1,        \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - (F))) - 1
560
561static const FloatFmt float16_params = {
562 FLOAT_PARAMS(5, 10)
563};
564
6fed16b2
AB
565static const FloatFmt float16_params_ahp = {
566 FLOAT_PARAMS(5, 10),
567 .arm_althp = true
568};
569
8282310d
LZ
570static const FloatFmt bfloat16_params = {
571 FLOAT_PARAMS(8, 7)
572};
573
a90119b5
AB
574static const FloatFmt float32_params = {
575 FLOAT_PARAMS(8, 23)
576};
577
578static const FloatFmt float64_params = {
579 FLOAT_PARAMS(11, 52)
580};
581
6fff2167 582/* Unpack a float to parts, but do not canonicalize. */
d8fdd172 583static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
6fff2167 584{
d8fdd172
RH
585 const int f_size = fmt->frac_size;
586 const int e_size = fmt->exp_size;
6fff2167 587
d8fdd172 588 *r = (FloatParts64) {
6fff2167 589 .cls = float_class_unclassified,
d8fdd172
RH
590 .sign = extract64(raw, f_size + e_size, 1),
591 .exp = extract64(raw, f_size, e_size),
592 .frac = extract64(raw, 0, f_size)
6fff2167
AB
593 };
594}
595
3dddb203 596static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
6fff2167 597{
3dddb203 598 unpack_raw64(p, &float16_params, f);
6fff2167
AB
599}
600
3dddb203 601static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
8282310d 602{
3dddb203 603 unpack_raw64(p, &bfloat16_params, f);
8282310d
LZ
604}
605
3dddb203 606static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
6fff2167 607{
3dddb203 608 unpack_raw64(p, &float32_params, f);
6fff2167
AB
609}
610
3dddb203 611static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
6fff2167 612{
3dddb203 613 unpack_raw64(p, &float64_params, f);
6fff2167
AB
614}
615
616/* Pack a float from parts, but do not canonicalize. */
9e4af58c 617static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
6fff2167 618{
9e4af58c
RH
619 const int f_size = fmt->frac_size;
620 const int e_size = fmt->exp_size;
621 uint64_t ret;
622
623 ret = (uint64_t)p->sign << (f_size + e_size);
624 ret = deposit64(ret, f_size, e_size, p->exp);
625 ret = deposit64(ret, 0, f_size, p->frac);
626 return ret;
6fff2167
AB
627}
628
71fd178e 629static inline float16 float16_pack_raw(const FloatParts64 *p)
6fff2167 630{
71fd178e 631 return make_float16(pack_raw64(p, &float16_params));
6fff2167
AB
632}
633
71fd178e 634static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
8282310d 635{
71fd178e 636 return pack_raw64(p, &bfloat16_params);
8282310d
LZ
637}
638
71fd178e 639static inline float32 float32_pack_raw(const FloatParts64 *p)
6fff2167 640{
71fd178e 641 return make_float32(pack_raw64(p, &float32_params));
6fff2167
AB
642}
643
71fd178e 644static inline float64 float64_pack_raw(const FloatParts64 *p)
6fff2167 645{
71fd178e 646 return make_float64(pack_raw64(p, &float64_params));
6fff2167
AB
647}
648
0664335a
RH
649/*----------------------------------------------------------------------------
650| Functions and definitions to determine: (1) whether tininess for underflow
651| is detected before or after rounding by default, (2) what (if anything)
652| happens when exceptions are raised, (3) how signaling NaNs are distinguished
653| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
654| are propagated from function inputs to output. These details are target-
655| specific.
656*----------------------------------------------------------------------------*/
139c1837 657#include "softfloat-specialize.c.inc"
0664335a 658
/* The unsuffixed name refers to the FloatParts64 implementation. */
#define parts_default_nan    parts64_default_nan
660
6fff2167 661/* Canonicalize EXP and FRAC, setting CLS. */
f8155c1d 662static FloatParts64 sf_canonicalize(FloatParts64 part, const FloatFmt *parm,
f9943c7f 663 float_status *status)
6fff2167 664{
ca3a3d5a 665 if (part.exp == parm->exp_max && !parm->arm_althp) {
6fff2167
AB
666 if (part.frac == 0) {
667 part.cls = float_class_inf;
668 } else {
94933df0 669 part.frac <<= parm->frac_shift;
298b468e
RH
670 part.cls = (parts_is_snan_frac(part.frac, status)
671 ? float_class_snan : float_class_qnan);
6fff2167
AB
672 }
673 } else if (part.exp == 0) {
674 if (likely(part.frac == 0)) {
675 part.cls = float_class_zero;
676 } else if (status->flush_inputs_to_zero) {
677 float_raise(float_flag_input_denormal, status);
678 part.cls = float_class_zero;
679 part.frac = 0;
680 } else {
e99c4373 681 int shift = clz64(part.frac);
6fff2167
AB
682 part.cls = float_class_normal;
683 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
684 part.frac <<= shift;
685 }
686 } else {
687 part.cls = float_class_normal;
688 part.exp -= parm->exp_bias;
689 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
690 }
691 return part;
692}
693
694/* Round and uncanonicalize a floating-point number by parts. There
695 * are FRAC_SHIFT bits that may require rounding at the bottom of the
696 * fraction; these bits will be removed. The exponent will be biased
697 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
698 */
699
/*
 * Round the canonical parts P for the format PARM, using the rounding mode
 * and flush/tininess settings in S, and convert exp/frac back to the
 * values that go into the raw exponent and fraction fields.
 *
 * Exception flags are accumulated in a local and raised once, at the end,
 * through float_raise().  The returned copy of P has exp and frac
 * replaced; cls may be changed to float_class_zero or float_class_inf
 * when the result underflows to zero or overflows to infinity.
 */
f8155c1d 700static FloatParts64 round_canonical(FloatParts64 p, float_status *s,
6fff2167
AB
701 const FloatFmt *parm)
702{
5d64abb3 703 const uint64_t frac_lsb = parm->frac_lsb;
6fff2167
AB
704 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
705 const uint64_t round_mask = parm->round_mask;
706 const uint64_t roundeven_mask = parm->roundeven_mask;
707 const int exp_max = parm->exp_max;
708 const int frac_shift = parm->frac_shift;
709 uint64_t frac, inc;
710 int exp, flags = 0;
711 bool overflow_norm;
712
713 frac = p.frac;
714 exp = p.exp;
715
716 switch (p.cls) {
717 case float_class_normal:
/*
 * Per rounding mode, choose:
 *   inc           - the value added to frac before the low bits are dropped;
 *   overflow_norm - on overflow, true selects the largest normal number,
 *                   false selects infinity.
 */
718 switch (s->float_rounding_mode) {
719 case float_round_nearest_even:
720 overflow_norm = false;
/* An exact tie on an even lsb gets no increment; everything else gets half an lsb. */
721 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
722 break;
723 case float_round_ties_away:
724 overflow_norm = false;
725 inc = frac_lsbm1;
726 break;
727 case float_round_to_zero:
728 overflow_norm = true;
729 inc = 0;
730 break;
731 case float_round_up:
732 inc = p.sign ? 0 : round_mask;
733 overflow_norm = p.sign;
734 break;
735 case float_round_down:
736 inc = p.sign ? round_mask : 0;
737 overflow_norm = !p.sign;
738 break;
5d64abb3
RH
739 case float_round_to_odd:
740 overflow_norm = true;
/* Only increment when the lsb is currently even. */
741 inc = frac & frac_lsb ? 0 : round_mask;
742 break;
6fff2167
AB
743 default:
744 g_assert_not_reached();
745 }
746
/* From here on exp is the biased exponent. */
747 exp += parm->exp_bias;
748 if (likely(exp > 0)) {
/* Result is in the normal range (overflow is checked below). */
749 if (frac & round_mask) {
750 flags |= float_flag_inexact;
e99c4373
RH
/* A carry out of the top bit means the rounded value needs one more exponent step. */
751 if (uadd64_overflow(frac, inc, &frac)) {
752 frac = (frac >> 1) | DECOMPOSED_IMPLICIT_BIT;
6fff2167
AB
753 exp++;
754 }
755 }
756 frac >>= frac_shift;
757
ca3a3d5a
AB
758 if (parm->arm_althp) {
759 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
760 if (unlikely(exp > exp_max)) {
761 /* Overflow. Return the maximum normal. */
/* Note: this replaces, rather than adds to, any inexact flag set above. */
762 flags = float_flag_invalid;
763 exp = exp_max;
764 frac = -1;
765 }
766 } else if (unlikely(exp >= exp_max)) {
6fff2167
AB
767 flags |= float_flag_overflow | float_flag_inexact;
768 if (overflow_norm) {
/* Largest finite value: top non-special exponent, all-ones fraction. */
769 exp = exp_max - 1;
770 frac = -1;
771 } else {
772 p.cls = float_class_inf;
773 goto do_inf;
774 }
775 }
776 } else if (s->flush_to_zero) {
/* Would be denormal, but outputs are flushed: return zero and flag it. */
777 flags |= float_flag_output_denormal;
778 p.cls = float_class_zero;
779 goto do_zero;
780 } else {
/* Denormal (or zero) result. */
e99c4373
RH
781 bool is_tiny = s->tininess_before_rounding || (exp < 0);
782
783 if (!is_tiny) {
/* exp == 0 with tininess detected after rounding: tiny unless rounding carries out of the top bit. */
784 uint64_t discard;
785 is_tiny = !uadd64_overflow(frac, inc, &discard);
786 }
6fff2167
AB
787
/* NOTE(review): shift64RightJamming presumably keeps shifted-out bits sticky in the lsb -- confirm in softfloat-macros.h. */
788 shift64RightJamming(frac, 1 - exp, &frac);
789 if (frac & round_mask) {
790 /* Need to recompute round-to-even. */
5d64abb3
RH
/* Only the two modes whose inc depends on frac are recomputed; the others keep the inc chosen above. */
791 switch (s->float_rounding_mode) {
792 case float_round_nearest_even:
6fff2167
AB
793 inc = ((frac & roundeven_mask) != frac_lsbm1
794 ? frac_lsbm1 : 0);
5d64abb3
RH
795 break;
796 case float_round_to_odd:
797 inc = frac & frac_lsb ? 0 : round_mask;
798 break;
3dede407
RH
799 default:
800 break;
6fff2167
AB
801 }
802 flags |= float_flag_inexact;
803 frac += inc;
804 }
805
/* If rounding carried into the implicit bit the result is a normal number with exponent field 1. */
806 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
807 frac >>= frac_shift;
808
/* Underflow is only raised when the result is both tiny and inexact. */
809 if (is_tiny && (flags & float_flag_inexact)) {
810 flags |= float_flag_underflow;
811 }
812 if (exp == 0 && frac == 0) {
813 p.cls = float_class_zero;
814 }
815 }
816 break;
817
818 case float_class_zero:
/* Also entered via goto from the flush-to-zero path above. */
819 do_zero:
820 exp = 0;
821 frac = 0;
822 break;
823
824 case float_class_inf:
/* Also entered via goto from the overflow path above. */
825 do_inf:
ca3a3d5a 826 assert(!parm->arm_althp);
6fff2167
AB
827 exp = exp_max;
828 frac = 0;
829 break;
830
831 case float_class_qnan:
832 case float_class_snan:
ca3a3d5a 833 assert(!parm->arm_althp);
6fff2167 834 exp = exp_max;
/* Shift the NaN payload back down by frac_shift, with no rounding. */
94933df0 835 frac >>= parm->frac_shift;
6fff2167
AB
836 break;
837
838 default:
839 g_assert_not_reached();
840 }
841
/* Raise everything accumulated above in one call. */
842 float_raise(flags, s);
843 p.exp = exp;
844 p.frac = frac;
845 return p;
846}
847
f8155c1d 848static FloatParts64 return_nan(FloatParts64 a, float_status *s)
dbe4d53a 849{
57547c60
RH
850 g_assert(is_nan(a.cls));
851 if (is_snan(a.cls)) {
d82f3b2d 852 float_raise(float_flag_invalid, s);
57547c60
RH
853 if (!s->default_nan_mode) {
854 return parts_silence_nan(a, s);
dbe4d53a 855 }
57547c60
RH
856 } else if (!s->default_nan_mode) {
857 return a;
dbe4d53a 858 }
0fc07cad
RH
859 parts_default_nan(&a, s);
860 return a;
dbe4d53a
AB
861}
862
f8155c1d 863static FloatParts64 pick_nan(FloatParts64 a, FloatParts64 b, float_status *s)
6fff2167
AB
864{
865 if (is_snan(a.cls) || is_snan(b.cls)) {
d82f3b2d 866 float_raise(float_flag_invalid, s);
6fff2167
AB
867 }
868
869 if (s->default_nan_mode) {
0fc07cad 870 parts_default_nan(&a, s);
6fff2167 871 } else {
4f251cfd 872 if (pickNaN(a.cls, b.cls,
6fff2167 873 a.frac > b.frac ||
913602e3 874 (a.frac == b.frac && a.sign < b.sign), s)) {
6fff2167
AB
875 a = b;
876 }
0bcfbcbe
RH
877 if (is_snan(a.cls)) {
878 return parts_silence_nan(a, s);
879 }
6fff2167
AB
880 }
881 return a;
882}
883
f8155c1d 884static FloatParts64 pick_nan_muladd(FloatParts64 a, FloatParts64 b, FloatParts64 c,
d446830a
AB
885 bool inf_zero, float_status *s)
886{
1839189b
PM
887 int which;
888
d446830a 889 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
d82f3b2d 890 float_raise(float_flag_invalid, s);
d446830a
AB
891 }
892
3bd2dec1 893 which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
1839189b 894
d446830a 895 if (s->default_nan_mode) {
1839189b
PM
896 /* Note that this check is after pickNaNMulAdd so that function
897 * has an opportunity to set the Invalid flag.
898 */
f7e598e2 899 which = 3;
1839189b 900 }
d446830a 901
1839189b
PM
902 switch (which) {
903 case 0:
904 break;
905 case 1:
906 a = b;
907 break;
908 case 2:
909 a = c;
910 break;
911 case 3:
0fc07cad
RH
912 parts_default_nan(&a, s);
913 break;
1839189b
PM
914 default:
915 g_assert_not_reached();
d446830a 916 }
1839189b 917
0bcfbcbe
RH
918 if (is_snan(a.cls)) {
919 return parts_silence_nan(a, s);
920 }
d446830a
AB
921 return a;
922}
923
aaffb7bf
RH
924/*
925 * Pack/unpack routines with a specific FloatFmt.
926 */
927
98e256fc
RH
928static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
929 float_status *s, const FloatFmt *params)
aaffb7bf 930{
98e256fc
RH
931 float16_unpack_raw(p, f);
932 *p = sf_canonicalize(*p, params, s);
aaffb7bf
RH
933}
934
98e256fc
RH
935static void float16_unpack_canonical(FloatParts64 *p, float16 f,
936 float_status *s)
aaffb7bf 937{
98e256fc 938 float16a_unpack_canonical(p, f, s, &float16_params);
aaffb7bf
RH
939}
940
98e256fc
RH
941static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
942 float_status *s)
aaffb7bf 943{
98e256fc
RH
944 bfloat16_unpack_raw(p, f);
945 *p = sf_canonicalize(*p, &bfloat16_params, s);
aaffb7bf
RH
946}
947
e293e927
RH
948static float16 float16a_round_pack_canonical(FloatParts64 *p,
949 float_status *s,
aaffb7bf
RH
950 const FloatFmt *params)
951{
e293e927
RH
952 *p = round_canonical(*p, s, params);
953 return float16_pack_raw(p);
aaffb7bf
RH
954}
955
e293e927
RH
956static float16 float16_round_pack_canonical(FloatParts64 *p,
957 float_status *s)
aaffb7bf
RH
958{
959 return float16a_round_pack_canonical(p, s, &float16_params);
960}
961
e293e927
RH
962static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
963 float_status *s)
aaffb7bf 964{
e293e927
RH
965 *p = round_canonical(*p, s, &bfloat16_params);
966 return bfloat16_pack_raw(p);
aaffb7bf
RH
967}
968
98e256fc
RH
969static void float32_unpack_canonical(FloatParts64 *p, float32 f,
970 float_status *s)
aaffb7bf 971{
98e256fc
RH
972 float32_unpack_raw(p, f);
973 *p = sf_canonicalize(*p, &float32_params, s);
aaffb7bf
RH
974}
975
e293e927
RH
976static float32 float32_round_pack_canonical(FloatParts64 *p,
977 float_status *s)
aaffb7bf 978{
e293e927
RH
979 *p = round_canonical(*p, s, &float32_params);
980 return float32_pack_raw(p);
aaffb7bf
RH
981}
982
98e256fc
RH
983static void float64_unpack_canonical(FloatParts64 *p, float64 f,
984 float_status *s)
aaffb7bf 985{
98e256fc
RH
986 float64_unpack_raw(p, f);
987 *p = sf_canonicalize(*p, &float64_params, s);
aaffb7bf
RH
988}
989
e293e927
RH
990static float64 float64_round_pack_canonical(FloatParts64 *p,
991 float_status *s)
aaffb7bf 992{
e293e927
RH
993 *p = round_canonical(*p, s, &float64_params);
994 return float64_pack_raw(p);
aaffb7bf
RH
995}
996
6fff2167
AB
997/*
998 * Returns the result of adding or subtracting the values of the
999 * floating-point values `a' and `b'. The operation is performed
1000 * according to the IEC/IEEE Standard for Binary Floating-Point
1001 * Arithmetic.
1002 */
1003
f8155c1d 1004static FloatParts64 addsub_floats(FloatParts64 a, FloatParts64 b, bool subtract,
6fff2167
AB
1005 float_status *s)
1006{
1007 bool a_sign = a.sign;
1008 bool b_sign = b.sign ^ subtract;
1009
1010 if (a_sign != b_sign) {
1011 /* Subtraction */
1012
1013 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1014 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
1015 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
1016 a.frac = a.frac - b.frac;
1017 } else {
1018 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
1019 a.frac = b.frac - a.frac;
1020 a.exp = b.exp;
1021 a_sign ^= 1;
1022 }
1023
1024 if (a.frac == 0) {
1025 a.cls = float_class_zero;
1026 a.sign = s->float_rounding_mode == float_round_down;
1027 } else {
e99c4373 1028 int shift = clz64(a.frac);
6fff2167
AB
1029 a.frac = a.frac << shift;
1030 a.exp = a.exp - shift;
1031 a.sign = a_sign;
1032 }
1033 return a;
1034 }
1035 if (is_nan(a.cls) || is_nan(b.cls)) {
1036 return pick_nan(a, b, s);
1037 }
1038 if (a.cls == float_class_inf) {
1039 if (b.cls == float_class_inf) {
1040 float_raise(float_flag_invalid, s);
0fc07cad 1041 parts_default_nan(&a, s);
6fff2167
AB
1042 }
1043 return a;
1044 }
1045 if (a.cls == float_class_zero && b.cls == float_class_zero) {
1046 a.sign = s->float_rounding_mode == float_round_down;
1047 return a;
1048 }
1049 if (a.cls == float_class_zero || b.cls == float_class_inf) {
1050 b.sign = a_sign ^ 1;
1051 return b;
1052 }
1053 if (b.cls == float_class_zero) {
1054 return a;
1055 }
1056 } else {
1057 /* Addition */
1058 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1059 if (a.exp > b.exp) {
1060 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
1061 } else if (a.exp < b.exp) {
1062 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
1063 a.exp = b.exp;
1064 }
e99c4373
RH
1065
1066 if (uadd64_overflow(a.frac, b.frac, &a.frac)) {
64d450a0 1067 shift64RightJamming(a.frac, 1, &a.frac);
e99c4373 1068 a.frac |= DECOMPOSED_IMPLICIT_BIT;
6fff2167
AB
1069 a.exp += 1;
1070 }
1071 return a;
1072 }
1073 if (is_nan(a.cls) || is_nan(b.cls)) {
1074 return pick_nan(a, b, s);
1075 }
1076 if (a.cls == float_class_inf || b.cls == float_class_zero) {
1077 return a;
1078 }
1079 if (b.cls == float_class_inf || a.cls == float_class_zero) {
1080 b.sign = b_sign;
1081 return b;
1082 }
1083 }
1084 g_assert_not_reached();
1085}
1086
1087/*
1088 * Returns the result of adding or subtracting the floating-point
1089 * values `a' and `b'. The operation is performed according to the
1090 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1091 */
1092
97ff87c0 1093float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
6fff2167 1094{
98e256fc
RH
1095 FloatParts64 pa, pb, pr;
1096
1097 float16_unpack_canonical(&pa, a, status);
1098 float16_unpack_canonical(&pb, b, status);
1099 pr = addsub_floats(pa, pb, false, status);
6fff2167 1100
e293e927 1101 return float16_round_pack_canonical(&pr, status);
6fff2167
AB
1102}
1103
1b615d48
EC
1104float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
1105{
98e256fc
RH
1106 FloatParts64 pa, pb, pr;
1107
1108 float16_unpack_canonical(&pa, a, status);
1109 float16_unpack_canonical(&pb, b, status);
1110 pr = addsub_floats(pa, pb, true, status);
1b615d48 1111
e293e927 1112 return float16_round_pack_canonical(&pr, status);
1b615d48
EC
1113}
1114
1115static float32 QEMU_SOFTFLOAT_ATTR
1116soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
6fff2167 1117{
98e256fc
RH
1118 FloatParts64 pa, pb, pr;
1119
1120 float32_unpack_canonical(&pa, a, status);
1121 float32_unpack_canonical(&pb, b, status);
1122 pr = addsub_floats(pa, pb, subtract, status);
6fff2167 1123
e293e927 1124 return float32_round_pack_canonical(&pr, status);
6fff2167
AB
1125}
1126
1b615d48
EC
1127static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
1128{
1129 return soft_f32_addsub(a, b, false, status);
1130}
1131
1132static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1133{
1134 return soft_f32_addsub(a, b, true, status);
1135}
1136
1137static float64 QEMU_SOFTFLOAT_ATTR
1138soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
6fff2167 1139{
98e256fc
RH
1140 FloatParts64 pa, pb, pr;
1141
1142 float64_unpack_canonical(&pa, a, status);
1143 float64_unpack_canonical(&pb, b, status);
1144 pr = addsub_floats(pa, pb, subtract, status);
6fff2167 1145
e293e927 1146 return float64_round_pack_canonical(&pr, status);
6fff2167
AB
1147}
1148
1b615d48 1149static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1150{
1b615d48
EC
1151 return soft_f64_addsub(a, b, false, status);
1152}
6fff2167 1153
1b615d48
EC
1154static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1155{
1156 return soft_f64_addsub(a, b, true, status);
6fff2167
AB
1157}
1158
/* Host single-precision addition: returns a + b. */
static float hard_f32_add(float a, float b)
{
    const float sum = a + b;

    return sum;
}
6fff2167 1163
1b615d48
EC
/* Host single-precision subtraction: returns a - b. */
static float hard_f32_sub(float a, float b)
{
    const float diff = a - b;

    return diff;
}
1168
/* Host double-precision addition: returns a + b. */
static double hard_f64_add(double a, double b)
{
    const double sum = a + b;

    return sum;
}
6fff2167 1173
1b615d48
EC
/* Host double-precision subtraction: returns a - b. */
static double hard_f64_sub(double a, double b)
{
    const double diff = a - b;

    return diff;
}
1178
b240c9c4 1179static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1b615d48
EC
1180{
1181 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1182 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1183 }
1184 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1185}
1186
b240c9c4 1187static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1b615d48
EC
1188{
1189 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1190 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1191 } else {
1192 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1193 }
1194}
1195
1196static float32 float32_addsub(float32 a, float32 b, float_status *s,
1197 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1198{
1199 return float32_gen2(a, b, s, hard, soft,
b240c9c4 1200 f32_is_zon2, f32_addsubmul_post);
1b615d48
EC
1201}
1202
1203static float64 float64_addsub(float64 a, float64 b, float_status *s,
1204 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1205{
1206 return float64_gen2(a, b, s, hard, soft,
b240c9c4 1207 f64_is_zon2, f64_addsubmul_post);
1b615d48
EC
1208}
1209
1210float32 QEMU_FLATTEN
1211float32_add(float32 a, float32 b, float_status *s)
1212{
1213 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1214}
1215
1216float32 QEMU_FLATTEN
1217float32_sub(float32 a, float32 b, float_status *s)
1218{
1219 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1220}
1221
1222float64 QEMU_FLATTEN
1223float64_add(float64 a, float64 b, float_status *s)
1224{
1225 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1226}
1227
1228float64 QEMU_FLATTEN
1229float64_sub(float64 a, float64 b, float_status *s)
1230{
1231 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1232}
1233
8282310d
LZ
1234/*
1235 * Returns the result of adding or subtracting the bfloat16
1236 * values `a' and `b'.
1237 */
1238bfloat16 QEMU_FLATTEN bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1239{
98e256fc
RH
1240 FloatParts64 pa, pb, pr;
1241
1242 bfloat16_unpack_canonical(&pa, a, status);
1243 bfloat16_unpack_canonical(&pb, b, status);
1244 pr = addsub_floats(pa, pb, false, status);
8282310d 1245
e293e927 1246 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
1247}
1248
1249bfloat16 QEMU_FLATTEN bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1250{
98e256fc
RH
1251 FloatParts64 pa, pb, pr;
1252
1253 bfloat16_unpack_canonical(&pa, a, status);
1254 bfloat16_unpack_canonical(&pb, b, status);
1255 pr = addsub_floats(pa, pb, true, status);
8282310d 1256
e293e927 1257 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
1258}
1259
74d707e2
AB
1260/*
1261 * Returns the result of multiplying the floating-point values `a' and
1262 * `b'. The operation is performed according to the IEC/IEEE Standard
1263 * for Binary Floating-Point Arithmetic.
1264 */
1265
f8155c1d 1266static FloatParts64 mul_floats(FloatParts64 a, FloatParts64 b, float_status *s)
74d707e2
AB
1267{
1268 bool sign = a.sign ^ b.sign;
1269
1270 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1271 uint64_t hi, lo;
1272 int exp = a.exp + b.exp;
1273
1274 mul64To128(a.frac, b.frac, &hi, &lo);
e99c4373 1275 if (hi & DECOMPOSED_IMPLICIT_BIT) {
74d707e2 1276 exp += 1;
e99c4373
RH
1277 } else {
1278 hi <<= 1;
74d707e2 1279 }
e99c4373 1280 hi |= (lo != 0);
74d707e2
AB
1281
1282 /* Re-use a */
1283 a.exp = exp;
1284 a.sign = sign;
e99c4373 1285 a.frac = hi;
74d707e2
AB
1286 return a;
1287 }
1288 /* handle all the NaN cases */
1289 if (is_nan(a.cls) || is_nan(b.cls)) {
1290 return pick_nan(a, b, s);
1291 }
1292 /* Inf * Zero == NaN */
1293 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
1294 (a.cls == float_class_zero && b.cls == float_class_inf)) {
d82f3b2d 1295 float_raise(float_flag_invalid, s);
0fc07cad
RH
1296 parts_default_nan(&a, s);
1297 return a;
74d707e2
AB
1298 }
1299 /* Multiply by 0 or Inf */
1300 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1301 a.sign = sign;
1302 return a;
1303 }
1304 if (b.cls == float_class_inf || b.cls == float_class_zero) {
1305 b.sign = sign;
1306 return b;
1307 }
1308 g_assert_not_reached();
1309}
1310
97ff87c0 1311float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2 1312{
98e256fc
RH
1313 FloatParts64 pa, pb, pr;
1314
1315 float16_unpack_canonical(&pa, a, status);
1316 float16_unpack_canonical(&pb, b, status);
1317 pr = mul_floats(pa, pb, status);
74d707e2 1318
e293e927 1319 return float16_round_pack_canonical(&pr, status);
74d707e2
AB
1320}
1321
2dfabc86
EC
1322static float32 QEMU_SOFTFLOAT_ATTR
1323soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2 1324{
98e256fc
RH
1325 FloatParts64 pa, pb, pr;
1326
1327 float32_unpack_canonical(&pa, a, status);
1328 float32_unpack_canonical(&pb, b, status);
1329 pr = mul_floats(pa, pb, status);
74d707e2 1330
e293e927 1331 return float32_round_pack_canonical(&pr, status);
74d707e2
AB
1332}
1333
2dfabc86
EC
1334static float64 QEMU_SOFTFLOAT_ATTR
1335soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2 1336{
98e256fc
RH
1337 FloatParts64 pa, pb, pr;
1338
1339 float64_unpack_canonical(&pa, a, status);
1340 float64_unpack_canonical(&pb, b, status);
1341 pr = mul_floats(pa, pb, status);
74d707e2 1342
e293e927 1343 return float64_round_pack_canonical(&pr, status);
74d707e2
AB
1344}
1345
2dfabc86
EC
/* Host single-precision multiplication: returns a * b. */
static float hard_f32_mul(float a, float b)
{
    const float prod = a * b;

    return prod;
}
1350
/* Host double-precision multiplication: returns a * b. */
static double hard_f64_mul(double a, double b)
{
    const double prod = a * b;

    return prod;
}
1355
2dfabc86
EC
1356float32 QEMU_FLATTEN
1357float32_mul(float32 a, float32 b, float_status *s)
1358{
1359 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
b240c9c4 1360 f32_is_zon2, f32_addsubmul_post);
2dfabc86
EC
1361}
1362
1363float64 QEMU_FLATTEN
1364float64_mul(float64 a, float64 b, float_status *s)
1365{
1366 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
b240c9c4 1367 f64_is_zon2, f64_addsubmul_post);
2dfabc86
EC
1368}
1369
8282310d
LZ
1370/*
1371 * Returns the result of multiplying the bfloat16
1372 * values `a' and `b'.
1373 */
1374
1375bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1376{
98e256fc
RH
1377 FloatParts64 pa, pb, pr;
1378
1379 bfloat16_unpack_canonical(&pa, a, status);
1380 bfloat16_unpack_canonical(&pb, b, status);
1381 pr = mul_floats(pa, pb, status);
8282310d 1382
e293e927 1383 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
1384}
1385
d446830a
AB
1386/*
1387 * Returns the result of multiplying the floating-point values `a' and
1388 * `b' then adding 'c', with no intermediate rounding step after the
1389 * multiplication. The operation is performed according to the
1390 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
1391 * The flags argument allows the caller to select negation of the
1392 * addend, the intermediate product, or the final result. (The
1393 * difference between this and having the caller do a separate
1394 * negation is that negating externally will flip the sign bit on
1395 * NaNs.)
1396 */
1397
f8155c1d 1398static FloatParts64 muladd_floats(FloatParts64 a, FloatParts64 b, FloatParts64 c,
d446830a
AB
1399 int flags, float_status *s)
1400{
134eda00 1401 bool inf_zero, p_sign;
d446830a
AB
1402 bool sign_flip = flags & float_muladd_negate_result;
1403 FloatClass p_class;
1404 uint64_t hi, lo;
1405 int p_exp;
134eda00
RH
1406 int ab_mask, abc_mask;
1407
1408 ab_mask = float_cmask(a.cls) | float_cmask(b.cls);
1409 abc_mask = float_cmask(c.cls) | ab_mask;
1410 inf_zero = ab_mask == float_cmask_infzero;
d446830a
AB
1411
1412 /* It is implementation-defined whether the cases of (0,inf,qnan)
1413 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
1414 * they return if they do), so we have to hand this information
1415 * off to the target-specific pick-a-NaN routine.
1416 */
134eda00 1417 if (unlikely(abc_mask & float_cmask_anynan)) {
d446830a
AB
1418 return pick_nan_muladd(a, b, c, inf_zero, s);
1419 }
1420
1421 if (inf_zero) {
d82f3b2d 1422 float_raise(float_flag_invalid, s);
0fc07cad
RH
1423 parts_default_nan(&a, s);
1424 return a;
d446830a
AB
1425 }
1426
1427 if (flags & float_muladd_negate_c) {
1428 c.sign ^= 1;
1429 }
1430
1431 p_sign = a.sign ^ b.sign;
1432
1433 if (flags & float_muladd_negate_product) {
1434 p_sign ^= 1;
1435 }
1436
134eda00 1437 if (ab_mask & float_cmask_inf) {
d446830a 1438 p_class = float_class_inf;
134eda00 1439 } else if (ab_mask & float_cmask_zero) {
d446830a
AB
1440 p_class = float_class_zero;
1441 } else {
1442 p_class = float_class_normal;
1443 }
1444
1445 if (c.cls == float_class_inf) {
1446 if (p_class == float_class_inf && p_sign != c.sign) {
d82f3b2d 1447 float_raise(float_flag_invalid, s);
0fc07cad 1448 parts_default_nan(&c, s);
d446830a 1449 } else {
9793c1e2 1450 c.sign ^= sign_flip;
d446830a 1451 }
0fc07cad 1452 return c;
d446830a
AB
1453 }
1454
1455 if (p_class == float_class_inf) {
1456 a.cls = float_class_inf;
1457 a.sign = p_sign ^ sign_flip;
1458 return a;
1459 }
1460
1461 if (p_class == float_class_zero) {
1462 if (c.cls == float_class_zero) {
1463 if (p_sign != c.sign) {
1464 p_sign = s->float_rounding_mode == float_round_down;
1465 }
1466 c.sign = p_sign;
1467 } else if (flags & float_muladd_halve_result) {
1468 c.exp -= 1;
1469 }
1470 c.sign ^= sign_flip;
1471 return c;
1472 }
1473
1474 /* a & b should be normals now... */
1475 assert(a.cls == float_class_normal &&
1476 b.cls == float_class_normal);
1477
1478 p_exp = a.exp + b.exp;
1479
d446830a 1480 mul64To128(a.frac, b.frac, &hi, &lo);
d446830a 1481
e99c4373
RH
1482 /* Renormalize to the msb. */
1483 if (hi & DECOMPOSED_IMPLICIT_BIT) {
d446830a 1484 p_exp += 1;
e99c4373
RH
1485 } else {
1486 shortShift128Left(hi, lo, 1, &hi, &lo);
d446830a
AB
1487 }
1488
1489 /* + add/sub */
e99c4373 1490 if (c.cls != float_class_zero) {
d446830a
AB
1491 int exp_diff = p_exp - c.exp;
1492 if (p_sign == c.sign) {
1493 /* Addition */
1494 if (exp_diff <= 0) {
e99c4373 1495 shift64RightJamming(hi, -exp_diff, &hi);
d446830a 1496 p_exp = c.exp;
e99c4373
RH
1497 if (uadd64_overflow(hi, c.frac, &hi)) {
1498 shift64RightJamming(hi, 1, &hi);
1499 hi |= DECOMPOSED_IMPLICIT_BIT;
1500 p_exp += 1;
1501 }
d446830a 1502 } else {
e99c4373
RH
1503 uint64_t c_hi, c_lo, over;
1504 shift128RightJamming(c.frac, 0, exp_diff, &c_hi, &c_lo);
1505 add192(0, hi, lo, 0, c_hi, c_lo, &over, &hi, &lo);
1506 if (over) {
1507 shift64RightJamming(hi, 1, &hi);
1508 hi |= DECOMPOSED_IMPLICIT_BIT;
1509 p_exp += 1;
1510 }
d446830a 1511 }
d446830a
AB
1512 } else {
1513 /* Subtraction */
e99c4373 1514 uint64_t c_hi = c.frac, c_lo = 0;
d446830a
AB
1515
1516 if (exp_diff <= 0) {
1517 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1518 if (exp_diff == 0
1519 &&
1520 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1521 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1522 } else {
1523 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1524 p_sign ^= 1;
1525 p_exp = c.exp;
1526 }
1527 } else {
1528 shift128RightJamming(c_hi, c_lo,
1529 exp_diff,
1530 &c_hi, &c_lo);
1531 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1532 }
1533
1534 if (hi == 0 && lo == 0) {
1535 a.cls = float_class_zero;
1536 a.sign = s->float_rounding_mode == float_round_down;
1537 a.sign ^= sign_flip;
1538 return a;
1539 } else {
1540 int shift;
1541 if (hi != 0) {
1542 shift = clz64(hi);
1543 } else {
1544 shift = clz64(lo) + 64;
1545 }
1546 /* Normalizing to a binary point of 124 is the
1547 correct adjust for the exponent. However since we're
1548 shifting, we might as well put the binary point back
e99c4373 1549 at 63 where we really want it. Therefore shift as
d446830a
AB
1550 if we're leaving 1 bit at the top of the word, but
1551 adjust the exponent as if we're leaving 3 bits. */
e99c4373
RH
1552 shift128Left(hi, lo, shift, &hi, &lo);
1553 p_exp -= shift;
d446830a
AB
1554 }
1555 }
1556 }
e99c4373 1557 hi |= (lo != 0);
d446830a
AB
1558
1559 if (flags & float_muladd_halve_result) {
1560 p_exp -= 1;
1561 }
1562
1563 /* finally prepare our result */
1564 a.cls = float_class_normal;
1565 a.sign = p_sign ^ sign_flip;
1566 a.exp = p_exp;
e99c4373 1567 a.frac = hi;
d446830a
AB
1568
1569 return a;
1570}
1571
97ff87c0 1572float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
d446830a
AB
1573 int flags, float_status *status)
1574{
98e256fc
RH
1575 FloatParts64 pa, pb, pc, pr;
1576
1577 float16_unpack_canonical(&pa, a, status);
1578 float16_unpack_canonical(&pb, b, status);
1579 float16_unpack_canonical(&pc, c, status);
1580 pr = muladd_floats(pa, pb, pc, flags, status);
d446830a 1581
e293e927 1582 return float16_round_pack_canonical(&pr, status);
d446830a
AB
1583}
1584
ccf770ba
EC
1585static float32 QEMU_SOFTFLOAT_ATTR
1586soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1587 float_status *status)
d446830a 1588{
98e256fc
RH
1589 FloatParts64 pa, pb, pc, pr;
1590
1591 float32_unpack_canonical(&pa, a, status);
1592 float32_unpack_canonical(&pb, b, status);
1593 float32_unpack_canonical(&pc, c, status);
1594 pr = muladd_floats(pa, pb, pc, flags, status);
d446830a 1595
e293e927 1596 return float32_round_pack_canonical(&pr, status);
d446830a
AB
1597}
1598
ccf770ba
EC
1599static float64 QEMU_SOFTFLOAT_ATTR
1600soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1601 float_status *status)
d446830a 1602{
98e256fc
RH
1603 FloatParts64 pa, pb, pc, pr;
1604
1605 float64_unpack_canonical(&pa, a, status);
1606 float64_unpack_canonical(&pb, b, status);
1607 float64_unpack_canonical(&pc, c, status);
1608 pr = muladd_floats(pa, pb, pc, flags, status);
d446830a 1609
e293e927 1610 return float64_round_pack_canonical(&pr, status);
d446830a
AB
1611}
1612
f6b3b108
EC
/*
 * When true, float32_muladd() and float64_muladd() branch to their
 * software implementation instead of calling the host fmaf()/fma().
 */
1613static bool force_soft_fma;
1614
ccf770ba
EC
1615float32 QEMU_FLATTEN
1616float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1617{
1618 union_float32 ua, ub, uc, ur;
1619
1620 ua.s = xa;
1621 ub.s = xb;
1622 uc.s = xc;
1623
1624 if (unlikely(!can_use_fpu(s))) {
1625 goto soft;
1626 }
1627 if (unlikely(flags & float_muladd_halve_result)) {
1628 goto soft;
1629 }
1630
1631 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1632 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1633 goto soft;
1634 }
f6b3b108
EC
1635
1636 if (unlikely(force_soft_fma)) {
1637 goto soft;
1638 }
1639
ccf770ba
EC
1640 /*
1641 * When (a || b) == 0, there's no need to check for under/over flow,
1642 * since we know the addend is (normal || 0) and the product is 0.
1643 */
1644 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1645 union_float32 up;
1646 bool prod_sign;
1647
1648 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1649 prod_sign ^= !!(flags & float_muladd_negate_product);
1650 up.s = float32_set_sign(float32_zero, prod_sign);
1651
1652 if (flags & float_muladd_negate_c) {
1653 uc.h = -uc.h;
1654 }
1655 ur.h = up.h + uc.h;
1656 } else {
896f51fb
KC
1657 union_float32 ua_orig = ua;
1658 union_float32 uc_orig = uc;
1659
ccf770ba
EC
1660 if (flags & float_muladd_negate_product) {
1661 ua.h = -ua.h;
1662 }
1663 if (flags & float_muladd_negate_c) {
1664 uc.h = -uc.h;
1665 }
1666
1667 ur.h = fmaf(ua.h, ub.h, uc.h);
1668
1669 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 1670 float_raise(float_flag_overflow, s);
ccf770ba 1671 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
896f51fb
KC
1672 ua = ua_orig;
1673 uc = uc_orig;
ccf770ba
EC
1674 goto soft;
1675 }
1676 }
1677 if (flags & float_muladd_negate_result) {
1678 return float32_chs(ur.s);
1679 }
1680 return ur.s;
1681
1682 soft:
1683 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1684}
1685
1686float64 QEMU_FLATTEN
1687float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1688{
1689 union_float64 ua, ub, uc, ur;
1690
1691 ua.s = xa;
1692 ub.s = xb;
1693 uc.s = xc;
1694
1695 if (unlikely(!can_use_fpu(s))) {
1696 goto soft;
1697 }
1698 if (unlikely(flags & float_muladd_halve_result)) {
1699 goto soft;
1700 }
1701
1702 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1703 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1704 goto soft;
1705 }
f6b3b108
EC
1706
1707 if (unlikely(force_soft_fma)) {
1708 goto soft;
1709 }
1710
ccf770ba
EC
1711 /*
1712 * When (a || b) == 0, there's no need to check for under/over flow,
1713 * since we know the addend is (normal || 0) and the product is 0.
1714 */
1715 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1716 union_float64 up;
1717 bool prod_sign;
1718
1719 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1720 prod_sign ^= !!(flags & float_muladd_negate_product);
1721 up.s = float64_set_sign(float64_zero, prod_sign);
1722
1723 if (flags & float_muladd_negate_c) {
1724 uc.h = -uc.h;
1725 }
1726 ur.h = up.h + uc.h;
1727 } else {
896f51fb
KC
1728 union_float64 ua_orig = ua;
1729 union_float64 uc_orig = uc;
1730
ccf770ba
EC
1731 if (flags & float_muladd_negate_product) {
1732 ua.h = -ua.h;
1733 }
1734 if (flags & float_muladd_negate_c) {
1735 uc.h = -uc.h;
1736 }
1737
1738 ur.h = fma(ua.h, ub.h, uc.h);
1739
1740 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 1741 float_raise(float_flag_overflow, s);
ccf770ba 1742 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
1743 ua = ua_orig;
1744 uc = uc_orig;
ccf770ba
EC
1745 goto soft;
1746 }
1747 }
1748 if (flags & float_muladd_negate_result) {
1749 return float64_chs(ur.s);
1750 }
1751 return ur.s;
1752
1753 soft:
1754 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1755}
1756
8282310d
LZ
1757/*
1758 * Returns the result of multiplying the bfloat16 values `a'
1759 * and `b' then adding 'c', with no intermediate rounding step after the
1760 * multiplication.
1761 */
1762
1763bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1764 int flags, float_status *status)
1765{
98e256fc
RH
1766 FloatParts64 pa, pb, pc, pr;
1767
1768 bfloat16_unpack_canonical(&pa, a, status);
1769 bfloat16_unpack_canonical(&pb, b, status);
1770 bfloat16_unpack_canonical(&pc, c, status);
1771 pr = muladd_floats(pa, pb, pc, flags, status);
8282310d 1772
e293e927 1773 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
1774}
1775
cf07323d
AB
1776/*
1777 * Returns the result of dividing the floating-point value `a' by the
1778 * corresponding value `b'. The operation is performed according to
1779 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1780 */
1781
f8155c1d 1782static FloatParts64 div_floats(FloatParts64 a, FloatParts64 b, float_status *s)
cf07323d
AB
1783{
1784 bool sign = a.sign ^ b.sign;
1785
1786 if (a.cls == float_class_normal && b.cls == float_class_normal) {
5dfbc9e4 1787 uint64_t n0, n1, q, r;
cf07323d 1788 int exp = a.exp - b.exp;
5dfbc9e4
RH
1789
1790 /*
1791 * We want a 2*N / N-bit division to produce exactly an N-bit
1792 * result, so that we do not lose any precision and so that we
1793 * do not have to renormalize afterward. If A.frac < B.frac,
1794 * then division would produce an (N-1)-bit result; shift A left
1795 * by one to produce the an N-bit result, and decrement the
1796 * exponent to match.
1797 *
1798 * The udiv_qrnnd algorithm that we're using requires normalization,
e99c4373 1799 * i.e. the msb of the denominator must be set, which is already true.
5dfbc9e4 1800 */
cf07323d
AB
1801 if (a.frac < b.frac) {
1802 exp -= 1;
5dfbc9e4 1803 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
e99c4373
RH
1804 } else {
1805 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT, &n1, &n0);
cf07323d 1806 }
e99c4373 1807 q = udiv_qrnnd(&r, n1, n0, b.frac);
5dfbc9e4 1808
e99c4373 1809 /* Set lsb if there is a remainder, to set inexact. */
5dfbc9e4 1810 a.frac = q | (r != 0);
cf07323d
AB
1811 a.sign = sign;
1812 a.exp = exp;
1813 return a;
1814 }
1815 /* handle all the NaN cases */
1816 if (is_nan(a.cls) || is_nan(b.cls)) {
1817 return pick_nan(a, b, s);
1818 }
1819 /* 0/0 or Inf/Inf */
1820 if (a.cls == b.cls
1821 &&
1822 (a.cls == float_class_inf || a.cls == float_class_zero)) {
d82f3b2d 1823 float_raise(float_flag_invalid, s);
0fc07cad
RH
1824 parts_default_nan(&a, s);
1825 return a;
cf07323d 1826 }
9cb4e398
AB
1827 /* Inf / x or 0 / x */
1828 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1829 a.sign = sign;
1830 return a;
1831 }
cf07323d
AB
1832 /* Div 0 => Inf */
1833 if (b.cls == float_class_zero) {
d82f3b2d 1834 float_raise(float_flag_divbyzero, s);
cf07323d
AB
1835 a.cls = float_class_inf;
1836 a.sign = sign;
1837 return a;
1838 }
cf07323d
AB
1839 /* Div by Inf */
1840 if (b.cls == float_class_inf) {
1841 a.cls = float_class_zero;
1842 a.sign = sign;
1843 return a;
1844 }
1845 g_assert_not_reached();
1846}
1847
1848float16 float16_div(float16 a, float16 b, float_status *status)
1849{
98e256fc
RH
1850 FloatParts64 pa, pb, pr;
1851
1852 float16_unpack_canonical(&pa, a, status);
1853 float16_unpack_canonical(&pb, b, status);
1854 pr = div_floats(pa, pb, status);
cf07323d 1855
e293e927 1856 return float16_round_pack_canonical(&pr, status);
cf07323d
AB
1857}
1858
4a629561
EC
1859static float32 QEMU_SOFTFLOAT_ATTR
1860soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d 1861{
98e256fc
RH
1862 FloatParts64 pa, pb, pr;
1863
1864 float32_unpack_canonical(&pa, a, status);
1865 float32_unpack_canonical(&pb, b, status);
1866 pr = div_floats(pa, pb, status);
cf07323d 1867
e293e927 1868 return float32_round_pack_canonical(&pr, status);
cf07323d
AB
1869}
1870
4a629561
EC
1871static float64 QEMU_SOFTFLOAT_ATTR
1872soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d 1873{
98e256fc
RH
1874 FloatParts64 pa, pb, pr;
1875
1876 float64_unpack_canonical(&pa, a, status);
1877 float64_unpack_canonical(&pb, b, status);
1878 pr = div_floats(pa, pb, status);
cf07323d 1879
e293e927 1880 return float64_round_pack_canonical(&pr, status);
cf07323d
AB
1881}
1882
4a629561
EC
/* Host single-precision division: returns a / b. */
static float hard_f32_div(float a, float b)
{
    const float quot = a / b;

    return quot;
}
1887
/* Host double-precision division: returns a / b. */
static double hard_f64_div(double a, double b)
{
    const double quot = a / b;

    return quot;
}
1892
1893static bool f32_div_pre(union_float32 a, union_float32 b)
1894{
1895 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1896 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1897 fpclassify(b.h) == FP_NORMAL;
1898 }
1899 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1900}
1901
1902static bool f64_div_pre(union_float64 a, union_float64 b)
1903{
1904 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1905 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1906 fpclassify(b.h) == FP_NORMAL;
1907 }
1908 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1909}
1910
1911static bool f32_div_post(union_float32 a, union_float32 b)
1912{
1913 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1914 return fpclassify(a.h) != FP_ZERO;
1915 }
1916 return !float32_is_zero(a.s);
1917}
1918
1919static bool f64_div_post(union_float64 a, union_float64 b)
1920{
1921 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1922 return fpclassify(a.h) != FP_ZERO;
1923 }
1924 return !float64_is_zero(a.s);
1925}
1926
1927float32 QEMU_FLATTEN
1928float32_div(float32 a, float32 b, float_status *s)
1929{
1930 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
b240c9c4 1931 f32_div_pre, f32_div_post);
4a629561
EC
1932}
1933
1934float64 QEMU_FLATTEN
1935float64_div(float64 a, float64 b, float_status *s)
1936{
1937 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
b240c9c4 1938 f64_div_pre, f64_div_post);
4a629561
EC
1939}
1940
8282310d
LZ
1941/*
1942 * Returns the result of dividing the bfloat16
1943 * value `a' by the corresponding value `b'.
1944 */
1945
1946bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
1947{
98e256fc
RH
1948 FloatParts64 pa, pb, pr;
1949
1950 bfloat16_unpack_canonical(&pa, a, status);
1951 bfloat16_unpack_canonical(&pb, b, status);
1952 pr = div_floats(pa, pb, status);
8282310d 1953
e293e927 1954 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
1955}
1956
6fed16b2
AB
1957/*
1958 * Float to Float conversions
1959 *
1960 * Returns the result of converting one float format to another. The
1961 * conversion is performed according to the IEC/IEEE Standard for
1962 * Binary Floating-Point Arithmetic.
1963 *
1964 * The float_to_float helper only needs to take care of raising
1965 * invalid exceptions and handling the conversion on NaNs.
1966 */
1967
f8155c1d 1968static FloatParts64 float_to_float(FloatParts64 a, const FloatFmt *dstf,
6fed16b2
AB
1969 float_status *s)
1970{
1971 if (dstf->arm_althp) {
1972 switch (a.cls) {
1973 case float_class_qnan:
1974 case float_class_snan:
1975 /* There is no NaN in the destination format. Raise Invalid
1976 * and return a zero with the sign of the input NaN.
1977 */
d82f3b2d 1978 float_raise(float_flag_invalid, s);
6fed16b2
AB
1979 a.cls = float_class_zero;
1980 a.frac = 0;
1981 a.exp = 0;
1982 break;
1983
1984 case float_class_inf:
1985 /* There is no Inf in the destination format. Raise Invalid
1986 * and return the maximum normal with the correct sign.
1987 */
d82f3b2d 1988 float_raise(float_flag_invalid, s);
6fed16b2
AB
1989 a.cls = float_class_normal;
1990 a.exp = dstf->exp_max;
1991 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1992 break;
1993
1994 default:
1995 break;
1996 }
1997 } else if (is_nan(a.cls)) {
0d40cd93 1998 return return_nan(a, s);
6fed16b2
AB
1999 }
2000 return a;
2001}
2002
2003float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2004{
2005 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
98e256fc
RH
2006 FloatParts64 pa, pr;
2007
2008 float16a_unpack_canonical(&pa, a, s, fmt16);
2009 pr = float_to_float(pa, &float32_params, s);
e293e927 2010 return float32_round_pack_canonical(&pr, s);
6fed16b2
AB
2011}
2012
2013float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2014{
2015 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
98e256fc
RH
2016 FloatParts64 pa, pr;
2017
2018 float16a_unpack_canonical(&pa, a, s, fmt16);
2019 pr = float_to_float(pa, &float64_params, s);
e293e927 2020 return float64_round_pack_canonical(&pr, s);
6fed16b2
AB
2021}
2022
2023float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2024{
2025 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
98e256fc
RH
2026 FloatParts64 pa, pr;
2027
2028 float32_unpack_canonical(&pa, a, s);
2029 pr = float_to_float(pa, fmt16, s);
e293e927 2030 return float16a_round_pack_canonical(&pr, s, fmt16);
6fed16b2
AB
2031}
2032
21381dcf
MK
2033static float64 QEMU_SOFTFLOAT_ATTR
2034soft_float32_to_float64(float32 a, float_status *s)
6fed16b2 2035{
98e256fc
RH
2036 FloatParts64 pa, pr;
2037
2038 float32_unpack_canonical(&pa, a, s);
2039 pr = float_to_float(pa, &float64_params, s);
e293e927 2040 return float64_round_pack_canonical(&pr, s);
6fed16b2
AB
2041}
2042
21381dcf
MK
2043float64 float32_to_float64(float32 a, float_status *s)
2044{
2045 if (likely(float32_is_normal(a))) {
2046 /* Widening conversion can never produce inexact results. */
2047 union_float32 uf;
2048 union_float64 ud;
2049 uf.s = a;
2050 ud.h = uf.h;
2051 return ud.s;
2052 } else if (float32_is_zero(a)) {
2053 return float64_set_sign(float64_zero, float32_is_neg(a));
2054 } else {
2055 return soft_float32_to_float64(a, s);
2056 }
2057}
2058
6fed16b2
AB
2059float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2060{
2061 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
98e256fc
RH
2062 FloatParts64 pa, pr;
2063
2064 float64_unpack_canonical(&pa, a, s);
2065 pr = float_to_float(pa, fmt16, s);
e293e927 2066 return float16a_round_pack_canonical(&pr, s, fmt16);
6fed16b2
AB
2067}
2068
2069float32 float64_to_float32(float64 a, float_status *s)
2070{
98e256fc
RH
2071 FloatParts64 pa, pr;
2072
2073 float64_unpack_canonical(&pa, a, s);
2074 pr = float_to_float(pa, &float32_params, s);
e293e927 2075 return float32_round_pack_canonical(&pr, s);
6fed16b2
AB
2076}
2077
34f0c0a9
LZ
2078float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2079{
98e256fc
RH
2080 FloatParts64 pa, pr;
2081
2082 bfloat16_unpack_canonical(&pa, a, s);
2083 pr = float_to_float(pa, &float32_params, s);
e293e927 2084 return float32_round_pack_canonical(&pr, s);
34f0c0a9
LZ
2085}
2086
2087float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2088{
98e256fc
RH
2089 FloatParts64 pa, pr;
2090
2091 bfloat16_unpack_canonical(&pa, a, s);
2092 pr = float_to_float(pa, &float64_params, s);
e293e927 2093 return float64_round_pack_canonical(&pr, s);
34f0c0a9
LZ
2094}
2095
2096bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2097{
98e256fc
RH
2098 FloatParts64 pa, pr;
2099
2100 float32_unpack_canonical(&pa, a, s);
2101 pr = float_to_float(pa, &bfloat16_params, s);
e293e927 2102 return bfloat16_round_pack_canonical(&pr, s);
34f0c0a9
LZ
2103}
2104
2105bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2106{
98e256fc
RH
2107 FloatParts64 pa, pr;
2108
2109 float64_unpack_canonical(&pa, a, s);
2110 pr = float_to_float(pa, &bfloat16_params, s);
e293e927 2111 return bfloat16_round_pack_canonical(&pr, s);
34f0c0a9
LZ
2112}
2113
dbe4d53a
AB
2114/*
2115 * Rounds the floating-point value `a' to an integer, and returns the
2116 * result as a floating-point value. The operation is performed
2117 * according to the IEC/IEEE Standard for Binary Floating-Point
2118 * Arithmetic.
2119 */
2120
f8155c1d 2121static FloatParts64 round_to_int(FloatParts64 a, FloatRoundMode rmode,
2f6c74be 2122 int scale, float_status *s)
dbe4d53a 2123{
2f6c74be
RH
2124 switch (a.cls) {
2125 case float_class_qnan:
2126 case float_class_snan:
dbe4d53a 2127 return return_nan(a, s);
dbe4d53a 2128
dbe4d53a
AB
2129 case float_class_zero:
2130 case float_class_inf:
dbe4d53a
AB
2131 /* already "integral" */
2132 break;
2f6c74be 2133
dbe4d53a 2134 case float_class_normal:
2f6c74be
RH
2135 scale = MIN(MAX(scale, -0x10000), 0x10000);
2136 a.exp += scale;
2137
dbe4d53a
AB
2138 if (a.exp >= DECOMPOSED_BINARY_POINT) {
2139 /* already integral */
2140 break;
2141 }
2142 if (a.exp < 0) {
2143 bool one;
2144 /* all fractional */
d82f3b2d 2145 float_raise(float_flag_inexact, s);
2f6c74be 2146 switch (rmode) {
dbe4d53a
AB
2147 case float_round_nearest_even:
2148 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
2149 break;
2150 case float_round_ties_away:
2151 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
2152 break;
2153 case float_round_to_zero:
2154 one = false;
2155 break;
2156 case float_round_up:
2157 one = !a.sign;
2158 break;
2159 case float_round_down:
2160 one = a.sign;
2161 break;
5d64abb3
RH
2162 case float_round_to_odd:
2163 one = true;
2164 break;
dbe4d53a
AB
2165 default:
2166 g_assert_not_reached();
2167 }
2168
2169 if (one) {
2170 a.frac = DECOMPOSED_IMPLICIT_BIT;
2171 a.exp = 0;
2172 } else {
2173 a.cls = float_class_zero;
2174 }
2175 } else {
2176 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
2177 uint64_t frac_lsbm1 = frac_lsb >> 1;
2178 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
2179 uint64_t rnd_mask = rnd_even_mask >> 1;
2180 uint64_t inc;
2181
2f6c74be 2182 switch (rmode) {
dbe4d53a
AB
2183 case float_round_nearest_even:
2184 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
2185 break;
2186 case float_round_ties_away:
2187 inc = frac_lsbm1;
2188 break;
2189 case float_round_to_zero:
2190 inc = 0;
2191 break;
2192 case float_round_up:
2193 inc = a.sign ? 0 : rnd_mask;
2194 break;
2195 case float_round_down:
2196 inc = a.sign ? rnd_mask : 0;
2197 break;
5d64abb3
RH
2198 case float_round_to_odd:
2199 inc = a.frac & frac_lsb ? 0 : rnd_mask;
2200 break;
dbe4d53a
AB
2201 default:
2202 g_assert_not_reached();
2203 }
2204
2205 if (a.frac & rnd_mask) {
d82f3b2d 2206 float_raise(float_flag_inexact, s);
e99c4373 2207 if (uadd64_overflow(a.frac, inc, &a.frac)) {
dbe4d53a 2208 a.frac >>= 1;
e99c4373 2209 a.frac |= DECOMPOSED_IMPLICIT_BIT;
dbe4d53a
AB
2210 a.exp++;
2211 }
e99c4373 2212 a.frac &= ~rnd_mask;
dbe4d53a
AB
2213 }
2214 }
2215 break;
2216 default:
2217 g_assert_not_reached();
2218 }
2219 return a;
2220}
2221
2222float16 float16_round_to_int(float16 a, float_status *s)
2223{
98e256fc
RH
2224 FloatParts64 pa, pr;
2225
2226 float16_unpack_canonical(&pa, a, s);
2227 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
e293e927 2228 return float16_round_pack_canonical(&pr, s);
dbe4d53a
AB
2229}
2230
2231float32 float32_round_to_int(float32 a, float_status *s)
2232{
98e256fc
RH
2233 FloatParts64 pa, pr;
2234
2235 float32_unpack_canonical(&pa, a, s);
2236 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
e293e927 2237 return float32_round_pack_canonical(&pr, s);
dbe4d53a
AB
2238}
2239
2240float64 float64_round_to_int(float64 a, float_status *s)
2241{
98e256fc
RH
2242 FloatParts64 pa, pr;
2243
2244 float64_unpack_canonical(&pa, a, s);
2245 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
e293e927 2246 return float64_round_pack_canonical(&pr, s);
dbe4d53a
AB
2247}
2248
34f0c0a9
LZ
2249/*
2250 * Rounds the bfloat16 value `a' to an integer, and returns the
2251 * result as a bfloat16 value.
2252 */
2253
2254bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2255{
98e256fc
RH
2256 FloatParts64 pa, pr;
2257
2258 bfloat16_unpack_canonical(&pa, a, s);
2259 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
e293e927 2260 return bfloat16_round_pack_canonical(&pr, s);
34f0c0a9
LZ
2261}
2262
ab52f973
AB
2263/*
2264 * Returns the result of converting the floating-point value `a' to
2265 * the two's complement integer format. The conversion is performed
2266 * according to the IEC/IEEE Standard for Binary Floating-Point
2267 * Arithmetic---which means in particular that the conversion is
2268 * rounded according to the current rounding mode. If `a' is a NaN,
2269 * the largest positive integer is returned. Otherwise, if the
2270 * conversion overflows, the largest integer with the same sign as `a'
2271 * is returned.
2272*/
2273
f8155c1d 2274static int64_t round_to_int_and_pack(FloatParts64 in, FloatRoundMode rmode,
3dede407 2275 int scale, int64_t min, int64_t max,
ab52f973
AB
2276 float_status *s)
2277{
2278 uint64_t r;
2279 int orig_flags = get_float_exception_flags(s);
f8155c1d 2280 FloatParts64 p = round_to_int(in, rmode, scale, s);
ab52f973
AB
2281
2282 switch (p.cls) {
2283 case float_class_snan:
2284 case float_class_qnan:
801bc563 2285 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2286 return max;
2287 case float_class_inf:
801bc563 2288 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2289 return p.sign ? min : max;
2290 case float_class_zero:
2291 return 0;
2292 case float_class_normal:
e99c4373 2293 if (p.exp <= DECOMPOSED_BINARY_POINT) {
ab52f973 2294 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
ab52f973
AB
2295 } else {
2296 r = UINT64_MAX;
2297 }
2298 if (p.sign) {
33358375 2299 if (r <= -(uint64_t) min) {
ab52f973
AB
2300 return -r;
2301 } else {
2302 s->float_exception_flags = orig_flags | float_flag_invalid;
2303 return min;
2304 }
2305 } else {
33358375 2306 if (r <= max) {
ab52f973
AB
2307 return r;
2308 } else {
2309 s->float_exception_flags = orig_flags | float_flag_invalid;
2310 return max;
2311 }
2312 }
2313 default:
2314 g_assert_not_reached();
2315 }
2316}
2317
0d93d8ec
FC
2318int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2319 float_status *s)
2320{
98e256fc
RH
2321 FloatParts64 p;
2322
2323 float16_unpack_canonical(&p, a, s);
2324 return round_to_int_and_pack(p, rmode, scale, INT8_MIN, INT8_MAX, s);
0d93d8ec
FC
2325}
2326
3dede407 2327int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2328 float_status *s)
2329{
98e256fc
RH
2330 FloatParts64 p;
2331
2332 float16_unpack_canonical(&p, a, s);
2333 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2334}
2335
3dede407 2336int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2337 float_status *s)
2338{
98e256fc
RH
2339 FloatParts64 p;
2340
2341 float16_unpack_canonical(&p, a, s);
2342 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2343}
2344
3dede407 2345int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2346 float_status *s)
2347{
98e256fc
RH
2348 FloatParts64 p;
2349
2350 float16_unpack_canonical(&p, a, s);
2351 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2352}
2353
3dede407 2354int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2355 float_status *s)
2356{
98e256fc
RH
2357 FloatParts64 p;
2358
2359 float32_unpack_canonical(&p, a, s);
2360 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2361}
2362
3dede407 2363int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2364 float_status *s)
2365{
98e256fc
RH
2366 FloatParts64 p;
2367
2368 float32_unpack_canonical(&p, a, s);
2369 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2370}
2371
3dede407 2372int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2373 float_status *s)
2374{
98e256fc
RH
2375 FloatParts64 p;
2376
2377 float32_unpack_canonical(&p, a, s);
2378 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2379}
2380
3dede407 2381int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2382 float_status *s)
2383{
98e256fc
RH
2384 FloatParts64 p;
2385
2386 float64_unpack_canonical(&p, a, s);
2387 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2388}
2389
3dede407 2390int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2391 float_status *s)
2392{
98e256fc
RH
2393 FloatParts64 p;
2394
2395 float64_unpack_canonical(&p, a, s);
2396 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2397}
2398
3dede407 2399int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2400 float_status *s)
2401{
98e256fc
RH
2402 FloatParts64 p;
2403
2404 float64_unpack_canonical(&p, a, s);
2405 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2406}
2407
0d93d8ec
FC
2408int8_t float16_to_int8(float16 a, float_status *s)
2409{
2410 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2411}
2412
2f6c74be
RH
2413int16_t float16_to_int16(float16 a, float_status *s)
2414{
2415 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2416}
2417
2418int32_t float16_to_int32(float16 a, float_status *s)
2419{
2420 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2421}
2422
2423int64_t float16_to_int64(float16 a, float_status *s)
2424{
2425 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2426}
2427
2428int16_t float32_to_int16(float32 a, float_status *s)
2429{
2430 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2431}
2432
2433int32_t float32_to_int32(float32 a, float_status *s)
2434{
2435 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2436}
2437
2438int64_t float32_to_int64(float32 a, float_status *s)
2439{
2440 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2441}
2442
2443int16_t float64_to_int16(float64 a, float_status *s)
2444{
2445 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2446}
2447
2448int32_t float64_to_int32(float64 a, float_status *s)
2449{
2450 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2451}
2452
2453int64_t float64_to_int64(float64 a, float_status *s)
2454{
2455 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2456}
2457
2458int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2459{
2460 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2461}
2462
2463int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2464{
2465 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2466}
2467
2468int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2469{
2470 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
2471}
2472
2f6c74be
RH
2473int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2474{
2475 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2476}
ab52f973 2477
2f6c74be
RH
2478int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2479{
2480 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2481}
2482
2483int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2484{
2485 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2486}
2487
2488int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2489{
2490 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2491}
ab52f973 2492
2f6c74be
RH
2493int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2494{
2495 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2496}
ab52f973 2497
2f6c74be
RH
2498int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2499{
2500 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2501}
ab52f973 2502
34f0c0a9
LZ
2503/*
2504 * Returns the result of converting the floating-point value `a' to
2505 * the two's complement integer format.
2506 */
2507
2508int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2509 float_status *s)
2510{
98e256fc
RH
2511 FloatParts64 p;
2512
2513 bfloat16_unpack_canonical(&p, a, s);
2514 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
34f0c0a9
LZ
2515}
2516
2517int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2518 float_status *s)
2519{
98e256fc
RH
2520 FloatParts64 p;
2521
2522 bfloat16_unpack_canonical(&p, a, s);
2523 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
34f0c0a9
LZ
2524}
2525
2526int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2527 float_status *s)
2528{
98e256fc
RH
2529 FloatParts64 p;
2530
2531 bfloat16_unpack_canonical(&p, a, s);
2532 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
34f0c0a9
LZ
2533}
2534
2535int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2536{
2537 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2538}
2539
2540int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2541{
2542 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2543}
2544
2545int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2546{
2547 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2548}
2549
2550int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2551{
2552 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2553}
2554
2555int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2556{
2557 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2558}
2559
2560int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2561{
2562 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2563}
2564
ab52f973
AB
/*
 * Returns the result of converting the floating-point value `a' to
 * the unsigned integer format. The conversion is performed according
 * to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode. If `a' is a NaN,
 * the largest unsigned integer is returned and the invalid exception
 * is raised. Otherwise, if the conversion overflows, the largest
 * unsigned integer is returned and the invalid exception is raised.
 * If `a' is negative it is rounded first: a value that rounds to zero
 * returns zero (with the inexact flag set by the rounding), while any
 * other negative value returns zero and raises the invalid exception
 * flag.
 */
2577
f8155c1d 2578static uint64_t round_to_uint_and_pack(FloatParts64 in, FloatRoundMode rmode,
3dede407
RH
2579 int scale, uint64_t max,
2580 float_status *s)
ab52f973
AB
2581{
2582 int orig_flags = get_float_exception_flags(s);
f8155c1d 2583 FloatParts64 p = round_to_int(in, rmode, scale, s);
2f6c74be 2584 uint64_t r;
ab52f973
AB
2585
2586 switch (p.cls) {
2587 case float_class_snan:
2588 case float_class_qnan:
2589 s->float_exception_flags = orig_flags | float_flag_invalid;
2590 return max;
2591 case float_class_inf:
801bc563 2592 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2593 return p.sign ? 0 : max;
2594 case float_class_zero:
2595 return 0;
2596 case float_class_normal:
ab52f973
AB
2597 if (p.sign) {
2598 s->float_exception_flags = orig_flags | float_flag_invalid;
2599 return 0;
2600 }
2601
e99c4373 2602 if (p.exp <= DECOMPOSED_BINARY_POINT) {
ab52f973 2603 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
ab52f973
AB
2604 } else {
2605 s->float_exception_flags = orig_flags | float_flag_invalid;
2606 return max;
2607 }
2608
2609 /* For uint64 this will never trip, but if p.exp is too large
2610 * to shift a decomposed fraction we shall have exited via the
2611 * 3rd leg above.
2612 */
2613 if (r > max) {
2614 s->float_exception_flags = orig_flags | float_flag_invalid;
2615 return max;
ab52f973 2616 }
2f6c74be 2617 return r;
ab52f973
AB
2618 default:
2619 g_assert_not_reached();
2620 }
2621}
2622
0d93d8ec
FC
2623uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2624 float_status *s)
2625{
98e256fc
RH
2626 FloatParts64 p;
2627
2628 float16_unpack_canonical(&p, a, s);
2629 return round_to_uint_and_pack(p, rmode, scale, UINT8_MAX, s);
0d93d8ec
FC
2630}
2631
3dede407 2632uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2633 float_status *s)
2634{
98e256fc
RH
2635 FloatParts64 p;
2636
2637 float16_unpack_canonical(&p, a, s);
2638 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2639}
2640
3dede407 2641uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2642 float_status *s)
2643{
98e256fc
RH
2644 FloatParts64 p;
2645
2646 float16_unpack_canonical(&p, a, s);
2647 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2648}
2649
3dede407 2650uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2651 float_status *s)
2652{
98e256fc
RH
2653 FloatParts64 p;
2654
2655 float16_unpack_canonical(&p, a, s);
2656 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2657}
2658
3dede407 2659uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2660 float_status *s)
2661{
98e256fc
RH
2662 FloatParts64 p;
2663
2664 float32_unpack_canonical(&p, a, s);
2665 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2666}
2667
3dede407 2668uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2669 float_status *s)
2670{
98e256fc
RH
2671 FloatParts64 p;
2672
2673 float32_unpack_canonical(&p, a, s);
2674 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2675}
2676
3dede407 2677uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2678 float_status *s)
2679{
98e256fc
RH
2680 FloatParts64 p;
2681
2682 float32_unpack_canonical(&p, a, s);
2683 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2684}
2685
3dede407 2686uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2687 float_status *s)
2688{
98e256fc
RH
2689 FloatParts64 p;
2690
2691 float64_unpack_canonical(&p, a, s);
2692 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2693}
2694
3dede407 2695uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2696 float_status *s)
2697{
98e256fc
RH
2698 FloatParts64 p;
2699
2700 float64_unpack_canonical(&p, a, s);
2701 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2702}
2703
3dede407 2704uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2705 float_status *s)
2706{
98e256fc
RH
2707 FloatParts64 p;
2708
2709 float64_unpack_canonical(&p, a, s);
2710 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2711}
2712
0d93d8ec
FC
2713uint8_t float16_to_uint8(float16 a, float_status *s)
2714{
2715 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2716}
2717
2f6c74be
RH
2718uint16_t float16_to_uint16(float16 a, float_status *s)
2719{
2720 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2721}
2722
2723uint32_t float16_to_uint32(float16 a, float_status *s)
2724{
2725 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2726}
2727
2728uint64_t float16_to_uint64(float16 a, float_status *s)
2729{
2730 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2731}
2732
2733uint16_t float32_to_uint16(float32 a, float_status *s)
2734{
2735 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2736}
2737
2738uint32_t float32_to_uint32(float32 a, float_status *s)
2739{
2740 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2741}
2742
2743uint64_t float32_to_uint64(float32 a, float_status *s)
2744{
2745 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2746}
2747
2748uint16_t float64_to_uint16(float64 a, float_status *s)
2749{
2750 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2751}
2752
2753uint32_t float64_to_uint32(float64 a, float_status *s)
2754{
2755 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2756}
2757
2758uint64_t float64_to_uint64(float64 a, float_status *s)
2759{
2760 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2761}
2762
2763uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2764{
2765 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2766}
2767
2768uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2769{
2770 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2771}
2772
2773uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2774{
2775 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2776}
2777
2778uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2779{
2780 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2781}
2782
2783uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2784{
2785 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2786}
2787
2788uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2789{
2790 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2791}
2792
2793uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2794{
2795 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2796}
2797
2798uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2799{
2800 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2801}
2802
2803uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2804{
2805 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2806}
ab52f973 2807
34f0c0a9
LZ
2808/*
2809 * Returns the result of converting the bfloat16 value `a' to
2810 * the unsigned integer format.
2811 */
2812
2813uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2814 int scale, float_status *s)
2815{
98e256fc
RH
2816 FloatParts64 p;
2817
2818 bfloat16_unpack_canonical(&p, a, s);
2819 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
34f0c0a9
LZ
2820}
2821
2822uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2823 int scale, float_status *s)
2824{
98e256fc
RH
2825 FloatParts64 p;
2826
2827 bfloat16_unpack_canonical(&p, a, s);
2828 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
34f0c0a9
LZ
2829}
2830
2831uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
2832 int scale, float_status *s)
2833{
98e256fc
RH
2834 FloatParts64 p;
2835
2836 bfloat16_unpack_canonical(&p, a, s);
2837 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
34f0c0a9
LZ
2838}
2839
2840uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
2841{
2842 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2843}
2844
2845uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
2846{
2847 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2848}
2849
2850uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
2851{
2852 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2853}
2854
2855uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
2856{
2857 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2858}
2859
2860uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
2861{
2862 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2863}
2864
2865uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
2866{
2867 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2868}
2869
c02e1fb8
AB
2870/*
2871 * Integer to float conversions
2872 *
2873 * Returns the result of converting the two's complement integer `a'
2874 * to the floating-point format. The conversion is performed according
2875 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2876 */
2877
f8155c1d 2878static FloatParts64 int_to_float(int64_t a, int scale, float_status *status)
c02e1fb8 2879{
f8155c1d 2880 FloatParts64 r = { .sign = false };
2abdfe24 2881
c02e1fb8
AB
2882 if (a == 0) {
2883 r.cls = float_class_zero;
c02e1fb8 2884 } else {
2abdfe24
RH
2885 uint64_t f = a;
2886 int shift;
2887
2888 r.cls = float_class_normal;
c02e1fb8 2889 if (a < 0) {
2abdfe24 2890 f = -f;
c02e1fb8 2891 r.sign = true;
c02e1fb8 2892 }
e99c4373 2893 shift = clz64(f);
2abdfe24
RH
2894 scale = MIN(MAX(scale, -0x10000), 0x10000);
2895
2896 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
e99c4373 2897 r.frac = f << shift;
c02e1fb8
AB
2898 }
2899
2900 return r;
2901}
2902
2abdfe24 2903float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2904{
f8155c1d 2905 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 2906 return float16_round_pack_canonical(&pa, status);
c02e1fb8
AB
2907}
2908
2abdfe24
RH
2909float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
2910{
2911 return int64_to_float16_scalbn(a, scale, status);
2912}
2913
2914float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
2915{
2916 return int64_to_float16_scalbn(a, scale, status);
2917}
2918
2919float16 int64_to_float16(int64_t a, float_status *status)
2920{
2921 return int64_to_float16_scalbn(a, 0, status);
2922}
2923
c02e1fb8
AB
2924float16 int32_to_float16(int32_t a, float_status *status)
2925{
2abdfe24 2926 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2927}
2928
2929float16 int16_to_float16(int16_t a, float_status *status)
2930{
2abdfe24 2931 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2932}
2933
0d93d8ec
FC
2934float16 int8_to_float16(int8_t a, float_status *status)
2935{
2936 return int64_to_float16_scalbn(a, 0, status);
2937}
2938
2abdfe24 2939float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2940{
f8155c1d 2941 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 2942 return float32_round_pack_canonical(&pa, status);
c02e1fb8
AB
2943}
2944
2abdfe24
RH
2945float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
2946{
2947 return int64_to_float32_scalbn(a, scale, status);
2948}
2949
2950float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
2951{
2952 return int64_to_float32_scalbn(a, scale, status);
2953}
2954
2955float32 int64_to_float32(int64_t a, float_status *status)
2956{
2957 return int64_to_float32_scalbn(a, 0, status);
2958}
2959
c02e1fb8
AB
2960float32 int32_to_float32(int32_t a, float_status *status)
2961{
2abdfe24 2962 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2963}
2964
2965float32 int16_to_float32(int16_t a, float_status *status)
2966{
2abdfe24 2967 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2968}
2969
2abdfe24 2970float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2971{
f8155c1d 2972 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 2973 return float64_round_pack_canonical(&pa, status);
c02e1fb8
AB
2974}
2975
2abdfe24
RH
2976float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
2977{
2978 return int64_to_float64_scalbn(a, scale, status);
2979}
2980
2981float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
2982{
2983 return int64_to_float64_scalbn(a, scale, status);
2984}
2985
2986float64 int64_to_float64(int64_t a, float_status *status)
2987{
2988 return int64_to_float64_scalbn(a, 0, status);
2989}
2990
c02e1fb8
AB
2991float64 int32_to_float64(int32_t a, float_status *status)
2992{
2abdfe24 2993 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2994}
2995
2996float64 int16_to_float64(int16_t a, float_status *status)
2997{
2abdfe24 2998 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2999}
3000
34f0c0a9
LZ
3001/*
3002 * Returns the result of converting the two's complement integer `a'
3003 * to the bfloat16 format.
3004 */
3005
3006bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3007{
f8155c1d 3008 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 3009 return bfloat16_round_pack_canonical(&pa, status);
34f0c0a9
LZ
3010}
3011
3012bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3013{
3014 return int64_to_bfloat16_scalbn(a, scale, status);
3015}
3016
3017bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3018{
3019 return int64_to_bfloat16_scalbn(a, scale, status);
3020}
3021
3022bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3023{
3024 return int64_to_bfloat16_scalbn(a, 0, status);
3025}
3026
3027bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3028{
3029 return int64_to_bfloat16_scalbn(a, 0, status);
3030}
3031
3032bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3033{
3034 return int64_to_bfloat16_scalbn(a, 0, status);
3035}
c02e1fb8
AB
3036
3037/*
3038 * Unsigned Integer to float conversions
3039 *
3040 * Returns the result of converting the unsigned integer `a' to the
3041 * floating-point format. The conversion is performed according to the
3042 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3043 */
3044
f8155c1d 3045static FloatParts64 uint_to_float(uint64_t a, int scale, float_status *status)
c02e1fb8 3046{
f8155c1d 3047 FloatParts64 r = { .sign = false };
e99c4373 3048 int shift;
c02e1fb8
AB
3049
3050 if (a == 0) {
3051 r.cls = float_class_zero;
3052 } else {
2abdfe24 3053 scale = MIN(MAX(scale, -0x10000), 0x10000);
e99c4373 3054 shift = clz64(a);
c02e1fb8 3055 r.cls = float_class_normal;
e99c4373
RH
3056 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
3057 r.frac = a << shift;
c02e1fb8
AB
3058 }
3059
3060 return r;
3061}
3062
2abdfe24 3063float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3064{
f8155c1d 3065 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3066 return float16_round_pack_canonical(&pa, status);
c02e1fb8
AB
3067}
3068
2abdfe24
RH
3069float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3070{
3071 return uint64_to_float16_scalbn(a, scale, status);
3072}
3073
3074float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3075{
3076 return uint64_to_float16_scalbn(a, scale, status);
3077}
3078
3079float16 uint64_to_float16(uint64_t a, float_status *status)
3080{
3081 return uint64_to_float16_scalbn(a, 0, status);
3082}
3083
c02e1fb8
AB
3084float16 uint32_to_float16(uint32_t a, float_status *status)
3085{
2abdfe24 3086 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3087}
3088
3089float16 uint16_to_float16(uint16_t a, float_status *status)
3090{
2abdfe24 3091 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3092}
3093
0d93d8ec
FC
3094float16 uint8_to_float16(uint8_t a, float_status *status)
3095{
3096 return uint64_to_float16_scalbn(a, 0, status);
3097}
3098
2abdfe24 3099float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3100{
f8155c1d 3101 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3102 return float32_round_pack_canonical(&pa, status);
c02e1fb8
AB
3103}
3104
2abdfe24
RH
3105float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3106{
3107 return uint64_to_float32_scalbn(a, scale, status);
3108}
3109
3110float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3111{
3112 return uint64_to_float32_scalbn(a, scale, status);
3113}
3114
3115float32 uint64_to_float32(uint64_t a, float_status *status)
3116{
3117 return uint64_to_float32_scalbn(a, 0, status);
3118}
3119
c02e1fb8
AB
3120float32 uint32_to_float32(uint32_t a, float_status *status)
3121{
2abdfe24 3122 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3123}
3124
3125float32 uint16_to_float32(uint16_t a, float_status *status)
3126{
2abdfe24 3127 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3128}
3129
2abdfe24 3130float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3131{
f8155c1d 3132 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3133 return float64_round_pack_canonical(&pa, status);
c02e1fb8
AB
3134}
3135
2abdfe24
RH
3136float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3137{
3138 return uint64_to_float64_scalbn(a, scale, status);
3139}
3140
3141float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3142{
3143 return uint64_to_float64_scalbn(a, scale, status);
3144}
3145
3146float64 uint64_to_float64(uint64_t a, float_status *status)
3147{
3148 return uint64_to_float64_scalbn(a, 0, status);
3149}
3150
c02e1fb8
AB
3151float64 uint32_to_float64(uint32_t a, float_status *status)
3152{
2abdfe24 3153 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3154}
3155
3156float64 uint16_to_float64(uint16_t a, float_status *status)
3157{
2abdfe24 3158 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3159}
3160
34f0c0a9
LZ
3161/*
3162 * Returns the result of converting the unsigned integer `a' to the
3163 * bfloat16 format.
3164 */
3165
3166bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3167{
f8155c1d 3168 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3169 return bfloat16_round_pack_canonical(&pa, status);
34f0c0a9
LZ
3170}
3171
3172bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3173{
3174 return uint64_to_bfloat16_scalbn(a, scale, status);
3175}
3176
3177bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3178{
3179 return uint64_to_bfloat16_scalbn(a, scale, status);
3180}
3181
3182bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3183{
3184 return uint64_to_bfloat16_scalbn(a, 0, status);
3185}
3186
3187bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3188{
3189 return uint64_to_bfloat16_scalbn(a, 0, status);
3190}
3191
3192bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3193{
3194 return uint64_to_bfloat16_scalbn(a, 0, status);
3195}
3196
89360067
AB
3197/* Float Min/Max */
3198/* min() and max() functions. These can't be implemented as
3199 * 'compare and pick one input' because that would mishandle
3200 * NaNs and +0 vs -0.
3201 *
3202 * minnum() and maxnum() functions. These are similar to the min()
3203 * and max() functions but if one of the arguments is a QNaN and
3204 * the other is numerical then the numerical argument is returned.
3205 * SNaNs will get quietened before being returned.
3206 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
3207 * and maxNum() operations. min() and max() are the typical min/max
3208 * semantics provided by many CPUs which predate that specification.
3209 *
3210 * minnummag() and maxnummag() functions correspond to minNumMag()
3211 * and maxNumMag() from the IEEE-754 2008.
3212 */
f8155c1d 3213static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
89360067
AB
3214 bool ieee, bool ismag, float_status *s)
3215{
3216 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
3217 if (ieee) {
3218 /* Takes two floating-point values `a' and `b', one of
3219 * which is a NaN, and returns the appropriate NaN
3220 * result. If either `a' or `b' is a signaling NaN,
3221 * the invalid exception is raised.
3222 */
3223 if (is_snan(a.cls) || is_snan(b.cls)) {
3224 return pick_nan(a, b, s);
3225 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
3226 return b;
3227 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
3228 return a;
3229 }
3230 }
3231 return pick_nan(a, b, s);
3232 } else {
3233 int a_exp, b_exp;
89360067
AB
3234
3235 switch (a.cls) {
3236 case float_class_normal:
3237 a_exp = a.exp;
3238 break;
3239 case float_class_inf:
3240 a_exp = INT_MAX;
3241 break;
3242 case float_class_zero:
3243 a_exp = INT_MIN;
3244 break;
3245 default:
3246 g_assert_not_reached();
3247 break;
3248 }
3249 switch (b.cls) {
3250 case float_class_normal:
3251 b_exp = b.exp;
3252 break;
3253 case float_class_inf:
3254 b_exp = INT_MAX;
3255 break;
3256 case float_class_zero:
3257 b_exp = INT_MIN;
3258 break;
3259 default:
3260 g_assert_not_reached();
3261 break;
3262 }
3263
6245327a
EC
3264 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
3265 bool a_less = a_exp < b_exp;
3266 if (a_exp == b_exp) {
3267 a_less = a.frac < b.frac;
3268 }
3269 return a_less ^ ismin ? b : a;
89360067
AB
3270 }
3271
6245327a 3272 if (a.sign == b.sign) {
89360067
AB
3273 bool a_less = a_exp < b_exp;
3274 if (a_exp == b_exp) {
3275 a_less = a.frac < b.frac;
3276 }
6245327a 3277 return a.sign ^ a_less ^ ismin ? b : a;
89360067 3278 } else {
6245327a 3279 return a.sign ^ ismin ? b : a;
89360067
AB
3280 }
3281 }
3282}
3283
3284#define MINMAX(sz, name, ismin, isiee, ismag) \
3285float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
3286 float_status *s) \
3287{ \
98e256fc
RH
3288 FloatParts64 pa, pb, pr; \
3289 float ## sz ## _unpack_canonical(&pa, a, s); \
3290 float ## sz ## _unpack_canonical(&pb, b, s); \
3291 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
e293e927 3292 return float ## sz ## _round_pack_canonical(&pr, s); \
89360067
AB
3293}
3294
3295MINMAX(16, min, true, false, false)
3296MINMAX(16, minnum, true, true, false)
3297MINMAX(16, minnummag, true, true, true)
3298MINMAX(16, max, false, false, false)
3299MINMAX(16, maxnum, false, true, false)
3300MINMAX(16, maxnummag, false, true, true)
3301
3302MINMAX(32, min, true, false, false)
3303MINMAX(32, minnum, true, true, false)
3304MINMAX(32, minnummag, true, true, true)
3305MINMAX(32, max, false, false, false)
3306MINMAX(32, maxnum, false, true, false)
3307MINMAX(32, maxnummag, false, true, true)
3308
3309MINMAX(64, min, true, false, false)
3310MINMAX(64, minnum, true, true, false)
3311MINMAX(64, minnummag, true, true, true)
3312MINMAX(64, max, false, false, false)
3313MINMAX(64, maxnum, false, true, false)
3314MINMAX(64, maxnummag, false, true, true)
3315
3316#undef MINMAX
3317
8282310d
LZ
3318#define BF16_MINMAX(name, ismin, isiee, ismag) \
3319bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \
3320{ \
98e256fc
RH
3321 FloatParts64 pa, pb, pr; \
3322 bfloat16_unpack_canonical(&pa, a, s); \
3323 bfloat16_unpack_canonical(&pb, b, s); \
3324 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
e293e927 3325 return bfloat16_round_pack_canonical(&pr, s); \
8282310d
LZ
3326}
3327
3328BF16_MINMAX(min, true, false, false)
3329BF16_MINMAX(minnum, true, true, false)
3330BF16_MINMAX(minnummag, true, true, true)
3331BF16_MINMAX(max, false, false, false)
3332BF16_MINMAX(maxnum, false, true, false)
3333BF16_MINMAX(maxnummag, false, true, true)
3334
3335#undef BF16_MINMAX
3336
0c4c9092 3337/* Floating point compare */
f8155c1d 3338static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
71bfd65c 3339 float_status *s)
0c4c9092
AB
3340{
3341 if (is_nan(a.cls) || is_nan(b.cls)) {
3342 if (!is_quiet ||
3343 a.cls == float_class_snan ||
3344 b.cls == float_class_snan) {
d82f3b2d 3345 float_raise(float_flag_invalid, s);
0c4c9092
AB
3346 }
3347 return float_relation_unordered;
3348 }
3349
3350 if (a.cls == float_class_zero) {
3351 if (b.cls == float_class_zero) {
3352 return float_relation_equal;
3353 }
3354 return b.sign ? float_relation_greater : float_relation_less;
3355 } else if (b.cls == float_class_zero) {
3356 return a.sign ? float_relation_less : float_relation_greater;
3357 }
3358
3359 /* The only really important thing about infinity is its sign. If
3360 * both are infinities the sign marks the smallest of the two.
3361 */
3362 if (a.cls == float_class_inf) {
3363 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
3364 return float_relation_equal;
3365 }
3366 return a.sign ? float_relation_less : float_relation_greater;
3367 } else if (b.cls == float_class_inf) {
3368 return b.sign ? float_relation_greater : float_relation_less;
3369 }
3370
3371 if (a.sign != b.sign) {
3372 return a.sign ? float_relation_less : float_relation_greater;
3373 }
3374
3375 if (a.exp == b.exp) {
3376 if (a.frac == b.frac) {
3377 return float_relation_equal;
3378 }
3379 if (a.sign) {
3380 return a.frac > b.frac ?
3381 float_relation_less : float_relation_greater;
3382 } else {
3383 return a.frac > b.frac ?
3384 float_relation_greater : float_relation_less;
3385 }
3386 } else {
3387 if (a.sign) {
3388 return a.exp > b.exp ? float_relation_less : float_relation_greater;
3389 } else {
3390 return a.exp > b.exp ? float_relation_greater : float_relation_less;
3391 }
3392 }
3393}
3394
d9fe9db9
EC
3395#define COMPARE(name, attr, sz) \
3396static int attr \
3397name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
0c4c9092 3398{ \
98e256fc
RH
3399 FloatParts64 pa, pb; \
3400 float ## sz ## _unpack_canonical(&pa, a, s); \
3401 float ## sz ## _unpack_canonical(&pb, b, s); \
d9fe9db9 3402 return compare_floats(pa, pb, is_quiet, s); \
0c4c9092
AB
3403}
3404
d9fe9db9
EC
3405COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
3406COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
3407COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
0c4c9092
AB
3408
3409#undef COMPARE
3410
71bfd65c 3411FloatRelation float16_compare(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3412{
3413 return soft_f16_compare(a, b, false, s);
3414}
3415
71bfd65c 3416FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3417{
3418 return soft_f16_compare(a, b, true, s);
3419}
3420
71bfd65c 3421static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3422f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
3423{
3424 union_float32 ua, ub;
3425
3426 ua.s = xa;
3427 ub.s = xb;
3428
3429 if (QEMU_NO_HARDFLOAT) {
3430 goto soft;
3431 }
3432
3433 float32_input_flush2(&ua.s, &ub.s, s);
3434 if (isgreaterequal(ua.h, ub.h)) {
3435 if (isgreater(ua.h, ub.h)) {
3436 return float_relation_greater;
3437 }
3438 return float_relation_equal;
3439 }
3440 if (likely(isless(ua.h, ub.h))) {
3441 return float_relation_less;
3442 }
3443 /* The only condition remaining is unordered.
3444 * Fall through to set flags.
3445 */
3446 soft:
3447 return soft_f32_compare(ua.s, ub.s, is_quiet, s);
3448}
3449
71bfd65c 3450FloatRelation float32_compare(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3451{
3452 return f32_compare(a, b, false, s);
3453}
3454
71bfd65c 3455FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3456{
3457 return f32_compare(a, b, true, s);
3458}
3459
71bfd65c 3460static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3461f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
3462{
3463 union_float64 ua, ub;
3464
3465 ua.s = xa;
3466 ub.s = xb;
3467
3468 if (QEMU_NO_HARDFLOAT) {
3469 goto soft;
3470 }
3471
3472 float64_input_flush2(&ua.s, &ub.s, s);
3473 if (isgreaterequal(ua.h, ub.h)) {
3474 if (isgreater(ua.h, ub.h)) {
3475 return float_relation_greater;
3476 }
3477 return float_relation_equal;
3478 }
3479 if (likely(isless(ua.h, ub.h))) {
3480 return float_relation_less;
3481 }
3482 /* The only condition remaining is unordered.
3483 * Fall through to set flags.
3484 */
3485 soft:
3486 return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3487}
3488
71bfd65c 3489FloatRelation float64_compare(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3490{
3491 return f64_compare(a, b, false, s);
3492}
3493
71bfd65c 3494FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3495{
3496 return f64_compare(a, b, true, s);
3497}
3498
8282310d
LZ
3499static FloatRelation QEMU_FLATTEN
3500soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
3501{
98e256fc
RH
3502 FloatParts64 pa, pb;
3503
3504 bfloat16_unpack_canonical(&pa, a, s);
3505 bfloat16_unpack_canonical(&pb, b, s);
8282310d
LZ
3506 return compare_floats(pa, pb, is_quiet, s);
3507}
3508
3509FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3510{
3511 return soft_bf16_compare(a, b, false, s);
3512}
3513
3514FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3515{
3516 return soft_bf16_compare(a, b, true, s);
3517}
3518
0bfc9f19 3519/* Multiply A by 2 raised to the power N. */
f8155c1d 3520static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
0bfc9f19
AB
3521{
3522 if (unlikely(is_nan(a.cls))) {
3523 return return_nan(a, s);
3524 }
3525 if (a.cls == float_class_normal) {
f8155c1d 3526 /* The largest float type (even though not supported by FloatParts64)
ce8d4082
RH
3527 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
3528 * still allows rounding to infinity, without allowing overflow
f8155c1d 3529 * within the int32_t that backs FloatParts64.exp.
ce8d4082
RH
3530 */
3531 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
3532 a.exp += n;
3533 }
3534 return a;
3535}
3536
3537float16 float16_scalbn(float16 a, int n, float_status *status)
3538{
98e256fc
RH
3539 FloatParts64 pa, pr;
3540
3541 float16_unpack_canonical(&pa, a, status);
3542 pr = scalbn_decomposed(pa, n, status);
e293e927 3543 return float16_round_pack_canonical(&pr, status);
0bfc9f19
AB
3544}
3545
3546float32 float32_scalbn(float32 a, int n, float_status *status)
3547{
98e256fc
RH
3548 FloatParts64 pa, pr;
3549
3550 float32_unpack_canonical(&pa, a, status);
3551 pr = scalbn_decomposed(pa, n, status);
e293e927 3552 return float32_round_pack_canonical(&pr, status);
0bfc9f19
AB
3553}
3554
3555float64 float64_scalbn(float64 a, int n, float_status *status)
3556{
98e256fc
RH
3557 FloatParts64 pa, pr;
3558
3559 float64_unpack_canonical(&pa, a, status);
3560 pr = scalbn_decomposed(pa, n, status);
e293e927 3561 return float64_round_pack_canonical(&pr, status);
0bfc9f19
AB
3562}
3563
8282310d
LZ
3564bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3565{
98e256fc
RH
3566 FloatParts64 pa, pr;
3567
3568 bfloat16_unpack_canonical(&pa, a, status);
3569 pr = scalbn_decomposed(pa, n, status);
e293e927 3570 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
3571}
3572
c13bb2da
AB
3573/*
3574 * Square Root
3575 *
3576 * The old softfloat code did an approximation step before zeroing in
3577 * on the final result. However for simplicity we just compute the
3578 * square root by iterating down from the implicit bit to enough extra
3579 * bits to ensure we get a correctly rounded result.
3580 *
3581 * This does mean however the calculation is slower than before,
3582 * especially for 64 bit floats.
3583 */
3584
f8155c1d 3585static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
c13bb2da
AB
3586{
3587 uint64_t a_frac, r_frac, s_frac;
3588 int bit, last_bit;
3589
3590 if (is_nan(a.cls)) {
3591 return return_nan(a, s);
3592 }
3593 if (a.cls == float_class_zero) {
3594 return a; /* sqrt(+-0) = +-0 */
3595 }
3596 if (a.sign) {
d82f3b2d 3597 float_raise(float_flag_invalid, s);
0fc07cad
RH
3598 parts_default_nan(&a, s);
3599 return a;
c13bb2da
AB
3600 }
3601 if (a.cls == float_class_inf) {
3602 return a; /* sqrt(+inf) = +inf */
3603 }
3604
3605 assert(a.cls == float_class_normal);
3606
3607 /* We need two overflow bits at the top. Adding room for that is a
3608 * right shift. If the exponent is odd, we can discard the low bit
3609 * by multiplying the fraction by 2; that's a left shift. Combine
e99c4373 3610 * those and we shift right by 1 if the exponent is odd, otherwise 2.
c13bb2da 3611 */
e99c4373 3612 a_frac = a.frac >> (2 - (a.exp & 1));
c13bb2da
AB
3613 a.exp >>= 1;
3614
3615 /* Bit-by-bit computation of sqrt. */
3616 r_frac = 0;
3617 s_frac = 0;
3618
3619 /* Iterate from implicit bit down to the 3 extra bits to compute a
e99c4373
RH
3620 * properly rounded result. Remember we've inserted two more bits
3621 * at the top, so these positions are two less.
c13bb2da 3622 */
e99c4373 3623 bit = DECOMPOSED_BINARY_POINT - 2;
c13bb2da
AB
3624 last_bit = MAX(p->frac_shift - 4, 0);
3625 do {
3626 uint64_t q = 1ULL << bit;
3627 uint64_t t_frac = s_frac + q;
3628 if (t_frac <= a_frac) {
3629 s_frac = t_frac + q;
3630 a_frac -= t_frac;
3631 r_frac += q;
3632 }
3633 a_frac <<= 1;
3634 } while (--bit >= last_bit);
3635
3636 /* Undo the right shift done above. If there is any remaining
3637 * fraction, the result is inexact. Set the sticky bit.
3638 */
e99c4373 3639 a.frac = (r_frac << 2) + (a_frac != 0);
c13bb2da
AB
3640
3641 return a;
3642}
3643
97ff87c0 3644float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da 3645{
98e256fc
RH
3646 FloatParts64 pa, pr;
3647
3648 float16_unpack_canonical(&pa, a, status);
3649 pr = sqrt_float(pa, status, &float16_params);
e293e927 3650 return float16_round_pack_canonical(&pr, status);
c13bb2da
AB
3651}
3652
f131bae8
EC
3653static float32 QEMU_SOFTFLOAT_ATTR
3654soft_f32_sqrt(float32 a, float_status *status)
c13bb2da 3655{
98e256fc
RH
3656 FloatParts64 pa, pr;
3657
3658 float32_unpack_canonical(&pa, a, status);
3659 pr = sqrt_float(pa, status, &float32_params);
e293e927 3660 return float32_round_pack_canonical(&pr, status);
c13bb2da
AB
3661}
3662
f131bae8
EC
3663static float64 QEMU_SOFTFLOAT_ATTR
3664soft_f64_sqrt(float64 a, float_status *status)
c13bb2da 3665{
98e256fc
RH
3666 FloatParts64 pa, pr;
3667
3668 float64_unpack_canonical(&pa, a, status);
3669 pr = sqrt_float(pa, status, &float64_params);
e293e927 3670 return float64_round_pack_canonical(&pr, status);
c13bb2da
AB
3671}
3672
f131bae8
EC
3673float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3674{
3675 union_float32 ua, ur;
3676
3677 ua.s = xa;
3678 if (unlikely(!can_use_fpu(s))) {
3679 goto soft;
3680 }
3681
3682 float32_input_flush1(&ua.s, s);
3683 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3684 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3685 fpclassify(ua.h) == FP_ZERO) ||
3686 signbit(ua.h))) {
3687 goto soft;
3688 }
3689 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3690 float32_is_neg(ua.s))) {
3691 goto soft;
3692 }
3693 ur.h = sqrtf(ua.h);
3694 return ur.s;
3695
3696 soft:
3697 return soft_f32_sqrt(ua.s, s);
3698}
3699
3700float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3701{
3702 union_float64 ua, ur;
3703
3704 ua.s = xa;
3705 if (unlikely(!can_use_fpu(s))) {
3706 goto soft;
3707 }
3708
3709 float64_input_flush1(&ua.s, s);
3710 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3711 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3712 fpclassify(ua.h) == FP_ZERO) ||
3713 signbit(ua.h))) {
3714 goto soft;
3715 }
3716 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3717 float64_is_neg(ua.s))) {
3718 goto soft;
3719 }
3720 ur.h = sqrt(ua.h);
3721 return ur.s;
3722
3723 soft:
3724 return soft_f64_sqrt(ua.s, s);
3725}
3726
8282310d
LZ
3727bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3728{
98e256fc
RH
3729 FloatParts64 pa, pr;
3730
3731 bfloat16_unpack_canonical(&pa, a, status);
3732 pr = sqrt_float(pa, status, &bfloat16_params);
e293e927 3733 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
3734}
3735
0218a16e
RH
3736/*----------------------------------------------------------------------------
3737| The pattern for a default generated NaN.
3738*----------------------------------------------------------------------------*/
3739
3740float16 float16_default_nan(float_status *status)
3741{
0fc07cad
RH
3742 FloatParts64 p;
3743
3744 parts_default_nan(&p, status);
0218a16e 3745 p.frac >>= float16_params.frac_shift;
71fd178e 3746 return float16_pack_raw(&p);
0218a16e
RH
3747}
3748
3749float32 float32_default_nan(float_status *status)
3750{
0fc07cad
RH
3751 FloatParts64 p;
3752
3753 parts_default_nan(&p, status);
0218a16e 3754 p.frac >>= float32_params.frac_shift;
71fd178e 3755 return float32_pack_raw(&p);
0218a16e
RH
3756}
3757
3758float64 float64_default_nan(float_status *status)
3759{
0fc07cad
RH
3760 FloatParts64 p;
3761
3762 parts_default_nan(&p, status);
0218a16e 3763 p.frac >>= float64_params.frac_shift;
71fd178e 3764 return float64_pack_raw(&p);
0218a16e
RH
3765}
3766
3767float128 float128_default_nan(float_status *status)
3768{
0fc07cad 3769 FloatParts64 p;
0218a16e
RH
3770 float128 r;
3771
0fc07cad 3772 parts_default_nan(&p, status);
0218a16e
RH
3773 /* Extrapolate from the choices made by parts_default_nan to fill
3774 * in the quad-floating format. If the low bit is set, assume we
3775 * want to set all non-snan bits.
3776 */
3777 r.low = -(p.frac & 1);
3778 r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
e9321124 3779 r.high |= UINT64_C(0x7FFF000000000000);
0218a16e
RH
3780 r.high |= (uint64_t)p.sign << 63;
3781
3782 return r;
3783}
c13bb2da 3784
8282310d
LZ
3785bfloat16 bfloat16_default_nan(float_status *status)
3786{
0fc07cad
RH
3787 FloatParts64 p;
3788
3789 parts_default_nan(&p, status);
8282310d 3790 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3791 return bfloat16_pack_raw(&p);
8282310d
LZ
3792}
3793
158142c2 3794/*----------------------------------------------------------------------------
377ed926
RH
3795| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3796*----------------------------------------------------------------------------*/
3797
3798float16 float16_silence_nan(float16 a, float_status *status)
3799{
3dddb203
RH
3800 FloatParts64 p;
3801
3802 float16_unpack_raw(&p, a);
377ed926
RH
3803 p.frac <<= float16_params.frac_shift;
3804 p = parts_silence_nan(p, status);
3805 p.frac >>= float16_params.frac_shift;
71fd178e 3806 return float16_pack_raw(&p);
377ed926
RH
3807}
3808
3809float32 float32_silence_nan(float32 a, float_status *status)
3810{
3dddb203
RH
3811 FloatParts64 p;
3812
3813 float32_unpack_raw(&p, a);
377ed926
RH
3814 p.frac <<= float32_params.frac_shift;
3815 p = parts_silence_nan(p, status);
3816 p.frac >>= float32_params.frac_shift;
71fd178e 3817 return float32_pack_raw(&p);
377ed926
RH
3818}
3819
3820float64 float64_silence_nan(float64 a, float_status *status)
3821{
3dddb203
RH
3822 FloatParts64 p;
3823
3824 float64_unpack_raw(&p, a);
377ed926
RH
3825 p.frac <<= float64_params.frac_shift;
3826 p = parts_silence_nan(p, status);
3827 p.frac >>= float64_params.frac_shift;
71fd178e 3828 return float64_pack_raw(&p);
377ed926
RH
3829}
3830
8282310d
LZ
3831bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3832{
3dddb203
RH
3833 FloatParts64 p;
3834
3835 bfloat16_unpack_raw(&p, a);
8282310d
LZ
3836 p.frac <<= bfloat16_params.frac_shift;
3837 p = parts_silence_nan(p, status);
3838 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3839 return bfloat16_pack_raw(&p);
8282310d 3840}
e6b405fe
AB
3841
3842/*----------------------------------------------------------------------------
3843| If `a' is denormal and we are in flush-to-zero mode then set the
3844| input-denormal exception and return zero. Otherwise just return the value.
3845*----------------------------------------------------------------------------*/
3846
f8155c1d 3847static bool parts_squash_denormal(FloatParts64 p, float_status *status)
e6b405fe
AB
3848{
3849 if (p.exp == 0 && p.frac != 0) {
3850 float_raise(float_flag_input_denormal, status);
3851 return true;
3852 }
3853
3854 return false;
3855}
3856
3857float16 float16_squash_input_denormal(float16 a, float_status *status)
3858{
3859 if (status->flush_inputs_to_zero) {
3dddb203
RH
3860 FloatParts64 p;
3861
3862 float16_unpack_raw(&p, a);
e6b405fe
AB
3863 if (parts_squash_denormal(p, status)) {
3864 return float16_set_sign(float16_zero, p.sign);
3865 }
3866 }
3867 return a;
3868}
3869
3870float32 float32_squash_input_denormal(float32 a, float_status *status)
3871{
3872 if (status->flush_inputs_to_zero) {
3dddb203
RH
3873 FloatParts64 p;
3874
3875 float32_unpack_raw(&p, a);
e6b405fe
AB
3876 if (parts_squash_denormal(p, status)) {
3877 return float32_set_sign(float32_zero, p.sign);
3878 }
3879 }
3880 return a;
3881}
3882
3883float64 float64_squash_input_denormal(float64 a, float_status *status)
3884{
3885 if (status->flush_inputs_to_zero) {
3dddb203
RH
3886 FloatParts64 p;
3887
3888 float64_unpack_raw(&p, a);
e6b405fe
AB
3889 if (parts_squash_denormal(p, status)) {
3890 return float64_set_sign(float64_zero, p.sign);
3891 }
3892 }
3893 return a;
3894}
3895
8282310d
LZ
3896bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3897{
3898 if (status->flush_inputs_to_zero) {
3dddb203
RH
3899 FloatParts64 p;
3900
3901 bfloat16_unpack_raw(&p, a);
8282310d
LZ
3902 if (parts_squash_denormal(p, status)) {
3903 return bfloat16_set_sign(bfloat16_zero, p.sign);
3904 }
3905 }
3906 return a;
3907}
3908
377ed926 3909/*----------------------------------------------------------------------------
158142c2
FB
3910| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3911| and 7, and returns the properly rounded 32-bit integer corresponding to the
3912| input. If `zSign' is 1, the input is negated before being converted to an
3913| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3914| is simply rounded to an integer, with the inexact exception raised if the
3915| input cannot be represented exactly as an integer. However, if the fixed-
3916| point input is too large, the invalid exception is raised and the largest
3917| positive or negative integer is returned.
3918*----------------------------------------------------------------------------*/
3919
c120391c
RH
3920static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
3921 float_status *status)
158142c2 3922{
8f506c70 3923 int8_t roundingMode;
c120391c 3924 bool roundNearestEven;
8f506c70 3925 int8_t roundIncrement, roundBits;
760e1416 3926 int32_t z;
158142c2 3927
a2f2d288 3928 roundingMode = status->float_rounding_mode;
158142c2 3929 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3930 switch (roundingMode) {
3931 case float_round_nearest_even:
f9288a76 3932 case float_round_ties_away:
dc355b76
PM
3933 roundIncrement = 0x40;
3934 break;
3935 case float_round_to_zero:
3936 roundIncrement = 0;
3937 break;
3938 case float_round_up:
3939 roundIncrement = zSign ? 0 : 0x7f;
3940 break;
3941 case float_round_down:
3942 roundIncrement = zSign ? 0x7f : 0;
3943 break;
5d64abb3
RH
3944 case float_round_to_odd:
3945 roundIncrement = absZ & 0x80 ? 0 : 0x7f;
3946 break;
dc355b76
PM
3947 default:
3948 abort();
158142c2
FB
3949 }
3950 roundBits = absZ & 0x7F;
3951 absZ = ( absZ + roundIncrement )>>7;
40662886
PMD
3952 if (!(roundBits ^ 0x40) && roundNearestEven) {
3953 absZ &= ~1;
3954 }
158142c2
FB
3955 z = absZ;
3956 if ( zSign ) z = - z;
3957 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 3958 float_raise(float_flag_invalid, status);
2c217da0 3959 return zSign ? INT32_MIN : INT32_MAX;
158142c2 3960 }
a2f2d288 3961 if (roundBits) {
d82f3b2d 3962 float_raise(float_flag_inexact, status);
a2f2d288 3963 }
158142c2
FB
3964 return z;
3965
3966}
3967
3968/*----------------------------------------------------------------------------
3969| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3970| `absZ1', with binary point between bits 63 and 64 (between the input words),
3971| and returns the properly rounded 64-bit integer corresponding to the input.
3972| If `zSign' is 1, the input is negated before being converted to an integer.
3973| Ordinarily, the fixed-point input is simply rounded to an integer, with
3974| the inexact exception raised if the input cannot be represented exactly as
3975| an integer. However, if the fixed-point input is too large, the invalid
3976| exception is raised and the largest positive or negative integer is
3977| returned.
3978*----------------------------------------------------------------------------*/
3979
c120391c 3980static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 3981 float_status *status)
158142c2 3982{
8f506c70 3983 int8_t roundingMode;
c120391c 3984 bool roundNearestEven, increment;
760e1416 3985 int64_t z;
158142c2 3986
a2f2d288 3987 roundingMode = status->float_rounding_mode;
158142c2 3988 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3989 switch (roundingMode) {
3990 case float_round_nearest_even:
f9288a76 3991 case float_round_ties_away:
dc355b76
PM
3992 increment = ((int64_t) absZ1 < 0);
3993 break;
3994 case float_round_to_zero:
3995 increment = 0;
3996 break;
3997 case float_round_up:
3998 increment = !zSign && absZ1;
3999 break;
4000 case float_round_down:
4001 increment = zSign && absZ1;
4002 break;
5d64abb3
RH
4003 case float_round_to_odd:
4004 increment = !(absZ0 & 1) && absZ1;
4005 break;
dc355b76
PM
4006 default:
4007 abort();
158142c2
FB
4008 }
4009 if ( increment ) {
4010 ++absZ0;
4011 if ( absZ0 == 0 ) goto overflow;
40662886
PMD
4012 if (!(absZ1 << 1) && roundNearestEven) {
4013 absZ0 &= ~1;
4014 }
158142c2
FB
4015 }
4016 z = absZ0;
4017 if ( zSign ) z = - z;
4018 if ( z && ( ( z < 0 ) ^ zSign ) ) {
4019 overflow:
ff32e16e 4020 float_raise(float_flag_invalid, status);
2c217da0 4021 return zSign ? INT64_MIN : INT64_MAX;
158142c2 4022 }
a2f2d288 4023 if (absZ1) {
d82f3b2d 4024 float_raise(float_flag_inexact, status);
a2f2d288 4025 }
158142c2
FB
4026 return z;
4027
4028}
4029
fb3ea83a
TM
4030/*----------------------------------------------------------------------------
4031| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4032| `absZ1', with binary point between bits 63 and 64 (between the input words),
4033| and returns the properly rounded 64-bit unsigned integer corresponding to the
4034| input. Ordinarily, the fixed-point input is simply rounded to an integer,
4035| with the inexact exception raised if the input cannot be represented exactly
4036| as an integer. However, if the fixed-point input is too large, the invalid
4037| exception is raised and the largest unsigned integer is returned.
4038*----------------------------------------------------------------------------*/
4039
c120391c 4040static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
e5a41ffa 4041 uint64_t absZ1, float_status *status)
fb3ea83a 4042{
8f506c70 4043 int8_t roundingMode;
c120391c 4044 bool roundNearestEven, increment;
fb3ea83a 4045
a2f2d288 4046 roundingMode = status->float_rounding_mode;
fb3ea83a 4047 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
4048 switch (roundingMode) {
4049 case float_round_nearest_even:
f9288a76 4050 case float_round_ties_away:
dc355b76
PM
4051 increment = ((int64_t)absZ1 < 0);
4052 break;
4053 case float_round_to_zero:
4054 increment = 0;
4055 break;
4056 case float_round_up:
4057 increment = !zSign && absZ1;
4058 break;
4059 case float_round_down:
4060 increment = zSign && absZ1;
4061 break;
5d64abb3
RH
4062 case float_round_to_odd:
4063 increment = !(absZ0 & 1) && absZ1;
4064 break;
dc355b76
PM
4065 default:
4066 abort();
fb3ea83a
TM
4067 }
4068 if (increment) {
4069 ++absZ0;
4070 if (absZ0 == 0) {
ff32e16e 4071 float_raise(float_flag_invalid, status);
2c217da0 4072 return UINT64_MAX;
fb3ea83a 4073 }
40662886
PMD
4074 if (!(absZ1 << 1) && roundNearestEven) {
4075 absZ0 &= ~1;
4076 }
fb3ea83a
TM
4077 }
4078
4079 if (zSign && absZ0) {
ff32e16e 4080 float_raise(float_flag_invalid, status);
fb3ea83a
TM
4081 return 0;
4082 }
4083
4084 if (absZ1) {
d82f3b2d 4085 float_raise(float_flag_inexact, status);
fb3ea83a
TM
4086 }
4087 return absZ0;
4088}
4089
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision value whose denormalized
| significand is `aSig'.  The significand is shifted left by
| clz32(aSig) - 8 bits and stored through `zSigPtr'; the matching exponent
| (1 minus that shift) is stored through `zExpPtr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    const int8_t shift = clz32(aSig) - 8;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4107
158142c2
FB
4108/*----------------------------------------------------------------------------
4109| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4110| and significand `zSig', and returns the proper single-precision floating-
4111| point value corresponding to the abstract input. Ordinarily, the abstract
4112| value is simply rounded and packed into the single-precision format, with
4113| the inexact exception raised if the abstract input cannot be represented
4114| exactly. However, if the abstract value is too large, the overflow and
4115| inexact exceptions are raised and an infinity or maximal finite value is
4116| returned. If the abstract value is too small, the input value is rounded to
4117| a subnormal number, and the underflow and inexact exceptions are raised if
4118| the abstract input cannot be represented exactly as a subnormal single-
4119| precision floating-point number.
4120| The input significand `zSig' has its binary point between bits 30
4121| and 29, which is 7 bits to the left of the usual location. This shifted
4122| significand must be normalized or smaller. If `zSig' is not normalized,
4123| `zExp' must be 0; in that case, the result returned is a subnormal number,
4124| and it must not require rounding. In the usual case that `zSig' is
4125| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4126| The handling of underflow and overflow follows the IEC/IEEE Standard for
4127| Binary Floating-Point Arithmetic.
4128*----------------------------------------------------------------------------*/
4129
c120391c 4130static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4131 float_status *status)
158142c2 4132{
8f506c70 4133 int8_t roundingMode;
c120391c 4134 bool roundNearestEven;
8f506c70 4135 int8_t roundIncrement, roundBits;
c120391c 4136 bool isTiny;
158142c2 4137
a2f2d288 4138 roundingMode = status->float_rounding_mode;
158142c2 4139 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4140 switch (roundingMode) {
4141 case float_round_nearest_even:
f9288a76 4142 case float_round_ties_away:
dc355b76
PM
4143 roundIncrement = 0x40;
4144 break;
4145 case float_round_to_zero:
4146 roundIncrement = 0;
4147 break;
4148 case float_round_up:
4149 roundIncrement = zSign ? 0 : 0x7f;
4150 break;
4151 case float_round_down:
4152 roundIncrement = zSign ? 0x7f : 0;
4153 break;
5d64abb3
RH
4154 case float_round_to_odd:
4155 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4156 break;
dc355b76
PM
4157 default:
4158 abort();
4159 break;
158142c2
FB
4160 }
4161 roundBits = zSig & 0x7F;
bb98fe42 4162 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
4163 if ( ( 0xFD < zExp )
4164 || ( ( zExp == 0xFD )
bb98fe42 4165 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4166 ) {
5d64abb3
RH
4167 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4168 roundIncrement != 0;
ff32e16e 4169 float_raise(float_flag_overflow | float_flag_inexact, status);
5d64abb3 4170 return packFloat32(zSign, 0xFF, -!overflow_to_inf);
158142c2
FB
4171 }
4172 if ( zExp < 0 ) {
a2f2d288 4173 if (status->flush_to_zero) {
ff32e16e 4174 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4175 return packFloat32(zSign, 0, 0);
4176 }
a828b373
RH
4177 isTiny = status->tininess_before_rounding
4178 || (zExp < -1)
4179 || (zSig + roundIncrement < 0x80000000);
158142c2
FB
4180 shift32RightJamming( zSig, - zExp, &zSig );
4181 zExp = 0;
4182 roundBits = zSig & 0x7F;
ff32e16e
PM
4183 if (isTiny && roundBits) {
4184 float_raise(float_flag_underflow, status);
4185 }
5d64abb3
RH
4186 if (roundingMode == float_round_to_odd) {
4187 /*
4188 * For round-to-odd case, the roundIncrement depends on
4189 * zSig which just changed.
4190 */
4191 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4192 }
158142c2
FB
4193 }
4194 }
a2f2d288 4195 if (roundBits) {
d82f3b2d 4196 float_raise(float_flag_inexact, status);
a2f2d288 4197 }
158142c2 4198 zSig = ( zSig + roundIncrement )>>7;
40662886
PMD
4199 if (!(roundBits ^ 0x40) && roundNearestEven) {
4200 zSig &= ~1;
4201 }
158142c2
FB
4202 if ( zSig == 0 ) zExp = 0;
4203 return packFloat32( zSign, zExp, zSig );
4204
4205}
4206
4207/*----------------------------------------------------------------------------
4208| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4209| and significand `zSig', and returns the proper single-precision floating-
4210| point value corresponding to the abstract input. This routine is just like
4211| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4212| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4213| floating-point exponent.
4214*----------------------------------------------------------------------------*/
4215
4216static float32
c120391c 4217 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4218 float_status *status)
158142c2 4219{
8f506c70 4220 int8_t shiftCount;
158142c2 4221
0019d5c3 4222 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
4223 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4224 status);
158142c2
FB
4225
4226}
4227
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal double-precision value whose denormalized
| significand is `aSig'.  The significand is shifted left by
| clz64(aSig) - 11 bits and stored through `zSigPtr'; the matching exponent
| (1 minus that shift) is stored through `zExpPtr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    const int8_t shift = clz64(aSig) - 11;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4245
4246/*----------------------------------------------------------------------------
4247| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4248| double-precision floating-point value, returning the result. After being
4249| shifted into the proper positions, the three fields are simply added
4250| together to form the result. This means that any integer portion of `zSig'
4251| will be added into the exponent. Since a properly normalized significand
4252| will have an integer portion equal to 1, the `zExp' input should be 1 less
4253| than the desired result exponent whenever `zSig' is a complete, normalized
4254| significand.
4255*----------------------------------------------------------------------------*/
4256
c120391c 4257static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
158142c2
FB
4258{
4259
f090c9d4 4260 return make_float64(
bb98fe42 4261 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
4262
4263}
4264
4265/*----------------------------------------------------------------------------
4266| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4267| and significand `zSig', and returns the proper double-precision floating-
4268| point value corresponding to the abstract input. Ordinarily, the abstract
4269| value is simply rounded and packed into the double-precision format, with
4270| the inexact exception raised if the abstract input cannot be represented
4271| exactly. However, if the abstract value is too large, the overflow and
4272| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
4273| returned. If the abstract value is too small, the input value is rounded to
4274| a subnormal number, and the underflow and inexact exceptions are raised if
4275| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
4276| precision floating-point number.
4277| The input significand `zSig' has its binary point between bits 62
4278| and 61, which is 10 bits to the left of the usual location. This shifted
4279| significand must be normalized or smaller. If `zSig' is not normalized,
4280| `zExp' must be 0; in that case, the result returned is a subnormal number,
4281| and it must not require rounding. In the usual case that `zSig' is
4282| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4283| The handling of underflow and overflow follows the IEC/IEEE Standard for
4284| Binary Floating-Point Arithmetic.
4285*----------------------------------------------------------------------------*/
4286
c120391c 4287static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4288 float_status *status)
158142c2 4289{
8f506c70 4290 int8_t roundingMode;
c120391c 4291 bool roundNearestEven;
0c48262d 4292 int roundIncrement, roundBits;
c120391c 4293 bool isTiny;
158142c2 4294
a2f2d288 4295 roundingMode = status->float_rounding_mode;
158142c2 4296 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4297 switch (roundingMode) {
4298 case float_round_nearest_even:
f9288a76 4299 case float_round_ties_away:
dc355b76
PM
4300 roundIncrement = 0x200;
4301 break;
4302 case float_round_to_zero:
4303 roundIncrement = 0;
4304 break;
4305 case float_round_up:
4306 roundIncrement = zSign ? 0 : 0x3ff;
4307 break;
4308 case float_round_down:
4309 roundIncrement = zSign ? 0x3ff : 0;
4310 break;
9ee6f678
BR
4311 case float_round_to_odd:
4312 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4313 break;
dc355b76
PM
4314 default:
4315 abort();
158142c2
FB
4316 }
4317 roundBits = zSig & 0x3FF;
bb98fe42 4318 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
4319 if ( ( 0x7FD < zExp )
4320 || ( ( zExp == 0x7FD )
bb98fe42 4321 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4322 ) {
9ee6f678
BR
4323 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4324 roundIncrement != 0;
ff32e16e 4325 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 4326 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
4327 }
4328 if ( zExp < 0 ) {
a2f2d288 4329 if (status->flush_to_zero) {
ff32e16e 4330 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4331 return packFloat64(zSign, 0, 0);
4332 }
a828b373
RH
4333 isTiny = status->tininess_before_rounding
4334 || (zExp < -1)
4335 || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
158142c2
FB
4336 shift64RightJamming( zSig, - zExp, &zSig );
4337 zExp = 0;
4338 roundBits = zSig & 0x3FF;
ff32e16e
PM
4339 if (isTiny && roundBits) {
4340 float_raise(float_flag_underflow, status);
4341 }
9ee6f678
BR
4342 if (roundingMode == float_round_to_odd) {
4343 /*
4344 * For round-to-odd case, the roundIncrement depends on
4345 * zSig which just changed.
4346 */
4347 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4348 }
158142c2
FB
4349 }
4350 }
a2f2d288 4351 if (roundBits) {
d82f3b2d 4352 float_raise(float_flag_inexact, status);
a2f2d288 4353 }
158142c2 4354 zSig = ( zSig + roundIncrement )>>10;
40662886
PMD
4355 if (!(roundBits ^ 0x200) && roundNearestEven) {
4356 zSig &= ~1;
4357 }
158142c2
FB
4358 if ( zSig == 0 ) zExp = 0;
4359 return packFloat64( zSign, zExp, zSig );
4360
4361}
4362
4363/*----------------------------------------------------------------------------
4364| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4365| and significand `zSig', and returns the proper double-precision floating-
4366| point value corresponding to the abstract input. This routine is just like
4367| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4368| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4369| floating-point exponent.
4370*----------------------------------------------------------------------------*/
4371
4372static float64
c120391c 4373 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4374 float_status *status)
158142c2 4375{
8f506c70 4376 int8_t shiftCount;
158142c2 4377
0019d5c3 4378 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
4379 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4380 status);
158142c2
FB
4381
4382}
4383
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision value whose denormalized
| significand is `aSig'.  The significand is shifted left by clz64(aSig)
| bits and stored through `zSigPtr'; the matching exponent (1 minus that
| shift) is stored through `zExpPtr'.
| NOTE(review): `aSig' is presumably nonzero on every call path -- confirm
| against callers.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    const int8_t shift = clz64(aSig);

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4400
4401/*----------------------------------------------------------------------------
4402| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4403| and extended significand formed by the concatenation of `zSig0' and `zSig1',
4404| and returns the proper extended double-precision floating-point value
4405| corresponding to the abstract input. Ordinarily, the abstract value is
4406| rounded and packed into the extended double-precision format, with the
4407| inexact exception raised if the abstract input cannot be represented
4408| exactly. However, if the abstract value is too large, the overflow and
4409| inexact exceptions are raised and an infinity or maximal finite value is
4410| returned. If the abstract value is too small, the input value is rounded to
4411| a subnormal number, and the underflow and inexact exceptions are raised if
4412| the abstract input cannot be represented exactly as a subnormal extended
4413| double-precision floating-point number.
4414| If `roundingPrecision' is 32 or 64, the result is rounded to the same
4415| number of bits as single or double precision, respectively. Otherwise, the
4416| result is rounded to the full precision of the extended double-precision
4417| format.
4418| The input significand must be normalized or smaller. If the input
4419| significand is not normalized, `zExp' must be 0; in that case, the result
4420| returned is a subnormal number, and it must not require rounding. The
4421| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4422| Floating-Point Arithmetic.
4423*----------------------------------------------------------------------------*/
4424
c120391c 4425floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
88857aca
LV
4426 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4427 float_status *status)
158142c2 4428{
8f506c70 4429 int8_t roundingMode;
c120391c 4430 bool roundNearestEven, increment, isTiny;
f42c2224 4431 int64_t roundIncrement, roundMask, roundBits;
158142c2 4432
a2f2d288 4433 roundingMode = status->float_rounding_mode;
158142c2
FB
4434 roundNearestEven = ( roundingMode == float_round_nearest_even );
4435 if ( roundingPrecision == 80 ) goto precision80;
4436 if ( roundingPrecision == 64 ) {
e9321124
AB
4437 roundIncrement = UINT64_C(0x0000000000000400);
4438 roundMask = UINT64_C(0x00000000000007FF);
158142c2
FB
4439 }
4440 else if ( roundingPrecision == 32 ) {
e9321124
AB
4441 roundIncrement = UINT64_C(0x0000008000000000);
4442 roundMask = UINT64_C(0x000000FFFFFFFFFF);
158142c2
FB
4443 }
4444 else {
4445 goto precision80;
4446 }
4447 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
4448 switch (roundingMode) {
4449 case float_round_nearest_even:
f9288a76 4450 case float_round_ties_away:
dc355b76
PM
4451 break;
4452 case float_round_to_zero:
4453 roundIncrement = 0;
4454 break;
4455 case float_round_up:
4456 roundIncrement = zSign ? 0 : roundMask;
4457 break;
4458 case float_round_down:
4459 roundIncrement = zSign ? roundMask : 0;
4460 break;
4461 default:
4462 abort();
158142c2
FB
4463 }
4464 roundBits = zSig0 & roundMask;
bb98fe42 4465 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4466 if ( ( 0x7FFE < zExp )
4467 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4468 ) {
4469 goto overflow;
4470 }
4471 if ( zExp <= 0 ) {
a2f2d288 4472 if (status->flush_to_zero) {
ff32e16e 4473 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4474 return packFloatx80(zSign, 0, 0);
4475 }
a828b373
RH
4476 isTiny = status->tininess_before_rounding
4477 || (zExp < 0 )
4478 || (zSig0 <= zSig0 + roundIncrement);
158142c2
FB
4479 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4480 zExp = 0;
4481 roundBits = zSig0 & roundMask;
ff32e16e
PM
4482 if (isTiny && roundBits) {
4483 float_raise(float_flag_underflow, status);
4484 }
a2f2d288 4485 if (roundBits) {
d82f3b2d 4486 float_raise(float_flag_inexact, status);
a2f2d288 4487 }
158142c2 4488 zSig0 += roundIncrement;
bb98fe42 4489 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4490 roundIncrement = roundMask + 1;
4491 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4492 roundMask |= roundIncrement;
4493 }
4494 zSig0 &= ~ roundMask;
4495 return packFloatx80( zSign, zExp, zSig0 );
4496 }
4497 }
a2f2d288 4498 if (roundBits) {
d82f3b2d 4499 float_raise(float_flag_inexact, status);
a2f2d288 4500 }
158142c2
FB
4501 zSig0 += roundIncrement;
4502 if ( zSig0 < roundIncrement ) {
4503 ++zExp;
e9321124 4504 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4505 }
4506 roundIncrement = roundMask + 1;
4507 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4508 roundMask |= roundIncrement;
4509 }
4510 zSig0 &= ~ roundMask;
4511 if ( zSig0 == 0 ) zExp = 0;
4512 return packFloatx80( zSign, zExp, zSig0 );
4513 precision80:
dc355b76
PM
4514 switch (roundingMode) {
4515 case float_round_nearest_even:
f9288a76 4516 case float_round_ties_away:
dc355b76
PM
4517 increment = ((int64_t)zSig1 < 0);
4518 break;
4519 case float_round_to_zero:
4520 increment = 0;
4521 break;
4522 case float_round_up:
4523 increment = !zSign && zSig1;
4524 break;
4525 case float_round_down:
4526 increment = zSign && zSig1;
4527 break;
4528 default:
4529 abort();
158142c2 4530 }
bb98fe42 4531 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4532 if ( ( 0x7FFE < zExp )
4533 || ( ( zExp == 0x7FFE )
e9321124 4534 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
158142c2
FB
4535 && increment
4536 )
4537 ) {
4538 roundMask = 0;
4539 overflow:
ff32e16e 4540 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4541 if ( ( roundingMode == float_round_to_zero )
4542 || ( zSign && ( roundingMode == float_round_up ) )
4543 || ( ! zSign && ( roundingMode == float_round_down ) )
4544 ) {
4545 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4546 }
0f605c88
LV
4547 return packFloatx80(zSign,
4548 floatx80_infinity_high,
4549 floatx80_infinity_low);
158142c2
FB
4550 }
4551 if ( zExp <= 0 ) {
a828b373
RH
4552 isTiny = status->tininess_before_rounding
4553 || (zExp < 0)
4554 || !increment
4555 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4556 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4557 zExp = 0;
ff32e16e
PM
4558 if (isTiny && zSig1) {
4559 float_raise(float_flag_underflow, status);
4560 }
a2f2d288 4561 if (zSig1) {
d82f3b2d 4562 float_raise(float_flag_inexact, status);
a2f2d288 4563 }
dc355b76
PM
4564 switch (roundingMode) {
4565 case float_round_nearest_even:
f9288a76 4566 case float_round_ties_away:
dc355b76
PM
4567 increment = ((int64_t)zSig1 < 0);
4568 break;
4569 case float_round_to_zero:
4570 increment = 0;
4571 break;
4572 case float_round_up:
4573 increment = !zSign && zSig1;
4574 break;
4575 case float_round_down:
4576 increment = zSign && zSig1;
4577 break;
4578 default:
4579 abort();
158142c2
FB
4580 }
4581 if ( increment ) {
4582 ++zSig0;
40662886
PMD
4583 if (!(zSig1 << 1) && roundNearestEven) {
4584 zSig0 &= ~1;
4585 }
bb98fe42 4586 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4587 }
4588 return packFloatx80( zSign, zExp, zSig0 );
4589 }
4590 }
a2f2d288 4591 if (zSig1) {
d82f3b2d 4592 float_raise(float_flag_inexact, status);
a2f2d288 4593 }
158142c2
FB
4594 if ( increment ) {
4595 ++zSig0;
4596 if ( zSig0 == 0 ) {
4597 ++zExp;
e9321124 4598 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4599 }
4600 else {
40662886
PMD
4601 if (!(zSig1 << 1) && roundNearestEven) {
4602 zSig0 &= ~1;
4603 }
158142c2
FB
4604 }
4605 }
4606 else {
4607 if ( zSig0 == 0 ) zExp = 0;
4608 }
4609 return packFloatx80( zSign, zExp, zSig0 );
4610
4611}
4612
4613/*----------------------------------------------------------------------------
4614| Takes an abstract floating-point value having sign `zSign', exponent
4615| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4616| and returns the proper extended double-precision floating-point value
4617| corresponding to the abstract input. This routine is just like
4618| `roundAndPackFloatx80' except that the input significand does not have to be
4619| normalized.
4620*----------------------------------------------------------------------------*/
4621
88857aca 4622floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
c120391c 4623 bool zSign, int32_t zExp,
88857aca
LV
4624 uint64_t zSig0, uint64_t zSig1,
4625 float_status *status)
158142c2 4626{
8f506c70 4627 int8_t shiftCount;
158142c2
FB
4628
4629 if ( zSig0 == 0 ) {
4630 zSig0 = zSig1;
4631 zSig1 = 0;
4632 zExp -= 64;
4633 }
0019d5c3 4634 shiftCount = clz64(zSig0);
158142c2
FB
4635 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4636 zExp -= shiftCount;
ff32e16e
PM
4637 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4638 zSig0, zSig1, status);
158142c2
FB
4639
4640}
4641
158142c2
FB
4642/*----------------------------------------------------------------------------
4643| Returns the least-significant 64 fraction bits of the quadruple-precision
4644| floating-point value `a'.
4645*----------------------------------------------------------------------------*/
4646
a49db98d 4647static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
4648{
4649
4650 return a.low;
4651
4652}
4653
4654/*----------------------------------------------------------------------------
4655| Returns the most-significant 48 fraction bits of the quadruple-precision
4656| floating-point value `a'.
4657*----------------------------------------------------------------------------*/
4658
a49db98d 4659static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
4660{
4661
e9321124 4662 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
158142c2
FB
4663
4664}
4665
4666/*----------------------------------------------------------------------------
4667| Returns the exponent bits of the quadruple-precision floating-point value
4668| `a'.
4669*----------------------------------------------------------------------------*/
4670
f4014512 4671static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
4672{
4673
4674 return ( a.high>>48 ) & 0x7FFF;
4675
4676}
4677
4678/*----------------------------------------------------------------------------
4679| Returns the sign bit of the quadruple-precision floating-point value `a'.
4680*----------------------------------------------------------------------------*/
4681
c120391c 4682static inline bool extractFloat128Sign(float128 a)
158142c2 4683{
c120391c 4684 return a.high >> 63;
158142c2
FB
4685}
4686
/*----------------------------------------------------------------------------
| Normalizes the subnormal quadruple-precision floating-point value
| represented by the denormalized significand formed by the concatenation of
| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
| significand are stored at the location pointed to by `zSig0Ptr', and the
| least significant 64 bits of the normalized significand are stored at the
| location pointed to by `zSig1Ptr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shift;

    if (aSig0 != 0) {
        /* The leading bit is in the high word: one 128-bit left shift. */
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /* The high word is empty: the low word supplies every significant bit. */
    shift = clz64(aSig1) - 15;
    if (shift >= 0) {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    } else {
        /* Leading bit sits above bit 48: split the low word across both. */
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    }
    *zExpPtr = -shift - 63;
}
4727
4728/*----------------------------------------------------------------------------
4729| Packs the sign `zSign', the exponent `zExp', and the significand formed
4730| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4731| floating-point value, returning the result. After being shifted into the
4732| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4733| added together to form the most significant 32 bits of the result. This
4734| means that any integer portion of `zSig0' will be added into the exponent.
4735| Since a properly normalized significand will have an integer portion equal
4736| to 1, the `zExp' input should be 1 less than the desired result exponent
4737| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4738| significand.
4739*----------------------------------------------------------------------------*/
4740
a49db98d 4741static inline float128
c120391c 4742packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
158142c2
FB
4743{
4744 float128 z;
4745
4746 z.low = zSig1;
c120391c 4747 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
158142c2 4748 return z;
158142c2
FB
4749}
4750
4751/*----------------------------------------------------------------------------
4752| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4753| and extended significand formed by the concatenation of `zSig0', `zSig1',
4754| and `zSig2', and returns the proper quadruple-precision floating-point value
4755| corresponding to the abstract input. Ordinarily, the abstract value is
4756| simply rounded and packed into the quadruple-precision format, with the
4757| inexact exception raised if the abstract input cannot be represented
4758| exactly. However, if the abstract value is too large, the overflow and
4759| inexact exceptions are raised and an infinity or maximal finite value is
4760| returned. If the abstract value is too small, the input value is rounded to
4761| a subnormal number, and the underflow and inexact exceptions are raised if
4762| the abstract input cannot be represented exactly as a subnormal quadruple-
4763| precision floating-point number.
4764| The input significand must be normalized or smaller. If the input
4765| significand is not normalized, `zExp' must be 0; in that case, the result
4766| returned is a subnormal number, and it must not require rounding. In the
4767| usual case that the input significand is normalized, `zExp' must be 1 less
4768| than the ``true'' floating-point exponent. The handling of underflow and
4769| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4770*----------------------------------------------------------------------------*/
4771
c120391c 4772static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4773 uint64_t zSig0, uint64_t zSig1,
4774 uint64_t zSig2, float_status *status)
158142c2 4775{
8f506c70 4776 int8_t roundingMode;
c120391c 4777 bool roundNearestEven, increment, isTiny;
158142c2 4778
a2f2d288 4779 roundingMode = status->float_rounding_mode;
158142c2 4780 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4781 switch (roundingMode) {
4782 case float_round_nearest_even:
f9288a76 4783 case float_round_ties_away:
dc355b76
PM
4784 increment = ((int64_t)zSig2 < 0);
4785 break;
4786 case float_round_to_zero:
4787 increment = 0;
4788 break;
4789 case float_round_up:
4790 increment = !zSign && zSig2;
4791 break;
4792 case float_round_down:
4793 increment = zSign && zSig2;
4794 break;
9ee6f678
BR
4795 case float_round_to_odd:
4796 increment = !(zSig1 & 0x1) && zSig2;
4797 break;
dc355b76
PM
4798 default:
4799 abort();
158142c2 4800 }
bb98fe42 4801 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
4802 if ( ( 0x7FFD < zExp )
4803 || ( ( zExp == 0x7FFD )
4804 && eq128(
e9321124
AB
4805 UINT64_C(0x0001FFFFFFFFFFFF),
4806 UINT64_C(0xFFFFFFFFFFFFFFFF),
158142c2
FB
4807 zSig0,
4808 zSig1
4809 )
4810 && increment
4811 )
4812 ) {
ff32e16e 4813 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4814 if ( ( roundingMode == float_round_to_zero )
4815 || ( zSign && ( roundingMode == float_round_up ) )
4816 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 4817 || (roundingMode == float_round_to_odd)
158142c2
FB
4818 ) {
4819 return
4820 packFloat128(
4821 zSign,
4822 0x7FFE,
e9321124
AB
4823 UINT64_C(0x0000FFFFFFFFFFFF),
4824 UINT64_C(0xFFFFFFFFFFFFFFFF)
158142c2
FB
4825 );
4826 }
4827 return packFloat128( zSign, 0x7FFF, 0, 0 );
4828 }
4829 if ( zExp < 0 ) {
a2f2d288 4830 if (status->flush_to_zero) {
ff32e16e 4831 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4832 return packFloat128(zSign, 0, 0, 0);
4833 }
a828b373
RH
4834 isTiny = status->tininess_before_rounding
4835 || (zExp < -1)
4836 || !increment
4837 || lt128(zSig0, zSig1,
4838 UINT64_C(0x0001FFFFFFFFFFFF),
4839 UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4840 shift128ExtraRightJamming(
4841 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4842 zExp = 0;
ff32e16e
PM
4843 if (isTiny && zSig2) {
4844 float_raise(float_flag_underflow, status);
4845 }
dc355b76
PM
4846 switch (roundingMode) {
4847 case float_round_nearest_even:
f9288a76 4848 case float_round_ties_away:
dc355b76
PM
4849 increment = ((int64_t)zSig2 < 0);
4850 break;
4851 case float_round_to_zero:
4852 increment = 0;
4853 break;
4854 case float_round_up:
4855 increment = !zSign && zSig2;
4856 break;
4857 case float_round_down:
4858 increment = zSign && zSig2;
4859 break;
9ee6f678
BR
4860 case float_round_to_odd:
4861 increment = !(zSig1 & 0x1) && zSig2;
4862 break;
dc355b76
PM
4863 default:
4864 abort();
158142c2
FB
4865 }
4866 }
4867 }
a2f2d288 4868 if (zSig2) {
d82f3b2d 4869 float_raise(float_flag_inexact, status);
a2f2d288 4870 }
158142c2
FB
4871 if ( increment ) {
4872 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
40662886
PMD
4873 if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4874 zSig1 &= ~1;
4875 }
158142c2
FB
4876 }
4877 else {
4878 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4879 }
4880 return packFloat128( zSign, zExp, zSig0, zSig1 );
4881
4882}
4883
4884/*----------------------------------------------------------------------------
4885| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4886| and significand formed by the concatenation of `zSig0' and `zSig1', and
4887| returns the proper quadruple-precision floating-point value corresponding
4888| to the abstract input. This routine is just like `roundAndPackFloat128'
4889| except that the input significand has fewer bits and does not have to be
4890| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4891| point exponent.
4892*----------------------------------------------------------------------------*/
4893
c120391c 4894static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4895 uint64_t zSig0, uint64_t zSig1,
4896 float_status *status)
158142c2 4897{
8f506c70 4898 int8_t shiftCount;
bb98fe42 4899 uint64_t zSig2;
158142c2
FB
4900
4901 if ( zSig0 == 0 ) {
4902 zSig0 = zSig1;
4903 zSig1 = 0;
4904 zExp -= 64;
4905 }
0019d5c3 4906 shiftCount = clz64(zSig0) - 15;
158142c2
FB
4907 if ( 0 <= shiftCount ) {
4908 zSig2 = 0;
4909 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4910 }
4911 else {
4912 shift128ExtraRightJamming(
4913 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4914 }
4915 zExp -= shiftCount;
ff32e16e 4916 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
4917
4918}
4919
158142c2 4920
158142c2
FB
4921/*----------------------------------------------------------------------------
4922| Returns the result of converting the 32-bit two's complement integer `a'
4923| to the extended double-precision floating-point format. The conversion
4924| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4925| Arithmetic.
4926*----------------------------------------------------------------------------*/
4927
e5a41ffa 4928floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2 4929{
c120391c 4930 bool zSign;
3a87d009 4931 uint32_t absA;
8f506c70 4932 int8_t shiftCount;
bb98fe42 4933 uint64_t zSig;
158142c2
FB
4934
4935 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4936 zSign = ( a < 0 );
4937 absA = zSign ? - a : a;
0019d5c3 4938 shiftCount = clz32(absA) + 32;
158142c2
FB
4939 zSig = absA;
4940 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4941
4942}
4943
158142c2
FB
4944/*----------------------------------------------------------------------------
4945| Returns the result of converting the 32-bit two's complement integer `a' to
4946| the quadruple-precision floating-point format. The conversion is performed
4947| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4948*----------------------------------------------------------------------------*/
4949
e5a41ffa 4950float128 int32_to_float128(int32_t a, float_status *status)
158142c2 4951{
c120391c 4952 bool zSign;
3a87d009 4953 uint32_t absA;
8f506c70 4954 int8_t shiftCount;
bb98fe42 4955 uint64_t zSig0;
158142c2
FB
4956
4957 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4958 zSign = ( a < 0 );
4959 absA = zSign ? - a : a;
0019d5c3 4960 shiftCount = clz32(absA) + 17;
158142c2
FB
4961 zSig0 = absA;
4962 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
4963
4964}
4965
158142c2
FB
4966/*----------------------------------------------------------------------------
4967| Returns the result of converting the 64-bit two's complement integer `a'
4968| to the extended double-precision floating-point format. The conversion
4969| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4970| Arithmetic.
4971*----------------------------------------------------------------------------*/
4972
e5a41ffa 4973floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2 4974{
c120391c 4975 bool zSign;
182f42fd 4976 uint64_t absA;
8f506c70 4977 int8_t shiftCount;
158142c2
FB
4978
4979 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4980 zSign = ( a < 0 );
4981 absA = zSign ? - a : a;
0019d5c3 4982 shiftCount = clz64(absA);
158142c2
FB
4983 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4984
4985}
4986
158142c2
FB
4987/*----------------------------------------------------------------------------
4988| Returns the result of converting the 64-bit two's complement integer `a' to
4989| the quadruple-precision floating-point format. The conversion is performed
4990| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4991*----------------------------------------------------------------------------*/
4992
e5a41ffa 4993float128 int64_to_float128(int64_t a, float_status *status)
158142c2 4994{
c120391c 4995 bool zSign;
182f42fd 4996 uint64_t absA;
8f506c70 4997 int8_t shiftCount;
f4014512 4998 int32_t zExp;
bb98fe42 4999 uint64_t zSig0, zSig1;
158142c2
FB
5000
5001 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
5002 zSign = ( a < 0 );
5003 absA = zSign ? - a : a;
0019d5c3 5004 shiftCount = clz64(absA) + 49;
158142c2
FB
5005 zExp = 0x406E - shiftCount;
5006 if ( 64 <= shiftCount ) {
5007 zSig1 = 0;
5008 zSig0 = absA;
5009 shiftCount -= 64;
5010 }
5011 else {
5012 zSig1 = absA;
5013 zSig0 = 0;
5014 }
5015 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5016 return packFloat128( zSign, zExp, zSig0, zSig1 );
5017
5018}
5019
6bb8e0f1
PM
5020/*----------------------------------------------------------------------------
5021| Returns the result of converting the 64-bit unsigned integer `a'
5022| to the quadruple-precision floating-point format. The conversion is performed
5023| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5024*----------------------------------------------------------------------------*/
5025
e5a41ffa 5026float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
5027{
5028 if (a == 0) {
5029 return float128_zero;
5030 }
6603d506 5031 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
5032}
5033
158142c2
FB
5034/*----------------------------------------------------------------------------
5035| Returns the result of converting the single-precision floating-point value
5036| `a' to the extended double-precision floating-point format. The conversion
5037| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5038| Arithmetic.
5039*----------------------------------------------------------------------------*/
5040
e5a41ffa 5041floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2 5042{
c120391c 5043 bool aSign;
0c48262d 5044 int aExp;
bb98fe42 5045 uint32_t aSig;
158142c2 5046
ff32e16e 5047 a = float32_squash_input_denormal(a, status);
158142c2
FB
5048 aSig = extractFloat32Frac( a );
5049 aExp = extractFloat32Exp( a );
5050 aSign = extractFloat32Sign( a );
5051 if ( aExp == 0xFF ) {
ff32e16e 5052 if (aSig) {
7537c2b4
JM
5053 floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
5054 status);
5055 return floatx80_silence_nan(res, status);
ff32e16e 5056 }
0f605c88
LV
5057 return packFloatx80(aSign,
5058 floatx80_infinity_high,
5059 floatx80_infinity_low);
158142c2
FB
5060 }
5061 if ( aExp == 0 ) {
5062 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5063 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5064 }
5065 aSig |= 0x00800000;
bb98fe42 5066 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
5067
5068}
5069
158142c2
FB
5070/*----------------------------------------------------------------------------
5071| Returns the result of converting the single-precision floating-point value
5072| `a' to the quadruple-precision floating-point format. The conversion is
5073| performed according to the IEC/IEEE Standard for Binary Floating-Point
5074| Arithmetic.
5075*----------------------------------------------------------------------------*/
5076
e5a41ffa 5077float128 float32_to_float128(float32 a, float_status *status)
158142c2 5078{
c120391c 5079 bool aSign;
0c48262d 5080 int aExp;
bb98fe42 5081 uint32_t aSig;
158142c2 5082
ff32e16e 5083 a = float32_squash_input_denormal(a, status);
158142c2
FB
5084 aSig = extractFloat32Frac( a );
5085 aExp = extractFloat32Exp( a );
5086 aSign = extractFloat32Sign( a );
5087 if ( aExp == 0xFF ) {
ff32e16e
PM
5088 if (aSig) {
5089 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
5090 }
158142c2
FB
5091 return packFloat128( aSign, 0x7FFF, 0, 0 );
5092 }
5093 if ( aExp == 0 ) {
5094 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5095 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5096 --aExp;
5097 }
bb98fe42 5098 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
5099
5100}
5101
158142c2
FB
5102/*----------------------------------------------------------------------------
5103| Returns the remainder of the single-precision floating-point value `a'
5104| with respect to the corresponding value `b'. The operation is performed
5105| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5106*----------------------------------------------------------------------------*/
5107
e5a41ffa 5108float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 5109{
c120391c 5110 bool aSign, zSign;
0c48262d 5111 int aExp, bExp, expDiff;
bb98fe42
AF
5112 uint32_t aSig, bSig;
5113 uint32_t q;
5114 uint64_t aSig64, bSig64, q64;
5115 uint32_t alternateASig;
5116 int32_t sigMean;
ff32e16e
PM
5117 a = float32_squash_input_denormal(a, status);
5118 b = float32_squash_input_denormal(b, status);
158142c2
FB
5119
5120 aSig = extractFloat32Frac( a );
5121 aExp = extractFloat32Exp( a );
5122 aSign = extractFloat32Sign( a );
5123 bSig = extractFloat32Frac( b );
5124 bExp = extractFloat32Exp( b );
158142c2
FB
5125 if ( aExp == 0xFF ) {
5126 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 5127 return propagateFloat32NaN(a, b, status);
158142c2 5128 }
ff32e16e 5129 float_raise(float_flag_invalid, status);
af39bc8c 5130 return float32_default_nan(status);
158142c2
FB
5131 }
5132 if ( bExp == 0xFF ) {
ff32e16e
PM
5133 if (bSig) {
5134 return propagateFloat32NaN(a, b, status);
5135 }
158142c2
FB
5136 return a;
5137 }
5138 if ( bExp == 0 ) {
5139 if ( bSig == 0 ) {
ff32e16e 5140 float_raise(float_flag_invalid, status);
af39bc8c 5141 return float32_default_nan(status);
158142c2
FB
5142 }
5143 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
5144 }
5145 if ( aExp == 0 ) {
5146 if ( aSig == 0 ) return a;
5147 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5148 }
5149 expDiff = aExp - bExp;
5150 aSig |= 0x00800000;
5151 bSig |= 0x00800000;
5152 if ( expDiff < 32 ) {
5153 aSig <<= 8;
5154 bSig <<= 8;
5155 if ( expDiff < 0 ) {
5156 if ( expDiff < -1 ) return a;
5157 aSig >>= 1;
5158 }
5159 q = ( bSig <= aSig );
5160 if ( q ) aSig -= bSig;
5161 if ( 0 < expDiff ) {
bb98fe42 5162 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
5163 q >>= 32 - expDiff;
5164 bSig >>= 2;
5165 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5166 }
5167 else {
5168 aSig >>= 2;
5169 bSig >>= 2;
5170 }
5171 }
5172 else {
5173 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
5174 aSig64 = ( (uint64_t) aSig )<<40;
5175 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
5176 expDiff -= 64;
5177 while ( 0 < expDiff ) {
5178 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5179 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5180 aSig64 = - ( ( bSig * q64 )<<38 );
5181 expDiff -= 62;
5182 }
5183 expDiff += 64;
5184 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5185 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5186 q = q64>>( 64 - expDiff );
5187 bSig <<= 6;
5188 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5189 }
5190 do {
5191 alternateASig = aSig;
5192 ++q;
5193 aSig -= bSig;
bb98fe42 5194 } while ( 0 <= (int32_t) aSig );
158142c2
FB
5195 sigMean = aSig + alternateASig;
5196 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5197 aSig = alternateASig;
5198 }
bb98fe42 5199 zSign = ( (int32_t) aSig < 0 );
158142c2 5200 if ( zSign ) aSig = - aSig;
ff32e16e 5201 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5202}
5203
369be8f6 5204
158142c2 5205
8229c991
AJ
5206/*----------------------------------------------------------------------------
5207| Returns the binary exponential of the single-precision floating-point value
5208| `a'. The operation is performed according to the IEC/IEEE Standard for
5209| Binary Floating-Point Arithmetic.
5210|
5211| Uses the following identities:
5212|
5213| 1. -------------------------------------------------------------------------
5214| x x*ln(2)
5215| 2 = e
5216|
5217| 2. -------------------------------------------------------------------------
5218| 2 3 4 5 n
5219| x x x x x x x
5220| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5221| 1! 2! 3! 4! 5! n!
5222*----------------------------------------------------------------------------*/
5223
/*
 * Taylor-series coefficients 1/n! for n = 1..15, stored as float64 bit
 * patterns.  Entry i multiplies x^(i+1) in float32_exp2().
 */
5224static const float64 float32_exp2_coefficients[15] =
5225{
d5138cf4
PM
5226 const_float64( 0x3ff0000000000000ll ), /* 1/1! */
5227 const_float64( 0x3fe0000000000000ll ), /* 1/2! */
5228 const_float64( 0x3fc5555555555555ll ), /* 1/3! */
5229 const_float64( 0x3fa5555555555555ll ), /* 1/4! */
5230 const_float64( 0x3f81111111111111ll ), /* 1/5! */
5231 const_float64( 0x3f56c16c16c16c17ll ), /* 1/6! */
5232 const_float64( 0x3f2a01a01a01a01all ), /* 1/7! */
5233 const_float64( 0x3efa01a01a01a01all ), /* 1/8! */
5234 const_float64( 0x3ec71de3a556c734ll ), /* 1/9! */
5235 const_float64( 0x3e927e4fb7789f5cll ), /* 1/10! */
5236 const_float64( 0x3e5ae64567f544e4ll ), /* 1/11! */
5237 const_float64( 0x3e21eed8eff8d898ll ), /* 1/12! */
5238 const_float64( 0x3de6124613a86d09ll ), /* 1/13! */
5239 const_float64( 0x3da93974a8c07c9dll ), /* 1/14! */
5240 const_float64( 0x3d6ae7f3e733b81fll ), /* 1/15! */
8229c991
AJ
5241};
5242
e5a41ffa 5243float32 float32_exp2(float32 a, float_status *status)
8229c991 5244{
c120391c 5245 bool aSign;
0c48262d 5246 int aExp;
bb98fe42 5247 uint32_t aSig;
8229c991
AJ
5248 float64 r, x, xn;
5249 int i;
ff32e16e 5250 a = float32_squash_input_denormal(a, status);
8229c991
AJ
5251
5252 aSig = extractFloat32Frac( a );
5253 aExp = extractFloat32Exp( a );
5254 aSign = extractFloat32Sign( a );
5255
5256 if ( aExp == 0xFF) {
ff32e16e
PM
5257 if (aSig) {
5258 return propagateFloat32NaN(a, float32_zero, status);
5259 }
8229c991
AJ
5260 return (aSign) ? float32_zero : a;
5261 }
5262 if (aExp == 0) {
5263 if (aSig == 0) return float32_one;
5264 }
5265
ff32e16e 5266 float_raise(float_flag_inexact, status);
8229c991
AJ
5267
5268 /* ******************************* */
5269 /* using float64 for approximation */
5270 /* ******************************* */
ff32e16e
PM
5271 x = float32_to_float64(a, status);
5272 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
5273
5274 xn = x;
5275 r = float64_one;
5276 for (i = 0 ; i < 15 ; i++) {
5277 float64 f;
5278
ff32e16e
PM
5279 f = float64_mul(xn, float32_exp2_coefficients[i], status);
5280 r = float64_add(r, f, status);
8229c991 5281
ff32e16e 5282 xn = float64_mul(xn, x, status);
8229c991
AJ
5283 }
5284
5285 return float64_to_float32(r, status);
5286}
5287
374dfc33
AJ
5288/*----------------------------------------------------------------------------
5289| Returns the binary log of the single-precision floating-point value `a'.
5290| The operation is performed according to the IEC/IEEE Standard for Binary
5291| Floating-Point Arithmetic.
5292*----------------------------------------------------------------------------*/
e5a41ffa 5293float32 float32_log2(float32 a, float_status *status)
374dfc33 5294{
c120391c 5295 bool aSign, zSign;
0c48262d 5296 int aExp;
bb98fe42 5297 uint32_t aSig, zSig, i;
374dfc33 5298
ff32e16e 5299 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
5300 aSig = extractFloat32Frac( a );
5301 aExp = extractFloat32Exp( a );
5302 aSign = extractFloat32Sign( a );
5303
5304 if ( aExp == 0 ) {
5305 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5306 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5307 }
5308 if ( aSign ) {
ff32e16e 5309 float_raise(float_flag_invalid, status);
af39bc8c 5310 return float32_default_nan(status);
374dfc33
AJ
5311 }
5312 if ( aExp == 0xFF ) {
ff32e16e
PM
5313 if (aSig) {
5314 return propagateFloat32NaN(a, float32_zero, status);
5315 }
374dfc33
AJ
5316 return a;
5317 }
5318
5319 aExp -= 0x7F;
5320 aSig |= 0x00800000;
5321 zSign = aExp < 0;
5322 zSig = aExp << 23;
5323
5324 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 5325 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
5326 if ( aSig & 0x01000000 ) {
5327 aSig >>= 1;
5328 zSig |= i;
5329 }
5330 }
5331
5332 if ( zSign )
5333 zSig = -zSig;
5334
ff32e16e 5335 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
5336}
5337
158142c2 5338/*----------------------------------------------------------------------------
158142c2
FB
5339| Returns the result of converting the double-precision floating-point value
5340| `a' to the extended double-precision floating-point format. The conversion
5341| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5342| Arithmetic.
5343*----------------------------------------------------------------------------*/
5344
e5a41ffa 5345floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2 5346{
c120391c 5347 bool aSign;
0c48262d 5348 int aExp;
bb98fe42 5349 uint64_t aSig;
158142c2 5350
ff32e16e 5351 a = float64_squash_input_denormal(a, status);
158142c2
FB
5352 aSig = extractFloat64Frac( a );
5353 aExp = extractFloat64Exp( a );
5354 aSign = extractFloat64Sign( a );
5355 if ( aExp == 0x7FF ) {
ff32e16e 5356 if (aSig) {
7537c2b4
JM
5357 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5358 status);
5359 return floatx80_silence_nan(res, status);
ff32e16e 5360 }
0f605c88
LV
5361 return packFloatx80(aSign,
5362 floatx80_infinity_high,
5363 floatx80_infinity_low);
158142c2
FB
5364 }
5365 if ( aExp == 0 ) {
5366 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5367 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5368 }
5369 return
5370 packFloatx80(
e9321124 5371 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
158142c2
FB
5372
5373}
5374
158142c2
FB
5375/*----------------------------------------------------------------------------
5376| Returns the result of converting the double-precision floating-point value
5377| `a' to the quadruple-precision floating-point format. The conversion is
5378| performed according to the IEC/IEEE Standard for Binary Floating-Point
5379| Arithmetic.
5380*----------------------------------------------------------------------------*/
5381
e5a41ffa 5382float128 float64_to_float128(float64 a, float_status *status)
158142c2 5383{
c120391c 5384 bool aSign;
0c48262d 5385 int aExp;
bb98fe42 5386 uint64_t aSig, zSig0, zSig1;
158142c2 5387
ff32e16e 5388 a = float64_squash_input_denormal(a, status);
158142c2
FB
5389 aSig = extractFloat64Frac( a );
5390 aExp = extractFloat64Exp( a );
5391 aSign = extractFloat64Sign( a );
5392 if ( aExp == 0x7FF ) {
ff32e16e
PM
5393 if (aSig) {
5394 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
5395 }
158142c2
FB
5396 return packFloat128( aSign, 0x7FFF, 0, 0 );
5397 }
5398 if ( aExp == 0 ) {
5399 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5400 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5401 --aExp;
5402 }
5403 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
5404 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
5405
5406}
5407
158142c2
FB
5408
5409/*----------------------------------------------------------------------------
5410| Returns the remainder of the double-precision floating-point value `a'
5411| with respect to the corresponding value `b'. The operation is performed
5412| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5413*----------------------------------------------------------------------------*/
5414
e5a41ffa 5415float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 5416{
c120391c 5417 bool aSign, zSign;
0c48262d 5418 int aExp, bExp, expDiff;
bb98fe42
AF
5419 uint64_t aSig, bSig;
5420 uint64_t q, alternateASig;
5421 int64_t sigMean;
158142c2 5422
ff32e16e
PM
5423 a = float64_squash_input_denormal(a, status);
5424 b = float64_squash_input_denormal(b, status);
158142c2
FB
5425 aSig = extractFloat64Frac( a );
5426 aExp = extractFloat64Exp( a );
5427 aSign = extractFloat64Sign( a );
5428 bSig = extractFloat64Frac( b );
5429 bExp = extractFloat64Exp( b );
158142c2
FB
5430 if ( aExp == 0x7FF ) {
5431 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 5432 return propagateFloat64NaN(a, b, status);
158142c2 5433 }
ff32e16e 5434 float_raise(float_flag_invalid, status);
af39bc8c 5435 return float64_default_nan(status);
158142c2
FB
5436 }
5437 if ( bExp == 0x7FF ) {
ff32e16e
PM
5438 if (bSig) {
5439 return propagateFloat64NaN(a, b, status);
5440 }
158142c2
FB
5441 return a;
5442 }
5443 if ( bExp == 0 ) {
5444 if ( bSig == 0 ) {
ff32e16e 5445 float_raise(float_flag_invalid, status);
af39bc8c 5446 return float64_default_nan(status);
158142c2
FB
5447 }
5448 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5449 }
5450 if ( aExp == 0 ) {
5451 if ( aSig == 0 ) return a;
5452 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5453 }
5454 expDiff = aExp - bExp;
e9321124
AB
5455 aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5456 bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
158142c2
FB
5457 if ( expDiff < 0 ) {
5458 if ( expDiff < -1 ) return a;
5459 aSig >>= 1;
5460 }
5461 q = ( bSig <= aSig );
5462 if ( q ) aSig -= bSig;
5463 expDiff -= 64;
5464 while ( 0 < expDiff ) {
5465 q = estimateDiv128To64( aSig, 0, bSig );
5466 q = ( 2 < q ) ? q - 2 : 0;
5467 aSig = - ( ( bSig>>2 ) * q );
5468 expDiff -= 62;
5469 }
5470 expDiff += 64;
5471 if ( 0 < expDiff ) {
5472 q = estimateDiv128To64( aSig, 0, bSig );
5473 q = ( 2 < q ) ? q - 2 : 0;
5474 q >>= 64 - expDiff;
5475 bSig >>= 2;
5476 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5477 }
5478 else {
5479 aSig >>= 2;
5480 bSig >>= 2;
5481 }
5482 do {
5483 alternateASig = aSig;
5484 ++q;
5485 aSig -= bSig;
bb98fe42 5486 } while ( 0 <= (int64_t) aSig );
158142c2
FB
5487 sigMean = aSig + alternateASig;
5488 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5489 aSig = alternateASig;
5490 }
bb98fe42 5491 zSign = ( (int64_t) aSig < 0 );
158142c2 5492 if ( zSign ) aSig = - aSig;
ff32e16e 5493 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5494
5495}
5496
374dfc33
AJ
5497/*----------------------------------------------------------------------------
5498| Returns the binary log of the double-precision floating-point value `a'.
5499| The operation is performed according to the IEC/IEEE Standard for Binary
5500| Floating-Point Arithmetic.
5501*----------------------------------------------------------------------------*/
e5a41ffa 5502float64 float64_log2(float64 a, float_status *status)
374dfc33 5503{
c120391c 5504 bool aSign, zSign;
0c48262d 5505 int aExp;
bb98fe42 5506 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 5507 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
5508
5509 aSig = extractFloat64Frac( a );
5510 aExp = extractFloat64Exp( a );
5511 aSign = extractFloat64Sign( a );
5512
5513 if ( aExp == 0 ) {
5514 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5515 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5516 }
5517 if ( aSign ) {
ff32e16e 5518 float_raise(float_flag_invalid, status);
af39bc8c 5519 return float64_default_nan(status);
374dfc33
AJ
5520 }
5521 if ( aExp == 0x7FF ) {
ff32e16e
PM
5522 if (aSig) {
5523 return propagateFloat64NaN(a, float64_zero, status);
5524 }
374dfc33
AJ
5525 return a;
5526 }
5527
5528 aExp -= 0x3FF;
e9321124 5529 aSig |= UINT64_C(0x0010000000000000);
374dfc33 5530 zSign = aExp < 0;
bb98fe42 5531 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
5532 for (i = 1LL << 51; i > 0; i >>= 1) {
5533 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5534 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
e9321124 5535 if ( aSig & UINT64_C(0x0020000000000000) ) {
374dfc33
AJ
5536 aSig >>= 1;
5537 zSig |= i;
5538 }
5539 }
5540
5541 if ( zSign )
5542 zSig = -zSig;
ff32e16e 5543 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
5544}
5545
158142c2
FB
5546/*----------------------------------------------------------------------------
5547| Returns the result of converting the extended double-precision floating-
5548| point value `a' to the 32-bit two's complement integer format. The
5549| conversion is performed according to the IEC/IEEE Standard for Binary
5550| Floating-Point Arithmetic---which means in particular that the conversion
5551| is rounded according to the current rounding mode. If `a' is a NaN, the
5552| largest positive integer is returned. Otherwise, if the conversion
5553| overflows, the largest integer with the same sign as `a' is returned.
5554*----------------------------------------------------------------------------*/
5555
f4014512 5556int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2 5557{
c120391c 5558 bool aSign;
f4014512 5559 int32_t aExp, shiftCount;
bb98fe42 5560 uint64_t aSig;
158142c2 5561
d1eb8f2a
AD
5562 if (floatx80_invalid_encoding(a)) {
5563 float_raise(float_flag_invalid, status);
5564 return 1 << 31;
5565 }
158142c2
FB
5566 aSig = extractFloatx80Frac( a );
5567 aExp = extractFloatx80Exp( a );
5568 aSign = extractFloatx80Sign( a );
bb98fe42 5569 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5570 shiftCount = 0x4037 - aExp;
5571 if ( shiftCount <= 0 ) shiftCount = 1;
5572 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 5573 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
5574
5575}
5576
5577/*----------------------------------------------------------------------------
5578| Returns the result of converting the extended double-precision floating-
5579| point value `a' to the 32-bit two's complement integer format. The
5580| conversion is performed according to the IEC/IEEE Standard for Binary
5581| Floating-Point Arithmetic, except that the conversion is always rounded
5582| toward zero. If `a' is a NaN, the largest positive integer is returned.
5583| Otherwise, if the conversion overflows, the largest integer with the same
5584| sign as `a' is returned.
5585*----------------------------------------------------------------------------*/
5586
f4014512 5587int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2 5588{
c120391c 5589 bool aSign;
f4014512 5590 int32_t aExp, shiftCount;
bb98fe42 5591 uint64_t aSig, savedASig;
b3a6a2e0 5592 int32_t z;
158142c2 5593
d1eb8f2a
AD
5594 if (floatx80_invalid_encoding(a)) {
5595 float_raise(float_flag_invalid, status);
5596 return 1 << 31;
5597 }
158142c2
FB
5598 aSig = extractFloatx80Frac( a );
5599 aExp = extractFloatx80Exp( a );
5600 aSign = extractFloatx80Sign( a );
5601 if ( 0x401E < aExp ) {
bb98fe42 5602 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5603 goto invalid;
5604 }
5605 else if ( aExp < 0x3FFF ) {
a2f2d288 5606 if (aExp || aSig) {
d82f3b2d 5607 float_raise(float_flag_inexact, status);
a2f2d288 5608 }
158142c2
FB
5609 return 0;
5610 }
5611 shiftCount = 0x403E - aExp;
5612 savedASig = aSig;
5613 aSig >>= shiftCount;
5614 z = aSig;
5615 if ( aSign ) z = - z;
5616 if ( ( z < 0 ) ^ aSign ) {
5617 invalid:
ff32e16e 5618 float_raise(float_flag_invalid, status);
bb98fe42 5619 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5620 }
5621 if ( ( aSig<<shiftCount ) != savedASig ) {
d82f3b2d 5622 float_raise(float_flag_inexact, status);
158142c2
FB
5623 }
5624 return z;
5625
5626}
5627
5628/*----------------------------------------------------------------------------
5629| Returns the result of converting the extended double-precision floating-
5630| point value `a' to the 64-bit two's complement integer format. The
5631| conversion is performed according to the IEC/IEEE Standard for Binary
5632| Floating-Point Arithmetic---which means in particular that the conversion
5633| is rounded according to the current rounding mode. If `a' is a NaN,
5634| the largest positive integer is returned. Otherwise, if the conversion
5635| overflows, the largest integer with the same sign as `a' is returned.
5636*----------------------------------------------------------------------------*/
5637
f42c2224 5638int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2 5639{
c120391c 5640 bool aSign;
f4014512 5641 int32_t aExp, shiftCount;
bb98fe42 5642 uint64_t aSig, aSigExtra;
158142c2 5643
d1eb8f2a
AD
5644 if (floatx80_invalid_encoding(a)) {
5645 float_raise(float_flag_invalid, status);
5646 return 1ULL << 63;
5647 }
158142c2
FB
5648 aSig = extractFloatx80Frac( a );
5649 aExp = extractFloatx80Exp( a );
5650 aSign = extractFloatx80Sign( a );
5651 shiftCount = 0x403E - aExp;
5652 if ( shiftCount <= 0 ) {
5653 if ( shiftCount ) {
ff32e16e 5654 float_raise(float_flag_invalid, status);
0f605c88 5655 if (!aSign || floatx80_is_any_nan(a)) {
2c217da0 5656 return INT64_MAX;
158142c2 5657 }
2c217da0 5658 return INT64_MIN;
158142c2
FB
5659 }
5660 aSigExtra = 0;
5661 }
5662 else {
5663 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5664 }
ff32e16e 5665 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
5666
5667}
5668
5669/*----------------------------------------------------------------------------
5670| Returns the result of converting the extended double-precision floating-
5671| point value `a' to the 64-bit two's complement integer format. The
5672| conversion is performed according to the IEC/IEEE Standard for Binary
5673| Floating-Point Arithmetic, except that the conversion is always rounded
5674| toward zero. If `a' is a NaN, the largest positive integer is returned.
5675| Otherwise, if the conversion overflows, the largest integer with the same
5676| sign as `a' is returned.
5677*----------------------------------------------------------------------------*/
5678
f42c2224 5679int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2 5680{
c120391c 5681 bool aSign;
f4014512 5682 int32_t aExp, shiftCount;
bb98fe42 5683 uint64_t aSig;
f42c2224 5684 int64_t z;
158142c2 5685
d1eb8f2a
AD
5686 if (floatx80_invalid_encoding(a)) {
5687 float_raise(float_flag_invalid, status);
5688 return 1ULL << 63;
5689 }
158142c2
FB
5690 aSig = extractFloatx80Frac( a );
5691 aExp = extractFloatx80Exp( a );
5692 aSign = extractFloatx80Sign( a );
5693 shiftCount = aExp - 0x403E;
5694 if ( 0 <= shiftCount ) {
e9321124 5695 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
158142c2 5696 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 5697 float_raise(float_flag_invalid, status);
158142c2 5698 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
2c217da0 5699 return INT64_MAX;
158142c2
FB
5700 }
5701 }
2c217da0 5702 return INT64_MIN;
158142c2
FB
5703 }
5704 else if ( aExp < 0x3FFF ) {
a2f2d288 5705 if (aExp | aSig) {
d82f3b2d 5706 float_raise(float_flag_inexact, status);
a2f2d288 5707 }
158142c2
FB
5708 return 0;
5709 }
5710 z = aSig>>( - shiftCount );
bb98fe42 5711 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
d82f3b2d 5712 float_raise(float_flag_inexact, status);
158142c2
FB
5713 }
5714 if ( aSign ) z = - z;
5715 return z;
5716
5717}
5718
5719/*----------------------------------------------------------------------------
5720| Returns the result of converting the extended double-precision floating-
5721| point value `a' to the single-precision floating-point format. The
5722| conversion is performed according to the IEC/IEEE Standard for Binary
5723| Floating-Point Arithmetic.
5724*----------------------------------------------------------------------------*/
5725
e5a41ffa 5726float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2 5727{
c120391c 5728 bool aSign;
f4014512 5729 int32_t aExp;
bb98fe42 5730 uint64_t aSig;
158142c2 5731
d1eb8f2a
AD
5732 if (floatx80_invalid_encoding(a)) {
5733 float_raise(float_flag_invalid, status);
5734 return float32_default_nan(status);
5735 }
158142c2
FB
5736 aSig = extractFloatx80Frac( a );
5737 aExp = extractFloatx80Exp( a );
5738 aSign = extractFloatx80Sign( a );
5739 if ( aExp == 0x7FFF ) {
bb98fe42 5740 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5741 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5742 status);
5743 return float32_silence_nan(res, status);
158142c2
FB
5744 }
5745 return packFloat32( aSign, 0xFF, 0 );
5746 }
5747 shift64RightJamming( aSig, 33, &aSig );
5748 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5749 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5750
5751}
5752
5753/*----------------------------------------------------------------------------
5754| Returns the result of converting the extended double-precision floating-
5755| point value `a' to the double-precision floating-point format. The
5756| conversion is performed according to the IEC/IEEE Standard for Binary
5757| Floating-Point Arithmetic.
5758*----------------------------------------------------------------------------*/
5759
e5a41ffa 5760float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2 5761{
c120391c 5762 bool aSign;
f4014512 5763 int32_t aExp;
bb98fe42 5764 uint64_t aSig, zSig;
158142c2 5765
d1eb8f2a
AD
5766 if (floatx80_invalid_encoding(a)) {
5767 float_raise(float_flag_invalid, status);
5768 return float64_default_nan(status);
5769 }
158142c2
FB
5770 aSig = extractFloatx80Frac( a );
5771 aExp = extractFloatx80Exp( a );
5772 aSign = extractFloatx80Sign( a );
5773 if ( aExp == 0x7FFF ) {
bb98fe42 5774 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5775 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5776 status);
5777 return float64_silence_nan(res, status);
158142c2
FB
5778 }
5779 return packFloat64( aSign, 0x7FF, 0 );
5780 }
5781 shift64RightJamming( aSig, 1, &zSig );
5782 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5783 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5784
5785}
5786
158142c2
FB
5787/*----------------------------------------------------------------------------
5788| Returns the result of converting the extended double-precision floating-
5789| point value `a' to the quadruple-precision floating-point format. The
5790| conversion is performed according to the IEC/IEEE Standard for Binary
5791| Floating-Point Arithmetic.
5792*----------------------------------------------------------------------------*/
5793
e5a41ffa 5794float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2 5795{
c120391c 5796 bool aSign;
0c48262d 5797 int aExp;
bb98fe42 5798 uint64_t aSig, zSig0, zSig1;
158142c2 5799
d1eb8f2a
AD
5800 if (floatx80_invalid_encoding(a)) {
5801 float_raise(float_flag_invalid, status);
5802 return float128_default_nan(status);
5803 }
158142c2
FB
5804 aSig = extractFloatx80Frac( a );
5805 aExp = extractFloatx80Exp( a );
5806 aSign = extractFloatx80Sign( a );
bb98fe42 5807 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5808 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5809 status);
5810 return float128_silence_nan(res, status);
158142c2
FB
5811 }
5812 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5813 return packFloat128( aSign, aExp, zSig0, zSig1 );
5814
5815}
5816
0f721292
LV
5817/*----------------------------------------------------------------------------
5818| Rounds the extended double-precision floating-point value `a'
5819| to the precision provided by floatx80_rounding_precision and returns the
5820| result as an extended double-precision floating-point value.
5821| The operation is performed according to the IEC/IEEE Standard for Binary
5822| Floating-Point Arithmetic.
5823*----------------------------------------------------------------------------*/
5824
5825floatx80 floatx80_round(floatx80 a, float_status *status)
5826{
5827 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5828 extractFloatx80Sign(a),
5829 extractFloatx80Exp(a),
5830 extractFloatx80Frac(a), 0, status);
5831}
5832
158142c2
FB
5833/*----------------------------------------------------------------------------
5834| Rounds the extended double-precision floating-point value `a' to an integer,
5835| and returns the result as an extended quadruple-precision floating-point
5836| value. The operation is performed according to the IEC/IEEE Standard for
5837| Binary Floating-Point Arithmetic.
5838*----------------------------------------------------------------------------*/
5839
e5a41ffa 5840floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2 5841{
c120391c 5842 bool aSign;
f4014512 5843 int32_t aExp;
bb98fe42 5844 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5845 floatx80 z;
5846
d1eb8f2a
AD
5847 if (floatx80_invalid_encoding(a)) {
5848 float_raise(float_flag_invalid, status);
5849 return floatx80_default_nan(status);
5850 }
158142c2
FB
5851 aExp = extractFloatx80Exp( a );
5852 if ( 0x403E <= aExp ) {
bb98fe42 5853 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5854 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5855 }
5856 return a;
5857 }
5858 if ( aExp < 0x3FFF ) {
5859 if ( ( aExp == 0 )
9ecaf5cc 5860 && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
158142c2
FB
5861 return a;
5862 }
d82f3b2d 5863 float_raise(float_flag_inexact, status);
158142c2 5864 aSign = extractFloatx80Sign( a );
a2f2d288 5865 switch (status->float_rounding_mode) {
158142c2 5866 case float_round_nearest_even:
bb98fe42 5867 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5868 ) {
5869 return
e9321124 5870 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
158142c2
FB
5871 }
5872 break;
f9288a76
PM
5873 case float_round_ties_away:
5874 if (aExp == 0x3FFE) {
e9321124 5875 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
f9288a76
PM
5876 }
5877 break;
158142c2
FB
5878 case float_round_down:
5879 return
5880 aSign ?
e9321124 5881 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
158142c2
FB
5882 : packFloatx80( 0, 0, 0 );
5883 case float_round_up:
5884 return
5885 aSign ? packFloatx80( 1, 0, 0 )
e9321124 5886 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
3dede407
RH
5887
5888 case float_round_to_zero:
5889 break;
5890 default:
5891 g_assert_not_reached();
158142c2
FB
5892 }
5893 return packFloatx80( aSign, 0, 0 );
5894 }
5895 lastBitMask = 1;
5896 lastBitMask <<= 0x403E - aExp;
5897 roundBitsMask = lastBitMask - 1;
5898 z = a;
a2f2d288 5899 switch (status->float_rounding_mode) {
dc355b76 5900 case float_round_nearest_even:
158142c2 5901 z.low += lastBitMask>>1;
dc355b76
PM
5902 if ((z.low & roundBitsMask) == 0) {
5903 z.low &= ~lastBitMask;
5904 }
5905 break;
f9288a76
PM
5906 case float_round_ties_away:
5907 z.low += lastBitMask >> 1;
5908 break;
dc355b76
PM
5909 case float_round_to_zero:
5910 break;
5911 case float_round_up:
5912 if (!extractFloatx80Sign(z)) {
5913 z.low += roundBitsMask;
5914 }
5915 break;
5916 case float_round_down:
5917 if (extractFloatx80Sign(z)) {
158142c2
FB
5918 z.low += roundBitsMask;
5919 }
dc355b76
PM
5920 break;
5921 default:
5922 abort();
158142c2
FB
5923 }
5924 z.low &= ~ roundBitsMask;
5925 if ( z.low == 0 ) {
5926 ++z.high;
e9321124 5927 z.low = UINT64_C(0x8000000000000000);
158142c2 5928 }
a2f2d288 5929 if (z.low != a.low) {
d82f3b2d 5930 float_raise(float_flag_inexact, status);
a2f2d288 5931 }
158142c2
FB
5932 return z;
5933
5934}
5935
5936/*----------------------------------------------------------------------------
5937| Returns the result of adding the absolute values of the extended double-
5938| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5939| negated before being returned. `zSign' is ignored if the result is a NaN.
5940| The addition is performed according to the IEC/IEEE Standard for Binary
5941| Floating-Point Arithmetic.
5942*----------------------------------------------------------------------------*/
5943
c120391c 5944static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5945 float_status *status)
158142c2 5946{
f4014512 5947 int32_t aExp, bExp, zExp;
bb98fe42 5948 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5949 int32_t expDiff;
158142c2
FB
5950
5951 aSig = extractFloatx80Frac( a );
5952 aExp = extractFloatx80Exp( a );
5953 bSig = extractFloatx80Frac( b );
5954 bExp = extractFloatx80Exp( b );
5955 expDiff = aExp - bExp;
5956 if ( 0 < expDiff ) {
5957 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5958 if ((uint64_t)(aSig << 1)) {
5959 return propagateFloatx80NaN(a, b, status);
5960 }
158142c2
FB
5961 return a;
5962 }
5963 if ( bExp == 0 ) --expDiff;
5964 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5965 zExp = aExp;
5966 }
5967 else if ( expDiff < 0 ) {
5968 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5969 if ((uint64_t)(bSig << 1)) {
5970 return propagateFloatx80NaN(a, b, status);
5971 }
0f605c88
LV
5972 return packFloatx80(zSign,
5973 floatx80_infinity_high,
5974 floatx80_infinity_low);
158142c2
FB
5975 }
5976 if ( aExp == 0 ) ++expDiff;
5977 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5978 zExp = bExp;
5979 }
5980 else {
5981 if ( aExp == 0x7FFF ) {
bb98fe42 5982 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5983 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5984 }
5985 return a;
5986 }
5987 zSig1 = 0;
5988 zSig0 = aSig + bSig;
5989 if ( aExp == 0 ) {
41602807
JM
5990 if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
5991 /* At least one of the values is a pseudo-denormal,
5992 * and there is a carry out of the result. */
5993 zExp = 1;
5994 goto shiftRight1;
5995 }
2f311075
RH
5996 if (zSig0 == 0) {
5997 return packFloatx80(zSign, 0, 0);
5998 }
158142c2
FB
5999 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
6000 goto roundAndPack;
6001 }
6002 zExp = aExp;
6003 goto shiftRight1;
6004 }
6005 zSig0 = aSig + bSig;
bb98fe42 6006 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
6007 shiftRight1:
6008 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
e9321124 6009 zSig0 |= UINT64_C(0x8000000000000000);
158142c2
FB
6010 ++zExp;
6011 roundAndPack:
a2f2d288 6012 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6013 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6014}
6015
6016/*----------------------------------------------------------------------------
6017| Returns the result of subtracting the absolute values of the extended
6018| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
6019| difference is negated before being returned. `zSign' is ignored if the
6020| result is a NaN. The subtraction is performed according to the IEC/IEEE
6021| Standard for Binary Floating-Point Arithmetic.
6022*----------------------------------------------------------------------------*/
6023
c120391c 6024static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 6025 float_status *status)
158142c2 6026{
f4014512 6027 int32_t aExp, bExp, zExp;
bb98fe42 6028 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 6029 int32_t expDiff;
158142c2
FB
6030
6031 aSig = extractFloatx80Frac( a );
6032 aExp = extractFloatx80Exp( a );
6033 bSig = extractFloatx80Frac( b );
6034 bExp = extractFloatx80Exp( b );
6035 expDiff = aExp - bExp;
6036 if ( 0 < expDiff ) goto aExpBigger;
6037 if ( expDiff < 0 ) goto bExpBigger;
6038 if ( aExp == 0x7FFF ) {
bb98fe42 6039 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 6040 return propagateFloatx80NaN(a, b, status);
158142c2 6041 }
ff32e16e 6042 float_raise(float_flag_invalid, status);
af39bc8c 6043 return floatx80_default_nan(status);
158142c2
FB
6044 }
6045 if ( aExp == 0 ) {
6046 aExp = 1;
6047 bExp = 1;
6048 }
6049 zSig1 = 0;
6050 if ( bSig < aSig ) goto aBigger;
6051 if ( aSig < bSig ) goto bBigger;
a2f2d288 6052 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
6053 bExpBigger:
6054 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6055 if ((uint64_t)(bSig << 1)) {
6056 return propagateFloatx80NaN(a, b, status);
6057 }
0f605c88
LV
6058 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
6059 floatx80_infinity_low);
158142c2
FB
6060 }
6061 if ( aExp == 0 ) ++expDiff;
6062 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
6063 bBigger:
6064 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
6065 zExp = bExp;
6066 zSign ^= 1;
6067 goto normalizeRoundAndPack;
6068 aExpBigger:
6069 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6070 if ((uint64_t)(aSig << 1)) {
6071 return propagateFloatx80NaN(a, b, status);
6072 }
158142c2
FB
6073 return a;
6074 }
6075 if ( bExp == 0 ) --expDiff;
6076 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
6077 aBigger:
6078 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
6079 zExp = aExp;
6080 normalizeRoundAndPack:
a2f2d288 6081 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6082 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6083}
6084
6085/*----------------------------------------------------------------------------
6086| Returns the result of adding the extended double-precision floating-point
6087| values `a' and `b'. The operation is performed according to the IEC/IEEE
6088| Standard for Binary Floating-Point Arithmetic.
6089*----------------------------------------------------------------------------*/
6090
e5a41ffa 6091floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2 6092{
c120391c 6093 bool aSign, bSign;
158142c2 6094
d1eb8f2a
AD
6095 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6096 float_raise(float_flag_invalid, status);
6097 return floatx80_default_nan(status);
6098 }
158142c2
FB
6099 aSign = extractFloatx80Sign( a );
6100 bSign = extractFloatx80Sign( b );
6101 if ( aSign == bSign ) {
ff32e16e 6102 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6103 }
6104 else {
ff32e16e 6105 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6106 }
6107
6108}
6109
6110/*----------------------------------------------------------------------------
6111| Returns the result of subtracting the extended double-precision floating-
6112| point values `a' and `b'. The operation is performed according to the
6113| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6114*----------------------------------------------------------------------------*/
6115
e5a41ffa 6116floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2 6117{
c120391c 6118 bool aSign, bSign;
158142c2 6119
d1eb8f2a
AD
6120 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6121 float_raise(float_flag_invalid, status);
6122 return floatx80_default_nan(status);
6123 }
158142c2
FB
6124 aSign = extractFloatx80Sign( a );
6125 bSign = extractFloatx80Sign( b );
6126 if ( aSign == bSign ) {
ff32e16e 6127 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6128 }
6129 else {
ff32e16e 6130 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6131 }
6132
6133}
6134
6135/*----------------------------------------------------------------------------
6136| Returns the result of multiplying the extended double-precision floating-
6137| point values `a' and `b'. The operation is performed according to the
6138| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6139*----------------------------------------------------------------------------*/
6140
e5a41ffa 6141floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2 6142{
c120391c 6143 bool aSign, bSign, zSign;
f4014512 6144 int32_t aExp, bExp, zExp;
bb98fe42 6145 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 6146
d1eb8f2a
AD
6147 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6148 float_raise(float_flag_invalid, status);
6149 return floatx80_default_nan(status);
6150 }
158142c2
FB
6151 aSig = extractFloatx80Frac( a );
6152 aExp = extractFloatx80Exp( a );
6153 aSign = extractFloatx80Sign( a );
6154 bSig = extractFloatx80Frac( b );
6155 bExp = extractFloatx80Exp( b );
6156 bSign = extractFloatx80Sign( b );
6157 zSign = aSign ^ bSign;
6158 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6159 if ( (uint64_t) ( aSig<<1 )
6160 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6161 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6162 }
6163 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
6164 return packFloatx80(zSign, floatx80_infinity_high,
6165 floatx80_infinity_low);
158142c2
FB
6166 }
6167 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6168 if ((uint64_t)(bSig << 1)) {
6169 return propagateFloatx80NaN(a, b, status);
6170 }
158142c2
FB
6171 if ( ( aExp | aSig ) == 0 ) {
6172 invalid:
ff32e16e 6173 float_raise(float_flag_invalid, status);
af39bc8c 6174 return floatx80_default_nan(status);
158142c2 6175 }
0f605c88
LV
6176 return packFloatx80(zSign, floatx80_infinity_high,
6177 floatx80_infinity_low);
158142c2
FB
6178 }
6179 if ( aExp == 0 ) {
6180 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6181 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6182 }
6183 if ( bExp == 0 ) {
6184 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
6185 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6186 }
6187 zExp = aExp + bExp - 0x3FFE;
6188 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 6189 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
6190 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6191 --zExp;
6192 }
a2f2d288 6193 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6194 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6195}
6196
6197/*----------------------------------------------------------------------------
6198| Returns the result of dividing the extended double-precision floating-point
6199| value `a' by the corresponding value `b'. The operation is performed
6200| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6201*----------------------------------------------------------------------------*/
6202
e5a41ffa 6203floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2 6204{
c120391c 6205 bool aSign, bSign, zSign;
f4014512 6206 int32_t aExp, bExp, zExp;
bb98fe42
AF
6207 uint64_t aSig, bSig, zSig0, zSig1;
6208 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 6209
d1eb8f2a
AD
6210 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6211 float_raise(float_flag_invalid, status);
6212 return floatx80_default_nan(status);
6213 }
158142c2
FB
6214 aSig = extractFloatx80Frac( a );
6215 aExp = extractFloatx80Exp( a );
6216 aSign = extractFloatx80Sign( a );
6217 bSig = extractFloatx80Frac( b );
6218 bExp = extractFloatx80Exp( b );
6219 bSign = extractFloatx80Sign( b );
6220 zSign = aSign ^ bSign;
6221 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6222 if ((uint64_t)(aSig << 1)) {
6223 return propagateFloatx80NaN(a, b, status);
6224 }
158142c2 6225 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6226 if ((uint64_t)(bSig << 1)) {
6227 return propagateFloatx80NaN(a, b, status);
6228 }
158142c2
FB
6229 goto invalid;
6230 }
0f605c88
LV
6231 return packFloatx80(zSign, floatx80_infinity_high,
6232 floatx80_infinity_low);
158142c2
FB
6233 }
6234 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6235 if ((uint64_t)(bSig << 1)) {
6236 return propagateFloatx80NaN(a, b, status);
6237 }
158142c2
FB
6238 return packFloatx80( zSign, 0, 0 );
6239 }
6240 if ( bExp == 0 ) {
6241 if ( bSig == 0 ) {
6242 if ( ( aExp | aSig ) == 0 ) {
6243 invalid:
ff32e16e 6244 float_raise(float_flag_invalid, status);
af39bc8c 6245 return floatx80_default_nan(status);
158142c2 6246 }
ff32e16e 6247 float_raise(float_flag_divbyzero, status);
0f605c88
LV
6248 return packFloatx80(zSign, floatx80_infinity_high,
6249 floatx80_infinity_low);
158142c2
FB
6250 }
6251 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6252 }
6253 if ( aExp == 0 ) {
6254 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6255 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6256 }
6257 zExp = aExp - bExp + 0x3FFE;
6258 rem1 = 0;
6259 if ( bSig <= aSig ) {
6260 shift128Right( aSig, 0, 1, &aSig, &rem1 );
6261 ++zExp;
6262 }
6263 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6264 mul64To128( bSig, zSig0, &term0, &term1 );
6265 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 6266 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6267 --zSig0;
6268 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6269 }
6270 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 6271 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
6272 mul64To128( bSig, zSig1, &term1, &term2 );
6273 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 6274 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6275 --zSig1;
6276 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6277 }
6278 zSig1 |= ( ( rem1 | rem2 ) != 0 );
6279 }
a2f2d288 6280 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6281 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6282}
6283
6284/*----------------------------------------------------------------------------
6285| Returns the remainder of the extended double-precision floating-point value
6286| `a' with respect to the corresponding value `b'. The operation is performed
6b8b0136
JM
6287| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6288| if 'mod' is false; if 'mod' is true, return the remainder based on truncating
445810ec
JM
6289| the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6290| the absolute value of the integer quotient.
158142c2
FB
6291*----------------------------------------------------------------------------*/
6292
445810ec 6293floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
6b8b0136 6294 float_status *status)
158142c2 6295{
c120391c 6296 bool aSign, zSign;
b662495d 6297 int32_t aExp, bExp, expDiff, aExpOrig;
bb98fe42
AF
6298 uint64_t aSig0, aSig1, bSig;
6299 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 6300
445810ec 6301 *quotient = 0;
d1eb8f2a
AD
6302 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6303 float_raise(float_flag_invalid, status);
6304 return floatx80_default_nan(status);
6305 }
158142c2 6306 aSig0 = extractFloatx80Frac( a );
b662495d 6307 aExpOrig = aExp = extractFloatx80Exp( a );
158142c2
FB
6308 aSign = extractFloatx80Sign( a );
6309 bSig = extractFloatx80Frac( b );
6310 bExp = extractFloatx80Exp( b );
158142c2 6311 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6312 if ( (uint64_t) ( aSig0<<1 )
6313 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6314 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6315 }
6316 goto invalid;
6317 }
6318 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6319 if ((uint64_t)(bSig << 1)) {
6320 return propagateFloatx80NaN(a, b, status);
6321 }
b662495d
JM
6322 if (aExp == 0 && aSig0 >> 63) {
6323 /*
6324 * Pseudo-denormal argument must be returned in normalized
6325 * form.
6326 */
6327 return packFloatx80(aSign, 1, aSig0);
6328 }
158142c2
FB
6329 return a;
6330 }
6331 if ( bExp == 0 ) {
6332 if ( bSig == 0 ) {
6333 invalid:
ff32e16e 6334 float_raise(float_flag_invalid, status);
af39bc8c 6335 return floatx80_default_nan(status);
158142c2
FB
6336 }
6337 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6338 }
6339 if ( aExp == 0 ) {
499a2f7b 6340 if ( aSig0 == 0 ) return a;
158142c2
FB
6341 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6342 }
158142c2
FB
6343 zSign = aSign;
6344 expDiff = aExp - bExp;
6345 aSig1 = 0;
6346 if ( expDiff < 0 ) {
b662495d
JM
6347 if ( mod || expDiff < -1 ) {
6348 if (aExp == 1 && aExpOrig == 0) {
6349 /*
6350 * Pseudo-denormal argument must be returned in
6351 * normalized form.
6352 */
6353 return packFloatx80(aSign, aExp, aSig0);
6354 }
6355 return a;
6356 }
158142c2
FB
6357 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6358 expDiff = 0;
6359 }
445810ec 6360 *quotient = q = ( bSig <= aSig0 );
158142c2
FB
6361 if ( q ) aSig0 -= bSig;
6362 expDiff -= 64;
6363 while ( 0 < expDiff ) {
6364 q = estimateDiv128To64( aSig0, aSig1, bSig );
6365 q = ( 2 < q ) ? q - 2 : 0;
6366 mul64To128( bSig, q, &term0, &term1 );
6367 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6368 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6369 expDiff -= 62;
445810ec
JM
6370 *quotient <<= 62;
6371 *quotient += q;
158142c2
FB
6372 }
6373 expDiff += 64;
6374 if ( 0 < expDiff ) {
6375 q = estimateDiv128To64( aSig0, aSig1, bSig );
6376 q = ( 2 < q ) ? q - 2 : 0;
6377 q >>= 64 - expDiff;
6378 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6379 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6380 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6381 while ( le128( term0, term1, aSig0, aSig1 ) ) {
6382 ++q;
6383 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6384 }
445810ec
JM
6385 if (expDiff < 64) {
6386 *quotient <<= expDiff;
6387 } else {
6388 *quotient = 0;
6389 }
6390 *quotient += q;
158142c2
FB
6391 }
6392 else {
6393 term1 = 0;
6394 term0 = bSig;
6395 }
6b8b0136
JM
6396 if (!mod) {
6397 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6398 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6399 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6400 && ( q & 1 ) )
6401 ) {
6402 aSig0 = alternateASig0;
6403 aSig1 = alternateASig1;
6404 zSign = ! zSign;
445810ec 6405 ++*quotient;
6b8b0136 6406 }
158142c2
FB
6407 }
6408 return
6409 normalizeRoundAndPackFloatx80(
ff32e16e 6410 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
6411
6412}
6413
6b8b0136
JM
6414/*----------------------------------------------------------------------------
6415| Returns the remainder of the extended double-precision floating-point value
6416| `a' with respect to the corresponding value `b'. The operation is performed
6417| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6418*----------------------------------------------------------------------------*/
6419
6420floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6421{
445810ec
JM
6422 uint64_t quotient;
6423 return floatx80_modrem(a, b, false, &quotient, status);
6b8b0136
JM
6424}
6425
6426/*----------------------------------------------------------------------------
6427| Returns the remainder of the extended double-precision floating-point value
6428| `a' with respect to the corresponding value `b', with the quotient truncated
6429| toward zero.
6430*----------------------------------------------------------------------------*/
6431
6432floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6433{
445810ec
JM
6434 uint64_t quotient;
6435 return floatx80_modrem(a, b, true, &quotient, status);
6b8b0136
JM
6436}
6437
158142c2
FB
6438/*----------------------------------------------------------------------------
6439| Returns the square root of the extended double-precision floating-point
6440| value `a'. The operation is performed according to the IEC/IEEE Standard
6441| for Binary Floating-Point Arithmetic.
6442*----------------------------------------------------------------------------*/
6443
e5a41ffa 6444floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2 6445{
c120391c 6446 bool aSign;
f4014512 6447 int32_t aExp, zExp;
bb98fe42
AF
6448 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6449 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 6450
d1eb8f2a
AD
6451 if (floatx80_invalid_encoding(a)) {
6452 float_raise(float_flag_invalid, status);
6453 return floatx80_default_nan(status);
6454 }
158142c2
FB
6455 aSig0 = extractFloatx80Frac( a );
6456 aExp = extractFloatx80Exp( a );
6457 aSign = extractFloatx80Sign( a );
6458 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6459 if ((uint64_t)(aSig0 << 1)) {
6460 return propagateFloatx80NaN(a, a, status);
6461 }
158142c2
FB
6462 if ( ! aSign ) return a;
6463 goto invalid;
6464 }
6465 if ( aSign ) {
6466 if ( ( aExp | aSig0 ) == 0 ) return a;
6467 invalid:
ff32e16e 6468 float_raise(float_flag_invalid, status);
af39bc8c 6469 return floatx80_default_nan(status);
158142c2
FB
6470 }
6471 if ( aExp == 0 ) {
6472 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6473 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6474 }
6475 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6476 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6477 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6478 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6479 doubleZSig0 = zSig0<<1;
6480 mul64To128( zSig0, zSig0, &term0, &term1 );
6481 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6482 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6483 --zSig0;
6484 doubleZSig0 -= 2;
6485 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6486 }
6487 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
e9321124 6488 if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
158142c2
FB
6489 if ( zSig1 == 0 ) zSig1 = 1;
6490 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6491 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6492 mul64To128( zSig1, zSig1, &term2, &term3 );
6493 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6494 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6495 --zSig1;
6496 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6497 term3 |= 1;
6498 term2 |= doubleZSig0;
6499 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6500 }
6501 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6502 }
6503 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6504 zSig0 |= doubleZSig0;
a2f2d288
PM
6505 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6506 0, zExp, zSig0, zSig1, status);
158142c2
FB
6507}
6508
6509/*----------------------------------------------------------------------------
158142c2
FB
6510| Returns the result of converting the quadruple-precision floating-point
6511| value `a' to the 32-bit two's complement integer format. The conversion
6512| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6513| Arithmetic---which means in particular that the conversion is rounded
6514| according to the current rounding mode. If `a' is a NaN, the largest
6515| positive integer is returned. Otherwise, if the conversion overflows, the
6516| largest integer with the same sign as `a' is returned.
6517*----------------------------------------------------------------------------*/
6518
f4014512 6519int32_t float128_to_int32(float128 a, float_status *status)
158142c2 6520{
c120391c 6521 bool aSign;
f4014512 6522 int32_t aExp, shiftCount;
bb98fe42 6523 uint64_t aSig0, aSig1;
158142c2
FB
6524
6525 aSig1 = extractFloat128Frac1( a );
6526 aSig0 = extractFloat128Frac0( a );
6527 aExp = extractFloat128Exp( a );
6528 aSign = extractFloat128Sign( a );
6529 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
e9321124 6530 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6531 aSig0 |= ( aSig1 != 0 );
6532 shiftCount = 0x4028 - aExp;
6533 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 6534 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
6535
6536}
6537
6538/*----------------------------------------------------------------------------
6539| Returns the result of converting the quadruple-precision floating-point
6540| value `a' to the 32-bit two's complement integer format. The conversion
6541| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6542| Arithmetic, except that the conversion is always rounded toward zero. If
6543| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
6544| conversion overflows, the largest integer with the same sign as `a' is
6545| returned.
6546*----------------------------------------------------------------------------*/
6547
f4014512 6548int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2 6549{
c120391c 6550 bool aSign;
f4014512 6551 int32_t aExp, shiftCount;
bb98fe42 6552 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 6553 int32_t z;
158142c2
FB
6554
6555 aSig1 = extractFloat128Frac1( a );
6556 aSig0 = extractFloat128Frac0( a );
6557 aExp = extractFloat128Exp( a );
6558 aSign = extractFloat128Sign( a );
6559 aSig0 |= ( aSig1 != 0 );
6560 if ( 0x401E < aExp ) {
6561 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
6562 goto invalid;
6563 }
6564 else if ( aExp < 0x3FFF ) {
a2f2d288 6565 if (aExp || aSig0) {
d82f3b2d 6566 float_raise(float_flag_inexact, status);
a2f2d288 6567 }
158142c2
FB
6568 return 0;
6569 }
e9321124 6570 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6571 shiftCount = 0x402F - aExp;
6572 savedASig = aSig0;
6573 aSig0 >>= shiftCount;
6574 z = aSig0;
6575 if ( aSign ) z = - z;
6576 if ( ( z < 0 ) ^ aSign ) {
6577 invalid:
ff32e16e 6578 float_raise(float_flag_invalid, status);
2c217da0 6579 return aSign ? INT32_MIN : INT32_MAX;
158142c2
FB
6580 }
6581 if ( ( aSig0<<shiftCount ) != savedASig ) {
d82f3b2d 6582 float_raise(float_flag_inexact, status);
158142c2
FB
6583 }
6584 return z;
6585
6586}
6587
6588/*----------------------------------------------------------------------------
6589| Returns the result of converting the quadruple-precision floating-point
6590| value `a' to the 64-bit two's complement integer format. The conversion
6591| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6592| Arithmetic---which means in particular that the conversion is rounded
6593| according to the current rounding mode. If `a' is a NaN, the largest
6594| positive integer is returned. Otherwise, if the conversion overflows, the
6595| largest integer with the same sign as `a' is returned.
6596*----------------------------------------------------------------------------*/
6597
f42c2224 6598int64_t float128_to_int64(float128 a, float_status *status)
158142c2 6599{
c120391c 6600 bool aSign;
f4014512 6601 int32_t aExp, shiftCount;
bb98fe42 6602 uint64_t aSig0, aSig1;
158142c2
FB
6603
6604 aSig1 = extractFloat128Frac1( a );
6605 aSig0 = extractFloat128Frac0( a );
6606 aExp = extractFloat128Exp( a );
6607 aSign = extractFloat128Sign( a );
e9321124 6608 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6609 shiftCount = 0x402F - aExp;
6610 if ( shiftCount <= 0 ) {
6611 if ( 0x403E < aExp ) {
ff32e16e 6612 float_raise(float_flag_invalid, status);
158142c2
FB
6613 if ( ! aSign
6614 || ( ( aExp == 0x7FFF )
e9321124 6615 && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
158142c2
FB
6616 )
6617 ) {
2c217da0 6618 return INT64_MAX;
158142c2 6619 }
2c217da0 6620 return INT64_MIN;
158142c2
FB
6621 }
6622 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6623 }
6624 else {
6625 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6626 }
ff32e16e 6627 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
6628
6629}
6630
6631/*----------------------------------------------------------------------------
6632| Returns the result of converting the quadruple-precision floating-point
6633| value `a' to the 64-bit two's complement integer format. The conversion
6634| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6635| Arithmetic, except that the conversion is always rounded toward zero.
6636| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
6637| the conversion overflows, the largest integer with the same sign as `a' is
6638| returned.
6639*----------------------------------------------------------------------------*/
6640
f42c2224 6641int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2 6642{
c120391c 6643 bool aSign;
f4014512 6644 int32_t aExp, shiftCount;
bb98fe42 6645 uint64_t aSig0, aSig1;
f42c2224 6646 int64_t z;
158142c2
FB
6647
6648 aSig1 = extractFloat128Frac1( a );
6649 aSig0 = extractFloat128Frac0( a );
6650 aExp = extractFloat128Exp( a );
6651 aSign = extractFloat128Sign( a );
e9321124 6652 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6653 shiftCount = aExp - 0x402F;
6654 if ( 0 < shiftCount ) {
6655 if ( 0x403E <= aExp ) {
e9321124
AB
6656 aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
6657 if ( ( a.high == UINT64_C(0xC03E000000000000) )
6658 && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
a2f2d288 6659 if (aSig1) {
d82f3b2d 6660 float_raise(float_flag_inexact, status);
a2f2d288 6661 }
158142c2
FB
6662 }
6663 else {
ff32e16e 6664 float_raise(float_flag_invalid, status);
158142c2 6665 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
2c217da0 6666 return INT64_MAX;
158142c2
FB
6667 }
6668 }
2c217da0 6669 return INT64_MIN;
158142c2
FB
6670 }
6671 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 6672 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
d82f3b2d 6673 float_raise(float_flag_inexact, status);
158142c2
FB
6674 }
6675 }
6676 else {
6677 if ( aExp < 0x3FFF ) {
6678 if ( aExp | aSig0 | aSig1 ) {
d82f3b2d 6679 float_raise(float_flag_inexact, status);
158142c2
FB
6680 }
6681 return 0;
6682 }
6683 z = aSig0>>( - shiftCount );
6684 if ( aSig1
bb98fe42 6685 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
d82f3b2d 6686 float_raise(float_flag_inexact, status);
158142c2
FB
6687 }
6688 }
6689 if ( aSign ) z = - z;
6690 return z;
6691
6692}
6693
2e6d8568
BR
6694/*----------------------------------------------------------------------------
6695| Returns the result of converting the quadruple-precision floating-point value
6696| `a' to the 64-bit unsigned integer format. The conversion is
6697| performed according to the IEC/IEEE Standard for Binary Floating-Point
6698| Arithmetic---which means in particular that the conversion is rounded
6699| according to the current rounding mode. If `a' is a NaN, the largest
6700| positive integer is returned. If the conversion overflows, the
6701| largest unsigned integer is returned. If 'a' is negative, the value is
6702| rounded and zero is returned; negative values that do not round to zero
6703| will raise the inexact exception.
6704*----------------------------------------------------------------------------*/
6705
6706uint64_t float128_to_uint64(float128 a, float_status *status)
6707{
c120391c 6708 bool aSign;
2e6d8568
BR
6709 int aExp;
6710 int shiftCount;
6711 uint64_t aSig0, aSig1;
6712
6713 aSig0 = extractFloat128Frac0(a);
6714 aSig1 = extractFloat128Frac1(a);
6715 aExp = extractFloat128Exp(a);
6716 aSign = extractFloat128Sign(a);
6717 if (aSign && (aExp > 0x3FFE)) {
6718 float_raise(float_flag_invalid, status);
6719 if (float128_is_any_nan(a)) {
2c217da0 6720 return UINT64_MAX;
2e6d8568
BR
6721 } else {
6722 return 0;
6723 }
6724 }
6725 if (aExp) {
2c217da0 6726 aSig0 |= UINT64_C(0x0001000000000000);
2e6d8568
BR
6727 }
6728 shiftCount = 0x402F - aExp;
6729 if (shiftCount <= 0) {
6730 if (0x403E < aExp) {
6731 float_raise(float_flag_invalid, status);
2c217da0 6732 return UINT64_MAX;
2e6d8568
BR
6733 }
6734 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6735 } else {
6736 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6737 }
6738 return roundAndPackUint64(aSign, aSig0, aSig1, status);
6739}
6740
6741uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6742{
6743 uint64_t v;
6744 signed char current_rounding_mode = status->float_rounding_mode;
6745
6746 set_float_rounding_mode(float_round_to_zero, status);
6747 v = float128_to_uint64(a, status);
6748 set_float_rounding_mode(current_rounding_mode, status);
6749
6750 return v;
6751}
6752
158142c2
FB
6753/*----------------------------------------------------------------------------
6754| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
6755| value `a' to the 32-bit unsigned integer format. The conversion
6756| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6757| Arithmetic except that the conversion is always rounded toward zero.
6758| If `a' is a NaN, the largest positive integer is returned. Otherwise,
6759| if the conversion overflows, the largest unsigned integer is returned.
6760| If 'a' is negative, the value is rounded and zero is returned; negative
6761| values that do not round to zero will raise the inexact exception.
6762*----------------------------------------------------------------------------*/
6763
6764uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6765{
6766 uint64_t v;
6767 uint32_t res;
6768 int old_exc_flags = get_float_exception_flags(status);
6769
6770 v = float128_to_uint64_round_to_zero(a, status);
6771 if (v > 0xffffffff) {
6772 res = 0xffffffff;
6773 } else {
6774 return v;
6775 }
6776 set_float_exception_flags(old_exc_flags, status);
e45de992
DH
6777 float_raise(float_flag_invalid, status);
6778 return res;
6779}
6780
6781/*----------------------------------------------------------------------------
6782| Returns the result of converting the quadruple-precision floating-point value
6783| `a' to the 32-bit unsigned integer format. The conversion is
6784| performed according to the IEC/IEEE Standard for Binary Floating-Point
6785| Arithmetic---which means in particular that the conversion is rounded
6786| according to the current rounding mode. If `a' is a NaN, the largest
6787| positive integer is returned. If the conversion overflows, the
6788| largest unsigned integer is returned. If 'a' is negative, the value is
6789| rounded and zero is returned; negative values that do not round to zero
6790| will raise the inexact exception.
6791*----------------------------------------------------------------------------*/
6792
6793uint32_t float128_to_uint32(float128 a, float_status *status)
6794{
6795 uint64_t v;
6796 uint32_t res;
6797 int old_exc_flags = get_float_exception_flags(status);
6798
6799 v = float128_to_uint64(a, status);
6800 if (v > 0xffffffff) {
6801 res = 0xffffffff;
6802 } else {
6803 return v;
6804 }
6805 set_float_exception_flags(old_exc_flags, status);
fd425037
BR
6806 float_raise(float_flag_invalid, status);
6807 return res;
6808}
6809
6810/*----------------------------------------------------------------------------
6811| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
6812| value `a' to the single-precision floating-point format. The conversion
6813| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6814| Arithmetic.
6815*----------------------------------------------------------------------------*/
6816
e5a41ffa 6817float32 float128_to_float32(float128 a, float_status *status)
158142c2 6818{
c120391c 6819 bool aSign;
f4014512 6820 int32_t aExp;
bb98fe42
AF
6821 uint64_t aSig0, aSig1;
6822 uint32_t zSig;
158142c2
FB
6823
6824 aSig1 = extractFloat128Frac1( a );
6825 aSig0 = extractFloat128Frac0( a );
6826 aExp = extractFloat128Exp( a );
6827 aSign = extractFloat128Sign( a );
6828 if ( aExp == 0x7FFF ) {
6829 if ( aSig0 | aSig1 ) {
ff32e16e 6830 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
6831 }
6832 return packFloat32( aSign, 0xFF, 0 );
6833 }
6834 aSig0 |= ( aSig1 != 0 );
6835 shift64RightJamming( aSig0, 18, &aSig0 );
6836 zSig = aSig0;
6837 if ( aExp || zSig ) {
6838 zSig |= 0x40000000;
6839 aExp -= 0x3F81;
6840 }
ff32e16e 6841 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
6842
6843}
6844
6845/*----------------------------------------------------------------------------
6846| Returns the result of converting the quadruple-precision floating-point
6847| value `a' to the double-precision floating-point format. The conversion
6848| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6849| Arithmetic.
6850*----------------------------------------------------------------------------*/
6851
e5a41ffa 6852float64 float128_to_float64(float128 a, float_status *status)
158142c2 6853{
c120391c 6854 bool aSign;
f4014512 6855 int32_t aExp;
bb98fe42 6856 uint64_t aSig0, aSig1;
158142c2
FB
6857
6858 aSig1 = extractFloat128Frac1( a );
6859 aSig0 = extractFloat128Frac0( a );
6860 aExp = extractFloat128Exp( a );
6861 aSign = extractFloat128Sign( a );
6862 if ( aExp == 0x7FFF ) {
6863 if ( aSig0 | aSig1 ) {
ff32e16e 6864 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
6865 }
6866 return packFloat64( aSign, 0x7FF, 0 );
6867 }
6868 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6869 aSig0 |= ( aSig1 != 0 );
6870 if ( aExp || aSig0 ) {
e9321124 6871 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
6872 aExp -= 0x3C01;
6873 }
ff32e16e 6874 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
6875
6876}
6877
158142c2
FB
6878/*----------------------------------------------------------------------------
6879| Returns the result of converting the quadruple-precision floating-point
6880| value `a' to the extended double-precision floating-point format. The
6881| conversion is performed according to the IEC/IEEE Standard for Binary
6882| Floating-Point Arithmetic.
6883*----------------------------------------------------------------------------*/
6884
e5a41ffa 6885floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2 6886{
c120391c 6887 bool aSign;
f4014512 6888 int32_t aExp;
bb98fe42 6889 uint64_t aSig0, aSig1;
158142c2
FB
6890
6891 aSig1 = extractFloat128Frac1( a );
6892 aSig0 = extractFloat128Frac0( a );
6893 aExp = extractFloat128Exp( a );
6894 aSign = extractFloat128Sign( a );
6895 if ( aExp == 0x7FFF ) {
6896 if ( aSig0 | aSig1 ) {
7537c2b4
JM
6897 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6898 status);
6899 return floatx80_silence_nan(res, status);
158142c2 6900 }
0f605c88
LV
6901 return packFloatx80(aSign, floatx80_infinity_high,
6902 floatx80_infinity_low);
158142c2
FB
6903 }
6904 if ( aExp == 0 ) {
6905 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6906 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6907 }
6908 else {
e9321124 6909 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6910 }
6911 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6912 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6913
6914}
6915
158142c2
FB
6916/*----------------------------------------------------------------------------
6917| Rounds the quadruple-precision floating-point value `a' to an integer, and
6918| returns the result as a quadruple-precision floating-point value. The
6919| operation is performed according to the IEC/IEEE Standard for Binary
6920| Floating-Point Arithmetic.
6921*----------------------------------------------------------------------------*/
6922
e5a41ffa 6923float128 float128_round_to_int(float128 a, float_status *status)
158142c2 6924{
c120391c 6925 bool aSign;
f4014512 6926 int32_t aExp;
bb98fe42 6927 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
6928 float128 z;
6929
6930 aExp = extractFloat128Exp( a );
6931 if ( 0x402F <= aExp ) {
6932 if ( 0x406F <= aExp ) {
6933 if ( ( aExp == 0x7FFF )
6934 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6935 ) {
ff32e16e 6936 return propagateFloat128NaN(a, a, status);
158142c2
FB
6937 }
6938 return a;
6939 }
6940 lastBitMask = 1;
6941 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6942 roundBitsMask = lastBitMask - 1;
6943 z = a;
a2f2d288 6944 switch (status->float_rounding_mode) {
dc355b76 6945 case float_round_nearest_even:
158142c2
FB
6946 if ( lastBitMask ) {
6947 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6948 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6949 }
6950 else {
bb98fe42 6951 if ( (int64_t) z.low < 0 ) {
158142c2 6952 ++z.high;
bb98fe42 6953 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
6954 }
6955 }
dc355b76 6956 break;
f9288a76
PM
6957 case float_round_ties_away:
6958 if (lastBitMask) {
6959 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6960 } else {
6961 if ((int64_t) z.low < 0) {
6962 ++z.high;
6963 }
6964 }
6965 break;
dc355b76
PM
6966 case float_round_to_zero:
6967 break;
6968 case float_round_up:
6969 if (!extractFloat128Sign(z)) {
6970 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6971 }
6972 break;
6973 case float_round_down:
6974 if (extractFloat128Sign(z)) {
6975 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 6976 }
dc355b76 6977 break;
5d64abb3
RH
6978 case float_round_to_odd:
6979 /*
6980 * Note that if lastBitMask == 0, the last bit is the lsb
6981 * of high, and roundBitsMask == -1.
6982 */
6983 if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
6984 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6985 }
6986 break;
dc355b76
PM
6987 default:
6988 abort();
158142c2
FB
6989 }
6990 z.low &= ~ roundBitsMask;
6991 }
6992 else {
6993 if ( aExp < 0x3FFF ) {
bb98fe42 6994 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
d82f3b2d 6995 float_raise(float_flag_inexact, status);
158142c2 6996 aSign = extractFloat128Sign( a );
a2f2d288 6997 switch (status->float_rounding_mode) {
5d64abb3 6998 case float_round_nearest_even:
158142c2
FB
6999 if ( ( aExp == 0x3FFE )
7000 && ( extractFloat128Frac0( a )
7001 | extractFloat128Frac1( a ) )
7002 ) {
7003 return packFloat128( aSign, 0x3FFF, 0, 0 );
7004 }
7005 break;
f9288a76
PM
7006 case float_round_ties_away:
7007 if (aExp == 0x3FFE) {
7008 return packFloat128(aSign, 0x3FFF, 0, 0);
7009 }
7010 break;
5d64abb3 7011 case float_round_down:
158142c2
FB
7012 return
7013 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
7014 : packFloat128( 0, 0, 0, 0 );
5d64abb3 7015 case float_round_up:
158142c2
FB
7016 return
7017 aSign ? packFloat128( 1, 0, 0, 0 )
7018 : packFloat128( 0, 0x3FFF, 0, 0 );
5d64abb3
RH
7019
7020 case float_round_to_odd:
7021 return packFloat128(aSign, 0x3FFF, 0, 0);
3dede407
RH
7022
7023 case float_round_to_zero:
7024 break;
158142c2
FB
7025 }
7026 return packFloat128( aSign, 0, 0, 0 );
7027 }
7028 lastBitMask = 1;
7029 lastBitMask <<= 0x402F - aExp;
7030 roundBitsMask = lastBitMask - 1;
7031 z.low = 0;
7032 z.high = a.high;
a2f2d288 7033 switch (status->float_rounding_mode) {
dc355b76 7034 case float_round_nearest_even:
158142c2
FB
7035 z.high += lastBitMask>>1;
7036 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
7037 z.high &= ~ lastBitMask;
7038 }
dc355b76 7039 break;
f9288a76
PM
7040 case float_round_ties_away:
7041 z.high += lastBitMask>>1;
7042 break;
dc355b76
PM
7043 case float_round_to_zero:
7044 break;
7045 case float_round_up:
7046 if (!extractFloat128Sign(z)) {
158142c2
FB
7047 z.high |= ( a.low != 0 );
7048 z.high += roundBitsMask;
7049 }
dc355b76
PM
7050 break;
7051 case float_round_down:
7052 if (extractFloat128Sign(z)) {
7053 z.high |= (a.low != 0);
7054 z.high += roundBitsMask;
7055 }
7056 break;
5d64abb3
RH
7057 case float_round_to_odd:
7058 if ((z.high & lastBitMask) == 0) {
7059 z.high |= (a.low != 0);
7060 z.high += roundBitsMask;
7061 }
7062 break;
dc355b76
PM
7063 default:
7064 abort();
158142c2
FB
7065 }
7066 z.high &= ~ roundBitsMask;
7067 }
7068 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
d82f3b2d 7069 float_raise(float_flag_inexact, status);
158142c2
FB
7070 }
7071 return z;
7072
7073}
7074
7075/*----------------------------------------------------------------------------
7076| Returns the result of adding the absolute values of the quadruple-precision
7077| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
7078| before being returned. `zSign' is ignored if the result is a NaN.
7079| The addition is performed according to the IEC/IEEE Standard for Binary
7080| Floating-Point Arithmetic.
7081*----------------------------------------------------------------------------*/
7082
c120391c 7083static float128 addFloat128Sigs(float128 a, float128 b, bool zSign,
e5a41ffa 7084 float_status *status)
158142c2 7085{
f4014512 7086 int32_t aExp, bExp, zExp;
bb98fe42 7087 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 7088 int32_t expDiff;
158142c2
FB
7089
7090 aSig1 = extractFloat128Frac1( a );
7091 aSig0 = extractFloat128Frac0( a );
7092 aExp = extractFloat128Exp( a );
7093 bSig1 = extractFloat128Frac1( b );
7094 bSig0 = extractFloat128Frac0( b );
7095 bExp = extractFloat128Exp( b );
7096 expDiff = aExp - bExp;
7097 if ( 0 < expDiff ) {
7098 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7099 if (aSig0 | aSig1) {
7100 return propagateFloat128NaN(a, b, status);
7101 }
158142c2
FB
7102 return a;
7103 }
7104 if ( bExp == 0 ) {
7105 --expDiff;
7106 }
7107 else {
e9321124 7108 bSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7109 }
7110 shift128ExtraRightJamming(
7111 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
7112 zExp = aExp;
7113 }
7114 else if ( expDiff < 0 ) {
7115 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7116 if (bSig0 | bSig1) {
7117 return propagateFloat128NaN(a, b, status);
7118 }
158142c2
FB
7119 return packFloat128( zSign, 0x7FFF, 0, 0 );
7120 }
7121 if ( aExp == 0 ) {
7122 ++expDiff;
7123 }
7124 else {
e9321124 7125 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7126 }
7127 shift128ExtraRightJamming(
7128 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
7129 zExp = bExp;
7130 }
7131 else {
7132 if ( aExp == 0x7FFF ) {
7133 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 7134 return propagateFloat128NaN(a, b, status);
158142c2
FB
7135 }
7136 return a;
7137 }
7138 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
fe76d976 7139 if ( aExp == 0 ) {
a2f2d288 7140 if (status->flush_to_zero) {
e6afc87f 7141 if (zSig0 | zSig1) {
ff32e16e 7142 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
7143 }
7144 return packFloat128(zSign, 0, 0, 0);
7145 }
fe76d976
PB
7146 return packFloat128( zSign, 0, zSig0, zSig1 );
7147 }
158142c2 7148 zSig2 = 0;
e9321124 7149 zSig0 |= UINT64_C(0x0002000000000000);
158142c2
FB
7150 zExp = aExp;
7151 goto shiftRight1;
7152 }
e9321124 7153 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7154 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7155 --zExp;
e9321124 7156 if ( zSig0 < UINT64_C(0x0002000000000000) ) goto roundAndPack;
158142c2
FB
7157 ++zExp;
7158 shiftRight1:
7159 shift128ExtraRightJamming(
7160 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
7161 roundAndPack:
ff32e16e 7162 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7163
7164}
7165
7166/*----------------------------------------------------------------------------
7167| Returns the result of subtracting the absolute values of the quadruple-
7168| precision floating-point values `a' and `b'. If `zSign' is 1, the
7169| difference is negated before being returned. `zSign' is ignored if the
7170| result is a NaN. The subtraction is performed according to the IEC/IEEE
7171| Standard for Binary Floating-Point Arithmetic.
7172*----------------------------------------------------------------------------*/
7173
c120391c 7174static float128 subFloat128Sigs(float128 a, float128 b, bool zSign,
e5a41ffa 7175 float_status *status)
158142c2 7176{
f4014512 7177 int32_t aExp, bExp, zExp;
bb98fe42 7178 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 7179 int32_t expDiff;
158142c2
FB
7180
7181 aSig1 = extractFloat128Frac1( a );
7182 aSig0 = extractFloat128Frac0( a );
7183 aExp = extractFloat128Exp( a );
7184 bSig1 = extractFloat128Frac1( b );
7185 bSig0 = extractFloat128Frac0( b );
7186 bExp = extractFloat128Exp( b );
7187 expDiff = aExp - bExp;
7188 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
7189 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
7190 if ( 0 < expDiff ) goto aExpBigger;
7191 if ( expDiff < 0 ) goto bExpBigger;
7192 if ( aExp == 0x7FFF ) {
7193 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 7194 return propagateFloat128NaN(a, b, status);
158142c2 7195 }
ff32e16e 7196 float_raise(float_flag_invalid, status);
af39bc8c 7197 return float128_default_nan(status);
158142c2
FB
7198 }
7199 if ( aExp == 0 ) {
7200 aExp = 1;
7201 bExp = 1;
7202 }
7203 if ( bSig0 < aSig0 ) goto aBigger;
7204 if ( aSig0 < bSig0 ) goto bBigger;
7205 if ( bSig1 < aSig1 ) goto aBigger;
7206 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
7207 return packFloat128(status->float_rounding_mode == float_round_down,
7208 0, 0, 0);
158142c2
FB
7209 bExpBigger:
7210 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7211 if (bSig0 | bSig1) {
7212 return propagateFloat128NaN(a, b, status);
7213 }
158142c2
FB
7214 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
7215 }
7216 if ( aExp == 0 ) {
7217 ++expDiff;
7218 }
7219 else {
e9321124 7220 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7221 }
7222 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
e9321124 7223 bSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7224 bBigger:
7225 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
7226 zExp = bExp;
7227 zSign ^= 1;
7228 goto normalizeRoundAndPack;
7229 aExpBigger:
7230 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7231 if (aSig0 | aSig1) {
7232 return propagateFloat128NaN(a, b, status);
7233 }
158142c2
FB
7234 return a;
7235 }
7236 if ( bExp == 0 ) {
7237 --expDiff;
7238 }
7239 else {
e9321124 7240 bSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7241 }
7242 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
e9321124 7243 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7244 aBigger:
7245 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7246 zExp = aExp;
7247 normalizeRoundAndPack:
7248 --zExp;
ff32e16e
PM
7249 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
7250 status);
158142c2
FB
7251
7252}
7253
7254/*----------------------------------------------------------------------------
7255| Returns the result of adding the quadruple-precision floating-point values
7256| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
7257| for Binary Floating-Point Arithmetic.
7258*----------------------------------------------------------------------------*/
7259
e5a41ffa 7260float128 float128_add(float128 a, float128 b, float_status *status)
158142c2 7261{
c120391c 7262 bool aSign, bSign;
158142c2
FB
7263
7264 aSign = extractFloat128Sign( a );
7265 bSign = extractFloat128Sign( b );
7266 if ( aSign == bSign ) {
ff32e16e 7267 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7268 }
7269 else {
ff32e16e 7270 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7271 }
7272
7273}
7274
7275/*----------------------------------------------------------------------------
7276| Returns the result of subtracting the quadruple-precision floating-point
7277| values `a' and `b'. The operation is performed according to the IEC/IEEE
7278| Standard for Binary Floating-Point Arithmetic.
7279*----------------------------------------------------------------------------*/
7280
e5a41ffa 7281float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2 7282{
c120391c 7283 bool aSign, bSign;
158142c2
FB
7284
7285 aSign = extractFloat128Sign( a );
7286 bSign = extractFloat128Sign( b );
7287 if ( aSign == bSign ) {
ff32e16e 7288 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7289 }
7290 else {
ff32e16e 7291 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7292 }
7293
7294}
7295
7296/*----------------------------------------------------------------------------
7297| Returns the result of multiplying the quadruple-precision floating-point
7298| values `a' and `b'. The operation is performed according to the IEC/IEEE
7299| Standard for Binary Floating-Point Arithmetic.
7300*----------------------------------------------------------------------------*/
7301
e5a41ffa 7302float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2 7303{
c120391c 7304 bool aSign, bSign, zSign;
f4014512 7305 int32_t aExp, bExp, zExp;
bb98fe42 7306 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
7307
7308 aSig1 = extractFloat128Frac1( a );
7309 aSig0 = extractFloat128Frac0( a );
7310 aExp = extractFloat128Exp( a );
7311 aSign = extractFloat128Sign( a );
7312 bSig1 = extractFloat128Frac1( b );
7313 bSig0 = extractFloat128Frac0( b );
7314 bExp = extractFloat128Exp( b );
7315 bSign = extractFloat128Sign( b );
7316 zSign = aSign ^ bSign;
7317 if ( aExp == 0x7FFF ) {
7318 if ( ( aSig0 | aSig1 )
7319 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7320 return propagateFloat128NaN(a, b, status);
158142c2
FB
7321 }
7322 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
7323 return packFloat128( zSign, 0x7FFF, 0, 0 );
7324 }
7325 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7326 if (bSig0 | bSig1) {
7327 return propagateFloat128NaN(a, b, status);
7328 }
158142c2
FB
7329 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7330 invalid:
ff32e16e 7331 float_raise(float_flag_invalid, status);
af39bc8c 7332 return float128_default_nan(status);
158142c2
FB
7333 }
7334 return packFloat128( zSign, 0x7FFF, 0, 0 );
7335 }
7336 if ( aExp == 0 ) {
7337 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7338 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7339 }
7340 if ( bExp == 0 ) {
7341 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7342 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7343 }
7344 zExp = aExp + bExp - 0x4000;
e9321124 7345 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7346 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
7347 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
7348 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
7349 zSig2 |= ( zSig3 != 0 );
e9321124 7350 if (UINT64_C( 0x0002000000000000) <= zSig0 ) {
158142c2
FB
7351 shift128ExtraRightJamming(
7352 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
7353 ++zExp;
7354 }
ff32e16e 7355 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7356
7357}
7358
7359/*----------------------------------------------------------------------------
7360| Returns the result of dividing the quadruple-precision floating-point value
7361| `a' by the corresponding value `b'. The operation is performed according to
7362| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7363*----------------------------------------------------------------------------*/
7364
e5a41ffa 7365float128 float128_div(float128 a, float128 b, float_status *status)
158142c2 7366{
c120391c 7367 bool aSign, bSign, zSign;
f4014512 7368 int32_t aExp, bExp, zExp;
bb98fe42
AF
7369 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
7370 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7371
7372 aSig1 = extractFloat128Frac1( a );
7373 aSig0 = extractFloat128Frac0( a );
7374 aExp = extractFloat128Exp( a );
7375 aSign = extractFloat128Sign( a );
7376 bSig1 = extractFloat128Frac1( b );
7377 bSig0 = extractFloat128Frac0( b );
7378 bExp = extractFloat128Exp( b );
7379 bSign = extractFloat128Sign( b );
7380 zSign = aSign ^ bSign;
7381 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7382 if (aSig0 | aSig1) {
7383 return propagateFloat128NaN(a, b, status);
7384 }
158142c2 7385 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7386 if (bSig0 | bSig1) {
7387 return propagateFloat128NaN(a, b, status);
7388 }
158142c2
FB
7389 goto invalid;
7390 }
7391 return packFloat128( zSign, 0x7FFF, 0, 0 );
7392 }
7393 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7394 if (bSig0 | bSig1) {
7395 return propagateFloat128NaN(a, b, status);
7396 }
158142c2
FB
7397 return packFloat128( zSign, 0, 0, 0 );
7398 }
7399 if ( bExp == 0 ) {
7400 if ( ( bSig0 | bSig1 ) == 0 ) {
7401 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7402 invalid:
ff32e16e 7403 float_raise(float_flag_invalid, status);
af39bc8c 7404 return float128_default_nan(status);
158142c2 7405 }
ff32e16e 7406 float_raise(float_flag_divbyzero, status);
158142c2
FB
7407 return packFloat128( zSign, 0x7FFF, 0, 0 );
7408 }
7409 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7410 }
7411 if ( aExp == 0 ) {
7412 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7413 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7414 }
7415 zExp = aExp - bExp + 0x3FFD;
7416 shortShift128Left(
e9321124 7417 aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1 );
158142c2 7418 shortShift128Left(
e9321124 7419 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
7420 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
7421 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
7422 ++zExp;
7423 }
7424 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
7425 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
7426 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 7427 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7428 --zSig0;
7429 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
7430 }
7431 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
7432 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
7433 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
7434 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7435 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7436 --zSig1;
7437 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
7438 }
7439 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7440 }
7441 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 7442 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7443
7444}
7445
7446/*----------------------------------------------------------------------------
7447| Returns the remainder of the quadruple-precision floating-point value `a'
7448| with respect to the corresponding value `b'. The operation is performed
7449| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7450*----------------------------------------------------------------------------*/
7451
e5a41ffa 7452float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 7453{
c120391c 7454 bool aSign, zSign;
f4014512 7455 int32_t aExp, bExp, expDiff;
bb98fe42
AF
7456 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
7457 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
7458 int64_t sigMean0;
158142c2
FB
7459
7460 aSig1 = extractFloat128Frac1( a );
7461 aSig0 = extractFloat128Frac0( a );
7462 aExp = extractFloat128Exp( a );
7463 aSign = extractFloat128Sign( a );
7464 bSig1 = extractFloat128Frac1( b );
7465 bSig0 = extractFloat128Frac0( b );
7466 bExp = extractFloat128Exp( b );
158142c2
FB
7467 if ( aExp == 0x7FFF ) {
7468 if ( ( aSig0 | aSig1 )
7469 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7470 return propagateFloat128NaN(a, b, status);
158142c2
FB
7471 }
7472 goto invalid;
7473 }
7474 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7475 if (bSig0 | bSig1) {
7476 return propagateFloat128NaN(a, b, status);
7477 }
158142c2
FB
7478 return a;
7479 }
7480 if ( bExp == 0 ) {
7481 if ( ( bSig0 | bSig1 ) == 0 ) {
7482 invalid:
ff32e16e 7483 float_raise(float_flag_invalid, status);
af39bc8c 7484 return float128_default_nan(status);
158142c2
FB
7485 }
7486 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7487 }
7488 if ( aExp == 0 ) {
7489 if ( ( aSig0 | aSig1 ) == 0 ) return a;
7490 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7491 }
7492 expDiff = aExp - bExp;
7493 if ( expDiff < -1 ) return a;
7494 shortShift128Left(
e9321124 7495 aSig0 | UINT64_C(0x0001000000000000),
158142c2
FB
7496 aSig1,
7497 15 - ( expDiff < 0 ),
7498 &aSig0,
7499 &aSig1
7500 );
7501 shortShift128Left(
e9321124 7502 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
7503 q = le128( bSig0, bSig1, aSig0, aSig1 );
7504 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7505 expDiff -= 64;
7506 while ( 0 < expDiff ) {
7507 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7508 q = ( 4 < q ) ? q - 4 : 0;
7509 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7510 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
7511 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
7512 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
7513 expDiff -= 61;
7514 }
7515 if ( -64 < expDiff ) {
7516 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7517 q = ( 4 < q ) ? q - 4 : 0;
7518 q >>= - expDiff;
7519 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7520 expDiff += 52;
7521 if ( expDiff < 0 ) {
7522 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7523 }
7524 else {
7525 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
7526 }
7527 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7528 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
7529 }
7530 else {
7531 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
7532 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7533 }
7534 do {
7535 alternateASig0 = aSig0;
7536 alternateASig1 = aSig1;
7537 ++q;
7538 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 7539 } while ( 0 <= (int64_t) aSig0 );
158142c2 7540 add128(
bb98fe42 7541 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
7542 if ( ( sigMean0 < 0 )
7543 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
7544 aSig0 = alternateASig0;
7545 aSig1 = alternateASig1;
7546 }
bb98fe42 7547 zSign = ( (int64_t) aSig0 < 0 );
158142c2 7548 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
7549 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
7550 status);
158142c2
FB
7551}
7552
7553/*----------------------------------------------------------------------------
7554| Returns the square root of the quadruple-precision floating-point value `a'.
7555| The operation is performed according to the IEC/IEEE Standard for Binary
7556| Floating-Point Arithmetic.
7557*----------------------------------------------------------------------------*/
7558
e5a41ffa 7559float128 float128_sqrt(float128 a, float_status *status)
158142c2 7560{
c120391c 7561 bool aSign;
f4014512 7562 int32_t aExp, zExp;
bb98fe42
AF
7563 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
7564 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7565
7566 aSig1 = extractFloat128Frac1( a );
7567 aSig0 = extractFloat128Frac0( a );
7568 aExp = extractFloat128Exp( a );
7569 aSign = extractFloat128Sign( a );
7570 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7571 if (aSig0 | aSig1) {
7572 return propagateFloat128NaN(a, a, status);
7573 }
158142c2
FB
7574 if ( ! aSign ) return a;
7575 goto invalid;
7576 }
7577 if ( aSign ) {
7578 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
7579 invalid:
ff32e16e 7580 float_raise(float_flag_invalid, status);
af39bc8c 7581 return float128_default_nan(status);
158142c2
FB
7582 }
7583 if ( aExp == 0 ) {
7584 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
7585 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7586 }
7587 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
e9321124 7588 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7589 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
7590 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
7591 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
7592 doubleZSig0 = zSig0<<1;
7593 mul64To128( zSig0, zSig0, &term0, &term1 );
7594 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 7595 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7596 --zSig0;
7597 doubleZSig0 -= 2;
7598 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
7599 }
7600 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
7601 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
7602 if ( zSig1 == 0 ) zSig1 = 1;
7603 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
7604 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
7605 mul64To128( zSig1, zSig1, &term2, &term3 );
7606 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7607 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7608 --zSig1;
7609 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
7610 term3 |= 1;
7611 term2 |= doubleZSig0;
7612 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
7613 }
7614 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7615 }
7616 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 7617 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7618
7619}
7620
71bfd65c
RH
7621static inline FloatRelation
7622floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
7623 float_status *status)
f6714d36 7624{
c120391c 7625 bool aSign, bSign;
f6714d36 7626
d1eb8f2a
AD
7627 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7628 float_raise(float_flag_invalid, status);
7629 return float_relation_unordered;
7630 }
f6714d36
AJ
7631 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7632 ( extractFloatx80Frac( a )<<1 ) ) ||
7633 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7634 ( extractFloatx80Frac( b )<<1 ) )) {
7635 if (!is_quiet ||
af39bc8c
AM
7636 floatx80_is_signaling_nan(a, status) ||
7637 floatx80_is_signaling_nan(b, status)) {
ff32e16e 7638 float_raise(float_flag_invalid, status);
f6714d36
AJ
7639 }
7640 return float_relation_unordered;
7641 }
7642 aSign = extractFloatx80Sign( a );
7643 bSign = extractFloatx80Sign( b );
7644 if ( aSign != bSign ) {
7645
7646 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7647 ( ( a.low | b.low ) == 0 ) ) {
7648 /* zero case */
7649 return float_relation_equal;
7650 } else {
7651 return 1 - (2 * aSign);
7652 }
7653 } else {
be53fa78
JM
7654 /* Normalize pseudo-denormals before comparison. */
7655 if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
7656 ++a.high;
7657 }
7658 if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
7659 ++b.high;
7660 }
f6714d36
AJ
7661 if (a.low == b.low && a.high == b.high) {
7662 return float_relation_equal;
7663 } else {
7664 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7665 }
7666 }
7667}
7668
71bfd65c 7669FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 7670{
ff32e16e 7671 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
7672}
7673
71bfd65c
RH
7674FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
7675 float_status *status)
f6714d36 7676{
ff32e16e 7677 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
7678}
7679
71bfd65c
RH
7680static inline FloatRelation
7681float128_compare_internal(float128 a, float128 b, bool is_quiet,
7682 float_status *status)
1f587329 7683{
c120391c 7684 bool aSign, bSign;
1f587329
BS
7685
7686 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7687 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7688 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7689 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7690 if (!is_quiet ||
af39bc8c
AM
7691 float128_is_signaling_nan(a, status) ||
7692 float128_is_signaling_nan(b, status)) {
ff32e16e 7693 float_raise(float_flag_invalid, status);
1f587329
BS
7694 }
7695 return float_relation_unordered;
7696 }
7697 aSign = extractFloat128Sign( a );
7698 bSign = extractFloat128Sign( b );
7699 if ( aSign != bSign ) {
7700 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7701 /* zero case */
7702 return float_relation_equal;
7703 } else {
7704 return 1 - (2 * aSign);
7705 }
7706 } else {
7707 if (a.low == b.low && a.high == b.high) {
7708 return float_relation_equal;
7709 } else {
7710 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7711 }
7712 }
7713}
7714
71bfd65c 7715FloatRelation float128_compare(float128 a, float128 b, float_status *status)
1f587329 7716{
ff32e16e 7717 return float128_compare_internal(a, b, 0, status);
1f587329
BS
7718}
7719
71bfd65c
RH
7720FloatRelation float128_compare_quiet(float128 a, float128 b,
7721 float_status *status)
1f587329 7722{
ff32e16e 7723 return float128_compare_internal(a, b, 1, status);
1f587329
BS
7724}
7725
e5a41ffa 7726floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb 7727{
c120391c 7728 bool aSign;
326b9e98 7729 int32_t aExp;
bb98fe42 7730 uint64_t aSig;
9ee6e8bb 7731
d1eb8f2a
AD
7732 if (floatx80_invalid_encoding(a)) {
7733 float_raise(float_flag_invalid, status);
7734 return floatx80_default_nan(status);
7735 }
9ee6e8bb
PB
7736 aSig = extractFloatx80Frac( a );
7737 aExp = extractFloatx80Exp( a );
7738 aSign = extractFloatx80Sign( a );
7739
326b9e98
AJ
7740 if ( aExp == 0x7FFF ) {
7741 if ( aSig<<1 ) {
ff32e16e 7742 return propagateFloatx80NaN(a, a, status);
326b9e98 7743 }
9ee6e8bb
PB
7744 return a;
7745 }
326b9e98 7746
3c85c37f
PM
7747 if (aExp == 0) {
7748 if (aSig == 0) {
7749 return a;
7750 }
7751 aExp++;
7752 }
69397542 7753
326b9e98
AJ
7754 if (n > 0x10000) {
7755 n = 0x10000;
7756 } else if (n < -0x10000) {
7757 n = -0x10000;
7758 }
7759
9ee6e8bb 7760 aExp += n;
a2f2d288
PM
7761 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7762 aSign, aExp, aSig, 0, status);
9ee6e8bb 7763}
9ee6e8bb 7764
e5a41ffa 7765float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb 7766{
c120391c 7767 bool aSign;
326b9e98 7768 int32_t aExp;
bb98fe42 7769 uint64_t aSig0, aSig1;
9ee6e8bb
PB
7770
7771 aSig1 = extractFloat128Frac1( a );
7772 aSig0 = extractFloat128Frac0( a );
7773 aExp = extractFloat128Exp( a );
7774 aSign = extractFloat128Sign( a );
7775 if ( aExp == 0x7FFF ) {
326b9e98 7776 if ( aSig0 | aSig1 ) {
ff32e16e 7777 return propagateFloat128NaN(a, a, status);
326b9e98 7778 }
9ee6e8bb
PB
7779 return a;
7780 }
3c85c37f 7781 if (aExp != 0) {
e9321124 7782 aSig0 |= UINT64_C(0x0001000000000000);
3c85c37f 7783 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 7784 return a;
3c85c37f
PM
7785 } else {
7786 aExp++;
7787 }
69397542 7788
326b9e98
AJ
7789 if (n > 0x10000) {
7790 n = 0x10000;
7791 } else if (n < -0x10000) {
7792 n = -0x10000;
7793 }
7794
69397542
PB
7795 aExp += n - 1;
7796 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 7797 , status);
9ee6e8bb
PB
7798
7799}
f6b3b108
EC
7800
7801static void __attribute__((constructor)) softfloat_init(void)
7802{
7803 union_float64 ua, ub, uc, ur;
7804
7805 if (QEMU_NO_HARDFLOAT) {
7806 return;
7807 }
7808 /*
7809 * Test that the host's FMA is not obviously broken. For example,
7810 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
7811 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
7812 */
7813 ua.s = 0x0020000000000001ULL;
7814 ub.s = 0x3ca0000000000000ULL;
7815 uc.s = 0x0020000000000000ULL;
7816 ur.h = fma(ua.h, ub.h, uc.h);
7817 if (ur.s != 0x0020000000000001ULL) {
7818 force_soft_fma = true;
7819 }
7820}