4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
44 ===============================================================================
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
85 #include "qemu/osdep.h"
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
90 /* We only need stdlib for abort() */
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 float_raise(float_flag_input_denormal, s); \
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck
, float32
)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck
, float64
)
141 #undef GEN_INPUT_FLUSH__NOCHECK
143 #define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
146 if (likely(!s->flush_inputs_to_zero)) { \
149 soft_t ## _input_flush__nocheck(a, s); \
152 GEN_INPUT_FLUSH1(float32_input_flush1
, float32
)
153 GEN_INPUT_FLUSH1(float64_input_flush1
, float64
)
154 #undef GEN_INPUT_FLUSH1
156 #define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
159 if (likely(!s->flush_inputs_to_zero)) { \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
166 GEN_INPUT_FLUSH2(float32_input_flush2
, float32
)
167 GEN_INPUT_FLUSH2(float64_input_flush2
, float64
)
168 #undef GEN_INPUT_FLUSH2
170 #define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
173 if (likely(!s->flush_inputs_to_zero)) { \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
181 GEN_INPUT_FLUSH3(float32_input_flush3
, float32
)
182 GEN_INPUT_FLUSH3(float64_input_flush3
, float64
)
183 #undef GEN_INPUT_FLUSH3
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF 1
#else
# define QEMU_HARDFLOAT_USE_ISINF 0
#endif
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    inexact-flag implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
235 static inline bool can_use_fpu(const float_status
*s
)
237 if (QEMU_NO_HARDFLOAT
) {
240 return likely(s
->float_exception_flags
& float_flag_inexact
&&
241 s
->float_rounding_mode
== float_round_nearest_even
);
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
266 typedef bool (*f32_check_fn
)(union_float32 a
, union_float32 b
);
267 typedef bool (*f64_check_fn
)(union_float64 a
, union_float64 b
);
269 typedef float32 (*soft_f32_op2_fn
)(float32 a
, float32 b
, float_status
*s
);
270 typedef float64 (*soft_f64_op2_fn
)(float64 a
, float64 b
, float_status
*s
);
271 typedef float (*hard_f32_op2_fn
)(float a
, float b
);
272 typedef double (*hard_f64_op2_fn
)(double a
, double b
);
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a
, union_float32 b
)
277 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
282 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
283 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
285 return float32_is_zero_or_normal(a
.s
) &&
286 float32_is_zero_or_normal(b
.s
);
289 static inline bool f64_is_zon2(union_float64 a
, union_float64 b
)
291 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
292 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
293 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
295 return float64_is_zero_or_normal(a
.s
) &&
296 float64_is_zero_or_normal(b
.s
);
299 /* 3-input is-zero-or-normal */
301 bool f32_is_zon3(union_float32 a
, union_float32 b
, union_float32 c
)
303 if (QEMU_HARDFLOAT_3F32_USE_FP
) {
304 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
305 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
306 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
308 return float32_is_zero_or_normal(a
.s
) &&
309 float32_is_zero_or_normal(b
.s
) &&
310 float32_is_zero_or_normal(c
.s
);
314 bool f64_is_zon3(union_float64 a
, union_float64 b
, union_float64 c
)
316 if (QEMU_HARDFLOAT_3F64_USE_FP
) {
317 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
318 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
319 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
321 return float64_is_zero_or_normal(a
.s
) &&
322 float64_is_zero_or_normal(b
.s
) &&
323 float64_is_zero_or_normal(c
.s
);
326 static inline bool f32_is_inf(union_float32 a
)
328 if (QEMU_HARDFLOAT_USE_ISINF
) {
331 return float32_is_infinity(a
.s
);
334 static inline bool f64_is_inf(union_float64 a
)
336 if (QEMU_HARDFLOAT_USE_ISINF
) {
339 return float64_is_infinity(a
.s
);
342 static inline float32
343 float32_gen2(float32 xa
, float32 xb
, float_status
*s
,
344 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
,
345 f32_check_fn pre
, f32_check_fn post
)
347 union_float32 ua
, ub
, ur
;
352 if (unlikely(!can_use_fpu(s
))) {
356 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
357 if (unlikely(!pre(ua
, ub
))) {
361 ur
.h
= hard(ua
.h
, ub
.h
);
362 if (unlikely(f32_is_inf(ur
))) {
363 float_raise(float_flag_overflow
, s
);
364 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
) && post(ua
, ub
)) {
370 return soft(ua
.s
, ub
.s
, s
);
373 static inline float64
374 float64_gen2(float64 xa
, float64 xb
, float_status
*s
,
375 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
,
376 f64_check_fn pre
, f64_check_fn post
)
378 union_float64 ua
, ub
, ur
;
383 if (unlikely(!can_use_fpu(s
))) {
387 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
388 if (unlikely(!pre(ua
, ub
))) {
392 ur
.h
= hard(ua
.h
, ub
.h
);
393 if (unlikely(f64_is_inf(ur
))) {
394 float_raise(float_flag_overflow
, s
);
395 } else if (unlikely(fabs(ur
.h
) <= DBL_MIN
) && post(ua
, ub
)) {
401 return soft(ua
.s
, ub
.s
, s
);
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
408 static inline uint32_t extractFloat32Frac(float32 a
)
410 return float32_val(a
) & 0x007FFFFF;
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
417 static inline int extractFloat32Exp(float32 a
)
419 return (float32_val(a
) >> 23) & 0xFF;
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
426 static inline bool extractFloat32Sign(float32 a
)
428 return float32_val(a
) >> 31;
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
435 static inline uint64_t extractFloat64Frac(float64 a
)
437 return float64_val(a
) & UINT64_C(0x000FFFFFFFFFFFFF);
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
444 static inline int extractFloat64Exp(float64 a
)
446 return (float64_val(a
) >> 52) & 0x7FF;
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
453 static inline bool extractFloat64Sign(float64 a
)
455 return float64_val(a
) >> 63;
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
472 #define float_cmask(bit) (1u << (bit))
475 float_cmask_zero
= float_cmask(float_class_zero
),
476 float_cmask_normal
= float_cmask(float_class_normal
),
477 float_cmask_inf
= float_cmask(float_class_inf
),
478 float_cmask_qnan
= float_cmask(float_class_qnan
),
479 float_cmask_snan
= float_cmask(float_class_snan
),
481 float_cmask_infzero
= float_cmask_zero
| float_cmask_inf
,
482 float_cmask_anynan
= float_cmask_qnan
| float_cmask_snan
,
486 /* Simple helpers for checking if, or what kind of, NaN we have */
487 static inline __attribute__((unused
)) bool is_nan(FloatClass c
)
489 return unlikely(c
>= float_class_qnan
);
492 static inline __attribute__((unused
)) bool is_snan(FloatClass c
)
494 return c
== float_class_snan
;
497 static inline __attribute__((unused
)) bool is_qnan(FloatClass c
)
499 return c
== float_class_qnan
;
503 * Structure holding all of the decomposed parts of a float.
504 * The exponent is unbiased and the fraction is normalized.
506 * The fraction words are stored in big-endian word ordering,
507 * so that truncation from a larger format to a smaller format
508 * can be done simply by ignoring subsequent elements.
516 /* Routines that know the structure may reference the singular name. */
519 * Routines expanded with multiple structures reference "hi" and "lo"
520 * depending on the operation. In FloatParts64, "hi" and "lo" are
521 * both the same word and aliased here.
541 uint64_t frac_hm
; /* high-middle */
542 uint64_t frac_lm
; /* low-middle */
546 /* These apply to the most significant word of each FloatPartsN. */
547 #define DECOMPOSED_BINARY_POINT 63
548 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
550 /* Structure holding all of the relevant parameters for a format.
551 * exp_size: the size of the exponent field
552 * exp_bias: the offset applied to the exponent field
553 * exp_max: the maximum normalised exponent
554 * frac_size: the size of the fraction field
555 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
556 * The following are computed based the size of fraction
557 * frac_lsb: least significant bit of fraction
558 * frac_lsbm1: the bit below the least significant bit (for rounding)
559 * round_mask/roundeven_mask: masks used for rounding
560 * The following optional modifiers are available:
561 * arm_althp: handle ARM Alternative Half Precision
572 uint64_t roundeven_mask
;
576 /* Expand fields based on the size of exponent and fraction */
577 #define FLOAT_PARAMS(E, F) \
579 .exp_bias = ((1 << E) - 1) >> 1, \
580 .exp_max = (1 << E) - 1, \
582 .frac_shift = (-F - 1) & 63, \
583 .frac_lsb = 1ull << ((-F - 1) & 63), \
584 .frac_lsbm1 = 1ull << ((-F - 2) & 63), \
585 .round_mask = (1ull << ((-F - 1) & 63)) - 1, \
586 .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1
588 static const FloatFmt float16_params
= {
592 static const FloatFmt float16_params_ahp
= {
597 static const FloatFmt bfloat16_params
= {
601 static const FloatFmt float32_params
= {
605 static const FloatFmt float64_params
= {
609 static const FloatFmt float128_params
= {
610 FLOAT_PARAMS(15, 112)
613 /* Unpack a float to parts, but do not canonicalize. */
614 static void unpack_raw64(FloatParts64
*r
, const FloatFmt
*fmt
, uint64_t raw
)
616 const int f_size
= fmt
->frac_size
;
617 const int e_size
= fmt
->exp_size
;
619 *r
= (FloatParts64
) {
620 .cls
= float_class_unclassified
,
621 .sign
= extract64(raw
, f_size
+ e_size
, 1),
622 .exp
= extract64(raw
, f_size
, e_size
),
623 .frac
= extract64(raw
, 0, f_size
)
627 static inline void float16_unpack_raw(FloatParts64
*p
, float16 f
)
629 unpack_raw64(p
, &float16_params
, f
);
632 static inline void bfloat16_unpack_raw(FloatParts64
*p
, bfloat16 f
)
634 unpack_raw64(p
, &bfloat16_params
, f
);
637 static inline void float32_unpack_raw(FloatParts64
*p
, float32 f
)
639 unpack_raw64(p
, &float32_params
, f
);
642 static inline void float64_unpack_raw(FloatParts64
*p
, float64 f
)
644 unpack_raw64(p
, &float64_params
, f
);
647 static void float128_unpack_raw(FloatParts128
*p
, float128 f
)
649 const int f_size
= float128_params
.frac_size
- 64;
650 const int e_size
= float128_params
.exp_size
;
652 *p
= (FloatParts128
) {
653 .cls
= float_class_unclassified
,
654 .sign
= extract64(f
.high
, f_size
+ e_size
, 1),
655 .exp
= extract64(f
.high
, f_size
, e_size
),
656 .frac_hi
= extract64(f
.high
, 0, f_size
),
661 /* Pack a float from parts, but do not canonicalize. */
662 static uint64_t pack_raw64(const FloatParts64
*p
, const FloatFmt
*fmt
)
664 const int f_size
= fmt
->frac_size
;
665 const int e_size
= fmt
->exp_size
;
668 ret
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
669 ret
= deposit64(ret
, f_size
, e_size
, p
->exp
);
670 ret
= deposit64(ret
, 0, f_size
, p
->frac
);
674 static inline float16
float16_pack_raw(const FloatParts64
*p
)
676 return make_float16(pack_raw64(p
, &float16_params
));
679 static inline bfloat16
bfloat16_pack_raw(const FloatParts64
*p
)
681 return pack_raw64(p
, &bfloat16_params
);
684 static inline float32
float32_pack_raw(const FloatParts64
*p
)
686 return make_float32(pack_raw64(p
, &float32_params
));
689 static inline float64
float64_pack_raw(const FloatParts64
*p
)
691 return make_float64(pack_raw64(p
, &float64_params
));
694 static float128
float128_pack_raw(const FloatParts128
*p
)
696 const int f_size
= float128_params
.frac_size
- 64;
697 const int e_size
= float128_params
.exp_size
;
700 hi
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
701 hi
= deposit64(hi
, f_size
, e_size
, p
->exp
);
702 hi
= deposit64(hi
, 0, f_size
, p
->frac_hi
);
703 return make_float128(hi
, p
->frac_lo
);
706 /*----------------------------------------------------------------------------
707 | Functions and definitions to determine: (1) whether tininess for underflow
708 | is detected before or after rounding by default, (2) what (if anything)
709 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
710 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
711 | are propagated from function inputs to output. These details are target-
713 *----------------------------------------------------------------------------*/
714 #include "softfloat-specialize.c.inc"
716 #define PARTS_GENERIC_64_128(NAME, P) \
717 QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
719 #define PARTS_GENERIC_64_128_256(NAME, P) \
720 QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
721 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
723 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
724 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
726 static void parts64_return_nan(FloatParts64
*a
, float_status
*s
);
727 static void parts128_return_nan(FloatParts128
*a
, float_status
*s
);
729 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
731 static FloatParts64
*parts64_pick_nan(FloatParts64
*a
, FloatParts64
*b
,
733 static FloatParts128
*parts128_pick_nan(FloatParts128
*a
, FloatParts128
*b
,
736 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
738 static FloatParts64
*parts64_pick_nan_muladd(FloatParts64
*a
, FloatParts64
*b
,
739 FloatParts64
*c
, float_status
*s
,
740 int ab_mask
, int abc_mask
);
741 static FloatParts128
*parts128_pick_nan_muladd(FloatParts128
*a
,
745 int ab_mask
, int abc_mask
);
747 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
748 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
750 static void parts64_canonicalize(FloatParts64
*p
, float_status
*status
,
751 const FloatFmt
*fmt
);
752 static void parts128_canonicalize(FloatParts128
*p
, float_status
*status
,
753 const FloatFmt
*fmt
);
755 #define parts_canonicalize(A, S, F) \
756 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
758 static void parts64_uncanon(FloatParts64
*p
, float_status
*status
,
759 const FloatFmt
*fmt
);
760 static void parts128_uncanon(FloatParts128
*p
, float_status
*status
,
761 const FloatFmt
*fmt
);
763 #define parts_uncanon(A, S, F) \
764 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
766 static void parts64_add_normal(FloatParts64
*a
, FloatParts64
*b
);
767 static void parts128_add_normal(FloatParts128
*a
, FloatParts128
*b
);
768 static void parts256_add_normal(FloatParts256
*a
, FloatParts256
*b
);
770 #define parts_add_normal(A, B) \
771 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
773 static bool parts64_sub_normal(FloatParts64
*a
, FloatParts64
*b
);
774 static bool parts128_sub_normal(FloatParts128
*a
, FloatParts128
*b
);
775 static bool parts256_sub_normal(FloatParts256
*a
, FloatParts256
*b
);
777 #define parts_sub_normal(A, B) \
778 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
780 static FloatParts64
*parts64_addsub(FloatParts64
*a
, FloatParts64
*b
,
781 float_status
*s
, bool subtract
);
782 static FloatParts128
*parts128_addsub(FloatParts128
*a
, FloatParts128
*b
,
783 float_status
*s
, bool subtract
);
785 #define parts_addsub(A, B, S, Z) \
786 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
788 static FloatParts64
*parts64_mul(FloatParts64
*a
, FloatParts64
*b
,
790 static FloatParts128
*parts128_mul(FloatParts128
*a
, FloatParts128
*b
,
793 #define parts_mul(A, B, S) \
794 PARTS_GENERIC_64_128(mul, A)(A, B, S)
796 static FloatParts64
*parts64_muladd(FloatParts64
*a
, FloatParts64
*b
,
797 FloatParts64
*c
, int flags
,
799 static FloatParts128
*parts128_muladd(FloatParts128
*a
, FloatParts128
*b
,
800 FloatParts128
*c
, int flags
,
803 #define parts_muladd(A, B, C, Z, S) \
804 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
806 static FloatParts64
*parts64_div(FloatParts64
*a
, FloatParts64
*b
,
808 static FloatParts128
*parts128_div(FloatParts128
*a
, FloatParts128
*b
,
811 #define parts_div(A, B, S) \
812 PARTS_GENERIC_64_128(div, A)(A, B, S)
814 static bool parts64_round_to_int_normal(FloatParts64
*a
, FloatRoundMode rm
,
815 int scale
, int frac_size
);
816 static bool parts128_round_to_int_normal(FloatParts128
*a
, FloatRoundMode r
,
817 int scale
, int frac_size
);
819 #define parts_round_to_int_normal(A, R, C, F) \
820 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
822 static void parts64_round_to_int(FloatParts64
*a
, FloatRoundMode rm
,
823 int scale
, float_status
*s
,
824 const FloatFmt
*fmt
);
825 static void parts128_round_to_int(FloatParts128
*a
, FloatRoundMode r
,
826 int scale
, float_status
*s
,
827 const FloatFmt
*fmt
);
829 #define parts_round_to_int(A, R, C, S, F) \
830 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
832 static int64_t parts64_float_to_sint(FloatParts64
*p
, FloatRoundMode rmode
,
833 int scale
, int64_t min
, int64_t max
,
835 static int64_t parts128_float_to_sint(FloatParts128
*p
, FloatRoundMode rmode
,
836 int scale
, int64_t min
, int64_t max
,
839 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
840 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
842 static uint64_t parts64_float_to_uint(FloatParts64
*p
, FloatRoundMode rmode
,
843 int scale
, uint64_t max
,
845 static uint64_t parts128_float_to_uint(FloatParts128
*p
, FloatRoundMode rmode
,
846 int scale
, uint64_t max
,
849 #define parts_float_to_uint(P, R, Z, M, S) \
850 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
853 * Helper functions for softfloat-parts.c.inc, per-size operations.
856 #define FRAC_GENERIC_64_128(NAME, P) \
857 QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
859 #define FRAC_GENERIC_64_128_256(NAME, P) \
860 QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
861 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
863 static bool frac64_add(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
865 return uadd64_overflow(a
->frac
, b
->frac
, &r
->frac
);
868 static bool frac128_add(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
871 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
872 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
876 static bool frac256_add(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
879 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
880 r
->frac_lm
= uadd64_carry(a
->frac_lm
, b
->frac_lm
, &c
);
881 r
->frac_hm
= uadd64_carry(a
->frac_hm
, b
->frac_hm
, &c
);
882 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
886 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
888 static bool frac64_addi(FloatParts64
*r
, FloatParts64
*a
, uint64_t c
)
890 return uadd64_overflow(a
->frac
, c
, &r
->frac
);
893 static bool frac128_addi(FloatParts128
*r
, FloatParts128
*a
, uint64_t c
)
895 c
= uadd64_overflow(a
->frac_lo
, c
, &r
->frac_lo
);
896 return uadd64_overflow(a
->frac_hi
, c
, &r
->frac_hi
);
899 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
901 static void frac64_allones(FloatParts64
*a
)
906 static void frac128_allones(FloatParts128
*a
)
908 a
->frac_hi
= a
->frac_lo
= -1;
911 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
913 static int frac64_cmp(FloatParts64
*a
, FloatParts64
*b
)
915 return a
->frac
== b
->frac
? 0 : a
->frac
< b
->frac
? -1 : 1;
918 static int frac128_cmp(FloatParts128
*a
, FloatParts128
*b
)
920 uint64_t ta
= a
->frac_hi
, tb
= b
->frac_hi
;
922 ta
= a
->frac_lo
, tb
= b
->frac_lo
;
927 return ta
< tb
? -1 : 1;
930 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
932 static void frac64_clear(FloatParts64
*a
)
937 static void frac128_clear(FloatParts128
*a
)
939 a
->frac_hi
= a
->frac_lo
= 0;
942 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
944 static bool frac64_div(FloatParts64
*a
, FloatParts64
*b
)
946 uint64_t n1
, n0
, r
, q
;
950 * We want a 2*N / N-bit division to produce exactly an N-bit
951 * result, so that we do not lose any precision and so that we
952 * do not have to renormalize afterward. If A.frac < B.frac,
953 * then division would produce an (N-1)-bit result; shift A left
954 * by one to produce the an N-bit result, and return true to
955 * decrement the exponent to match.
957 * The udiv_qrnnd algorithm that we're using requires normalization,
958 * i.e. the msb of the denominator must be set, which is already true.
960 ret
= a
->frac
< b
->frac
;
968 q
= udiv_qrnnd(&r
, n0
, n1
, b
->frac
);
970 /* Set lsb if there is a remainder, to set inexact. */
971 a
->frac
= q
| (r
!= 0);
976 static bool frac128_div(FloatParts128
*a
, FloatParts128
*b
)
978 uint64_t q0
, q1
, a0
, a1
, b0
, b1
;
979 uint64_t r0
, r1
, r2
, r3
, t0
, t1
, t2
, t3
;
982 a0
= a
->frac_hi
, a1
= a
->frac_lo
;
983 b0
= b
->frac_hi
, b1
= b
->frac_lo
;
985 ret
= lt128(a0
, a1
, b0
, b1
);
987 a1
= shr_double(a0
, a1
, 1);
991 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
992 q0
= estimateDiv128To64(a0
, a1
, b0
);
995 * Estimate is high because B1 was not included (unless B1 == 0).
996 * Reduce quotient and increase remainder until remainder is non-negative.
997 * This loop will execute 0 to 2 times.
999 mul128By64To192(b0
, b1
, q0
, &t0
, &t1
, &t2
);
1000 sub192(a0
, a1
, 0, t0
, t1
, t2
, &r0
, &r1
, &r2
);
1003 add192(r0
, r1
, r2
, 0, b0
, b1
, &r0
, &r1
, &r2
);
1006 /* Repeat using the remainder, producing a second word of quotient. */
1007 q1
= estimateDiv128To64(r1
, r2
, b0
);
1008 mul128By64To192(b0
, b1
, q1
, &t1
, &t2
, &t3
);
1009 sub192(r1
, r2
, 0, t1
, t2
, t3
, &r1
, &r2
, &r3
);
1012 add192(r1
, r2
, r3
, 0, b0
, b1
, &r1
, &r2
, &r3
);
1015 /* Any remainder indicates inexact; set sticky bit. */
1016 q1
|= (r2
| r3
) != 0;
1023 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1025 static bool frac64_eqz(FloatParts64
*a
)
1027 return a
->frac
== 0;
1030 static bool frac128_eqz(FloatParts128
*a
)
1032 return (a
->frac_hi
| a
->frac_lo
) == 0;
1035 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1037 static void frac64_mulw(FloatParts128
*r
, FloatParts64
*a
, FloatParts64
*b
)
1039 mulu64(&r
->frac_lo
, &r
->frac_hi
, a
->frac
, b
->frac
);
1042 static void frac128_mulw(FloatParts256
*r
, FloatParts128
*a
, FloatParts128
*b
)
1044 mul128To256(a
->frac_hi
, a
->frac_lo
, b
->frac_hi
, b
->frac_lo
,
1045 &r
->frac_hi
, &r
->frac_hm
, &r
->frac_lm
, &r
->frac_lo
);
1048 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1050 static void frac64_neg(FloatParts64
*a
)
1055 static void frac128_neg(FloatParts128
*a
)
1058 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1059 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1062 static void frac256_neg(FloatParts256
*a
)
1065 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1066 a
->frac_lm
= usub64_borrow(0, a
->frac_lm
, &c
);
1067 a
->frac_hm
= usub64_borrow(0, a
->frac_hm
, &c
);
1068 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1071 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1073 static int frac64_normalize(FloatParts64
*a
)
1076 int shift
= clz64(a
->frac
);
1083 static int frac128_normalize(FloatParts128
*a
)
1086 int shl
= clz64(a
->frac_hi
);
1087 a
->frac_hi
= shl_double(a
->frac_hi
, a
->frac_lo
, shl
);
1090 } else if (a
->frac_lo
) {
1091 int shl
= clz64(a
->frac_lo
);
1092 a
->frac_hi
= a
->frac_lo
<< shl
;
1099 static int frac256_normalize(FloatParts256
*a
)
1101 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1102 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1114 a0
= a1
, a1
= a2
, a2
= a3
, a3
= 0;
1117 a0
= a2
, a1
= a3
, a2
= 0, a3
= 0;
1120 a0
= a3
, a1
= 0, a2
= 0, a3
= 0;
1123 a0
= 0, a1
= 0, a2
= 0, a3
= 0;
1133 a0
= shl_double(a0
, a1
, shl
);
1134 a1
= shl_double(a1
, a2
, shl
);
1135 a2
= shl_double(a2
, a3
, shl
);
1146 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1148 static void frac64_shl(FloatParts64
*a
, int c
)
1153 static void frac128_shl(FloatParts128
*a
, int c
)
1155 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1163 a0
= shl_double(a0
, a1
, c
);
1171 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1173 static void frac64_shr(FloatParts64
*a
, int c
)
1178 static void frac128_shr(FloatParts128
*a
, int c
)
1180 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1188 a1
= shr_double(a0
, a1
, c
);
1196 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1198 static void frac64_shrjam(FloatParts64
*a
, int c
)
1200 uint64_t a0
= a
->frac
;
1202 if (likely(c
!= 0)) {
1203 if (likely(c
< 64)) {
1204 a0
= (a0
>> c
) | (shr_double(a0
, 0, c
) != 0);
1212 static void frac128_shrjam(FloatParts128
*a
, int c
)
1214 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1215 uint64_t sticky
= 0;
1217 if (unlikely(c
== 0)) {
1219 } else if (likely(c
< 64)) {
1221 } else if (likely(c
< 128)) {
1235 sticky
|= shr_double(a1
, 0, c
);
1236 a1
= shr_double(a0
, a1
, c
);
1240 a
->frac_lo
= a1
| (sticky
!= 0);
1244 static void frac256_shrjam(FloatParts256
*a
, int c
)
1246 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1247 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1248 uint64_t sticky
= 0;
1250 if (unlikely(c
== 0)) {
1252 } else if (likely(c
< 64)) {
1254 } else if (likely(c
< 256)) {
1255 if (unlikely(c
& 128)) {
1257 a3
= a1
, a2
= a0
, a1
= 0, a0
= 0;
1259 if (unlikely(c
& 64)) {
1261 a3
= a2
, a2
= a1
, a1
= a0
, a0
= 0;
1268 sticky
= a0
| a1
| a2
| a3
;
1269 a0
= a1
= a2
= a3
= 0;
1273 sticky
|= shr_double(a3
, 0, c
);
1274 a3
= shr_double(a2
, a3
, c
);
1275 a2
= shr_double(a1
, a2
, c
);
1276 a1
= shr_double(a0
, a1
, c
);
1280 a
->frac_lo
= a3
| (sticky
!= 0);
1286 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1288 static bool frac64_sub(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
1290 return usub64_overflow(a
->frac
, b
->frac
, &r
->frac
);
1293 static bool frac128_sub(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
1296 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1297 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1301 static bool frac256_sub(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
1304 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1305 r
->frac_lm
= usub64_borrow(a
->frac_lm
, b
->frac_lm
, &c
);
1306 r
->frac_hm
= usub64_borrow(a
->frac_hm
, b
->frac_hm
, &c
);
1307 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1311 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1313 static void frac64_truncjam(FloatParts64
*r
, FloatParts128
*a
)
1315 r
->frac
= a
->frac_hi
| (a
->frac_lo
!= 0);
1318 static void frac128_truncjam(FloatParts128
*r
, FloatParts256
*a
)
1320 r
->frac_hi
= a
->frac_hi
;
1321 r
->frac_lo
= a
->frac_hm
| ((a
->frac_lm
| a
->frac_lo
) != 0);
1324 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1326 static void frac64_widen(FloatParts128
*r
, FloatParts64
*a
)
1328 r
->frac_hi
= a
->frac
;
1332 static void frac128_widen(FloatParts256
*r
, FloatParts128
*a
)
1334 r
->frac_hi
= a
->frac_hi
;
1335 r
->frac_hm
= a
->frac_lo
;
1340 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1342 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1343 #define FloatPartsN glue(FloatParts,N)
1344 #define FloatPartsW glue(FloatParts,W)
1349 #include "softfloat-parts-addsub.c.inc"
1350 #include "softfloat-parts.c.inc"
1357 #include "softfloat-parts-addsub.c.inc"
1358 #include "softfloat-parts.c.inc"
1364 #include "softfloat-parts-addsub.c.inc"
1373 * Pack/unpack routines with a specific FloatFmt.
1376 static void float16a_unpack_canonical(FloatParts64
*p
, float16 f
,
1377 float_status
*s
, const FloatFmt
*params
)
1379 float16_unpack_raw(p
, f
);
1380 parts_canonicalize(p
, s
, params
);
1383 static void float16_unpack_canonical(FloatParts64
*p
, float16 f
,
1386 float16a_unpack_canonical(p
, f
, s
, &float16_params
);
1389 static void bfloat16_unpack_canonical(FloatParts64
*p
, bfloat16 f
,
1392 bfloat16_unpack_raw(p
, f
);
1393 parts_canonicalize(p
, s
, &bfloat16_params
);
1396 static float16
float16a_round_pack_canonical(FloatParts64
*p
,
1398 const FloatFmt
*params
)
1400 parts_uncanon(p
, s
, params
);
1401 return float16_pack_raw(p
);
1404 static float16
float16_round_pack_canonical(FloatParts64
*p
,
1407 return float16a_round_pack_canonical(p
, s
, &float16_params
);
1410 static bfloat16
bfloat16_round_pack_canonical(FloatParts64
*p
,
1413 parts_uncanon(p
, s
, &bfloat16_params
);
1414 return bfloat16_pack_raw(p
);
1417 static void float32_unpack_canonical(FloatParts64
*p
, float32 f
,
1420 float32_unpack_raw(p
, f
);
1421 parts_canonicalize(p
, s
, &float32_params
);
1424 static float32
float32_round_pack_canonical(FloatParts64
*p
,
1427 parts_uncanon(p
, s
, &float32_params
);
1428 return float32_pack_raw(p
);
1431 static void float64_unpack_canonical(FloatParts64
*p
, float64 f
,
1434 float64_unpack_raw(p
, f
);
1435 parts_canonicalize(p
, s
, &float64_params
);
1438 static float64
float64_round_pack_canonical(FloatParts64
*p
,
1441 parts_uncanon(p
, s
, &float64_params
);
1442 return float64_pack_raw(p
);
1445 static void float128_unpack_canonical(FloatParts128
*p
, float128 f
,
1448 float128_unpack_raw(p
, f
);
1449 parts_canonicalize(p
, s
, &float128_params
);
1452 static float128
float128_round_pack_canonical(FloatParts128
*p
,
1455 parts_uncanon(p
, s
, &float128_params
);
1456 return float128_pack_raw(p
);
1460 * Addition and subtraction
1463 static float16 QEMU_FLATTEN
1464 float16_addsub(float16 a
, float16 b
, float_status
*status
, bool subtract
)
1466 FloatParts64 pa
, pb
, *pr
;
1468 float16_unpack_canonical(&pa
, a
, status
);
1469 float16_unpack_canonical(&pb
, b
, status
);
1470 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1472 return float16_round_pack_canonical(pr
, status
);
1475 float16
float16_add(float16 a
, float16 b
, float_status
*status
)
1477 return float16_addsub(a
, b
, status
, false);
1480 float16
float16_sub(float16 a
, float16 b
, float_status
*status
)
1482 return float16_addsub(a
, b
, status
, true);
1485 static float32 QEMU_SOFTFLOAT_ATTR
1486 soft_f32_addsub(float32 a
, float32 b
, float_status
*status
, bool subtract
)
1488 FloatParts64 pa
, pb
, *pr
;
1490 float32_unpack_canonical(&pa
, a
, status
);
1491 float32_unpack_canonical(&pb
, b
, status
);
1492 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1494 return float32_round_pack_canonical(pr
, status
);
1497 static float32
soft_f32_add(float32 a
, float32 b
, float_status
*status
)
1499 return soft_f32_addsub(a
, b
, status
, false);
1502 static float32
soft_f32_sub(float32 a
, float32 b
, float_status
*status
)
1504 return soft_f32_addsub(a
, b
, status
, true);
1507 static float64 QEMU_SOFTFLOAT_ATTR
1508 soft_f64_addsub(float64 a
, float64 b
, float_status
*status
, bool subtract
)
1510 FloatParts64 pa
, pb
, *pr
;
1512 float64_unpack_canonical(&pa
, a
, status
);
1513 float64_unpack_canonical(&pb
, b
, status
);
1514 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1516 return float64_round_pack_canonical(pr
, status
);
1519 static float64
soft_f64_add(float64 a
, float64 b
, float_status
*status
)
1521 return soft_f64_addsub(a
, b
, status
, false);
1524 static float64
soft_f64_sub(float64 a
, float64 b
, float_status
*status
)
1526 return soft_f64_addsub(a
, b
, status
, true);
1529 static float hard_f32_add(float a
, float b
)
1534 static float hard_f32_sub(float a
, float b
)
1539 static double hard_f64_add(double a
, double b
)
1544 static double hard_f64_sub(double a
, double b
)
1549 static bool f32_addsubmul_post(union_float32 a
, union_float32 b
)
1551 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
1552 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1554 return !(float32_is_zero(a
.s
) && float32_is_zero(b
.s
));
1557 static bool f64_addsubmul_post(union_float64 a
, union_float64 b
)
1559 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
1560 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1562 return !(float64_is_zero(a
.s
) && float64_is_zero(b
.s
));
1566 static float32
float32_addsub(float32 a
, float32 b
, float_status
*s
,
1567 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
)
1569 return float32_gen2(a
, b
, s
, hard
, soft
,
1570 f32_is_zon2
, f32_addsubmul_post
);
1573 static float64
float64_addsub(float64 a
, float64 b
, float_status
*s
,
1574 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
)
1576 return float64_gen2(a
, b
, s
, hard
, soft
,
1577 f64_is_zon2
, f64_addsubmul_post
);
1580 float32 QEMU_FLATTEN
1581 float32_add(float32 a
, float32 b
, float_status
*s
)
1583 return float32_addsub(a
, b
, s
, hard_f32_add
, soft_f32_add
);
1586 float32 QEMU_FLATTEN
1587 float32_sub(float32 a
, float32 b
, float_status
*s
)
1589 return float32_addsub(a
, b
, s
, hard_f32_sub
, soft_f32_sub
);
1592 float64 QEMU_FLATTEN
1593 float64_add(float64 a
, float64 b
, float_status
*s
)
1595 return float64_addsub(a
, b
, s
, hard_f64_add
, soft_f64_add
);
1598 float64 QEMU_FLATTEN
1599 float64_sub(float64 a
, float64 b
, float_status
*s
)
1601 return float64_addsub(a
, b
, s
, hard_f64_sub
, soft_f64_sub
);
1604 static bfloat16 QEMU_FLATTEN
1605 bfloat16_addsub(bfloat16 a
, bfloat16 b
, float_status
*status
, bool subtract
)
1607 FloatParts64 pa
, pb
, *pr
;
1609 bfloat16_unpack_canonical(&pa
, a
, status
);
1610 bfloat16_unpack_canonical(&pb
, b
, status
);
1611 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1613 return bfloat16_round_pack_canonical(pr
, status
);
1616 bfloat16
bfloat16_add(bfloat16 a
, bfloat16 b
, float_status
*status
)
1618 return bfloat16_addsub(a
, b
, status
, false);
1621 bfloat16
bfloat16_sub(bfloat16 a
, bfloat16 b
, float_status
*status
)
1623 return bfloat16_addsub(a
, b
, status
, true);
1626 static float128 QEMU_FLATTEN
1627 float128_addsub(float128 a
, float128 b
, float_status
*status
, bool subtract
)
1629 FloatParts128 pa
, pb
, *pr
;
1631 float128_unpack_canonical(&pa
, a
, status
);
1632 float128_unpack_canonical(&pb
, b
, status
);
1633 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1635 return float128_round_pack_canonical(pr
, status
);
1638 float128
float128_add(float128 a
, float128 b
, float_status
*status
)
1640 return float128_addsub(a
, b
, status
, false);
1643 float128
float128_sub(float128 a
, float128 b
, float_status
*status
)
1645 return float128_addsub(a
, b
, status
, true);
1652 float16 QEMU_FLATTEN
float16_mul(float16 a
, float16 b
, float_status
*status
)
1654 FloatParts64 pa
, pb
, *pr
;
1656 float16_unpack_canonical(&pa
, a
, status
);
1657 float16_unpack_canonical(&pb
, b
, status
);
1658 pr
= parts_mul(&pa
, &pb
, status
);
1660 return float16_round_pack_canonical(pr
, status
);
1663 static float32 QEMU_SOFTFLOAT_ATTR
1664 soft_f32_mul(float32 a
, float32 b
, float_status
*status
)
1666 FloatParts64 pa
, pb
, *pr
;
1668 float32_unpack_canonical(&pa
, a
, status
);
1669 float32_unpack_canonical(&pb
, b
, status
);
1670 pr
= parts_mul(&pa
, &pb
, status
);
1672 return float32_round_pack_canonical(pr
, status
);
1675 static float64 QEMU_SOFTFLOAT_ATTR
1676 soft_f64_mul(float64 a
, float64 b
, float_status
*status
)
1678 FloatParts64 pa
, pb
, *pr
;
1680 float64_unpack_canonical(&pa
, a
, status
);
1681 float64_unpack_canonical(&pb
, b
, status
);
1682 pr
= parts_mul(&pa
, &pb
, status
);
1684 return float64_round_pack_canonical(pr
, status
);
1687 static float hard_f32_mul(float a
, float b
)
1692 static double hard_f64_mul(double a
, double b
)
1697 float32 QEMU_FLATTEN
1698 float32_mul(float32 a
, float32 b
, float_status
*s
)
1700 return float32_gen2(a
, b
, s
, hard_f32_mul
, soft_f32_mul
,
1701 f32_is_zon2
, f32_addsubmul_post
);
1704 float64 QEMU_FLATTEN
1705 float64_mul(float64 a
, float64 b
, float_status
*s
)
1707 return float64_gen2(a
, b
, s
, hard_f64_mul
, soft_f64_mul
,
1708 f64_is_zon2
, f64_addsubmul_post
);
1711 bfloat16 QEMU_FLATTEN
1712 bfloat16_mul(bfloat16 a
, bfloat16 b
, float_status
*status
)
1714 FloatParts64 pa
, pb
, *pr
;
1716 bfloat16_unpack_canonical(&pa
, a
, status
);
1717 bfloat16_unpack_canonical(&pb
, b
, status
);
1718 pr
= parts_mul(&pa
, &pb
, status
);
1720 return bfloat16_round_pack_canonical(pr
, status
);
1723 float128 QEMU_FLATTEN
1724 float128_mul(float128 a
, float128 b
, float_status
*status
)
1726 FloatParts128 pa
, pb
, *pr
;
1728 float128_unpack_canonical(&pa
, a
, status
);
1729 float128_unpack_canonical(&pb
, b
, status
);
1730 pr
= parts_mul(&pa
, &pb
, status
);
1732 return float128_round_pack_canonical(pr
, status
);
1736 * Fused multiply-add
1739 float16 QEMU_FLATTEN
float16_muladd(float16 a
, float16 b
, float16 c
,
1740 int flags
, float_status
*status
)
1742 FloatParts64 pa
, pb
, pc
, *pr
;
1744 float16_unpack_canonical(&pa
, a
, status
);
1745 float16_unpack_canonical(&pb
, b
, status
);
1746 float16_unpack_canonical(&pc
, c
, status
);
1747 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1749 return float16_round_pack_canonical(pr
, status
);
1752 static float32 QEMU_SOFTFLOAT_ATTR
1753 soft_f32_muladd(float32 a
, float32 b
, float32 c
, int flags
,
1754 float_status
*status
)
1756 FloatParts64 pa
, pb
, pc
, *pr
;
1758 float32_unpack_canonical(&pa
, a
, status
);
1759 float32_unpack_canonical(&pb
, b
, status
);
1760 float32_unpack_canonical(&pc
, c
, status
);
1761 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1763 return float32_round_pack_canonical(pr
, status
);
1766 static float64 QEMU_SOFTFLOAT_ATTR
1767 soft_f64_muladd(float64 a
, float64 b
, float64 c
, int flags
,
1768 float_status
*status
)
1770 FloatParts64 pa
, pb
, pc
, *pr
;
1772 float64_unpack_canonical(&pa
, a
, status
);
1773 float64_unpack_canonical(&pb
, b
, status
);
1774 float64_unpack_canonical(&pc
, c
, status
);
1775 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1777 return float64_round_pack_canonical(pr
, status
);
1780 static bool force_soft_fma
;
1782 float32 QEMU_FLATTEN
1783 float32_muladd(float32 xa
, float32 xb
, float32 xc
, int flags
, float_status
*s
)
1785 union_float32 ua
, ub
, uc
, ur
;
1791 if (unlikely(!can_use_fpu(s
))) {
1794 if (unlikely(flags
& float_muladd_halve_result
)) {
1798 float32_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
1799 if (unlikely(!f32_is_zon3(ua
, ub
, uc
))) {
1803 if (unlikely(force_soft_fma
)) {
1808 * When (a || b) == 0, there's no need to check for under/over flow,
1809 * since we know the addend is (normal || 0) and the product is 0.
1811 if (float32_is_zero(ua
.s
) || float32_is_zero(ub
.s
)) {
1815 prod_sign
= float32_is_neg(ua
.s
) ^ float32_is_neg(ub
.s
);
1816 prod_sign
^= !!(flags
& float_muladd_negate_product
);
1817 up
.s
= float32_set_sign(float32_zero
, prod_sign
);
1819 if (flags
& float_muladd_negate_c
) {
1824 union_float32 ua_orig
= ua
;
1825 union_float32 uc_orig
= uc
;
1827 if (flags
& float_muladd_negate_product
) {
1830 if (flags
& float_muladd_negate_c
) {
1834 ur
.h
= fmaf(ua
.h
, ub
.h
, uc
.h
);
1836 if (unlikely(f32_is_inf(ur
))) {
1837 float_raise(float_flag_overflow
, s
);
1838 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
)) {
1844 if (flags
& float_muladd_negate_result
) {
1845 return float32_chs(ur
.s
);
1850 return soft_f32_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
1853 float64 QEMU_FLATTEN
1854 float64_muladd(float64 xa
, float64 xb
, float64 xc
, int flags
, float_status
*s
)
1856 union_float64 ua
, ub
, uc
, ur
;
1862 if (unlikely(!can_use_fpu(s
))) {
1865 if (unlikely(flags
& float_muladd_halve_result
)) {
1869 float64_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
1870 if (unlikely(!f64_is_zon3(ua
, ub
, uc
))) {
1874 if (unlikely(force_soft_fma
)) {
1879 * When (a || b) == 0, there's no need to check for under/over flow,
1880 * since we know the addend is (normal || 0) and the product is 0.
1882 if (float64_is_zero(ua
.s
) || float64_is_zero(ub
.s
)) {
1886 prod_sign
= float64_is_neg(ua
.s
) ^ float64_is_neg(ub
.s
);
1887 prod_sign
^= !!(flags
& float_muladd_negate_product
);
1888 up
.s
= float64_set_sign(float64_zero
, prod_sign
);
1890 if (flags
& float_muladd_negate_c
) {
1895 union_float64 ua_orig
= ua
;
1896 union_float64 uc_orig
= uc
;
1898 if (flags
& float_muladd_negate_product
) {
1901 if (flags
& float_muladd_negate_c
) {
1905 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
1907 if (unlikely(f64_is_inf(ur
))) {
1908 float_raise(float_flag_overflow
, s
);
1909 } else if (unlikely(fabs(ur
.h
) <= FLT_MIN
)) {
1915 if (flags
& float_muladd_negate_result
) {
1916 return float64_chs(ur
.s
);
1921 return soft_f64_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
1924 bfloat16 QEMU_FLATTEN
bfloat16_muladd(bfloat16 a
, bfloat16 b
, bfloat16 c
,
1925 int flags
, float_status
*status
)
1927 FloatParts64 pa
, pb
, pc
, *pr
;
1929 bfloat16_unpack_canonical(&pa
, a
, status
);
1930 bfloat16_unpack_canonical(&pb
, b
, status
);
1931 bfloat16_unpack_canonical(&pc
, c
, status
);
1932 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1934 return bfloat16_round_pack_canonical(pr
, status
);
1937 float128 QEMU_FLATTEN
float128_muladd(float128 a
, float128 b
, float128 c
,
1938 int flags
, float_status
*status
)
1940 FloatParts128 pa
, pb
, pc
, *pr
;
1942 float128_unpack_canonical(&pa
, a
, status
);
1943 float128_unpack_canonical(&pb
, b
, status
);
1944 float128_unpack_canonical(&pc
, c
, status
);
1945 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1947 return float128_round_pack_canonical(pr
, status
);
1954 float16
float16_div(float16 a
, float16 b
, float_status
*status
)
1956 FloatParts64 pa
, pb
, *pr
;
1958 float16_unpack_canonical(&pa
, a
, status
);
1959 float16_unpack_canonical(&pb
, b
, status
);
1960 pr
= parts_div(&pa
, &pb
, status
);
1962 return float16_round_pack_canonical(pr
, status
);
1965 static float32 QEMU_SOFTFLOAT_ATTR
1966 soft_f32_div(float32 a
, float32 b
, float_status
*status
)
1968 FloatParts64 pa
, pb
, *pr
;
1970 float32_unpack_canonical(&pa
, a
, status
);
1971 float32_unpack_canonical(&pb
, b
, status
);
1972 pr
= parts_div(&pa
, &pb
, status
);
1974 return float32_round_pack_canonical(pr
, status
);
1977 static float64 QEMU_SOFTFLOAT_ATTR
1978 soft_f64_div(float64 a
, float64 b
, float_status
*status
)
1980 FloatParts64 pa
, pb
, *pr
;
1982 float64_unpack_canonical(&pa
, a
, status
);
1983 float64_unpack_canonical(&pb
, b
, status
);
1984 pr
= parts_div(&pa
, &pb
, status
);
1986 return float64_round_pack_canonical(pr
, status
);
1989 static float hard_f32_div(float a
, float b
)
1994 static double hard_f64_div(double a
, double b
)
1999 static bool f32_div_pre(union_float32 a
, union_float32 b
)
2001 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2002 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2003 fpclassify(b
.h
) == FP_NORMAL
;
2005 return float32_is_zero_or_normal(a
.s
) && float32_is_normal(b
.s
);
2008 static bool f64_div_pre(union_float64 a
, union_float64 b
)
2010 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2011 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2012 fpclassify(b
.h
) == FP_NORMAL
;
2014 return float64_is_zero_or_normal(a
.s
) && float64_is_normal(b
.s
);
2017 static bool f32_div_post(union_float32 a
, union_float32 b
)
2019 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2020 return fpclassify(a
.h
) != FP_ZERO
;
2022 return !float32_is_zero(a
.s
);
2025 static bool f64_div_post(union_float64 a
, union_float64 b
)
2027 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2028 return fpclassify(a
.h
) != FP_ZERO
;
2030 return !float64_is_zero(a
.s
);
2033 float32 QEMU_FLATTEN
2034 float32_div(float32 a
, float32 b
, float_status
*s
)
2036 return float32_gen2(a
, b
, s
, hard_f32_div
, soft_f32_div
,
2037 f32_div_pre
, f32_div_post
);
2040 float64 QEMU_FLATTEN
2041 float64_div(float64 a
, float64 b
, float_status
*s
)
2043 return float64_gen2(a
, b
, s
, hard_f64_div
, soft_f64_div
,
2044 f64_div_pre
, f64_div_post
);
2047 bfloat16 QEMU_FLATTEN
2048 bfloat16_div(bfloat16 a
, bfloat16 b
, float_status
*status
)
2050 FloatParts64 pa
, pb
, *pr
;
2052 bfloat16_unpack_canonical(&pa
, a
, status
);
2053 bfloat16_unpack_canonical(&pb
, b
, status
);
2054 pr
= parts_div(&pa
, &pb
, status
);
2056 return bfloat16_round_pack_canonical(pr
, status
);
2059 float128 QEMU_FLATTEN
2060 float128_div(float128 a
, float128 b
, float_status
*status
)
2062 FloatParts128 pa
, pb
, *pr
;
2064 float128_unpack_canonical(&pa
, a
, status
);
2065 float128_unpack_canonical(&pb
, b
, status
);
2066 pr
= parts_div(&pa
, &pb
, status
);
2068 return float128_round_pack_canonical(pr
, status
);
2072 * Float to Float conversions
2074 * Returns the result of converting one float format to another. The
2075 * conversion is performed according to the IEC/IEEE Standard for
2076 * Binary Floating-Point Arithmetic.
2078 * Usually this only needs to take care of raising invalid exceptions
2079 * and handling the conversion on NaNs.
2082 static void parts_float_to_ahp(FloatParts64
*a
, float_status
*s
)
2085 case float_class_qnan
:
2086 case float_class_snan
:
2088 * There is no NaN in the destination format. Raise Invalid
2089 * and return a zero with the sign of the input NaN.
2091 float_raise(float_flag_invalid
, s
);
2092 a
->cls
= float_class_zero
;
2095 case float_class_inf
:
2097 * There is no Inf in the destination format. Raise Invalid
2098 * and return the maximum normal with the correct sign.
2100 float_raise(float_flag_invalid
, s
);
2101 a
->cls
= float_class_normal
;
2102 a
->exp
= float16_params_ahp
.exp_max
;
2103 a
->frac
= MAKE_64BIT_MASK(float16_params_ahp
.frac_shift
,
2104 float16_params_ahp
.frac_size
+ 1);
2107 case float_class_normal
:
2108 case float_class_zero
:
2112 g_assert_not_reached();
2116 static void parts64_float_to_float(FloatParts64
*a
, float_status
*s
)
2118 if (is_nan(a
->cls
)) {
2119 parts_return_nan(a
, s
);
2123 static void parts128_float_to_float(FloatParts128
*a
, float_status
*s
)
2125 if (is_nan(a
->cls
)) {
2126 parts_return_nan(a
, s
);
2130 #define parts_float_to_float(P, S) \
2131 PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2133 static void parts_float_to_float_narrow(FloatParts64
*a
, FloatParts128
*b
,
2140 if (a
->cls
== float_class_normal
) {
2141 frac_truncjam(a
, b
);
2142 } else if (is_nan(a
->cls
)) {
2143 /* Discard the low bits of the NaN. */
2144 a
->frac
= b
->frac_hi
;
2145 parts_return_nan(a
, s
);
2149 static void parts_float_to_float_widen(FloatParts128
*a
, FloatParts64
*b
,
2157 if (is_nan(a
->cls
)) {
2158 parts_return_nan(a
, s
);
2162 float32
float16_to_float32(float16 a
, bool ieee
, float_status
*s
)
2164 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2167 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2168 parts_float_to_float(&p
, s
);
2169 return float32_round_pack_canonical(&p
, s
);
2172 float64
float16_to_float64(float16 a
, bool ieee
, float_status
*s
)
2174 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2177 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2178 parts_float_to_float(&p
, s
);
2179 return float64_round_pack_canonical(&p
, s
);
2182 float16
float32_to_float16(float32 a
, bool ieee
, float_status
*s
)
2185 const FloatFmt
*fmt
;
2187 float32_unpack_canonical(&p
, a
, s
);
2189 parts_float_to_float(&p
, s
);
2190 fmt
= &float16_params
;
2192 parts_float_to_ahp(&p
, s
);
2193 fmt
= &float16_params_ahp
;
2195 return float16a_round_pack_canonical(&p
, s
, fmt
);
2198 static float64 QEMU_SOFTFLOAT_ATTR
2199 soft_float32_to_float64(float32 a
, float_status
*s
)
2203 float32_unpack_canonical(&p
, a
, s
);
2204 parts_float_to_float(&p
, s
);
2205 return float64_round_pack_canonical(&p
, s
);
2208 float64
float32_to_float64(float32 a
, float_status
*s
)
2210 if (likely(float32_is_normal(a
))) {
2211 /* Widening conversion can never produce inexact results. */
2217 } else if (float32_is_zero(a
)) {
2218 return float64_set_sign(float64_zero
, float32_is_neg(a
));
2220 return soft_float32_to_float64(a
, s
);
2224 float16
float64_to_float16(float64 a
, bool ieee
, float_status
*s
)
2227 const FloatFmt
*fmt
;
2229 float64_unpack_canonical(&p
, a
, s
);
2231 parts_float_to_float(&p
, s
);
2232 fmt
= &float16_params
;
2234 parts_float_to_ahp(&p
, s
);
2235 fmt
= &float16_params_ahp
;
2237 return float16a_round_pack_canonical(&p
, s
, fmt
);
2240 float32
float64_to_float32(float64 a
, float_status
*s
)
2244 float64_unpack_canonical(&p
, a
, s
);
2245 parts_float_to_float(&p
, s
);
2246 return float32_round_pack_canonical(&p
, s
);
2249 float32
bfloat16_to_float32(bfloat16 a
, float_status
*s
)
2253 bfloat16_unpack_canonical(&p
, a
, s
);
2254 parts_float_to_float(&p
, s
);
2255 return float32_round_pack_canonical(&p
, s
);
2258 float64
bfloat16_to_float64(bfloat16 a
, float_status
*s
)
2262 bfloat16_unpack_canonical(&p
, a
, s
);
2263 parts_float_to_float(&p
, s
);
2264 return float64_round_pack_canonical(&p
, s
);
2267 bfloat16
float32_to_bfloat16(float32 a
, float_status
*s
)
2271 float32_unpack_canonical(&p
, a
, s
);
2272 parts_float_to_float(&p
, s
);
2273 return bfloat16_round_pack_canonical(&p
, s
);
2276 bfloat16
float64_to_bfloat16(float64 a
, float_status
*s
)
2280 float64_unpack_canonical(&p
, a
, s
);
2281 parts_float_to_float(&p
, s
);
2282 return bfloat16_round_pack_canonical(&p
, s
);
2285 float32
float128_to_float32(float128 a
, float_status
*s
)
2290 float128_unpack_canonical(&p128
, a
, s
);
2291 parts_float_to_float_narrow(&p64
, &p128
, s
);
2292 return float32_round_pack_canonical(&p64
, s
);
2295 float64
float128_to_float64(float128 a
, float_status
*s
)
2300 float128_unpack_canonical(&p128
, a
, s
);
2301 parts_float_to_float_narrow(&p64
, &p128
, s
);
2302 return float64_round_pack_canonical(&p64
, s
);
2305 float128
float32_to_float128(float32 a
, float_status
*s
)
2310 float32_unpack_canonical(&p64
, a
, s
);
2311 parts_float_to_float_widen(&p128
, &p64
, s
);
2312 return float128_round_pack_canonical(&p128
, s
);
2315 float128
float64_to_float128(float64 a
, float_status
*s
)
2320 float64_unpack_canonical(&p64
, a
, s
);
2321 parts_float_to_float_widen(&p128
, &p64
, s
);
2322 return float128_round_pack_canonical(&p128
, s
);
2326 * Round to integral value
2329 float16
float16_round_to_int(float16 a
, float_status
*s
)
2333 float16_unpack_canonical(&p
, a
, s
);
2334 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float16_params
);
2335 return float16_round_pack_canonical(&p
, s
);
2338 float32
float32_round_to_int(float32 a
, float_status
*s
)
2342 float32_unpack_canonical(&p
, a
, s
);
2343 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float32_params
);
2344 return float32_round_pack_canonical(&p
, s
);
2347 float64
float64_round_to_int(float64 a
, float_status
*s
)
2351 float64_unpack_canonical(&p
, a
, s
);
2352 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float64_params
);
2353 return float64_round_pack_canonical(&p
, s
);
2356 bfloat16
bfloat16_round_to_int(bfloat16 a
, float_status
*s
)
2360 bfloat16_unpack_canonical(&p
, a
, s
);
2361 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &bfloat16_params
);
2362 return bfloat16_round_pack_canonical(&p
, s
);
2365 float128
float128_round_to_int(float128 a
, float_status
*s
)
2369 float128_unpack_canonical(&p
, a
, s
);
2370 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float128_params
);
2371 return float128_round_pack_canonical(&p
, s
);
2375 * Floating-point to signed integer conversions
2378 int8_t float16_to_int8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2383 float16_unpack_canonical(&p
, a
, s
);
2384 return parts_float_to_sint(&p
, rmode
, scale
, INT8_MIN
, INT8_MAX
, s
);
2387 int16_t float16_to_int16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2392 float16_unpack_canonical(&p
, a
, s
);
2393 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2396 int32_t float16_to_int32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2401 float16_unpack_canonical(&p
, a
, s
);
2402 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2405 int64_t float16_to_int64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2410 float16_unpack_canonical(&p
, a
, s
);
2411 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2414 int16_t float32_to_int16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2419 float32_unpack_canonical(&p
, a
, s
);
2420 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2423 int32_t float32_to_int32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2428 float32_unpack_canonical(&p
, a
, s
);
2429 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2432 int64_t float32_to_int64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2437 float32_unpack_canonical(&p
, a
, s
);
2438 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2441 int16_t float64_to_int16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2446 float64_unpack_canonical(&p
, a
, s
);
2447 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2450 int32_t float64_to_int32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2455 float64_unpack_canonical(&p
, a
, s
);
2456 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2459 int64_t float64_to_int64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2464 float64_unpack_canonical(&p
, a
, s
);
2465 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2468 int16_t bfloat16_to_int16_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2473 bfloat16_unpack_canonical(&p
, a
, s
);
2474 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2477 int32_t bfloat16_to_int32_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2482 bfloat16_unpack_canonical(&p
, a
, s
);
2483 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2486 int64_t bfloat16_to_int64_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2491 bfloat16_unpack_canonical(&p
, a
, s
);
2492 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2495 static int32_t float128_to_int32_scalbn(float128 a
, FloatRoundMode rmode
,
2496 int scale
, float_status
*s
)
2500 float128_unpack_canonical(&p
, a
, s
);
2501 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2504 static int64_t float128_to_int64_scalbn(float128 a
, FloatRoundMode rmode
,
2505 int scale
, float_status
*s
)
2509 float128_unpack_canonical(&p
, a
, s
);
2510 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2513 int8_t float16_to_int8(float16 a
, float_status
*s
)
2515 return float16_to_int8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2518 int16_t float16_to_int16(float16 a
, float_status
*s
)
2520 return float16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2523 int32_t float16_to_int32(float16 a
, float_status
*s
)
2525 return float16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2528 int64_t float16_to_int64(float16 a
, float_status
*s
)
2530 return float16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2533 int16_t float32_to_int16(float32 a
, float_status
*s
)
2535 return float32_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2538 int32_t float32_to_int32(float32 a
, float_status
*s
)
2540 return float32_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2543 int64_t float32_to_int64(float32 a
, float_status
*s
)
2545 return float32_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2548 int16_t float64_to_int16(float64 a
, float_status
*s
)
2550 return float64_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2553 int32_t float64_to_int32(float64 a
, float_status
*s
)
2555 return float64_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2558 int64_t float64_to_int64(float64 a
, float_status
*s
)
2560 return float64_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2563 int32_t float128_to_int32(float128 a
, float_status
*s
)
2565 return float128_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2568 int64_t float128_to_int64(float128 a
, float_status
*s
)
2570 return float128_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2573 int16_t float16_to_int16_round_to_zero(float16 a
, float_status
*s
)
2575 return float16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2578 int32_t float16_to_int32_round_to_zero(float16 a
, float_status
*s
)
2580 return float16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2583 int64_t float16_to_int64_round_to_zero(float16 a
, float_status
*s
)
2585 return float16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2588 int16_t float32_to_int16_round_to_zero(float32 a
, float_status
*s
)
2590 return float32_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2593 int32_t float32_to_int32_round_to_zero(float32 a
, float_status
*s
)
2595 return float32_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2598 int64_t float32_to_int64_round_to_zero(float32 a
, float_status
*s
)
2600 return float32_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2603 int16_t float64_to_int16_round_to_zero(float64 a
, float_status
*s
)
2605 return float64_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2608 int32_t float64_to_int32_round_to_zero(float64 a
, float_status
*s
)
2610 return float64_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2613 int64_t float64_to_int64_round_to_zero(float64 a
, float_status
*s
)
2615 return float64_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2618 int32_t float128_to_int32_round_to_zero(float128 a
, float_status
*s
)
2620 return float128_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2623 int64_t float128_to_int64_round_to_zero(float128 a
, float_status
*s
)
2625 return float128_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2628 int16_t bfloat16_to_int16(bfloat16 a
, float_status
*s
)
2630 return bfloat16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2633 int32_t bfloat16_to_int32(bfloat16 a
, float_status
*s
)
2635 return bfloat16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2638 int64_t bfloat16_to_int64(bfloat16 a
, float_status
*s
)
2640 return bfloat16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2643 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a
, float_status
*s
)
2645 return bfloat16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2648 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a
, float_status
*s
)
2650 return bfloat16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2653 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a
, float_status
*s
)
2655 return bfloat16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2659 * Floating-point to unsigned integer conversions
2662 uint8_t float16_to_uint8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2667 float16_unpack_canonical(&p
, a
, s
);
2668 return parts_float_to_uint(&p
, rmode
, scale
, UINT8_MAX
, s
);
2671 uint16_t float16_to_uint16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2676 float16_unpack_canonical(&p
, a
, s
);
2677 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2680 uint32_t float16_to_uint32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2685 float16_unpack_canonical(&p
, a
, s
);
2686 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2689 uint64_t float16_to_uint64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2694 float16_unpack_canonical(&p
, a
, s
);
2695 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2698 uint16_t float32_to_uint16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2703 float32_unpack_canonical(&p
, a
, s
);
2704 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2707 uint32_t float32_to_uint32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2712 float32_unpack_canonical(&p
, a
, s
);
2713 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2716 uint64_t float32_to_uint64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2721 float32_unpack_canonical(&p
, a
, s
);
2722 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2725 uint16_t float64_to_uint16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2730 float64_unpack_canonical(&p
, a
, s
);
2731 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2734 uint32_t float64_to_uint32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2739 float64_unpack_canonical(&p
, a
, s
);
2740 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2743 uint64_t float64_to_uint64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2748 float64_unpack_canonical(&p
, a
, s
);
2749 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2752 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2753 int scale
, float_status
*s
)
2757 bfloat16_unpack_canonical(&p
, a
, s
);
2758 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2761 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2762 int scale
, float_status
*s
)
2766 bfloat16_unpack_canonical(&p
, a
, s
);
2767 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2770 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2771 int scale
, float_status
*s
)
2775 bfloat16_unpack_canonical(&p
, a
, s
);
2776 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2779 static uint32_t float128_to_uint32_scalbn(float128 a
, FloatRoundMode rmode
,
2780 int scale
, float_status
*s
)
2784 float128_unpack_canonical(&p
, a
, s
);
2785 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2788 static uint64_t float128_to_uint64_scalbn(float128 a
, FloatRoundMode rmode
,
2789 int scale
, float_status
*s
)
2793 float128_unpack_canonical(&p
, a
, s
);
2794 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2797 uint8_t float16_to_uint8(float16 a
, float_status
*s
)
2799 return float16_to_uint8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2802 uint16_t float16_to_uint16(float16 a
, float_status
*s
)
2804 return float16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2807 uint32_t float16_to_uint32(float16 a
, float_status
*s
)
2809 return float16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2812 uint64_t float16_to_uint64(float16 a
, float_status
*s
)
2814 return float16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2817 uint16_t float32_to_uint16(float32 a
, float_status
*s
)
2819 return float32_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2822 uint32_t float32_to_uint32(float32 a
, float_status
*s
)
2824 return float32_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2827 uint64_t float32_to_uint64(float32 a
, float_status
*s
)
2829 return float32_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2832 uint16_t float64_to_uint16(float64 a
, float_status
*s
)
2834 return float64_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2837 uint32_t float64_to_uint32(float64 a
, float_status
*s
)
2839 return float64_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2842 uint64_t float64_to_uint64(float64 a
, float_status
*s
)
2844 return float64_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2847 uint32_t float128_to_uint32(float128 a
, float_status
*s
)
2849 return float128_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2852 uint64_t float128_to_uint64(float128 a
, float_status
*s
)
2854 return float128_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2857 uint16_t float16_to_uint16_round_to_zero(float16 a
, float_status
*s
)
2859 return float16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
2862 uint32_t float16_to_uint32_round_to_zero(float16 a
, float_status
*s
)
2864 return float16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
2867 uint64_t float16_to_uint64_round_to_zero(float16 a
, float_status
*s
)
2869 return float16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
2872 uint16_t float32_to_uint16_round_to_zero(float32 a
, float_status
*s
)
2874 return float32_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
2877 uint32_t float32_to_uint32_round_to_zero(float32 a
, float_status
*s
)
2879 return float32_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
2882 uint64_t float32_to_uint64_round_to_zero(float32 a
, float_status
*s
)
2884 return float32_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
2887 uint16_t float64_to_uint16_round_to_zero(float64 a
, float_status
*s
)
2889 return float64_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
2892 uint32_t float64_to_uint32_round_to_zero(float64 a
, float_status
*s
)
2894 return float64_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
2897 uint64_t float64_to_uint64_round_to_zero(float64 a
, float_status
*s
)
2899 return float64_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
2902 uint32_t float128_to_uint32_round_to_zero(float128 a
, float_status
*s
)
2904 return float128_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
2907 uint64_t float128_to_uint64_round_to_zero(float128 a
, float_status
*s
)
2909 return float128_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
2912 uint16_t bfloat16_to_uint16(bfloat16 a
, float_status
*s
)
2914 return bfloat16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2917 uint32_t bfloat16_to_uint32(bfloat16 a
, float_status
*s
)
2919 return bfloat16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2922 uint64_t bfloat16_to_uint64(bfloat16 a
, float_status
*s
)
2924 return bfloat16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2927 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a
, float_status
*s
)
2929 return bfloat16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
2932 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a
, float_status
*s
)
2934 return bfloat16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
2937 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a
, float_status
*s
)
2939 return bfloat16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
2943 * Integer to float conversions
2945 * Returns the result of converting the two's complement integer `a'
2946 * to the floating-point format. The conversion is performed according
2947 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2950 static FloatParts64
int_to_float(int64_t a
, int scale
, float_status
*status
)
2952 FloatParts64 r
= { .sign
= false };
2955 r
.cls
= float_class_zero
;
2960 r
.cls
= float_class_normal
;
2966 scale
= MIN(MAX(scale
, -0x10000), 0x10000);
2968 r
.exp
= DECOMPOSED_BINARY_POINT
- shift
+ scale
;
2969 r
.frac
= f
<< shift
;
2975 float16
int64_to_float16_scalbn(int64_t a
, int scale
, float_status
*status
)
2977 FloatParts64 pa
= int_to_float(a
, scale
, status
);
2978 return float16_round_pack_canonical(&pa
, status
);
2981 float16
int32_to_float16_scalbn(int32_t a
, int scale
, float_status
*status
)
2983 return int64_to_float16_scalbn(a
, scale
, status
);
2986 float16
int16_to_float16_scalbn(int16_t a
, int scale
, float_status
*status
)
2988 return int64_to_float16_scalbn(a
, scale
, status
);
2991 float16
int64_to_float16(int64_t a
, float_status
*status
)
2993 return int64_to_float16_scalbn(a
, 0, status
);
2996 float16
int32_to_float16(int32_t a
, float_status
*status
)
2998 return int64_to_float16_scalbn(a
, 0, status
);
3001 float16
int16_to_float16(int16_t a
, float_status
*status
)
3003 return int64_to_float16_scalbn(a
, 0, status
);
3006 float16
int8_to_float16(int8_t a
, float_status
*status
)
3008 return int64_to_float16_scalbn(a
, 0, status
);
3011 float32
int64_to_float32_scalbn(int64_t a
, int scale
, float_status
*status
)
3013 FloatParts64 pa
= int_to_float(a
, scale
, status
);
3014 return float32_round_pack_canonical(&pa
, status
);
3017 float32
int32_to_float32_scalbn(int32_t a
, int scale
, float_status
*status
)
3019 return int64_to_float32_scalbn(a
, scale
, status
);
3022 float32
int16_to_float32_scalbn(int16_t a
, int scale
, float_status
*status
)
3024 return int64_to_float32_scalbn(a
, scale
, status
);
3027 float32
int64_to_float32(int64_t a
, float_status
*status
)
3029 return int64_to_float32_scalbn(a
, 0, status
);
3032 float32
int32_to_float32(int32_t a
, float_status
*status
)
3034 return int64_to_float32_scalbn(a
, 0, status
);
3037 float32
int16_to_float32(int16_t a
, float_status
*status
)
3039 return int64_to_float32_scalbn(a
, 0, status
);
3042 float64
int64_to_float64_scalbn(int64_t a
, int scale
, float_status
*status
)
3044 FloatParts64 pa
= int_to_float(a
, scale
, status
);
3045 return float64_round_pack_canonical(&pa
, status
);
3048 float64
int32_to_float64_scalbn(int32_t a
, int scale
, float_status
*status
)
3050 return int64_to_float64_scalbn(a
, scale
, status
);
3053 float64
int16_to_float64_scalbn(int16_t a
, int scale
, float_status
*status
)
3055 return int64_to_float64_scalbn(a
, scale
, status
);
3058 float64
int64_to_float64(int64_t a
, float_status
*status
)
3060 return int64_to_float64_scalbn(a
, 0, status
);
3063 float64
int32_to_float64(int32_t a
, float_status
*status
)
3065 return int64_to_float64_scalbn(a
, 0, status
);
3068 float64
int16_to_float64(int16_t a
, float_status
*status
)
3070 return int64_to_float64_scalbn(a
, 0, status
);
3074 * Returns the result of converting the two's complement integer `a'
3075 * to the bfloat16 format.
3078 bfloat16
int64_to_bfloat16_scalbn(int64_t a
, int scale
, float_status
*status
)
3080 FloatParts64 pa
= int_to_float(a
, scale
, status
);
3081 return bfloat16_round_pack_canonical(&pa
, status
);
3084 bfloat16
int32_to_bfloat16_scalbn(int32_t a
, int scale
, float_status
*status
)
3086 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3089 bfloat16
int16_to_bfloat16_scalbn(int16_t a
, int scale
, float_status
*status
)
3091 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3094 bfloat16
int64_to_bfloat16(int64_t a
, float_status
*status
)
3096 return int64_to_bfloat16_scalbn(a
, 0, status
);
3099 bfloat16
int32_to_bfloat16(int32_t a
, float_status
*status
)
3101 return int64_to_bfloat16_scalbn(a
, 0, status
);
3104 bfloat16
int16_to_bfloat16(int16_t a
, float_status
*status
)
3106 return int64_to_bfloat16_scalbn(a
, 0, status
);
3110 * Unsigned Integer to float conversions
3112 * Returns the result of converting the unsigned integer `a' to the
3113 * floating-point format. The conversion is performed according to the
3114 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3117 static FloatParts64
uint_to_float(uint64_t a
, int scale
, float_status
*status
)
3119 FloatParts64 r
= { .sign
= false };
3123 r
.cls
= float_class_zero
;
3125 scale
= MIN(MAX(scale
, -0x10000), 0x10000);
3127 r
.cls
= float_class_normal
;
3128 r
.exp
= DECOMPOSED_BINARY_POINT
- shift
+ scale
;
3129 r
.frac
= a
<< shift
;
3135 float16
uint64_to_float16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3137 FloatParts64 pa
= uint_to_float(a
, scale
, status
);
3138 return float16_round_pack_canonical(&pa
, status
);
3141 float16
uint32_to_float16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3143 return uint64_to_float16_scalbn(a
, scale
, status
);
3146 float16
uint16_to_float16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3148 return uint64_to_float16_scalbn(a
, scale
, status
);
3151 float16
uint64_to_float16(uint64_t a
, float_status
*status
)
3153 return uint64_to_float16_scalbn(a
, 0, status
);
3156 float16
uint32_to_float16(uint32_t a
, float_status
*status
)
3158 return uint64_to_float16_scalbn(a
, 0, status
);
3161 float16
uint16_to_float16(uint16_t a
, float_status
*status
)
3163 return uint64_to_float16_scalbn(a
, 0, status
);
3166 float16
uint8_to_float16(uint8_t a
, float_status
*status
)
3168 return uint64_to_float16_scalbn(a
, 0, status
);
3171 float32
uint64_to_float32_scalbn(uint64_t a
, int scale
, float_status
*status
)
3173 FloatParts64 pa
= uint_to_float(a
, scale
, status
);
3174 return float32_round_pack_canonical(&pa
, status
);
3177 float32
uint32_to_float32_scalbn(uint32_t a
, int scale
, float_status
*status
)
3179 return uint64_to_float32_scalbn(a
, scale
, status
);
3182 float32
uint16_to_float32_scalbn(uint16_t a
, int scale
, float_status
*status
)
3184 return uint64_to_float32_scalbn(a
, scale
, status
);
3187 float32
uint64_to_float32(uint64_t a
, float_status
*status
)
3189 return uint64_to_float32_scalbn(a
, 0, status
);
3192 float32
uint32_to_float32(uint32_t a
, float_status
*status
)
3194 return uint64_to_float32_scalbn(a
, 0, status
);
3197 float32
uint16_to_float32(uint16_t a
, float_status
*status
)
3199 return uint64_to_float32_scalbn(a
, 0, status
);
3202 float64
uint64_to_float64_scalbn(uint64_t a
, int scale
, float_status
*status
)
3204 FloatParts64 pa
= uint_to_float(a
, scale
, status
);
3205 return float64_round_pack_canonical(&pa
, status
);
3208 float64
uint32_to_float64_scalbn(uint32_t a
, int scale
, float_status
*status
)
3210 return uint64_to_float64_scalbn(a
, scale
, status
);
3213 float64
uint16_to_float64_scalbn(uint16_t a
, int scale
, float_status
*status
)
3215 return uint64_to_float64_scalbn(a
, scale
, status
);
3218 float64
uint64_to_float64(uint64_t a
, float_status
*status
)
3220 return uint64_to_float64_scalbn(a
, 0, status
);
3223 float64
uint32_to_float64(uint32_t a
, float_status
*status
)
3225 return uint64_to_float64_scalbn(a
, 0, status
);
3228 float64
uint16_to_float64(uint16_t a
, float_status
*status
)
3230 return uint64_to_float64_scalbn(a
, 0, status
);
3234 * Returns the result of converting the unsigned integer `a' to the
3238 bfloat16
uint64_to_bfloat16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3240 FloatParts64 pa
= uint_to_float(a
, scale
, status
);
3241 return bfloat16_round_pack_canonical(&pa
, status
);
3244 bfloat16
uint32_to_bfloat16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3246 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3249 bfloat16
uint16_to_bfloat16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3251 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3254 bfloat16
uint64_to_bfloat16(uint64_t a
, float_status
*status
)
3256 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3259 bfloat16
uint32_to_bfloat16(uint32_t a
, float_status
*status
)
3261 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3264 bfloat16
uint16_to_bfloat16(uint16_t a
, float_status
*status
)
3266 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3270 /* min() and max() functions. These can't be implemented as
3271 * 'compare and pick one input' because that would mishandle
3272 * NaNs and +0 vs -0.
3274 * minnum() and maxnum() functions. These are similar to the min()
3275 * and max() functions but if one of the arguments is a QNaN and
3276 * the other is numerical then the numerical argument is returned.
3277 * SNaNs will get quietened before being returned.
3278 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
3279 * and maxNum() operations. min() and max() are the typical min/max
3280 * semantics provided by many CPUs which predate that specification.
3282 * minnummag() and maxnummag() functions correspond to minNumMag()
3283 * and minNumMag() from the IEEE-754 2008.
3285 static FloatParts64
minmax_floats(FloatParts64 a
, FloatParts64 b
, bool ismin
,
3286 bool ieee
, bool ismag
, float_status
*s
)
3288 if (unlikely(is_nan(a
.cls
) || is_nan(b
.cls
))) {
3290 /* Takes two floating-point values `a' and `b', one of
3291 * which is a NaN, and returns the appropriate NaN
3292 * result. If either `a' or `b' is a signaling NaN,
3293 * the invalid exception is raised.
3295 if (is_snan(a
.cls
) || is_snan(b
.cls
)) {
3296 return *parts_pick_nan(&a
, &b
, s
);
3297 } else if (is_nan(a
.cls
) && !is_nan(b
.cls
)) {
3299 } else if (is_nan(b
.cls
) && !is_nan(a
.cls
)) {
3303 return *parts_pick_nan(&a
, &b
, s
);
3308 case float_class_normal
:
3311 case float_class_inf
:
3314 case float_class_zero
:
3318 g_assert_not_reached();
3322 case float_class_normal
:
3325 case float_class_inf
:
3328 case float_class_zero
:
3332 g_assert_not_reached();
3336 if (ismag
&& (a_exp
!= b_exp
|| a
.frac
!= b
.frac
)) {
3337 bool a_less
= a_exp
< b_exp
;
3338 if (a_exp
== b_exp
) {
3339 a_less
= a
.frac
< b
.frac
;
3341 return a_less
^ ismin
? b
: a
;
3344 if (a
.sign
== b
.sign
) {
3345 bool a_less
= a_exp
< b_exp
;
3346 if (a_exp
== b_exp
) {
3347 a_less
= a
.frac
< b
.frac
;
3349 return a
.sign
^ a_less
^ ismin
? b
: a
;
3351 return a
.sign
^ ismin
? b
: a
;
3356 #define MINMAX(sz, name, ismin, isiee, ismag) \
3357 float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
3360 FloatParts64 pa, pb, pr; \
3361 float ## sz ## _unpack_canonical(&pa, a, s); \
3362 float ## sz ## _unpack_canonical(&pb, b, s); \
3363 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
3364 return float ## sz ## _round_pack_canonical(&pr, s); \
3367 MINMAX(16, min
, true, false, false)
3368 MINMAX(16, minnum
, true, true, false)
3369 MINMAX(16, minnummag
, true, true, true)
3370 MINMAX(16, max
, false, false, false)
3371 MINMAX(16, maxnum
, false, true, false)
3372 MINMAX(16, maxnummag
, false, true, true)
3374 MINMAX(32, min
, true, false, false)
3375 MINMAX(32, minnum
, true, true, false)
3376 MINMAX(32, minnummag
, true, true, true)
3377 MINMAX(32, max
, false, false, false)
3378 MINMAX(32, maxnum
, false, true, false)
3379 MINMAX(32, maxnummag
, false, true, true)
3381 MINMAX(64, min
, true, false, false)
3382 MINMAX(64, minnum
, true, true, false)
3383 MINMAX(64, minnummag
, true, true, true)
3384 MINMAX(64, max
, false, false, false)
3385 MINMAX(64, maxnum
, false, true, false)
3386 MINMAX(64, maxnummag
, false, true, true)
3390 #define BF16_MINMAX(name, ismin, isiee, ismag) \
3391 bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \
3393 FloatParts64 pa, pb, pr; \
3394 bfloat16_unpack_canonical(&pa, a, s); \
3395 bfloat16_unpack_canonical(&pb, b, s); \
3396 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
3397 return bfloat16_round_pack_canonical(&pr, s); \
3400 BF16_MINMAX(min
, true, false, false)
3401 BF16_MINMAX(minnum
, true, true, false)
3402 BF16_MINMAX(minnummag
, true, true, true)
3403 BF16_MINMAX(max
, false, false, false)
3404 BF16_MINMAX(maxnum
, false, true, false)
3405 BF16_MINMAX(maxnummag
, false, true, true)
3409 /* Floating point compare */
3410 static FloatRelation
compare_floats(FloatParts64 a
, FloatParts64 b
, bool is_quiet
,
3413 if (is_nan(a
.cls
) || is_nan(b
.cls
)) {
3415 a
.cls
== float_class_snan
||
3416 b
.cls
== float_class_snan
) {
3417 float_raise(float_flag_invalid
, s
);
3419 return float_relation_unordered
;
3422 if (a
.cls
== float_class_zero
) {
3423 if (b
.cls
== float_class_zero
) {
3424 return float_relation_equal
;
3426 return b
.sign
? float_relation_greater
: float_relation_less
;
3427 } else if (b
.cls
== float_class_zero
) {
3428 return a
.sign
? float_relation_less
: float_relation_greater
;
3431 /* The only really important thing about infinity is its sign. If
3432 * both are infinities the sign marks the smallest of the two.
3434 if (a
.cls
== float_class_inf
) {
3435 if ((b
.cls
== float_class_inf
) && (a
.sign
== b
.sign
)) {
3436 return float_relation_equal
;
3438 return a
.sign
? float_relation_less
: float_relation_greater
;
3439 } else if (b
.cls
== float_class_inf
) {
3440 return b
.sign
? float_relation_greater
: float_relation_less
;
3443 if (a
.sign
!= b
.sign
) {
3444 return a
.sign
? float_relation_less
: float_relation_greater
;
3447 if (a
.exp
== b
.exp
) {
3448 if (a
.frac
== b
.frac
) {
3449 return float_relation_equal
;
3452 return a
.frac
> b
.frac
?
3453 float_relation_less
: float_relation_greater
;
3455 return a
.frac
> b
.frac
?
3456 float_relation_greater
: float_relation_less
;
3460 return a
.exp
> b
.exp
? float_relation_less
: float_relation_greater
;
3462 return a
.exp
> b
.exp
? float_relation_greater
: float_relation_less
;
3467 #define COMPARE(name, attr, sz) \
3469 name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
3471 FloatParts64 pa, pb; \
3472 float ## sz ## _unpack_canonical(&pa, a, s); \
3473 float ## sz ## _unpack_canonical(&pb, b, s); \
3474 return compare_floats(pa, pb, is_quiet, s); \
3477 COMPARE(soft_f16_compare
, QEMU_FLATTEN
, 16)
3478 COMPARE(soft_f32_compare
, QEMU_SOFTFLOAT_ATTR
, 32)
3479 COMPARE(soft_f64_compare
, QEMU_SOFTFLOAT_ATTR
, 64)
3483 FloatRelation
float16_compare(float16 a
, float16 b
, float_status
*s
)
3485 return soft_f16_compare(a
, b
, false, s
);
3488 FloatRelation
float16_compare_quiet(float16 a
, float16 b
, float_status
*s
)
3490 return soft_f16_compare(a
, b
, true, s
);
3493 static FloatRelation QEMU_FLATTEN
3494 f32_compare(float32 xa
, float32 xb
, bool is_quiet
, float_status
*s
)
3496 union_float32 ua
, ub
;
3501 if (QEMU_NO_HARDFLOAT
) {
3505 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
3506 if (isgreaterequal(ua
.h
, ub
.h
)) {
3507 if (isgreater(ua
.h
, ub
.h
)) {
3508 return float_relation_greater
;
3510 return float_relation_equal
;
3512 if (likely(isless(ua
.h
, ub
.h
))) {
3513 return float_relation_less
;
3515 /* The only condition remaining is unordered.
3516 * Fall through to set flags.
3519 return soft_f32_compare(ua
.s
, ub
.s
, is_quiet
, s
);
3522 FloatRelation
float32_compare(float32 a
, float32 b
, float_status
*s
)
3524 return f32_compare(a
, b
, false, s
);
3527 FloatRelation
float32_compare_quiet(float32 a
, float32 b
, float_status
*s
)
3529 return f32_compare(a
, b
, true, s
);
3532 static FloatRelation QEMU_FLATTEN
3533 f64_compare(float64 xa
, float64 xb
, bool is_quiet
, float_status
*s
)
3535 union_float64 ua
, ub
;
3540 if (QEMU_NO_HARDFLOAT
) {
3544 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
3545 if (isgreaterequal(ua
.h
, ub
.h
)) {
3546 if (isgreater(ua
.h
, ub
.h
)) {
3547 return float_relation_greater
;
3549 return float_relation_equal
;
3551 if (likely(isless(ua
.h
, ub
.h
))) {
3552 return float_relation_less
;
3554 /* The only condition remaining is unordered.
3555 * Fall through to set flags.
3558 return soft_f64_compare(ua
.s
, ub
.s
, is_quiet
, s
);
3561 FloatRelation
float64_compare(float64 a
, float64 b
, float_status
*s
)
3563 return f64_compare(a
, b
, false, s
);
3566 FloatRelation
float64_compare_quiet(float64 a
, float64 b
, float_status
*s
)
3568 return f64_compare(a
, b
, true, s
);
3571 static FloatRelation QEMU_FLATTEN
3572 soft_bf16_compare(bfloat16 a
, bfloat16 b
, bool is_quiet
, float_status
*s
)
3574 FloatParts64 pa
, pb
;
3576 bfloat16_unpack_canonical(&pa
, a
, s
);
3577 bfloat16_unpack_canonical(&pb
, b
, s
);
3578 return compare_floats(pa
, pb
, is_quiet
, s
);
3581 FloatRelation
bfloat16_compare(bfloat16 a
, bfloat16 b
, float_status
*s
)
3583 return soft_bf16_compare(a
, b
, false, s
);
3586 FloatRelation
bfloat16_compare_quiet(bfloat16 a
, bfloat16 b
, float_status
*s
)
3588 return soft_bf16_compare(a
, b
, true, s
);
3591 /* Multiply A by 2 raised to the power N. */
3592 static FloatParts64
scalbn_decomposed(FloatParts64 a
, int n
, float_status
*s
)
3594 if (unlikely(is_nan(a
.cls
))) {
3595 parts_return_nan(&a
, s
);
3597 if (a
.cls
== float_class_normal
) {
3598 /* The largest float type (even though not supported by FloatParts64)
3599 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
3600 * still allows rounding to infinity, without allowing overflow
3601 * within the int32_t that backs FloatParts64.exp.
3603 n
= MIN(MAX(n
, -0x10000), 0x10000);
3609 float16
float16_scalbn(float16 a
, int n
, float_status
*status
)
3611 FloatParts64 pa
, pr
;
3613 float16_unpack_canonical(&pa
, a
, status
);
3614 pr
= scalbn_decomposed(pa
, n
, status
);
3615 return float16_round_pack_canonical(&pr
, status
);
3618 float32
float32_scalbn(float32 a
, int n
, float_status
*status
)
3620 FloatParts64 pa
, pr
;
3622 float32_unpack_canonical(&pa
, a
, status
);
3623 pr
= scalbn_decomposed(pa
, n
, status
);
3624 return float32_round_pack_canonical(&pr
, status
);
3627 float64
float64_scalbn(float64 a
, int n
, float_status
*status
)
3629 FloatParts64 pa
, pr
;
3631 float64_unpack_canonical(&pa
, a
, status
);
3632 pr
= scalbn_decomposed(pa
, n
, status
);
3633 return float64_round_pack_canonical(&pr
, status
);
3636 bfloat16
bfloat16_scalbn(bfloat16 a
, int n
, float_status
*status
)
3638 FloatParts64 pa
, pr
;
3640 bfloat16_unpack_canonical(&pa
, a
, status
);
3641 pr
= scalbn_decomposed(pa
, n
, status
);
3642 return bfloat16_round_pack_canonical(&pr
, status
);
3648 * The old softfloat code did an approximation step before zeroing in
3649 * on the final result. However for simpleness we just compute the
3650 * square root by iterating down from the implicit bit to enough extra
3651 * bits to ensure we get a correctly rounded result.
3653 * This does mean however the calculation is slower than before,
3654 * especially for 64 bit floats.
3657 static FloatParts64
sqrt_float(FloatParts64 a
, float_status
*s
, const FloatFmt
*p
)
3659 uint64_t a_frac
, r_frac
, s_frac
;
3662 if (is_nan(a
.cls
)) {
3663 parts_return_nan(&a
, s
);
3666 if (a
.cls
== float_class_zero
) {
3667 return a
; /* sqrt(+-0) = +-0 */
3670 float_raise(float_flag_invalid
, s
);
3671 parts_default_nan(&a
, s
);
3674 if (a
.cls
== float_class_inf
) {
3675 return a
; /* sqrt(+inf) = +inf */
3678 assert(a
.cls
== float_class_normal
);
3680 /* We need two overflow bits at the top. Adding room for that is a
3681 * right shift. If the exponent is odd, we can discard the low bit
3682 * by multiplying the fraction by 2; that's a left shift. Combine
3683 * those and we shift right by 1 if the exponent is odd, otherwise 2.
3685 a_frac
= a
.frac
>> (2 - (a
.exp
& 1));
3688 /* Bit-by-bit computation of sqrt. */
3692 /* Iterate from implicit bit down to the 3 extra bits to compute a
3693 * properly rounded result. Remember we've inserted two more bits
3694 * at the top, so these positions are two less.
3696 bit
= DECOMPOSED_BINARY_POINT
- 2;
3697 last_bit
= MAX(p
->frac_shift
- 4, 0);
3699 uint64_t q
= 1ULL << bit
;
3700 uint64_t t_frac
= s_frac
+ q
;
3701 if (t_frac
<= a_frac
) {
3702 s_frac
= t_frac
+ q
;
3707 } while (--bit
>= last_bit
);
3709 /* Undo the right shift done above. If there is any remaining
3710 * fraction, the result is inexact. Set the sticky bit.
3712 a
.frac
= (r_frac
<< 2) + (a_frac
!= 0);
3717 float16 QEMU_FLATTEN
float16_sqrt(float16 a
, float_status
*status
)
3719 FloatParts64 pa
, pr
;
3721 float16_unpack_canonical(&pa
, a
, status
);
3722 pr
= sqrt_float(pa
, status
, &float16_params
);
3723 return float16_round_pack_canonical(&pr
, status
);
3726 static float32 QEMU_SOFTFLOAT_ATTR
3727 soft_f32_sqrt(float32 a
, float_status
*status
)
3729 FloatParts64 pa
, pr
;
3731 float32_unpack_canonical(&pa
, a
, status
);
3732 pr
= sqrt_float(pa
, status
, &float32_params
);
3733 return float32_round_pack_canonical(&pr
, status
);
3736 static float64 QEMU_SOFTFLOAT_ATTR
3737 soft_f64_sqrt(float64 a
, float_status
*status
)
3739 FloatParts64 pa
, pr
;
3741 float64_unpack_canonical(&pa
, a
, status
);
3742 pr
= sqrt_float(pa
, status
, &float64_params
);
3743 return float64_round_pack_canonical(&pr
, status
);
3746 float32 QEMU_FLATTEN
float32_sqrt(float32 xa
, float_status
*s
)
3748 union_float32 ua
, ur
;
3751 if (unlikely(!can_use_fpu(s
))) {
3755 float32_input_flush1(&ua
.s
, s
);
3756 if (QEMU_HARDFLOAT_1F32_USE_FP
) {
3757 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
3758 fpclassify(ua
.h
) == FP_ZERO
) ||
3762 } else if (unlikely(!float32_is_zero_or_normal(ua
.s
) ||
3763 float32_is_neg(ua
.s
))) {
3770 return soft_f32_sqrt(ua
.s
, s
);
3773 float64 QEMU_FLATTEN
float64_sqrt(float64 xa
, float_status
*s
)
3775 union_float64 ua
, ur
;
3778 if (unlikely(!can_use_fpu(s
))) {
3782 float64_input_flush1(&ua
.s
, s
);
3783 if (QEMU_HARDFLOAT_1F64_USE_FP
) {
3784 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
3785 fpclassify(ua
.h
) == FP_ZERO
) ||
3789 } else if (unlikely(!float64_is_zero_or_normal(ua
.s
) ||
3790 float64_is_neg(ua
.s
))) {
3797 return soft_f64_sqrt(ua
.s
, s
);
3800 bfloat16 QEMU_FLATTEN
bfloat16_sqrt(bfloat16 a
, float_status
*status
)
3802 FloatParts64 pa
, pr
;
3804 bfloat16_unpack_canonical(&pa
, a
, status
);
3805 pr
= sqrt_float(pa
, status
, &bfloat16_params
);
3806 return bfloat16_round_pack_canonical(&pr
, status
);
3809 /*----------------------------------------------------------------------------
3810 | The pattern for a default generated NaN.
3811 *----------------------------------------------------------------------------*/
3813 float16
float16_default_nan(float_status
*status
)
3817 parts_default_nan(&p
, status
);
3818 p
.frac
>>= float16_params
.frac_shift
;
3819 return float16_pack_raw(&p
);
3822 float32
float32_default_nan(float_status
*status
)
3826 parts_default_nan(&p
, status
);
3827 p
.frac
>>= float32_params
.frac_shift
;
3828 return float32_pack_raw(&p
);
3831 float64
float64_default_nan(float_status
*status
)
3835 parts_default_nan(&p
, status
);
3836 p
.frac
>>= float64_params
.frac_shift
;
3837 return float64_pack_raw(&p
);
3840 float128
float128_default_nan(float_status
*status
)
3844 parts_default_nan(&p
, status
);
3845 frac_shr(&p
, float128_params
.frac_shift
);
3846 return float128_pack_raw(&p
);
3849 bfloat16
bfloat16_default_nan(float_status
*status
)
3853 parts_default_nan(&p
, status
);
3854 p
.frac
>>= bfloat16_params
.frac_shift
;
3855 return bfloat16_pack_raw(&p
);
3858 /*----------------------------------------------------------------------------
3859 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3860 *----------------------------------------------------------------------------*/
3862 float16
float16_silence_nan(float16 a
, float_status
*status
)
3866 float16_unpack_raw(&p
, a
);
3867 p
.frac
<<= float16_params
.frac_shift
;
3868 parts_silence_nan(&p
, status
);
3869 p
.frac
>>= float16_params
.frac_shift
;
3870 return float16_pack_raw(&p
);
3873 float32
float32_silence_nan(float32 a
, float_status
*status
)
3877 float32_unpack_raw(&p
, a
);
3878 p
.frac
<<= float32_params
.frac_shift
;
3879 parts_silence_nan(&p
, status
);
3880 p
.frac
>>= float32_params
.frac_shift
;
3881 return float32_pack_raw(&p
);
3884 float64
float64_silence_nan(float64 a
, float_status
*status
)
3888 float64_unpack_raw(&p
, a
);
3889 p
.frac
<<= float64_params
.frac_shift
;
3890 parts_silence_nan(&p
, status
);
3891 p
.frac
>>= float64_params
.frac_shift
;
3892 return float64_pack_raw(&p
);
3895 bfloat16
bfloat16_silence_nan(bfloat16 a
, float_status
*status
)
3899 bfloat16_unpack_raw(&p
, a
);
3900 p
.frac
<<= bfloat16_params
.frac_shift
;
3901 parts_silence_nan(&p
, status
);
3902 p
.frac
>>= bfloat16_params
.frac_shift
;
3903 return bfloat16_pack_raw(&p
);
3906 float128
float128_silence_nan(float128 a
, float_status
*status
)
3910 float128_unpack_raw(&p
, a
);
3911 frac_shl(&p
, float128_params
.frac_shift
);
3912 parts_silence_nan(&p
, status
);
3913 frac_shr(&p
, float128_params
.frac_shift
);
3914 return float128_pack_raw(&p
);
3917 /*----------------------------------------------------------------------------
3918 | If `a' is denormal and we are in flush-to-zero mode then set the
3919 | input-denormal exception and return zero. Otherwise just return the value.
3920 *----------------------------------------------------------------------------*/
3922 static bool parts_squash_denormal(FloatParts64 p
, float_status
*status
)
3924 if (p
.exp
== 0 && p
.frac
!= 0) {
3925 float_raise(float_flag_input_denormal
, status
);
3932 float16
float16_squash_input_denormal(float16 a
, float_status
*status
)
3934 if (status
->flush_inputs_to_zero
) {
3937 float16_unpack_raw(&p
, a
);
3938 if (parts_squash_denormal(p
, status
)) {
3939 return float16_set_sign(float16_zero
, p
.sign
);
3945 float32
float32_squash_input_denormal(float32 a
, float_status
*status
)
3947 if (status
->flush_inputs_to_zero
) {
3950 float32_unpack_raw(&p
, a
);
3951 if (parts_squash_denormal(p
, status
)) {
3952 return float32_set_sign(float32_zero
, p
.sign
);
3958 float64
float64_squash_input_denormal(float64 a
, float_status
*status
)
3960 if (status
->flush_inputs_to_zero
) {
3963 float64_unpack_raw(&p
, a
);
3964 if (parts_squash_denormal(p
, status
)) {
3965 return float64_set_sign(float64_zero
, p
.sign
);
3971 bfloat16
bfloat16_squash_input_denormal(bfloat16 a
, float_status
*status
)
3973 if (status
->flush_inputs_to_zero
) {
3976 bfloat16_unpack_raw(&p
, a
);
3977 if (parts_squash_denormal(p
, status
)) {
3978 return bfloat16_set_sign(bfloat16_zero
, p
.sign
);
3984 /*----------------------------------------------------------------------------
3985 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3986 | and 7, and returns the properly rounded 32-bit integer corresponding to the
3987 | input. If `zSign' is 1, the input is negated before being converted to an
3988 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3989 | is simply rounded to an integer, with the inexact exception raised if the
3990 | input cannot be represented exactly as an integer. However, if the fixed-
3991 | point input is too large, the invalid exception is raised and the largest
3992 | positive or negative integer is returned.
3993 *----------------------------------------------------------------------------*/
3995 static int32_t roundAndPackInt32(bool zSign
, uint64_t absZ
,
3996 float_status
*status
)
3998 int8_t roundingMode
;
3999 bool roundNearestEven
;
4000 int8_t roundIncrement
, roundBits
;
4003 roundingMode
= status
->float_rounding_mode
;
4004 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4005 switch (roundingMode
) {
4006 case float_round_nearest_even
:
4007 case float_round_ties_away
:
4008 roundIncrement
= 0x40;
4010 case float_round_to_zero
:
4013 case float_round_up
:
4014 roundIncrement
= zSign
? 0 : 0x7f;
4016 case float_round_down
:
4017 roundIncrement
= zSign
? 0x7f : 0;
4019 case float_round_to_odd
:
4020 roundIncrement
= absZ
& 0x80 ? 0 : 0x7f;
4025 roundBits
= absZ
& 0x7F;
4026 absZ
= ( absZ
+ roundIncrement
)>>7;
4027 if (!(roundBits
^ 0x40) && roundNearestEven
) {
4031 if ( zSign
) z
= - z
;
4032 if ( ( absZ
>>32 ) || ( z
&& ( ( z
< 0 ) ^ zSign
) ) ) {
4033 float_raise(float_flag_invalid
, status
);
4034 return zSign
? INT32_MIN
: INT32_MAX
;
4037 float_raise(float_flag_inexact
, status
);
4043 /*----------------------------------------------------------------------------
4044 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4045 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4046 | and returns the properly rounded 64-bit integer corresponding to the input.
4047 | If `zSign' is 1, the input is negated before being converted to an integer.
4048 | Ordinarily, the fixed-point input is simply rounded to an integer, with
4049 | the inexact exception raised if the input cannot be represented exactly as
4050 | an integer. However, if the fixed-point input is too large, the invalid
4051 | exception is raised and the largest positive or negative integer is
4053 *----------------------------------------------------------------------------*/
4055 static int64_t roundAndPackInt64(bool zSign
, uint64_t absZ0
, uint64_t absZ1
,
4056 float_status
*status
)
4058 int8_t roundingMode
;
4059 bool roundNearestEven
, increment
;
4062 roundingMode
= status
->float_rounding_mode
;
4063 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4064 switch (roundingMode
) {
4065 case float_round_nearest_even
:
4066 case float_round_ties_away
:
4067 increment
= ((int64_t) absZ1
< 0);
4069 case float_round_to_zero
:
4072 case float_round_up
:
4073 increment
= !zSign
&& absZ1
;
4075 case float_round_down
:
4076 increment
= zSign
&& absZ1
;
4078 case float_round_to_odd
:
4079 increment
= !(absZ0
& 1) && absZ1
;
4086 if ( absZ0
== 0 ) goto overflow
;
4087 if (!(absZ1
<< 1) && roundNearestEven
) {
4092 if ( zSign
) z
= - z
;
4093 if ( z
&& ( ( z
< 0 ) ^ zSign
) ) {
4095 float_raise(float_flag_invalid
, status
);
4096 return zSign
? INT64_MIN
: INT64_MAX
;
4099 float_raise(float_flag_inexact
, status
);
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision floating-point value represented
| by the denormalized significand `aSig'.  The normalized exponent and
| significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    int8_t shiftCount;

    /* Shift the leading 1 into the implicit-bit position (bit 23 + 7). */
    shiftCount = clz32(aSig) - 8;
    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4123 /*----------------------------------------------------------------------------
4124 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4125 | and significand `zSig', and returns the proper single-precision floating-
4126 | point value corresponding to the abstract input. Ordinarily, the abstract
4127 | value is simply rounded and packed into the single-precision format, with
4128 | the inexact exception raised if the abstract input cannot be represented
4129 | exactly. However, if the abstract value is too large, the overflow and
4130 | inexact exceptions are raised and an infinity or maximal finite value is
4131 | returned. If the abstract value is too small, the input value is rounded to
4132 | a subnormal number, and the underflow and inexact exceptions are raised if
4133 | the abstract input cannot be represented exactly as a subnormal single-
4134 | precision floating-point number.
4135 | The input significand `zSig' has its binary point between bits 30
4136 | and 29, which is 7 bits to the left of the usual location. This shifted
4137 | significand must be normalized or smaller. If `zSig' is not normalized,
4138 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4139 | and it must not require rounding. In the usual case that `zSig' is
4140 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4141 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4142 | Binary Floating-Point Arithmetic.
4143 *----------------------------------------------------------------------------*/
4145 static float32
roundAndPackFloat32(bool zSign
, int zExp
, uint32_t zSig
,
4146 float_status
*status
)
4148 int8_t roundingMode
;
4149 bool roundNearestEven
;
4150 int8_t roundIncrement
, roundBits
;
4153 roundingMode
= status
->float_rounding_mode
;
4154 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4155 switch (roundingMode
) {
4156 case float_round_nearest_even
:
4157 case float_round_ties_away
:
4158 roundIncrement
= 0x40;
4160 case float_round_to_zero
:
4163 case float_round_up
:
4164 roundIncrement
= zSign
? 0 : 0x7f;
4166 case float_round_down
:
4167 roundIncrement
= zSign
? 0x7f : 0;
4169 case float_round_to_odd
:
4170 roundIncrement
= zSig
& 0x80 ? 0 : 0x7f;
4176 roundBits
= zSig
& 0x7F;
4177 if ( 0xFD <= (uint16_t) zExp
) {
4178 if ( ( 0xFD < zExp
)
4179 || ( ( zExp
== 0xFD )
4180 && ( (int32_t) ( zSig
+ roundIncrement
) < 0 ) )
4182 bool overflow_to_inf
= roundingMode
!= float_round_to_odd
&&
4183 roundIncrement
!= 0;
4184 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4185 return packFloat32(zSign
, 0xFF, -!overflow_to_inf
);
4188 if (status
->flush_to_zero
) {
4189 float_raise(float_flag_output_denormal
, status
);
4190 return packFloat32(zSign
, 0, 0);
4192 isTiny
= status
->tininess_before_rounding
4194 || (zSig
+ roundIncrement
< 0x80000000);
4195 shift32RightJamming( zSig
, - zExp
, &zSig
);
4197 roundBits
= zSig
& 0x7F;
4198 if (isTiny
&& roundBits
) {
4199 float_raise(float_flag_underflow
, status
);
4201 if (roundingMode
== float_round_to_odd
) {
4203 * For round-to-odd case, the roundIncrement depends on
4204 * zSig which just changed.
4206 roundIncrement
= zSig
& 0x80 ? 0 : 0x7f;
4211 float_raise(float_flag_inexact
, status
);
4213 zSig
= ( zSig
+ roundIncrement
)>>7;
4214 if (!(roundBits
^ 0x40) && roundNearestEven
) {
4217 if ( zSig
== 0 ) zExp
= 0;
4218 return packFloat32( zSign
, zExp
, zSig
);
4222 /*----------------------------------------------------------------------------
4223 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4224 | and significand `zSig', and returns the proper single-precision floating-
4225 | point value corresponding to the abstract input. This routine is just like
4226 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4227 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4228 | floating-point exponent.
4229 *----------------------------------------------------------------------------*/
4232 normalizeRoundAndPackFloat32(bool zSign
, int zExp
, uint32_t zSig
,
4233 float_status
*status
)
4237 shiftCount
= clz32(zSig
) - 1;
4238 return roundAndPackFloat32(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
,
/*----------------------------------------------------------------------------
| Normalizes the subnormal double-precision floating-point value represented
| by the denormalized significand `aSig'.  The normalized exponent and
| significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    int8_t shiftCount;

    /* Shift the leading 1 into the implicit-bit position (bit 52 + 10). */
    shiftCount = clz64(aSig) - 11;
    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4261 /*----------------------------------------------------------------------------
4262 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4263 | double-precision floating-point value, returning the result. After being
4264 | shifted into the proper positions, the three fields are simply added
4265 | together to form the result. This means that any integer portion of `zSig'
4266 | will be added into the exponent. Since a properly normalized significand
4267 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4268 | than the desired result exponent whenever `zSig' is a complete, normalized
4270 *----------------------------------------------------------------------------*/
4272 static inline float64
packFloat64(bool zSign
, int zExp
, uint64_t zSig
)
4275 return make_float64(
4276 ( ( (uint64_t) zSign
)<<63 ) + ( ( (uint64_t) zExp
)<<52 ) + zSig
);
4280 /*----------------------------------------------------------------------------
4281 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4282 | and significand `zSig', and returns the proper double-precision floating-
4283 | point value corresponding to the abstract input. Ordinarily, the abstract
4284 | value is simply rounded and packed into the double-precision format, with
4285 | the inexact exception raised if the abstract input cannot be represented
4286 | exactly. However, if the abstract value is too large, the overflow and
4287 | inexact exceptions are raised and an infinity or maximal finite value is
4288 | returned. If the abstract value is too small, the input value is rounded to
4289 | a subnormal number, and the underflow and inexact exceptions are raised if
4290 | the abstract input cannot be represented exactly as a subnormal double-
4291 | precision floating-point number.
4292 | The input significand `zSig' has its binary point between bits 62
4293 | and 61, which is 10 bits to the left of the usual location. This shifted
4294 | significand must be normalized or smaller. If `zSig' is not normalized,
4295 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4296 | and it must not require rounding. In the usual case that `zSig' is
4297 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4298 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4299 | Binary Floating-Point Arithmetic.
4300 *----------------------------------------------------------------------------*/
4302 static float64
roundAndPackFloat64(bool zSign
, int zExp
, uint64_t zSig
,
4303 float_status
*status
)
4305 int8_t roundingMode
;
4306 bool roundNearestEven
;
4307 int roundIncrement
, roundBits
;
4310 roundingMode
= status
->float_rounding_mode
;
4311 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4312 switch (roundingMode
) {
4313 case float_round_nearest_even
:
4314 case float_round_ties_away
:
4315 roundIncrement
= 0x200;
4317 case float_round_to_zero
:
4320 case float_round_up
:
4321 roundIncrement
= zSign
? 0 : 0x3ff;
4323 case float_round_down
:
4324 roundIncrement
= zSign
? 0x3ff : 0;
4326 case float_round_to_odd
:
4327 roundIncrement
= (zSig
& 0x400) ? 0 : 0x3ff;
4332 roundBits
= zSig
& 0x3FF;
4333 if ( 0x7FD <= (uint16_t) zExp
) {
4334 if ( ( 0x7FD < zExp
)
4335 || ( ( zExp
== 0x7FD )
4336 && ( (int64_t) ( zSig
+ roundIncrement
) < 0 ) )
4338 bool overflow_to_inf
= roundingMode
!= float_round_to_odd
&&
4339 roundIncrement
!= 0;
4340 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4341 return packFloat64(zSign
, 0x7FF, -(!overflow_to_inf
));
4344 if (status
->flush_to_zero
) {
4345 float_raise(float_flag_output_denormal
, status
);
4346 return packFloat64(zSign
, 0, 0);
4348 isTiny
= status
->tininess_before_rounding
4350 || (zSig
+ roundIncrement
< UINT64_C(0x8000000000000000));
4351 shift64RightJamming( zSig
, - zExp
, &zSig
);
4353 roundBits
= zSig
& 0x3FF;
4354 if (isTiny
&& roundBits
) {
4355 float_raise(float_flag_underflow
, status
);
4357 if (roundingMode
== float_round_to_odd
) {
4359 * For round-to-odd case, the roundIncrement depends on
4360 * zSig which just changed.
4362 roundIncrement
= (zSig
& 0x400) ? 0 : 0x3ff;
4367 float_raise(float_flag_inexact
, status
);
4369 zSig
= ( zSig
+ roundIncrement
)>>10;
4370 if (!(roundBits
^ 0x200) && roundNearestEven
) {
4373 if ( zSig
== 0 ) zExp
= 0;
4374 return packFloat64( zSign
, zExp
, zSig
);
4378 /*----------------------------------------------------------------------------
4379 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4380 | and significand `zSig', and returns the proper double-precision floating-
4381 | point value corresponding to the abstract input. This routine is just like
4382 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4383 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4384 | floating-point exponent.
4385 *----------------------------------------------------------------------------*/
4388 normalizeRoundAndPackFloat64(bool zSign
, int zExp
, uint64_t zSig
,
4389 float_status
*status
)
4393 shiftCount
= clz64(zSig
) - 1;
4394 return roundAndPackFloat64(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
,
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision floating-point value
| represented by the denormalized significand `aSig'.  The normalized exponent
| and significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t shiftCount;

    /* floatx80 has an explicit integer bit, so normalize to bit 63. */
    shiftCount = clz64(aSig);
    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4416 /*----------------------------------------------------------------------------
4417 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4418 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4419 | and returns the proper extended double-precision floating-point value
4420 | corresponding to the abstract input. Ordinarily, the abstract value is
4421 | rounded and packed into the extended double-precision format, with the
4422 | inexact exception raised if the abstract input cannot be represented
4423 | exactly. However, if the abstract value is too large, the overflow and
4424 | inexact exceptions are raised and an infinity or maximal finite value is
4425 | returned. If the abstract value is too small, the input value is rounded to
4426 | a subnormal number, and the underflow and inexact exceptions are raised if
4427 | the abstract input cannot be represented exactly as a subnormal extended
4428 | double-precision floating-point number.
4429 | If `roundingPrecision' is 32 or 64, the result is rounded to the same
4430 | number of bits as single or double precision, respectively. Otherwise, the
4431 | result is rounded to the full precision of the extended double-precision
4433 | The input significand must be normalized or smaller. If the input
4434 | significand is not normalized, `zExp' must be 0; in that case, the result
4435 | returned is a subnormal number, and it must not require rounding. The
4436 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4437 | Floating-Point Arithmetic.
4438 *----------------------------------------------------------------------------*/
4440 floatx80
roundAndPackFloatx80(int8_t roundingPrecision
, bool zSign
,
4441 int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
,
4442 float_status
*status
)
4444 int8_t roundingMode
;
4445 bool roundNearestEven
, increment
, isTiny
;
4446 int64_t roundIncrement
, roundMask
, roundBits
;
4448 roundingMode
= status
->float_rounding_mode
;
4449 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4450 if ( roundingPrecision
== 80 ) goto precision80
;
4451 if ( roundingPrecision
== 64 ) {
4452 roundIncrement
= UINT64_C(0x0000000000000400);
4453 roundMask
= UINT64_C(0x00000000000007FF);
4455 else if ( roundingPrecision
== 32 ) {
4456 roundIncrement
= UINT64_C(0x0000008000000000);
4457 roundMask
= UINT64_C(0x000000FFFFFFFFFF);
4462 zSig0
|= ( zSig1
!= 0 );
4463 switch (roundingMode
) {
4464 case float_round_nearest_even
:
4465 case float_round_ties_away
:
4467 case float_round_to_zero
:
4470 case float_round_up
:
4471 roundIncrement
= zSign
? 0 : roundMask
;
4473 case float_round_down
:
4474 roundIncrement
= zSign
? roundMask
: 0;
4479 roundBits
= zSig0
& roundMask
;
4480 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4481 if ( ( 0x7FFE < zExp
)
4482 || ( ( zExp
== 0x7FFE ) && ( zSig0
+ roundIncrement
< zSig0
) )
4487 if (status
->flush_to_zero
) {
4488 float_raise(float_flag_output_denormal
, status
);
4489 return packFloatx80(zSign
, 0, 0);
4491 isTiny
= status
->tininess_before_rounding
4493 || (zSig0
<= zSig0
+ roundIncrement
);
4494 shift64RightJamming( zSig0
, 1 - zExp
, &zSig0
);
4496 roundBits
= zSig0
& roundMask
;
4497 if (isTiny
&& roundBits
) {
4498 float_raise(float_flag_underflow
, status
);
4501 float_raise(float_flag_inexact
, status
);
4503 zSig0
+= roundIncrement
;
4504 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
4505 roundIncrement
= roundMask
+ 1;
4506 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4507 roundMask
|= roundIncrement
;
4509 zSig0
&= ~ roundMask
;
4510 return packFloatx80( zSign
, zExp
, zSig0
);
4514 float_raise(float_flag_inexact
, status
);
4516 zSig0
+= roundIncrement
;
4517 if ( zSig0
< roundIncrement
) {
4519 zSig0
= UINT64_C(0x8000000000000000);
4521 roundIncrement
= roundMask
+ 1;
4522 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4523 roundMask
|= roundIncrement
;
4525 zSig0
&= ~ roundMask
;
4526 if ( zSig0
== 0 ) zExp
= 0;
4527 return packFloatx80( zSign
, zExp
, zSig0
);
4529 switch (roundingMode
) {
4530 case float_round_nearest_even
:
4531 case float_round_ties_away
:
4532 increment
= ((int64_t)zSig1
< 0);
4534 case float_round_to_zero
:
4537 case float_round_up
:
4538 increment
= !zSign
&& zSig1
;
4540 case float_round_down
:
4541 increment
= zSign
&& zSig1
;
4546 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4547 if ( ( 0x7FFE < zExp
)
4548 || ( ( zExp
== 0x7FFE )
4549 && ( zSig0
== UINT64_C(0xFFFFFFFFFFFFFFFF) )
4555 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4556 if ( ( roundingMode
== float_round_to_zero
)
4557 || ( zSign
&& ( roundingMode
== float_round_up
) )
4558 || ( ! zSign
&& ( roundingMode
== float_round_down
) )
4560 return packFloatx80( zSign
, 0x7FFE, ~ roundMask
);
4562 return packFloatx80(zSign
,
4563 floatx80_infinity_high
,
4564 floatx80_infinity_low
);
4567 isTiny
= status
->tininess_before_rounding
4570 || (zSig0
< UINT64_C(0xFFFFFFFFFFFFFFFF));
4571 shift64ExtraRightJamming( zSig0
, zSig1
, 1 - zExp
, &zSig0
, &zSig1
);
4573 if (isTiny
&& zSig1
) {
4574 float_raise(float_flag_underflow
, status
);
4577 float_raise(float_flag_inexact
, status
);
4579 switch (roundingMode
) {
4580 case float_round_nearest_even
:
4581 case float_round_ties_away
:
4582 increment
= ((int64_t)zSig1
< 0);
4584 case float_round_to_zero
:
4587 case float_round_up
:
4588 increment
= !zSign
&& zSig1
;
4590 case float_round_down
:
4591 increment
= zSign
&& zSig1
;
4598 if (!(zSig1
<< 1) && roundNearestEven
) {
4601 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
4603 return packFloatx80( zSign
, zExp
, zSig0
);
4607 float_raise(float_flag_inexact
, status
);
4613 zSig0
= UINT64_C(0x8000000000000000);
4616 if (!(zSig1
<< 1) && roundNearestEven
) {
4622 if ( zSig0
== 0 ) zExp
= 0;
4624 return packFloatx80( zSign
, zExp
, zSig0
);
4628 /*----------------------------------------------------------------------------
4629 | Takes an abstract floating-point value having sign `zSign', exponent
4630 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4631 | and returns the proper extended double-precision floating-point value
4632 | corresponding to the abstract input. This routine is just like
4633 | `roundAndPackFloatx80' except that the input significand does not have to be
4635 *----------------------------------------------------------------------------*/
4637 floatx80
normalizeRoundAndPackFloatx80(int8_t roundingPrecision
,
4638 bool zSign
, int32_t zExp
,
4639 uint64_t zSig0
, uint64_t zSig1
,
4640 float_status
*status
)
4649 shiftCount
= clz64(zSig0
);
4650 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
4652 return roundAndPackFloatx80(roundingPrecision
, zSign
, zExp
,
4653 zSig0
, zSig1
, status
);
4657 /*----------------------------------------------------------------------------
4658 | Returns the least-significant 64 fraction bits of the quadruple-precision
4659 | floating-point value `a'.
4660 *----------------------------------------------------------------------------*/
4662 static inline uint64_t extractFloat128Frac1( float128 a
)
4669 /*----------------------------------------------------------------------------
4670 | Returns the most-significant 48 fraction bits of the quadruple-precision
4671 | floating-point value `a'.
4672 *----------------------------------------------------------------------------*/
4674 static inline uint64_t extractFloat128Frac0( float128 a
)
4677 return a
.high
& UINT64_C(0x0000FFFFFFFFFFFF);
4681 /*----------------------------------------------------------------------------
4682 | Returns the exponent bits of the quadruple-precision floating-point value
4684 *----------------------------------------------------------------------------*/
4686 static inline int32_t extractFloat128Exp( float128 a
)
4689 return ( a
.high
>>48 ) & 0x7FFF;
4693 /*----------------------------------------------------------------------------
4694 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4695 *----------------------------------------------------------------------------*/
4697 static inline bool extractFloat128Sign(float128 a
)
4699 return a
.high
>> 63;
/*----------------------------------------------------------------------------
| Normalizes the subnormal quadruple-precision floating-point value
| represented by the denormalized significand formed by the concatenation of
| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
| significand are stored at the location pointed to by `zSig0Ptr', and the
| least significant 64 bits of the normalized significand are stored at the
| location pointed to by `zSig1Ptr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shiftCount;

    if (aSig0 == 0) {
        /* The leading 1 is somewhere in the low word. */
        shiftCount = clz64(aSig1) - 15;
        if (shiftCount < 0) {
            *zSig0Ptr = aSig1 >> (-shiftCount);
            *zSig1Ptr = aSig1 << (shiftCount & 63);
        } else {
            *zSig0Ptr = aSig1 << shiftCount;
            *zSig1Ptr = 0;
        }
        *zExpPtr = -shiftCount - 63;
    } else {
        shiftCount = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shiftCount;
    }
}
4743 /*----------------------------------------------------------------------------
4744 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4745 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4746 | floating-point value, returning the result. After being shifted into the
4747 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4748 | added together to form the most significant 32 bits of the result. This
4749 | means that any integer portion of `zSig0' will be added into the exponent.
4750 | Since a properly normalized significand will have an integer portion equal
4751 | to 1, the `zExp' input should be 1 less than the desired result exponent
4752 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4754 *----------------------------------------------------------------------------*/
4756 static inline float128
4757 packFloat128(bool zSign
, int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
)
4762 z
.high
= ((uint64_t)zSign
<< 63) + ((uint64_t)zExp
<< 48) + zSig0
;
4766 /*----------------------------------------------------------------------------
4767 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4768 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4769 | and `zSig2', and returns the proper quadruple-precision floating-point value
4770 | corresponding to the abstract input. Ordinarily, the abstract value is
4771 | simply rounded and packed into the quadruple-precision format, with the
4772 | inexact exception raised if the abstract input cannot be represented
4773 | exactly. However, if the abstract value is too large, the overflow and
4774 | inexact exceptions are raised and an infinity or maximal finite value is
4775 | returned. If the abstract value is too small, the input value is rounded to
4776 | a subnormal number, and the underflow and inexact exceptions are raised if
4777 | the abstract input cannot be represented exactly as a subnormal quadruple-
4778 | precision floating-point number.
4779 | The input significand must be normalized or smaller. If the input
4780 | significand is not normalized, `zExp' must be 0; in that case, the result
4781 | returned is a subnormal number, and it must not require rounding. In the
4782 | usual case that the input significand is normalized, `zExp' must be 1 less
4783 | than the ``true'' floating-point exponent. The handling of underflow and
4784 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4785 *----------------------------------------------------------------------------*/
/*
 * Rounds and packs an abstract quad-precision value (see contract in the
 * comment block above): zSig2 holds the round/sticky bits, roundingMode is
 * taken from *status, and overflow / underflow / inexact flags are raised
 * as required. NOTE(review): extract is line-mangled and missing lines
 * (break statements, braces, some branches); tokens preserved byte-for-byte.
 */
4787 static float128
roundAndPackFloat128(bool zSign
, int32_t zExp
,
4788 uint64_t zSig0
, uint64_t zSig1
,
4789 uint64_t zSig2
, float_status
*status
)
4791 int8_t roundingMode
;
4792 bool roundNearestEven
, increment
, isTiny
;
4794 roundingMode
= status
->float_rounding_mode
;
4795 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
/* Decide whether the extra bits (zSig2) force an increment of zSig0:zSig1. */
4796 switch (roundingMode
) {
4797 case float_round_nearest_even
:
4798 case float_round_ties_away
:
4799 increment
= ((int64_t)zSig2
< 0);
4801 case float_round_to_zero
:
4804 case float_round_up
:
4805 increment
= !zSign
&& zSig2
;
4807 case float_round_down
:
4808 increment
= zSign
&& zSig2
;
4810 case float_round_to_odd
:
4811 increment
= !(zSig1
& 0x1) && zSig2
;
/* Overflow / underflow detection: 0x7FFD is the largest biased exponent
 * that can still round up without overflowing; negative zExp appears huge
 * after the uint32_t cast and is handled as (possible) underflow below. */
4816 if ( 0x7FFD <= (uint32_t) zExp
) {
4817 if ( ( 0x7FFD < zExp
)
4818 || ( ( zExp
== 0x7FFD )
4820 UINT64_C(0x0001FFFFFFFFFFFF),
4821 UINT64_C(0xFFFFFFFFFFFFFFFF),
4828 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
/* Rounding modes that truncate toward the max-magnitude finite value
 * return the largest finite number instead of infinity. */
4829 if ( ( roundingMode
== float_round_to_zero
)
4830 || ( zSign
&& ( roundingMode
== float_round_up
) )
4831 || ( ! zSign
&& ( roundingMode
== float_round_down
) )
4832 || (roundingMode
== float_round_to_odd
)
4838 UINT64_C(0x0000FFFFFFFFFFFF),
4839 UINT64_C(0xFFFFFFFFFFFFFFFF)
4842 return packFloat128( zSign
, 0x7FFF, 0, 0 );
/* Underflow path: optionally flush to zero, else denormalize. */
4845 if (status
->flush_to_zero
) {
4846 float_raise(float_flag_output_denormal
, status
);
4847 return packFloat128(zSign
, 0, 0, 0);
4849 isTiny
= status
->tininess_before_rounding
4852 || lt128(zSig0
, zSig1
,
4853 UINT64_C(0x0001FFFFFFFFFFFF),
4854 UINT64_C(0xFFFFFFFFFFFFFFFF));
4855 shift128ExtraRightJamming(
4856 zSig0
, zSig1
, zSig2
, - zExp
, &zSig0
, &zSig1
, &zSig2
);
4858 if (isTiny
&& zSig2
) {
4859 float_raise(float_flag_underflow
, status
);
/* Recompute the increment decision after denormalizing shifted new bits
 * into zSig2. */
4861 switch (roundingMode
) {
4862 case float_round_nearest_even
:
4863 case float_round_ties_away
:
4864 increment
= ((int64_t)zSig2
< 0);
4866 case float_round_to_zero
:
4869 case float_round_up
:
4870 increment
= !zSign
&& zSig2
;
4872 case float_round_down
:
4873 increment
= zSign
&& zSig2
;
4875 case float_round_to_odd
:
4876 increment
= !(zSig1
& 0x1) && zSig2
;
4884 float_raise(float_flag_inexact
, status
);
/* Apply the increment; on an exact tie under round-to-nearest-even,
 * clear the low bit to make the result even. */
4887 add128( zSig0
, zSig1
, 0, 1, &zSig0
, &zSig1
);
4888 if ((zSig2
+ zSig2
== 0) && roundNearestEven
) {
4893 if ( ( zSig0
| zSig1
) == 0 ) zExp
= 0;
4895 return packFloat128( zSign
, zExp
, zSig0
, zSig1
);
4899 /*----------------------------------------------------------------------------
4900 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4901 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4902 | returns the proper quadruple-precision floating-point value corresponding
4903 | to the abstract input. This routine is just like `roundAndPackFloat128'
4904 | except that the input significand has fewer bits and does not have to be
4905 | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4907 *----------------------------------------------------------------------------*/
/*
 * Normalizes the (possibly unnormalized) significand zSig0:zSig1, then
 * defers to roundAndPackFloat128 (see comment block above). zExp is
 * adjusted by the normalizing shift before the call.
 * NOTE(review): extract is line-mangled; the zExp -= shiftCount adjustment,
 * zSig2 declaration and else/brace lines are missing from this view.
 */
4909 static float128
normalizeRoundAndPackFloat128(bool zSign
, int32_t zExp
,
4910 uint64_t zSig0
, uint64_t zSig1
,
4911 float_status
*status
)
4921 shiftCount
= clz64(zSig0
) - 15;
4922 if ( 0 <= shiftCount
) {
/* Significand fits after a left shift; no bits are lost (zSig2 = 0). */
4924 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
/* Significand too wide: shift right, jamming lost bits into zSig2. */
4927 shift128ExtraRightJamming(
4928 zSig0
, zSig1
, 0, - shiftCount
, &zSig0
, &zSig1
, &zSig2
);
4931 return roundAndPackFloat128(zSign
, zExp
, zSig0
, zSig1
, zSig2
, status
);
4936 /*----------------------------------------------------------------------------
4937 | Returns the result of converting the 32-bit two's complement integer `a'
4938 | to the extended double-precision floating-point format. The conversion
4939 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4941 *----------------------------------------------------------------------------*/
/*
 * Converts a 32-bit signed integer to floatx80 (see comment block above).
 * The conversion is always exact. NOTE(review): extract is line-mangled;
 * the local declarations and the zSig = absA widening assignment are
 * missing from this view.
 */
4943 floatx80
int32_to_floatx80(int32_t a
, float_status
*status
)
4950 if ( a
== 0 ) return packFloatx80( 0, 0, 0 );
/* Take the magnitude; zSign (assigned on a missing line) holds a < 0. */
4952 absA
= zSign
? - a
: a
;
/* +32: absA is at most 32 bits wide inside the 64-bit significand. */
4953 shiftCount
= clz32(absA
) + 32;
4955 return packFloatx80( zSign
, 0x403E - shiftCount
, zSig
<<shiftCount
);
4959 /*----------------------------------------------------------------------------
4960 | Returns the result of converting the 32-bit two's complement integer `a' to
4961 | the quadruple-precision floating-point format. The conversion is performed
4962 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4963 *----------------------------------------------------------------------------*/
/*
 * Converts a 32-bit signed integer to float128 (see comment block above).
 * Always exact: the value fits entirely in zSig0.
 * NOTE(review): extract is line-mangled; local declarations and the
 * zSig0 = absA assignment are missing from this view.
 */
4965 float128
int32_to_float128(int32_t a
, float_status
*status
)
4972 if ( a
== 0 ) return packFloat128( 0, 0, 0, 0 );
4974 absA
= zSign
? - a
: a
;
/* +17: position the leading bit at bit 48 of the high significand word. */
4975 shiftCount
= clz32(absA
) + 17;
4977 return packFloat128( zSign
, 0x402E - shiftCount
, zSig0
<<shiftCount
, 0 );
4981 /*----------------------------------------------------------------------------
4982 | Returns the result of converting the 64-bit two's complement integer `a'
4983 | to the extended double-precision floating-point format. The conversion
4984 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4986 *----------------------------------------------------------------------------*/
/*
 * Converts a 64-bit signed integer to floatx80 (see comment block above).
 * Always exact, since floatx80 has a full 64-bit significand.
 * NOTE(review): extract is line-mangled; local declarations are missing
 * from this view.
 */
4988 floatx80
int64_to_floatx80(int64_t a
, float_status
*status
)
4994 if ( a
== 0 ) return packFloatx80( 0, 0, 0 );
4996 absA
= zSign
? - a
: a
;
/* Normalize so the explicit integer bit (bit 63) is set. */
4997 shiftCount
= clz64(absA
);
4998 return packFloatx80( zSign
, 0x403E - shiftCount
, absA
<<shiftCount
);
5002 /*----------------------------------------------------------------------------
5003 | Returns the result of converting the 64-bit two's complement integer `a' to
5004 | the quadruple-precision floating-point format. The conversion is performed
5005 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5006 *----------------------------------------------------------------------------*/
/*
 * Converts a 64-bit signed integer to float128 (see comment block above).
 * Always exact. NOTE(review): extract is line-mangled; the bodies of the
 * shiftCount >= 64 branch and the else branch (which seed zSig0/zSig1
 * before the final shift) are missing from this view.
 */
5008 float128
int64_to_float128(int64_t a
, float_status
*status
)
5014 uint64_t zSig0
, zSig1
;
5016 if ( a
== 0 ) return packFloat128( 0, 0, 0, 0 );
5018 absA
= zSign
? - a
: a
;
/* +49: total left shift needed to align absA's leading bit at bit 48
 * of the high word of the 128-bit significand. */
5019 shiftCount
= clz64(absA
) + 49;
5020 zExp
= 0x406E - shiftCount
;
5021 if ( 64 <= shiftCount
) {
5030 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
5031 return packFloat128( zSign
, zExp
, zSig0
, zSig1
);
5035 /*----------------------------------------------------------------------------
5036 | Returns the result of converting the 64-bit unsigned integer `a'
5037 | to the quadruple-precision floating-point format. The conversion is performed
5038 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5039 *----------------------------------------------------------------------------*/
/*
 * Converts a 64-bit unsigned integer to float128 (see comment block above).
 * Delegates normalization to normalizeRoundAndPackFloat128 with the value
 * placed in the low significand word. NOTE(review): extract is line-mangled;
 * the `if (a == 0)` guard line before the float128_zero return is missing
 * from this view.
 */
5041 float128
uint64_to_float128(uint64_t a
, float_status
*status
)
5044 return float128_zero
;
5046 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a
, status
);
5049 /*----------------------------------------------------------------------------
5050 | Returns the result of converting the single-precision floating-point value
5051 | `a' to the extended double-precision floating-point format. The conversion
5052 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5054 *----------------------------------------------------------------------------*/
/*
 * Converts a single-precision value to floatx80 (see comment block above).
 * NaNs are routed through the commonNaN machinery and re-silenced;
 * infinities, zeros and subnormals get their own paths; finite values are
 * widened exactly. NOTE(review): extract is line-mangled; brace/else lines
 * are missing from this view.
 */
5056 floatx80
float32_to_floatx80(float32 a
, float_status
*status
)
5062 a
= float32_squash_input_denormal(a
, status
);
5063 aSig
= extractFloat32Frac( a
);
5064 aExp
= extractFloat32Exp( a
);
5065 aSign
= extractFloat32Sign( a
);
/* Exponent 0xFF: NaN (nonzero fraction) or infinity. */
5066 if ( aExp
== 0xFF ) {
5068 floatx80 res
= commonNaNToFloatx80(float32ToCommonNaN(a
, status
),
5070 return floatx80_silence_nan(res
, status
);
5072 return packFloatx80(aSign
,
5073 floatx80_infinity_high
,
5074 floatx80_infinity_low
);
/* Exponent 0: zero or subnormal; normalize the latter. */
5077 if ( aSig
== 0 ) return packFloatx80( aSign
, 0, 0 );
5078 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
/* Rebias (0x3F80 = 0x3FFF - 0x7F) and left-justify the 24-bit significand
 * (23 fraction bits + implicit integer bit) into 64 bits. */
5081 return packFloatx80( aSign
, aExp
+ 0x3F80, ( (uint64_t) aSig
)<<40 );
5085 /*----------------------------------------------------------------------------
5086 | Returns the remainder of the single-precision floating-point value `a'
5087 | with respect to the corresponding value `b'. The operation is performed
5088 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5089 *----------------------------------------------------------------------------*/
/*
 * IEEE remainder of single-precision a with respect to b (see comment
 * block above). Uses repeated estimated division to reduce the exponent
 * difference, then picks between the last two partial remainders so the
 * result is the one nearest zero (ties to even quotient).
 * NOTE(review): extract is line-mangled and missing many lines (braces,
 * some declarations such as q/sigMean/zSign, loop headers); tokens are
 * preserved byte-for-byte.
 */
5091 float32
float32_rem(float32 a
, float32 b
, float_status
*status
)
5094 int aExp
, bExp
, expDiff
;
5095 uint32_t aSig
, bSig
;
5097 uint64_t aSig64
, bSig64
, q64
;
5098 uint32_t alternateASig
;
5100 a
= float32_squash_input_denormal(a
, status
);
5101 b
= float32_squash_input_denormal(b
, status
);
5103 aSig
= extractFloat32Frac( a
);
5104 aExp
= extractFloat32Exp( a
);
5105 aSign
= extractFloat32Sign( a
);
5106 bSig
= extractFloat32Frac( b
);
5107 bExp
= extractFloat32Exp( b
);
/* Special operands: NaN propagation, rem(inf, x) and rem(x, 0) are
 * invalid and yield the default NaN. */
5108 if ( aExp
== 0xFF ) {
5109 if ( aSig
|| ( ( bExp
== 0xFF ) && bSig
) ) {
5110 return propagateFloat32NaN(a
, b
, status
);
5112 float_raise(float_flag_invalid
, status
);
5113 return float32_default_nan(status
);
5115 if ( bExp
== 0xFF ) {
5117 return propagateFloat32NaN(a
, b
, status
);
5123 float_raise(float_flag_invalid
, status
);
5124 return float32_default_nan(status
);
5126 normalizeFloat32Subnormal( bSig
, &bExp
, &bSig
);
5129 if ( aSig
== 0 ) return a
;
5130 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5132 expDiff
= aExp
- bExp
;
/* Small exponent difference: one direct division step suffices. */
5135 if ( expDiff
< 32 ) {
5138 if ( expDiff
< 0 ) {
5139 if ( expDiff
< -1 ) return a
;
5142 q
= ( bSig
<= aSig
);
5143 if ( q
) aSig
-= bSig
;
5144 if ( 0 < expDiff
) {
5145 q
= ( ( (uint64_t) aSig
)<<32 ) / bSig
;
5148 aSig
= ( ( aSig
>>1 )<<( expDiff
- 1 ) ) - bSig
* q
;
/* Large exponent difference: iterate 62-bit reduction steps using the
 * 128/64 division estimate (q64 is deliberately under-estimated by 2
 * so the remainder stays non-negative). */
5156 if ( bSig
<= aSig
) aSig
-= bSig
;
5157 aSig64
= ( (uint64_t) aSig
)<<40;
5158 bSig64
= ( (uint64_t) bSig
)<<40;
5160 while ( 0 < expDiff
) {
5161 q64
= estimateDiv128To64( aSig64
, 0, bSig64
);
5162 q64
= ( 2 < q64
) ? q64
- 2 : 0;
5163 aSig64
= - ( ( bSig
* q64
)<<38 );
5167 q64
= estimateDiv128To64( aSig64
, 0, bSig64
);
5168 q64
= ( 2 < q64
) ? q64
- 2 : 0;
5169 q
= q64
>>( 64 - expDiff
);
5171 aSig
= ( ( aSig64
>>33 )<<( expDiff
- 1 ) ) - bSig
* q
;
/* Final correction loop: step back until the remainder is non-negative,
 * remembering the previous (alternate) candidate. */
5174 alternateASig
= aSig
;
5177 } while ( 0 <= (int32_t) aSig
);
/* Choose the remainder nearest zero; on an exact tie pick the one with
 * even quotient (q & 1). */
5178 sigMean
= aSig
+ alternateASig
;
5179 if ( ( sigMean
< 0 ) || ( ( sigMean
== 0 ) && ( q
& 1 ) ) ) {
5180 aSig
= alternateASig
;
5182 zSign
= ( (int32_t) aSig
< 0 );
5183 if ( zSign
) aSig
= - aSig
;
5184 return normalizeRoundAndPackFloat32(aSign
^ zSign
, bExp
, aSig
, status
);
5189 /*----------------------------------------------------------------------------
5190 | Returns the binary exponential of the single-precision floating-point value
5191 | `a'. The operation is performed according to the IEC/IEEE Standard for
5192 | Binary Floating-Point Arithmetic.
5194 | Uses the following identities:
5196 | 1. -------------------------------------------------------------------------
5200 | 2. -------------------------------------------------------------------------
5203 | e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5205 *----------------------------------------------------------------------------*/
/*
 * Taylor-series coefficients for float32_exp2 below: entry k (0-based)
 * is the double-precision value 1/(k+1)! used in the e^x expansion
 * described in the comment block above this table.
 */
5207 static const float64 float32_exp2_coefficients
[15] =
5209 const_float64( 0x3ff0000000000000ll
), /* 1 */
5210 const_float64( 0x3fe0000000000000ll
), /* 2 */
5211 const_float64( 0x3fc5555555555555ll
), /* 3 */
5212 const_float64( 0x3fa5555555555555ll
), /* 4 */
5213 const_float64( 0x3f81111111111111ll
), /* 5 */
5214 const_float64( 0x3f56c16c16c16c17ll
), /* 6 */
5215 const_float64( 0x3f2a01a01a01a01all
), /* 7 */
5216 const_float64( 0x3efa01a01a01a01all
), /* 8 */
5217 const_float64( 0x3ec71de3a556c734ll
), /* 9 */
5218 const_float64( 0x3e927e4fb7789f5cll
), /* 10 */
5219 const_float64( 0x3e5ae64567f544e4ll
), /* 11 */
5220 const_float64( 0x3e21eed8eff8d898ll
), /* 12 */
5221 const_float64( 0x3de6124613a86d09ll
), /* 13 */
5222 const_float64( 0x3da93974a8c07c9dll
), /* 14 */
5223 const_float64( 0x3d6ae7f3e733b81fll
), /* 15 */
/*
 * Computes 2^a for single-precision a (see comment block above) by
 * evaluating e^(a*ln2) as a 15-term Taylor series in double precision,
 * then rounding back to float32. Always inexact for finite nonzero input.
 * NOTE(review): extract is line-mangled; the declarations of x/r/xn/f/i
 * and several brace/initialization lines are missing from this view.
 */
5226 float32
float32_exp2(float32 a
, float_status
*status
)
5233 a
= float32_squash_input_denormal(a
, status
);
5235 aSig
= extractFloat32Frac( a
);
5236 aExp
= extractFloat32Exp( a
);
5237 aSign
= extractFloat32Sign( a
);
/* NaN propagates; 2^-inf = 0, 2^+inf = +inf (returned as a). */
5239 if ( aExp
== 0xFF) {
5241 return propagateFloat32NaN(a
, float32_zero
, status
);
5243 return (aSign
) ? float32_zero
: a
;
/* 2^0 == 1 exactly. */
5246 if (aSig
== 0) return float32_one
;
5249 float_raise(float_flag_inexact
, status
);
5251 /* ******************************* */
5252 /* using float64 for approximation */
5253 /* ******************************* */
5254 x
= float32_to_float64(a
, status
);
5255 x
= float64_mul(x
, float64_ln2
, status
);
/* Accumulate r = sum xn * 1/(i+1)!, with xn tracking x^(i+1). */
5259 for (i
= 0 ; i
< 15 ; i
++) {
5262 f
= float64_mul(xn
, float32_exp2_coefficients
[i
], status
);
5263 r
= float64_add(r
, f
, status
);
5265 xn
= float64_mul(xn
, x
, status
);
5268 return float64_to_float32(r
, status
);
5271 /*----------------------------------------------------------------------------
5272 | Returns the binary log of the single-precision floating-point value `a'.
5273 | The operation is performed according to the IEC/IEEE Standard for Binary
5274 | Floating-Point Arithmetic.
5275 *----------------------------------------------------------------------------*/
/*
 * Computes log2 of single-precision a (see comment block above) by
 * repeated squaring of the significand, extracting one result bit per
 * iteration. NOTE(review): extract is line-mangled and missing lines
 * (the zero / negative-input guards, the zSig accumulation inside the
 * loop, and brace/else lines); tokens are preserved byte-for-byte.
 */
5276 float32
float32_log2(float32 a
, float_status
*status
)
5280 uint32_t aSig
, zSig
, i
;
5282 a
= float32_squash_input_denormal(a
, status
);
5283 aSig
= extractFloat32Frac( a
);
5284 aExp
= extractFloat32Exp( a
);
5285 aSign
= extractFloat32Sign( a
);
/* log2(+0/-0) = -inf (packFloat32 with sign 1, exponent 0xFF). */
5288 if ( aSig
== 0 ) return packFloat32( 1, 0xFF, 0 );
5289 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
/* Negative input: invalid operation -> default NaN. */
5292 float_raise(float_flag_invalid
, status
);
5293 return float32_default_nan(status
);
5295 if ( aExp
== 0xFF ) {
5297 return propagateFloat32NaN(a
, float32_zero
, status
);
/* Bit-by-bit log: square the fixed-point significand; a carry into bit
 * 24 yields a 1 bit of the fractional log2. */
5307 for (i
= 1 << 22; i
> 0; i
>>= 1) {
5308 aSig
= ( (uint64_t)aSig
* aSig
) >> 23;
5309 if ( aSig
& 0x01000000 ) {
5318 return normalizeRoundAndPackFloat32(zSign
, 0x85, zSig
, status
);
5321 /*----------------------------------------------------------------------------
5322 | Returns the result of converting the double-precision floating-point value
5323 | `a' to the extended double-precision floating-point format. The conversion
5324 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5326 *----------------------------------------------------------------------------*/
/*
 * Converts a double-precision value to floatx80 (see comment block above).
 * Mirrors float32_to_floatx80: NaN via commonNaN machinery, explicit
 * infinity/zero/subnormal paths, exact widening for finite values.
 * NOTE(review): extract is line-mangled; brace/else lines and the
 * packFloatx80( opening of the final return are missing from this view.
 */
5328 floatx80
float64_to_floatx80(float64 a
, float_status
*status
)
5334 a
= float64_squash_input_denormal(a
, status
);
5335 aSig
= extractFloat64Frac( a
);
5336 aExp
= extractFloat64Exp( a
);
5337 aSign
= extractFloat64Sign( a
);
5338 if ( aExp
== 0x7FF ) {
5340 floatx80 res
= commonNaNToFloatx80(float64ToCommonNaN(a
, status
),
5342 return floatx80_silence_nan(res
, status
);
5344 return packFloatx80(aSign
,
5345 floatx80_infinity_high
,
5346 floatx80_infinity_low
);
5349 if ( aSig
== 0 ) return packFloatx80( aSign
, 0, 0 );
5350 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
/* Rebias (0x3C00 = 0x3FFF - 0x3FF); restore the implicit integer bit
 * and left-justify the 53-bit significand into 64 bits. */
5354 aSign
, aExp
+ 0x3C00, (aSig
| UINT64_C(0x0010000000000000)) << 11);
5358 /*----------------------------------------------------------------------------
5359 | Returns the remainder of the double-precision floating-point value `a'
5360 | with respect to the corresponding value `b'. The operation is performed
5361 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5362 *----------------------------------------------------------------------------*/
/*
 * IEEE remainder of double-precision a with respect to b (see comment
 * block above). Same structure as float32_rem: estimated-division
 * reduction of the exponent gap, then nearest-to-zero / ties-to-even
 * selection between the last two partial remainders.
 * NOTE(review): extract is line-mangled and missing lines (braces,
 * declarations of sigMean/zSign, loop headers); tokens preserved
 * byte-for-byte.
 */
5364 float64
float64_rem(float64 a
, float64 b
, float_status
*status
)
5367 int aExp
, bExp
, expDiff
;
5368 uint64_t aSig
, bSig
;
5369 uint64_t q
, alternateASig
;
5372 a
= float64_squash_input_denormal(a
, status
);
5373 b
= float64_squash_input_denormal(b
, status
);
5374 aSig
= extractFloat64Frac( a
);
5375 aExp
= extractFloat64Exp( a
);
5376 aSign
= extractFloat64Sign( a
);
5377 bSig
= extractFloat64Frac( b
);
5378 bExp
= extractFloat64Exp( b
);
/* Special operands: NaN propagation; rem(inf, x) and rem(x, 0) are
 * invalid operations producing the default NaN. */
5379 if ( aExp
== 0x7FF ) {
5380 if ( aSig
|| ( ( bExp
== 0x7FF ) && bSig
) ) {
5381 return propagateFloat64NaN(a
, b
, status
);
5383 float_raise(float_flag_invalid
, status
);
5384 return float64_default_nan(status
);
5386 if ( bExp
== 0x7FF ) {
5388 return propagateFloat64NaN(a
, b
, status
);
5394 float_raise(float_flag_invalid
, status
);
5395 return float64_default_nan(status
);
5397 normalizeFloat64Subnormal( bSig
, &bExp
, &bSig
);
5400 if ( aSig
== 0 ) return a
;
5401 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5403 expDiff
= aExp
- bExp
;
/* Restore implicit integer bits and left-justify both significands. */
5404 aSig
= (aSig
| UINT64_C(0x0010000000000000)) << 11;
5405 bSig
= (bSig
| UINT64_C(0x0010000000000000)) << 11;
5406 if ( expDiff
< 0 ) {
5407 if ( expDiff
< -1 ) return a
;
5410 q
= ( bSig
<= aSig
);
5411 if ( q
) aSig
-= bSig
;
/* Reduce the exponent gap 62 bits at a time; q is under-estimated by 2
 * so the partial remainder stays non-negative. */
5413 while ( 0 < expDiff
) {
5414 q
= estimateDiv128To64( aSig
, 0, bSig
);
5415 q
= ( 2 < q
) ? q
- 2 : 0;
5416 aSig
= - ( ( bSig
>>2 ) * q
);
5420 if ( 0 < expDiff
) {
5421 q
= estimateDiv128To64( aSig
, 0, bSig
);
5422 q
= ( 2 < q
) ? q
- 2 : 0;
5425 aSig
= ( ( aSig
>>1 )<<( expDiff
- 1 ) ) - bSig
* q
;
/* Correction loop: keep the previous candidate remainder, stop once the
 * current one goes negative. */
5432 alternateASig
= aSig
;
5435 } while ( 0 <= (int64_t) aSig
);
/* Nearest-to-zero selection; ties resolved by even quotient (q & 1). */
5436 sigMean
= aSig
+ alternateASig
;
5437 if ( ( sigMean
< 0 ) || ( ( sigMean
== 0 ) && ( q
& 1 ) ) ) {
5438 aSig
= alternateASig
;
5440 zSign
= ( (int64_t) aSig
< 0 );
5441 if ( zSign
) aSig
= - aSig
;
5442 return normalizeRoundAndPackFloat64(aSign
^ zSign
, bExp
, aSig
, status
);
5446 /*----------------------------------------------------------------------------
5447 | Returns the binary log of the double-precision floating-point value `a'.
5448 | The operation is performed according to the IEC/IEEE Standard for Binary
5449 | Floating-Point Arithmetic.
5450 *----------------------------------------------------------------------------*/
/*
 * Computes log2 of double-precision a (see comment block above) using
 * the same repeated-squaring bit-extraction scheme as float32_log2, with
 * mul64To128 supplying the 128-bit squares.
 * NOTE(review): extract is line-mangled and missing lines (zero /
 * negative-input guards, zSig accumulation inside the loop, brace/else
 * lines); tokens preserved byte-for-byte.
 */
5451 float64
float64_log2(float64 a
, float_status
*status
)
5455 uint64_t aSig
, aSig0
, aSig1
, zSig
, i
;
5456 a
= float64_squash_input_denormal(a
, status
);
5458 aSig
= extractFloat64Frac( a
);
5459 aExp
= extractFloat64Exp( a
);
5460 aSign
= extractFloat64Sign( a
);
/* log2(+0/-0) = -inf. */
5463 if ( aSig
== 0 ) return packFloat64( 1, 0x7FF, 0 );
5464 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
/* Negative input: invalid operation -> default NaN. */
5467 float_raise(float_flag_invalid
, status
);
5468 return float64_default_nan(status
);
5470 if ( aExp
== 0x7FF ) {
5472 return propagateFloat64NaN(a
, float64_zero
, status
);
/* Restore the implicit integer bit before the fixed-point squaring. */
5478 aSig
|= UINT64_C(0x0010000000000000);
5480 zSig
= (uint64_t)aExp
<< 52;
/* One fractional log2 bit per iteration: square, renormalize, test the
 * carry into bit 53. */
5481 for (i
= 1LL << 51; i
> 0; i
>>= 1) {
5482 mul64To128( aSig
, aSig
, &aSig0
, &aSig1
);
5483 aSig
= ( aSig0
<< 12 ) | ( aSig1
>> 52 );
5484 if ( aSig
& UINT64_C(0x0020000000000000) ) {
5492 return normalizeRoundAndPackFloat64(zSign
, 0x408, zSig
, status
);
5495 /*----------------------------------------------------------------------------
5496 | Returns the result of converting the extended double-precision floating-
5497 | point value `a' to the 32-bit two's complement integer format. The
5498 | conversion is performed according to the IEC/IEEE Standard for Binary
5499 | Floating-Point Arithmetic---which means in particular that the conversion
5500 | is rounded according to the current rounding mode. If `a' is a NaN, the
5501 | largest positive integer is returned. Otherwise, if the conversion
5502 | overflows, the largest integer with the same sign as `a' is returned.
5503 *----------------------------------------------------------------------------*/
/*
 * Converts floatx80 to int32 with rounding per the current mode (see
 * comment block above). Invalid encodings raise the invalid flag.
 * NOTE(review): extract is line-mangled; the return after the
 * invalid-encoding raise and some declarations are missing from this view.
 */
5505 int32_t floatx80_to_int32(floatx80 a
, float_status
*status
)
5508 int32_t aExp
, shiftCount
;
5511 if (floatx80_invalid_encoding(a
)) {
5512 float_raise(float_flag_invalid
, status
);
5515 aSig
= extractFloatx80Frac( a
);
5516 aExp
= extractFloatx80Exp( a
);
5517 aSign
= extractFloatx80Sign( a
);
/* NaN (max exponent, nonzero fraction after dropping the integer bit):
 * force positive so roundAndPackInt32 returns the largest positive int. */
5518 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( aSig
<<1 ) ) aSign
= 0;
5519 shiftCount
= 0x4037 - aExp
;
5520 if ( shiftCount
<= 0 ) shiftCount
= 1;
5521 shift64RightJamming( aSig
, shiftCount
, &aSig
);
5522 return roundAndPackInt32(aSign
, aSig
, status
);
5526 /*----------------------------------------------------------------------------
5527 | Returns the result of converting the extended double-precision floating-
5528 | point value `a' to the 32-bit two's complement integer format. The
5529 | conversion is performed according to the IEC/IEEE Standard for Binary
5530 | Floating-Point Arithmetic, except that the conversion is always rounded
5531 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5532 | Otherwise, if the conversion overflows, the largest integer with the same
5533 | sign as `a' is returned.
5534 *----------------------------------------------------------------------------*/
/*
 * Converts floatx80 to int32 with truncation (round toward zero) rather
 * than the current rounding mode (see comment block above). Overflow
 * raises invalid and saturates; dropped bits raise inexact.
 * NOTE(review): extract is line-mangled and missing lines (the `goto
 * invalid` path, the savedASig capture, z assignment and final return);
 * tokens preserved byte-for-byte.
 */
5536 int32_t floatx80_to_int32_round_to_zero(floatx80 a
, float_status
*status
)
5539 int32_t aExp
, shiftCount
;
5540 uint64_t aSig
, savedASig
;
5543 if (floatx80_invalid_encoding(a
)) {
5544 float_raise(float_flag_invalid
, status
);
5547 aSig
= extractFloatx80Frac( a
);
5548 aExp
= extractFloatx80Exp( a
);
5549 aSign
= extractFloatx80Sign( a
);
/* Exponent above 0x401E cannot fit in 32 bits; NaN is forced positive
 * so the saturation below picks the largest positive integer. */
5550 if ( 0x401E < aExp
) {
5551 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( aSig
<<1 ) ) aSign
= 0;
5554 else if ( aExp
< 0x3FFF ) {
/* Magnitude below 1 truncates to 0; nonzero values are inexact. */
5556 float_raise(float_flag_inexact
, status
);
5560 shiftCount
= 0x403E - aExp
;
5562 aSig
>>= shiftCount
;
5564 if ( aSign
) z
= - z
;
/* Sign mismatch after negation indicates overflow of int32. */
5565 if ( ( z
< 0 ) ^ aSign
) {
5567 float_raise(float_flag_invalid
, status
);
5568 return aSign
? (int32_t) 0x80000000 : 0x7FFFFFFF;
/* Shifting back and comparing detects any truncated (inexact) bits. */
5570 if ( ( aSig
<<shiftCount
) != savedASig
) {
5571 float_raise(float_flag_inexact
, status
);
5577 /*----------------------------------------------------------------------------
5578 | Returns the result of converting the extended double-precision floating-
5579 | point value `a' to the 64-bit two's complement integer format. The
5580 | conversion is performed according to the IEC/IEEE Standard for Binary
5581 | Floating-Point Arithmetic---which means in particular that the conversion
5582 | is rounded according to the current rounding mode. If `a' is a NaN,
5583 | the largest positive integer is returned. Otherwise, if the conversion
5584 | overflows, the largest integer with the same sign as `a' is returned.
5585 *----------------------------------------------------------------------------*/
/*
 * Converts floatx80 to int64 with rounding per the current mode (see
 * comment block above). Values too large for the 64-bit significand
 * shift raise invalid and saturate by sign / NaN-ness.
 * NOTE(review): extract is line-mangled; the saturation returns inside
 * the shiftCount <= 0 branch and some declarations are missing from
 * this view.
 */
5587 int64_t floatx80_to_int64(floatx80 a
, float_status
*status
)
5590 int32_t aExp
, shiftCount
;
5591 uint64_t aSig
, aSigExtra
;
5593 if (floatx80_invalid_encoding(a
)) {
5594 float_raise(float_flag_invalid
, status
);
5597 aSig
= extractFloatx80Frac( a
);
5598 aExp
= extractFloatx80Exp( a
);
5599 aSign
= extractFloatx80Sign( a
);
5600 shiftCount
= 0x403E - aExp
;
/* shiftCount <= 0: magnitude >= 2^63 -> invalid; positive values and
 * NaNs saturate to the largest positive integer. */
5601 if ( shiftCount
<= 0 ) {
5603 float_raise(float_flag_invalid
, status
);
5604 if (!aSign
|| floatx80_is_any_nan(a
)) {
5612 shift64ExtraRightJamming( aSig
, 0, shiftCount
, &aSig
, &aSigExtra
);
5614 return roundAndPackInt64(aSign
, aSig
, aSigExtra
, status
);
5618 /*----------------------------------------------------------------------------
5619 | Returns the result of converting the extended double-precision floating-
5620 | point value `a' to the 64-bit two's complement integer format. The
5621 | conversion is performed according to the IEC/IEEE Standard for Binary
5622 | Floating-Point Arithmetic, except that the conversion is always rounded
5623 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5624 | Otherwise, if the conversion overflows, the largest integer with the same
5625 | sign as `a' is returned.
5626 *----------------------------------------------------------------------------*/
/*
 * Converts floatx80 to int64 with truncation (see comment block above).
 * The single representable overflow-boundary value -2^63 (a.high 0xC03E
 * with only the integer bit set) passes through; everything else out of
 * range raises invalid and saturates.
 * NOTE(review): extract is line-mangled and missing lines (saturation
 * returns, the aSig/z declarations, final return z); tokens preserved
 * byte-for-byte.
 */
5628 int64_t floatx80_to_int64_round_to_zero(floatx80 a
, float_status
*status
)
5631 int32_t aExp
, shiftCount
;
5635 if (floatx80_invalid_encoding(a
)) {
5636 float_raise(float_flag_invalid
, status
);
5639 aSig
= extractFloatx80Frac( a
);
5640 aExp
= extractFloatx80Exp( a
);
5641 aSign
= extractFloatx80Sign( a
);
5642 shiftCount
= aExp
- 0x403E;
5643 if ( 0 <= shiftCount
) {
/* Drop the explicit integer bit; only -2^63 exactly is representable. */
5644 aSig
&= UINT64_C(0x7FFFFFFFFFFFFFFF);
5645 if ( ( a
.high
!= 0xC03E ) || aSig
) {
5646 float_raise(float_flag_invalid
, status
);
5647 if ( ! aSign
|| ( ( aExp
== 0x7FFF ) && aSig
) ) {
5653 else if ( aExp
< 0x3FFF ) {
/* Magnitude below 1 truncates to 0; nonzero values are inexact. */
5655 float_raise(float_flag_inexact
, status
);
5659 z
= aSig
>>( - shiftCount
);
5660 if ( (uint64_t) ( aSig
<<( shiftCount
& 63 ) ) ) {
5661 float_raise(float_flag_inexact
, status
);
5663 if ( aSign
) z
= - z
;
5668 /*----------------------------------------------------------------------------
5669 | Returns the result of converting the extended double-precision floating-
5670 | point value `a' to the single-precision floating-point format. The
5671 | conversion is performed according to the IEC/IEEE Standard for Binary
5672 | Floating-Point Arithmetic.
5673 *----------------------------------------------------------------------------*/
/*
 * Converts floatx80 to single precision (see comment block above).
 * NaNs pass through the commonNaN machinery and are re-silenced; the
 * significand is right-shifted with jamming and rounded.
 * NOTE(review): extract is line-mangled; declarations and brace lines
 * are missing from this view; tokens preserved byte-for-byte.
 */
5675 float32
floatx80_to_float32(floatx80 a
, float_status
*status
)
5681 if (floatx80_invalid_encoding(a
)) {
5682 float_raise(float_flag_invalid
, status
);
5683 return float32_default_nan(status
);
5685 aSig
= extractFloatx80Frac( a
);
5686 aExp
= extractFloatx80Exp( a
);
5687 aSign
= extractFloatx80Sign( a
);
5688 if ( aExp
== 0x7FFF ) {
5689 if ( (uint64_t) ( aSig
<<1 ) ) {
5690 float32 res
= commonNaNToFloat32(floatx80ToCommonNaN(a
, status
),
5692 return float32_silence_nan(res
, status
);
5694 return packFloat32( aSign
, 0xFF, 0 );
/* Narrow 64-bit significand to 31 bits with sticky (jammed) rounding
 * info, then rebias (0x3F81 = 0x3FFF - 0x7F + 1) and round. */
5696 shift64RightJamming( aSig
, 33, &aSig
);
5697 if ( aExp
|| aSig
) aExp
-= 0x3F81;
5698 return roundAndPackFloat32(aSign
, aExp
, aSig
, status
);
5702 /*----------------------------------------------------------------------------
5703 | Returns the result of converting the extended double-precision floating-
5704 | point value `a' to the double-precision floating-point format. The
5705 | conversion is performed according to the IEC/IEEE Standard for Binary
5706 | Floating-Point Arithmetic.
5707 *----------------------------------------------------------------------------*/
/*
 * Converts floatx80 to double precision (see comment block above).
 * Mirrors floatx80_to_float32 with a 1-bit jamming shift and the
 * double-precision rebias (0x3C01 = 0x3FFF - 0x3FF + 1).
 * NOTE(review): extract is line-mangled; brace lines and some
 * declarations are missing from this view; tokens preserved
 * byte-for-byte.
 */
5709 float64
floatx80_to_float64(floatx80 a
, float_status
*status
)
5713 uint64_t aSig
, zSig
;
5715 if (floatx80_invalid_encoding(a
)) {
5716 float_raise(float_flag_invalid
, status
);
5717 return float64_default_nan(status
);
5719 aSig
= extractFloatx80Frac( a
);
5720 aExp
= extractFloatx80Exp( a
);
5721 aSign
= extractFloatx80Sign( a
);
5722 if ( aExp
== 0x7FFF ) {
5723 if ( (uint64_t) ( aSig
<<1 ) ) {
5724 float64 res
= commonNaNToFloat64(floatx80ToCommonNaN(a
, status
),
5726 return float64_silence_nan(res
, status
);
5728 return packFloat64( aSign
, 0x7FF, 0 );
5730 shift64RightJamming( aSig
, 1, &zSig
);
5731 if ( aExp
|| aSig
) aExp
-= 0x3C01;
5732 return roundAndPackFloat64(aSign
, aExp
, zSig
, status
);
5736 /*----------------------------------------------------------------------------
5737 | Returns the result of converting the extended double-precision floating-
5738 | point value `a' to the quadruple-precision floating-point format. The
5739 | conversion is performed according to the IEC/IEEE Standard for Binary
5740 | Floating-Point Arithmetic.
5741 *----------------------------------------------------------------------------*/
/*
 * Converts floatx80 to quadruple precision (see comment block above).
 * Always exact for non-NaN inputs: the explicit integer bit is dropped
 * (aSig<<1) and the fraction is right-aligned into the 112-bit float128
 * significand. NOTE(review): extract is line-mangled; declarations and
 * brace lines are missing from this view.
 */
5743 float128
floatx80_to_float128(floatx80 a
, float_status
*status
)
5747 uint64_t aSig
, zSig0
, zSig1
;
5749 if (floatx80_invalid_encoding(a
)) {
5750 float_raise(float_flag_invalid
, status
);
5751 return float128_default_nan(status
);
5753 aSig
= extractFloatx80Frac( a
);
5754 aExp
= extractFloatx80Exp( a
);
5755 aSign
= extractFloatx80Sign( a
);
5756 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( aSig
<<1 ) ) {
5757 float128 res
= commonNaNToFloat128(floatx80ToCommonNaN(a
, status
),
5759 return float128_silence_nan(res
, status
);
5761 shift128Right( aSig
<<1, 0, 16, &zSig0
, &zSig1
);
5762 return packFloat128( aSign
, aExp
, zSig0
, zSig1
);
5766 /*----------------------------------------------------------------------------
5767 | Rounds the extended double-precision floating-point value `a'
5768 | to the precision provided by floatx80_rounding_precision and returns the
5769 | result as an extended double-precision floating-point value.
5770 | The operation is performed according to the IEC/IEEE Standard for Binary
5771 | Floating-Point Arithmetic.
5772 *----------------------------------------------------------------------------*/
/*
 * Re-rounds a floatx80 value to the precision currently selected in
 * status->floatx80_rounding_precision (see comment block above) by
 * unpacking and immediately repacking through roundAndPackFloatx80
 * with no extra sticky bits.
 */
5774 floatx80
floatx80_round(floatx80 a
, float_status
*status
)
5776 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
5777 extractFloatx80Sign(a
),
5778 extractFloatx80Exp(a
),
5779 extractFloatx80Frac(a
), 0, status
);
5782 /*----------------------------------------------------------------------------
5783 | Rounds the extended double-precision floating-point value `a' to an integer,
5784 | and returns the result as an extended quadruple-precision floating-point
5785 | value. The operation is performed according to the IEC/IEEE Standard for
5786 | Binary Floating-Point Arithmetic.
5787 *----------------------------------------------------------------------------*/
/*
 * Rounds floatx80 a to an integral value in the current rounding mode
 * (see comment block above). Values with exponent >= 0x403E are already
 * integral; values below 1 in magnitude are handled per-mode explicitly;
 * otherwise the fractional bits of z.low are masked off after adding the
 * mode-dependent rounding bias, and inexact is raised when any bits were
 * dropped. NOTE(review): extract is line-mangled and missing lines
 * (z initialization from a, lastBitMask initialization to 1, carry into
 * z.high, returns/breaks and braces); tokens preserved byte-for-byte.
 */
5789 floatx80
floatx80_round_to_int(floatx80 a
, float_status
*status
)
5793 uint64_t lastBitMask
, roundBitsMask
;
5796 if (floatx80_invalid_encoding(a
)) {
5797 float_raise(float_flag_invalid
, status
);
5798 return floatx80_default_nan(status
);
5800 aExp
= extractFloatx80Exp( a
);
/* Exponent >= 0x403E: no fractional bits remain; only NaN needs work. */
5801 if ( 0x403E <= aExp
) {
5802 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a
)<<1 ) ) {
5803 return propagateFloatx80NaN(a
, a
, status
);
/* |a| < 1: result is 0 or +/-1 depending on the rounding mode. */
5807 if ( aExp
< 0x3FFF ) {
5809 && ( (uint64_t) ( extractFloatx80Frac( a
) ) == 0 ) ) {
5812 float_raise(float_flag_inexact
, status
);
5813 aSign
= extractFloatx80Sign( a
);
5814 switch (status
->float_rounding_mode
) {
5815 case float_round_nearest_even
:
/* Exactly 0.5 rounds to 0 (even); anything above rounds to +/-1. */
5816 if ( ( aExp
== 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a
)<<1 )
5819 packFloatx80( aSign
, 0x3FFF, UINT64_C(0x8000000000000000));
5822 case float_round_ties_away
:
5823 if (aExp
== 0x3FFE) {
5824 return packFloatx80(aSign
, 0x3FFF, UINT64_C(0x8000000000000000));
5827 case float_round_down
:
5830 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5831 : packFloatx80( 0, 0, 0 );
5832 case float_round_up
:
5834 aSign
? packFloatx80( 1, 0, 0 )
5835 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5837 case float_round_to_zero
:
5840 g_assert_not_reached();
5842 return packFloatx80( aSign
, 0, 0 );
/* General case: mask of the lowest integral bit and of all fraction
 * bits below it. */
5845 lastBitMask
<<= 0x403E - aExp
;
5846 roundBitsMask
= lastBitMask
- 1;
5848 switch (status
->float_rounding_mode
) {
5849 case float_round_nearest_even
:
5850 z
.low
+= lastBitMask
>>1;
/* Tie (round bits all cleared by the bias add): clear last bit -> even. */
5851 if ((z
.low
& roundBitsMask
) == 0) {
5852 z
.low
&= ~lastBitMask
;
5855 case float_round_ties_away
:
5856 z
.low
+= lastBitMask
>> 1;
5858 case float_round_to_zero
:
5860 case float_round_up
:
5861 if (!extractFloatx80Sign(z
)) {
5862 z
.low
+= roundBitsMask
;
5865 case float_round_down
:
5866 if (extractFloatx80Sign(z
)) {
5867 z
.low
+= roundBitsMask
;
/* Truncate the fraction; a carry out of z.low renormalizes to the
 * explicit integer bit. */
5873 z
.low
&= ~ roundBitsMask
;
5876 z
.low
= UINT64_C(0x8000000000000000);
5878 if (z
.low
!= a
.low
) {
5879 float_raise(float_flag_inexact
, status
);
5885 /*----------------------------------------------------------------------------
5886 | Returns the result of adding the absolute values of the extended double-
5887 | precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5888 | negated before being returned. `zSign' is ignored if the result is a NaN.
5889 | The addition is performed according to the IEC/IEEE Standard for Binary
5890 | Floating-Point Arithmetic.
5891 *----------------------------------------------------------------------------*/
5893 static floatx80
addFloatx80Sigs(floatx80 a
, floatx80 b
, bool zSign
,
5894 float_status
*status
)
5896 int32_t aExp
, bExp
, zExp
;
5897 uint64_t aSig
, bSig
, zSig0
, zSig1
;
5900 aSig
= extractFloatx80Frac( a
);
5901 aExp
= extractFloatx80Exp( a
);
5902 bSig
= extractFloatx80Frac( b
);
5903 bExp
= extractFloatx80Exp( b
);
5904 expDiff
= aExp
- bExp
;
5905 if ( 0 < expDiff
) {
5906 if ( aExp
== 0x7FFF ) {
5907 if ((uint64_t)(aSig
<< 1)) {
5908 return propagateFloatx80NaN(a
, b
, status
);
5912 if ( bExp
== 0 ) --expDiff
;
5913 shift64ExtraRightJamming( bSig
, 0, expDiff
, &bSig
, &zSig1
);
5916 else if ( expDiff
< 0 ) {
5917 if ( bExp
== 0x7FFF ) {
5918 if ((uint64_t)(bSig
<< 1)) {
5919 return propagateFloatx80NaN(a
, b
, status
);
5921 return packFloatx80(zSign
,
5922 floatx80_infinity_high
,
5923 floatx80_infinity_low
);
5925 if ( aExp
== 0 ) ++expDiff
;
5926 shift64ExtraRightJamming( aSig
, 0, - expDiff
, &aSig
, &zSig1
);
5930 if ( aExp
== 0x7FFF ) {
5931 if ( (uint64_t) ( ( aSig
| bSig
)<<1 ) ) {
5932 return propagateFloatx80NaN(a
, b
, status
);
5937 zSig0
= aSig
+ bSig
;
5939 if ((aSig
| bSig
) & UINT64_C(0x8000000000000000) && zSig0
< aSig
) {
5940 /* At least one of the values is a pseudo-denormal,
5941 * and there is a carry out of the result. */
5946 return packFloatx80(zSign
, 0, 0);
5948 normalizeFloatx80Subnormal( zSig0
, &zExp
, &zSig0
);
5954 zSig0
= aSig
+ bSig
;
5955 if ( (int64_t) zSig0
< 0 ) goto roundAndPack
;
5957 shift64ExtraRightJamming( zSig0
, zSig1
, 1, &zSig0
, &zSig1
);
5958 zSig0
|= UINT64_C(0x8000000000000000);
5961 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
5962 zSign
, zExp
, zSig0
, zSig1
, status
);
5965 /*----------------------------------------------------------------------------
5966 | Returns the result of subtracting the absolute values of the extended
5967 | double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5968 | difference is negated before being returned. `zSign' is ignored if the
5969 | result is a NaN. The subtraction is performed according to the IEC/IEEE
5970 | Standard for Binary Floating-Point Arithmetic.
5971 *----------------------------------------------------------------------------*/
5973 static floatx80
subFloatx80Sigs(floatx80 a
, floatx80 b
, bool zSign
,
5974 float_status
*status
)
5976 int32_t aExp
, bExp
, zExp
;
5977 uint64_t aSig
, bSig
, zSig0
, zSig1
;
5980 aSig
= extractFloatx80Frac( a
);
5981 aExp
= extractFloatx80Exp( a
);
5982 bSig
= extractFloatx80Frac( b
);
5983 bExp
= extractFloatx80Exp( b
);
5984 expDiff
= aExp
- bExp
;
5985 if ( 0 < expDiff
) goto aExpBigger
;
5986 if ( expDiff
< 0 ) goto bExpBigger
;
5987 if ( aExp
== 0x7FFF ) {
5988 if ( (uint64_t) ( ( aSig
| bSig
)<<1 ) ) {
5989 return propagateFloatx80NaN(a
, b
, status
);
5991 float_raise(float_flag_invalid
, status
);
5992 return floatx80_default_nan(status
);
5999 if ( bSig
< aSig
) goto aBigger
;
6000 if ( aSig
< bSig
) goto bBigger
;
6001 return packFloatx80(status
->float_rounding_mode
== float_round_down
, 0, 0);
6003 if ( bExp
== 0x7FFF ) {
6004 if ((uint64_t)(bSig
<< 1)) {
6005 return propagateFloatx80NaN(a
, b
, status
);
6007 return packFloatx80(zSign
^ 1, floatx80_infinity_high
,
6008 floatx80_infinity_low
);
6010 if ( aExp
== 0 ) ++expDiff
;
6011 shift128RightJamming( aSig
, 0, - expDiff
, &aSig
, &zSig1
);
6013 sub128( bSig
, 0, aSig
, zSig1
, &zSig0
, &zSig1
);
6016 goto normalizeRoundAndPack
;
6018 if ( aExp
== 0x7FFF ) {
6019 if ((uint64_t)(aSig
<< 1)) {
6020 return propagateFloatx80NaN(a
, b
, status
);
6024 if ( bExp
== 0 ) --expDiff
;
6025 shift128RightJamming( bSig
, 0, expDiff
, &bSig
, &zSig1
);
6027 sub128( aSig
, 0, bSig
, zSig1
, &zSig0
, &zSig1
);
6029 normalizeRoundAndPack
:
6030 return normalizeRoundAndPackFloatx80(status
->floatx80_rounding_precision
,
6031 zSign
, zExp
, zSig0
, zSig1
, status
);
6034 /*----------------------------------------------------------------------------
6035 | Returns the result of adding the extended double-precision floating-point
6036 | values `a' and `b'. The operation is performed according to the IEC/IEEE
6037 | Standard for Binary Floating-Point Arithmetic.
6038 *----------------------------------------------------------------------------*/
6040 floatx80
floatx80_add(floatx80 a
, floatx80 b
, float_status
*status
)
6044 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
6045 float_raise(float_flag_invalid
, status
);
6046 return floatx80_default_nan(status
);
6048 aSign
= extractFloatx80Sign( a
);
6049 bSign
= extractFloatx80Sign( b
);
6050 if ( aSign
== bSign
) {
6051 return addFloatx80Sigs(a
, b
, aSign
, status
);
6054 return subFloatx80Sigs(a
, b
, aSign
, status
);
6059 /*----------------------------------------------------------------------------
6060 | Returns the result of subtracting the extended double-precision floating-
6061 | point values `a' and `b'. The operation is performed according to the
6062 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6063 *----------------------------------------------------------------------------*/
6065 floatx80
floatx80_sub(floatx80 a
, floatx80 b
, float_status
*status
)
6069 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
6070 float_raise(float_flag_invalid
, status
);
6071 return floatx80_default_nan(status
);
6073 aSign
= extractFloatx80Sign( a
);
6074 bSign
= extractFloatx80Sign( b
);
6075 if ( aSign
== bSign
) {
6076 return subFloatx80Sigs(a
, b
, aSign
, status
);
6079 return addFloatx80Sigs(a
, b
, aSign
, status
);
6084 /*----------------------------------------------------------------------------
6085 | Returns the result of multiplying the extended double-precision floating-
6086 | point values `a' and `b'. The operation is performed according to the
6087 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6088 *----------------------------------------------------------------------------*/
6090 floatx80
floatx80_mul(floatx80 a
, floatx80 b
, float_status
*status
)
6092 bool aSign
, bSign
, zSign
;
6093 int32_t aExp
, bExp
, zExp
;
6094 uint64_t aSig
, bSig
, zSig0
, zSig1
;
6096 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
6097 float_raise(float_flag_invalid
, status
);
6098 return floatx80_default_nan(status
);
6100 aSig
= extractFloatx80Frac( a
);
6101 aExp
= extractFloatx80Exp( a
);
6102 aSign
= extractFloatx80Sign( a
);
6103 bSig
= extractFloatx80Frac( b
);
6104 bExp
= extractFloatx80Exp( b
);
6105 bSign
= extractFloatx80Sign( b
);
6106 zSign
= aSign
^ bSign
;
6107 if ( aExp
== 0x7FFF ) {
6108 if ( (uint64_t) ( aSig
<<1 )
6109 || ( ( bExp
== 0x7FFF ) && (uint64_t) ( bSig
<<1 ) ) ) {
6110 return propagateFloatx80NaN(a
, b
, status
);
6112 if ( ( bExp
| bSig
) == 0 ) goto invalid
;
6113 return packFloatx80(zSign
, floatx80_infinity_high
,
6114 floatx80_infinity_low
);
6116 if ( bExp
== 0x7FFF ) {
6117 if ((uint64_t)(bSig
<< 1)) {
6118 return propagateFloatx80NaN(a
, b
, status
);
6120 if ( ( aExp
| aSig
) == 0 ) {
6122 float_raise(float_flag_invalid
, status
);
6123 return floatx80_default_nan(status
);
6125 return packFloatx80(zSign
, floatx80_infinity_high
,
6126 floatx80_infinity_low
);
6129 if ( aSig
== 0 ) return packFloatx80( zSign
, 0, 0 );
6130 normalizeFloatx80Subnormal( aSig
, &aExp
, &aSig
);
6133 if ( bSig
== 0 ) return packFloatx80( zSign
, 0, 0 );
6134 normalizeFloatx80Subnormal( bSig
, &bExp
, &bSig
);
6136 zExp
= aExp
+ bExp
- 0x3FFE;
6137 mul64To128( aSig
, bSig
, &zSig0
, &zSig1
);
6138 if ( 0 < (int64_t) zSig0
) {
6139 shortShift128Left( zSig0
, zSig1
, 1, &zSig0
, &zSig1
);
6142 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
6143 zSign
, zExp
, zSig0
, zSig1
, status
);
6146 /*----------------------------------------------------------------------------
6147 | Returns the result of dividing the extended double-precision floating-point
6148 | value `a' by the corresponding value `b'. The operation is performed
6149 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6150 *----------------------------------------------------------------------------*/
6152 floatx80
floatx80_div(floatx80 a
, floatx80 b
, float_status
*status
)
6154 bool aSign
, bSign
, zSign
;
6155 int32_t aExp
, bExp
, zExp
;
6156 uint64_t aSig
, bSig
, zSig0
, zSig1
;
6157 uint64_t rem0
, rem1
, rem2
, term0
, term1
, term2
;
6159 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
6160 float_raise(float_flag_invalid
, status
);
6161 return floatx80_default_nan(status
);
6163 aSig
= extractFloatx80Frac( a
);
6164 aExp
= extractFloatx80Exp( a
);
6165 aSign
= extractFloatx80Sign( a
);
6166 bSig
= extractFloatx80Frac( b
);
6167 bExp
= extractFloatx80Exp( b
);
6168 bSign
= extractFloatx80Sign( b
);
6169 zSign
= aSign
^ bSign
;
6170 if ( aExp
== 0x7FFF ) {
6171 if ((uint64_t)(aSig
<< 1)) {
6172 return propagateFloatx80NaN(a
, b
, status
);
6174 if ( bExp
== 0x7FFF ) {
6175 if ((uint64_t)(bSig
<< 1)) {
6176 return propagateFloatx80NaN(a
, b
, status
);
6180 return packFloatx80(zSign
, floatx80_infinity_high
,
6181 floatx80_infinity_low
);
6183 if ( bExp
== 0x7FFF ) {
6184 if ((uint64_t)(bSig
<< 1)) {
6185 return propagateFloatx80NaN(a
, b
, status
);
6187 return packFloatx80( zSign
, 0, 0 );
6191 if ( ( aExp
| aSig
) == 0 ) {
6193 float_raise(float_flag_invalid
, status
);
6194 return floatx80_default_nan(status
);
6196 float_raise(float_flag_divbyzero
, status
);
6197 return packFloatx80(zSign
, floatx80_infinity_high
,
6198 floatx80_infinity_low
);
6200 normalizeFloatx80Subnormal( bSig
, &bExp
, &bSig
);
6203 if ( aSig
== 0 ) return packFloatx80( zSign
, 0, 0 );
6204 normalizeFloatx80Subnormal( aSig
, &aExp
, &aSig
);
6206 zExp
= aExp
- bExp
+ 0x3FFE;
6208 if ( bSig
<= aSig
) {
6209 shift128Right( aSig
, 0, 1, &aSig
, &rem1
);
6212 zSig0
= estimateDiv128To64( aSig
, rem1
, bSig
);
6213 mul64To128( bSig
, zSig0
, &term0
, &term1
);
6214 sub128( aSig
, rem1
, term0
, term1
, &rem0
, &rem1
);
6215 while ( (int64_t) rem0
< 0 ) {
6217 add128( rem0
, rem1
, 0, bSig
, &rem0
, &rem1
);
6219 zSig1
= estimateDiv128To64( rem1
, 0, bSig
);
6220 if ( (uint64_t) ( zSig1
<<1 ) <= 8 ) {
6221 mul64To128( bSig
, zSig1
, &term1
, &term2
);
6222 sub128( rem1
, 0, term1
, term2
, &rem1
, &rem2
);
6223 while ( (int64_t) rem1
< 0 ) {
6225 add128( rem1
, rem2
, 0, bSig
, &rem1
, &rem2
);
6227 zSig1
|= ( ( rem1
| rem2
) != 0 );
6229 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
6230 zSign
, zExp
, zSig0
, zSig1
, status
);
6233 /*----------------------------------------------------------------------------
6234 | Returns the remainder of the extended double-precision floating-point value
6235 | `a' with respect to the corresponding value `b'. The operation is performed
6236 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6237 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
6238 | the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6239 | the absolute value of the integer quotient.
6240 *----------------------------------------------------------------------------*/
6242 floatx80
floatx80_modrem(floatx80 a
, floatx80 b
, bool mod
, uint64_t *quotient
,
6243 float_status
*status
)
6246 int32_t aExp
, bExp
, expDiff
, aExpOrig
;
6247 uint64_t aSig0
, aSig1
, bSig
;
6248 uint64_t q
, term0
, term1
, alternateASig0
, alternateASig1
;
6251 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
6252 float_raise(float_flag_invalid
, status
);
6253 return floatx80_default_nan(status
);
6255 aSig0
= extractFloatx80Frac( a
);
6256 aExpOrig
= aExp
= extractFloatx80Exp( a
);
6257 aSign
= extractFloatx80Sign( a
);
6258 bSig
= extractFloatx80Frac( b
);
6259 bExp
= extractFloatx80Exp( b
);
6260 if ( aExp
== 0x7FFF ) {
6261 if ( (uint64_t) ( aSig0
<<1 )
6262 || ( ( bExp
== 0x7FFF ) && (uint64_t) ( bSig
<<1 ) ) ) {
6263 return propagateFloatx80NaN(a
, b
, status
);
6267 if ( bExp
== 0x7FFF ) {
6268 if ((uint64_t)(bSig
<< 1)) {
6269 return propagateFloatx80NaN(a
, b
, status
);
6271 if (aExp
== 0 && aSig0
>> 63) {
6273 * Pseudo-denormal argument must be returned in normalized
6276 return packFloatx80(aSign
, 1, aSig0
);
6283 float_raise(float_flag_invalid
, status
);
6284 return floatx80_default_nan(status
);
6286 normalizeFloatx80Subnormal( bSig
, &bExp
, &bSig
);
6289 if ( aSig0
== 0 ) return a
;
6290 normalizeFloatx80Subnormal( aSig0
, &aExp
, &aSig0
);
6293 expDiff
= aExp
- bExp
;
6295 if ( expDiff
< 0 ) {
6296 if ( mod
|| expDiff
< -1 ) {
6297 if (aExp
== 1 && aExpOrig
== 0) {
6299 * Pseudo-denormal argument must be returned in
6302 return packFloatx80(aSign
, aExp
, aSig0
);
6306 shift128Right( aSig0
, 0, 1, &aSig0
, &aSig1
);
6309 *quotient
= q
= ( bSig
<= aSig0
);
6310 if ( q
) aSig0
-= bSig
;
6312 while ( 0 < expDiff
) {
6313 q
= estimateDiv128To64( aSig0
, aSig1
, bSig
);
6314 q
= ( 2 < q
) ? q
- 2 : 0;
6315 mul64To128( bSig
, q
, &term0
, &term1
);
6316 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6317 shortShift128Left( aSig0
, aSig1
, 62, &aSig0
, &aSig1
);
6323 if ( 0 < expDiff
) {
6324 q
= estimateDiv128To64( aSig0
, aSig1
, bSig
);
6325 q
= ( 2 < q
) ? q
- 2 : 0;
6327 mul64To128( bSig
, q
<<( 64 - expDiff
), &term0
, &term1
);
6328 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6329 shortShift128Left( 0, bSig
, 64 - expDiff
, &term0
, &term1
);
6330 while ( le128( term0
, term1
, aSig0
, aSig1
) ) {
6332 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6335 *quotient
<<= expDiff
;
6346 sub128( term0
, term1
, aSig0
, aSig1
, &alternateASig0
, &alternateASig1
);
6347 if ( lt128( alternateASig0
, alternateASig1
, aSig0
, aSig1
)
6348 || ( eq128( alternateASig0
, alternateASig1
, aSig0
, aSig1
)
6351 aSig0
= alternateASig0
;
6352 aSig1
= alternateASig1
;
6358 normalizeRoundAndPackFloatx80(
6359 80, zSign
, bExp
+ expDiff
, aSig0
, aSig1
, status
);
6363 /*----------------------------------------------------------------------------
6364 | Returns the remainder of the extended double-precision floating-point value
6365 | `a' with respect to the corresponding value `b'. The operation is performed
6366 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6367 *----------------------------------------------------------------------------*/
6369 floatx80
floatx80_rem(floatx80 a
, floatx80 b
, float_status
*status
)
6372 return floatx80_modrem(a
, b
, false, "ient
, status
);
6375 /*----------------------------------------------------------------------------
6376 | Returns the remainder of the extended double-precision floating-point value
6377 | `a' with respect to the corresponding value `b', with the quotient truncated
6379 *----------------------------------------------------------------------------*/
6381 floatx80
floatx80_mod(floatx80 a
, floatx80 b
, float_status
*status
)
6384 return floatx80_modrem(a
, b
, true, "ient
, status
);
6387 /*----------------------------------------------------------------------------
6388 | Returns the square root of the extended double-precision floating-point
6389 | value `a'. The operation is performed according to the IEC/IEEE Standard
6390 | for Binary Floating-Point Arithmetic.
6391 *----------------------------------------------------------------------------*/
6393 floatx80
floatx80_sqrt(floatx80 a
, float_status
*status
)
6397 uint64_t aSig0
, aSig1
, zSig0
, zSig1
, doubleZSig0
;
6398 uint64_t rem0
, rem1
, rem2
, rem3
, term0
, term1
, term2
, term3
;
6400 if (floatx80_invalid_encoding(a
)) {
6401 float_raise(float_flag_invalid
, status
);
6402 return floatx80_default_nan(status
);
6404 aSig0
= extractFloatx80Frac( a
);
6405 aExp
= extractFloatx80Exp( a
);
6406 aSign
= extractFloatx80Sign( a
);
6407 if ( aExp
== 0x7FFF ) {
6408 if ((uint64_t)(aSig0
<< 1)) {
6409 return propagateFloatx80NaN(a
, a
, status
);
6411 if ( ! aSign
) return a
;
6415 if ( ( aExp
| aSig0
) == 0 ) return a
;
6417 float_raise(float_flag_invalid
, status
);
6418 return floatx80_default_nan(status
);
6421 if ( aSig0
== 0 ) return packFloatx80( 0, 0, 0 );
6422 normalizeFloatx80Subnormal( aSig0
, &aExp
, &aSig0
);
6424 zExp
= ( ( aExp
- 0x3FFF )>>1 ) + 0x3FFF;
6425 zSig0
= estimateSqrt32( aExp
, aSig0
>>32 );
6426 shift128Right( aSig0
, 0, 2 + ( aExp
& 1 ), &aSig0
, &aSig1
);
6427 zSig0
= estimateDiv128To64( aSig0
, aSig1
, zSig0
<<32 ) + ( zSig0
<<30 );
6428 doubleZSig0
= zSig0
<<1;
6429 mul64To128( zSig0
, zSig0
, &term0
, &term1
);
6430 sub128( aSig0
, aSig1
, term0
, term1
, &rem0
, &rem1
);
6431 while ( (int64_t) rem0
< 0 ) {
6434 add128( rem0
, rem1
, zSig0
>>63, doubleZSig0
| 1, &rem0
, &rem1
);
6436 zSig1
= estimateDiv128To64( rem1
, 0, doubleZSig0
);
6437 if ( ( zSig1
& UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
6438 if ( zSig1
== 0 ) zSig1
= 1;
6439 mul64To128( doubleZSig0
, zSig1
, &term1
, &term2
);
6440 sub128( rem1
, 0, term1
, term2
, &rem1
, &rem2
);
6441 mul64To128( zSig1
, zSig1
, &term2
, &term3
);
6442 sub192( rem1
, rem2
, 0, 0, term2
, term3
, &rem1
, &rem2
, &rem3
);
6443 while ( (int64_t) rem1
< 0 ) {
6445 shortShift128Left( 0, zSig1
, 1, &term2
, &term3
);
6447 term2
|= doubleZSig0
;
6448 add192( rem1
, rem2
, rem3
, 0, term2
, term3
, &rem1
, &rem2
, &rem3
);
6450 zSig1
|= ( ( rem1
| rem2
| rem3
) != 0 );
6452 shortShift128Left( 0, zSig1
, 1, &zSig0
, &zSig1
);
6453 zSig0
|= doubleZSig0
;
6454 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
6455 0, zExp
, zSig0
, zSig1
, status
);
6458 /*----------------------------------------------------------------------------
6459 | Returns the result of converting the quadruple-precision floating-point
6460 | value `a' to the extended double-precision floating-point format. The
6461 | conversion is performed according to the IEC/IEEE Standard for Binary
6462 | Floating-Point Arithmetic.
6463 *----------------------------------------------------------------------------*/
6465 floatx80
float128_to_floatx80(float128 a
, float_status
*status
)
6469 uint64_t aSig0
, aSig1
;
6471 aSig1
= extractFloat128Frac1( a
);
6472 aSig0
= extractFloat128Frac0( a
);
6473 aExp
= extractFloat128Exp( a
);
6474 aSign
= extractFloat128Sign( a
);
6475 if ( aExp
== 0x7FFF ) {
6476 if ( aSig0
| aSig1
) {
6477 floatx80 res
= commonNaNToFloatx80(float128ToCommonNaN(a
, status
),
6479 return floatx80_silence_nan(res
, status
);
6481 return packFloatx80(aSign
, floatx80_infinity_high
,
6482 floatx80_infinity_low
);
6485 if ( ( aSig0
| aSig1
) == 0 ) return packFloatx80( aSign
, 0, 0 );
6486 normalizeFloat128Subnormal( aSig0
, aSig1
, &aExp
, &aSig0
, &aSig1
);
6489 aSig0
|= UINT64_C(0x0001000000000000);
6491 shortShift128Left( aSig0
, aSig1
, 15, &aSig0
, &aSig1
);
6492 return roundAndPackFloatx80(80, aSign
, aExp
, aSig0
, aSig1
, status
);
6496 /*----------------------------------------------------------------------------
6497 | Returns the remainder of the quadruple-precision floating-point value `a'
6498 | with respect to the corresponding value `b'. The operation is performed
6499 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6500 *----------------------------------------------------------------------------*/
6502 float128
float128_rem(float128 a
, float128 b
, float_status
*status
)
6505 int32_t aExp
, bExp
, expDiff
;
6506 uint64_t aSig0
, aSig1
, bSig0
, bSig1
, q
, term0
, term1
, term2
;
6507 uint64_t allZero
, alternateASig0
, alternateASig1
, sigMean1
;
6510 aSig1
= extractFloat128Frac1( a
);
6511 aSig0
= extractFloat128Frac0( a
);
6512 aExp
= extractFloat128Exp( a
);
6513 aSign
= extractFloat128Sign( a
);
6514 bSig1
= extractFloat128Frac1( b
);
6515 bSig0
= extractFloat128Frac0( b
);
6516 bExp
= extractFloat128Exp( b
);
6517 if ( aExp
== 0x7FFF ) {
6518 if ( ( aSig0
| aSig1
)
6519 || ( ( bExp
== 0x7FFF ) && ( bSig0
| bSig1
) ) ) {
6520 return propagateFloat128NaN(a
, b
, status
);
6524 if ( bExp
== 0x7FFF ) {
6525 if (bSig0
| bSig1
) {
6526 return propagateFloat128NaN(a
, b
, status
);
6531 if ( ( bSig0
| bSig1
) == 0 ) {
6533 float_raise(float_flag_invalid
, status
);
6534 return float128_default_nan(status
);
6536 normalizeFloat128Subnormal( bSig0
, bSig1
, &bExp
, &bSig0
, &bSig1
);
6539 if ( ( aSig0
| aSig1
) == 0 ) return a
;
6540 normalizeFloat128Subnormal( aSig0
, aSig1
, &aExp
, &aSig0
, &aSig1
);
6542 expDiff
= aExp
- bExp
;
6543 if ( expDiff
< -1 ) return a
;
6545 aSig0
| UINT64_C(0x0001000000000000),
6547 15 - ( expDiff
< 0 ),
6552 bSig0
| UINT64_C(0x0001000000000000), bSig1
, 15, &bSig0
, &bSig1
);
6553 q
= le128( bSig0
, bSig1
, aSig0
, aSig1
);
6554 if ( q
) sub128( aSig0
, aSig1
, bSig0
, bSig1
, &aSig0
, &aSig1
);
6556 while ( 0 < expDiff
) {
6557 q
= estimateDiv128To64( aSig0
, aSig1
, bSig0
);
6558 q
= ( 4 < q
) ? q
- 4 : 0;
6559 mul128By64To192( bSig0
, bSig1
, q
, &term0
, &term1
, &term2
);
6560 shortShift192Left( term0
, term1
, term2
, 61, &term1
, &term2
, &allZero
);
6561 shortShift128Left( aSig0
, aSig1
, 61, &aSig0
, &allZero
);
6562 sub128( aSig0
, 0, term1
, term2
, &aSig0
, &aSig1
);
6565 if ( -64 < expDiff
) {
6566 q
= estimateDiv128To64( aSig0
, aSig1
, bSig0
);
6567 q
= ( 4 < q
) ? q
- 4 : 0;
6569 shift128Right( bSig0
, bSig1
, 12, &bSig0
, &bSig1
);
6571 if ( expDiff
< 0 ) {
6572 shift128Right( aSig0
, aSig1
, - expDiff
, &aSig0
, &aSig1
);
6575 shortShift128Left( aSig0
, aSig1
, expDiff
, &aSig0
, &aSig1
);
6577 mul128By64To192( bSig0
, bSig1
, q
, &term0
, &term1
, &term2
);
6578 sub128( aSig0
, aSig1
, term1
, term2
, &aSig0
, &aSig1
);
6581 shift128Right( aSig0
, aSig1
, 12, &aSig0
, &aSig1
);
6582 shift128Right( bSig0
, bSig1
, 12, &bSig0
, &bSig1
);
6585 alternateASig0
= aSig0
;
6586 alternateASig1
= aSig1
;
6588 sub128( aSig0
, aSig1
, bSig0
, bSig1
, &aSig0
, &aSig1
);
6589 } while ( 0 <= (int64_t) aSig0
);
6591 aSig0
, aSig1
, alternateASig0
, alternateASig1
, (uint64_t *)&sigMean0
, &sigMean1
);
6592 if ( ( sigMean0
< 0 )
6593 || ( ( ( sigMean0
| sigMean1
) == 0 ) && ( q
& 1 ) ) ) {
6594 aSig0
= alternateASig0
;
6595 aSig1
= alternateASig1
;
6597 zSign
= ( (int64_t) aSig0
< 0 );
6598 if ( zSign
) sub128( 0, 0, aSig0
, aSig1
, &aSig0
, &aSig1
);
6599 return normalizeRoundAndPackFloat128(aSign
^ zSign
, bExp
- 4, aSig0
, aSig1
,
6603 /*----------------------------------------------------------------------------
6604 | Returns the square root of the quadruple-precision floating-point value `a'.
6605 | The operation is performed according to the IEC/IEEE Standard for Binary
6606 | Floating-Point Arithmetic.
6607 *----------------------------------------------------------------------------*/
6609 float128
float128_sqrt(float128 a
, float_status
*status
)
6613 uint64_t aSig0
, aSig1
, zSig0
, zSig1
, zSig2
, doubleZSig0
;
6614 uint64_t rem0
, rem1
, rem2
, rem3
, term0
, term1
, term2
, term3
;
6616 aSig1
= extractFloat128Frac1( a
);
6617 aSig0
= extractFloat128Frac0( a
);
6618 aExp
= extractFloat128Exp( a
);
6619 aSign
= extractFloat128Sign( a
);
6620 if ( aExp
== 0x7FFF ) {
6621 if (aSig0
| aSig1
) {
6622 return propagateFloat128NaN(a
, a
, status
);
6624 if ( ! aSign
) return a
;
6628 if ( ( aExp
| aSig0
| aSig1
) == 0 ) return a
;
6630 float_raise(float_flag_invalid
, status
);
6631 return float128_default_nan(status
);
6634 if ( ( aSig0
| aSig1
) == 0 ) return packFloat128( 0, 0, 0, 0 );
6635 normalizeFloat128Subnormal( aSig0
, aSig1
, &aExp
, &aSig0
, &aSig1
);
6637 zExp
= ( ( aExp
- 0x3FFF )>>1 ) + 0x3FFE;
6638 aSig0
|= UINT64_C(0x0001000000000000);
6639 zSig0
= estimateSqrt32( aExp
, aSig0
>>17 );
6640 shortShift128Left( aSig0
, aSig1
, 13 - ( aExp
& 1 ), &aSig0
, &aSig1
);
6641 zSig0
= estimateDiv128To64( aSig0
, aSig1
, zSig0
<<32 ) + ( zSig0
<<30 );
6642 doubleZSig0
= zSig0
<<1;
6643 mul64To128( zSig0
, zSig0
, &term0
, &term1
);
6644 sub128( aSig0
, aSig1
, term0
, term1
, &rem0
, &rem1
);
6645 while ( (int64_t) rem0
< 0 ) {
6648 add128( rem0
, rem1
, zSig0
>>63, doubleZSig0
| 1, &rem0
, &rem1
);
6650 zSig1
= estimateDiv128To64( rem1
, 0, doubleZSig0
);
6651 if ( ( zSig1
& 0x1FFF ) <= 5 ) {
6652 if ( zSig1
== 0 ) zSig1
= 1;
6653 mul64To128( doubleZSig0
, zSig1
, &term1
, &term2
);
6654 sub128( rem1
, 0, term1
, term2
, &rem1
, &rem2
);
6655 mul64To128( zSig1
, zSig1
, &term2
, &term3
);
6656 sub192( rem1
, rem2
, 0, 0, term2
, term3
, &rem1
, &rem2
, &rem3
);
6657 while ( (int64_t) rem1
< 0 ) {
6659 shortShift128Left( 0, zSig1
, 1, &term2
, &term3
);
6661 term2
|= doubleZSig0
;
6662 add192( rem1
, rem2
, rem3
, 0, term2
, term3
, &rem1
, &rem2
, &rem3
);
6664 zSig1
|= ( ( rem1
| rem2
| rem3
) != 0 );
6666 shift128ExtraRightJamming( zSig0
, zSig1
, 0, 14, &zSig0
, &zSig1
, &zSig2
);
6667 return roundAndPackFloat128(0, zExp
, zSig0
, zSig1
, zSig2
, status
);
6671 static inline FloatRelation
6672 floatx80_compare_internal(floatx80 a
, floatx80 b
, bool is_quiet
,
6673 float_status
*status
)
6677 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
6678 float_raise(float_flag_invalid
, status
);
6679 return float_relation_unordered
;
6681 if (( ( extractFloatx80Exp( a
) == 0x7fff ) &&
6682 ( extractFloatx80Frac( a
)<<1 ) ) ||
6683 ( ( extractFloatx80Exp( b
) == 0x7fff ) &&
6684 ( extractFloatx80Frac( b
)<<1 ) )) {
6686 floatx80_is_signaling_nan(a
, status
) ||
6687 floatx80_is_signaling_nan(b
, status
)) {
6688 float_raise(float_flag_invalid
, status
);
6690 return float_relation_unordered
;
6692 aSign
= extractFloatx80Sign( a
);
6693 bSign
= extractFloatx80Sign( b
);
6694 if ( aSign
!= bSign
) {
6696 if ( ( ( (uint16_t) ( ( a
.high
| b
.high
) << 1 ) ) == 0) &&
6697 ( ( a
.low
| b
.low
) == 0 ) ) {
6699 return float_relation_equal
;
6701 return 1 - (2 * aSign
);
6704 /* Normalize pseudo-denormals before comparison. */
6705 if ((a
.high
& 0x7fff) == 0 && a
.low
& UINT64_C(0x8000000000000000)) {
6708 if ((b
.high
& 0x7fff) == 0 && b
.low
& UINT64_C(0x8000000000000000)) {
6711 if (a
.low
== b
.low
&& a
.high
== b
.high
) {
6712 return float_relation_equal
;
6714 return 1 - 2 * (aSign
^ ( lt128( a
.high
, a
.low
, b
.high
, b
.low
) ));
6719 FloatRelation
floatx80_compare(floatx80 a
, floatx80 b
, float_status
*status
)
6721 return floatx80_compare_internal(a
, b
, 0, status
);
6724 FloatRelation
floatx80_compare_quiet(floatx80 a
, floatx80 b
,
6725 float_status
*status
)
6727 return floatx80_compare_internal(a
, b
, 1, status
);
6730 static inline FloatRelation
6731 float128_compare_internal(float128 a
, float128 b
, bool is_quiet
,
6732 float_status
*status
)
6736 if (( ( extractFloat128Exp( a
) == 0x7fff ) &&
6737 ( extractFloat128Frac0( a
) | extractFloat128Frac1( a
) ) ) ||
6738 ( ( extractFloat128Exp( b
) == 0x7fff ) &&
6739 ( extractFloat128Frac0( b
) | extractFloat128Frac1( b
) ) )) {
6741 float128_is_signaling_nan(a
, status
) ||
6742 float128_is_signaling_nan(b
, status
)) {
6743 float_raise(float_flag_invalid
, status
);
6745 return float_relation_unordered
;
6747 aSign
= extractFloat128Sign( a
);
6748 bSign
= extractFloat128Sign( b
);
6749 if ( aSign
!= bSign
) {
6750 if ( ( ( ( a
.high
| b
.high
)<<1 ) | a
.low
| b
.low
) == 0 ) {
6752 return float_relation_equal
;
6754 return 1 - (2 * aSign
);
6757 if (a
.low
== b
.low
&& a
.high
== b
.high
) {
6758 return float_relation_equal
;
6760 return 1 - 2 * (aSign
^ ( lt128( a
.high
, a
.low
, b
.high
, b
.low
) ));
6765 FloatRelation
float128_compare(float128 a
, float128 b
, float_status
*status
)
6767 return float128_compare_internal(a
, b
, 0, status
);
6770 FloatRelation
float128_compare_quiet(float128 a
, float128 b
,
6771 float_status
*status
)
6773 return float128_compare_internal(a
, b
, 1, status
);
6776 floatx80
floatx80_scalbn(floatx80 a
, int n
, float_status
*status
)
6782 if (floatx80_invalid_encoding(a
)) {
6783 float_raise(float_flag_invalid
, status
);
6784 return floatx80_default_nan(status
);
6786 aSig
= extractFloatx80Frac( a
);
6787 aExp
= extractFloatx80Exp( a
);
6788 aSign
= extractFloatx80Sign( a
);
6790 if ( aExp
== 0x7FFF ) {
6792 return propagateFloatx80NaN(a
, a
, status
);
6806 } else if (n
< -0x10000) {
6811 return normalizeRoundAndPackFloatx80(status
->floatx80_rounding_precision
,
6812 aSign
, aExp
, aSig
, 0, status
);
6815 float128
float128_scalbn(float128 a
, int n
, float_status
*status
)
6819 uint64_t aSig0
, aSig1
;
6821 aSig1
= extractFloat128Frac1( a
);
6822 aSig0
= extractFloat128Frac0( a
);
6823 aExp
= extractFloat128Exp( a
);
6824 aSign
= extractFloat128Sign( a
);
6825 if ( aExp
== 0x7FFF ) {
6826 if ( aSig0
| aSig1
) {
6827 return propagateFloat128NaN(a
, a
, status
);
6832 aSig0
|= UINT64_C(0x0001000000000000);
6833 } else if (aSig0
== 0 && aSig1
== 0) {
6841 } else if (n
< -0x10000) {
6846 return normalizeRoundAndPackFloat128( aSign
, aExp
, aSig0
, aSig1
6851 static void __attribute__((constructor
)) softfloat_init(void)
6853 union_float64 ua
, ub
, uc
, ur
;
6855 if (QEMU_NO_HARDFLOAT
) {
6859 * Test that the host's FMA is not obviously broken. For example,
6860 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6861 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6863 ua
.s
= 0x0020000000000001ULL
;
6864 ub
.s
= 0x3ca0000000000000ULL
;
6865 uc
.s
= 0x0020000000000000ULL
;
6866 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
6867 if (ur
.s
!= 0x0020000000000001ULL
) {
6868 force_soft_fma
= true;