fpu/softfloat.c

   1 /*
   2  * QEMU float support
   3  *
   4  * The code in this source file is derived from release 2a of the SoftFloat
   5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6  * some later contributions) are provided under that license, as detailed below.
   7  * It has subsequently been modified by contributors to the QEMU Project,
   8  * so some portions are provided under:
   9  *  the SoftFloat-2a license
  10  *  the BSD license
  11  *  GPL-v2-or-later
  12  *
  13  * Any future contributions to this file after December 1st 2014 will be
  14  * taken to be licensed under the Softfloat-2a license unless specifically
  15  * indicated otherwise.
  16  */
  17
  18 /*
  19 ===============================================================================
  20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
  21 Arithmetic Package, Release 2a.
  22
  23 Written by John R. Hauser.  This work was made possible in part by the
  24 International Computer Science Institute, located at Suite 600, 1947 Center
  25 Street, Berkeley, California 94704.  Funding was partially provided by the
  26 National Science Foundation under grant MIP-9311980.  The original version
  27 of this code was written as part of a project to build a fixed-point vector
  28 processor in collaboration with the University of California at Berkeley,
  29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  31 arithmetic/SoftFloat.html'.
  32
  33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  38
  39 Derivative works are acceptable, even for commercial purposes, so long as
  40 (1) they include prominent notice that the work is derivative, and (2) they
  41 include prominent notice akin to these four paragraphs for those parts of
  42 this code that are retained.
  43
  44 ===============================================================================
  45 */
  46
  47 /* BSD licensing:
  48  * Copyright (c) 2006, Fabrice Bellard
  49  * All rights reserved.
  50  *
  51  * Redistribution and use in source and binary forms, with or without
  52  * modification, are permitted provided that the following conditions are met:
  53  *
  54  * 1. Redistributions of source code must retain the above copyright notice,
  55  * this list of conditions and the following disclaimer.
  56  *
  57  * 2. Redistributions in binary form must reproduce the above copyright notice,
  58  * this list of conditions and the following disclaimer in the documentation
  59  * and/or other materials provided with the distribution.
  60  *
  61  * 3. Neither the name of the copyright holder nor the names of its contributors
  62  * may be used to endorse or promote products derived from this software without
  63  * specific prior written permission.
  64  *
  65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  75  * THE POSSIBILITY OF SUCH DAMAGE.
  76  */
  77
  78 /* Portions of this work are licensed under the terms of the GNU GPL,
  79  * version 2 or later. See the COPYING file in the top-level directory.
  80  */
  81
  82 /* softfloat (and in particular the code in softfloat-specialize.h) is
  83  * target-dependent and needs the TARGET_* macros.
  84  */
  85 #include "qemu/osdep.h"
  86 #include <math.h>
  87 #include "qemu/bitops.h"
  88 #include "fpu/softfloat.h"
  89
  90 /* We only need stdlib for abort() */
  91
  92 /*----------------------------------------------------------------------------
  93 | Primitive arithmetic functions, including multi-word arithmetic, and
  94 | division and square root approximations.  (Can be specialized to target if
  95 | desired.)
  96 *----------------------------------------------------------------------------*/
  97 #include "fpu/softfloat-macros.h"
  98
  99 /*
 100  * Hardfloat
 101  *
 102  * Fast emulation of guest FP instructions is challenging for two reasons.
 103  * First, FP instruction semantics are similar but not identical, particularly
 104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
 105  * exception flags is not trivial: reading the host's flags register with a
 106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 107  * and trapping on every FP exception is not fast nor pleasant to work with.
 108  *
 109  * We address these challenges by leveraging the host FPU for a subset of the
 110  * operations. To do this we expand on the idea presented in this paper:
 111  *
 112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 114  *
 115  * The idea is thus to leverage the host FPU to (1) compute FP operations
 116  * and (2) identify whether FP exceptions occurred while avoiding
 117  * expensive exception flag register accesses.
 118  *
 119  * An important optimization shown in the paper is that given that exception
 120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 121  * This is particularly useful for the inexact flag, which is very frequently
 122  * raised in floating-point workloads.
 123  *
 124  * We optimize the code further by deferring to soft-fp whenever FP exception
 125  * detection might get hairy. Two examples: (1) when at least one operand is
 126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 127  * and the result is < the minimum normal.
 128  */
 129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
 130     static inline void name(soft_t *a, float_status *s)                 \
 131     {                                                                   \
 132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
 133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
 134                                      soft_t ## _is_neg(*a));            \
 135             float_raise(float_flag_input_denormal, s);                  \
 136         }                                                               \
 137     }
 138
 139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
 140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
 141 #undef GEN_INPUT_FLUSH__NOCHECK
 142
 143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
 144     static inline void name(soft_t *a, float_status *s) \
 145     {                                                   \
 146         if (likely(!s->flush_inputs_to_zero)) {         \
 147             return;                                     \
 148         }                                               \
 149         soft_t ## _input_flush__nocheck(a, s);          \
 150     }
 151
 152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
 153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
 154 #undef GEN_INPUT_FLUSH1
 155
 156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
 157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
 158     {                                                                   \
 159         if (likely(!s->flush_inputs_to_zero)) {                         \
 160             return;                                                     \
 161         }                                                               \
 162         soft_t ## _input_flush__nocheck(a, s);                          \
 163         soft_t ## _input_flush__nocheck(b, s);                          \
 164     }
 165
 166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
 167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
 168 #undef GEN_INPUT_FLUSH2
 169
 170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
 171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
 172     {                                                                   \
 173         if (likely(!s->flush_inputs_to_zero)) {                         \
 174             return;                                                     \
 175         }                                                               \
 176         soft_t ## _input_flush__nocheck(a, s);                          \
 177         soft_t ## _input_flush__nocheck(b, s);                          \
 178         soft_t ## _input_flush__nocheck(c, s);                          \
 179     }
 180
 181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
 182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
 183 #undef GEN_INPUT_FLUSH3
 184
 185 /*
 186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
 187  * hardfloat functions. Each combination of number of inputs and float size
 188  * gets its own value.
 189  */
 190 #if defined(__x86_64__)
 191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
 193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
 195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
 197 #else
 198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
 200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
 202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
 204 #endif
 205
 206 /*
 207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 208  * float{32,64}_is_infinity when !USE_FP.
 209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 211  */
 212 #if defined(__x86_64__) || defined(__aarch64__)
 213 # define QEMU_HARDFLOAT_USE_ISINF   1
 214 #else
 215 # define QEMU_HARDFLOAT_USE_ISINF   0
 216 #endif
 217
 218 /*
 219  * Some targets clear the FP flags before most FP operations. This prevents
 220  * the use of hardfloat, since hardfloat relies on the inexact flag being
 221  * already set.
 222  */
 223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
 224 # if defined(__FAST_MATH__)
 225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
 226     IEEE implementation
 227 # endif
 228 # define QEMU_NO_HARDFLOAT 1
 229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
 230 #else
 231 # define QEMU_NO_HARDFLOAT 0
 232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
 233 #endif
 234
 235 static inline bool can_use_fpu(const float_status *s)
 236 {
 237     if (QEMU_NO_HARDFLOAT) {
 238         return false;
 239     }
 240     return likely(s->float_exception_flags & float_flag_inexact &&
 241                   s->float_rounding_mode == float_round_nearest_even);
 242 }
 243
 244 /*
 245  * Hardfloat generation functions. Each operation can have two flavors:
 246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 247  * most condition checks, or native ones (e.g. fpclassify).
 248  *
 249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
 250  * compiler to propagate constants and inline everything into the callers.
 251  *
 252  * We only generate functions for operations with two inputs, since only
 253  * these are common enough to justify consolidating them into common code.
 254  */
 255
 256 typedef union {
 257     float32 s;
 258     float h;
 259 } union_float32;
 260
 261 typedef union {
 262     float64 s;
 263     double h;
 264 } union_float64;
 265
 266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
 267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
 268
 269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
 270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
 271 typedef float   (*hard_f32_op2_fn)(float a, float b);
 272 typedef double  (*hard_f64_op2_fn)(double a, double b);
 273
 274 /* 2-input is-zero-or-normal */
 275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
 276 {
 277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
 278         /*
 279          * Not using a temp variable for consecutive fpclassify calls ends up
 280          * generating faster code.
 281          */
 282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 284     }
 285     return float32_is_zero_or_normal(a.s) &&
 286            float32_is_zero_or_normal(b.s);
 287 }
 288
 289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
 290 {
 291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
 292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 294     }
 295     return float64_is_zero_or_normal(a.s) &&
 296            float64_is_zero_or_normal(b.s);
 297 }
 298
 299 /* 3-input is-zero-or-normal */
 300 static inline
 301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
 302 {
 303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
 304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 307     }
 308     return float32_is_zero_or_normal(a.s) &&
 309            float32_is_zero_or_normal(b.s) &&
 310            float32_is_zero_or_normal(c.s);
 311 }
 312
 313 static inline
 314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
 315 {
 316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
 317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 320     }
 321     return float64_is_zero_or_normal(a.s) &&
 322            float64_is_zero_or_normal(b.s) &&
 323            float64_is_zero_or_normal(c.s);
 324 }
 325
 326 static inline bool f32_is_inf(union_float32 a)
 327 {
 328     if (QEMU_HARDFLOAT_USE_ISINF) {
 329         return isinf(a.h);
 330     }
 331     return float32_is_infinity(a.s);
 332 }
 333
 334 static inline bool f64_is_inf(union_float64 a)
 335 {
 336     if (QEMU_HARDFLOAT_USE_ISINF) {
 337         return isinf(a.h);
 338     }
 339     return float64_is_infinity(a.s);
 340 }
 341
 342 static inline float32
 343 float32_gen2(float32 xa, float32 xb, float_status *s,
 344              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
 345              f32_check_fn pre, f32_check_fn post)
 346 {
 347     union_float32 ua, ub, ur;
 348
 349     ua.s = xa;
 350     ub.s = xb;
 351
 352     if (unlikely(!can_use_fpu(s))) {
 353         goto soft;
 354     }
 355
 356     float32_input_flush2(&ua.s, &ub.s, s);
 357     if (unlikely(!pre(ua, ub))) {
 358         goto soft;
 359     }
 360
 361     ur.h = hard(ua.h, ub.h);
 362     if (unlikely(f32_is_inf(ur))) {
 363         float_raise(float_flag_overflow, s);
 364     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
 365         goto soft;
 366     }
 367     return ur.s;
 368
 369  soft:
 370     return soft(ua.s, ub.s, s);
 371 }
 372
 373 static inline float64
 374 float64_gen2(float64 xa, float64 xb, float_status *s,
 375              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
 376              f64_check_fn pre, f64_check_fn post)
 377 {
 378     union_float64 ua, ub, ur;
 379
 380     ua.s = xa;
 381     ub.s = xb;
 382
 383     if (unlikely(!can_use_fpu(s))) {
 384         goto soft;
 385     }
 386
 387     float64_input_flush2(&ua.s, &ub.s, s);
 388     if (unlikely(!pre(ua, ub))) {
 389         goto soft;
 390     }
 391
 392     ur.h = hard(ua.h, ub.h);
 393     if (unlikely(f64_is_inf(ur))) {
 394         float_raise(float_flag_overflow, s);
 395     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
 396         goto soft;
 397     }
 398     return ur.s;
 399
 400  soft:
 401     return soft(ua.s, ub.s, s);
 402 }
 403
 404 /*----------------------------------------------------------------------------
 405 | Returns the fraction bits of the single-precision floating-point value `a'.
 406 *----------------------------------------------------------------------------*/
 407
 408 static inline uint32_t extractFloat32Frac(float32 a)
 409 {
 410     return float32_val(a) & 0x007FFFFF;
 411 }
 412
 413 /*----------------------------------------------------------------------------
 414 | Returns the exponent bits of the single-precision floating-point value `a'.
 415 *----------------------------------------------------------------------------*/
 416
 417 static inline int extractFloat32Exp(float32 a)
 418 {
 419     return (float32_val(a) >> 23) & 0xFF;
 420 }
 421
 422 /*----------------------------------------------------------------------------
 423 | Returns the sign bit of the single-precision floating-point value `a'.
 424 *----------------------------------------------------------------------------*/
 425
 426 static inline bool extractFloat32Sign(float32 a)
 427 {
 428     return float32_val(a) >> 31;
 429 }
 430
 431 /*----------------------------------------------------------------------------
 432 | Returns the fraction bits of the double-precision floating-point value `a'.
 433 *----------------------------------------------------------------------------*/
 434
 435 static inline uint64_t extractFloat64Frac(float64 a)
 436 {
 437     return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
 438 }
 439
 440 /*----------------------------------------------------------------------------
 441 | Returns the exponent bits of the double-precision floating-point value `a'.
 442 *----------------------------------------------------------------------------*/
 443
 444 static inline int extractFloat64Exp(float64 a)
 445 {
 446     return (float64_val(a) >> 52) & 0x7FF;
 447 }
 448
 449 /*----------------------------------------------------------------------------
 450 | Returns the sign bit of the double-precision floating-point value `a'.
 451 *----------------------------------------------------------------------------*/
 452
 453 static inline bool extractFloat64Sign(float64 a)
 454 {
 455     return float64_val(a) >> 63;
 456 }
 457
 458 /*
 459  * Classify a floating point number. Everything above float_class_qnan
 460  * is a NaN so cls >= float_class_qnan is any NaN.
 461  */
 462
 463 typedef enum __attribute__ ((__packed__)) {
 464     float_class_unclassified,
 465     float_class_zero,
 466     float_class_normal,
 467     float_class_inf,
 468     float_class_qnan,  /* all NaNs from here */
 469     float_class_snan,
 470 } FloatClass;
 471
 472 #define float_cmask(bit)  (1u << (bit))
 473
 474 enum {
 475     float_cmask_zero    = float_cmask(float_class_zero),
 476     float_cmask_normal  = float_cmask(float_class_normal),
 477     float_cmask_inf     = float_cmask(float_class_inf),
 478     float_cmask_qnan    = float_cmask(float_class_qnan),
 479     float_cmask_snan    = float_cmask(float_class_snan),
 480
 481     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
 482     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
 483 };
 484
 485 /* Flags for parts_minmax. */
 486 enum {
 487     /* Set for minimum; clear for maximum. */
 488     minmax_ismin = 1,
 489     /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
 490     minmax_isnum = 2,
 491     /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
 492     minmax_ismag = 4,
 493 };
 494
 495 /* Simple helpers for checking if, or what kind of, NaN we have */
 496 static inline __attribute__((unused)) bool is_nan(FloatClass c)
 497 {
 498     return unlikely(c >= float_class_qnan);
 499 }
 500
 501 static inline __attribute__((unused)) bool is_snan(FloatClass c)
 502 {
 503     return c == float_class_snan;
 504 }
 505
 506 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
 507 {
 508     return c == float_class_qnan;
 509 }
 510
 511 /*
 512  * Structure holding all of the decomposed parts of a float.
 513  * The exponent is unbiased and the fraction is normalized.
 514  *
 515  * The fraction words are stored in big-endian word ordering,
 516  * so that truncation from a larger format to a smaller format
 517  * can be done simply by ignoring subsequent elements.
 518  */
 519
 520 typedef struct {
 521     FloatClass cls;
 522     bool sign;
 523     int32_t exp;
 524     union {
 525         /* Routines that know the structure may reference the singular name. */
 526         uint64_t frac;
 527         /*
 528          * Routines expanded with multiple structures reference "hi" and "lo"
 529          * depending on the operation.  In FloatParts64, "hi" and "lo" are
 530          * both the same word and aliased here.
 531          */
 532         uint64_t frac_hi;
 533         uint64_t frac_lo;
 534     };
 535 } FloatParts64;
 536
 537 typedef struct {
 538     FloatClass cls;
 539     bool sign;
 540     int32_t exp;
 541     uint64_t frac_hi;
 542     uint64_t frac_lo;
 543 } FloatParts128;
 544
 545 typedef struct {
 546     FloatClass cls;
 547     bool sign;
 548     int32_t exp;
 549     uint64_t frac_hi;
 550     uint64_t frac_hm;  /* high-middle */
 551     uint64_t frac_lm;  /* low-middle */
 552     uint64_t frac_lo;
 553 } FloatParts256;
 554
 555 /* These apply to the most significant word of each FloatPartsN. */
 556 #define DECOMPOSED_BINARY_POINT    63
 557 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
 558
 559 /* Structure holding all of the relevant parameters for a format.
 560  *   exp_size: the size of the exponent field
 561  *   exp_bias: the offset applied to the exponent field
 562  *   exp_max: the maximum normalised exponent
 563  *   frac_size: the size of the fraction field
 564  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 565  * The following are computed based the size of fraction
 566  *   frac_lsb: least significant bit of fraction
 567  *   frac_lsbm1: the bit below the least significant bit (for rounding)
 568  *   round_mask/roundeven_mask: masks used for rounding
 569  * The following optional modifiers are available:
 570  *   arm_althp: handle ARM Alternative Half Precision
 571  */
 572 typedef struct {
 573     int exp_size;
 574     int exp_bias;
 575     int exp_max;
 576     int frac_size;
 577     int frac_shift;
 578     uint64_t frac_lsb;
 579     uint64_t frac_lsbm1;
 580     uint64_t round_mask;
 581     uint64_t roundeven_mask;
 582     bool arm_althp;
 583 } FloatFmt;
 584
 585 /* Expand fields based on the size of exponent and fraction */
 586 #define FLOAT_PARAMS(E, F)                                           \
 587     .exp_size       = E,                                             \
 588     .exp_bias       = ((1 << E) - 1) >> 1,                           \
 589     .exp_max        = (1 << E) - 1,                                  \
 590     .frac_size      = F,                                             \
 591     .frac_shift     = (-F - 1) & 63,                                 \
 592     .frac_lsb       = 1ull << ((-F - 1) & 63),                       \
 593     .frac_lsbm1     = 1ull << ((-F - 2) & 63),                       \
 594     .round_mask     = (1ull << ((-F - 1) & 63)) - 1,                 \
 595     .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1
 596
 597 static const FloatFmt float16_params = {
 598     FLOAT_PARAMS(5, 10)
 599 };
 600
 601 static const FloatFmt float16_params_ahp = {
 602     FLOAT_PARAMS(5, 10),
 603     .arm_althp = true
 604 };
 605
 606 static const FloatFmt bfloat16_params = {
 607     FLOAT_PARAMS(8, 7)
 608 };
 609
 610 static const FloatFmt float32_params = {
 611     FLOAT_PARAMS(8, 23)
 612 };
 613
 614 static const FloatFmt float64_params = {
 615     FLOAT_PARAMS(11, 52)
 616 };
 617
 618 static const FloatFmt float128_params = {
 619     FLOAT_PARAMS(15, 112)
 620 };
 621
 622 /* Unpack a float to parts, but do not canonicalize.  */
 623 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
 624 {
 625     const int f_size = fmt->frac_size;
 626     const int e_size = fmt->exp_size;
 627
 628     *r = (FloatParts64) {
 629         .cls = float_class_unclassified,
 630         .sign = extract64(raw, f_size + e_size, 1),
 631         .exp = extract64(raw, f_size, e_size),
 632         .frac = extract64(raw, 0, f_size)
 633     };
 634 }
 635
 636 static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
 637 {
 638     unpack_raw64(p, &float16_params, f);
 639 }
 640
 641 static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
 642 {
 643     unpack_raw64(p, &bfloat16_params, f);
 644 }
 645
 646 static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
 647 {
 648     unpack_raw64(p, &float32_params, f);
 649 }
 650
 651 static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
 652 {
 653     unpack_raw64(p, &float64_params, f);
 654 }
 655
 656 static void float128_unpack_raw(FloatParts128 *p, float128 f)
 657 {
 658     const int f_size = float128_params.frac_size - 64;
 659     const int e_size = float128_params.exp_size;
 660
 661     *p = (FloatParts128) {
 662         .cls = float_class_unclassified,
 663         .sign = extract64(f.high, f_size + e_size, 1),
 664         .exp = extract64(f.high, f_size, e_size),
 665         .frac_hi = extract64(f.high, 0, f_size),
 666         .frac_lo = f.low,
 667     };
 668 }
 669
 670 /* Pack a float from parts, but do not canonicalize.  */
 671 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
 672 {
 673     const int f_size = fmt->frac_size;
 674     const int e_size = fmt->exp_size;
 675     uint64_t ret;
 676
 677     ret = (uint64_t)p->sign << (f_size + e_size);
 678     ret = deposit64(ret, f_size, e_size, p->exp);
 679     ret = deposit64(ret, 0, f_size, p->frac);
 680     return ret;
 681 }
 682
 683 static inline float16 float16_pack_raw(const FloatParts64 *p)
 684 {
 685     return make_float16(pack_raw64(p, &float16_params));
 686 }
 687
 688 static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
 689 {
 690     return pack_raw64(p, &bfloat16_params);
 691 }
 692
 693 static inline float32 float32_pack_raw(const FloatParts64 *p)
 694 {
 695     return make_float32(pack_raw64(p, &float32_params));
 696 }
 697
 698 static inline float64 float64_pack_raw(const FloatParts64 *p)
 699 {
 700     return make_float64(pack_raw64(p, &float64_params));
 701 }
 702
 703 static float128 float128_pack_raw(const FloatParts128 *p)
 704 {
 705     const int f_size = float128_params.frac_size - 64;
 706     const int e_size = float128_params.exp_size;
 707     uint64_t hi;
 708
 709     hi = (uint64_t)p->sign << (f_size + e_size);
 710     hi = deposit64(hi, f_size, e_size, p->exp);
 711     hi = deposit64(hi, 0, f_size, p->frac_hi);
 712     return make_float128(hi, p->frac_lo);
 713 }
 714
 715 /*----------------------------------------------------------------------------
 716 | Functions and definitions to determine:  (1) whether tininess for underflow
 717 | is detected before or after rounding by default, (2) what (if anything)
 718 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
 719 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
 720 | are propagated from function inputs to output.  These details are target-
 721 | specific.
 722 *----------------------------------------------------------------------------*/
 723 #include "softfloat-specialize.c.inc"
 724
 725 #define PARTS_GENERIC_64_128(NAME, P) \
 726     QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
 727
 728 #define PARTS_GENERIC_64_128_256(NAME, P) \
 729     QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
 730                  (FloatParts128 *, parts128_##NAME), parts64_##NAME)
 731
 732 #define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
 733 #define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
 734
 735 static void parts64_return_nan(FloatParts64 *a, float_status *s);
 736 static void parts128_return_nan(FloatParts128 *a, float_status *s);
 737
 738 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
 739
 740 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
 741                                       float_status *s);
 742 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
 743                                         float_status *s);
 744
 745 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
 746
 747 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
 748                                              FloatParts64 *c, float_status *s,
 749                                              int ab_mask, int abc_mask);
 750 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
 751                                                FloatParts128 *b,
 752                                                FloatParts128 *c,
 753                                                float_status *s,
 754                                                int ab_mask, int abc_mask);
 755
 756 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
 757     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
 758
 759 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
 760                                  const FloatFmt *fmt);
 761 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
 762                                   const FloatFmt *fmt);
 763
 764 #define parts_canonicalize(A, S, F) \
 765     PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
 766
 767 static void parts64_uncanon(FloatParts64 *p, float_status *status,
 768                             const FloatFmt *fmt);
 769 static void parts128_uncanon(FloatParts128 *p, float_status *status,
 770                              const FloatFmt *fmt);
 771
 772 #define parts_uncanon(A, S, F) \
 773     PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
 774
 775 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
 776 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
 777 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
 778
 779 #define parts_add_normal(A, B) \
 780     PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
 781
 782 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
 783 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
 784 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
 785
 786 #define parts_sub_normal(A, B) \
 787     PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
 788
 789 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
 790                                     float_status *s, bool subtract);
 791 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
 792                                       float_status *s, bool subtract);
 793
 794 #define parts_addsub(A, B, S, Z) \
 795     PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
 796
 797 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
 798                                  float_status *s);
 799 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
 800                                    float_status *s);
 801
 802 #define parts_mul(A, B, S) \
 803     PARTS_GENERIC_64_128(mul, A)(A, B, S)
 804
 805 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
 806                                     FloatParts64 *c, int flags,
 807                                     float_status *s);
 808 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
 809                                       FloatParts128 *c, int flags,
 810                                       float_status *s);
 811
 812 #define parts_muladd(A, B, C, Z, S) \
 813     PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
 814
 815 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
 816                                  float_status *s);
 817 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
 818                                    float_status *s);
 819
 820 #define parts_div(A, B, S) \
 821     PARTS_GENERIC_64_128(div, A)(A, B, S)
 822
 823 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
 824                                         int scale, int frac_size);
 825 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
 826                                          int scale, int frac_size);
 827
 828 #define parts_round_to_int_normal(A, R, C, F) \
 829     PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
 830
 831 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
 832                                  int scale, float_status *s,
 833                                  const FloatFmt *fmt);
 834 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
 835                                   int scale, float_status *s,
 836                                   const FloatFmt *fmt);
 837
 838 #define parts_round_to_int(A, R, C, S, F) \
 839     PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
 840
 841 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
 842                                      int scale, int64_t min, int64_t max,
 843                                      float_status *s);
 844 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
 845                                      int scale, int64_t min, int64_t max,
 846                                      float_status *s);
 847
 848 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
 849     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
 850
 851 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
 852                                       int scale, uint64_t max,
 853                                       float_status *s);
 854 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
 855                                        int scale, uint64_t max,
 856                                        float_status *s);
 857
 858 #define parts_float_to_uint(P, R, Z, M, S) \
 859     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
 860
 861 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
 862                                   int scale, float_status *s);
 863 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
 864                                    int scale, float_status *s);
 865
 866 #define parts_sint_to_float(P, I, Z, S) \
 867     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
 868
 869 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
 870                                   int scale, float_status *s);
 871 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
 872                                    int scale, float_status *s);
 873
 874 #define parts_uint_to_float(P, I, Z, S) \
 875     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
 876
 877 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
 878                                     float_status *s, int flags);
 879 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
 880                                       float_status *s, int flags);
 881
 882 #define parts_minmax(A, B, S, F) \
 883     PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
 884
 885 static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
 886                            float_status *s, bool q);
 887 static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
 888                             float_status *s, bool q);
 889
 890 #define parts_compare(A, B, S, Q) \
 891     PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
 892
 893 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
 894 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
 895
 896 #define parts_scalbn(A, N, S) \
 897     PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
 898
 899 /*
 900  * Helper functions for softfloat-parts.c.inc, per-size operations.
 901  */
 902
 903 #define FRAC_GENERIC_64_128(NAME, P) \
 904     QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
 905
 906 #define FRAC_GENERIC_64_128_256(NAME, P) \
 907     QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
 908                  (FloatParts128 *, frac128_##NAME), frac64_##NAME)
 909
 910 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
 911 {
 912     return uadd64_overflow(a->frac, b->frac, &r->frac);
 913 }
 914
 915 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
 916 {
 917     bool c = 0;
 918     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 919     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 920     return c;
 921 }
 922
 923 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
 924 {
 925     bool c = 0;
 926     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 927     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
 928     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
 929     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 930     return c;
 931 }
 932
 933 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
 934
 935 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
 936 {
 937     return uadd64_overflow(a->frac, c, &r->frac);
 938 }
 939
 940 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
 941 {
 942     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
 943     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
 944 }
 945
 946 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
 947
 948 static void frac64_allones(FloatParts64 *a)
 949 {
 950     a->frac = -1;
 951 }
 952
 953 static void frac128_allones(FloatParts128 *a)
 954 {
 955     a->frac_hi = a->frac_lo = -1;
 956 }
 957
 958 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
 959
 960 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
 961 {
 962     return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
 963 }
 964
 965 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
 966 {
 967     uint64_t ta = a->frac_hi, tb = b->frac_hi;
 968     if (ta == tb) {
 969         ta = a->frac_lo, tb = b->frac_lo;
 970         if (ta == tb) {
 971             return 0;
 972         }
 973     }
 974     return ta < tb ? -1 : 1;
 975 }
 976
 977 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
 978
 979 static void frac64_clear(FloatParts64 *a)
 980 {
 981     a->frac = 0;
 982 }
 983
 984 static void frac128_clear(FloatParts128 *a)
 985 {
 986     a->frac_hi = a->frac_lo = 0;
 987 }
 988
 989 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
 990
 991 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
 992 {
 993     uint64_t n1, n0, r, q;
 994     bool ret;
 995
 996     /*
 997      * We want a 2*N / N-bit division to produce exactly an N-bit
 998      * result, so that we do not lose any precision and so that we
 999      * do not have to renormalize afterward.  If A.frac < B.frac,
1000      * then division would produce an (N-1)-bit result; shift A left
1001      * by one to produce the an N-bit result, and return true to
1002      * decrement the exponent to match.
1003      *
1004      * The udiv_qrnnd algorithm that we're using requires normalization,
1005      * i.e. the msb of the denominator must be set, which is already true.
1006      */
1007     ret = a->frac < b->frac;
1008     if (ret) {
1009         n0 = a->frac;
1010         n1 = 0;
1011     } else {
1012         n0 = a->frac >> 1;
1013         n1 = a->frac << 63;
1014     }
1015     q = udiv_qrnnd(&r, n0, n1, b->frac);
1016
1017     /* Set lsb if there is a remainder, to set inexact. */
1018     a->frac = q | (r != 0);
1019
1020     return ret;
1021 }
1022
1023 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1024 {
1025     uint64_t q0, q1, a0, a1, b0, b1;
1026     uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1027     bool ret = false;
1028
1029     a0 = a->frac_hi, a1 = a->frac_lo;
1030     b0 = b->frac_hi, b1 = b->frac_lo;
1031
1032     ret = lt128(a0, a1, b0, b1);
1033     if (!ret) {
1034         a1 = shr_double(a0, a1, 1);
1035         a0 = a0 >> 1;
1036     }
1037
1038     /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1039     q0 = estimateDiv128To64(a0, a1, b0);
1040
1041     /*
1042      * Estimate is high because B1 was not included (unless B1 == 0).
1043      * Reduce quotient and increase remainder until remainder is non-negative.
1044      * This loop will execute 0 to 2 times.
1045      */
1046     mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1047     sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1048     while (r0 != 0) {
1049         q0--;
1050         add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1051     }
1052
1053     /* Repeat using the remainder, producing a second word of quotient. */
1054     q1 = estimateDiv128To64(r1, r2, b0);
1055     mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1056     sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1057     while (r1 != 0) {
1058         q1--;
1059         add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1060     }
1061
1062     /* Any remainder indicates inexact; set sticky bit. */
1063     q1 |= (r2 | r3) != 0;
1064
1065     a->frac_hi = q0;
1066     a->frac_lo = q1;
1067     return ret;
1068 }
1069
1070 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1071
1072 static bool frac64_eqz(FloatParts64 *a)
1073 {
1074     return a->frac == 0;
1075 }
1076
1077 static bool frac128_eqz(FloatParts128 *a)
1078 {
1079     return (a->frac_hi | a->frac_lo) == 0;
1080 }
1081
1082 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1083
1084 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1085 {
1086     mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1087 }
1088
1089 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1090 {
1091     mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1092                 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1093 }
1094
1095 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1096
1097 static void frac64_neg(FloatParts64 *a)
1098 {
1099     a->frac = -a->frac;
1100 }
1101
1102 static void frac128_neg(FloatParts128 *a)
1103 {
1104     bool c = 0;
1105     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1106     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1107 }
1108
1109 static void frac256_neg(FloatParts256 *a)
1110 {
1111     bool c = 0;
1112     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1113     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1114     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1115     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1116 }
1117
1118 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1119
1120 static int frac64_normalize(FloatParts64 *a)
1121 {
1122     if (a->frac) {
1123         int shift = clz64(a->frac);
1124         a->frac <<= shift;
1125         return shift;
1126     }
1127     return 64;
1128 }
1129
1130 static int frac128_normalize(FloatParts128 *a)
1131 {
1132     if (a->frac_hi) {
1133         int shl = clz64(a->frac_hi);
1134         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1135         a->frac_lo <<= shl;
1136         return shl;
1137     } else if (a->frac_lo) {
1138         int shl = clz64(a->frac_lo);
1139         a->frac_hi = a->frac_lo << shl;
1140         a->frac_lo = 0;
1141         return shl + 64;
1142     }
1143     return 128;
1144 }
1145
1146 static int frac256_normalize(FloatParts256 *a)
1147 {
1148     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1149     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1150     int ret, shl;
1151
1152     if (likely(a0)) {
1153         shl = clz64(a0);
1154         if (shl == 0) {
1155             return 0;
1156         }
1157         ret = shl;
1158     } else {
1159         if (a1) {
1160             ret = 64;
1161             a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1162         } else if (a2) {
1163             ret = 128;
1164             a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1165         } else if (a3) {
1166             ret = 192;
1167             a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1168         } else {
1169             ret = 256;
1170             a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1171             goto done;
1172         }
1173         shl = clz64(a0);
1174         if (shl == 0) {
1175             goto done;
1176         }
1177         ret += shl;
1178     }
1179
1180     a0 = shl_double(a0, a1, shl);
1181     a1 = shl_double(a1, a2, shl);
1182     a2 = shl_double(a2, a3, shl);
1183     a3 <<= shl;
1184
1185  done:
1186     a->frac_hi = a0;
1187     a->frac_hm = a1;
1188     a->frac_lm = a2;
1189     a->frac_lo = a3;
1190     return ret;
1191 }
1192
1193 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1194
1195 static void frac64_shl(FloatParts64 *a, int c)
1196 {
1197     a->frac <<= c;
1198 }
1199
1200 static void frac128_shl(FloatParts128 *a, int c)
1201 {
1202     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1203
1204     if (c & 64) {
1205         a0 = a1, a1 = 0;
1206     }
1207
1208     c &= 63;
1209     if (c) {
1210         a0 = shl_double(a0, a1, c);
1211         a1 = a1 << c;
1212     }
1213
1214     a->frac_hi = a0;
1215     a->frac_lo = a1;
1216 }
1217
1218 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1219
1220 static void frac64_shr(FloatParts64 *a, int c)
1221 {
1222     a->frac >>= c;
1223 }
1224
1225 static void frac128_shr(FloatParts128 *a, int c)
1226 {
1227     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1228
1229     if (c & 64) {
1230         a1 = a0, a0 = 0;
1231     }
1232
1233     c &= 63;
1234     if (c) {
1235         a1 = shr_double(a0, a1, c);
1236         a0 = a0 >> c;
1237     }
1238
1239     a->frac_hi = a0;
1240     a->frac_lo = a1;
1241 }
1242
1243 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1244
1245 static void frac64_shrjam(FloatParts64 *a, int c)
1246 {
1247     uint64_t a0 = a->frac;
1248
1249     if (likely(c != 0)) {
1250         if (likely(c < 64)) {
1251             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1252         } else {
1253             a0 = a0 != 0;
1254         }
1255         a->frac = a0;
1256     }
1257 }
1258
1259 static void frac128_shrjam(FloatParts128 *a, int c)
1260 {
1261     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1262     uint64_t sticky = 0;
1263
1264     if (unlikely(c == 0)) {
1265         return;
1266     } else if (likely(c < 64)) {
1267         /* nothing */
1268     } else if (likely(c < 128)) {
1269         sticky = a1;
1270         a1 = a0;
1271         a0 = 0;
1272         c &= 63;
1273         if (c == 0) {
1274             goto done;
1275         }
1276     } else {
1277         sticky = a0 | a1;
1278         a0 = a1 = 0;
1279         goto done;
1280     }
1281
1282     sticky |= shr_double(a1, 0, c);
1283     a1 = shr_double(a0, a1, c);
1284     a0 = a0 >> c;
1285
1286  done:
1287     a->frac_lo = a1 | (sticky != 0);
1288     a->frac_hi = a0;
1289 }
1290
1291 static void frac256_shrjam(FloatParts256 *a, int c)
1292 {
1293     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1294     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1295     uint64_t sticky = 0;
1296
1297     if (unlikely(c == 0)) {
1298         return;
1299     } else if (likely(c < 64)) {
1300         /* nothing */
1301     } else if (likely(c < 256)) {
1302         if (unlikely(c & 128)) {
1303             sticky |= a2 | a3;
1304             a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1305         }
1306         if (unlikely(c & 64)) {
1307             sticky |= a3;
1308             a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1309         }
1310         c &= 63;
1311         if (c == 0) {
1312             goto done;
1313         }
1314     } else {
1315         sticky = a0 | a1 | a2 | a3;
1316         a0 = a1 = a2 = a3 = 0;
1317         goto done;
1318     }
1319
1320     sticky |= shr_double(a3, 0, c);
1321     a3 = shr_double(a2, a3, c);
1322     a2 = shr_double(a1, a2, c);
1323     a1 = shr_double(a0, a1, c);
1324     a0 = a0 >> c;
1325
1326  done:
1327     a->frac_lo = a3 | (sticky != 0);
1328     a->frac_lm = a2;
1329     a->frac_hm = a1;
1330     a->frac_hi = a0;
1331 }
1332
1333 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1334
1335 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1336 {
1337     return usub64_overflow(a->frac, b->frac, &r->frac);
1338 }
1339
1340 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1341 {
1342     bool c = 0;
1343     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1344     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1345     return c;
1346 }
1347
1348 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1349 {
1350     bool c = 0;
1351     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1352     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1353     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1354     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1355     return c;
1356 }
1357
1358 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1359
1360 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1361 {
1362     r->frac = a->frac_hi | (a->frac_lo != 0);
1363 }
1364
1365 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1366 {
1367     r->frac_hi = a->frac_hi;
1368     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1369 }
1370
1371 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1372
1373 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1374 {
1375     r->frac_hi = a->frac;
1376     r->frac_lo = 0;
1377 }
1378
1379 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1380 {
1381     r->frac_hi = a->frac_hi;
1382     r->frac_hm = a->frac_lo;
1383     r->frac_lm = 0;
1384     r->frac_lo = 0;
1385 }
1386
1387 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1388
1389 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1390 #define FloatPartsN    glue(FloatParts,N)
1391 #define FloatPartsW    glue(FloatParts,W)
1392
1393 #define N 64
1394 #define W 128
1395
1396 #include "softfloat-parts-addsub.c.inc"
1397 #include "softfloat-parts.c.inc"
1398
1399 #undef  N
1400 #undef  W
1401 #define N 128
1402 #define W 256
1403
1404 #include "softfloat-parts-addsub.c.inc"
1405 #include "softfloat-parts.c.inc"
1406
1407 #undef  N
1408 #undef  W
1409 #define N            256
1410
1411 #include "softfloat-parts-addsub.c.inc"
1412
1413 #undef  N
1414 #undef  W
1415 #undef  partsN
1416 #undef  FloatPartsN
1417 #undef  FloatPartsW
1418
1419 /*
1420  * Pack/unpack routines with a specific FloatFmt.
1421  */
1422
1423 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1424                                       float_status *s, const FloatFmt *params)
1425 {
1426     float16_unpack_raw(p, f);
1427     parts_canonicalize(p, s, params);
1428 }
1429
1430 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1431                                      float_status *s)
1432 {
1433     float16a_unpack_canonical(p, f, s, &float16_params);
1434 }
1435
1436 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1437                                       float_status *s)
1438 {
1439     bfloat16_unpack_raw(p, f);
1440     parts_canonicalize(p, s, &bfloat16_params);
1441 }
1442
1443 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1444                                              float_status *s,
1445                                              const FloatFmt *params)
1446 {
1447     parts_uncanon(p, s, params);
1448     return float16_pack_raw(p);
1449 }
1450
1451 static float16 float16_round_pack_canonical(FloatParts64 *p,
1452                                             float_status *s)
1453 {
1454     return float16a_round_pack_canonical(p, s, &float16_params);
1455 }
1456
1457 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1458                                               float_status *s)
1459 {
1460     parts_uncanon(p, s, &bfloat16_params);
1461     return bfloat16_pack_raw(p);
1462 }
1463
1464 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1465                                      float_status *s)
1466 {
1467     float32_unpack_raw(p, f);
1468     parts_canonicalize(p, s, &float32_params);
1469 }
1470
1471 static float32 float32_round_pack_canonical(FloatParts64 *p,
1472                                             float_status *s)
1473 {
1474     parts_uncanon(p, s, &float32_params);
1475     return float32_pack_raw(p);
1476 }
1477
1478 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1479                                      float_status *s)
1480 {
1481     float64_unpack_raw(p, f);
1482     parts_canonicalize(p, s, &float64_params);
1483 }
1484
1485 static float64 float64_round_pack_canonical(FloatParts64 *p,
1486                                             float_status *s)
1487 {
1488     parts_uncanon(p, s, &float64_params);
1489     return float64_pack_raw(p);
1490 }
1491
1492 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1493                                       float_status *s)
1494 {
1495     float128_unpack_raw(p, f);
1496     parts_canonicalize(p, s, &float128_params);
1497 }
1498
1499 static float128 float128_round_pack_canonical(FloatParts128 *p,
1500                                               float_status *s)
1501 {
1502     parts_uncanon(p, s, &float128_params);
1503     return float128_pack_raw(p);
1504 }
1505
1506 /*
1507  * Addition and subtraction
1508  */
1509
1510 static float16 QEMU_FLATTEN
1511 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1512 {
1513     FloatParts64 pa, pb, *pr;
1514
1515     float16_unpack_canonical(&pa, a, status);
1516     float16_unpack_canonical(&pb, b, status);
1517     pr = parts_addsub(&pa, &pb, status, subtract);
1518
1519     return float16_round_pack_canonical(pr, status);
1520 }
1521
1522 float16 float16_add(float16 a, float16 b, float_status *status)
1523 {
1524     return float16_addsub(a, b, status, false);
1525 }
1526
1527 float16 float16_sub(float16 a, float16 b, float_status *status)
1528 {
1529     return float16_addsub(a, b, status, true);
1530 }
1531
1532 static float32 QEMU_SOFTFLOAT_ATTR
1533 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1534 {
1535     FloatParts64 pa, pb, *pr;
1536
1537     float32_unpack_canonical(&pa, a, status);
1538     float32_unpack_canonical(&pb, b, status);
1539     pr = parts_addsub(&pa, &pb, status, subtract);
1540
1541     return float32_round_pack_canonical(pr, status);
1542 }
1543
1544 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1545 {
1546     return soft_f32_addsub(a, b, status, false);
1547 }
1548
1549 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1550 {
1551     return soft_f32_addsub(a, b, status, true);
1552 }
1553
1554 static float64 QEMU_SOFTFLOAT_ATTR
1555 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1556 {
1557     FloatParts64 pa, pb, *pr;
1558
1559     float64_unpack_canonical(&pa, a, status);
1560     float64_unpack_canonical(&pb, b, status);
1561     pr = parts_addsub(&pa, &pb, status, subtract);
1562
1563     return float64_round_pack_canonical(pr, status);
1564 }
1565
1566 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1567 {
1568     return soft_f64_addsub(a, b, status, false);
1569 }
1570
1571 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1572 {
1573     return soft_f64_addsub(a, b, status, true);
1574 }
1575
1576 static float hard_f32_add(float a, float b)
1577 {
1578     return a + b;
1579 }
1580
1581 static float hard_f32_sub(float a, float b)
1582 {
1583     return a - b;
1584 }
1585
1586 static double hard_f64_add(double a, double b)
1587 {
1588     return a + b;
1589 }
1590
1591 static double hard_f64_sub(double a, double b)
1592 {
1593     return a - b;
1594 }
1595
1596 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1597 {
1598     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1599         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1600     }
1601     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1602 }
1603
1604 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1605 {
1606     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1607         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1608     } else {
1609         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1610     }
1611 }
1612
1613 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1614                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1615 {
1616     return float32_gen2(a, b, s, hard, soft,
1617                         f32_is_zon2, f32_addsubmul_post);
1618 }
1619
1620 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1621                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1622 {
1623     return float64_gen2(a, b, s, hard, soft,
1624                         f64_is_zon2, f64_addsubmul_post);
1625 }
1626
1627 float32 QEMU_FLATTEN
1628 float32_add(float32 a, float32 b, float_status *s)
1629 {
1630     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1631 }
1632
1633 float32 QEMU_FLATTEN
1634 float32_sub(float32 a, float32 b, float_status *s)
1635 {
1636     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1637 }
1638
1639 float64 QEMU_FLATTEN
1640 float64_add(float64 a, float64 b, float_status *s)
1641 {
1642     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1643 }
1644
1645 float64 QEMU_FLATTEN
1646 float64_sub(float64 a, float64 b, float_status *s)
1647 {
1648     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1649 }
1650
1651 static bfloat16 QEMU_FLATTEN
1652 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1653 {
1654     FloatParts64 pa, pb, *pr;
1655
1656     bfloat16_unpack_canonical(&pa, a, status);
1657     bfloat16_unpack_canonical(&pb, b, status);
1658     pr = parts_addsub(&pa, &pb, status, subtract);
1659
1660     return bfloat16_round_pack_canonical(pr, status);
1661 }
1662
1663 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1664 {
1665     return bfloat16_addsub(a, b, status, false);
1666 }
1667
1668 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1669 {
1670     return bfloat16_addsub(a, b, status, true);
1671 }
1672
1673 static float128 QEMU_FLATTEN
1674 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1675 {
1676     FloatParts128 pa, pb, *pr;
1677
1678     float128_unpack_canonical(&pa, a, status);
1679     float128_unpack_canonical(&pb, b, status);
1680     pr = parts_addsub(&pa, &pb, status, subtract);
1681
1682     return float128_round_pack_canonical(pr, status);
1683 }
1684
1685 float128 float128_add(float128 a, float128 b, float_status *status)
1686 {
1687     return float128_addsub(a, b, status, false);
1688 }
1689
1690 float128 float128_sub(float128 a, float128 b, float_status *status)
1691 {
1692     return float128_addsub(a, b, status, true);
1693 }
1694
1695 /*
1696  * Multiplication
1697  */
1698
1699 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
1700 {
1701     FloatParts64 pa, pb, *pr;
1702
1703     float16_unpack_canonical(&pa, a, status);
1704     float16_unpack_canonical(&pb, b, status);
1705     pr = parts_mul(&pa, &pb, status);
1706
1707     return float16_round_pack_canonical(pr, status);
1708 }
1709
1710 static float32 QEMU_SOFTFLOAT_ATTR
1711 soft_f32_mul(float32 a, float32 b, float_status *status)
1712 {
1713     FloatParts64 pa, pb, *pr;
1714
1715     float32_unpack_canonical(&pa, a, status);
1716     float32_unpack_canonical(&pb, b, status);
1717     pr = parts_mul(&pa, &pb, status);
1718
1719     return float32_round_pack_canonical(pr, status);
1720 }
1721
1722 static float64 QEMU_SOFTFLOAT_ATTR
1723 soft_f64_mul(float64 a, float64 b, float_status *status)
1724 {
1725     FloatParts64 pa, pb, *pr;
1726
1727     float64_unpack_canonical(&pa, a, status);
1728     float64_unpack_canonical(&pb, b, status);
1729     pr = parts_mul(&pa, &pb, status);
1730
1731     return float64_round_pack_canonical(pr, status);
1732 }
1733
1734 static float hard_f32_mul(float a, float b)
1735 {
1736     return a * b;
1737 }
1738
1739 static double hard_f64_mul(double a, double b)
1740 {
1741     return a * b;
1742 }
1743
1744 float32 QEMU_FLATTEN
1745 float32_mul(float32 a, float32 b, float_status *s)
1746 {
1747     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
1748                         f32_is_zon2, f32_addsubmul_post);
1749 }
1750
1751 float64 QEMU_FLATTEN
1752 float64_mul(float64 a, float64 b, float_status *s)
1753 {
1754     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
1755                         f64_is_zon2, f64_addsubmul_post);
1756 }
1757
1758 bfloat16 QEMU_FLATTEN
1759 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1760 {
1761     FloatParts64 pa, pb, *pr;
1762
1763     bfloat16_unpack_canonical(&pa, a, status);
1764     bfloat16_unpack_canonical(&pb, b, status);
1765     pr = parts_mul(&pa, &pb, status);
1766
1767     return bfloat16_round_pack_canonical(pr, status);
1768 }
1769
1770 float128 QEMU_FLATTEN
1771 float128_mul(float128 a, float128 b, float_status *status)
1772 {
1773     FloatParts128 pa, pb, *pr;
1774
1775     float128_unpack_canonical(&pa, a, status);
1776     float128_unpack_canonical(&pb, b, status);
1777     pr = parts_mul(&pa, &pb, status);
1778
1779     return float128_round_pack_canonical(pr, status);
1780 }
1781
1782 /*
1783  * Fused multiply-add
1784  */
1785
1786 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1787                                     int flags, float_status *status)
1788 {
1789     FloatParts64 pa, pb, pc, *pr;
1790
1791     float16_unpack_canonical(&pa, a, status);
1792     float16_unpack_canonical(&pb, b, status);
1793     float16_unpack_canonical(&pc, c, status);
1794     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1795
1796     return float16_round_pack_canonical(pr, status);
1797 }
1798
1799 static float32 QEMU_SOFTFLOAT_ATTR
1800 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1801                 float_status *status)
1802 {
1803     FloatParts64 pa, pb, pc, *pr;
1804
1805     float32_unpack_canonical(&pa, a, status);
1806     float32_unpack_canonical(&pb, b, status);
1807     float32_unpack_canonical(&pc, c, status);
1808     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1809
1810     return float32_round_pack_canonical(pr, status);
1811 }
1812
1813 static float64 QEMU_SOFTFLOAT_ATTR
1814 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1815                 float_status *status)
1816 {
1817     FloatParts64 pa, pb, pc, *pr;
1818
1819     float64_unpack_canonical(&pa, a, status);
1820     float64_unpack_canonical(&pb, b, status);
1821     float64_unpack_canonical(&pc, c, status);
1822     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1823
1824     return float64_round_pack_canonical(pr, status);
1825 }
1826
1827 static bool force_soft_fma;
1828
1829 float32 QEMU_FLATTEN
1830 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1831 {
1832     union_float32 ua, ub, uc, ur;
1833
1834     ua.s = xa;
1835     ub.s = xb;
1836     uc.s = xc;
1837
1838     if (unlikely(!can_use_fpu(s))) {
1839         goto soft;
1840     }
1841     if (unlikely(flags & float_muladd_halve_result)) {
1842         goto soft;
1843     }
1844
1845     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1846     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1847         goto soft;
1848     }
1849
1850     if (unlikely(force_soft_fma)) {
1851         goto soft;
1852     }
1853
1854     /*
1855      * When (a || b) == 0, there's no need to check for under/over flow,
1856      * since we know the addend is (normal || 0) and the product is 0.
1857      */
1858     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1859         union_float32 up;
1860         bool prod_sign;
1861
1862         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1863         prod_sign ^= !!(flags & float_muladd_negate_product);
1864         up.s = float32_set_sign(float32_zero, prod_sign);
1865
1866         if (flags & float_muladd_negate_c) {
1867             uc.h = -uc.h;
1868         }
1869         ur.h = up.h + uc.h;
1870     } else {
1871         union_float32 ua_orig = ua;
1872         union_float32 uc_orig = uc;
1873
1874         if (flags & float_muladd_negate_product) {
1875             ua.h = -ua.h;
1876         }
1877         if (flags & float_muladd_negate_c) {
1878             uc.h = -uc.h;
1879         }
1880
1881         ur.h = fmaf(ua.h, ub.h, uc.h);
1882
1883         if (unlikely(f32_is_inf(ur))) {
1884             float_raise(float_flag_overflow, s);
1885         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
1886             ua = ua_orig;
1887             uc = uc_orig;
1888             goto soft;
1889         }
1890     }
1891     if (flags & float_muladd_negate_result) {
1892         return float32_chs(ur.s);
1893     }
1894     return ur.s;
1895
1896  soft:
1897     return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1898 }
1899
1900 float64 QEMU_FLATTEN
1901 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1902 {
1903     union_float64 ua, ub, uc, ur;
1904
1905     ua.s = xa;
1906     ub.s = xb;
1907     uc.s = xc;
1908
1909     if (unlikely(!can_use_fpu(s))) {
1910         goto soft;
1911     }
1912     if (unlikely(flags & float_muladd_halve_result)) {
1913         goto soft;
1914     }
1915
1916     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1917     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1918         goto soft;
1919     }
1920
1921     if (unlikely(force_soft_fma)) {
1922         goto soft;
1923     }
1924
1925     /*
1926      * When (a || b) == 0, there's no need to check for under/over flow,
1927      * since we know the addend is (normal || 0) and the product is 0.
1928      */
1929     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1930         union_float64 up;
1931         bool prod_sign;
1932
1933         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1934         prod_sign ^= !!(flags & float_muladd_negate_product);
1935         up.s = float64_set_sign(float64_zero, prod_sign);
1936
1937         if (flags & float_muladd_negate_c) {
1938             uc.h = -uc.h;
1939         }
1940         ur.h = up.h + uc.h;
1941     } else {
1942         union_float64 ua_orig = ua;
1943         union_float64 uc_orig = uc;
1944
1945         if (flags & float_muladd_negate_product) {
1946             ua.h = -ua.h;
1947         }
1948         if (flags & float_muladd_negate_c) {
1949             uc.h = -uc.h;
1950         }
1951
1952         ur.h = fma(ua.h, ub.h, uc.h);
1953
1954         if (unlikely(f64_is_inf(ur))) {
1955             float_raise(float_flag_overflow, s);
1956         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
1957             ua = ua_orig;
1958             uc = uc_orig;
1959             goto soft;
1960         }
1961     }
1962     if (flags & float_muladd_negate_result) {
1963         return float64_chs(ur.s);
1964     }
1965     return ur.s;
1966
1967  soft:
1968     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1969 }
1970
1971 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1972                                       int flags, float_status *status)
1973 {
1974     FloatParts64 pa, pb, pc, *pr;
1975
1976     bfloat16_unpack_canonical(&pa, a, status);
1977     bfloat16_unpack_canonical(&pb, b, status);
1978     bfloat16_unpack_canonical(&pc, c, status);
1979     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1980
1981     return bfloat16_round_pack_canonical(pr, status);
1982 }
1983
1984 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
1985                                       int flags, float_status *status)
1986 {
1987     FloatParts128 pa, pb, pc, *pr;
1988
1989     float128_unpack_canonical(&pa, a, status);
1990     float128_unpack_canonical(&pb, b, status);
1991     float128_unpack_canonical(&pc, c, status);
1992     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1993
1994     return float128_round_pack_canonical(pr, status);
1995 }
1996
1997 /*
1998  * Division
1999  */
2000
2001 float16 float16_div(float16 a, float16 b, float_status *status)
2002 {
2003     FloatParts64 pa, pb, *pr;
2004
2005     float16_unpack_canonical(&pa, a, status);
2006     float16_unpack_canonical(&pb, b, status);
2007     pr = parts_div(&pa, &pb, status);
2008
2009     return float16_round_pack_canonical(pr, status);
2010 }
2011
2012 static float32 QEMU_SOFTFLOAT_ATTR
2013 soft_f32_div(float32 a, float32 b, float_status *status)
2014 {
2015     FloatParts64 pa, pb, *pr;
2016
2017     float32_unpack_canonical(&pa, a, status);
2018     float32_unpack_canonical(&pb, b, status);
2019     pr = parts_div(&pa, &pb, status);
2020
2021     return float32_round_pack_canonical(pr, status);
2022 }
2023
2024 static float64 QEMU_SOFTFLOAT_ATTR
2025 soft_f64_div(float64 a, float64 b, float_status *status)
2026 {
2027     FloatParts64 pa, pb, *pr;
2028
2029     float64_unpack_canonical(&pa, a, status);
2030     float64_unpack_canonical(&pb, b, status);
2031     pr = parts_div(&pa, &pb, status);
2032
2033     return float64_round_pack_canonical(pr, status);
2034 }
2035
2036 static float hard_f32_div(float a, float b)
2037 {
2038     return a / b;
2039 }
2040
2041 static double hard_f64_div(double a, double b)
2042 {
2043     return a / b;
2044 }
2045
2046 static bool f32_div_pre(union_float32 a, union_float32 b)
2047 {
2048     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2049         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2050                fpclassify(b.h) == FP_NORMAL;
2051     }
2052     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2053 }
2054
2055 static bool f64_div_pre(union_float64 a, union_float64 b)
2056 {
2057     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2058         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2059                fpclassify(b.h) == FP_NORMAL;
2060     }
2061     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2062 }
2063
2064 static bool f32_div_post(union_float32 a, union_float32 b)
2065 {
2066     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2067         return fpclassify(a.h) != FP_ZERO;
2068     }
2069     return !float32_is_zero(a.s);
2070 }
2071
2072 static bool f64_div_post(union_float64 a, union_float64 b)
2073 {
2074     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2075         return fpclassify(a.h) != FP_ZERO;
2076     }
2077     return !float64_is_zero(a.s);
2078 }
2079
2080 float32 QEMU_FLATTEN
2081 float32_div(float32 a, float32 b, float_status *s)
2082 {
2083     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2084                         f32_div_pre, f32_div_post);
2085 }
2086
2087 float64 QEMU_FLATTEN
2088 float64_div(float64 a, float64 b, float_status *s)
2089 {
2090     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2091                         f64_div_pre, f64_div_post);
2092 }
2093
2094 bfloat16 QEMU_FLATTEN
2095 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2096 {
2097     FloatParts64 pa, pb, *pr;
2098
2099     bfloat16_unpack_canonical(&pa, a, status);
2100     bfloat16_unpack_canonical(&pb, b, status);
2101     pr = parts_div(&pa, &pb, status);
2102
2103     return bfloat16_round_pack_canonical(pr, status);
2104 }
2105
2106 float128 QEMU_FLATTEN
2107 float128_div(float128 a, float128 b, float_status *status)
2108 {
2109     FloatParts128 pa, pb, *pr;
2110
2111     float128_unpack_canonical(&pa, a, status);
2112     float128_unpack_canonical(&pb, b, status);
2113     pr = parts_div(&pa, &pb, status);
2114
2115     return float128_round_pack_canonical(pr, status);
2116 }
2117
2118 /*
2119  * Float to Float conversions
2120  *
2121  * Returns the result of converting one float format to another. The
2122  * conversion is performed according to the IEC/IEEE Standard for
2123  * Binary Floating-Point Arithmetic.
2124  *
2125  * Usually this only needs to take care of raising invalid exceptions
2126  * and handling the conversion on NaNs.
2127  */
2128
2129 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2130 {
2131     switch (a->cls) {
2132     case float_class_qnan:
2133     case float_class_snan:
2134         /*
2135          * There is no NaN in the destination format.  Raise Invalid
2136          * and return a zero with the sign of the input NaN.
2137          */
2138         float_raise(float_flag_invalid, s);
2139         a->cls = float_class_zero;
2140         break;
2141
2142     case float_class_inf:
2143         /*
2144          * There is no Inf in the destination format.  Raise Invalid
2145          * and return the maximum normal with the correct sign.
2146          */
2147         float_raise(float_flag_invalid, s);
2148         a->cls = float_class_normal;
2149         a->exp = float16_params_ahp.exp_max;
2150         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2151                                   float16_params_ahp.frac_size + 1);
2152         break;
2153
2154     case float_class_normal:
2155     case float_class_zero:
2156         break;
2157
2158     default:
2159         g_assert_not_reached();
2160     }
2161 }
2162
2163 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2164 {
2165     if (is_nan(a->cls)) {
2166         parts_return_nan(a, s);
2167     }
2168 }
2169
2170 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2171 {
2172     if (is_nan(a->cls)) {
2173         parts_return_nan(a, s);
2174     }
2175 }
2176
2177 #define parts_float_to_float(P, S) \
2178     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2179
2180 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2181                                         float_status *s)
2182 {
2183     a->cls = b->cls;
2184     a->sign = b->sign;
2185     a->exp = b->exp;
2186
2187     if (a->cls == float_class_normal) {
2188         frac_truncjam(a, b);
2189     } else if (is_nan(a->cls)) {
2190         /* Discard the low bits of the NaN. */
2191         a->frac = b->frac_hi;
2192         parts_return_nan(a, s);
2193     }
2194 }
2195
2196 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2197                                        float_status *s)
2198 {
2199     a->cls = b->cls;
2200     a->sign = b->sign;
2201     a->exp = b->exp;
2202     frac_widen(a, b);
2203
2204     if (is_nan(a->cls)) {
2205         parts_return_nan(a, s);
2206     }
2207 }
2208
2209 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2210 {
2211     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2212     FloatParts64 p;
2213
2214     float16a_unpack_canonical(&p, a, s, fmt16);
2215     parts_float_to_float(&p, s);
2216     return float32_round_pack_canonical(&p, s);
2217 }
2218
2219 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2220 {
2221     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2222     FloatParts64 p;
2223
2224     float16a_unpack_canonical(&p, a, s, fmt16);
2225     parts_float_to_float(&p, s);
2226     return float64_round_pack_canonical(&p, s);
2227 }
2228
2229 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2230 {
2231     FloatParts64 p;
2232     const FloatFmt *fmt;
2233
2234     float32_unpack_canonical(&p, a, s);
2235     if (ieee) {
2236         parts_float_to_float(&p, s);
2237         fmt = &float16_params;
2238     } else {
2239         parts_float_to_ahp(&p, s);
2240         fmt = &float16_params_ahp;
2241     }
2242     return float16a_round_pack_canonical(&p, s, fmt);
2243 }
2244
2245 static float64 QEMU_SOFTFLOAT_ATTR
2246 soft_float32_to_float64(float32 a, float_status *s)
2247 {
2248     FloatParts64 p;
2249
2250     float32_unpack_canonical(&p, a, s);
2251     parts_float_to_float(&p, s);
2252     return float64_round_pack_canonical(&p, s);
2253 }
2254
2255 float64 float32_to_float64(float32 a, float_status *s)
2256 {
2257     if (likely(float32_is_normal(a))) {
2258         /* Widening conversion can never produce inexact results.  */
2259         union_float32 uf;
2260         union_float64 ud;
2261         uf.s = a;
2262         ud.h = uf.h;
2263         return ud.s;
2264     } else if (float32_is_zero(a)) {
2265         return float64_set_sign(float64_zero, float32_is_neg(a));
2266     } else {
2267         return soft_float32_to_float64(a, s);
2268     }
2269 }
2270
2271 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2272 {
2273     FloatParts64 p;
2274     const FloatFmt *fmt;
2275
2276     float64_unpack_canonical(&p, a, s);
2277     if (ieee) {
2278         parts_float_to_float(&p, s);
2279         fmt = &float16_params;
2280     } else {
2281         parts_float_to_ahp(&p, s);
2282         fmt = &float16_params_ahp;
2283     }
2284     return float16a_round_pack_canonical(&p, s, fmt);
2285 }
2286
2287 float32 float64_to_float32(float64 a, float_status *s)
2288 {
2289     FloatParts64 p;
2290
2291     float64_unpack_canonical(&p, a, s);
2292     parts_float_to_float(&p, s);
2293     return float32_round_pack_canonical(&p, s);
2294 }
2295
2296 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2297 {
2298     FloatParts64 p;
2299
2300     bfloat16_unpack_canonical(&p, a, s);
2301     parts_float_to_float(&p, s);
2302     return float32_round_pack_canonical(&p, s);
2303 }
2304
2305 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2306 {
2307     FloatParts64 p;
2308
2309     bfloat16_unpack_canonical(&p, a, s);
2310     parts_float_to_float(&p, s);
2311     return float64_round_pack_canonical(&p, s);
2312 }
2313
2314 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2315 {
2316     FloatParts64 p;
2317
2318     float32_unpack_canonical(&p, a, s);
2319     parts_float_to_float(&p, s);
2320     return bfloat16_round_pack_canonical(&p, s);
2321 }
2322
2323 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2324 {
2325     FloatParts64 p;
2326
2327     float64_unpack_canonical(&p, a, s);
2328     parts_float_to_float(&p, s);
2329     return bfloat16_round_pack_canonical(&p, s);
2330 }
2331
2332 float32 float128_to_float32(float128 a, float_status *s)
2333 {
2334     FloatParts64 p64;
2335     FloatParts128 p128;
2336
2337     float128_unpack_canonical(&p128, a, s);
2338     parts_float_to_float_narrow(&p64, &p128, s);
2339     return float32_round_pack_canonical(&p64, s);
2340 }
2341
2342 float64 float128_to_float64(float128 a, float_status *s)
2343 {
2344     FloatParts64 p64;
2345     FloatParts128 p128;
2346
2347     float128_unpack_canonical(&p128, a, s);
2348     parts_float_to_float_narrow(&p64, &p128, s);
2349     return float64_round_pack_canonical(&p64, s);
2350 }
2351
2352 float128 float32_to_float128(float32 a, float_status *s)
2353 {
2354     FloatParts64 p64;
2355     FloatParts128 p128;
2356
2357     float32_unpack_canonical(&p64, a, s);
2358     parts_float_to_float_widen(&p128, &p64, s);
2359     return float128_round_pack_canonical(&p128, s);
2360 }
2361
2362 float128 float64_to_float128(float64 a, float_status *s)
2363 {
2364     FloatParts64 p64;
2365     FloatParts128 p128;
2366
2367     float64_unpack_canonical(&p64, a, s);
2368     parts_float_to_float_widen(&p128, &p64, s);
2369     return float128_round_pack_canonical(&p128, s);
2370 }
2371
2372 /*
2373  * Round to integral value
2374  */
2375
2376 float16 float16_round_to_int(float16 a, float_status *s)
2377 {
2378     FloatParts64 p;
2379
2380     float16_unpack_canonical(&p, a, s);
2381     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2382     return float16_round_pack_canonical(&p, s);
2383 }
2384
2385 float32 float32_round_to_int(float32 a, float_status *s)
2386 {
2387     FloatParts64 p;
2388
2389     float32_unpack_canonical(&p, a, s);
2390     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2391     return float32_round_pack_canonical(&p, s);
2392 }
2393
2394 float64 float64_round_to_int(float64 a, float_status *s)
2395 {
2396     FloatParts64 p;
2397
2398     float64_unpack_canonical(&p, a, s);
2399     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2400     return float64_round_pack_canonical(&p, s);
2401 }
2402
2403 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2404 {
2405     FloatParts64 p;
2406
2407     bfloat16_unpack_canonical(&p, a, s);
2408     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2409     return bfloat16_round_pack_canonical(&p, s);
2410 }
2411
2412 float128 float128_round_to_int(float128 a, float_status *s)
2413 {
2414     FloatParts128 p;
2415
2416     float128_unpack_canonical(&p, a, s);
2417     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
2418     return float128_round_pack_canonical(&p, s);
2419 }
2420
2421 /*
2422  * Floating-point to signed integer conversions
2423  */
2424
2425 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2426                               float_status *s)
2427 {
2428     FloatParts64 p;
2429
2430     float16_unpack_canonical(&p, a, s);
2431     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
2432 }
2433
2434 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2435                                 float_status *s)
2436 {
2437     FloatParts64 p;
2438
2439     float16_unpack_canonical(&p, a, s);
2440     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2441 }
2442
2443 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2444                                 float_status *s)
2445 {
2446     FloatParts64 p;
2447
2448     float16_unpack_canonical(&p, a, s);
2449     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2450 }
2451
2452 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2453                                 float_status *s)
2454 {
2455     FloatParts64 p;
2456
2457     float16_unpack_canonical(&p, a, s);
2458     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2459 }
2460
2461 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2462                                 float_status *s)
2463 {
2464     FloatParts64 p;
2465
2466     float32_unpack_canonical(&p, a, s);
2467     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2468 }
2469
2470 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2471                                 float_status *s)
2472 {
2473     FloatParts64 p;
2474
2475     float32_unpack_canonical(&p, a, s);
2476     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2477 }
2478
2479 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2480                                 float_status *s)
2481 {
2482     FloatParts64 p;
2483
2484     float32_unpack_canonical(&p, a, s);
2485     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2486 }
2487
2488 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2489                                 float_status *s)
2490 {
2491     FloatParts64 p;
2492
2493     float64_unpack_canonical(&p, a, s);
2494     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2495 }
2496
2497 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2498                                 float_status *s)
2499 {
2500     FloatParts64 p;
2501
2502     float64_unpack_canonical(&p, a, s);
2503     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2504 }
2505
2506 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2507                                 float_status *s)
2508 {
2509     FloatParts64 p;
2510
2511     float64_unpack_canonical(&p, a, s);
2512     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2513 }
2514
2515 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2516                                  float_status *s)
2517 {
2518     FloatParts64 p;
2519
2520     bfloat16_unpack_canonical(&p, a, s);
2521     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2522 }
2523
2524 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2525                                  float_status *s)
2526 {
2527     FloatParts64 p;
2528
2529     bfloat16_unpack_canonical(&p, a, s);
2530     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2531 }
2532
2533 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2534                                  float_status *s)
2535 {
2536     FloatParts64 p;
2537
2538     bfloat16_unpack_canonical(&p, a, s);
2539     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2540 }
2541
2542 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
2543                                         int scale, float_status *s)
2544 {
2545     FloatParts128 p;
2546
2547     float128_unpack_canonical(&p, a, s);
2548     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2549 }
2550
2551 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
2552                                         int scale, float_status *s)
2553 {
2554     FloatParts128 p;
2555
2556     float128_unpack_canonical(&p, a, s);
2557     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2558 }
2559
2560 int8_t float16_to_int8(float16 a, float_status *s)
2561 {
2562     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2563 }
2564
2565 int16_t float16_to_int16(float16 a, float_status *s)
2566 {
2567     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2568 }
2569
2570 int32_t float16_to_int32(float16 a, float_status *s)
2571 {
2572     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2573 }
2574
2575 int64_t float16_to_int64(float16 a, float_status *s)
2576 {
2577     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2578 }
2579
2580 int16_t float32_to_int16(float32 a, float_status *s)
2581 {
2582     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2583 }
2584
2585 int32_t float32_to_int32(float32 a, float_status *s)
2586 {
2587     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2588 }
2589
2590 int64_t float32_to_int64(float32 a, float_status *s)
2591 {
2592     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2593 }
2594
2595 int16_t float64_to_int16(float64 a, float_status *s)
2596 {
2597     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2598 }
2599
2600 int32_t float64_to_int32(float64 a, float_status *s)
2601 {
2602     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2603 }
2604
2605 int64_t float64_to_int64(float64 a, float_status *s)
2606 {
2607     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2608 }
2609
2610 int32_t float128_to_int32(float128 a, float_status *s)
2611 {
2612     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2613 }
2614
2615 int64_t float128_to_int64(float128 a, float_status *s)
2616 {
2617     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2618 }
2619
2620 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2621 {
2622     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2623 }
2624
2625 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2626 {
2627     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2628 }
2629
2630 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2631 {
2632     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2633 }
2634
2635 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2636 {
2637     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2638 }
2639
2640 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2641 {
2642     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2643 }
2644
2645 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2646 {
2647     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2648 }
2649
2650 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2651 {
2652     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2653 }
2654
2655 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2656 {
2657     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2658 }
2659
2660 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2661 {
2662     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2663 }
2664
2665 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
2666 {
2667     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
2668 }
2669
2670 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
2671 {
2672     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
2673 }
2674
2675 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2676 {
2677     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2678 }
2679
2680 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2681 {
2682     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2683 }
2684
2685 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2686 {
2687     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2688 }
2689
2690 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2691 {
2692     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2693 }
2694
2695 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2696 {
2697     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2698 }
2699
2700 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2701 {
2702     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2703 }
2704
2705 /*
2706  * Floating-point to unsigned integer conversions
2707  */
2708
2709 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2710                                 float_status *s)
2711 {
2712     FloatParts64 p;
2713
2714     float16_unpack_canonical(&p, a, s);
2715     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
2716 }
2717
2718 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2719                                   float_status *s)
2720 {
2721     FloatParts64 p;
2722
2723     float16_unpack_canonical(&p, a, s);
2724     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2725 }
2726
2727 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2728                                   float_status *s)
2729 {
2730     FloatParts64 p;
2731
2732     float16_unpack_canonical(&p, a, s);
2733     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2734 }
2735
2736 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2737                                   float_status *s)
2738 {
2739     FloatParts64 p;
2740
2741     float16_unpack_canonical(&p, a, s);
2742     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2743 }
2744
2745 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2746                                   float_status *s)
2747 {
2748     FloatParts64 p;
2749
2750     float32_unpack_canonical(&p, a, s);
2751     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2752 }
2753
2754 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2755                                   float_status *s)
2756 {
2757     FloatParts64 p;
2758
2759     float32_unpack_canonical(&p, a, s);
2760     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2761 }
2762
2763 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2764                                   float_status *s)
2765 {
2766     FloatParts64 p;
2767
2768     float32_unpack_canonical(&p, a, s);
2769     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2770 }
2771
2772 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2773                                   float_status *s)
2774 {
2775     FloatParts64 p;
2776
2777     float64_unpack_canonical(&p, a, s);
2778     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2779 }
2780
2781 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2782                                   float_status *s)
2783 {
2784     FloatParts64 p;
2785
2786     float64_unpack_canonical(&p, a, s);
2787     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2788 }
2789
2790 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2791                                   float_status *s)
2792 {
2793     FloatParts64 p;
2794
2795     float64_unpack_canonical(&p, a, s);
2796     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2797 }
2798
2799 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2800                                    int scale, float_status *s)
2801 {
2802     FloatParts64 p;
2803
2804     bfloat16_unpack_canonical(&p, a, s);
2805     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2806 }
2807
2808 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2809                                    int scale, float_status *s)
2810 {
2811     FloatParts64 p;
2812
2813     bfloat16_unpack_canonical(&p, a, s);
2814     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2815 }
2816
2817 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
2818                                    int scale, float_status *s)
2819 {
2820     FloatParts64 p;
2821
2822     bfloat16_unpack_canonical(&p, a, s);
2823     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2824 }
2825
2826 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
2827                                           int scale, float_status *s)
2828 {
2829     FloatParts128 p;
2830
2831     float128_unpack_canonical(&p, a, s);
2832     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2833 }
2834
2835 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
2836                                           int scale, float_status *s)
2837 {
2838     FloatParts128 p;
2839
2840     float128_unpack_canonical(&p, a, s);
2841     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2842 }
2843
2844 uint8_t float16_to_uint8(float16 a, float_status *s)
2845 {
2846     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2847 }
2848
2849 uint16_t float16_to_uint16(float16 a, float_status *s)
2850 {
2851     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2852 }
2853
2854 uint32_t float16_to_uint32(float16 a, float_status *s)
2855 {
2856     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2857 }
2858
2859 uint64_t float16_to_uint64(float16 a, float_status *s)
2860 {
2861     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2862 }
2863
2864 uint16_t float32_to_uint16(float32 a, float_status *s)
2865 {
2866     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2867 }
2868
2869 uint32_t float32_to_uint32(float32 a, float_status *s)
2870 {
2871     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2872 }
2873
2874 uint64_t float32_to_uint64(float32 a, float_status *s)
2875 {
2876     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2877 }
2878
2879 uint16_t float64_to_uint16(float64 a, float_status *s)
2880 {
2881     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2882 }
2883
2884 uint32_t float64_to_uint32(float64 a, float_status *s)
2885 {
2886     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2887 }
2888
2889 uint64_t float64_to_uint64(float64 a, float_status *s)
2890 {
2891     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2892 }
2893
2894 uint32_t float128_to_uint32(float128 a, float_status *s)
2895 {
2896     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2897 }
2898
2899 uint64_t float128_to_uint64(float128 a, float_status *s)
2900 {
2901     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2902 }
2903
2904 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2905 {
2906     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2907 }
2908
2909 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2910 {
2911     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2912 }
2913
2914 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2915 {
2916     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2917 }
2918
2919 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2920 {
2921     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2922 }
2923
2924 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2925 {
2926     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2927 }
2928
2929 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2930 {
2931     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2932 }
2933
2934 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2935 {
2936     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2937 }
2938
2939 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2940 {
2941     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2942 }
2943
2944 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2945 {
2946     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2947 }
2948
2949 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
2950 {
2951     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2952 }
2953
2954 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
2955 {
2956     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2957 }
2958
2959 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
2960 {
2961     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2962 }
2963
2964 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
2965 {
2966     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2967 }
2968
2969 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
2970 {
2971     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2972 }
2973
2974 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
2975 {
2976     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2977 }
2978
2979 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
2980 {
2981     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2982 }
2983
2984 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
2985 {
2986     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2987 }
2988
2989 /*
2990  * Signed integer to floating-point conversions
2991  */
2992
2993 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
2994 {
2995     FloatParts64 p;
2996
2997     parts_sint_to_float(&p, a, scale, status);
2998     return float16_round_pack_canonical(&p, status);
2999 }
3000
3001 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3002 {
3003     return int64_to_float16_scalbn(a, scale, status);
3004 }
3005
3006 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3007 {
3008     return int64_to_float16_scalbn(a, scale, status);
3009 }
3010
3011 float16 int64_to_float16(int64_t a, float_status *status)
3012 {
3013     return int64_to_float16_scalbn(a, 0, status);
3014 }
3015
3016 float16 int32_to_float16(int32_t a, float_status *status)
3017 {
3018     return int64_to_float16_scalbn(a, 0, status);
3019 }
3020
3021 float16 int16_to_float16(int16_t a, float_status *status)
3022 {
3023     return int64_to_float16_scalbn(a, 0, status);
3024 }
3025
3026 float16 int8_to_float16(int8_t a, float_status *status)
3027 {
3028     return int64_to_float16_scalbn(a, 0, status);
3029 }
3030
3031 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3032 {
3033     FloatParts64 p;
3034
3035     parts64_sint_to_float(&p, a, scale, status);
3036     return float32_round_pack_canonical(&p, status);
3037 }
3038
3039 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3040 {
3041     return int64_to_float32_scalbn(a, scale, status);
3042 }
3043
3044 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3045 {
3046     return int64_to_float32_scalbn(a, scale, status);
3047 }
3048
3049 float32 int64_to_float32(int64_t a, float_status *status)
3050 {
3051     return int64_to_float32_scalbn(a, 0, status);
3052 }
3053
3054 float32 int32_to_float32(int32_t a, float_status *status)
3055 {
3056     return int64_to_float32_scalbn(a, 0, status);
3057 }
3058
3059 float32 int16_to_float32(int16_t a, float_status *status)
3060 {
3061     return int64_to_float32_scalbn(a, 0, status);
3062 }
3063
3064 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3065 {
3066     FloatParts64 p;
3067
3068     parts_sint_to_float(&p, a, scale, status);
3069     return float64_round_pack_canonical(&p, status);
3070 }
3071
3072 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3073 {
3074     return int64_to_float64_scalbn(a, scale, status);
3075 }
3076
3077 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3078 {
3079     return int64_to_float64_scalbn(a, scale, status);
3080 }
3081
3082 float64 int64_to_float64(int64_t a, float_status *status)
3083 {
3084     return int64_to_float64_scalbn(a, 0, status);
3085 }
3086
3087 float64 int32_to_float64(int32_t a, float_status *status)
3088 {
3089     return int64_to_float64_scalbn(a, 0, status);
3090 }
3091
3092 float64 int16_to_float64(int16_t a, float_status *status)
3093 {
3094     return int64_to_float64_scalbn(a, 0, status);
3095 }
3096
3097 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3098 {
3099     FloatParts64 p;
3100
3101     parts_sint_to_float(&p, a, scale, status);
3102     return bfloat16_round_pack_canonical(&p, status);
3103 }
3104
3105 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3106 {
3107     return int64_to_bfloat16_scalbn(a, scale, status);
3108 }
3109
3110 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3111 {
3112     return int64_to_bfloat16_scalbn(a, scale, status);
3113 }
3114
3115 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3116 {
3117     return int64_to_bfloat16_scalbn(a, 0, status);
3118 }
3119
3120 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3121 {
3122     return int64_to_bfloat16_scalbn(a, 0, status);
3123 }
3124
3125 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3126 {
3127     return int64_to_bfloat16_scalbn(a, 0, status);
3128 }
3129
3130 float128 int64_to_float128(int64_t a, float_status *status)
3131 {
3132     FloatParts128 p;
3133
3134     parts_sint_to_float(&p, a, 0, status);
3135     return float128_round_pack_canonical(&p, status);
3136 }
3137
3138 float128 int32_to_float128(int32_t a, float_status *status)
3139 {
3140     return int64_to_float128(a, status);
3141 }
3142
3143 /*
3144  * Unsigned Integer to floating-point conversions
3145  */
3146
3147 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3148 {
3149     FloatParts64 p;
3150
3151     parts_uint_to_float(&p, a, scale, status);
3152     return float16_round_pack_canonical(&p, status);
3153 }
3154
3155 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3156 {
3157     return uint64_to_float16_scalbn(a, scale, status);
3158 }
3159
3160 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3161 {
3162     return uint64_to_float16_scalbn(a, scale, status);
3163 }
3164
3165 float16 uint64_to_float16(uint64_t a, float_status *status)
3166 {
3167     return uint64_to_float16_scalbn(a, 0, status);
3168 }
3169
3170 float16 uint32_to_float16(uint32_t a, float_status *status)
3171 {
3172     return uint64_to_float16_scalbn(a, 0, status);
3173 }
3174
3175 float16 uint16_to_float16(uint16_t a, float_status *status)
3176 {
3177     return uint64_to_float16_scalbn(a, 0, status);
3178 }
3179
3180 float16 uint8_to_float16(uint8_t a, float_status *status)
3181 {
3182     return uint64_to_float16_scalbn(a, 0, status);
3183 }
3184
3185 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3186 {
3187     FloatParts64 p;
3188
3189     parts_uint_to_float(&p, a, scale, status);
3190     return float32_round_pack_canonical(&p, status);
3191 }
3192
3193 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3194 {
3195     return uint64_to_float32_scalbn(a, scale, status);
3196 }
3197
3198 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3199 {
3200     return uint64_to_float32_scalbn(a, scale, status);
3201 }
3202
3203 float32 uint64_to_float32(uint64_t a, float_status *status)
3204 {
3205     return uint64_to_float32_scalbn(a, 0, status);
3206 }
3207
3208 float32 uint32_to_float32(uint32_t a, float_status *status)
3209 {
3210     return uint64_to_float32_scalbn(a, 0, status);
3211 }
3212
3213 float32 uint16_to_float32(uint16_t a, float_status *status)
3214 {
3215     return uint64_to_float32_scalbn(a, 0, status);
3216 }
3217
3218 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3219 {
3220     FloatParts64 p;
3221
3222     parts_uint_to_float(&p, a, scale, status);
3223     return float64_round_pack_canonical(&p, status);
3224 }
3225
3226 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3227 {
3228     return uint64_to_float64_scalbn(a, scale, status);
3229 }
3230
3231 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3232 {
3233     return uint64_to_float64_scalbn(a, scale, status);
3234 }
3235
3236 float64 uint64_to_float64(uint64_t a, float_status *status)
3237 {
3238     return uint64_to_float64_scalbn(a, 0, status);
3239 }
3240
3241 float64 uint32_to_float64(uint32_t a, float_status *status)
3242 {
3243     return uint64_to_float64_scalbn(a, 0, status);
3244 }
3245
3246 float64 uint16_to_float64(uint16_t a, float_status *status)
3247 {
3248     return uint64_to_float64_scalbn(a, 0, status);
3249 }
3250
3251 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3252 {
3253     FloatParts64 p;
3254
3255     parts_uint_to_float(&p, a, scale, status);
3256     return bfloat16_round_pack_canonical(&p, status);
3257 }
3258
3259 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3260 {
3261     return uint64_to_bfloat16_scalbn(a, scale, status);
3262 }
3263
3264 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3265 {
3266     return uint64_to_bfloat16_scalbn(a, scale, status);
3267 }
3268
3269 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3270 {
3271     return uint64_to_bfloat16_scalbn(a, 0, status);
3272 }
3273
3274 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3275 {
3276     return uint64_to_bfloat16_scalbn(a, 0, status);
3277 }
3278
3279 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3280 {
3281     return uint64_to_bfloat16_scalbn(a, 0, status);
3282 }
3283
3284 float128 uint64_to_float128(uint64_t a, float_status *status)
3285 {
3286     FloatParts128 p;
3287
3288     parts_uint_to_float(&p, a, 0, status);
3289     return float128_round_pack_canonical(&p, status);
3290 }
3291
3292 /*
3293  * Minimum and maximum
3294  */
3295
3296 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
3297 {
3298     FloatParts64 pa, pb, *pr;
3299
3300     float16_unpack_canonical(&pa, a, s);
3301     float16_unpack_canonical(&pb, b, s);
3302     pr = parts_minmax(&pa, &pb, s, flags);
3303
3304     return float16_round_pack_canonical(pr, s);
3305 }
3306
3307 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
3308                                 float_status *s, int flags)
3309 {
3310     FloatParts64 pa, pb, *pr;
3311
3312     bfloat16_unpack_canonical(&pa, a, s);
3313     bfloat16_unpack_canonical(&pb, b, s);
3314     pr = parts_minmax(&pa, &pb, s, flags);
3315
3316     return bfloat16_round_pack_canonical(pr, s);
3317 }
3318
3319 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
3320 {
3321     FloatParts64 pa, pb, *pr;
3322
3323     float32_unpack_canonical(&pa, a, s);
3324     float32_unpack_canonical(&pb, b, s);
3325     pr = parts_minmax(&pa, &pb, s, flags);
3326
3327     return float32_round_pack_canonical(pr, s);
3328 }
3329
3330 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
3331 {
3332     FloatParts64 pa, pb, *pr;
3333
3334     float64_unpack_canonical(&pa, a, s);
3335     float64_unpack_canonical(&pb, b, s);
3336     pr = parts_minmax(&pa, &pb, s, flags);
3337
3338     return float64_round_pack_canonical(pr, s);
3339 }
3340
3341 static float128 float128_minmax(float128 a, float128 b,
3342                                 float_status *s, int flags)
3343 {
3344     FloatParts128 pa, pb, *pr;
3345
3346     float128_unpack_canonical(&pa, a, s);
3347     float128_unpack_canonical(&pb, b, s);
3348     pr = parts_minmax(&pa, &pb, s, flags);
3349
3350     return float128_round_pack_canonical(pr, s);
3351 }
3352
3353 #define MINMAX_1(type, name, flags) \
3354     type type##_##name(type a, type b, float_status *s) \
3355     { return type##_minmax(a, b, s, flags); }
3356
3357 #define MINMAX_2(type) \
3358     MINMAX_1(type, max, 0)                                      \
3359     MINMAX_1(type, maxnum, minmax_isnum)                        \
3360     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)      \
3361     MINMAX_1(type, min, minmax_ismin)                           \
3362     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)         \
3363     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3364
3365 MINMAX_2(float16)
3366 MINMAX_2(bfloat16)
3367 MINMAX_2(float32)
3368 MINMAX_2(float64)
3369 MINMAX_2(float128)
3370
3371 #undef MINMAX_1
3372 #undef MINMAX_2
3373
3374 /*
3375  * Floating point compare
3376  */
3377
3378 static FloatRelation QEMU_FLATTEN
3379 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
3380 {
3381     FloatParts64 pa, pb;
3382
3383     float16_unpack_canonical(&pa, a, s);
3384     float16_unpack_canonical(&pb, b, s);
3385     return parts_compare(&pa, &pb, s, is_quiet);
3386 }
3387
3388 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
3389 {
3390     return float16_do_compare(a, b, s, false);
3391 }
3392
3393 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
3394 {
3395     return float16_do_compare(a, b, s, true);
3396 }
3397
3398 static FloatRelation QEMU_SOFTFLOAT_ATTR
3399 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
3400 {
3401     FloatParts64 pa, pb;
3402
3403     float32_unpack_canonical(&pa, a, s);
3404     float32_unpack_canonical(&pb, b, s);
3405     return parts_compare(&pa, &pb, s, is_quiet);
3406 }
3407
3408 static FloatRelation QEMU_FLATTEN
3409 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
3410 {
3411     union_float32 ua, ub;
3412
3413     ua.s = xa;
3414     ub.s = xb;
3415
3416     if (QEMU_NO_HARDFLOAT) {
3417         goto soft;
3418     }
3419
3420     float32_input_flush2(&ua.s, &ub.s, s);
3421     if (isgreaterequal(ua.h, ub.h)) {
3422         if (isgreater(ua.h, ub.h)) {
3423             return float_relation_greater;
3424         }
3425         return float_relation_equal;
3426     }
3427     if (likely(isless(ua.h, ub.h))) {
3428         return float_relation_less;
3429     }
3430     /*
3431      * The only condition remaining is unordered.
3432      * Fall through to set flags.
3433      */
3434  soft:
3435     return float32_do_compare(ua.s, ub.s, s, is_quiet);
3436 }
3437
3438 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
3439 {
3440     return float32_hs_compare(a, b, s, false);
3441 }
3442
3443 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
3444 {
3445     return float32_hs_compare(a, b, s, true);
3446 }
3447
3448 static FloatRelation QEMU_SOFTFLOAT_ATTR
3449 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
3450 {
3451     FloatParts64 pa, pb;
3452
3453     float64_unpack_canonical(&pa, a, s);
3454     float64_unpack_canonical(&pb, b, s);
3455     return parts_compare(&pa, &pb, s, is_quiet);
3456 }
3457
3458 static FloatRelation QEMU_FLATTEN
3459 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
3460 {
3461     union_float64 ua, ub;
3462
3463     ua.s = xa;
3464     ub.s = xb;
3465
3466     if (QEMU_NO_HARDFLOAT) {
3467         goto soft;
3468     }
3469
3470     float64_input_flush2(&ua.s, &ub.s, s);
3471     if (isgreaterequal(ua.h, ub.h)) {
3472         if (isgreater(ua.h, ub.h)) {
3473             return float_relation_greater;
3474         }
3475         return float_relation_equal;
3476     }
3477     if (likely(isless(ua.h, ub.h))) {
3478         return float_relation_less;
3479     }
3480     /*
3481      * The only condition remaining is unordered.
3482      * Fall through to set flags.
3483      */
3484  soft:
3485     return float64_do_compare(ua.s, ub.s, s, is_quiet);
3486 }
3487
3488 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
3489 {
3490     return float64_hs_compare(a, b, s, false);
3491 }
3492
3493 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
3494 {
3495     return float64_hs_compare(a, b, s, true);
3496 }
3497
3498 static FloatRelation QEMU_FLATTEN
3499 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
3500 {
3501     FloatParts64 pa, pb;
3502
3503     bfloat16_unpack_canonical(&pa, a, s);
3504     bfloat16_unpack_canonical(&pb, b, s);
3505     return parts_compare(&pa, &pb, s, is_quiet);
3506 }
3507
3508 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3509 {
3510     return bfloat16_do_compare(a, b, s, false);
3511 }
3512
3513 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3514 {
3515     return bfloat16_do_compare(a, b, s, true);
3516 }
3517
3518 static FloatRelation QEMU_FLATTEN
3519 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
3520 {
3521     FloatParts128 pa, pb;
3522
3523     float128_unpack_canonical(&pa, a, s);
3524     float128_unpack_canonical(&pb, b, s);
3525     return parts_compare(&pa, &pb, s, is_quiet);
3526 }
3527
3528 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
3529 {
3530     return float128_do_compare(a, b, s, false);
3531 }
3532
3533 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
3534 {
3535     return float128_do_compare(a, b, s, true);
3536 }
3537
3538 /*
3539  * Scale by 2**N
3540  */
3541
3542 float16 float16_scalbn(float16 a, int n, float_status *status)
3543 {
3544     FloatParts64 p;
3545
3546     float16_unpack_canonical(&p, a, status);
3547     parts_scalbn(&p, n, status);
3548     return float16_round_pack_canonical(&p, status);
3549 }
3550
3551 float32 float32_scalbn(float32 a, int n, float_status *status)
3552 {
3553     FloatParts64 p;
3554
3555     float32_unpack_canonical(&p, a, status);
3556     parts_scalbn(&p, n, status);
3557     return float32_round_pack_canonical(&p, status);
3558 }
3559
3560 float64 float64_scalbn(float64 a, int n, float_status *status)
3561 {
3562     FloatParts64 p;
3563
3564     float64_unpack_canonical(&p, a, status);
3565     parts_scalbn(&p, n, status);
3566     return float64_round_pack_canonical(&p, status);
3567 }
3568
3569 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3570 {
3571     FloatParts64 p;
3572
3573     bfloat16_unpack_canonical(&p, a, status);
3574     parts_scalbn(&p, n, status);
3575     return bfloat16_round_pack_canonical(&p, status);
3576 }
3577
3578 float128 float128_scalbn(float128 a, int n, float_status *status)
3579 {
3580     FloatParts128 p;
3581
3582     float128_unpack_canonical(&p, a, status);
3583     parts_scalbn(&p, n, status);
3584     return float128_round_pack_canonical(&p, status);
3585 }
3586
3587 /*
3588  * Square Root
3589  *
3590  * The old softfloat code did an approximation step before zeroing in
3591  * on the final result. However for simpleness we just compute the
3592  * square root by iterating down from the implicit bit to enough extra
3593  * bits to ensure we get a correctly rounded result.
3594  *
3595  * This does mean however the calculation is slower than before,
3596  * especially for 64 bit floats.
3597  */
3598
3599 static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
3600 {
3601     uint64_t a_frac, r_frac, s_frac;
3602     int bit, last_bit;
3603
3604     if (is_nan(a.cls)) {
3605         parts_return_nan(&a, s);
3606         return a;
3607     }
3608     if (a.cls == float_class_zero) {
3609         return a;  /* sqrt(+-0) = +-0 */
3610     }
3611     if (a.sign) {
3612         float_raise(float_flag_invalid, s);
3613         parts_default_nan(&a, s);
3614         return a;
3615     }
3616     if (a.cls == float_class_inf) {
3617         return a;  /* sqrt(+inf) = +inf */
3618     }
3619
3620     assert(a.cls == float_class_normal);
3621
3622     /* We need two overflow bits at the top. Adding room for that is a
3623      * right shift. If the exponent is odd, we can discard the low bit
3624      * by multiplying the fraction by 2; that's a left shift. Combine
3625      * those and we shift right by 1 if the exponent is odd, otherwise 2.
3626      */
3627     a_frac = a.frac >> (2 - (a.exp & 1));
3628     a.exp >>= 1;
3629
3630     /* Bit-by-bit computation of sqrt.  */
3631     r_frac = 0;
3632     s_frac = 0;
3633
3634     /* Iterate from implicit bit down to the 3 extra bits to compute a
3635      * properly rounded result. Remember we've inserted two more bits
3636      * at the top, so these positions are two less.
3637      */
3638     bit = DECOMPOSED_BINARY_POINT - 2;
3639     last_bit = MAX(p->frac_shift - 4, 0);
3640     do {
3641         uint64_t q = 1ULL << bit;
3642         uint64_t t_frac = s_frac + q;
3643         if (t_frac <= a_frac) {
3644             s_frac = t_frac + q;
3645             a_frac -= t_frac;
3646             r_frac += q;
3647         }
3648         a_frac <<= 1;
3649     } while (--bit >= last_bit);
3650
3651     /* Undo the right shift done above. If there is any remaining
3652      * fraction, the result is inexact. Set the sticky bit.
3653      */
3654     a.frac = (r_frac << 2) + (a_frac != 0);
3655
3656     return a;
3657 }
3658
3659 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
3660 {
3661     FloatParts64 pa, pr;
3662
3663     float16_unpack_canonical(&pa, a, status);
3664     pr = sqrt_float(pa, status, &float16_params);
3665     return float16_round_pack_canonical(&pr, status);
3666 }
3667
3668 static float32 QEMU_SOFTFLOAT_ATTR
3669 soft_f32_sqrt(float32 a, float_status *status)
3670 {
3671     FloatParts64 pa, pr;
3672
3673     float32_unpack_canonical(&pa, a, status);
3674     pr = sqrt_float(pa, status, &float32_params);
3675     return float32_round_pack_canonical(&pr, status);
3676 }
3677
3678 static float64 QEMU_SOFTFLOAT_ATTR
3679 soft_f64_sqrt(float64 a, float_status *status)
3680 {
3681     FloatParts64 pa, pr;
3682
3683     float64_unpack_canonical(&pa, a, status);
3684     pr = sqrt_float(pa, status, &float64_params);
3685     return float64_round_pack_canonical(&pr, status);
3686 }
3687
3688 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3689 {
3690     union_float32 ua, ur;
3691
3692     ua.s = xa;
3693     if (unlikely(!can_use_fpu(s))) {
3694         goto soft;
3695     }
3696
3697     float32_input_flush1(&ua.s, s);
3698     if (QEMU_HARDFLOAT_1F32_USE_FP) {
3699         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3700                        fpclassify(ua.h) == FP_ZERO) ||
3701                      signbit(ua.h))) {
3702             goto soft;
3703         }
3704     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3705                         float32_is_neg(ua.s))) {
3706         goto soft;
3707     }
3708     ur.h = sqrtf(ua.h);
3709     return ur.s;
3710
3711  soft:
3712     return soft_f32_sqrt(ua.s, s);
3713 }
3714
3715 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3716 {
3717     union_float64 ua, ur;
3718
3719     ua.s = xa;
3720     if (unlikely(!can_use_fpu(s))) {
3721         goto soft;
3722     }
3723
3724     float64_input_flush1(&ua.s, s);
3725     if (QEMU_HARDFLOAT_1F64_USE_FP) {
3726         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3727                        fpclassify(ua.h) == FP_ZERO) ||
3728                      signbit(ua.h))) {
3729             goto soft;
3730         }
3731     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3732                         float64_is_neg(ua.s))) {
3733         goto soft;
3734     }
3735     ur.h = sqrt(ua.h);
3736     return ur.s;
3737
3738  soft:
3739     return soft_f64_sqrt(ua.s, s);
3740 }
3741
3742 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3743 {
3744     FloatParts64 pa, pr;
3745
3746     bfloat16_unpack_canonical(&pa, a, status);
3747     pr = sqrt_float(pa, status, &bfloat16_params);
3748     return bfloat16_round_pack_canonical(&pr, status);
3749 }
3750
3751 /*----------------------------------------------------------------------------
3752 | The pattern for a default generated NaN.
3753 *----------------------------------------------------------------------------*/
3754
3755 float16 float16_default_nan(float_status *status)
3756 {
3757     FloatParts64 p;
3758
3759     parts_default_nan(&p, status);
3760     p.frac >>= float16_params.frac_shift;
3761     return float16_pack_raw(&p);
3762 }
3763
3764 float32 float32_default_nan(float_status *status)
3765 {
3766     FloatParts64 p;
3767
3768     parts_default_nan(&p, status);
3769     p.frac >>= float32_params.frac_shift;
3770     return float32_pack_raw(&p);
3771 }
3772
3773 float64 float64_default_nan(float_status *status)
3774 {
3775     FloatParts64 p;
3776
3777     parts_default_nan(&p, status);
3778     p.frac >>= float64_params.frac_shift;
3779     return float64_pack_raw(&p);
3780 }
3781
3782 float128 float128_default_nan(float_status *status)
3783 {
3784     FloatParts128 p;
3785
3786     parts_default_nan(&p, status);
3787     frac_shr(&p, float128_params.frac_shift);
3788     return float128_pack_raw(&p);
3789 }
3790
3791 bfloat16 bfloat16_default_nan(float_status *status)
3792 {
3793     FloatParts64 p;
3794
3795     parts_default_nan(&p, status);
3796     p.frac >>= bfloat16_params.frac_shift;
3797     return bfloat16_pack_raw(&p);
3798 }
3799
3800 /*----------------------------------------------------------------------------
3801 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3802 *----------------------------------------------------------------------------*/
3803
3804 float16 float16_silence_nan(float16 a, float_status *status)
3805 {
3806     FloatParts64 p;
3807
3808     float16_unpack_raw(&p, a);
3809     p.frac <<= float16_params.frac_shift;
3810     parts_silence_nan(&p, status);
3811     p.frac >>= float16_params.frac_shift;
3812     return float16_pack_raw(&p);
3813 }
3814
3815 float32 float32_silence_nan(float32 a, float_status *status)
3816 {
3817     FloatParts64 p;
3818
3819     float32_unpack_raw(&p, a);
3820     p.frac <<= float32_params.frac_shift;
3821     parts_silence_nan(&p, status);
3822     p.frac >>= float32_params.frac_shift;
3823     return float32_pack_raw(&p);
3824 }
3825
3826 float64 float64_silence_nan(float64 a, float_status *status)
3827 {
3828     FloatParts64 p;
3829
3830     float64_unpack_raw(&p, a);
3831     p.frac <<= float64_params.frac_shift;
3832     parts_silence_nan(&p, status);
3833     p.frac >>= float64_params.frac_shift;
3834     return float64_pack_raw(&p);
3835 }
3836
3837 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3838 {
3839     FloatParts64 p;
3840
3841     bfloat16_unpack_raw(&p, a);
3842     p.frac <<= bfloat16_params.frac_shift;
3843     parts_silence_nan(&p, status);
3844     p.frac >>= bfloat16_params.frac_shift;
3845     return bfloat16_pack_raw(&p);
3846 }
3847
3848 float128 float128_silence_nan(float128 a, float_status *status)
3849 {
3850     FloatParts128 p;
3851
3852     float128_unpack_raw(&p, a);
3853     frac_shl(&p, float128_params.frac_shift);
3854     parts_silence_nan(&p, status);
3855     frac_shr(&p, float128_params.frac_shift);
3856     return float128_pack_raw(&p);
3857 }
3858
3859 /*----------------------------------------------------------------------------
3860 | If `a' is denormal and we are in flush-to-zero mode then set the
3861 | input-denormal exception and return zero. Otherwise just return the value.
3862 *----------------------------------------------------------------------------*/
3863
3864 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
3865 {
3866     if (p.exp == 0 && p.frac != 0) {
3867         float_raise(float_flag_input_denormal, status);
3868         return true;
3869     }
3870
3871     return false;
3872 }
3873
3874 float16 float16_squash_input_denormal(float16 a, float_status *status)
3875 {
3876     if (status->flush_inputs_to_zero) {
3877         FloatParts64 p;
3878
3879         float16_unpack_raw(&p, a);
3880         if (parts_squash_denormal(p, status)) {
3881             return float16_set_sign(float16_zero, p.sign);
3882         }
3883     }
3884     return a;
3885 }
3886
3887 float32 float32_squash_input_denormal(float32 a, float_status *status)
3888 {
3889     if (status->flush_inputs_to_zero) {
3890         FloatParts64 p;
3891
3892         float32_unpack_raw(&p, a);
3893         if (parts_squash_denormal(p, status)) {
3894             return float32_set_sign(float32_zero, p.sign);
3895         }
3896     }
3897     return a;
3898 }
3899
3900 float64 float64_squash_input_denormal(float64 a, float_status *status)
3901 {
3902     if (status->flush_inputs_to_zero) {
3903         FloatParts64 p;
3904
3905         float64_unpack_raw(&p, a);
3906         if (parts_squash_denormal(p, status)) {
3907             return float64_set_sign(float64_zero, p.sign);
3908         }
3909     }
3910     return a;
3911 }
3912
3913 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3914 {
3915     if (status->flush_inputs_to_zero) {
3916         FloatParts64 p;
3917
3918         bfloat16_unpack_raw(&p, a);
3919         if (parts_squash_denormal(p, status)) {
3920             return bfloat16_set_sign(bfloat16_zero, p.sign);
3921         }
3922     }
3923     return a;
3924 }
3925
3926 /*----------------------------------------------------------------------------
3927 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3928 | and 7, and returns the properly rounded 32-bit integer corresponding to the
3929 | input.  If `zSign' is 1, the input is negated before being converted to an
3930 | integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
3931 | is simply rounded to an integer, with the inexact exception raised if the
3932 | input cannot be represented exactly as an integer.  However, if the fixed-
3933 | point input is too large, the invalid exception is raised and the largest
3934 | positive or negative integer is returned.
3935 *----------------------------------------------------------------------------*/
3936
3937 static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
3938                                  float_status *status)
3939 {
3940     int8_t roundingMode;
3941     bool roundNearestEven;
3942     int8_t roundIncrement, roundBits;
3943     int32_t z;
3944
3945     roundingMode = status->float_rounding_mode;
3946     roundNearestEven = ( roundingMode == float_round_nearest_even );
3947     switch (roundingMode) {
3948     case float_round_nearest_even:
3949     case float_round_ties_away:
3950         roundIncrement = 0x40;
3951         break;
3952     case float_round_to_zero:
3953         roundIncrement = 0;
3954         break;
3955     case float_round_up:
3956         roundIncrement = zSign ? 0 : 0x7f;
3957         break;
3958     case float_round_down:
3959         roundIncrement = zSign ? 0x7f : 0;
3960         break;
3961     case float_round_to_odd:
3962         roundIncrement = absZ & 0x80 ? 0 : 0x7f;
3963         break;
3964     default:
3965         abort();
3966     }
3967     roundBits = absZ & 0x7F;
3968     absZ = ( absZ + roundIncrement )>>7;
3969     if (!(roundBits ^ 0x40) && roundNearestEven) {
3970         absZ &= ~1;
3971     }
3972     z = absZ;
3973     if ( zSign ) z = - z;
3974     if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
3975         float_raise(float_flag_invalid, status);
3976         return zSign ? INT32_MIN : INT32_MAX;
3977     }
3978     if (roundBits) {
3979         float_raise(float_flag_inexact, status);
3980     }
3981     return z;
3982
3983 }
3984
3985 /*----------------------------------------------------------------------------
3986 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3987 | `absZ1', with binary point between bits 63 and 64 (between the input words),
3988 | and returns the properly rounded 64-bit integer corresponding to the input.
3989 | If `zSign' is 1, the input is negated before being converted to an integer.
3990 | Ordinarily, the fixed-point input is simply rounded to an integer, with
3991 | the inexact exception raised if the input cannot be represented exactly as
3992 | an integer.  However, if the fixed-point input is too large, the invalid
3993 | exception is raised and the largest positive or negative integer is
3994 | returned.
3995 *----------------------------------------------------------------------------*/
3996
3997 static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
3998                                float_status *status)
3999 {
4000     int8_t roundingMode;
4001     bool roundNearestEven, increment;
4002     int64_t z;
4003
4004     roundingMode = status->float_rounding_mode;
4005     roundNearestEven = ( roundingMode == float_round_nearest_even );
4006     switch (roundingMode) {
4007     case float_round_nearest_even:
4008     case float_round_ties_away:
4009         increment = ((int64_t) absZ1 < 0);
4010         break;
4011     case float_round_to_zero:
4012         increment = 0;
4013         break;
4014     case float_round_up:
4015         increment = !zSign && absZ1;
4016         break;
4017     case float_round_down:
4018         increment = zSign && absZ1;
4019         break;
4020     case float_round_to_odd:
4021         increment = !(absZ0 & 1) && absZ1;
4022         break;
4023     default:
4024         abort();
4025     }
4026     if ( increment ) {
4027         ++absZ0;
4028         if ( absZ0 == 0 ) goto overflow;
4029         if (!(absZ1 << 1) && roundNearestEven) {
4030             absZ0 &= ~1;
4031         }
4032     }
4033     z = absZ0;
4034     if ( zSign ) z = - z;
4035     if ( z && ( ( z < 0 ) ^ zSign ) ) {
4036  overflow:
4037         float_raise(float_flag_invalid, status);
4038         return zSign ? INT64_MIN : INT64_MAX;
4039     }
4040     if (absZ1) {
4041         float_raise(float_flag_inexact, status);
4042     }
4043     return z;
4044
4045 }
4046
4047 /*----------------------------------------------------------------------------
4048 | Normalizes the subnormal single-precision floating-point value represented
4049 | by the denormalized significand `aSig'.  The normalized exponent and
4050 | significand are stored at the locations pointed to by `zExpPtr' and
4051 | `zSigPtr', respectively.
4052 *----------------------------------------------------------------------------*/
4053
4054 static void
4055  normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
4056 {
4057     int8_t shiftCount;
4058
4059     shiftCount = clz32(aSig) - 8;
4060     *zSigPtr = aSig<<shiftCount;
4061     *zExpPtr = 1 - shiftCount;
4062
4063 }
4064
4065 /*----------------------------------------------------------------------------
4066 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4067 | and significand `zSig', and returns the proper single-precision floating-
4068 | point value corresponding to the abstract input.  Ordinarily, the abstract
4069 | value is simply rounded and packed into the single-precision format, with
4070 | the inexact exception raised if the abstract input cannot be represented
4071 | exactly.  However, if the abstract value is too large, the overflow and
4072 | inexact exceptions are raised and an infinity or maximal finite value is
4073 | returned.  If the abstract value is too small, the input value is rounded to
4074 | a subnormal number, and the underflow and inexact exceptions are raised if
4075 | the abstract input cannot be represented exactly as a subnormal single-
4076 | precision floating-point number.
4077 |     The input significand `zSig' has its binary point between bits 30
4078 | and 29, which is 7 bits to the left of the usual location.  This shifted
4079 | significand must be normalized or smaller.  If `zSig' is not normalized,
4080 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4081 | and it must not require rounding.  In the usual case that `zSig' is
4082 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4083 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4084 | Binary Floating-Point Arithmetic.
4085 *----------------------------------------------------------------------------*/
4086
4087 static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4088                                    float_status *status)
4089 {
4090     int8_t roundingMode;
4091     bool roundNearestEven;
4092     int8_t roundIncrement, roundBits;
4093     bool isTiny;
4094
4095     roundingMode = status->float_rounding_mode;
4096     roundNearestEven = ( roundingMode == float_round_nearest_even );
4097     switch (roundingMode) {
4098     case float_round_nearest_even:
4099     case float_round_ties_away:
4100         roundIncrement = 0x40;
4101         break;
4102     case float_round_to_zero:
4103         roundIncrement = 0;
4104         break;
4105     case float_round_up:
4106         roundIncrement = zSign ? 0 : 0x7f;
4107         break;
4108     case float_round_down:
4109         roundIncrement = zSign ? 0x7f : 0;
4110         break;
4111     case float_round_to_odd:
4112         roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4113         break;
4114     default:
4115         abort();
4116         break;
4117     }
4118     roundBits = zSig & 0x7F;
4119     if ( 0xFD <= (uint16_t) zExp ) {
4120         if (    ( 0xFD < zExp )
4121              || (    ( zExp == 0xFD )
4122                   && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
4123            ) {
4124             bool overflow_to_inf = roundingMode != float_round_to_odd &&
4125                                    roundIncrement != 0;
4126             float_raise(float_flag_overflow | float_flag_inexact, status);
4127             return packFloat32(zSign, 0xFF, -!overflow_to_inf);
4128         }
4129         if ( zExp < 0 ) {
4130             if (status->flush_to_zero) {
4131                 float_raise(float_flag_output_denormal, status);
4132                 return packFloat32(zSign, 0, 0);
4133             }
4134             isTiny = status->tininess_before_rounding
4135                   || (zExp < -1)
4136                   || (zSig + roundIncrement < 0x80000000);
4137             shift32RightJamming( zSig, - zExp, &zSig );
4138             zExp = 0;
4139             roundBits = zSig & 0x7F;
4140             if (isTiny && roundBits) {
4141                 float_raise(float_flag_underflow, status);
4142             }
4143             if (roundingMode == float_round_to_odd) {
4144                 /*
4145                  * For round-to-odd case, the roundIncrement depends on
4146                  * zSig which just changed.
4147                  */
4148                 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4149             }
4150         }
4151     }
4152     if (roundBits) {
4153         float_raise(float_flag_inexact, status);
4154     }
4155     zSig = ( zSig + roundIncrement )>>7;
4156     if (!(roundBits ^ 0x40) && roundNearestEven) {
4157         zSig &= ~1;
4158     }
4159     if ( zSig == 0 ) zExp = 0;
4160     return packFloat32( zSign, zExp, zSig );
4161
4162 }
4163
4164 /*----------------------------------------------------------------------------
4165 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4166 | and significand `zSig', and returns the proper single-precision floating-
4167 | point value corresponding to the abstract input.  This routine is just like
4168 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4169 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4170 | floating-point exponent.
4171 *----------------------------------------------------------------------------*/
4172
4173 static float32
4174  normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4175                               float_status *status)
4176 {
4177     int8_t shiftCount;
4178
4179     shiftCount = clz32(zSig) - 1;
4180     return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4181                                status);
4182
4183 }
4184
4185 /*----------------------------------------------------------------------------
4186 | Normalizes the subnormal double-precision floating-point value represented
4187 | by the denormalized significand `aSig'.  The normalized exponent and
4188 | significand are stored at the locations pointed to by `zExpPtr' and
4189 | `zSigPtr', respectively.
4190 *----------------------------------------------------------------------------*/
4191
4192 static void
4193  normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
4194 {
4195     int8_t shiftCount;
4196
4197     shiftCount = clz64(aSig) - 11;
4198     *zSigPtr = aSig<<shiftCount;
4199     *zExpPtr = 1 - shiftCount;
4200
4201 }
4202
4203 /*----------------------------------------------------------------------------
4204 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4205 | double-precision floating-point value, returning the result.  After being
4206 | shifted into the proper positions, the three fields are simply added
4207 | together to form the result.  This means that any integer portion of `zSig'
4208 | will be added into the exponent.  Since a properly normalized significand
4209 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4210 | than the desired result exponent whenever `zSig' is a complete, normalized
4211 | significand.
4212 *----------------------------------------------------------------------------*/
4213
4214 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4215 {
4216
4217     return make_float64(
4218         ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4219
4220 }
4221
4222 /*----------------------------------------------------------------------------
4223 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4224 | and significand `zSig', and returns the proper double-precision floating-
4225 | point value corresponding to the abstract input.  Ordinarily, the abstract
4226 | value is simply rounded and packed into the double-precision format, with
4227 | the inexact exception raised if the abstract input cannot be represented
4228 | exactly.  However, if the abstract value is too large, the overflow and
4229 | inexact exceptions are raised and an infinity or maximal finite value is
4230 | returned.  If the abstract value is too small, the input value is rounded to
4231 | a subnormal number, and the underflow and inexact exceptions are raised if
4232 | the abstract input cannot be represented exactly as a subnormal double-
4233 | precision floating-point number.
4234 |     The input significand `zSig' has its binary point between bits 62
4235 | and 61, which is 10 bits to the left of the usual location.  This shifted
4236 | significand must be normalized or smaller.  If `zSig' is not normalized,
4237 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4238 | and it must not require rounding.  In the usual case that `zSig' is
4239 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4240 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4241 | Binary Floating-Point Arithmetic.
4242 *----------------------------------------------------------------------------*/
4243
4244 static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4245                                    float_status *status)
4246 {
4247     int8_t roundingMode;
4248     bool roundNearestEven;
4249     int roundIncrement, roundBits;
4250     bool isTiny;
4251
4252     roundingMode = status->float_rounding_mode;
4253     roundNearestEven = ( roundingMode == float_round_nearest_even );
4254     switch (roundingMode) {
4255     case float_round_nearest_even:
4256     case float_round_ties_away:
4257         roundIncrement = 0x200;
4258         break;
4259     case float_round_to_zero:
4260         roundIncrement = 0;
4261         break;
4262     case float_round_up:
4263         roundIncrement = zSign ? 0 : 0x3ff;
4264         break;
4265     case float_round_down:
4266         roundIncrement = zSign ? 0x3ff : 0;
4267         break;
4268     case float_round_to_odd:
4269         roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4270         break;
4271     default:
4272         abort();
4273     }
4274     roundBits = zSig & 0x3FF;
4275     if ( 0x7FD <= (uint16_t) zExp ) {
4276         if (    ( 0x7FD < zExp )
4277              || (    ( zExp == 0x7FD )
4278                   && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
4279            ) {
4280             bool overflow_to_inf = roundingMode != float_round_to_odd &&
4281                                    roundIncrement != 0;
4282             float_raise(float_flag_overflow | float_flag_inexact, status);
4283             return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
4284         }
4285         if ( zExp < 0 ) {
4286             if (status->flush_to_zero) {
4287                 float_raise(float_flag_output_denormal, status);
4288                 return packFloat64(zSign, 0, 0);
4289             }
4290             isTiny = status->tininess_before_rounding
4291                   || (zExp < -1)
4292                   || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
4293             shift64RightJamming( zSig, - zExp, &zSig );
4294             zExp = 0;
4295             roundBits = zSig & 0x3FF;
4296             if (isTiny && roundBits) {
4297                 float_raise(float_flag_underflow, status);
4298             }
4299             if (roundingMode == float_round_to_odd) {
4300                 /*
4301                  * For round-to-odd case, the roundIncrement depends on
4302                  * zSig which just changed.
4303                  */
4304                 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4305             }
4306         }
4307     }
4308     if (roundBits) {
4309         float_raise(float_flag_inexact, status);
4310     }
4311     zSig = ( zSig + roundIncrement )>>10;
4312     if (!(roundBits ^ 0x200) && roundNearestEven) {
4313         zSig &= ~1;
4314     }
4315     if ( zSig == 0 ) zExp = 0;
4316     return packFloat64( zSign, zExp, zSig );
4317
4318 }
4319
4320 /*----------------------------------------------------------------------------
4321 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4322 | and significand `zSig', and returns the proper double-precision floating-
4323 | point value corresponding to the abstract input.  This routine is just like
4324 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4325 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4326 | floating-point exponent.
4327 *----------------------------------------------------------------------------*/
4328
4329 static float64
4330  normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4331                               float_status *status)
4332 {
4333     int8_t shiftCount;
4334
4335     shiftCount = clz64(zSig) - 1;
4336     return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4337                                status);
4338
4339 }
4340
4341 /*----------------------------------------------------------------------------
4342 | Normalizes the subnormal extended double-precision floating-point value
4343 | represented by the denormalized significand `aSig'.  The normalized exponent
4344 | and significand are stored at the locations pointed to by `zExpPtr' and
4345 | `zSigPtr', respectively.
4346 *----------------------------------------------------------------------------*/
4347
4348 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4349                                 uint64_t *zSigPtr)
4350 {
4351     int8_t shiftCount;
4352
4353     shiftCount = clz64(aSig);
4354     *zSigPtr = aSig<<shiftCount;
4355     *zExpPtr = 1 - shiftCount;
4356 }
4357
4358 /*----------------------------------------------------------------------------
4359 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4360 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4361 | and returns the proper extended double-precision floating-point value
4362 | corresponding to the abstract input.  Ordinarily, the abstract value is
4363 | rounded and packed into the extended double-precision format, with the
4364 | inexact exception raised if the abstract input cannot be represented
4365 | exactly.  However, if the abstract value is too large, the overflow and
4366 | inexact exceptions are raised and an infinity or maximal finite value is
4367 | returned.  If the abstract value is too small, the input value is rounded to
4368 | a subnormal number, and the underflow and inexact exceptions are raised if
4369 | the abstract input cannot be represented exactly as a subnormal extended
4370 | double-precision floating-point number.
4371 |     If `roundingPrecision' is 32 or 64, the result is rounded to the same
4372 | number of bits as single or double precision, respectively.  Otherwise, the
4373 | result is rounded to the full precision of the extended double-precision
4374 | format.
4375 |     The input significand must be normalized or smaller.  If the input
4376 | significand is not normalized, `zExp' must be 0; in that case, the result
4377 | returned is a subnormal number, and it must not require rounding.  The
4378 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4379 | Floating-Point Arithmetic.
4380 *----------------------------------------------------------------------------*/
4381
4382 floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
4383                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4384                               float_status *status)
4385 {
4386     int8_t roundingMode;
4387     bool roundNearestEven, increment, isTiny;
4388     int64_t roundIncrement, roundMask, roundBits;
4389
4390     roundingMode = status->float_rounding_mode;
4391     roundNearestEven = ( roundingMode == float_round_nearest_even );
4392     if ( roundingPrecision == 80 ) goto precision80;
4393     if ( roundingPrecision == 64 ) {
4394         roundIncrement = UINT64_C(0x0000000000000400);
4395         roundMask = UINT64_C(0x00000000000007FF);
4396     }
4397     else if ( roundingPrecision == 32 ) {
4398         roundIncrement = UINT64_C(0x0000008000000000);
4399         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4400     }
4401     else {
4402         goto precision80;
4403     }
4404     zSig0 |= ( zSig1 != 0 );
4405     switch (roundingMode) {
4406     case float_round_nearest_even:
4407     case float_round_ties_away:
4408         break;
4409     case float_round_to_zero:
4410         roundIncrement = 0;
4411         break;
4412     case float_round_up:
4413         roundIncrement = zSign ? 0 : roundMask;
4414         break;
4415     case float_round_down:
4416         roundIncrement = zSign ? roundMask : 0;
4417         break;
4418     default:
4419         abort();
4420     }
4421     roundBits = zSig0 & roundMask;
4422     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4423         if (    ( 0x7FFE < zExp )
4424              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4425            ) {
4426             goto overflow;
4427         }
4428         if ( zExp <= 0 ) {
4429             if (status->flush_to_zero) {
4430                 float_raise(float_flag_output_denormal, status);
4431                 return packFloatx80(zSign, 0, 0);
4432             }
4433             isTiny = status->tininess_before_rounding
4434                   || (zExp < 0 )
4435                   || (zSig0 <= zSig0 + roundIncrement);
4436             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4437             zExp = 0;
4438             roundBits = zSig0 & roundMask;
4439             if (isTiny && roundBits) {
4440                 float_raise(float_flag_underflow, status);
4441             }
4442             if (roundBits) {
4443                 float_raise(float_flag_inexact, status);
4444             }
4445             zSig0 += roundIncrement;
4446             if ( (int64_t) zSig0 < 0 ) zExp = 1;
4447             roundIncrement = roundMask + 1;
4448             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4449                 roundMask |= roundIncrement;
4450             }
4451             zSig0 &= ~ roundMask;
4452             return packFloatx80( zSign, zExp, zSig0 );
4453         }
4454     }
4455     if (roundBits) {
4456         float_raise(float_flag_inexact, status);
4457     }
4458     zSig0 += roundIncrement;
4459     if ( zSig0 < roundIncrement ) {
4460         ++zExp;
4461         zSig0 = UINT64_C(0x8000000000000000);
4462     }
4463     roundIncrement = roundMask + 1;
4464     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4465         roundMask |= roundIncrement;
4466     }
4467     zSig0 &= ~ roundMask;
4468     if ( zSig0 == 0 ) zExp = 0;
4469     return packFloatx80( zSign, zExp, zSig0 );
4470  precision80:
4471     switch (roundingMode) {
4472     case float_round_nearest_even:
4473     case float_round_ties_away:
4474         increment = ((int64_t)zSig1 < 0);
4475         break;
4476     case float_round_to_zero:
4477         increment = 0;
4478         break;
4479     case float_round_up:
4480         increment = !zSign && zSig1;
4481         break;
4482     case float_round_down:
4483         increment = zSign && zSig1;
4484         break;
4485     default:
4486         abort();
4487     }
4488     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4489         if (    ( 0x7FFE < zExp )
4490              || (    ( zExp == 0x7FFE )
4491                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4492                   && increment
4493                 )
4494            ) {
4495             roundMask = 0;
4496  overflow:
4497             float_raise(float_flag_overflow | float_flag_inexact, status);
4498             if (    ( roundingMode == float_round_to_zero )
4499                  || ( zSign && ( roundingMode == float_round_up ) )
4500                  || ( ! zSign && ( roundingMode == float_round_down ) )
4501                ) {
4502                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4503             }
4504             return packFloatx80(zSign,
4505                                 floatx80_infinity_high,
4506                                 floatx80_infinity_low);
4507         }
4508         if ( zExp <= 0 ) {
4509             isTiny = status->tininess_before_rounding
4510                   || (zExp < 0)
4511                   || !increment
4512                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4513             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4514             zExp = 0;
4515             if (isTiny && zSig1) {
4516                 float_raise(float_flag_underflow, status);
4517             }
4518             if (zSig1) {
4519                 float_raise(float_flag_inexact, status);
4520             }
4521             switch (roundingMode) {
4522             case float_round_nearest_even:
4523             case float_round_ties_away:
4524                 increment = ((int64_t)zSig1 < 0);
4525                 break;
4526             case float_round_to_zero:
4527                 increment = 0;
4528                 break;
4529             case float_round_up:
4530                 increment = !zSign && zSig1;
4531                 break;
4532             case float_round_down:
4533                 increment = zSign && zSig1;
4534                 break;
4535             default:
4536                 abort();
4537             }
4538             if ( increment ) {
4539                 ++zSig0;
4540                 if (!(zSig1 << 1) && roundNearestEven) {
4541                     zSig0 &= ~1;
4542                 }
4543                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4544             }
4545             return packFloatx80( zSign, zExp, zSig0 );
4546         }
4547     }
4548     if (zSig1) {
4549         float_raise(float_flag_inexact, status);
4550     }
4551     if ( increment ) {
4552         ++zSig0;
4553         if ( zSig0 == 0 ) {
4554             ++zExp;
4555             zSig0 = UINT64_C(0x8000000000000000);
4556         }
4557         else {
4558             if (!(zSig1 << 1) && roundNearestEven) {
4559                 zSig0 &= ~1;
4560             }
4561         }
4562     }
4563     else {
4564         if ( zSig0 == 0 ) zExp = 0;
4565     }
4566     return packFloatx80( zSign, zExp, zSig0 );
4567
4568 }
4569
4570 /*----------------------------------------------------------------------------
4571 | Takes an abstract floating-point value having sign `zSign', exponent
4572 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4573 | and returns the proper extended double-precision floating-point value
4574 | corresponding to the abstract input.  This routine is just like
4575 | `roundAndPackFloatx80' except that the input significand does not have to be
4576 | normalized.
4577 *----------------------------------------------------------------------------*/
4578
4579 floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4580                                        bool zSign, int32_t zExp,
4581                                        uint64_t zSig0, uint64_t zSig1,
4582                                        float_status *status)
4583 {
4584     int8_t shiftCount;
4585
4586     if ( zSig0 == 0 ) {
4587         zSig0 = zSig1;
4588         zSig1 = 0;
4589         zExp -= 64;
4590     }
4591     shiftCount = clz64(zSig0);
4592     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4593     zExp -= shiftCount;
4594     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4595                                 zSig0, zSig1, status);
4596
4597 }
4598
4599 /*----------------------------------------------------------------------------
4600 | Returns the least-significant 64 fraction bits of the quadruple-precision
4601 | floating-point value `a'.
4602 *----------------------------------------------------------------------------*/
4603
4604 static inline uint64_t extractFloat128Frac1( float128 a )
4605 {
4606
4607     return a.low;
4608
4609 }
4610
4611 /*----------------------------------------------------------------------------
4612 | Returns the most-significant 48 fraction bits of the quadruple-precision
4613 | floating-point value `a'.
4614 *----------------------------------------------------------------------------*/
4615
4616 static inline uint64_t extractFloat128Frac0( float128 a )
4617 {
4618
4619     return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4620
4621 }
4622
4623 /*----------------------------------------------------------------------------
4624 | Returns the exponent bits of the quadruple-precision floating-point value
4625 | `a'.
4626 *----------------------------------------------------------------------------*/
4627
4628 static inline int32_t extractFloat128Exp( float128 a )
4629 {
4630
4631     return ( a.high>>48 ) & 0x7FFF;
4632
4633 }
4634
4635 /*----------------------------------------------------------------------------
4636 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4637 *----------------------------------------------------------------------------*/
4638
4639 static inline bool extractFloat128Sign(float128 a)
4640 {
4641     return a.high >> 63;
4642 }
4643
4644 /*----------------------------------------------------------------------------
4645 | Normalizes the subnormal quadruple-precision floating-point value
4646 | represented by the denormalized significand formed by the concatenation of
4647 | `aSig0' and `aSig1'.  The normalized exponent is stored at the location
4648 | pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
4649 | significand are stored at the location pointed to by `zSig0Ptr', and the
4650 | least significant 64 bits of the normalized significand are stored at the
4651 | location pointed to by `zSig1Ptr'.
4652 *----------------------------------------------------------------------------*/
4653
4654 static void
4655  normalizeFloat128Subnormal(
4656      uint64_t aSig0,
4657      uint64_t aSig1,
4658      int32_t *zExpPtr,
4659      uint64_t *zSig0Ptr,
4660      uint64_t *zSig1Ptr
4661  )
4662 {
4663     int8_t shiftCount;
4664
4665     if ( aSig0 == 0 ) {
4666         shiftCount = clz64(aSig1) - 15;
4667         if ( shiftCount < 0 ) {
4668             *zSig0Ptr = aSig1>>( - shiftCount );
4669             *zSig1Ptr = aSig1<<( shiftCount & 63 );
4670         }
4671         else {
4672             *zSig0Ptr = aSig1<<shiftCount;
4673             *zSig1Ptr = 0;
4674         }
4675         *zExpPtr = - shiftCount - 63;
4676     }
4677     else {
4678         shiftCount = clz64(aSig0) - 15;
4679         shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
4680         *zExpPtr = 1 - shiftCount;
4681     }
4682
4683 }
4684
4685 /*----------------------------------------------------------------------------
4686 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4687 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4688 | floating-point value, returning the result.  After being shifted into the
4689 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4690 | added together to form the most significant 32 bits of the result.  This
4691 | means that any integer portion of `zSig0' will be added into the exponent.
4692 | Since a properly normalized significand will have an integer portion equal
4693 | to 1, the `zExp' input should be 1 less than the desired result exponent
4694 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4695 | significand.
4696 *----------------------------------------------------------------------------*/
4697
4698 static inline float128
4699 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4700 {
4701     float128 z;
4702
4703     z.low = zSig1;
4704     z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4705     return z;
4706 }
4707
4708 /*----------------------------------------------------------------------------
4709 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4710 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4711 | and `zSig2', and returns the proper quadruple-precision floating-point value
4712 | corresponding to the abstract input.  Ordinarily, the abstract value is
4713 | simply rounded and packed into the quadruple-precision format, with the
4714 | inexact exception raised if the abstract input cannot be represented
4715 | exactly.  However, if the abstract value is too large, the overflow and
4716 | inexact exceptions are raised and an infinity or maximal finite value is
4717 | returned.  If the abstract value is too small, the input value is rounded to
4718 | a subnormal number, and the underflow and inexact exceptions are raised if
4719 | the abstract input cannot be represented exactly as a subnormal quadruple-
4720 | precision floating-point number.
4721 |     The input significand must be normalized or smaller.  If the input
4722 | significand is not normalized, `zExp' must be 0; in that case, the result
4723 | returned is a subnormal number, and it must not require rounding.  In the
4724 | usual case that the input significand is normalized, `zExp' must be 1 less
4725 | than the ``true'' floating-point exponent.  The handling of underflow and
4726 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4727 *----------------------------------------------------------------------------*/
4728
4729 static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
4730                                      uint64_t zSig0, uint64_t zSig1,
4731                                      uint64_t zSig2, float_status *status)
4732 {
4733     int8_t roundingMode;
4734     bool roundNearestEven, increment, isTiny;
4735
4736     roundingMode = status->float_rounding_mode;
4737     roundNearestEven = ( roundingMode == float_round_nearest_even );
4738     switch (roundingMode) {
4739     case float_round_nearest_even:
4740     case float_round_ties_away:
4741         increment = ((int64_t)zSig2 < 0);
4742         break;
4743     case float_round_to_zero:
4744         increment = 0;
4745         break;
4746     case float_round_up:
4747         increment = !zSign && zSig2;
4748         break;
4749     case float_round_down:
4750         increment = zSign && zSig2;
4751         break;
4752     case float_round_to_odd:
4753         increment = !(zSig1 & 0x1) && zSig2;
4754         break;
4755     default:
4756         abort();
4757     }
4758     if ( 0x7FFD <= (uint32_t) zExp ) {
4759         if (    ( 0x7FFD < zExp )
4760              || (    ( zExp == 0x7FFD )
4761                   && eq128(
4762                          UINT64_C(0x0001FFFFFFFFFFFF),
4763                          UINT64_C(0xFFFFFFFFFFFFFFFF),
4764                          zSig0,
4765                          zSig1
4766                      )
4767                   && increment
4768                 )
4769            ) {
4770             float_raise(float_flag_overflow | float_flag_inexact, status);
4771             if (    ( roundingMode == float_round_to_zero )
4772                  || ( zSign && ( roundingMode == float_round_up ) )
4773                  || ( ! zSign && ( roundingMode == float_round_down ) )
4774                  || (roundingMode == float_round_to_odd)
4775                ) {
4776                 return
4777                     packFloat128(
4778                         zSign,
4779                         0x7FFE,
4780                         UINT64_C(0x0000FFFFFFFFFFFF),
4781                         UINT64_C(0xFFFFFFFFFFFFFFFF)
4782                     );
4783             }
4784             return packFloat128( zSign, 0x7FFF, 0, 0 );
4785         }
4786         if ( zExp < 0 ) {
4787             if (status->flush_to_zero) {
4788                 float_raise(float_flag_output_denormal, status);
4789                 return packFloat128(zSign, 0, 0, 0);
4790             }
4791             isTiny = status->tininess_before_rounding
4792                   || (zExp < -1)
4793                   || !increment
4794                   || lt128(zSig0, zSig1,
4795                            UINT64_C(0x0001FFFFFFFFFFFF),
4796                            UINT64_C(0xFFFFFFFFFFFFFFFF));
4797             shift128ExtraRightJamming(
4798                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4799             zExp = 0;
4800             if (isTiny && zSig2) {
4801                 float_raise(float_flag_underflow, status);
4802             }
4803             switch (roundingMode) {
4804             case float_round_nearest_even:
4805             case float_round_ties_away:
4806                 increment = ((int64_t)zSig2 < 0);
4807                 break;
4808             case float_round_to_zero:
4809                 increment = 0;
4810                 break;
4811             case float_round_up:
4812                 increment = !zSign && zSig2;
4813                 break;
4814             case float_round_down:
4815                 increment = zSign && zSig2;
4816                 break;
4817             case float_round_to_odd:
4818                 increment = !(zSig1 & 0x1) && zSig2;
4819                 break;
4820             default:
4821                 abort();
4822             }
4823         }
4824     }
4825     if (zSig2) {
4826         float_raise(float_flag_inexact, status);
4827     }
4828     if ( increment ) {
4829         add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
4830         if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4831             zSig1 &= ~1;
4832         }
4833     }
4834     else {
4835         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4836     }
4837     return packFloat128( zSign, zExp, zSig0, zSig1 );
4838
4839 }
4840
4841 /*----------------------------------------------------------------------------
4842 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4843 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4844 | returns the proper quadruple-precision floating-point value corresponding
4845 | to the abstract input.  This routine is just like `roundAndPackFloat128'
4846 | except that the input significand has fewer bits and does not have to be
4847 | normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
4848 | point exponent.
4849 *----------------------------------------------------------------------------*/
4850
4851 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
4852                                               uint64_t zSig0, uint64_t zSig1,
4853                                               float_status *status)
4854 {
4855     int8_t shiftCount;
4856     uint64_t zSig2;
4857
4858     if ( zSig0 == 0 ) {
4859         zSig0 = zSig1;
4860         zSig1 = 0;
4861         zExp -= 64;
4862     }
4863     shiftCount = clz64(zSig0) - 15;
4864     if ( 0 <= shiftCount ) {
4865         zSig2 = 0;
4866         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4867     }
4868     else {
4869         shift128ExtraRightJamming(
4870             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4871     }
4872     zExp -= shiftCount;
4873     return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
4874
4875 }
4876
4877
4878 /*----------------------------------------------------------------------------
4879 | Returns the result of converting the 32-bit two's complement integer `a'
4880 | to the extended double-precision floating-point format.  The conversion
4881 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4882 | Arithmetic.
4883 *----------------------------------------------------------------------------*/
4884
4885 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4886 {
4887     bool zSign;
4888     uint32_t absA;
4889     int8_t shiftCount;
4890     uint64_t zSig;
4891
4892     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4893     zSign = ( a < 0 );
4894     absA = zSign ? - a : a;
4895     shiftCount = clz32(absA) + 32;
4896     zSig = absA;
4897     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4898
4899 }
4900
4901 /*----------------------------------------------------------------------------
4902 | Returns the result of converting the 64-bit two's complement integer `a'
4903 | to the extended double-precision floating-point format.  The conversion
4904 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4905 | Arithmetic.
4906 *----------------------------------------------------------------------------*/
4907
4908 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4909 {
4910     bool zSign;
4911     uint64_t absA;
4912     int8_t shiftCount;
4913
4914     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4915     zSign = ( a < 0 );
4916     absA = zSign ? - a : a;
4917     shiftCount = clz64(absA);
4918     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4919
4920 }
4921
4922 /*----------------------------------------------------------------------------
4923 | Returns the result of converting the single-precision floating-point value
4924 | `a' to the extended double-precision floating-point format.  The conversion
4925 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4926 | Arithmetic.
4927 *----------------------------------------------------------------------------*/
4928
4929 floatx80 float32_to_floatx80(float32 a, float_status *status)
4930 {
4931     bool aSign;
4932     int aExp;
4933     uint32_t aSig;
4934
4935     a = float32_squash_input_denormal(a, status);
4936     aSig = extractFloat32Frac( a );
4937     aExp = extractFloat32Exp( a );
4938     aSign = extractFloat32Sign( a );
4939     if ( aExp == 0xFF ) {
4940         if (aSig) {
4941             floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
4942                                                status);
4943             return floatx80_silence_nan(res, status);
4944         }
4945         return packFloatx80(aSign,
4946                             floatx80_infinity_high,
4947                             floatx80_infinity_low);
4948     }
4949     if ( aExp == 0 ) {
4950         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4951         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4952     }
4953     aSig |= 0x00800000;
4954     return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
4955
4956 }
4957
4958 /*----------------------------------------------------------------------------
4959 | Returns the remainder of the single-precision floating-point value `a'
4960 | with respect to the corresponding value `b'.  The operation is performed
4961 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4962 *----------------------------------------------------------------------------*/
4963
4964 float32 float32_rem(float32 a, float32 b, float_status *status)
4965 {
4966     bool aSign, zSign;
4967     int aExp, bExp, expDiff;
4968     uint32_t aSig, bSig;
4969     uint32_t q;
4970     uint64_t aSig64, bSig64, q64;
4971     uint32_t alternateASig;
4972     int32_t sigMean;
4973     a = float32_squash_input_denormal(a, status);
4974     b = float32_squash_input_denormal(b, status);
4975
4976     aSig = extractFloat32Frac( a );
4977     aExp = extractFloat32Exp( a );
4978     aSign = extractFloat32Sign( a );
4979     bSig = extractFloat32Frac( b );
4980     bExp = extractFloat32Exp( b );
4981     if ( aExp == 0xFF ) {
4982         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
4983             return propagateFloat32NaN(a, b, status);
4984         }
4985         float_raise(float_flag_invalid, status);
4986         return float32_default_nan(status);
4987     }
4988     if ( bExp == 0xFF ) {
4989         if (bSig) {
4990             return propagateFloat32NaN(a, b, status);
4991         }
4992         return a;
4993     }
4994     if ( bExp == 0 ) {
4995         if ( bSig == 0 ) {
4996             float_raise(float_flag_invalid, status);
4997             return float32_default_nan(status);
4998         }
4999         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
5000     }
5001     if ( aExp == 0 ) {
5002         if ( aSig == 0 ) return a;
5003         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5004     }
5005     expDiff = aExp - bExp;
5006     aSig |= 0x00800000;
5007     bSig |= 0x00800000;
5008     if ( expDiff < 32 ) {
5009         aSig <<= 8;
5010         bSig <<= 8;
5011         if ( expDiff < 0 ) {
5012             if ( expDiff < -1 ) return a;
5013             aSig >>= 1;
5014         }
5015         q = ( bSig <= aSig );
5016         if ( q ) aSig -= bSig;
5017         if ( 0 < expDiff ) {
5018             q = ( ( (uint64_t) aSig )<<32 ) / bSig;
5019             q >>= 32 - expDiff;
5020             bSig >>= 2;
5021             aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5022         }
5023         else {
5024             aSig >>= 2;
5025             bSig >>= 2;
5026         }
5027     }
5028     else {
5029         if ( bSig <= aSig ) aSig -= bSig;
5030         aSig64 = ( (uint64_t) aSig )<<40;
5031         bSig64 = ( (uint64_t) bSig )<<40;
5032         expDiff -= 64;
5033         while ( 0 < expDiff ) {
5034             q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5035             q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5036             aSig64 = - ( ( bSig * q64 )<<38 );
5037             expDiff -= 62;
5038         }
5039         expDiff += 64;
5040         q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5041         q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5042         q = q64>>( 64 - expDiff );
5043         bSig <<= 6;
5044         aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5045     }
5046     do {
5047         alternateASig = aSig;
5048         ++q;
5049         aSig -= bSig;
5050     } while ( 0 <= (int32_t) aSig );
5051     sigMean = aSig + alternateASig;
5052     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5053         aSig = alternateASig;
5054     }
5055     zSign = ( (int32_t) aSig < 0 );
5056     if ( zSign ) aSig = - aSig;
5057     return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
5058 }
5059
5060
5061
5062 /*----------------------------------------------------------------------------
5063 | Returns the binary exponential of the single-precision floating-point value
5064 | `a'. The operation is performed according to the IEC/IEEE Standard for
5065 | Binary Floating-Point Arithmetic.
5066 |
5067 | Uses the following identities:
5068 |
5069 | 1. -------------------------------------------------------------------------
5070 |      x    x*ln(2)
5071 |     2  = e
5072 |
5073 | 2. -------------------------------------------------------------------------
5074 |                      2     3     4     5           n
5075 |      x        x     x     x     x     x           x
5076 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5077 |               1!    2!    3!    4!    5!          n!
5078 *----------------------------------------------------------------------------*/
5079
5080 static const float64 float32_exp2_coefficients[15] =
5081 {
5082     const_float64( 0x3ff0000000000000ll ), /*  1 */
5083     const_float64( 0x3fe0000000000000ll ), /*  2 */
5084     const_float64( 0x3fc5555555555555ll ), /*  3 */
5085     const_float64( 0x3fa5555555555555ll ), /*  4 */
5086     const_float64( 0x3f81111111111111ll ), /*  5 */
5087     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
5088     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
5089     const_float64( 0x3efa01a01a01a01all ), /*  8 */
5090     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
5091     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5092     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5093     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5094     const_float64( 0x3de6124613a86d09ll ), /* 13 */
5095     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5096     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5097 };
5098
5099 float32 float32_exp2(float32 a, float_status *status)
5100 {
5101     bool aSign;
5102     int aExp;
5103     uint32_t aSig;
5104     float64 r, x, xn;
5105     int i;
5106     a = float32_squash_input_denormal(a, status);
5107
5108     aSig = extractFloat32Frac( a );
5109     aExp = extractFloat32Exp( a );
5110     aSign = extractFloat32Sign( a );
5111
5112     if ( aExp == 0xFF) {
5113         if (aSig) {
5114             return propagateFloat32NaN(a, float32_zero, status);
5115         }
5116         return (aSign) ? float32_zero : a;
5117     }
5118     if (aExp == 0) {
5119         if (aSig == 0) return float32_one;
5120     }
5121
5122     float_raise(float_flag_inexact, status);
5123
5124     /* ******************************* */
5125     /* using float64 for approximation */
5126     /* ******************************* */
5127     x = float32_to_float64(a, status);
5128     x = float64_mul(x, float64_ln2, status);
5129
5130     xn = x;
5131     r = float64_one;
5132     for (i = 0 ; i < 15 ; i++) {
5133         float64 f;
5134
5135         f = float64_mul(xn, float32_exp2_coefficients[i], status);
5136         r = float64_add(r, f, status);
5137
5138         xn = float64_mul(xn, x, status);
5139     }
5140
5141     return float64_to_float32(r, status);
5142 }
5143
5144 /*----------------------------------------------------------------------------
5145 | Returns the binary log of the single-precision floating-point value `a'.
5146 | The operation is performed according to the IEC/IEEE Standard for Binary
5147 | Floating-Point Arithmetic.
5148 *----------------------------------------------------------------------------*/
5149 float32 float32_log2(float32 a, float_status *status)
5150 {
5151     bool aSign, zSign;
5152     int aExp;
5153     uint32_t aSig, zSig, i;
5154
5155     a = float32_squash_input_denormal(a, status);
5156     aSig = extractFloat32Frac( a );
5157     aExp = extractFloat32Exp( a );
5158     aSign = extractFloat32Sign( a );
5159
5160     if ( aExp == 0 ) {
5161         if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5162         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5163     }
5164     if ( aSign ) {
5165         float_raise(float_flag_invalid, status);
5166         return float32_default_nan(status);
5167     }
5168     if ( aExp == 0xFF ) {
5169         if (aSig) {
5170             return propagateFloat32NaN(a, float32_zero, status);
5171         }
5172         return a;
5173     }
5174
5175     aExp -= 0x7F;
5176     aSig |= 0x00800000;
5177     zSign = aExp < 0;
5178     zSig = aExp << 23;
5179
5180     for (i = 1 << 22; i > 0; i >>= 1) {
5181         aSig = ( (uint64_t)aSig * aSig ) >> 23;
5182         if ( aSig & 0x01000000 ) {
5183             aSig >>= 1;
5184             zSig |= i;
5185         }
5186     }
5187
5188     if ( zSign )
5189         zSig = -zSig;
5190
5191     return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
5192 }
5193
5194 /*----------------------------------------------------------------------------
5195 | Returns the result of converting the double-precision floating-point value
5196 | `a' to the extended double-precision floating-point format.  The conversion
5197 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5198 | Arithmetic.
5199 *----------------------------------------------------------------------------*/
5200
5201 floatx80 float64_to_floatx80(float64 a, float_status *status)
5202 {
5203     bool aSign;
5204     int aExp;
5205     uint64_t aSig;
5206
5207     a = float64_squash_input_denormal(a, status);
5208     aSig = extractFloat64Frac( a );
5209     aExp = extractFloat64Exp( a );
5210     aSign = extractFloat64Sign( a );
5211     if ( aExp == 0x7FF ) {
5212         if (aSig) {
5213             floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5214                                                status);
5215             return floatx80_silence_nan(res, status);
5216         }
5217         return packFloatx80(aSign,
5218                             floatx80_infinity_high,
5219                             floatx80_infinity_low);
5220     }
5221     if ( aExp == 0 ) {
5222         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5223         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5224     }
5225     return
5226         packFloatx80(
5227             aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
5228
5229 }
5230
5231 /*----------------------------------------------------------------------------
5232 | Returns the remainder of the double-precision floating-point value `a'
5233 | with respect to the corresponding value `b'.  The operation is performed
5234 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5235 *----------------------------------------------------------------------------*/
5236
5237 float64 float64_rem(float64 a, float64 b, float_status *status)
5238 {
5239     bool aSign, zSign;
5240     int aExp, bExp, expDiff;
5241     uint64_t aSig, bSig;
5242     uint64_t q, alternateASig;
5243     int64_t sigMean;
5244
5245     a = float64_squash_input_denormal(a, status);
5246     b = float64_squash_input_denormal(b, status);
5247     aSig = extractFloat64Frac( a );
5248     aExp = extractFloat64Exp( a );
5249     aSign = extractFloat64Sign( a );
5250     bSig = extractFloat64Frac( b );
5251     bExp = extractFloat64Exp( b );
5252     if ( aExp == 0x7FF ) {
5253         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
5254             return propagateFloat64NaN(a, b, status);
5255         }
5256         float_raise(float_flag_invalid, status);
5257         return float64_default_nan(status);
5258     }
5259     if ( bExp == 0x7FF ) {
5260         if (bSig) {
5261             return propagateFloat64NaN(a, b, status);
5262         }
5263         return a;
5264     }
5265     if ( bExp == 0 ) {
5266         if ( bSig == 0 ) {
5267             float_raise(float_flag_invalid, status);
5268             return float64_default_nan(status);
5269         }
5270         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5271     }
5272     if ( aExp == 0 ) {
5273         if ( aSig == 0 ) return a;
5274         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5275     }
5276     expDiff = aExp - bExp;
5277     aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5278     bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
5279     if ( expDiff < 0 ) {
5280         if ( expDiff < -1 ) return a;
5281         aSig >>= 1;
5282     }
5283     q = ( bSig <= aSig );
5284     if ( q ) aSig -= bSig;
5285     expDiff -= 64;
5286     while ( 0 < expDiff ) {
5287         q = estimateDiv128To64( aSig, 0, bSig );
5288         q = ( 2 < q ) ? q - 2 : 0;
5289         aSig = - ( ( bSig>>2 ) * q );
5290         expDiff -= 62;
5291     }
5292     expDiff += 64;
5293     if ( 0 < expDiff ) {
5294         q = estimateDiv128To64( aSig, 0, bSig );
5295         q = ( 2 < q ) ? q - 2 : 0;
5296         q >>= 64 - expDiff;
5297         bSig >>= 2;
5298         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5299     }
5300     else {
5301         aSig >>= 2;
5302         bSig >>= 2;
5303     }
5304     do {
5305         alternateASig = aSig;
5306         ++q;
5307         aSig -= bSig;
5308     } while ( 0 <= (int64_t) aSig );
5309     sigMean = aSig + alternateASig;
5310     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5311         aSig = alternateASig;
5312     }
5313     zSign = ( (int64_t) aSig < 0 );
5314     if ( zSign ) aSig = - aSig;
5315     return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
5316
5317 }
5318
5319 /*----------------------------------------------------------------------------
5320 | Returns the binary log of the double-precision floating-point value `a'.
5321 | The operation is performed according to the IEC/IEEE Standard for Binary
5322 | Floating-Point Arithmetic.
5323 *----------------------------------------------------------------------------*/
5324 float64 float64_log2(float64 a, float_status *status)
5325 {
5326     bool aSign, zSign;
5327     int aExp;
5328     uint64_t aSig, aSig0, aSig1, zSig, i;
5329     a = float64_squash_input_denormal(a, status);
5330
5331     aSig = extractFloat64Frac( a );
5332     aExp = extractFloat64Exp( a );
5333     aSign = extractFloat64Sign( a );
5334
5335     if ( aExp == 0 ) {
5336         if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5337         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5338     }
5339     if ( aSign ) {
5340         float_raise(float_flag_invalid, status);
5341         return float64_default_nan(status);
5342     }
5343     if ( aExp == 0x7FF ) {
5344         if (aSig) {
5345             return propagateFloat64NaN(a, float64_zero, status);
5346         }
5347         return a;
5348     }
5349
5350     aExp -= 0x3FF;
5351     aSig |= UINT64_C(0x0010000000000000);
5352     zSign = aExp < 0;
5353     zSig = (uint64_t)aExp << 52;
5354     for (i = 1LL << 51; i > 0; i >>= 1) {
5355         mul64To128( aSig, aSig, &aSig0, &aSig1 );
5356         aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
5357         if ( aSig & UINT64_C(0x0020000000000000) ) {
5358             aSig >>= 1;
5359             zSig |= i;
5360         }
5361     }
5362
5363     if ( zSign )
5364         zSig = -zSig;
5365     return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
5366 }
5367
5368 /*----------------------------------------------------------------------------
5369 | Returns the result of converting the extended double-precision floating-
5370 | point value `a' to the 32-bit two's complement integer format.  The
5371 | conversion is performed according to the IEC/IEEE Standard for Binary
5372 | Floating-Point Arithmetic---which means in particular that the conversion
5373 | is rounded according to the current rounding mode.  If `a' is a NaN, the
5374 | largest positive integer is returned.  Otherwise, if the conversion
5375 | overflows, the largest integer with the same sign as `a' is returned.
5376 *----------------------------------------------------------------------------*/
5377
5378 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5379 {
5380     bool aSign;
5381     int32_t aExp, shiftCount;
5382     uint64_t aSig;
5383
5384     if (floatx80_invalid_encoding(a)) {
5385         float_raise(float_flag_invalid, status);
5386         return 1 << 31;
5387     }
5388     aSig = extractFloatx80Frac( a );
5389     aExp = extractFloatx80Exp( a );
5390     aSign = extractFloatx80Sign( a );
5391     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5392     shiftCount = 0x4037 - aExp;
5393     if ( shiftCount <= 0 ) shiftCount = 1;
5394     shift64RightJamming( aSig, shiftCount, &aSig );
5395     return roundAndPackInt32(aSign, aSig, status);
5396
5397 }
5398
5399 /*----------------------------------------------------------------------------
5400 | Returns the result of converting the extended double-precision floating-
5401 | point value `a' to the 32-bit two's complement integer format.  The
5402 | conversion is performed according to the IEC/IEEE Standard for Binary
5403 | Floating-Point Arithmetic, except that the conversion is always rounded
5404 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5405 | Otherwise, if the conversion overflows, the largest integer with the same
5406 | sign as `a' is returned.
5407 *----------------------------------------------------------------------------*/
5408
5409 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5410 {
5411     bool aSign;
5412     int32_t aExp, shiftCount;
5413     uint64_t aSig, savedASig;
5414     int32_t z;
5415
5416     if (floatx80_invalid_encoding(a)) {
5417         float_raise(float_flag_invalid, status);
5418         return 1 << 31;
5419     }
5420     aSig = extractFloatx80Frac( a );
5421     aExp = extractFloatx80Exp( a );
5422     aSign = extractFloatx80Sign( a );
5423     if ( 0x401E < aExp ) {
5424         if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5425         goto invalid;
5426     }
5427     else if ( aExp < 0x3FFF ) {
5428         if (aExp || aSig) {
5429             float_raise(float_flag_inexact, status);
5430         }
5431         return 0;
5432     }
5433     shiftCount = 0x403E - aExp;
5434     savedASig = aSig;
5435     aSig >>= shiftCount;
5436     z = aSig;
5437     if ( aSign ) z = - z;
5438     if ( ( z < 0 ) ^ aSign ) {
5439  invalid:
5440         float_raise(float_flag_invalid, status);
5441         return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5442     }
5443     if ( ( aSig<<shiftCount ) != savedASig ) {
5444         float_raise(float_flag_inexact, status);
5445     }
5446     return z;
5447
5448 }
5449
5450 /*----------------------------------------------------------------------------
5451 | Returns the result of converting the extended double-precision floating-
5452 | point value `a' to the 64-bit two's complement integer format.  The
5453 | conversion is performed according to the IEC/IEEE Standard for Binary
5454 | Floating-Point Arithmetic---which means in particular that the conversion
5455 | is rounded according to the current rounding mode.  If `a' is a NaN,
5456 | the largest positive integer is returned.  Otherwise, if the conversion
5457 | overflows, the largest integer with the same sign as `a' is returned.
5458 *----------------------------------------------------------------------------*/
5459
5460 int64_t floatx80_to_int64(floatx80 a, float_status *status)
5461 {
5462     bool aSign;
5463     int32_t aExp, shiftCount;
5464     uint64_t aSig, aSigExtra;
5465
5466     if (floatx80_invalid_encoding(a)) {
5467         float_raise(float_flag_invalid, status);
5468         return 1ULL << 63;
5469     }
5470     aSig = extractFloatx80Frac( a );
5471     aExp = extractFloatx80Exp( a );
5472     aSign = extractFloatx80Sign( a );
5473     shiftCount = 0x403E - aExp;
5474     if ( shiftCount <= 0 ) {
5475         if ( shiftCount ) {
5476             float_raise(float_flag_invalid, status);
5477             if (!aSign || floatx80_is_any_nan(a)) {
5478                 return INT64_MAX;
5479             }
5480             return INT64_MIN;
5481         }
5482         aSigExtra = 0;
5483     }
5484     else {
5485         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5486     }
5487     return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5488
5489 }
5490
5491 /*----------------------------------------------------------------------------
5492 | Returns the result of converting the extended double-precision floating-
5493 | point value `a' to the 64-bit two's complement integer format.  The
5494 | conversion is performed according to the IEC/IEEE Standard for Binary
5495 | Floating-Point Arithmetic, except that the conversion is always rounded
5496 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5497 | Otherwise, if the conversion overflows, the largest integer with the same
5498 | sign as `a' is returned.
5499 *----------------------------------------------------------------------------*/
5500
5501 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
5502 {
5503     bool aSign;
5504     int32_t aExp, shiftCount;
5505     uint64_t aSig;
5506     int64_t z;
5507
5508     if (floatx80_invalid_encoding(a)) {
5509         float_raise(float_flag_invalid, status);
5510         return 1ULL << 63;
5511     }
5512     aSig = extractFloatx80Frac( a );
5513     aExp = extractFloatx80Exp( a );
5514     aSign = extractFloatx80Sign( a );
5515     shiftCount = aExp - 0x403E;
5516     if ( 0 <= shiftCount ) {
5517         aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
5518         if ( ( a.high != 0xC03E ) || aSig ) {
5519             float_raise(float_flag_invalid, status);
5520             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5521                 return INT64_MAX;
5522             }
5523         }
5524         return INT64_MIN;
5525     }
5526     else if ( aExp < 0x3FFF ) {
5527         if (aExp | aSig) {
5528             float_raise(float_flag_inexact, status);
5529         }
5530         return 0;
5531     }
5532     z = aSig>>( - shiftCount );
5533     if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
5534         float_raise(float_flag_inexact, status);
5535     }
5536     if ( aSign ) z = - z;
5537     return z;
5538
5539 }
5540
5541 /*----------------------------------------------------------------------------
5542 | Returns the result of converting the extended double-precision floating-
5543 | point value `a' to the single-precision floating-point format.  The
5544 | conversion is performed according to the IEC/IEEE Standard for Binary
5545 | Floating-Point Arithmetic.
5546 *----------------------------------------------------------------------------*/
5547
5548 float32 floatx80_to_float32(floatx80 a, float_status *status)
5549 {
5550     bool aSign;
5551     int32_t aExp;
5552     uint64_t aSig;
5553
5554     if (floatx80_invalid_encoding(a)) {
5555         float_raise(float_flag_invalid, status);
5556         return float32_default_nan(status);
5557     }
5558     aSig = extractFloatx80Frac( a );
5559     aExp = extractFloatx80Exp( a );
5560     aSign = extractFloatx80Sign( a );
5561     if ( aExp == 0x7FFF ) {
5562         if ( (uint64_t) ( aSig<<1 ) ) {
5563             float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5564                                              status);
5565             return float32_silence_nan(res, status);
5566         }
5567         return packFloat32( aSign, 0xFF, 0 );
5568     }
5569     shift64RightJamming( aSig, 33, &aSig );
5570     if ( aExp || aSig ) aExp -= 0x3F81;
5571     return roundAndPackFloat32(aSign, aExp, aSig, status);
5572
5573 }
5574
5575 /*----------------------------------------------------------------------------
5576 | Returns the result of converting the extended double-precision floating-
5577 | point value `a' to the double-precision floating-point format.  The
5578 | conversion is performed according to the IEC/IEEE Standard for Binary
5579 | Floating-Point Arithmetic.
5580 *----------------------------------------------------------------------------*/
5581
5582 float64 floatx80_to_float64(floatx80 a, float_status *status)
5583 {
5584     bool aSign;
5585     int32_t aExp;
5586     uint64_t aSig, zSig;
5587
5588     if (floatx80_invalid_encoding(a)) {
5589         float_raise(float_flag_invalid, status);
5590         return float64_default_nan(status);
5591     }
5592     aSig = extractFloatx80Frac( a );
5593     aExp = extractFloatx80Exp( a );
5594     aSign = extractFloatx80Sign( a );
5595     if ( aExp == 0x7FFF ) {
5596         if ( (uint64_t) ( aSig<<1 ) ) {
5597             float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5598                                              status);
5599             return float64_silence_nan(res, status);
5600         }
5601         return packFloat64( aSign, 0x7FF, 0 );
5602     }
5603     shift64RightJamming( aSig, 1, &zSig );
5604     if ( aExp || aSig ) aExp -= 0x3C01;
5605     return roundAndPackFloat64(aSign, aExp, zSig, status);
5606
5607 }
5608
5609 /*----------------------------------------------------------------------------
5610 | Returns the result of converting the extended double-precision floating-
5611 | point value `a' to the quadruple-precision floating-point format.  The
5612 | conversion is performed according to the IEC/IEEE Standard for Binary
5613 | Floating-Point Arithmetic.
5614 *----------------------------------------------------------------------------*/
5615
5616 float128 floatx80_to_float128(floatx80 a, float_status *status)
5617 {
5618     bool aSign;
5619     int aExp;
5620     uint64_t aSig, zSig0, zSig1;
5621
5622     if (floatx80_invalid_encoding(a)) {
5623         float_raise(float_flag_invalid, status);
5624         return float128_default_nan(status);
5625     }
5626     aSig = extractFloatx80Frac( a );
5627     aExp = extractFloatx80Exp( a );
5628     aSign = extractFloatx80Sign( a );
5629     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5630         float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5631                                            status);
5632         return float128_silence_nan(res, status);
5633     }
5634     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5635     return packFloat128( aSign, aExp, zSig0, zSig1 );
5636
5637 }
5638
5639 /*----------------------------------------------------------------------------
5640 | Rounds the extended double-precision floating-point value `a'
5641 | to the precision provided by floatx80_rounding_precision and returns the
5642 | result as an extended double-precision floating-point value.
5643 | The operation is performed according to the IEC/IEEE Standard for Binary
5644 | Floating-Point Arithmetic.
5645 *----------------------------------------------------------------------------*/
5646
5647 floatx80 floatx80_round(floatx80 a, float_status *status)
5648 {
5649     return roundAndPackFloatx80(status->floatx80_rounding_precision,
5650                                 extractFloatx80Sign(a),
5651                                 extractFloatx80Exp(a),
5652                                 extractFloatx80Frac(a), 0, status);
5653 }
5654
5655 /*----------------------------------------------------------------------------
5656 | Rounds the extended double-precision floating-point value `a' to an integer,
5657 | and returns the result as an extended quadruple-precision floating-point
5658 | value.  The operation is performed according to the IEC/IEEE Standard for
5659 | Binary Floating-Point Arithmetic.
5660 *----------------------------------------------------------------------------*/
5661
5662 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5663 {
5664     bool aSign;
5665     int32_t aExp;
5666     uint64_t lastBitMask, roundBitsMask;
5667     floatx80 z;
5668
5669     if (floatx80_invalid_encoding(a)) {
5670         float_raise(float_flag_invalid, status);
5671         return floatx80_default_nan(status);
5672     }
5673     aExp = extractFloatx80Exp( a );
5674     if ( 0x403E <= aExp ) {
5675         if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5676             return propagateFloatx80NaN(a, a, status);
5677         }
5678         return a;
5679     }
5680     if ( aExp < 0x3FFF ) {
5681         if (    ( aExp == 0 )
5682              && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
5683             return a;
5684         }
5685         float_raise(float_flag_inexact, status);
5686         aSign = extractFloatx80Sign( a );
5687         switch (status->float_rounding_mode) {
5688          case float_round_nearest_even:
5689             if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5690                ) {
5691                 return
5692                     packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5693             }
5694             break;
5695         case float_round_ties_away:
5696             if (aExp == 0x3FFE) {
5697                 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5698             }
5699             break;
5700          case float_round_down:
5701             return
5702                   aSign ?
5703                       packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5704                 : packFloatx80( 0, 0, 0 );
5705          case float_round_up:
5706             return
5707                   aSign ? packFloatx80( 1, 0, 0 )
5708                 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5709
5710         case float_round_to_zero:
5711             break;
5712         default:
5713             g_assert_not_reached();
5714         }
5715         return packFloatx80( aSign, 0, 0 );
5716     }
5717     lastBitMask = 1;
5718     lastBitMask <<= 0x403E - aExp;
5719     roundBitsMask = lastBitMask - 1;
5720     z = a;
5721     switch (status->float_rounding_mode) {
5722     case float_round_nearest_even:
5723         z.low += lastBitMask>>1;
5724         if ((z.low & roundBitsMask) == 0) {
5725             z.low &= ~lastBitMask;
5726         }
5727         break;
5728     case float_round_ties_away:
5729         z.low += lastBitMask >> 1;
5730         break;
5731     case float_round_to_zero:
5732         break;
5733     case float_round_up:
5734         if (!extractFloatx80Sign(z)) {
5735             z.low += roundBitsMask;
5736         }
5737         break;
5738     case float_round_down:
5739         if (extractFloatx80Sign(z)) {
5740             z.low += roundBitsMask;
5741         }
5742         break;
5743     default:
5744         abort();
5745     }
5746     z.low &= ~ roundBitsMask;
5747     if ( z.low == 0 ) {
5748         ++z.high;
5749         z.low = UINT64_C(0x8000000000000000);
5750     }
5751     if (z.low != a.low) {
5752         float_raise(float_flag_inexact, status);
5753     }
5754     return z;
5755
5756 }
5757
5758 /*----------------------------------------------------------------------------
5759 | Returns the result of adding the absolute values of the extended double-
5760 | precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
5761 | negated before being returned.  `zSign' is ignored if the result is a NaN.
5762 | The addition is performed according to the IEC/IEEE Standard for Binary
5763 | Floating-Point Arithmetic.
5764 *----------------------------------------------------------------------------*/
5765
5766 static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
5767                                 float_status *status)
5768 {
5769     int32_t aExp, bExp, zExp;
5770     uint64_t aSig, bSig, zSig0, zSig1;
5771     int32_t expDiff;
5772
5773     aSig = extractFloatx80Frac( a );
5774     aExp = extractFloatx80Exp( a );
5775     bSig = extractFloatx80Frac( b );
5776     bExp = extractFloatx80Exp( b );
5777     expDiff = aExp - bExp;
5778     if ( 0 < expDiff ) {
5779         if ( aExp == 0x7FFF ) {
5780             if ((uint64_t)(aSig << 1)) {
5781                 return propagateFloatx80NaN(a, b, status);
5782             }
5783             return a;
5784         }
5785         if ( bExp == 0 ) --expDiff;
5786         shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5787         zExp = aExp;
5788     }
5789     else if ( expDiff < 0 ) {
5790         if ( bExp == 0x7FFF ) {
5791             if ((uint64_t)(bSig << 1)) {
5792                 return propagateFloatx80NaN(a, b, status);
5793             }
5794             return packFloatx80(zSign,
5795                                 floatx80_infinity_high,
5796                                 floatx80_infinity_low);
5797         }
5798         if ( aExp == 0 ) ++expDiff;
5799         shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5800         zExp = bExp;
5801     }
5802     else {
5803         if ( aExp == 0x7FFF ) {
5804             if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
5805                 return propagateFloatx80NaN(a, b, status);
5806             }
5807             return a;
5808         }
5809         zSig1 = 0;
5810         zSig0 = aSig + bSig;
5811         if ( aExp == 0 ) {
5812             if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
5813                 /* At least one of the values is a pseudo-denormal,
5814                  * and there is a carry out of the result.  */
5815                 zExp = 1;
5816                 goto shiftRight1;
5817             }
5818             if (zSig0 == 0) {
5819                 return packFloatx80(zSign, 0, 0);
5820             }
5821             normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5822             goto roundAndPack;
5823         }
5824         zExp = aExp;
5825         goto shiftRight1;
5826     }
5827     zSig0 = aSig + bSig;
5828     if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
5829  shiftRight1:
5830     shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
5831     zSig0 |= UINT64_C(0x8000000000000000);
5832     ++zExp;
5833  roundAndPack:
5834     return roundAndPackFloatx80(status->floatx80_rounding_precision,
5835                                 zSign, zExp, zSig0, zSig1, status);
5836 }
5837
5838 /*----------------------------------------------------------------------------
5839 | Returns the result of subtracting the absolute values of the extended
5840 | double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
5841 | difference is negated before being returned.  `zSign' is ignored if the
5842 | result is a NaN.  The subtraction is performed according to the IEC/IEEE
5843 | Standard for Binary Floating-Point Arithmetic.
5844 *----------------------------------------------------------------------------*/
5845
5846 static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
5847                                 float_status *status)
5848 {
5849     int32_t aExp, bExp, zExp;
5850     uint64_t aSig, bSig, zSig0, zSig1;
5851     int32_t expDiff;
5852
5853     aSig = extractFloatx80Frac( a );
5854     aExp = extractFloatx80Exp( a );
5855     bSig = extractFloatx80Frac( b );
5856     bExp = extractFloatx80Exp( b );
5857     expDiff = aExp - bExp;
5858     if ( 0 < expDiff ) goto aExpBigger;
5859     if ( expDiff < 0 ) goto bExpBigger;
5860     if ( aExp == 0x7FFF ) {
5861         if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
5862             return propagateFloatx80NaN(a, b, status);
5863         }
5864         float_raise(float_flag_invalid, status);
5865         return floatx80_default_nan(status);
5866     }
5867     if ( aExp == 0 ) {
5868         aExp = 1;
5869         bExp = 1;
5870     }
5871     zSig1 = 0;
5872     if ( bSig < aSig ) goto aBigger;
5873     if ( aSig < bSig ) goto bBigger;
5874     return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
5875  bExpBigger:
5876     if ( bExp == 0x7FFF ) {
5877         if ((uint64_t)(bSig << 1)) {
5878             return propagateFloatx80NaN(a, b, status);
5879         }
5880         return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5881                             floatx80_infinity_low);
5882     }
5883     if ( aExp == 0 ) ++expDiff;
5884     shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5885  bBigger:
5886     sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5887     zExp = bExp;
5888     zSign ^= 1;
5889     goto normalizeRoundAndPack;
5890  aExpBigger:
5891     if ( aExp == 0x7FFF ) {
5892         if ((uint64_t)(aSig << 1)) {
5893             return propagateFloatx80NaN(a, b, status);
5894         }
5895         return a;
5896     }
5897     if ( bExp == 0 ) --expDiff;
5898     shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5899  aBigger:
5900     sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5901     zExp = aExp;
5902  normalizeRoundAndPack:
5903     return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
5904                                          zSign, zExp, zSig0, zSig1, status);
5905 }
5906
5907 /*----------------------------------------------------------------------------
5908 | Returns the result of adding the extended double-precision floating-point
5909 | values `a' and `b'.  The operation is performed according to the IEC/IEEE
5910 | Standard for Binary Floating-Point Arithmetic.
5911 *----------------------------------------------------------------------------*/
5912
5913 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
5914 {
5915     bool aSign, bSign;
5916
5917     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5918         float_raise(float_flag_invalid, status);
5919         return floatx80_default_nan(status);
5920     }
5921     aSign = extractFloatx80Sign( a );
5922     bSign = extractFloatx80Sign( b );
5923     if ( aSign == bSign ) {
5924         return addFloatx80Sigs(a, b, aSign, status);
5925     }
5926     else {
5927         return subFloatx80Sigs(a, b, aSign, status);
5928     }
5929
5930 }
5931
5932 /*----------------------------------------------------------------------------
5933 | Returns the result of subtracting the extended double-precision floating-
5934 | point values `a' and `b'.  The operation is performed according to the
5935 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5936 *----------------------------------------------------------------------------*/
5937
5938 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
5939 {
5940     bool aSign, bSign;
5941
5942     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5943         float_raise(float_flag_invalid, status);
5944         return floatx80_default_nan(status);
5945     }
5946     aSign = extractFloatx80Sign( a );
5947     bSign = extractFloatx80Sign( b );
5948     if ( aSign == bSign ) {
5949         return subFloatx80Sigs(a, b, aSign, status);
5950     }
5951     else {
5952         return addFloatx80Sigs(a, b, aSign, status);
5953     }
5954
5955 }
5956
5957 /*----------------------------------------------------------------------------
5958 | Returns the result of multiplying the extended double-precision floating-
5959 | point values `a' and `b'.  The operation is performed according to the
5960 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5961 *----------------------------------------------------------------------------*/
5962
5963 floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
5964 {
5965     bool aSign, bSign, zSign;
5966     int32_t aExp, bExp, zExp;
5967     uint64_t aSig, bSig, zSig0, zSig1;
5968
5969     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5970         float_raise(float_flag_invalid, status);
5971         return floatx80_default_nan(status);
5972     }
5973     aSig = extractFloatx80Frac( a );
5974     aExp = extractFloatx80Exp( a );
5975     aSign = extractFloatx80Sign( a );
5976     bSig = extractFloatx80Frac( b );
5977     bExp = extractFloatx80Exp( b );
5978     bSign = extractFloatx80Sign( b );
5979     zSign = aSign ^ bSign;
5980     if ( aExp == 0x7FFF ) {
5981         if (    (uint64_t) ( aSig<<1 )
5982              || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
5983             return propagateFloatx80NaN(a, b, status);
5984         }
5985         if ( ( bExp | bSig ) == 0 ) goto invalid;
5986         return packFloatx80(zSign, floatx80_infinity_high,
5987                                    floatx80_infinity_low);
5988     }
5989     if ( bExp == 0x7FFF ) {
5990         if ((uint64_t)(bSig << 1)) {
5991             return propagateFloatx80NaN(a, b, status);
5992         }
5993         if ( ( aExp | aSig ) == 0 ) {
5994  invalid:
5995             float_raise(float_flag_invalid, status);
5996             return floatx80_default_nan(status);
5997         }
5998         return packFloatx80(zSign, floatx80_infinity_high,
5999                                    floatx80_infinity_low);
6000     }
6001     if ( aExp == 0 ) {
6002         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6003         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6004     }
6005     if ( bExp == 0 ) {
6006         if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
6007         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6008     }
6009     zExp = aExp + bExp - 0x3FFE;
6010     mul64To128( aSig, bSig, &zSig0, &zSig1 );
6011     if ( 0 < (int64_t) zSig0 ) {
6012         shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6013         --zExp;
6014     }
6015     return roundAndPackFloatx80(status->floatx80_rounding_precision,
6016                                 zSign, zExp, zSig0, zSig1, status);
6017 }
6018
6019 /*----------------------------------------------------------------------------
6020 | Returns the result of dividing the extended double-precision floating-point
6021 | value `a' by the corresponding value `b'.  The operation is performed
6022 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6023 *----------------------------------------------------------------------------*/
6024
6025 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
6026 {
6027     bool aSign, bSign, zSign;
6028     int32_t aExp, bExp, zExp;
6029     uint64_t aSig, bSig, zSig0, zSig1;
6030     uint64_t rem0, rem1, rem2, term0, term1, term2;
6031
6032     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6033         float_raise(float_flag_invalid, status);
6034         return floatx80_default_nan(status);
6035     }
6036     aSig = extractFloatx80Frac( a );
6037     aExp = extractFloatx80Exp( a );
6038     aSign = extractFloatx80Sign( a );
6039     bSig = extractFloatx80Frac( b );
6040     bExp = extractFloatx80Exp( b );
6041     bSign = extractFloatx80Sign( b );
6042     zSign = aSign ^ bSign;
6043     if ( aExp == 0x7FFF ) {
6044         if ((uint64_t)(aSig << 1)) {
6045             return propagateFloatx80NaN(a, b, status);
6046         }
6047         if ( bExp == 0x7FFF ) {
6048             if ((uint64_t)(bSig << 1)) {
6049                 return propagateFloatx80NaN(a, b, status);
6050             }
6051             goto invalid;
6052         }
6053         return packFloatx80(zSign, floatx80_infinity_high,
6054                                    floatx80_infinity_low);
6055     }
6056     if ( bExp == 0x7FFF ) {
6057         if ((uint64_t)(bSig << 1)) {
6058             return propagateFloatx80NaN(a, b, status);
6059         }
6060         return packFloatx80( zSign, 0, 0 );
6061     }
6062     if ( bExp == 0 ) {
6063         if ( bSig == 0 ) {
6064             if ( ( aExp | aSig ) == 0 ) {
6065  invalid:
6066                 float_raise(float_flag_invalid, status);
6067                 return floatx80_default_nan(status);
6068             }
6069             float_raise(float_flag_divbyzero, status);
6070             return packFloatx80(zSign, floatx80_infinity_high,
6071                                        floatx80_infinity_low);
6072         }
6073         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6074     }
6075     if ( aExp == 0 ) {
6076         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6077         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6078     }
6079     zExp = aExp - bExp + 0x3FFE;
6080     rem1 = 0;
6081     if ( bSig <= aSig ) {
6082         shift128Right( aSig, 0, 1, &aSig, &rem1 );
6083         ++zExp;
6084     }
6085     zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6086     mul64To128( bSig, zSig0, &term0, &term1 );
6087     sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
6088     while ( (int64_t) rem0 < 0 ) {
6089         --zSig0;
6090         add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6091     }
6092     zSig1 = estimateDiv128To64( rem1, 0, bSig );
6093     if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
6094         mul64To128( bSig, zSig1, &term1, &term2 );
6095         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6096         while ( (int64_t) rem1 < 0 ) {
6097             --zSig1;
6098             add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6099         }
6100         zSig1 |= ( ( rem1 | rem2 ) != 0 );
6101     }
6102     return roundAndPackFloatx80(status->floatx80_rounding_precision,
6103                                 zSign, zExp, zSig0, zSig1, status);
6104 }
6105
6106 /*----------------------------------------------------------------------------
6107 | Returns the remainder of the extended double-precision floating-point value
6108 | `a' with respect to the corresponding value `b'.  The operation is performed
6109 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6110 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
6111 | the quotient toward zero instead.  '*quotient' is set to the low 64 bits of
6112 | the absolute value of the integer quotient.
6113 *----------------------------------------------------------------------------*/
6114
6115 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
6116                          float_status *status)
6117 {
6118     bool aSign, zSign;
6119     int32_t aExp, bExp, expDiff, aExpOrig;
6120     uint64_t aSig0, aSig1, bSig;
6121     uint64_t q, term0, term1, alternateASig0, alternateASig1;
6122
6123     *quotient = 0;
6124     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6125         float_raise(float_flag_invalid, status);
6126         return floatx80_default_nan(status);
6127     }
6128     aSig0 = extractFloatx80Frac( a );
6129     aExpOrig = aExp = extractFloatx80Exp( a );
6130     aSign = extractFloatx80Sign( a );
6131     bSig = extractFloatx80Frac( b );
6132     bExp = extractFloatx80Exp( b );
6133     if ( aExp == 0x7FFF ) {
6134         if (    (uint64_t) ( aSig0<<1 )
6135              || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
6136             return propagateFloatx80NaN(a, b, status);
6137         }
6138         goto invalid;
6139     }
6140     if ( bExp == 0x7FFF ) {
6141         if ((uint64_t)(bSig << 1)) {
6142             return propagateFloatx80NaN(a, b, status);
6143         }
6144         if (aExp == 0 && aSig0 >> 63) {
6145             /*
6146              * Pseudo-denormal argument must be returned in normalized
6147              * form.
6148              */
6149             return packFloatx80(aSign, 1, aSig0);
6150         }
6151         return a;
6152     }
6153     if ( bExp == 0 ) {
6154         if ( bSig == 0 ) {
6155  invalid:
6156             float_raise(float_flag_invalid, status);
6157             return floatx80_default_nan(status);
6158         }
6159         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6160     }
6161     if ( aExp == 0 ) {
6162         if ( aSig0 == 0 ) return a;
6163         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6164     }
6165     zSign = aSign;
6166     expDiff = aExp - bExp;
6167     aSig1 = 0;
6168     if ( expDiff < 0 ) {
6169         if ( mod || expDiff < -1 ) {
6170             if (aExp == 1 && aExpOrig == 0) {
6171                 /*
6172                  * Pseudo-denormal argument must be returned in
6173                  * normalized form.
6174                  */
6175                 return packFloatx80(aSign, aExp, aSig0);
6176             }
6177             return a;
6178         }
6179         shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6180         expDiff = 0;
6181     }
6182     *quotient = q = ( bSig <= aSig0 );
6183     if ( q ) aSig0 -= bSig;
6184     expDiff -= 64;
6185     while ( 0 < expDiff ) {
6186         q = estimateDiv128To64( aSig0, aSig1, bSig );
6187         q = ( 2 < q ) ? q - 2 : 0;
6188         mul64To128( bSig, q, &term0, &term1 );
6189         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6190         shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6191         expDiff -= 62;
6192         *quotient <<= 62;
6193         *quotient += q;
6194     }
6195     expDiff += 64;
6196     if ( 0 < expDiff ) {
6197         q = estimateDiv128To64( aSig0, aSig1, bSig );
6198         q = ( 2 < q ) ? q - 2 : 0;
6199         q >>= 64 - expDiff;
6200         mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6201         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6202         shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6203         while ( le128( term0, term1, aSig0, aSig1 ) ) {
6204             ++q;
6205             sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6206         }
6207         if (expDiff < 64) {
6208             *quotient <<= expDiff;
6209         } else {
6210             *quotient = 0;
6211         }
6212         *quotient += q;
6213     }
6214     else {
6215         term1 = 0;
6216         term0 = bSig;
6217     }
6218     if (!mod) {
6219         sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6220         if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6221                 || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6222                         && ( q & 1 ) )
6223             ) {
6224             aSig0 = alternateASig0;
6225             aSig1 = alternateASig1;
6226             zSign = ! zSign;
6227             ++*quotient;
6228         }
6229     }
6230     return
6231         normalizeRoundAndPackFloatx80(
6232             80, zSign, bExp + expDiff, aSig0, aSig1, status);
6233
6234 }
6235
6236 /*----------------------------------------------------------------------------
6237 | Returns the remainder of the extended double-precision floating-point value
6238 | `a' with respect to the corresponding value `b'.  The operation is performed
6239 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6240 *----------------------------------------------------------------------------*/
6241
6242 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6243 {
6244     uint64_t quotient;
6245     return floatx80_modrem(a, b, false, &quotient, status);
6246 }
6247
6248 /*----------------------------------------------------------------------------
6249 | Returns the remainder of the extended double-precision floating-point value
6250 | `a' with respect to the corresponding value `b', with the quotient truncated
6251 | toward zero.
6252 *----------------------------------------------------------------------------*/
6253
6254 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6255 {
6256     uint64_t quotient;
6257     return floatx80_modrem(a, b, true, &quotient, status);
6258 }
6259
6260 /*----------------------------------------------------------------------------
6261 | Returns the square root of the extended double-precision floating-point
6262 | value `a'.  The operation is performed according to the IEC/IEEE Standard
6263 | for Binary Floating-Point Arithmetic.
6264 *----------------------------------------------------------------------------*/
6265
6266 floatx80 floatx80_sqrt(floatx80 a, float_status *status)
6267 {
6268     bool aSign;
6269     int32_t aExp, zExp;
6270     uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6271     uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6272
6273     if (floatx80_invalid_encoding(a)) {
6274         float_raise(float_flag_invalid, status);
6275         return floatx80_default_nan(status);
6276     }
6277     aSig0 = extractFloatx80Frac( a );
6278     aExp = extractFloatx80Exp( a );
6279     aSign = extractFloatx80Sign( a );
6280     if ( aExp == 0x7FFF ) {
6281         if ((uint64_t)(aSig0 << 1)) {
6282             return propagateFloatx80NaN(a, a, status);
6283         }
6284         if ( ! aSign ) return a;
6285         goto invalid;
6286     }
6287     if ( aSign ) {
6288         if ( ( aExp | aSig0 ) == 0 ) return a;
6289  invalid:
6290         float_raise(float_flag_invalid, status);
6291         return floatx80_default_nan(status);
6292     }
6293     if ( aExp == 0 ) {
6294         if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6295         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6296     }
6297     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6298     zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6299     shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6300     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6301     doubleZSig0 = zSig0<<1;
6302     mul64To128( zSig0, zSig0, &term0, &term1 );
6303     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
6304     while ( (int64_t) rem0 < 0 ) {
6305         --zSig0;
6306         doubleZSig0 -= 2;
6307         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6308     }
6309     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6310     if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
6311         if ( zSig1 == 0 ) zSig1 = 1;
6312         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6313         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6314         mul64To128( zSig1, zSig1, &term2, &term3 );
6315         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
6316         while ( (int64_t) rem1 < 0 ) {
6317             --zSig1;
6318             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6319             term3 |= 1;
6320             term2 |= doubleZSig0;
6321             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6322         }
6323         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6324     }
6325     shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6326     zSig0 |= doubleZSig0;
6327     return roundAndPackFloatx80(status->floatx80_rounding_precision,
6328                                 0, zExp, zSig0, zSig1, status);
6329 }
6330
6331 /*----------------------------------------------------------------------------
6332 | Returns the result of converting the quadruple-precision floating-point
6333 | value `a' to the extended double-precision floating-point format.  The
6334 | conversion is performed according to the IEC/IEEE Standard for Binary
6335 | Floating-Point Arithmetic.
6336 *----------------------------------------------------------------------------*/
6337
6338 floatx80 float128_to_floatx80(float128 a, float_status *status)
6339 {
6340     bool aSign;
6341     int32_t aExp;
6342     uint64_t aSig0, aSig1;
6343
6344     aSig1 = extractFloat128Frac1( a );
6345     aSig0 = extractFloat128Frac0( a );
6346     aExp = extractFloat128Exp( a );
6347     aSign = extractFloat128Sign( a );
6348     if ( aExp == 0x7FFF ) {
6349         if ( aSig0 | aSig1 ) {
6350             floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6351                                                status);
6352             return floatx80_silence_nan(res, status);
6353         }
6354         return packFloatx80(aSign, floatx80_infinity_high,
6355                                    floatx80_infinity_low);
6356     }
6357     if ( aExp == 0 ) {
6358         if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6359         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6360     }
6361     else {
6362         aSig0 |= UINT64_C(0x0001000000000000);
6363     }
6364     shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
6365     return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
6366
6367 }
6368
6369 /*----------------------------------------------------------------------------
6370 | Returns the remainder of the quadruple-precision floating-point value `a'
6371 | with respect to the corresponding value `b'.  The operation is performed
6372 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6373 *----------------------------------------------------------------------------*/
6374
6375 float128 float128_rem(float128 a, float128 b, float_status *status)
6376 {
6377     bool aSign, zSign;
6378     int32_t aExp, bExp, expDiff;
6379     uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6380     uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6381     int64_t sigMean0;
6382
6383     aSig1 = extractFloat128Frac1( a );
6384     aSig0 = extractFloat128Frac0( a );
6385     aExp = extractFloat128Exp( a );
6386     aSign = extractFloat128Sign( a );
6387     bSig1 = extractFloat128Frac1( b );
6388     bSig0 = extractFloat128Frac0( b );
6389     bExp = extractFloat128Exp( b );
6390     if ( aExp == 0x7FFF ) {
6391         if (    ( aSig0 | aSig1 )
6392              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6393             return propagateFloat128NaN(a, b, status);
6394         }
6395         goto invalid;
6396     }
6397     if ( bExp == 0x7FFF ) {
6398         if (bSig0 | bSig1) {
6399             return propagateFloat128NaN(a, b, status);
6400         }
6401         return a;
6402     }
6403     if ( bExp == 0 ) {
6404         if ( ( bSig0 | bSig1 ) == 0 ) {
6405  invalid:
6406             float_raise(float_flag_invalid, status);
6407             return float128_default_nan(status);
6408         }
6409         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6410     }
6411     if ( aExp == 0 ) {
6412         if ( ( aSig0 | aSig1 ) == 0 ) return a;
6413         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6414     }
6415     expDiff = aExp - bExp;
6416     if ( expDiff < -1 ) return a;
6417     shortShift128Left(
6418         aSig0 | UINT64_C(0x0001000000000000),
6419         aSig1,
6420         15 - ( expDiff < 0 ),
6421         &aSig0,
6422         &aSig1
6423     );
6424     shortShift128Left(
6425         bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
6426     q = le128( bSig0, bSig1, aSig0, aSig1 );
6427     if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6428     expDiff -= 64;
6429     while ( 0 < expDiff ) {
6430         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6431         q = ( 4 < q ) ? q - 4 : 0;
6432         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6433         shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6434         shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6435         sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6436         expDiff -= 61;
6437     }
6438     if ( -64 < expDiff ) {
6439         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6440         q = ( 4 < q ) ? q - 4 : 0;
6441         q >>= - expDiff;
6442         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6443         expDiff += 52;
6444         if ( expDiff < 0 ) {
6445             shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6446         }
6447         else {
6448             shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6449         }
6450         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6451         sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6452     }
6453     else {
6454         shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6455         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6456     }
6457     do {
6458         alternateASig0 = aSig0;
6459         alternateASig1 = aSig1;
6460         ++q;
6461         sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6462     } while ( 0 <= (int64_t) aSig0 );
6463     add128(
6464         aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
6465     if (    ( sigMean0 < 0 )
6466          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6467         aSig0 = alternateASig0;
6468         aSig1 = alternateASig1;
6469     }
6470     zSign = ( (int64_t) aSig0 < 0 );
6471     if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
6472     return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6473                                          status);
6474 }
6475
6476 /*----------------------------------------------------------------------------
6477 | Returns the square root of the quadruple-precision floating-point value `a'.
6478 | The operation is performed according to the IEC/IEEE Standard for Binary
6479 | Floating-Point Arithmetic.
6480 *----------------------------------------------------------------------------*/
6481
6482 float128 float128_sqrt(float128 a, float_status *status)
6483 {
6484     bool aSign;
6485     int32_t aExp, zExp;
6486     uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6487     uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6488
6489     aSig1 = extractFloat128Frac1( a );
6490     aSig0 = extractFloat128Frac0( a );
6491     aExp = extractFloat128Exp( a );
6492     aSign = extractFloat128Sign( a );
6493     if ( aExp == 0x7FFF ) {
6494         if (aSig0 | aSig1) {
6495             return propagateFloat128NaN(a, a, status);
6496         }
6497         if ( ! aSign ) return a;
6498         goto invalid;
6499     }
6500     if ( aSign ) {
6501         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6502  invalid:
6503         float_raise(float_flag_invalid, status);
6504         return float128_default_nan(status);
6505     }
6506     if ( aExp == 0 ) {
6507         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6508         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6509     }
6510     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6511     aSig0 |= UINT64_C(0x0001000000000000);
6512     zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6513     shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6514     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6515     doubleZSig0 = zSig0<<1;
6516     mul64To128( zSig0, zSig0, &term0, &term1 );
6517     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
6518     while ( (int64_t) rem0 < 0 ) {
6519         --zSig0;
6520         doubleZSig0 -= 2;
6521         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6522     }
6523     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6524     if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6525         if ( zSig1 == 0 ) zSig1 = 1;
6526         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6527         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6528         mul64To128( zSig1, zSig1, &term2, &term3 );
6529         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
6530         while ( (int64_t) rem1 < 0 ) {
6531             --zSig1;
6532             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6533             term3 |= 1;
6534             term2 |= doubleZSig0;
6535             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6536         }
6537         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6538     }
6539     shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
6540     return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
6541
6542 }
6543
6544 static inline FloatRelation
6545 floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
6546                           float_status *status)
6547 {
6548     bool aSign, bSign;
6549
6550     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6551         float_raise(float_flag_invalid, status);
6552         return float_relation_unordered;
6553     }
6554     if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6555           ( extractFloatx80Frac( a )<<1 ) ) ||
6556         ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6557           ( extractFloatx80Frac( b )<<1 ) )) {
6558         if (!is_quiet ||
6559             floatx80_is_signaling_nan(a, status) ||
6560             floatx80_is_signaling_nan(b, status)) {
6561             float_raise(float_flag_invalid, status);
6562         }
6563         return float_relation_unordered;
6564     }
6565     aSign = extractFloatx80Sign( a );
6566     bSign = extractFloatx80Sign( b );
6567     if ( aSign != bSign ) {
6568
6569         if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6570              ( ( a.low | b.low ) == 0 ) ) {
6571             /* zero case */
6572             return float_relation_equal;
6573         } else {
6574             return 1 - (2 * aSign);
6575         }
6576     } else {
6577         /* Normalize pseudo-denormals before comparison.  */
6578         if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
6579             ++a.high;
6580         }
6581         if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
6582             ++b.high;
6583         }
6584         if (a.low == b.low && a.high == b.high) {
6585             return float_relation_equal;
6586         } else {
6587             return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6588         }
6589     }
6590 }
6591
6592 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
6593 {
6594     return floatx80_compare_internal(a, b, 0, status);
6595 }
6596
6597 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
6598                                      float_status *status)
6599 {
6600     return floatx80_compare_internal(a, b, 1, status);
6601 }
6602
6603 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
6604 {
6605     bool aSign;
6606     int32_t aExp;
6607     uint64_t aSig;
6608
6609     if (floatx80_invalid_encoding(a)) {
6610         float_raise(float_flag_invalid, status);
6611         return floatx80_default_nan(status);
6612     }
6613     aSig = extractFloatx80Frac( a );
6614     aExp = extractFloatx80Exp( a );
6615     aSign = extractFloatx80Sign( a );
6616
6617     if ( aExp == 0x7FFF ) {
6618         if ( aSig<<1 ) {
6619             return propagateFloatx80NaN(a, a, status);
6620         }
6621         return a;
6622     }
6623
6624     if (aExp == 0) {
6625         if (aSig == 0) {
6626             return a;
6627         }
6628         aExp++;
6629     }
6630
6631     if (n > 0x10000) {
6632         n = 0x10000;
6633     } else if (n < -0x10000) {
6634         n = -0x10000;
6635     }
6636
6637     aExp += n;
6638     return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
6639                                          aSign, aExp, aSig, 0, status);
6640 }
6641
6642 static void __attribute__((constructor)) softfloat_init(void)
6643 {
6644     union_float64 ua, ub, uc, ur;
6645
6646     if (QEMU_NO_HARDFLOAT) {
6647         return;
6648     }
6649     /*
6650      * Test that the host's FMA is not obviously broken. For example,
6651      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6652      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6653      */
6654     ua.s = 0x0020000000000001ULL;
6655     ub.s = 0x3ca0000000000000ULL;
6656     uc.s = 0x0020000000000000ULL;
6657     ur.h = fma(ua.h, ub.h, uc.h);
6658     if (ur.s != 0x0020000000000001ULL) {
6659         force_soft_fma = true;
6660     }
6661 }