4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
44 ===============================================================================
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
85 #include "qemu/osdep.h"
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
90 /* We only need stdlib for abort() */
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 float_raise(float_flag_input_denormal, s); \
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck
, float32
)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck
, float64
)
141 #undef GEN_INPUT_FLUSH__NOCHECK
143 #define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
146 if (likely(!s->flush_inputs_to_zero)) { \
149 soft_t ## _input_flush__nocheck(a, s); \
152 GEN_INPUT_FLUSH1(float32_input_flush1
, float32
)
153 GEN_INPUT_FLUSH1(float64_input_flush1
, float64
)
154 #undef GEN_INPUT_FLUSH1
156 #define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
159 if (likely(!s->flush_inputs_to_zero)) { \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
166 GEN_INPUT_FLUSH2(float32_input_flush2
, float32
)
167 GEN_INPUT_FLUSH2(float64_input_flush2
, float64
)
168 #undef GEN_INPUT_FLUSH2
170 #define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
173 if (likely(!s->flush_inputs_to_zero)) { \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
181 GEN_INPUT_FLUSH3(float32_input_flush3
, float32
)
182 GEN_INPUT_FLUSH3(float64_input_flush3
, float64
)
183 #undef GEN_INPUT_FLUSH3
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the
 * generated hardfloat functions. Each combination of number of inputs and
 * float size gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF 1
#else
# define QEMU_HARDFLOAT_USE_ISINF 0
#endif
/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    inexact flag
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
235 static inline bool can_use_fpu(const float_status
*s
)
237 if (QEMU_NO_HARDFLOAT
) {
240 return likely(s
->float_exception_flags
& float_flag_inexact
&&
241 s
->float_rounding_mode
== float_round_nearest_even
);
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
266 typedef bool (*f32_check_fn
)(union_float32 a
, union_float32 b
);
267 typedef bool (*f64_check_fn
)(union_float64 a
, union_float64 b
);
269 typedef float32 (*soft_f32_op2_fn
)(float32 a
, float32 b
, float_status
*s
);
270 typedef float64 (*soft_f64_op2_fn
)(float64 a
, float64 b
, float_status
*s
);
271 typedef float (*hard_f32_op2_fn
)(float a
, float b
);
272 typedef double (*hard_f64_op2_fn
)(double a
, double b
);
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a
, union_float32 b
)
277 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
282 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
283 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
285 return float32_is_zero_or_normal(a
.s
) &&
286 float32_is_zero_or_normal(b
.s
);
289 static inline bool f64_is_zon2(union_float64 a
, union_float64 b
)
291 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
292 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
293 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
295 return float64_is_zero_or_normal(a
.s
) &&
296 float64_is_zero_or_normal(b
.s
);
299 /* 3-input is-zero-or-normal */
301 bool f32_is_zon3(union_float32 a
, union_float32 b
, union_float32 c
)
303 if (QEMU_HARDFLOAT_3F32_USE_FP
) {
304 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
305 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
306 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
308 return float32_is_zero_or_normal(a
.s
) &&
309 float32_is_zero_or_normal(b
.s
) &&
310 float32_is_zero_or_normal(c
.s
);
314 bool f64_is_zon3(union_float64 a
, union_float64 b
, union_float64 c
)
316 if (QEMU_HARDFLOAT_3F64_USE_FP
) {
317 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
318 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
319 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
321 return float64_is_zero_or_normal(a
.s
) &&
322 float64_is_zero_or_normal(b
.s
) &&
323 float64_is_zero_or_normal(c
.s
);
326 static inline bool f32_is_inf(union_float32 a
)
328 if (QEMU_HARDFLOAT_USE_ISINF
) {
331 return float32_is_infinity(a
.s
);
334 static inline bool f64_is_inf(union_float64 a
)
336 if (QEMU_HARDFLOAT_USE_ISINF
) {
339 return float64_is_infinity(a
.s
);
342 static inline float32
343 float32_gen2(float32 xa
, float32 xb
, float_status
*s
,
344 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
,
345 f32_check_fn pre
, f32_check_fn post
)
347 union_float32 ua
, ub
, ur
;
352 if (unlikely(!can_use_fpu(s
))) {
356 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
357 if (unlikely(!pre(ua
, ub
))) {
361 ur
.h
= hard(ua
.h
, ub
.h
);
362 if (unlikely(f32_is_inf(ur
))) {
363 float_raise(float_flag_overflow
, s
);
364 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
) && post(ua
, ub
)) {
370 return soft(ua
.s
, ub
.s
, s
);
373 static inline float64
374 float64_gen2(float64 xa
, float64 xb
, float_status
*s
,
375 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
,
376 f64_check_fn pre
, f64_check_fn post
)
378 union_float64 ua
, ub
, ur
;
383 if (unlikely(!can_use_fpu(s
))) {
387 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
388 if (unlikely(!pre(ua
, ub
))) {
392 ur
.h
= hard(ua
.h
, ub
.h
);
393 if (unlikely(f64_is_inf(ur
))) {
394 float_raise(float_flag_overflow
, s
);
395 } else if (unlikely(fabs(ur
.h
) <= DBL_MIN
) && post(ua
, ub
)) {
401 return soft(ua
.s
, ub
.s
, s
);
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
408 static inline uint32_t extractFloat32Frac(float32 a
)
410 return float32_val(a
) & 0x007FFFFF;
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
417 static inline int extractFloat32Exp(float32 a
)
419 return (float32_val(a
) >> 23) & 0xFF;
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
426 static inline bool extractFloat32Sign(float32 a
)
428 return float32_val(a
) >> 31;
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
435 static inline uint64_t extractFloat64Frac(float64 a
)
437 return float64_val(a
) & UINT64_C(0x000FFFFFFFFFFFFF);
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
444 static inline int extractFloat64Exp(float64 a
)
446 return (float64_val(a
) >> 52) & 0x7FF;
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
453 static inline bool extractFloat64Sign(float64 a
)
455 return float64_val(a
) >> 63;
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
472 #define float_cmask(bit) (1u << (bit))
475 float_cmask_zero
= float_cmask(float_class_zero
),
476 float_cmask_normal
= float_cmask(float_class_normal
),
477 float_cmask_inf
= float_cmask(float_class_inf
),
478 float_cmask_qnan
= float_cmask(float_class_qnan
),
479 float_cmask_snan
= float_cmask(float_class_snan
),
481 float_cmask_infzero
= float_cmask_zero
| float_cmask_inf
,
482 float_cmask_anynan
= float_cmask_qnan
| float_cmask_snan
,
486 /* Simple helpers for checking if, or what kind of, NaN we have */
487 static inline __attribute__((unused
)) bool is_nan(FloatClass c
)
489 return unlikely(c
>= float_class_qnan
);
492 static inline __attribute__((unused
)) bool is_snan(FloatClass c
)
494 return c
== float_class_snan
;
497 static inline __attribute__((unused
)) bool is_qnan(FloatClass c
)
499 return c
== float_class_qnan
;
503 * Structure holding all of the decomposed parts of a float.
504 * The exponent is unbiased and the fraction is normalized.
506 * The fraction words are stored in big-endian word ordering,
507 * so that truncation from a larger format to a smaller format
508 * can be done simply by ignoring subsequent elements.
516 /* Routines that know the structure may reference the singular name. */
519 * Routines expanded with multiple structures reference "hi" and "lo"
520 * depending on the operation. In FloatParts64, "hi" and "lo" are
521 * both the same word and aliased here.
541 uint64_t frac_hm
; /* high-middle */
542 uint64_t frac_lm
; /* low-middle */
546 /* These apply to the most significant word of each FloatPartsN. */
547 #define DECOMPOSED_BINARY_POINT 63
548 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
550 /* Structure holding all of the relevant parameters for a format.
551 * exp_size: the size of the exponent field
552 * exp_bias: the offset applied to the exponent field
553 * exp_max: the maximum normalised exponent
554 * frac_size: the size of the fraction field
555 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
556 * The following are computed based the size of fraction
557 * frac_lsb: least significant bit of fraction
558 * frac_lsbm1: the bit below the least significant bit (for rounding)
559 * round_mask/roundeven_mask: masks used for rounding
560 * The following optional modifiers are available:
561 * arm_althp: handle ARM Alternative Half Precision
572 uint64_t roundeven_mask
;
576 /* Expand fields based on the size of exponent and fraction */
/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS(E, F)                                      \
    .exp_size       = E,                                        \
    .exp_bias       = ((1 << E) - 1) >> 1,                      \
    .exp_max        = (1 << E) - 1,                             \
    .frac_size      = F,                                        \
    .frac_shift     = (-F - 1) & 63,                            \
    .frac_lsb       = 1ull << ((-F - 1) & 63),                  \
    .frac_lsbm1     = 1ull << ((-F - 2) & 63),                  \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1,            \
    .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1
588 static const FloatFmt float16_params
= {
592 static const FloatFmt float16_params_ahp
= {
597 static const FloatFmt bfloat16_params
= {
601 static const FloatFmt float32_params
= {
605 static const FloatFmt float64_params
= {
609 static const FloatFmt float128_params
= {
610 FLOAT_PARAMS(15, 112)
613 /* Unpack a float to parts, but do not canonicalize. */
614 static void unpack_raw64(FloatParts64
*r
, const FloatFmt
*fmt
, uint64_t raw
)
616 const int f_size
= fmt
->frac_size
;
617 const int e_size
= fmt
->exp_size
;
619 *r
= (FloatParts64
) {
620 .cls
= float_class_unclassified
,
621 .sign
= extract64(raw
, f_size
+ e_size
, 1),
622 .exp
= extract64(raw
, f_size
, e_size
),
623 .frac
= extract64(raw
, 0, f_size
)
627 static inline void float16_unpack_raw(FloatParts64
*p
, float16 f
)
629 unpack_raw64(p
, &float16_params
, f
);
632 static inline void bfloat16_unpack_raw(FloatParts64
*p
, bfloat16 f
)
634 unpack_raw64(p
, &bfloat16_params
, f
);
637 static inline void float32_unpack_raw(FloatParts64
*p
, float32 f
)
639 unpack_raw64(p
, &float32_params
, f
);
642 static inline void float64_unpack_raw(FloatParts64
*p
, float64 f
)
644 unpack_raw64(p
, &float64_params
, f
);
647 static void float128_unpack_raw(FloatParts128
*p
, float128 f
)
649 const int f_size
= float128_params
.frac_size
- 64;
650 const int e_size
= float128_params
.exp_size
;
652 *p
= (FloatParts128
) {
653 .cls
= float_class_unclassified
,
654 .sign
= extract64(f
.high
, f_size
+ e_size
, 1),
655 .exp
= extract64(f
.high
, f_size
, e_size
),
656 .frac_hi
= extract64(f
.high
, 0, f_size
),
661 /* Pack a float from parts, but do not canonicalize. */
662 static uint64_t pack_raw64(const FloatParts64
*p
, const FloatFmt
*fmt
)
664 const int f_size
= fmt
->frac_size
;
665 const int e_size
= fmt
->exp_size
;
668 ret
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
669 ret
= deposit64(ret
, f_size
, e_size
, p
->exp
);
670 ret
= deposit64(ret
, 0, f_size
, p
->frac
);
674 static inline float16
float16_pack_raw(const FloatParts64
*p
)
676 return make_float16(pack_raw64(p
, &float16_params
));
679 static inline bfloat16
bfloat16_pack_raw(const FloatParts64
*p
)
681 return pack_raw64(p
, &bfloat16_params
);
684 static inline float32
float32_pack_raw(const FloatParts64
*p
)
686 return make_float32(pack_raw64(p
, &float32_params
));
689 static inline float64
float64_pack_raw(const FloatParts64
*p
)
691 return make_float64(pack_raw64(p
, &float64_params
));
694 static float128
float128_pack_raw(const FloatParts128
*p
)
696 const int f_size
= float128_params
.frac_size
- 64;
697 const int e_size
= float128_params
.exp_size
;
700 hi
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
701 hi
= deposit64(hi
, f_size
, e_size
, p
->exp
);
702 hi
= deposit64(hi
, 0, f_size
, p
->frac_hi
);
703 return make_float128(hi
, p
->frac_lo
);
706 /*----------------------------------------------------------------------------
707 | Functions and definitions to determine: (1) whether tininess for underflow
708 | is detected before or after rounding by default, (2) what (if anything)
709 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
710 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
711 | are propagated from function inputs to output. These details are target-
713 *----------------------------------------------------------------------------*/
714 #include "softfloat-specialize.c.inc"
716 #define PARTS_GENERIC_64_128(NAME, P) \
717 QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
719 #define PARTS_GENERIC_64_128_256(NAME, P) \
720 QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
721 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
723 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
724 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
726 static void parts64_return_nan(FloatParts64
*a
, float_status
*s
);
727 static void parts128_return_nan(FloatParts128
*a
, float_status
*s
);
729 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
731 static FloatParts64
*parts64_pick_nan(FloatParts64
*a
, FloatParts64
*b
,
733 static FloatParts128
*parts128_pick_nan(FloatParts128
*a
, FloatParts128
*b
,
736 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
738 static FloatParts64
*parts64_pick_nan_muladd(FloatParts64
*a
, FloatParts64
*b
,
739 FloatParts64
*c
, float_status
*s
,
740 int ab_mask
, int abc_mask
);
741 static FloatParts128
*parts128_pick_nan_muladd(FloatParts128
*a
,
745 int ab_mask
, int abc_mask
);
747 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
748 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
750 static void parts64_canonicalize(FloatParts64
*p
, float_status
*status
,
751 const FloatFmt
*fmt
);
752 static void parts128_canonicalize(FloatParts128
*p
, float_status
*status
,
753 const FloatFmt
*fmt
);
755 #define parts_canonicalize(A, S, F) \
756 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
758 static void parts64_uncanon(FloatParts64
*p
, float_status
*status
,
759 const FloatFmt
*fmt
);
760 static void parts128_uncanon(FloatParts128
*p
, float_status
*status
,
761 const FloatFmt
*fmt
);
763 #define parts_uncanon(A, S, F) \
764 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
766 static void parts64_add_normal(FloatParts64
*a
, FloatParts64
*b
);
767 static void parts128_add_normal(FloatParts128
*a
, FloatParts128
*b
);
768 static void parts256_add_normal(FloatParts256
*a
, FloatParts256
*b
);
770 #define parts_add_normal(A, B) \
771 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
773 static bool parts64_sub_normal(FloatParts64
*a
, FloatParts64
*b
);
774 static bool parts128_sub_normal(FloatParts128
*a
, FloatParts128
*b
);
775 static bool parts256_sub_normal(FloatParts256
*a
, FloatParts256
*b
);
777 #define parts_sub_normal(A, B) \
778 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
780 static FloatParts64
*parts64_addsub(FloatParts64
*a
, FloatParts64
*b
,
781 float_status
*s
, bool subtract
);
782 static FloatParts128
*parts128_addsub(FloatParts128
*a
, FloatParts128
*b
,
783 float_status
*s
, bool subtract
);
785 #define parts_addsub(A, B, S, Z) \
786 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
788 static FloatParts64
*parts64_mul(FloatParts64
*a
, FloatParts64
*b
,
790 static FloatParts128
*parts128_mul(FloatParts128
*a
, FloatParts128
*b
,
793 #define parts_mul(A, B, S) \
794 PARTS_GENERIC_64_128(mul, A)(A, B, S)
796 static FloatParts64
*parts64_muladd(FloatParts64
*a
, FloatParts64
*b
,
797 FloatParts64
*c
, int flags
,
799 static FloatParts128
*parts128_muladd(FloatParts128
*a
, FloatParts128
*b
,
800 FloatParts128
*c
, int flags
,
803 #define parts_muladd(A, B, C, Z, S) \
804 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
806 static FloatParts64
*parts64_div(FloatParts64
*a
, FloatParts64
*b
,
808 static FloatParts128
*parts128_div(FloatParts128
*a
, FloatParts128
*b
,
811 #define parts_div(A, B, S) \
812 PARTS_GENERIC_64_128(div, A)(A, B, S)
814 static bool parts64_round_to_int_normal(FloatParts64
*a
, FloatRoundMode rm
,
815 int scale
, int frac_size
);
816 static bool parts128_round_to_int_normal(FloatParts128
*a
, FloatRoundMode r
,
817 int scale
, int frac_size
);
819 #define parts_round_to_int_normal(A, R, C, F) \
820 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
822 static void parts64_round_to_int(FloatParts64
*a
, FloatRoundMode rm
,
823 int scale
, float_status
*s
,
824 const FloatFmt
*fmt
);
825 static void parts128_round_to_int(FloatParts128
*a
, FloatRoundMode r
,
826 int scale
, float_status
*s
,
827 const FloatFmt
*fmt
);
829 #define parts_round_to_int(A, R, C, S, F) \
830 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
832 static int64_t parts64_float_to_sint(FloatParts64
*p
, FloatRoundMode rmode
,
833 int scale
, int64_t min
, int64_t max
,
835 static int64_t parts128_float_to_sint(FloatParts128
*p
, FloatRoundMode rmode
,
836 int scale
, int64_t min
, int64_t max
,
839 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
840 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
842 static uint64_t parts64_float_to_uint(FloatParts64
*p
, FloatRoundMode rmode
,
843 int scale
, uint64_t max
,
845 static uint64_t parts128_float_to_uint(FloatParts128
*p
, FloatRoundMode rmode
,
846 int scale
, uint64_t max
,
849 #define parts_float_to_uint(P, R, Z, M, S) \
850 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
852 static void parts64_sint_to_float(FloatParts64
*p
, int64_t a
,
853 int scale
, float_status
*s
);
854 static void parts128_sint_to_float(FloatParts128
*p
, int64_t a
,
855 int scale
, float_status
*s
);
857 #define parts_sint_to_float(P, I, Z, S) \
858 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
860 static void parts64_uint_to_float(FloatParts64
*p
, uint64_t a
,
861 int scale
, float_status
*s
);
862 static void parts128_uint_to_float(FloatParts128
*p
, uint64_t a
,
863 int scale
, float_status
*s
);
865 #define parts_uint_to_float(P, I, Z, S) \
866 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
869 * Helper functions for softfloat-parts.c.inc, per-size operations.
872 #define FRAC_GENERIC_64_128(NAME, P) \
873 QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
875 #define FRAC_GENERIC_64_128_256(NAME, P) \
876 QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
877 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
879 static bool frac64_add(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
881 return uadd64_overflow(a
->frac
, b
->frac
, &r
->frac
);
884 static bool frac128_add(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
887 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
888 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
892 static bool frac256_add(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
895 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
896 r
->frac_lm
= uadd64_carry(a
->frac_lm
, b
->frac_lm
, &c
);
897 r
->frac_hm
= uadd64_carry(a
->frac_hm
, b
->frac_hm
, &c
);
898 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
902 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
904 static bool frac64_addi(FloatParts64
*r
, FloatParts64
*a
, uint64_t c
)
906 return uadd64_overflow(a
->frac
, c
, &r
->frac
);
909 static bool frac128_addi(FloatParts128
*r
, FloatParts128
*a
, uint64_t c
)
911 c
= uadd64_overflow(a
->frac_lo
, c
, &r
->frac_lo
);
912 return uadd64_overflow(a
->frac_hi
, c
, &r
->frac_hi
);
915 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
917 static void frac64_allones(FloatParts64
*a
)
922 static void frac128_allones(FloatParts128
*a
)
924 a
->frac_hi
= a
->frac_lo
= -1;
927 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
929 static int frac64_cmp(FloatParts64
*a
, FloatParts64
*b
)
931 return a
->frac
== b
->frac
? 0 : a
->frac
< b
->frac
? -1 : 1;
934 static int frac128_cmp(FloatParts128
*a
, FloatParts128
*b
)
936 uint64_t ta
= a
->frac_hi
, tb
= b
->frac_hi
;
938 ta
= a
->frac_lo
, tb
= b
->frac_lo
;
943 return ta
< tb
? -1 : 1;
946 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
948 static void frac64_clear(FloatParts64
*a
)
953 static void frac128_clear(FloatParts128
*a
)
955 a
->frac_hi
= a
->frac_lo
= 0;
958 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
960 static bool frac64_div(FloatParts64
*a
, FloatParts64
*b
)
962 uint64_t n1
, n0
, r
, q
;
966 * We want a 2*N / N-bit division to produce exactly an N-bit
967 * result, so that we do not lose any precision and so that we
968 * do not have to renormalize afterward. If A.frac < B.frac,
969 * then division would produce an (N-1)-bit result; shift A left
970 * by one to produce the an N-bit result, and return true to
971 * decrement the exponent to match.
973 * The udiv_qrnnd algorithm that we're using requires normalization,
974 * i.e. the msb of the denominator must be set, which is already true.
976 ret
= a
->frac
< b
->frac
;
984 q
= udiv_qrnnd(&r
, n0
, n1
, b
->frac
);
986 /* Set lsb if there is a remainder, to set inexact. */
987 a
->frac
= q
| (r
!= 0);
992 static bool frac128_div(FloatParts128
*a
, FloatParts128
*b
)
994 uint64_t q0
, q1
, a0
, a1
, b0
, b1
;
995 uint64_t r0
, r1
, r2
, r3
, t0
, t1
, t2
, t3
;
998 a0
= a
->frac_hi
, a1
= a
->frac_lo
;
999 b0
= b
->frac_hi
, b1
= b
->frac_lo
;
1001 ret
= lt128(a0
, a1
, b0
, b1
);
1003 a1
= shr_double(a0
, a1
, 1);
1007 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1008 q0
= estimateDiv128To64(a0
, a1
, b0
);
1011 * Estimate is high because B1 was not included (unless B1 == 0).
1012 * Reduce quotient and increase remainder until remainder is non-negative.
1013 * This loop will execute 0 to 2 times.
1015 mul128By64To192(b0
, b1
, q0
, &t0
, &t1
, &t2
);
1016 sub192(a0
, a1
, 0, t0
, t1
, t2
, &r0
, &r1
, &r2
);
1019 add192(r0
, r1
, r2
, 0, b0
, b1
, &r0
, &r1
, &r2
);
1022 /* Repeat using the remainder, producing a second word of quotient. */
1023 q1
= estimateDiv128To64(r1
, r2
, b0
);
1024 mul128By64To192(b0
, b1
, q1
, &t1
, &t2
, &t3
);
1025 sub192(r1
, r2
, 0, t1
, t2
, t3
, &r1
, &r2
, &r3
);
1028 add192(r1
, r2
, r3
, 0, b0
, b1
, &r1
, &r2
, &r3
);
1031 /* Any remainder indicates inexact; set sticky bit. */
1032 q1
|= (r2
| r3
) != 0;
1039 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1041 static bool frac64_eqz(FloatParts64
*a
)
1043 return a
->frac
== 0;
1046 static bool frac128_eqz(FloatParts128
*a
)
1048 return (a
->frac_hi
| a
->frac_lo
) == 0;
1051 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1053 static void frac64_mulw(FloatParts128
*r
, FloatParts64
*a
, FloatParts64
*b
)
1055 mulu64(&r
->frac_lo
, &r
->frac_hi
, a
->frac
, b
->frac
);
1058 static void frac128_mulw(FloatParts256
*r
, FloatParts128
*a
, FloatParts128
*b
)
1060 mul128To256(a
->frac_hi
, a
->frac_lo
, b
->frac_hi
, b
->frac_lo
,
1061 &r
->frac_hi
, &r
->frac_hm
, &r
->frac_lm
, &r
->frac_lo
);
1064 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1066 static void frac64_neg(FloatParts64
*a
)
1071 static void frac128_neg(FloatParts128
*a
)
1074 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1075 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1078 static void frac256_neg(FloatParts256
*a
)
1081 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1082 a
->frac_lm
= usub64_borrow(0, a
->frac_lm
, &c
);
1083 a
->frac_hm
= usub64_borrow(0, a
->frac_hm
, &c
);
1084 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1087 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1089 static int frac64_normalize(FloatParts64
*a
)
1092 int shift
= clz64(a
->frac
);
1099 static int frac128_normalize(FloatParts128
*a
)
1102 int shl
= clz64(a
->frac_hi
);
1103 a
->frac_hi
= shl_double(a
->frac_hi
, a
->frac_lo
, shl
);
1106 } else if (a
->frac_lo
) {
1107 int shl
= clz64(a
->frac_lo
);
1108 a
->frac_hi
= a
->frac_lo
<< shl
;
1115 static int frac256_normalize(FloatParts256
*a
)
1117 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1118 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1130 a0
= a1
, a1
= a2
, a2
= a3
, a3
= 0;
1133 a0
= a2
, a1
= a3
, a2
= 0, a3
= 0;
1136 a0
= a3
, a1
= 0, a2
= 0, a3
= 0;
1139 a0
= 0, a1
= 0, a2
= 0, a3
= 0;
1149 a0
= shl_double(a0
, a1
, shl
);
1150 a1
= shl_double(a1
, a2
, shl
);
1151 a2
= shl_double(a2
, a3
, shl
);
1162 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1164 static void frac64_shl(FloatParts64
*a
, int c
)
1169 static void frac128_shl(FloatParts128
*a
, int c
)
1171 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1179 a0
= shl_double(a0
, a1
, c
);
1187 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1189 static void frac64_shr(FloatParts64
*a
, int c
)
1194 static void frac128_shr(FloatParts128
*a
, int c
)
1196 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1204 a1
= shr_double(a0
, a1
, c
);
1212 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1214 static void frac64_shrjam(FloatParts64
*a
, int c
)
1216 uint64_t a0
= a
->frac
;
1218 if (likely(c
!= 0)) {
1219 if (likely(c
< 64)) {
1220 a0
= (a0
>> c
) | (shr_double(a0
, 0, c
) != 0);
1228 static void frac128_shrjam(FloatParts128
*a
, int c
)
1230 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1231 uint64_t sticky
= 0;
1233 if (unlikely(c
== 0)) {
1235 } else if (likely(c
< 64)) {
1237 } else if (likely(c
< 128)) {
1251 sticky
|= shr_double(a1
, 0, c
);
1252 a1
= shr_double(a0
, a1
, c
);
1256 a
->frac_lo
= a1
| (sticky
!= 0);
1260 static void frac256_shrjam(FloatParts256
*a
, int c
)
1262 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1263 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1264 uint64_t sticky
= 0;
1266 if (unlikely(c
== 0)) {
1268 } else if (likely(c
< 64)) {
1270 } else if (likely(c
< 256)) {
1271 if (unlikely(c
& 128)) {
1273 a3
= a1
, a2
= a0
, a1
= 0, a0
= 0;
1275 if (unlikely(c
& 64)) {
1277 a3
= a2
, a2
= a1
, a1
= a0
, a0
= 0;
1284 sticky
= a0
| a1
| a2
| a3
;
1285 a0
= a1
= a2
= a3
= 0;
1289 sticky
|= shr_double(a3
, 0, c
);
1290 a3
= shr_double(a2
, a3
, c
);
1291 a2
= shr_double(a1
, a2
, c
);
1292 a1
= shr_double(a0
, a1
, c
);
1296 a
->frac_lo
= a3
| (sticky
!= 0);
1302 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1304 static bool frac64_sub(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
1306 return usub64_overflow(a
->frac
, b
->frac
, &r
->frac
);
1309 static bool frac128_sub(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
1312 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1313 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1317 static bool frac256_sub(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
1320 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1321 r
->frac_lm
= usub64_borrow(a
->frac_lm
, b
->frac_lm
, &c
);
1322 r
->frac_hm
= usub64_borrow(a
->frac_hm
, b
->frac_hm
, &c
);
1323 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1327 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1329 static void frac64_truncjam(FloatParts64
*r
, FloatParts128
*a
)
1331 r
->frac
= a
->frac_hi
| (a
->frac_lo
!= 0);
1334 static void frac128_truncjam(FloatParts128
*r
, FloatParts256
*a
)
1336 r
->frac_hi
= a
->frac_hi
;
1337 r
->frac_lo
= a
->frac_hm
| ((a
->frac_lm
| a
->frac_lo
) != 0);
1340 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1342 static void frac64_widen(FloatParts128
*r
, FloatParts64
*a
)
1344 r
->frac_hi
= a
->frac
;
1348 static void frac128_widen(FloatParts256
*r
, FloatParts128
*a
)
1350 r
->frac_hi
= a
->frac_hi
;
1351 r
->frac_hm
= a
->frac_lo
;
1356 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1358 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1359 #define FloatPartsN glue(FloatParts,N)
1360 #define FloatPartsW glue(FloatParts,W)
1365 #include "softfloat-parts-addsub.c.inc"
1366 #include "softfloat-parts.c.inc"
1373 #include "softfloat-parts-addsub.c.inc"
1374 #include "softfloat-parts.c.inc"
1380 #include "softfloat-parts-addsub.c.inc"
1389 * Pack/unpack routines with a specific FloatFmt.
1392 static void float16a_unpack_canonical(FloatParts64
*p
, float16 f
,
1393 float_status
*s
, const FloatFmt
*params
)
1395 float16_unpack_raw(p
, f
);
1396 parts_canonicalize(p
, s
, params
);
1399 static void float16_unpack_canonical(FloatParts64
*p
, float16 f
,
1402 float16a_unpack_canonical(p
, f
, s
, &float16_params
);
1405 static void bfloat16_unpack_canonical(FloatParts64
*p
, bfloat16 f
,
1408 bfloat16_unpack_raw(p
, f
);
1409 parts_canonicalize(p
, s
, &bfloat16_params
);
1412 static float16
float16a_round_pack_canonical(FloatParts64
*p
,
1414 const FloatFmt
*params
)
1416 parts_uncanon(p
, s
, params
);
1417 return float16_pack_raw(p
);
1420 static float16
float16_round_pack_canonical(FloatParts64
*p
,
1423 return float16a_round_pack_canonical(p
, s
, &float16_params
);
1426 static bfloat16
bfloat16_round_pack_canonical(FloatParts64
*p
,
1429 parts_uncanon(p
, s
, &bfloat16_params
);
1430 return bfloat16_pack_raw(p
);
1433 static void float32_unpack_canonical(FloatParts64
*p
, float32 f
,
1436 float32_unpack_raw(p
, f
);
1437 parts_canonicalize(p
, s
, &float32_params
);
1440 static float32
float32_round_pack_canonical(FloatParts64
*p
,
1443 parts_uncanon(p
, s
, &float32_params
);
1444 return float32_pack_raw(p
);
1447 static void float64_unpack_canonical(FloatParts64
*p
, float64 f
,
1450 float64_unpack_raw(p
, f
);
1451 parts_canonicalize(p
, s
, &float64_params
);
1454 static float64
float64_round_pack_canonical(FloatParts64
*p
,
1457 parts_uncanon(p
, s
, &float64_params
);
1458 return float64_pack_raw(p
);
1461 static void float128_unpack_canonical(FloatParts128
*p
, float128 f
,
1464 float128_unpack_raw(p
, f
);
1465 parts_canonicalize(p
, s
, &float128_params
);
1468 static float128
float128_round_pack_canonical(FloatParts128
*p
,
1471 parts_uncanon(p
, s
, &float128_params
);
1472 return float128_pack_raw(p
);
1476 * Addition and subtraction
1479 static float16 QEMU_FLATTEN
1480 float16_addsub(float16 a
, float16 b
, float_status
*status
, bool subtract
)
1482 FloatParts64 pa
, pb
, *pr
;
1484 float16_unpack_canonical(&pa
, a
, status
);
1485 float16_unpack_canonical(&pb
, b
, status
);
1486 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1488 return float16_round_pack_canonical(pr
, status
);
1491 float16
float16_add(float16 a
, float16 b
, float_status
*status
)
1493 return float16_addsub(a
, b
, status
, false);
1496 float16
float16_sub(float16 a
, float16 b
, float_status
*status
)
1498 return float16_addsub(a
, b
, status
, true);
1501 static float32 QEMU_SOFTFLOAT_ATTR
1502 soft_f32_addsub(float32 a
, float32 b
, float_status
*status
, bool subtract
)
1504 FloatParts64 pa
, pb
, *pr
;
1506 float32_unpack_canonical(&pa
, a
, status
);
1507 float32_unpack_canonical(&pb
, b
, status
);
1508 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1510 return float32_round_pack_canonical(pr
, status
);
1513 static float32
soft_f32_add(float32 a
, float32 b
, float_status
*status
)
1515 return soft_f32_addsub(a
, b
, status
, false);
1518 static float32
soft_f32_sub(float32 a
, float32 b
, float_status
*status
)
1520 return soft_f32_addsub(a
, b
, status
, true);
1523 static float64 QEMU_SOFTFLOAT_ATTR
1524 soft_f64_addsub(float64 a
, float64 b
, float_status
*status
, bool subtract
)
1526 FloatParts64 pa
, pb
, *pr
;
1528 float64_unpack_canonical(&pa
, a
, status
);
1529 float64_unpack_canonical(&pb
, b
, status
);
1530 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1532 return float64_round_pack_canonical(pr
, status
);
1535 static float64
soft_f64_add(float64 a
, float64 b
, float_status
*status
)
1537 return soft_f64_addsub(a
, b
, status
, false);
1540 static float64
soft_f64_sub(float64 a
, float64 b
, float_status
*status
)
1542 return soft_f64_addsub(a
, b
, status
, true);
1545 static float hard_f32_add(float a
, float b
)
1550 static float hard_f32_sub(float a
, float b
)
1555 static double hard_f64_add(double a
, double b
)
1560 static double hard_f64_sub(double a
, double b
)
1565 static bool f32_addsubmul_post(union_float32 a
, union_float32 b
)
1567 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
1568 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1570 return !(float32_is_zero(a
.s
) && float32_is_zero(b
.s
));
1573 static bool f64_addsubmul_post(union_float64 a
, union_float64 b
)
1575 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
1576 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1578 return !(float64_is_zero(a
.s
) && float64_is_zero(b
.s
));
1582 static float32
float32_addsub(float32 a
, float32 b
, float_status
*s
,
1583 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
)
1585 return float32_gen2(a
, b
, s
, hard
, soft
,
1586 f32_is_zon2
, f32_addsubmul_post
);
1589 static float64
float64_addsub(float64 a
, float64 b
, float_status
*s
,
1590 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
)
1592 return float64_gen2(a
, b
, s
, hard
, soft
,
1593 f64_is_zon2
, f64_addsubmul_post
);
1596 float32 QEMU_FLATTEN
1597 float32_add(float32 a
, float32 b
, float_status
*s
)
1599 return float32_addsub(a
, b
, s
, hard_f32_add
, soft_f32_add
);
1602 float32 QEMU_FLATTEN
1603 float32_sub(float32 a
, float32 b
, float_status
*s
)
1605 return float32_addsub(a
, b
, s
, hard_f32_sub
, soft_f32_sub
);
1608 float64 QEMU_FLATTEN
1609 float64_add(float64 a
, float64 b
, float_status
*s
)
1611 return float64_addsub(a
, b
, s
, hard_f64_add
, soft_f64_add
);
1614 float64 QEMU_FLATTEN
1615 float64_sub(float64 a
, float64 b
, float_status
*s
)
1617 return float64_addsub(a
, b
, s
, hard_f64_sub
, soft_f64_sub
);
1620 static bfloat16 QEMU_FLATTEN
1621 bfloat16_addsub(bfloat16 a
, bfloat16 b
, float_status
*status
, bool subtract
)
1623 FloatParts64 pa
, pb
, *pr
;
1625 bfloat16_unpack_canonical(&pa
, a
, status
);
1626 bfloat16_unpack_canonical(&pb
, b
, status
);
1627 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1629 return bfloat16_round_pack_canonical(pr
, status
);
1632 bfloat16
bfloat16_add(bfloat16 a
, bfloat16 b
, float_status
*status
)
1634 return bfloat16_addsub(a
, b
, status
, false);
1637 bfloat16
bfloat16_sub(bfloat16 a
, bfloat16 b
, float_status
*status
)
1639 return bfloat16_addsub(a
, b
, status
, true);
1642 static float128 QEMU_FLATTEN
1643 float128_addsub(float128 a
, float128 b
, float_status
*status
, bool subtract
)
1645 FloatParts128 pa
, pb
, *pr
;
1647 float128_unpack_canonical(&pa
, a
, status
);
1648 float128_unpack_canonical(&pb
, b
, status
);
1649 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1651 return float128_round_pack_canonical(pr
, status
);
1654 float128
float128_add(float128 a
, float128 b
, float_status
*status
)
1656 return float128_addsub(a
, b
, status
, false);
1659 float128
float128_sub(float128 a
, float128 b
, float_status
*status
)
1661 return float128_addsub(a
, b
, status
, true);
1668 float16 QEMU_FLATTEN
float16_mul(float16 a
, float16 b
, float_status
*status
)
1670 FloatParts64 pa
, pb
, *pr
;
1672 float16_unpack_canonical(&pa
, a
, status
);
1673 float16_unpack_canonical(&pb
, b
, status
);
1674 pr
= parts_mul(&pa
, &pb
, status
);
1676 return float16_round_pack_canonical(pr
, status
);
1679 static float32 QEMU_SOFTFLOAT_ATTR
1680 soft_f32_mul(float32 a
, float32 b
, float_status
*status
)
1682 FloatParts64 pa
, pb
, *pr
;
1684 float32_unpack_canonical(&pa
, a
, status
);
1685 float32_unpack_canonical(&pb
, b
, status
);
1686 pr
= parts_mul(&pa
, &pb
, status
);
1688 return float32_round_pack_canonical(pr
, status
);
1691 static float64 QEMU_SOFTFLOAT_ATTR
1692 soft_f64_mul(float64 a
, float64 b
, float_status
*status
)
1694 FloatParts64 pa
, pb
, *pr
;
1696 float64_unpack_canonical(&pa
, a
, status
);
1697 float64_unpack_canonical(&pb
, b
, status
);
1698 pr
= parts_mul(&pa
, &pb
, status
);
1700 return float64_round_pack_canonical(pr
, status
);
1703 static float hard_f32_mul(float a
, float b
)
1708 static double hard_f64_mul(double a
, double b
)
1713 float32 QEMU_FLATTEN
1714 float32_mul(float32 a
, float32 b
, float_status
*s
)
1716 return float32_gen2(a
, b
, s
, hard_f32_mul
, soft_f32_mul
,
1717 f32_is_zon2
, f32_addsubmul_post
);
1720 float64 QEMU_FLATTEN
1721 float64_mul(float64 a
, float64 b
, float_status
*s
)
1723 return float64_gen2(a
, b
, s
, hard_f64_mul
, soft_f64_mul
,
1724 f64_is_zon2
, f64_addsubmul_post
);
1727 bfloat16 QEMU_FLATTEN
1728 bfloat16_mul(bfloat16 a
, bfloat16 b
, float_status
*status
)
1730 FloatParts64 pa
, pb
, *pr
;
1732 bfloat16_unpack_canonical(&pa
, a
, status
);
1733 bfloat16_unpack_canonical(&pb
, b
, status
);
1734 pr
= parts_mul(&pa
, &pb
, status
);
1736 return bfloat16_round_pack_canonical(pr
, status
);
1739 float128 QEMU_FLATTEN
1740 float128_mul(float128 a
, float128 b
, float_status
*status
)
1742 FloatParts128 pa
, pb
, *pr
;
1744 float128_unpack_canonical(&pa
, a
, status
);
1745 float128_unpack_canonical(&pb
, b
, status
);
1746 pr
= parts_mul(&pa
, &pb
, status
);
1748 return float128_round_pack_canonical(pr
, status
);
1752 * Fused multiply-add
1755 float16 QEMU_FLATTEN
float16_muladd(float16 a
, float16 b
, float16 c
,
1756 int flags
, float_status
*status
)
1758 FloatParts64 pa
, pb
, pc
, *pr
;
1760 float16_unpack_canonical(&pa
, a
, status
);
1761 float16_unpack_canonical(&pb
, b
, status
);
1762 float16_unpack_canonical(&pc
, c
, status
);
1763 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1765 return float16_round_pack_canonical(pr
, status
);
1768 static float32 QEMU_SOFTFLOAT_ATTR
1769 soft_f32_muladd(float32 a
, float32 b
, float32 c
, int flags
,
1770 float_status
*status
)
1772 FloatParts64 pa
, pb
, pc
, *pr
;
1774 float32_unpack_canonical(&pa
, a
, status
);
1775 float32_unpack_canonical(&pb
, b
, status
);
1776 float32_unpack_canonical(&pc
, c
, status
);
1777 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1779 return float32_round_pack_canonical(pr
, status
);
1782 static float64 QEMU_SOFTFLOAT_ATTR
1783 soft_f64_muladd(float64 a
, float64 b
, float64 c
, int flags
,
1784 float_status
*status
)
1786 FloatParts64 pa
, pb
, pc
, *pr
;
1788 float64_unpack_canonical(&pa
, a
, status
);
1789 float64_unpack_canonical(&pb
, b
, status
);
1790 float64_unpack_canonical(&pc
, c
, status
);
1791 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1793 return float64_round_pack_canonical(pr
, status
);
1796 static bool force_soft_fma
;
1798 float32 QEMU_FLATTEN
1799 float32_muladd(float32 xa
, float32 xb
, float32 xc
, int flags
, float_status
*s
)
1801 union_float32 ua
, ub
, uc
, ur
;
1807 if (unlikely(!can_use_fpu(s
))) {
1810 if (unlikely(flags
& float_muladd_halve_result
)) {
1814 float32_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
1815 if (unlikely(!f32_is_zon3(ua
, ub
, uc
))) {
1819 if (unlikely(force_soft_fma
)) {
1824 * When (a || b) == 0, there's no need to check for under/over flow,
1825 * since we know the addend is (normal || 0) and the product is 0.
1827 if (float32_is_zero(ua
.s
) || float32_is_zero(ub
.s
)) {
1831 prod_sign
= float32_is_neg(ua
.s
) ^ float32_is_neg(ub
.s
);
1832 prod_sign
^= !!(flags
& float_muladd_negate_product
);
1833 up
.s
= float32_set_sign(float32_zero
, prod_sign
);
1835 if (flags
& float_muladd_negate_c
) {
1840 union_float32 ua_orig
= ua
;
1841 union_float32 uc_orig
= uc
;
1843 if (flags
& float_muladd_negate_product
) {
1846 if (flags
& float_muladd_negate_c
) {
1850 ur
.h
= fmaf(ua
.h
, ub
.h
, uc
.h
);
1852 if (unlikely(f32_is_inf(ur
))) {
1853 float_raise(float_flag_overflow
, s
);
1854 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
)) {
1860 if (flags
& float_muladd_negate_result
) {
1861 return float32_chs(ur
.s
);
1866 return soft_f32_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
1869 float64 QEMU_FLATTEN
1870 float64_muladd(float64 xa
, float64 xb
, float64 xc
, int flags
, float_status
*s
)
1872 union_float64 ua
, ub
, uc
, ur
;
1878 if (unlikely(!can_use_fpu(s
))) {
1881 if (unlikely(flags
& float_muladd_halve_result
)) {
1885 float64_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
1886 if (unlikely(!f64_is_zon3(ua
, ub
, uc
))) {
1890 if (unlikely(force_soft_fma
)) {
1895 * When (a || b) == 0, there's no need to check for under/over flow,
1896 * since we know the addend is (normal || 0) and the product is 0.
1898 if (float64_is_zero(ua
.s
) || float64_is_zero(ub
.s
)) {
1902 prod_sign
= float64_is_neg(ua
.s
) ^ float64_is_neg(ub
.s
);
1903 prod_sign
^= !!(flags
& float_muladd_negate_product
);
1904 up
.s
= float64_set_sign(float64_zero
, prod_sign
);
1906 if (flags
& float_muladd_negate_c
) {
1911 union_float64 ua_orig
= ua
;
1912 union_float64 uc_orig
= uc
;
1914 if (flags
& float_muladd_negate_product
) {
1917 if (flags
& float_muladd_negate_c
) {
1921 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
1923 if (unlikely(f64_is_inf(ur
))) {
1924 float_raise(float_flag_overflow
, s
);
1925 } else if (unlikely(fabs(ur
.h
) <= FLT_MIN
)) {
1931 if (flags
& float_muladd_negate_result
) {
1932 return float64_chs(ur
.s
);
1937 return soft_f64_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
1940 bfloat16 QEMU_FLATTEN
bfloat16_muladd(bfloat16 a
, bfloat16 b
, bfloat16 c
,
1941 int flags
, float_status
*status
)
1943 FloatParts64 pa
, pb
, pc
, *pr
;
1945 bfloat16_unpack_canonical(&pa
, a
, status
);
1946 bfloat16_unpack_canonical(&pb
, b
, status
);
1947 bfloat16_unpack_canonical(&pc
, c
, status
);
1948 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1950 return bfloat16_round_pack_canonical(pr
, status
);
1953 float128 QEMU_FLATTEN
float128_muladd(float128 a
, float128 b
, float128 c
,
1954 int flags
, float_status
*status
)
1956 FloatParts128 pa
, pb
, pc
, *pr
;
1958 float128_unpack_canonical(&pa
, a
, status
);
1959 float128_unpack_canonical(&pb
, b
, status
);
1960 float128_unpack_canonical(&pc
, c
, status
);
1961 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1963 return float128_round_pack_canonical(pr
, status
);
1970 float16
float16_div(float16 a
, float16 b
, float_status
*status
)
1972 FloatParts64 pa
, pb
, *pr
;
1974 float16_unpack_canonical(&pa
, a
, status
);
1975 float16_unpack_canonical(&pb
, b
, status
);
1976 pr
= parts_div(&pa
, &pb
, status
);
1978 return float16_round_pack_canonical(pr
, status
);
1981 static float32 QEMU_SOFTFLOAT_ATTR
1982 soft_f32_div(float32 a
, float32 b
, float_status
*status
)
1984 FloatParts64 pa
, pb
, *pr
;
1986 float32_unpack_canonical(&pa
, a
, status
);
1987 float32_unpack_canonical(&pb
, b
, status
);
1988 pr
= parts_div(&pa
, &pb
, status
);
1990 return float32_round_pack_canonical(pr
, status
);
1993 static float64 QEMU_SOFTFLOAT_ATTR
1994 soft_f64_div(float64 a
, float64 b
, float_status
*status
)
1996 FloatParts64 pa
, pb
, *pr
;
1998 float64_unpack_canonical(&pa
, a
, status
);
1999 float64_unpack_canonical(&pb
, b
, status
);
2000 pr
= parts_div(&pa
, &pb
, status
);
2002 return float64_round_pack_canonical(pr
, status
);
2005 static float hard_f32_div(float a
, float b
)
2010 static double hard_f64_div(double a
, double b
)
2015 static bool f32_div_pre(union_float32 a
, union_float32 b
)
2017 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2018 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2019 fpclassify(b
.h
) == FP_NORMAL
;
2021 return float32_is_zero_or_normal(a
.s
) && float32_is_normal(b
.s
);
2024 static bool f64_div_pre(union_float64 a
, union_float64 b
)
2026 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2027 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2028 fpclassify(b
.h
) == FP_NORMAL
;
2030 return float64_is_zero_or_normal(a
.s
) && float64_is_normal(b
.s
);
2033 static bool f32_div_post(union_float32 a
, union_float32 b
)
2035 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2036 return fpclassify(a
.h
) != FP_ZERO
;
2038 return !float32_is_zero(a
.s
);
2041 static bool f64_div_post(union_float64 a
, union_float64 b
)
2043 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2044 return fpclassify(a
.h
) != FP_ZERO
;
2046 return !float64_is_zero(a
.s
);
2049 float32 QEMU_FLATTEN
2050 float32_div(float32 a
, float32 b
, float_status
*s
)
2052 return float32_gen2(a
, b
, s
, hard_f32_div
, soft_f32_div
,
2053 f32_div_pre
, f32_div_post
);
2056 float64 QEMU_FLATTEN
2057 float64_div(float64 a
, float64 b
, float_status
*s
)
2059 return float64_gen2(a
, b
, s
, hard_f64_div
, soft_f64_div
,
2060 f64_div_pre
, f64_div_post
);
2063 bfloat16 QEMU_FLATTEN
2064 bfloat16_div(bfloat16 a
, bfloat16 b
, float_status
*status
)
2066 FloatParts64 pa
, pb
, *pr
;
2068 bfloat16_unpack_canonical(&pa
, a
, status
);
2069 bfloat16_unpack_canonical(&pb
, b
, status
);
2070 pr
= parts_div(&pa
, &pb
, status
);
2072 return bfloat16_round_pack_canonical(pr
, status
);
2075 float128 QEMU_FLATTEN
2076 float128_div(float128 a
, float128 b
, float_status
*status
)
2078 FloatParts128 pa
, pb
, *pr
;
2080 float128_unpack_canonical(&pa
, a
, status
);
2081 float128_unpack_canonical(&pb
, b
, status
);
2082 pr
= parts_div(&pa
, &pb
, status
);
2084 return float128_round_pack_canonical(pr
, status
);
2088 * Float to Float conversions
2090 * Returns the result of converting one float format to another. The
2091 * conversion is performed according to the IEC/IEEE Standard for
2092 * Binary Floating-Point Arithmetic.
2094 * Usually this only needs to take care of raising invalid exceptions
2095 * and handling the conversion on NaNs.
2098 static void parts_float_to_ahp(FloatParts64
*a
, float_status
*s
)
2101 case float_class_qnan
:
2102 case float_class_snan
:
2104 * There is no NaN in the destination format. Raise Invalid
2105 * and return a zero with the sign of the input NaN.
2107 float_raise(float_flag_invalid
, s
);
2108 a
->cls
= float_class_zero
;
2111 case float_class_inf
:
2113 * There is no Inf in the destination format. Raise Invalid
2114 * and return the maximum normal with the correct sign.
2116 float_raise(float_flag_invalid
, s
);
2117 a
->cls
= float_class_normal
;
2118 a
->exp
= float16_params_ahp
.exp_max
;
2119 a
->frac
= MAKE_64BIT_MASK(float16_params_ahp
.frac_shift
,
2120 float16_params_ahp
.frac_size
+ 1);
2123 case float_class_normal
:
2124 case float_class_zero
:
2128 g_assert_not_reached();
2132 static void parts64_float_to_float(FloatParts64
*a
, float_status
*s
)
2134 if (is_nan(a
->cls
)) {
2135 parts_return_nan(a
, s
);
2139 static void parts128_float_to_float(FloatParts128
*a
, float_status
*s
)
2141 if (is_nan(a
->cls
)) {
2142 parts_return_nan(a
, s
);
2146 #define parts_float_to_float(P, S) \
2147 PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2149 static void parts_float_to_float_narrow(FloatParts64
*a
, FloatParts128
*b
,
2156 if (a
->cls
== float_class_normal
) {
2157 frac_truncjam(a
, b
);
2158 } else if (is_nan(a
->cls
)) {
2159 /* Discard the low bits of the NaN. */
2160 a
->frac
= b
->frac_hi
;
2161 parts_return_nan(a
, s
);
2165 static void parts_float_to_float_widen(FloatParts128
*a
, FloatParts64
*b
,
2173 if (is_nan(a
->cls
)) {
2174 parts_return_nan(a
, s
);
2178 float32
float16_to_float32(float16 a
, bool ieee
, float_status
*s
)
2180 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2183 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2184 parts_float_to_float(&p
, s
);
2185 return float32_round_pack_canonical(&p
, s
);
2188 float64
float16_to_float64(float16 a
, bool ieee
, float_status
*s
)
2190 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2193 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2194 parts_float_to_float(&p
, s
);
2195 return float64_round_pack_canonical(&p
, s
);
2198 float16
float32_to_float16(float32 a
, bool ieee
, float_status
*s
)
2201 const FloatFmt
*fmt
;
2203 float32_unpack_canonical(&p
, a
, s
);
2205 parts_float_to_float(&p
, s
);
2206 fmt
= &float16_params
;
2208 parts_float_to_ahp(&p
, s
);
2209 fmt
= &float16_params_ahp
;
2211 return float16a_round_pack_canonical(&p
, s
, fmt
);
2214 static float64 QEMU_SOFTFLOAT_ATTR
2215 soft_float32_to_float64(float32 a
, float_status
*s
)
2219 float32_unpack_canonical(&p
, a
, s
);
2220 parts_float_to_float(&p
, s
);
2221 return float64_round_pack_canonical(&p
, s
);
2224 float64
float32_to_float64(float32 a
, float_status
*s
)
2226 if (likely(float32_is_normal(a
))) {
2227 /* Widening conversion can never produce inexact results. */
2233 } else if (float32_is_zero(a
)) {
2234 return float64_set_sign(float64_zero
, float32_is_neg(a
));
2236 return soft_float32_to_float64(a
, s
);
2240 float16
float64_to_float16(float64 a
, bool ieee
, float_status
*s
)
2243 const FloatFmt
*fmt
;
2245 float64_unpack_canonical(&p
, a
, s
);
2247 parts_float_to_float(&p
, s
);
2248 fmt
= &float16_params
;
2250 parts_float_to_ahp(&p
, s
);
2251 fmt
= &float16_params_ahp
;
2253 return float16a_round_pack_canonical(&p
, s
, fmt
);
2256 float32
float64_to_float32(float64 a
, float_status
*s
)
2260 float64_unpack_canonical(&p
, a
, s
);
2261 parts_float_to_float(&p
, s
);
2262 return float32_round_pack_canonical(&p
, s
);
2265 float32
bfloat16_to_float32(bfloat16 a
, float_status
*s
)
2269 bfloat16_unpack_canonical(&p
, a
, s
);
2270 parts_float_to_float(&p
, s
);
2271 return float32_round_pack_canonical(&p
, s
);
2274 float64
bfloat16_to_float64(bfloat16 a
, float_status
*s
)
2278 bfloat16_unpack_canonical(&p
, a
, s
);
2279 parts_float_to_float(&p
, s
);
2280 return float64_round_pack_canonical(&p
, s
);
2283 bfloat16
float32_to_bfloat16(float32 a
, float_status
*s
)
2287 float32_unpack_canonical(&p
, a
, s
);
2288 parts_float_to_float(&p
, s
);
2289 return bfloat16_round_pack_canonical(&p
, s
);
2292 bfloat16
float64_to_bfloat16(float64 a
, float_status
*s
)
2296 float64_unpack_canonical(&p
, a
, s
);
2297 parts_float_to_float(&p
, s
);
2298 return bfloat16_round_pack_canonical(&p
, s
);
2301 float32
float128_to_float32(float128 a
, float_status
*s
)
2306 float128_unpack_canonical(&p128
, a
, s
);
2307 parts_float_to_float_narrow(&p64
, &p128
, s
);
2308 return float32_round_pack_canonical(&p64
, s
);
2311 float64
float128_to_float64(float128 a
, float_status
*s
)
2316 float128_unpack_canonical(&p128
, a
, s
);
2317 parts_float_to_float_narrow(&p64
, &p128
, s
);
2318 return float64_round_pack_canonical(&p64
, s
);
2321 float128
float32_to_float128(float32 a
, float_status
*s
)
2326 float32_unpack_canonical(&p64
, a
, s
);
2327 parts_float_to_float_widen(&p128
, &p64
, s
);
2328 return float128_round_pack_canonical(&p128
, s
);
2331 float128
float64_to_float128(float64 a
, float_status
*s
)
2336 float64_unpack_canonical(&p64
, a
, s
);
2337 parts_float_to_float_widen(&p128
, &p64
, s
);
2338 return float128_round_pack_canonical(&p128
, s
);
2342 * Round to integral value
2345 float16
float16_round_to_int(float16 a
, float_status
*s
)
2349 float16_unpack_canonical(&p
, a
, s
);
2350 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float16_params
);
2351 return float16_round_pack_canonical(&p
, s
);
2354 float32
float32_round_to_int(float32 a
, float_status
*s
)
2358 float32_unpack_canonical(&p
, a
, s
);
2359 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float32_params
);
2360 return float32_round_pack_canonical(&p
, s
);
2363 float64
float64_round_to_int(float64 a
, float_status
*s
)
2367 float64_unpack_canonical(&p
, a
, s
);
2368 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float64_params
);
2369 return float64_round_pack_canonical(&p
, s
);
2372 bfloat16
bfloat16_round_to_int(bfloat16 a
, float_status
*s
)
2376 bfloat16_unpack_canonical(&p
, a
, s
);
2377 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &bfloat16_params
);
2378 return bfloat16_round_pack_canonical(&p
, s
);
2381 float128
float128_round_to_int(float128 a
, float_status
*s
)
2385 float128_unpack_canonical(&p
, a
, s
);
2386 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float128_params
);
2387 return float128_round_pack_canonical(&p
, s
);
2391 * Floating-point to signed integer conversions
2394 int8_t float16_to_int8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2399 float16_unpack_canonical(&p
, a
, s
);
2400 return parts_float_to_sint(&p
, rmode
, scale
, INT8_MIN
, INT8_MAX
, s
);
2403 int16_t float16_to_int16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2408 float16_unpack_canonical(&p
, a
, s
);
2409 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2412 int32_t float16_to_int32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2417 float16_unpack_canonical(&p
, a
, s
);
2418 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2421 int64_t float16_to_int64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2426 float16_unpack_canonical(&p
, a
, s
);
2427 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2430 int16_t float32_to_int16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2435 float32_unpack_canonical(&p
, a
, s
);
2436 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2439 int32_t float32_to_int32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2444 float32_unpack_canonical(&p
, a
, s
);
2445 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2448 int64_t float32_to_int64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2453 float32_unpack_canonical(&p
, a
, s
);
2454 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2457 int16_t float64_to_int16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2462 float64_unpack_canonical(&p
, a
, s
);
2463 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2466 int32_t float64_to_int32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2471 float64_unpack_canonical(&p
, a
, s
);
2472 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2475 int64_t float64_to_int64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2480 float64_unpack_canonical(&p
, a
, s
);
2481 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2484 int16_t bfloat16_to_int16_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2489 bfloat16_unpack_canonical(&p
, a
, s
);
2490 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2493 int32_t bfloat16_to_int32_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2498 bfloat16_unpack_canonical(&p
, a
, s
);
2499 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2502 int64_t bfloat16_to_int64_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2507 bfloat16_unpack_canonical(&p
, a
, s
);
2508 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2511 static int32_t float128_to_int32_scalbn(float128 a
, FloatRoundMode rmode
,
2512 int scale
, float_status
*s
)
2516 float128_unpack_canonical(&p
, a
, s
);
2517 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2520 static int64_t float128_to_int64_scalbn(float128 a
, FloatRoundMode rmode
,
2521 int scale
, float_status
*s
)
2525 float128_unpack_canonical(&p
, a
, s
);
2526 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2529 int8_t float16_to_int8(float16 a
, float_status
*s
)
2531 return float16_to_int8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2534 int16_t float16_to_int16(float16 a
, float_status
*s
)
2536 return float16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2539 int32_t float16_to_int32(float16 a
, float_status
*s
)
2541 return float16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2544 int64_t float16_to_int64(float16 a
, float_status
*s
)
2546 return float16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2549 int16_t float32_to_int16(float32 a
, float_status
*s
)
2551 return float32_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2554 int32_t float32_to_int32(float32 a
, float_status
*s
)
2556 return float32_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2559 int64_t float32_to_int64(float32 a
, float_status
*s
)
2561 return float32_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2564 int16_t float64_to_int16(float64 a
, float_status
*s
)
2566 return float64_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2569 int32_t float64_to_int32(float64 a
, float_status
*s
)
2571 return float64_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2574 int64_t float64_to_int64(float64 a
, float_status
*s
)
2576 return float64_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2579 int32_t float128_to_int32(float128 a
, float_status
*s
)
2581 return float128_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2584 int64_t float128_to_int64(float128 a
, float_status
*s
)
2586 return float128_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2589 int16_t float16_to_int16_round_to_zero(float16 a
, float_status
*s
)
2591 return float16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2594 int32_t float16_to_int32_round_to_zero(float16 a
, float_status
*s
)
2596 return float16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2599 int64_t float16_to_int64_round_to_zero(float16 a
, float_status
*s
)
2601 return float16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2604 int16_t float32_to_int16_round_to_zero(float32 a
, float_status
*s
)
2606 return float32_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2609 int32_t float32_to_int32_round_to_zero(float32 a
, float_status
*s
)
2611 return float32_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2614 int64_t float32_to_int64_round_to_zero(float32 a
, float_status
*s
)
2616 return float32_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2619 int16_t float64_to_int16_round_to_zero(float64 a
, float_status
*s
)
2621 return float64_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2624 int32_t float64_to_int32_round_to_zero(float64 a
, float_status
*s
)
2626 return float64_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2629 int64_t float64_to_int64_round_to_zero(float64 a
, float_status
*s
)
2631 return float64_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2634 int32_t float128_to_int32_round_to_zero(float128 a
, float_status
*s
)
2636 return float128_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2639 int64_t float128_to_int64_round_to_zero(float128 a
, float_status
*s
)
2641 return float128_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2644 int16_t bfloat16_to_int16(bfloat16 a
, float_status
*s
)
2646 return bfloat16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2649 int32_t bfloat16_to_int32(bfloat16 a
, float_status
*s
)
2651 return bfloat16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2654 int64_t bfloat16_to_int64(bfloat16 a
, float_status
*s
)
2656 return bfloat16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2659 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a
, float_status
*s
)
2661 return bfloat16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2664 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a
, float_status
*s
)
2666 return bfloat16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2669 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a
, float_status
*s
)
2671 return bfloat16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2675 * Floating-point to unsigned integer conversions
2678 uint8_t float16_to_uint8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2683 float16_unpack_canonical(&p
, a
, s
);
2684 return parts_float_to_uint(&p
, rmode
, scale
, UINT8_MAX
, s
);
2687 uint16_t float16_to_uint16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2692 float16_unpack_canonical(&p
, a
, s
);
2693 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2696 uint32_t float16_to_uint32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2701 float16_unpack_canonical(&p
, a
, s
);
2702 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2705 uint64_t float16_to_uint64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2710 float16_unpack_canonical(&p
, a
, s
);
2711 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2714 uint16_t float32_to_uint16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2719 float32_unpack_canonical(&p
, a
, s
);
2720 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2723 uint32_t float32_to_uint32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2728 float32_unpack_canonical(&p
, a
, s
);
2729 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2732 uint64_t float32_to_uint64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2737 float32_unpack_canonical(&p
, a
, s
);
2738 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2741 uint16_t float64_to_uint16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2746 float64_unpack_canonical(&p
, a
, s
);
2747 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2750 uint32_t float64_to_uint32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2755 float64_unpack_canonical(&p
, a
, s
);
2756 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2759 uint64_t float64_to_uint64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2764 float64_unpack_canonical(&p
, a
, s
);
2765 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2768 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2769 int scale
, float_status
*s
)
2773 bfloat16_unpack_canonical(&p
, a
, s
);
2774 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2777 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2778 int scale
, float_status
*s
)
2782 bfloat16_unpack_canonical(&p
, a
, s
);
2783 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2786 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2787 int scale
, float_status
*s
)
2791 bfloat16_unpack_canonical(&p
, a
, s
);
2792 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2795 static uint32_t float128_to_uint32_scalbn(float128 a
, FloatRoundMode rmode
,
2796 int scale
, float_status
*s
)
2800 float128_unpack_canonical(&p
, a
, s
);
2801 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2804 static uint64_t float128_to_uint64_scalbn(float128 a
, FloatRoundMode rmode
,
2805 int scale
, float_status
*s
)
2809 float128_unpack_canonical(&p
, a
, s
);
2810 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2813 uint8_t float16_to_uint8(float16 a
, float_status
*s
)
2815 return float16_to_uint8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2818 uint16_t float16_to_uint16(float16 a
, float_status
*s
)
2820 return float16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2823 uint32_t float16_to_uint32(float16 a
, float_status
*s
)
2825 return float16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2828 uint64_t float16_to_uint64(float16 a
, float_status
*s
)
2830 return float16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2833 uint16_t float32_to_uint16(float32 a
, float_status
*s
)
2835 return float32_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2838 uint32_t float32_to_uint32(float32 a
, float_status
*s
)
2840 return float32_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2843 uint64_t float32_to_uint64(float32 a
, float_status
*s
)
2845 return float32_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2848 uint16_t float64_to_uint16(float64 a
, float_status
*s
)
2850 return float64_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2853 uint32_t float64_to_uint32(float64 a
, float_status
*s
)
2855 return float64_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2858 uint64_t float64_to_uint64(float64 a
, float_status
*s
)
2860 return float64_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2863 uint32_t float128_to_uint32(float128 a
, float_status
*s
)
2865 return float128_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2868 uint64_t float128_to_uint64(float128 a
, float_status
*s
)
2870 return float128_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2873 uint16_t float16_to_uint16_round_to_zero(float16 a
, float_status
*s
)
2875 return float16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
2878 uint32_t float16_to_uint32_round_to_zero(float16 a
, float_status
*s
)
2880 return float16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
2883 uint64_t float16_to_uint64_round_to_zero(float16 a
, float_status
*s
)
2885 return float16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
2888 uint16_t float32_to_uint16_round_to_zero(float32 a
, float_status
*s
)
2890 return float32_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
2893 uint32_t float32_to_uint32_round_to_zero(float32 a
, float_status
*s
)
2895 return float32_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
2898 uint64_t float32_to_uint64_round_to_zero(float32 a
, float_status
*s
)
2900 return float32_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
2903 uint16_t float64_to_uint16_round_to_zero(float64 a
, float_status
*s
)
2905 return float64_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
2908 uint32_t float64_to_uint32_round_to_zero(float64 a
, float_status
*s
)
2910 return float64_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
2913 uint64_t float64_to_uint64_round_to_zero(float64 a
, float_status
*s
)
2915 return float64_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
2918 uint32_t float128_to_uint32_round_to_zero(float128 a
, float_status
*s
)
2920 return float128_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
2923 uint64_t float128_to_uint64_round_to_zero(float128 a
, float_status
*s
)
2925 return float128_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
2928 uint16_t bfloat16_to_uint16(bfloat16 a
, float_status
*s
)
2930 return bfloat16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2933 uint32_t bfloat16_to_uint32(bfloat16 a
, float_status
*s
)
2935 return bfloat16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2938 uint64_t bfloat16_to_uint64(bfloat16 a
, float_status
*s
)
2940 return bfloat16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2943 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a
, float_status
*s
)
2945 return bfloat16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
2948 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a
, float_status
*s
)
2950 return bfloat16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
2953 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a
, float_status
*s
)
2955 return bfloat16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
2959 * Signed integer to floating-point conversions
2962 float16
int64_to_float16_scalbn(int64_t a
, int scale
, float_status
*status
)
2966 parts_sint_to_float(&p
, a
, scale
, status
);
2967 return float16_round_pack_canonical(&p
, status
);
2970 float16
int32_to_float16_scalbn(int32_t a
, int scale
, float_status
*status
)
2972 return int64_to_float16_scalbn(a
, scale
, status
);
2975 float16
int16_to_float16_scalbn(int16_t a
, int scale
, float_status
*status
)
2977 return int64_to_float16_scalbn(a
, scale
, status
);
2980 float16
int64_to_float16(int64_t a
, float_status
*status
)
2982 return int64_to_float16_scalbn(a
, 0, status
);
2985 float16
int32_to_float16(int32_t a
, float_status
*status
)
2987 return int64_to_float16_scalbn(a
, 0, status
);
2990 float16
int16_to_float16(int16_t a
, float_status
*status
)
2992 return int64_to_float16_scalbn(a
, 0, status
);
2995 float16
int8_to_float16(int8_t a
, float_status
*status
)
2997 return int64_to_float16_scalbn(a
, 0, status
);
3000 float32
int64_to_float32_scalbn(int64_t a
, int scale
, float_status
*status
)
3004 parts64_sint_to_float(&p
, a
, scale
, status
);
3005 return float32_round_pack_canonical(&p
, status
);
3008 float32
int32_to_float32_scalbn(int32_t a
, int scale
, float_status
*status
)
3010 return int64_to_float32_scalbn(a
, scale
, status
);
3013 float32
int16_to_float32_scalbn(int16_t a
, int scale
, float_status
*status
)
3015 return int64_to_float32_scalbn(a
, scale
, status
);
3018 float32
int64_to_float32(int64_t a
, float_status
*status
)
3020 return int64_to_float32_scalbn(a
, 0, status
);
3023 float32
int32_to_float32(int32_t a
, float_status
*status
)
3025 return int64_to_float32_scalbn(a
, 0, status
);
3028 float32
int16_to_float32(int16_t a
, float_status
*status
)
3030 return int64_to_float32_scalbn(a
, 0, status
);
3033 float64
int64_to_float64_scalbn(int64_t a
, int scale
, float_status
*status
)
3037 parts_sint_to_float(&p
, a
, scale
, status
);
3038 return float64_round_pack_canonical(&p
, status
);
3041 float64
int32_to_float64_scalbn(int32_t a
, int scale
, float_status
*status
)
3043 return int64_to_float64_scalbn(a
, scale
, status
);
3046 float64
int16_to_float64_scalbn(int16_t a
, int scale
, float_status
*status
)
3048 return int64_to_float64_scalbn(a
, scale
, status
);
3051 float64
int64_to_float64(int64_t a
, float_status
*status
)
3053 return int64_to_float64_scalbn(a
, 0, status
);
3056 float64
int32_to_float64(int32_t a
, float_status
*status
)
3058 return int64_to_float64_scalbn(a
, 0, status
);
3061 float64
int16_to_float64(int16_t a
, float_status
*status
)
3063 return int64_to_float64_scalbn(a
, 0, status
);
3066 bfloat16
int64_to_bfloat16_scalbn(int64_t a
, int scale
, float_status
*status
)
3070 parts_sint_to_float(&p
, a
, scale
, status
);
3071 return bfloat16_round_pack_canonical(&p
, status
);
3074 bfloat16
int32_to_bfloat16_scalbn(int32_t a
, int scale
, float_status
*status
)
3076 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3079 bfloat16
int16_to_bfloat16_scalbn(int16_t a
, int scale
, float_status
*status
)
3081 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3084 bfloat16
int64_to_bfloat16(int64_t a
, float_status
*status
)
3086 return int64_to_bfloat16_scalbn(a
, 0, status
);
3089 bfloat16
int32_to_bfloat16(int32_t a
, float_status
*status
)
3091 return int64_to_bfloat16_scalbn(a
, 0, status
);
3094 bfloat16
int16_to_bfloat16(int16_t a
, float_status
*status
)
3096 return int64_to_bfloat16_scalbn(a
, 0, status
);
3099 float128
int64_to_float128(int64_t a
, float_status
*status
)
3103 parts_sint_to_float(&p
, a
, 0, status
);
3104 return float128_round_pack_canonical(&p
, status
);
3107 float128
int32_to_float128(int32_t a
, float_status
*status
)
3109 return int64_to_float128(a
, status
);
3113 * Unsigned Integer to floating-point conversions
3116 float16
uint64_to_float16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3120 parts_uint_to_float(&p
, a
, scale
, status
);
3121 return float16_round_pack_canonical(&p
, status
);
3124 float16
uint32_to_float16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3126 return uint64_to_float16_scalbn(a
, scale
, status
);
3129 float16
uint16_to_float16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3131 return uint64_to_float16_scalbn(a
, scale
, status
);
3134 float16
uint64_to_float16(uint64_t a
, float_status
*status
)
3136 return uint64_to_float16_scalbn(a
, 0, status
);
3139 float16
uint32_to_float16(uint32_t a
, float_status
*status
)
3141 return uint64_to_float16_scalbn(a
, 0, status
);
3144 float16
uint16_to_float16(uint16_t a
, float_status
*status
)
3146 return uint64_to_float16_scalbn(a
, 0, status
);
3149 float16
uint8_to_float16(uint8_t a
, float_status
*status
)
3151 return uint64_to_float16_scalbn(a
, 0, status
);
3154 float32
uint64_to_float32_scalbn(uint64_t a
, int scale
, float_status
*status
)
3158 parts_uint_to_float(&p
, a
, scale
, status
);
3159 return float32_round_pack_canonical(&p
, status
);
3162 float32
uint32_to_float32_scalbn(uint32_t a
, int scale
, float_status
*status
)
3164 return uint64_to_float32_scalbn(a
, scale
, status
);
3167 float32
uint16_to_float32_scalbn(uint16_t a
, int scale
, float_status
*status
)
3169 return uint64_to_float32_scalbn(a
, scale
, status
);
3172 float32
uint64_to_float32(uint64_t a
, float_status
*status
)
3174 return uint64_to_float32_scalbn(a
, 0, status
);
3177 float32
uint32_to_float32(uint32_t a
, float_status
*status
)
3179 return uint64_to_float32_scalbn(a
, 0, status
);
3182 float32
uint16_to_float32(uint16_t a
, float_status
*status
)
3184 return uint64_to_float32_scalbn(a
, 0, status
);
3187 float64
uint64_to_float64_scalbn(uint64_t a
, int scale
, float_status
*status
)
3191 parts_uint_to_float(&p
, a
, scale
, status
);
3192 return float64_round_pack_canonical(&p
, status
);
3195 float64
uint32_to_float64_scalbn(uint32_t a
, int scale
, float_status
*status
)
3197 return uint64_to_float64_scalbn(a
, scale
, status
);
3200 float64
uint16_to_float64_scalbn(uint16_t a
, int scale
, float_status
*status
)
3202 return uint64_to_float64_scalbn(a
, scale
, status
);
3205 float64
uint64_to_float64(uint64_t a
, float_status
*status
)
3207 return uint64_to_float64_scalbn(a
, 0, status
);
3210 float64
uint32_to_float64(uint32_t a
, float_status
*status
)
3212 return uint64_to_float64_scalbn(a
, 0, status
);
3215 float64
uint16_to_float64(uint16_t a
, float_status
*status
)
3217 return uint64_to_float64_scalbn(a
, 0, status
);
3220 bfloat16
uint64_to_bfloat16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3224 parts_uint_to_float(&p
, a
, scale
, status
);
3225 return bfloat16_round_pack_canonical(&p
, status
);
3228 bfloat16
uint32_to_bfloat16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3230 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3233 bfloat16
uint16_to_bfloat16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3235 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3238 bfloat16
uint64_to_bfloat16(uint64_t a
, float_status
*status
)
3240 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3243 bfloat16
uint32_to_bfloat16(uint32_t a
, float_status
*status
)
3245 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3248 bfloat16
uint16_to_bfloat16(uint16_t a
, float_status
*status
)
3250 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3253 float128
uint64_to_float128(uint64_t a
, float_status
*status
)
3257 parts_uint_to_float(&p
, a
, 0, status
);
3258 return float128_round_pack_canonical(&p
, status
);
3262 /* min() and max() functions. These can't be implemented as
3263 * 'compare and pick one input' because that would mishandle
3264 * NaNs and +0 vs -0.
3266 * minnum() and maxnum() functions. These are similar to the min()
3267 * and max() functions but if one of the arguments is a QNaN and
3268 * the other is numerical then the numerical argument is returned.
3269 * SNaNs will get quietened before being returned.
3270 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
3271 * and maxNum() operations. min() and max() are the typical min/max
3272 * semantics provided by many CPUs which predate that specification.
3274  * minnummag() and maxnummag() functions correspond to minNumMag()
3275  * and maxNumMag() from the IEEE-754 2008.
3277 static FloatParts64
minmax_floats(FloatParts64 a
, FloatParts64 b
, bool ismin
,
3278 bool ieee
, bool ismag
, float_status
*s
)
3280 if (unlikely(is_nan(a
.cls
) || is_nan(b
.cls
))) {
3282 /* Takes two floating-point values `a' and `b', one of
3283 * which is a NaN, and returns the appropriate NaN
3284 * result. If either `a' or `b' is a signaling NaN,
3285 * the invalid exception is raised.
3287 if (is_snan(a
.cls
) || is_snan(b
.cls
)) {
3288 return *parts_pick_nan(&a
, &b
, s
);
3289 } else if (is_nan(a
.cls
) && !is_nan(b
.cls
)) {
3291 } else if (is_nan(b
.cls
) && !is_nan(a
.cls
)) {
3295 return *parts_pick_nan(&a
, &b
, s
);
3300 case float_class_normal
:
3303 case float_class_inf
:
3306 case float_class_zero
:
3310 g_assert_not_reached();
3314 case float_class_normal
:
3317 case float_class_inf
:
3320 case float_class_zero
:
3324 g_assert_not_reached();
3328 if (ismag
&& (a_exp
!= b_exp
|| a
.frac
!= b
.frac
)) {
3329 bool a_less
= a_exp
< b_exp
;
3330 if (a_exp
== b_exp
) {
3331 a_less
= a
.frac
< b
.frac
;
3333 return a_less
^ ismin
? b
: a
;
3336 if (a
.sign
== b
.sign
) {
3337 bool a_less
= a_exp
< b_exp
;
3338 if (a_exp
== b_exp
) {
3339 a_less
= a
.frac
< b
.frac
;
3341 return a
.sign
^ a_less
^ ismin
? b
: a
;
3343 return a
.sign
^ ismin
? b
: a
;
3348 #define MINMAX(sz, name, ismin, isiee, ismag) \
3349 float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
3352 FloatParts64 pa, pb, pr; \
3353 float ## sz ## _unpack_canonical(&pa, a, s); \
3354 float ## sz ## _unpack_canonical(&pb, b, s); \
3355 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
3356 return float ## sz ## _round_pack_canonical(&pr, s); \
3359 MINMAX(16, min
, true, false, false)
3360 MINMAX(16, minnum
, true, true, false)
3361 MINMAX(16, minnummag
, true, true, true)
3362 MINMAX(16, max
, false, false, false)
3363 MINMAX(16, maxnum
, false, true, false)
3364 MINMAX(16, maxnummag
, false, true, true)
3366 MINMAX(32, min
, true, false, false)
3367 MINMAX(32, minnum
, true, true, false)
3368 MINMAX(32, minnummag
, true, true, true)
3369 MINMAX(32, max
, false, false, false)
3370 MINMAX(32, maxnum
, false, true, false)
3371 MINMAX(32, maxnummag
, false, true, true)
3373 MINMAX(64, min
, true, false, false)
3374 MINMAX(64, minnum
, true, true, false)
3375 MINMAX(64, minnummag
, true, true, true)
3376 MINMAX(64, max
, false, false, false)
3377 MINMAX(64, maxnum
, false, true, false)
3378 MINMAX(64, maxnummag
, false, true, true)
3382 #define BF16_MINMAX(name, ismin, isiee, ismag) \
3383 bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \
3385 FloatParts64 pa, pb, pr; \
3386 bfloat16_unpack_canonical(&pa, a, s); \
3387 bfloat16_unpack_canonical(&pb, b, s); \
3388 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
3389 return bfloat16_round_pack_canonical(&pr, s); \
3392 BF16_MINMAX(min
, true, false, false)
3393 BF16_MINMAX(minnum
, true, true, false)
3394 BF16_MINMAX(minnummag
, true, true, true)
3395 BF16_MINMAX(max
, false, false, false)
3396 BF16_MINMAX(maxnum
, false, true, false)
3397 BF16_MINMAX(maxnummag
, false, true, true)
3401 /* Floating point compare */
3402 static FloatRelation
compare_floats(FloatParts64 a
, FloatParts64 b
, bool is_quiet
,
3405 if (is_nan(a
.cls
) || is_nan(b
.cls
)) {
3407 a
.cls
== float_class_snan
||
3408 b
.cls
== float_class_snan
) {
3409 float_raise(float_flag_invalid
, s
);
3411 return float_relation_unordered
;
3414 if (a
.cls
== float_class_zero
) {
3415 if (b
.cls
== float_class_zero
) {
3416 return float_relation_equal
;
3418 return b
.sign
? float_relation_greater
: float_relation_less
;
3419 } else if (b
.cls
== float_class_zero
) {
3420 return a
.sign
? float_relation_less
: float_relation_greater
;
3423 /* The only really important thing about infinity is its sign. If
3424 * both are infinities the sign marks the smallest of the two.
3426 if (a
.cls
== float_class_inf
) {
3427 if ((b
.cls
== float_class_inf
) && (a
.sign
== b
.sign
)) {
3428 return float_relation_equal
;
3430 return a
.sign
? float_relation_less
: float_relation_greater
;
3431 } else if (b
.cls
== float_class_inf
) {
3432 return b
.sign
? float_relation_greater
: float_relation_less
;
3435 if (a
.sign
!= b
.sign
) {
3436 return a
.sign
? float_relation_less
: float_relation_greater
;
3439 if (a
.exp
== b
.exp
) {
3440 if (a
.frac
== b
.frac
) {
3441 return float_relation_equal
;
3444 return a
.frac
> b
.frac
?
3445 float_relation_less
: float_relation_greater
;
3447 return a
.frac
> b
.frac
?
3448 float_relation_greater
: float_relation_less
;
3452 return a
.exp
> b
.exp
? float_relation_less
: float_relation_greater
;
3454 return a
.exp
> b
.exp
? float_relation_greater
: float_relation_less
;
3459 #define COMPARE(name, attr, sz) \
3461 name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
3463 FloatParts64 pa, pb; \
3464 float ## sz ## _unpack_canonical(&pa, a, s); \
3465 float ## sz ## _unpack_canonical(&pb, b, s); \
3466 return compare_floats(pa, pb, is_quiet, s); \
3469 COMPARE(soft_f16_compare
, QEMU_FLATTEN
, 16)
3470 COMPARE(soft_f32_compare
, QEMU_SOFTFLOAT_ATTR
, 32)
3471 COMPARE(soft_f64_compare
, QEMU_SOFTFLOAT_ATTR
, 64)
3475 FloatRelation
float16_compare(float16 a
, float16 b
, float_status
*s
)
3477 return soft_f16_compare(a
, b
, false, s
);
3480 FloatRelation
float16_compare_quiet(float16 a
, float16 b
, float_status
*s
)
3482 return soft_f16_compare(a
, b
, true, s
);
3485 static FloatRelation QEMU_FLATTEN
3486 f32_compare(float32 xa
, float32 xb
, bool is_quiet
, float_status
*s
)
3488 union_float32 ua
, ub
;
3493 if (QEMU_NO_HARDFLOAT
) {
3497 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
3498 if (isgreaterequal(ua
.h
, ub
.h
)) {
3499 if (isgreater(ua
.h
, ub
.h
)) {
3500 return float_relation_greater
;
3502 return float_relation_equal
;
3504 if (likely(isless(ua
.h
, ub
.h
))) {
3505 return float_relation_less
;
3507 /* The only condition remaining is unordered.
3508 * Fall through to set flags.
3511 return soft_f32_compare(ua
.s
, ub
.s
, is_quiet
, s
);
3514 FloatRelation
float32_compare(float32 a
, float32 b
, float_status
*s
)
3516 return f32_compare(a
, b
, false, s
);
3519 FloatRelation
float32_compare_quiet(float32 a
, float32 b
, float_status
*s
)
3521 return f32_compare(a
, b
, true, s
);
3524 static FloatRelation QEMU_FLATTEN
3525 f64_compare(float64 xa
, float64 xb
, bool is_quiet
, float_status
*s
)
3527 union_float64 ua
, ub
;
3532 if (QEMU_NO_HARDFLOAT
) {
3536 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
3537 if (isgreaterequal(ua
.h
, ub
.h
)) {
3538 if (isgreater(ua
.h
, ub
.h
)) {
3539 return float_relation_greater
;
3541 return float_relation_equal
;
3543 if (likely(isless(ua
.h
, ub
.h
))) {
3544 return float_relation_less
;
3546 /* The only condition remaining is unordered.
3547 * Fall through to set flags.
3550 return soft_f64_compare(ua
.s
, ub
.s
, is_quiet
, s
);
3553 FloatRelation
float64_compare(float64 a
, float64 b
, float_status
*s
)
3555 return f64_compare(a
, b
, false, s
);
3558 FloatRelation
float64_compare_quiet(float64 a
, float64 b
, float_status
*s
)
3560 return f64_compare(a
, b
, true, s
);
3563 static FloatRelation QEMU_FLATTEN
3564 soft_bf16_compare(bfloat16 a
, bfloat16 b
, bool is_quiet
, float_status
*s
)
3566 FloatParts64 pa
, pb
;
3568 bfloat16_unpack_canonical(&pa
, a
, s
);
3569 bfloat16_unpack_canonical(&pb
, b
, s
);
3570 return compare_floats(pa
, pb
, is_quiet
, s
);
3573 FloatRelation
bfloat16_compare(bfloat16 a
, bfloat16 b
, float_status
*s
)
3575 return soft_bf16_compare(a
, b
, false, s
);
3578 FloatRelation
bfloat16_compare_quiet(bfloat16 a
, bfloat16 b
, float_status
*s
)
3580 return soft_bf16_compare(a
, b
, true, s
);
3583 /* Multiply A by 2 raised to the power N. */
3584 static FloatParts64
scalbn_decomposed(FloatParts64 a
, int n
, float_status
*s
)
3586 if (unlikely(is_nan(a
.cls
))) {
3587 parts_return_nan(&a
, s
);
3589 if (a
.cls
== float_class_normal
) {
3590 /* The largest float type (even though not supported by FloatParts64)
3591 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
3592 * still allows rounding to infinity, without allowing overflow
3593 * within the int32_t that backs FloatParts64.exp.
3595 n
= MIN(MAX(n
, -0x10000), 0x10000);
3601 float16
float16_scalbn(float16 a
, int n
, float_status
*status
)
3603 FloatParts64 pa
, pr
;
3605 float16_unpack_canonical(&pa
, a
, status
);
3606 pr
= scalbn_decomposed(pa
, n
, status
);
3607 return float16_round_pack_canonical(&pr
, status
);
3610 float32
float32_scalbn(float32 a
, int n
, float_status
*status
)
3612 FloatParts64 pa
, pr
;
3614 float32_unpack_canonical(&pa
, a
, status
);
3615 pr
= scalbn_decomposed(pa
, n
, status
);
3616 return float32_round_pack_canonical(&pr
, status
);
3619 float64
float64_scalbn(float64 a
, int n
, float_status
*status
)
3621 FloatParts64 pa
, pr
;
3623 float64_unpack_canonical(&pa
, a
, status
);
3624 pr
= scalbn_decomposed(pa
, n
, status
);
3625 return float64_round_pack_canonical(&pr
, status
);
3628 bfloat16
bfloat16_scalbn(bfloat16 a
, int n
, float_status
*status
)
3630 FloatParts64 pa
, pr
;
3632 bfloat16_unpack_canonical(&pa
, a
, status
);
3633 pr
= scalbn_decomposed(pa
, n
, status
);
3634 return bfloat16_round_pack_canonical(&pr
, status
);
3640 * The old softfloat code did an approximation step before zeroing in
3641 * on the final result. However for simpleness we just compute the
3642 * square root by iterating down from the implicit bit to enough extra
3643 * bits to ensure we get a correctly rounded result.
3645 * This does mean however the calculation is slower than before,
3646 * especially for 64 bit floats.
3649 static FloatParts64
sqrt_float(FloatParts64 a
, float_status
*s
, const FloatFmt
*p
)
3651 uint64_t a_frac
, r_frac
, s_frac
;
3654 if (is_nan(a
.cls
)) {
3655 parts_return_nan(&a
, s
);
3658 if (a
.cls
== float_class_zero
) {
3659 return a
; /* sqrt(+-0) = +-0 */
3662 float_raise(float_flag_invalid
, s
);
3663 parts_default_nan(&a
, s
);
3666 if (a
.cls
== float_class_inf
) {
3667 return a
; /* sqrt(+inf) = +inf */
3670 assert(a
.cls
== float_class_normal
);
3672 /* We need two overflow bits at the top. Adding room for that is a
3673 * right shift. If the exponent is odd, we can discard the low bit
3674 * by multiplying the fraction by 2; that's a left shift. Combine
3675 * those and we shift right by 1 if the exponent is odd, otherwise 2.
3677 a_frac
= a
.frac
>> (2 - (a
.exp
& 1));
3680 /* Bit-by-bit computation of sqrt. */
3684 /* Iterate from implicit bit down to the 3 extra bits to compute a
3685 * properly rounded result. Remember we've inserted two more bits
3686 * at the top, so these positions are two less.
3688 bit
= DECOMPOSED_BINARY_POINT
- 2;
3689 last_bit
= MAX(p
->frac_shift
- 4, 0);
3691 uint64_t q
= 1ULL << bit
;
3692 uint64_t t_frac
= s_frac
+ q
;
3693 if (t_frac
<= a_frac
) {
3694 s_frac
= t_frac
+ q
;
3699 } while (--bit
>= last_bit
);
3701 /* Undo the right shift done above. If there is any remaining
3702 * fraction, the result is inexact. Set the sticky bit.
3704 a
.frac
= (r_frac
<< 2) + (a_frac
!= 0);
3709 float16 QEMU_FLATTEN
float16_sqrt(float16 a
, float_status
*status
)
3711 FloatParts64 pa
, pr
;
3713 float16_unpack_canonical(&pa
, a
, status
);
3714 pr
= sqrt_float(pa
, status
, &float16_params
);
3715 return float16_round_pack_canonical(&pr
, status
);
3718 static float32 QEMU_SOFTFLOAT_ATTR
3719 soft_f32_sqrt(float32 a
, float_status
*status
)
3721 FloatParts64 pa
, pr
;
3723 float32_unpack_canonical(&pa
, a
, status
);
3724 pr
= sqrt_float(pa
, status
, &float32_params
);
3725 return float32_round_pack_canonical(&pr
, status
);
3728 static float64 QEMU_SOFTFLOAT_ATTR
3729 soft_f64_sqrt(float64 a
, float_status
*status
)
3731 FloatParts64 pa
, pr
;
3733 float64_unpack_canonical(&pa
, a
, status
);
3734 pr
= sqrt_float(pa
, status
, &float64_params
);
3735 return float64_round_pack_canonical(&pr
, status
);
3738 float32 QEMU_FLATTEN
float32_sqrt(float32 xa
, float_status
*s
)
3740 union_float32 ua
, ur
;
3743 if (unlikely(!can_use_fpu(s
))) {
3747 float32_input_flush1(&ua
.s
, s
);
3748 if (QEMU_HARDFLOAT_1F32_USE_FP
) {
3749 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
3750 fpclassify(ua
.h
) == FP_ZERO
) ||
3754 } else if (unlikely(!float32_is_zero_or_normal(ua
.s
) ||
3755 float32_is_neg(ua
.s
))) {
3762 return soft_f32_sqrt(ua
.s
, s
);
3765 float64 QEMU_FLATTEN
float64_sqrt(float64 xa
, float_status
*s
)
3767 union_float64 ua
, ur
;
3770 if (unlikely(!can_use_fpu(s
))) {
3774 float64_input_flush1(&ua
.s
, s
);
3775 if (QEMU_HARDFLOAT_1F64_USE_FP
) {
3776 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
3777 fpclassify(ua
.h
) == FP_ZERO
) ||
3781 } else if (unlikely(!float64_is_zero_or_normal(ua
.s
) ||
3782 float64_is_neg(ua
.s
))) {
3789 return soft_f64_sqrt(ua
.s
, s
);
3792 bfloat16 QEMU_FLATTEN
bfloat16_sqrt(bfloat16 a
, float_status
*status
)
3794 FloatParts64 pa
, pr
;
3796 bfloat16_unpack_canonical(&pa
, a
, status
);
3797 pr
= sqrt_float(pa
, status
, &bfloat16_params
);
3798 return bfloat16_round_pack_canonical(&pr
, status
);
3801 /*----------------------------------------------------------------------------
3802 | The pattern for a default generated NaN.
3803 *----------------------------------------------------------------------------*/
3805 float16
float16_default_nan(float_status
*status
)
3809 parts_default_nan(&p
, status
);
3810 p
.frac
>>= float16_params
.frac_shift
;
3811 return float16_pack_raw(&p
);
3814 float32
float32_default_nan(float_status
*status
)
3818 parts_default_nan(&p
, status
);
3819 p
.frac
>>= float32_params
.frac_shift
;
3820 return float32_pack_raw(&p
);
3823 float64
float64_default_nan(float_status
*status
)
3827 parts_default_nan(&p
, status
);
3828 p
.frac
>>= float64_params
.frac_shift
;
3829 return float64_pack_raw(&p
);
3832 float128
float128_default_nan(float_status
*status
)
3836 parts_default_nan(&p
, status
);
3837 frac_shr(&p
, float128_params
.frac_shift
);
3838 return float128_pack_raw(&p
);
3841 bfloat16
bfloat16_default_nan(float_status
*status
)
3845 parts_default_nan(&p
, status
);
3846 p
.frac
>>= bfloat16_params
.frac_shift
;
3847 return bfloat16_pack_raw(&p
);
3850 /*----------------------------------------------------------------------------
3851 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3852 *----------------------------------------------------------------------------*/
3854 float16
float16_silence_nan(float16 a
, float_status
*status
)
3858 float16_unpack_raw(&p
, a
);
3859 p
.frac
<<= float16_params
.frac_shift
;
3860 parts_silence_nan(&p
, status
);
3861 p
.frac
>>= float16_params
.frac_shift
;
3862 return float16_pack_raw(&p
);
3865 float32
float32_silence_nan(float32 a
, float_status
*status
)
3869 float32_unpack_raw(&p
, a
);
3870 p
.frac
<<= float32_params
.frac_shift
;
3871 parts_silence_nan(&p
, status
);
3872 p
.frac
>>= float32_params
.frac_shift
;
3873 return float32_pack_raw(&p
);
3876 float64
float64_silence_nan(float64 a
, float_status
*status
)
3880 float64_unpack_raw(&p
, a
);
3881 p
.frac
<<= float64_params
.frac_shift
;
3882 parts_silence_nan(&p
, status
);
3883 p
.frac
>>= float64_params
.frac_shift
;
3884 return float64_pack_raw(&p
);
3887 bfloat16
bfloat16_silence_nan(bfloat16 a
, float_status
*status
)
3891 bfloat16_unpack_raw(&p
, a
);
3892 p
.frac
<<= bfloat16_params
.frac_shift
;
3893 parts_silence_nan(&p
, status
);
3894 p
.frac
>>= bfloat16_params
.frac_shift
;
3895 return bfloat16_pack_raw(&p
);
3898 float128
float128_silence_nan(float128 a
, float_status
*status
)
3902 float128_unpack_raw(&p
, a
);
3903 frac_shl(&p
, float128_params
.frac_shift
);
3904 parts_silence_nan(&p
, status
);
3905 frac_shr(&p
, float128_params
.frac_shift
);
3906 return float128_pack_raw(&p
);
3909 /*----------------------------------------------------------------------------
3910 | If `a' is denormal and we are in flush-to-zero mode then set the
3911 | input-denormal exception and return zero. Otherwise just return the value.
3912 *----------------------------------------------------------------------------*/
3914 static bool parts_squash_denormal(FloatParts64 p
, float_status
*status
)
3916 if (p
.exp
== 0 && p
.frac
!= 0) {
3917 float_raise(float_flag_input_denormal
, status
);
3924 float16
float16_squash_input_denormal(float16 a
, float_status
*status
)
3926 if (status
->flush_inputs_to_zero
) {
3929 float16_unpack_raw(&p
, a
);
3930 if (parts_squash_denormal(p
, status
)) {
3931 return float16_set_sign(float16_zero
, p
.sign
);
3937 float32
float32_squash_input_denormal(float32 a
, float_status
*status
)
3939 if (status
->flush_inputs_to_zero
) {
3942 float32_unpack_raw(&p
, a
);
3943 if (parts_squash_denormal(p
, status
)) {
3944 return float32_set_sign(float32_zero
, p
.sign
);
3950 float64
float64_squash_input_denormal(float64 a
, float_status
*status
)
3952 if (status
->flush_inputs_to_zero
) {
3955 float64_unpack_raw(&p
, a
);
3956 if (parts_squash_denormal(p
, status
)) {
3957 return float64_set_sign(float64_zero
, p
.sign
);
3963 bfloat16
bfloat16_squash_input_denormal(bfloat16 a
, float_status
*status
)
3965 if (status
->flush_inputs_to_zero
) {
3968 bfloat16_unpack_raw(&p
, a
);
3969 if (parts_squash_denormal(p
, status
)) {
3970 return bfloat16_set_sign(bfloat16_zero
, p
.sign
);
3976 /*----------------------------------------------------------------------------
3977 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3978 | and 7, and returns the properly rounded 32-bit integer corresponding to the
3979 | input. If `zSign' is 1, the input is negated before being converted to an
3980 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3981 | is simply rounded to an integer, with the inexact exception raised if the
3982 | input cannot be represented exactly as an integer. However, if the fixed-
3983 | point input is too large, the invalid exception is raised and the largest
3984 | positive or negative integer is returned.
3985 *----------------------------------------------------------------------------*/
3987 static int32_t roundAndPackInt32(bool zSign
, uint64_t absZ
,
3988 float_status
*status
)
3990 int8_t roundingMode
;
3991 bool roundNearestEven
;
3992 int8_t roundIncrement
, roundBits
;
3995 roundingMode
= status
->float_rounding_mode
;
3996 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
3997 switch (roundingMode
) {
3998 case float_round_nearest_even
:
3999 case float_round_ties_away
:
4000 roundIncrement
= 0x40;
4002 case float_round_to_zero
:
4005 case float_round_up
:
4006 roundIncrement
= zSign
? 0 : 0x7f;
4008 case float_round_down
:
4009 roundIncrement
= zSign
? 0x7f : 0;
4011 case float_round_to_odd
:
4012 roundIncrement
= absZ
& 0x80 ? 0 : 0x7f;
4017 roundBits
= absZ
& 0x7F;
4018 absZ
= ( absZ
+ roundIncrement
)>>7;
4019 if (!(roundBits
^ 0x40) && roundNearestEven
) {
4023 if ( zSign
) z
= - z
;
4024 if ( ( absZ
>>32 ) || ( z
&& ( ( z
< 0 ) ^ zSign
) ) ) {
4025 float_raise(float_flag_invalid
, status
);
4026 return zSign
? INT32_MIN
: INT32_MAX
;
4029 float_raise(float_flag_inexact
, status
);
4035 /*----------------------------------------------------------------------------
4036 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4037 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4038 | and returns the properly rounded 64-bit integer corresponding to the input.
4039 | If `zSign' is 1, the input is negated before being converted to an integer.
4040 | Ordinarily, the fixed-point input is simply rounded to an integer, with
4041 | the inexact exception raised if the input cannot be represented exactly as
4042 | an integer. However, if the fixed-point input is too large, the invalid
4043 | exception is raised and the largest positive or negative integer is
4045 *----------------------------------------------------------------------------*/
4047 static int64_t roundAndPackInt64(bool zSign
, uint64_t absZ0
, uint64_t absZ1
,
4048 float_status
*status
)
4050 int8_t roundingMode
;
4051 bool roundNearestEven
, increment
;
4054 roundingMode
= status
->float_rounding_mode
;
4055 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4056 switch (roundingMode
) {
4057 case float_round_nearest_even
:
4058 case float_round_ties_away
:
4059 increment
= ((int64_t) absZ1
< 0);
4061 case float_round_to_zero
:
4064 case float_round_up
:
4065 increment
= !zSign
&& absZ1
;
4067 case float_round_down
:
4068 increment
= zSign
&& absZ1
;
4070 case float_round_to_odd
:
4071 increment
= !(absZ0
& 1) && absZ1
;
4078 if ( absZ0
== 0 ) goto overflow
;
4079 if (!(absZ1
<< 1) && roundNearestEven
) {
4084 if ( zSign
) z
= - z
;
4085 if ( z
&& ( ( z
< 0 ) ^ zSign
) ) {
4087 float_raise(float_flag_invalid
, status
);
4088 return zSign
? INT64_MIN
: INT64_MAX
;
4091 float_raise(float_flag_inexact
, status
);
4097 /*----------------------------------------------------------------------------
4098 | Normalizes the subnormal single-precision floating-point value represented
4099 | by the denormalized significand `aSig'. The normalized exponent and
4100 | significand are stored at the locations pointed to by `zExpPtr' and
4101 | `zSigPtr', respectively.
4102 *----------------------------------------------------------------------------*/
static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    int8_t shiftCount;

    /* 8 = 32 bits - 24-bit significand; shift until the MSB of the
     * significand lands in bit 23. */
    shiftCount = clz32(aSig) - 8;
    *zSigPtr = aSig<<shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4115 /*----------------------------------------------------------------------------
4116 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4117 | and significand `zSig', and returns the proper single-precision floating-
4118 | point value corresponding to the abstract input. Ordinarily, the abstract
4119 | value is simply rounded and packed into the single-precision format, with
4120 | the inexact exception raised if the abstract input cannot be represented
4121 | exactly. However, if the abstract value is too large, the overflow and
4122 | inexact exceptions are raised and an infinity or maximal finite value is
4123 | returned. If the abstract value is too small, the input value is rounded to
4124 | a subnormal number, and the underflow and inexact exceptions are raised if
4125 | the abstract input cannot be represented exactly as a subnormal single-
4126 | precision floating-point number.
4127 | The input significand `zSig' has its binary point between bits 30
4128 | and 29, which is 7 bits to the left of the usual location. This shifted
4129 | significand must be normalized or smaller. If `zSig' is not normalized,
4130 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4131 | and it must not require rounding. In the usual case that `zSig' is
4132 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4133 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4134 | Binary Floating-Point Arithmetic.
4135 *----------------------------------------------------------------------------*/
4137 static float32
roundAndPackFloat32(bool zSign
, int zExp
, uint32_t zSig
,
4138 float_status
*status
)
4140 int8_t roundingMode
;
4141 bool roundNearestEven
;
4142 int8_t roundIncrement
, roundBits
;
4145 roundingMode
= status
->float_rounding_mode
;
4146 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4147 switch (roundingMode
) {
4148 case float_round_nearest_even
:
4149 case float_round_ties_away
:
4150 roundIncrement
= 0x40;
4152 case float_round_to_zero
:
4155 case float_round_up
:
4156 roundIncrement
= zSign
? 0 : 0x7f;
4158 case float_round_down
:
4159 roundIncrement
= zSign
? 0x7f : 0;
4161 case float_round_to_odd
:
4162 roundIncrement
= zSig
& 0x80 ? 0 : 0x7f;
4168 roundBits
= zSig
& 0x7F;
4169 if ( 0xFD <= (uint16_t) zExp
) {
4170 if ( ( 0xFD < zExp
)
4171 || ( ( zExp
== 0xFD )
4172 && ( (int32_t) ( zSig
+ roundIncrement
) < 0 ) )
4174 bool overflow_to_inf
= roundingMode
!= float_round_to_odd
&&
4175 roundIncrement
!= 0;
4176 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4177 return packFloat32(zSign
, 0xFF, -!overflow_to_inf
);
4180 if (status
->flush_to_zero
) {
4181 float_raise(float_flag_output_denormal
, status
);
4182 return packFloat32(zSign
, 0, 0);
4184 isTiny
= status
->tininess_before_rounding
4186 || (zSig
+ roundIncrement
< 0x80000000);
4187 shift32RightJamming( zSig
, - zExp
, &zSig
);
4189 roundBits
= zSig
& 0x7F;
4190 if (isTiny
&& roundBits
) {
4191 float_raise(float_flag_underflow
, status
);
4193 if (roundingMode
== float_round_to_odd
) {
4195 * For round-to-odd case, the roundIncrement depends on
4196 * zSig which just changed.
4198 roundIncrement
= zSig
& 0x80 ? 0 : 0x7f;
4203 float_raise(float_flag_inexact
, status
);
4205 zSig
= ( zSig
+ roundIncrement
)>>7;
4206 if (!(roundBits
^ 0x40) && roundNearestEven
) {
4209 if ( zSig
== 0 ) zExp
= 0;
4210 return packFloat32( zSign
, zExp
, zSig
);
4214 /*----------------------------------------------------------------------------
4215 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4216 | and significand `zSig', and returns the proper single-precision floating-
4217 | point value corresponding to the abstract input. This routine is just like
4218 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4219 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4220 | floating-point exponent.
4221 *----------------------------------------------------------------------------*/
4224 normalizeRoundAndPackFloat32(bool zSign
, int zExp
, uint32_t zSig
,
4225 float_status
*status
)
4229 shiftCount
= clz32(zSig
) - 1;
4230 return roundAndPackFloat32(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
,
4235 /*----------------------------------------------------------------------------
4236 | Normalizes the subnormal double-precision floating-point value represented
4237 | by the denormalized significand `aSig'. The normalized exponent and
4238 | significand are stored at the locations pointed to by `zExpPtr' and
4239 | `zSigPtr', respectively.
4240 *----------------------------------------------------------------------------*/
static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    int8_t shiftCount;

    /* 11 = 64 bits - 53-bit significand; shift until the MSB of the
     * significand lands in bit 52. */
    shiftCount = clz64(aSig) - 11;
    *zSigPtr = aSig<<shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4253 /*----------------------------------------------------------------------------
4254 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4255 | double-precision floating-point value, returning the result. After being
4256 | shifted into the proper positions, the three fields are simply added
4257 | together to form the result. This means that any integer portion of `zSig'
4258 | will be added into the exponent. Since a properly normalized significand
4259 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4260 | than the desired result exponent whenever `zSig' is a complete, normalized
4262 *----------------------------------------------------------------------------*/
4264 static inline float64
packFloat64(bool zSign
, int zExp
, uint64_t zSig
)
4267 return make_float64(
4268 ( ( (uint64_t) zSign
)<<63 ) + ( ( (uint64_t) zExp
)<<52 ) + zSig
);
4272 /*----------------------------------------------------------------------------
4273 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4274 | and significand `zSig', and returns the proper double-precision floating-
4275 | point value corresponding to the abstract input. Ordinarily, the abstract
4276 | value is simply rounded and packed into the double-precision format, with
4277 | the inexact exception raised if the abstract input cannot be represented
4278 | exactly. However, if the abstract value is too large, the overflow and
4279 | inexact exceptions are raised and an infinity or maximal finite value is
4280 | returned. If the abstract value is too small, the input value is rounded to
4281 | a subnormal number, and the underflow and inexact exceptions are raised if
4282 | the abstract input cannot be represented exactly as a subnormal double-
4283 | precision floating-point number.
4284 | The input significand `zSig' has its binary point between bits 62
4285 | and 61, which is 10 bits to the left of the usual location. This shifted
4286 | significand must be normalized or smaller. If `zSig' is not normalized,
4287 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4288 | and it must not require rounding. In the usual case that `zSig' is
4289 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4290 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4291 | Binary Floating-Point Arithmetic.
4292 *----------------------------------------------------------------------------*/
4294 static float64
roundAndPackFloat64(bool zSign
, int zExp
, uint64_t zSig
,
4295 float_status
*status
)
4297 int8_t roundingMode
;
4298 bool roundNearestEven
;
4299 int roundIncrement
, roundBits
;
4302 roundingMode
= status
->float_rounding_mode
;
4303 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4304 switch (roundingMode
) {
4305 case float_round_nearest_even
:
4306 case float_round_ties_away
:
4307 roundIncrement
= 0x200;
4309 case float_round_to_zero
:
4312 case float_round_up
:
4313 roundIncrement
= zSign
? 0 : 0x3ff;
4315 case float_round_down
:
4316 roundIncrement
= zSign
? 0x3ff : 0;
4318 case float_round_to_odd
:
4319 roundIncrement
= (zSig
& 0x400) ? 0 : 0x3ff;
4324 roundBits
= zSig
& 0x3FF;
4325 if ( 0x7FD <= (uint16_t) zExp
) {
4326 if ( ( 0x7FD < zExp
)
4327 || ( ( zExp
== 0x7FD )
4328 && ( (int64_t) ( zSig
+ roundIncrement
) < 0 ) )
4330 bool overflow_to_inf
= roundingMode
!= float_round_to_odd
&&
4331 roundIncrement
!= 0;
4332 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4333 return packFloat64(zSign
, 0x7FF, -(!overflow_to_inf
));
4336 if (status
->flush_to_zero
) {
4337 float_raise(float_flag_output_denormal
, status
);
4338 return packFloat64(zSign
, 0, 0);
4340 isTiny
= status
->tininess_before_rounding
4342 || (zSig
+ roundIncrement
< UINT64_C(0x8000000000000000));
4343 shift64RightJamming( zSig
, - zExp
, &zSig
);
4345 roundBits
= zSig
& 0x3FF;
4346 if (isTiny
&& roundBits
) {
4347 float_raise(float_flag_underflow
, status
);
4349 if (roundingMode
== float_round_to_odd
) {
4351 * For round-to-odd case, the roundIncrement depends on
4352 * zSig which just changed.
4354 roundIncrement
= (zSig
& 0x400) ? 0 : 0x3ff;
4359 float_raise(float_flag_inexact
, status
);
4361 zSig
= ( zSig
+ roundIncrement
)>>10;
4362 if (!(roundBits
^ 0x200) && roundNearestEven
) {
4365 if ( zSig
== 0 ) zExp
= 0;
4366 return packFloat64( zSign
, zExp
, zSig
);
4370 /*----------------------------------------------------------------------------
4371 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4372 | and significand `zSig', and returns the proper double-precision floating-
4373 | point value corresponding to the abstract input. This routine is just like
4374 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4375 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4376 | floating-point exponent.
4377 *----------------------------------------------------------------------------*/
4380 normalizeRoundAndPackFloat64(bool zSign
, int zExp
, uint64_t zSig
,
4381 float_status
*status
)
4385 shiftCount
= clz64(zSig
) - 1;
4386 return roundAndPackFloat64(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
,
4391 /*----------------------------------------------------------------------------
4392 | Normalizes the subnormal extended double-precision floating-point value
4393 | represented by the denormalized significand `aSig'. The normalized exponent
4394 | and significand are stored at the locations pointed to by `zExpPtr' and
4395 | `zSigPtr', respectively.
4396 *----------------------------------------------------------------------------*/
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t shiftCount;

    /* The x80 significand has an explicit integer bit, so normalize the
     * MSB all the way to bit 63. */
    shiftCount = clz64(aSig);
    *zSigPtr = aSig<<shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4408 /*----------------------------------------------------------------------------
4409 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4410 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4411 | and returns the proper extended double-precision floating-point value
4412 | corresponding to the abstract input. Ordinarily, the abstract value is
4413 | rounded and packed into the extended double-precision format, with the
4414 | inexact exception raised if the abstract input cannot be represented
4415 | exactly. However, if the abstract value is too large, the overflow and
4416 | inexact exceptions are raised and an infinity or maximal finite value is
4417 | returned. If the abstract value is too small, the input value is rounded to
4418 | a subnormal number, and the underflow and inexact exceptions are raised if
4419 | the abstract input cannot be represented exactly as a subnormal extended
4420 | double-precision floating-point number.
4421 | If `roundingPrecision' is 32 or 64, the result is rounded to the same
4422 | number of bits as single or double precision, respectively. Otherwise, the
4423 | result is rounded to the full precision of the extended double-precision
4425 | The input significand must be normalized or smaller. If the input
4426 | significand is not normalized, `zExp' must be 0; in that case, the result
4427 | returned is a subnormal number, and it must not require rounding. The
4428 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4429 | Floating-Point Arithmetic.
4430 *----------------------------------------------------------------------------*/
4432 floatx80
roundAndPackFloatx80(int8_t roundingPrecision
, bool zSign
,
4433 int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
,
4434 float_status
*status
)
4436 int8_t roundingMode
;
4437 bool roundNearestEven
, increment
, isTiny
;
4438 int64_t roundIncrement
, roundMask
, roundBits
;
4440 roundingMode
= status
->float_rounding_mode
;
4441 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4442 if ( roundingPrecision
== 80 ) goto precision80
;
4443 if ( roundingPrecision
== 64 ) {
4444 roundIncrement
= UINT64_C(0x0000000000000400);
4445 roundMask
= UINT64_C(0x00000000000007FF);
4447 else if ( roundingPrecision
== 32 ) {
4448 roundIncrement
= UINT64_C(0x0000008000000000);
4449 roundMask
= UINT64_C(0x000000FFFFFFFFFF);
4454 zSig0
|= ( zSig1
!= 0 );
4455 switch (roundingMode
) {
4456 case float_round_nearest_even
:
4457 case float_round_ties_away
:
4459 case float_round_to_zero
:
4462 case float_round_up
:
4463 roundIncrement
= zSign
? 0 : roundMask
;
4465 case float_round_down
:
4466 roundIncrement
= zSign
? roundMask
: 0;
4471 roundBits
= zSig0
& roundMask
;
4472 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4473 if ( ( 0x7FFE < zExp
)
4474 || ( ( zExp
== 0x7FFE ) && ( zSig0
+ roundIncrement
< zSig0
) )
4479 if (status
->flush_to_zero
) {
4480 float_raise(float_flag_output_denormal
, status
);
4481 return packFloatx80(zSign
, 0, 0);
4483 isTiny
= status
->tininess_before_rounding
4485 || (zSig0
<= zSig0
+ roundIncrement
);
4486 shift64RightJamming( zSig0
, 1 - zExp
, &zSig0
);
4488 roundBits
= zSig0
& roundMask
;
4489 if (isTiny
&& roundBits
) {
4490 float_raise(float_flag_underflow
, status
);
4493 float_raise(float_flag_inexact
, status
);
4495 zSig0
+= roundIncrement
;
4496 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
4497 roundIncrement
= roundMask
+ 1;
4498 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4499 roundMask
|= roundIncrement
;
4501 zSig0
&= ~ roundMask
;
4502 return packFloatx80( zSign
, zExp
, zSig0
);
4506 float_raise(float_flag_inexact
, status
);
4508 zSig0
+= roundIncrement
;
4509 if ( zSig0
< roundIncrement
) {
4511 zSig0
= UINT64_C(0x8000000000000000);
4513 roundIncrement
= roundMask
+ 1;
4514 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4515 roundMask
|= roundIncrement
;
4517 zSig0
&= ~ roundMask
;
4518 if ( zSig0
== 0 ) zExp
= 0;
4519 return packFloatx80( zSign
, zExp
, zSig0
);
4521 switch (roundingMode
) {
4522 case float_round_nearest_even
:
4523 case float_round_ties_away
:
4524 increment
= ((int64_t)zSig1
< 0);
4526 case float_round_to_zero
:
4529 case float_round_up
:
4530 increment
= !zSign
&& zSig1
;
4532 case float_round_down
:
4533 increment
= zSign
&& zSig1
;
4538 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4539 if ( ( 0x7FFE < zExp
)
4540 || ( ( zExp
== 0x7FFE )
4541 && ( zSig0
== UINT64_C(0xFFFFFFFFFFFFFFFF) )
4547 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4548 if ( ( roundingMode
== float_round_to_zero
)
4549 || ( zSign
&& ( roundingMode
== float_round_up
) )
4550 || ( ! zSign
&& ( roundingMode
== float_round_down
) )
4552 return packFloatx80( zSign
, 0x7FFE, ~ roundMask
);
4554 return packFloatx80(zSign
,
4555 floatx80_infinity_high
,
4556 floatx80_infinity_low
);
4559 isTiny
= status
->tininess_before_rounding
4562 || (zSig0
< UINT64_C(0xFFFFFFFFFFFFFFFF));
4563 shift64ExtraRightJamming( zSig0
, zSig1
, 1 - zExp
, &zSig0
, &zSig1
);
4565 if (isTiny
&& zSig1
) {
4566 float_raise(float_flag_underflow
, status
);
4569 float_raise(float_flag_inexact
, status
);
4571 switch (roundingMode
) {
4572 case float_round_nearest_even
:
4573 case float_round_ties_away
:
4574 increment
= ((int64_t)zSig1
< 0);
4576 case float_round_to_zero
:
4579 case float_round_up
:
4580 increment
= !zSign
&& zSig1
;
4582 case float_round_down
:
4583 increment
= zSign
&& zSig1
;
4590 if (!(zSig1
<< 1) && roundNearestEven
) {
4593 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
4595 return packFloatx80( zSign
, zExp
, zSig0
);
4599 float_raise(float_flag_inexact
, status
);
4605 zSig0
= UINT64_C(0x8000000000000000);
4608 if (!(zSig1
<< 1) && roundNearestEven
) {
4614 if ( zSig0
== 0 ) zExp
= 0;
4616 return packFloatx80( zSign
, zExp
, zSig0
);
4620 /*----------------------------------------------------------------------------
4621 | Takes an abstract floating-point value having sign `zSign', exponent
4622 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4623 | and returns the proper extended double-precision floating-point value
4624 | corresponding to the abstract input. This routine is just like
4625 | `roundAndPackFloatx80' except that the input significand does not have to be
4627 *----------------------------------------------------------------------------*/
4629 floatx80
normalizeRoundAndPackFloatx80(int8_t roundingPrecision
,
4630 bool zSign
, int32_t zExp
,
4631 uint64_t zSig0
, uint64_t zSig1
,
4632 float_status
*status
)
4641 shiftCount
= clz64(zSig0
);
4642 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
4644 return roundAndPackFloatx80(roundingPrecision
, zSign
, zExp
,
4645 zSig0
, zSig1
, status
);
4649 /*----------------------------------------------------------------------------
4650 | Returns the least-significant 64 fraction bits of the quadruple-precision
4651 | floating-point value `a'.
4652 *----------------------------------------------------------------------------*/
4654 static inline uint64_t extractFloat128Frac1( float128 a
)
4661 /*----------------------------------------------------------------------------
4662 | Returns the most-significant 48 fraction bits of the quadruple-precision
4663 | floating-point value `a'.
4664 *----------------------------------------------------------------------------*/
4666 static inline uint64_t extractFloat128Frac0( float128 a
)
4669 return a
.high
& UINT64_C(0x0000FFFFFFFFFFFF);
4673 /*----------------------------------------------------------------------------
4674 | Returns the exponent bits of the quadruple-precision floating-point value
4676 *----------------------------------------------------------------------------*/
4678 static inline int32_t extractFloat128Exp( float128 a
)
4681 return ( a
.high
>>48 ) & 0x7FFF;
4685 /*----------------------------------------------------------------------------
4686 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4687 *----------------------------------------------------------------------------*/
4689 static inline bool extractFloat128Sign(float128 a
)
4691 return a
.high
>> 63;
4694 /*----------------------------------------------------------------------------
4695 | Normalizes the subnormal quadruple-precision floating-point value
4696 | represented by the denormalized significand formed by the concatenation of
4697 | `aSig0' and `aSig1'. The normalized exponent is stored at the location
4698 | pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4699 | significand are stored at the location pointed to by `zSig0Ptr', and the
4700 | least significant 64 bits of the normalized significand are stored at the
4701 | location pointed to by `zSig1Ptr'.
4702 *----------------------------------------------------------------------------*/
static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shiftCount;

    if ( aSig0 == 0 ) {
        /* 15 = 64 - 49 high significand bits. */
        shiftCount = clz64(aSig1) - 15;
        if ( shiftCount < 0 ) {
            *zSig0Ptr = aSig1>>( - shiftCount );
            *zSig1Ptr = aSig1<<( shiftCount & 63 );
        }
        else {
            *zSig0Ptr = aSig1<<shiftCount;
            *zSig1Ptr = 0;
        }
        *zExpPtr = - shiftCount - 63;
    }
    else {
        shiftCount = clz64(aSig0) - 15;
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
        *zExpPtr = 1 - shiftCount;
    }
}
4735 /*----------------------------------------------------------------------------
4736 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4737 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4738 | floating-point value, returning the result. After being shifted into the
4739 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4740 | added together to form the most significant 32 bits of the result. This
4741 | means that any integer portion of `zSig0' will be added into the exponent.
4742 | Since a properly normalized significand will have an integer portion equal
4743 | to 1, the `zExp' input should be 1 less than the desired result exponent
4744 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4746 *----------------------------------------------------------------------------*/
4748 static inline float128
4749 packFloat128(bool zSign
, int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
)
4754 z
.high
= ((uint64_t)zSign
<< 63) + ((uint64_t)zExp
<< 48) + zSig0
;
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and extended significand formed by the concatenation of `zSig0', `zSig1',
| and `zSig2', and returns the proper quadruple-precision floating-point value
| corresponding to the abstract input.  Ordinarily, the abstract value is
| simply rounded and packed into the quadruple-precision format, with the
| inexact exception raised if the abstract input cannot be represented
| exactly.  However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned.  If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal quadruple-
| precision floating-point number.
|     The input significand must be normalized or smaller.  If the input
| significand is not normalized, `zExp' must be 0; in that case, the result
| returned is a subnormal number, and it must not require rounding.  In the
| usual case that the input significand is normalized, `zExp' must be 1 less
| than the ``true'' floating-point exponent.  The handling of underflow and
| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
                                     uint64_t zSig0, uint64_t zSig1,
                                     uint64_t zSig2, float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* `zSig2' holds the bits shifted out below the 113-bit significand;
     * decide whether they force an increment of the retained bits. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig2 < 0);   /* top discarded bit set */
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig2;
        break;
    case float_round_down:
        increment = zSign && zSig2;
        break;
    case float_round_to_odd:
        increment = !(zSig1 & 0x1) && zSig2;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) zExp ) {
        /* Exponent at or beyond the representable range (the unsigned
         * compare also catches negative zExp, i.e. possible underflow). */
        if (    ( 0x7FFD < zExp )
             || (    ( zExp == 0x7FFD )
                  && eq128( UINT64_C(0x0001FFFFFFFFFFFF),
                            UINT64_C(0xFFFFFFFFFFFFFFFF),
                            zSig0, zSig1 )
                  && increment
                )
           ) {
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Directed rounding toward zero yields the maximal finite
             * value instead of infinity. */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
                 || (roundingMode == float_round_to_odd)
               ) {
                return
                    packFloat128(
                        zSign,
                        0x7FFE,
                        UINT64_C(0x0000FFFFFFFFFFFF),
                        UINT64_C(0xFFFFFFFFFFFFFFFF)
                    );
            }
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        if ( zExp < 0 ) {
            /* Result is subnormal (or zero after flushing). */
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat128(zSign, 0, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || !increment
                  || lt128(zSig0, zSig1,
                           UINT64_C(0x0001FFFFFFFFFFFF),
                           UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift128ExtraRightJamming(
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
            zExp = 0;
            if (isTiny && zSig2) {
                float_raise(float_flag_underflow, status);
            }
            /* The denormalizing shift changed the discarded bits, so the
             * increment decision must be recomputed. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig2 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig2;
                break;
            case float_round_down:
                increment = zSign && zSig2;
                break;
            case float_round_to_odd:
                increment = !(zSig1 & 0x1) && zSig2;
                break;
            default:
                abort();
            }
        }
    }
    if (zSig2) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
        /* Ties-to-even: a tie (discarded bits exactly half) clears the
         * low bit of the result. */
        if ((zSig2 + zSig2 == 0) && roundNearestEven) {
            zSig1 &= ~1;
        }
    }
    else {
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
    }
    return packFloat128( zSign, zExp, zSig0, zSig1 );
}
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand formed by the concatenation of `zSig0' and `zSig1', and
| returns the proper quadruple-precision floating-point value corresponding
| to the abstract input.  This routine is just like `roundAndPackFloat128'
| except that the input significand has fewer bits and does not have to be
| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
| point exponent.
*----------------------------------------------------------------------------*/

static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
                                              uint64_t zSig0, uint64_t zSig1,
                                              float_status *status)
{
    int8_t shiftCount;
    uint64_t zSig2;

    /* If the high word is empty, promote the low word and adjust the
     * exponent by a full word. */
    if ( zSig0 == 0 ) {
        zSig0 = zSig1;
        zSig1 = 0;
        zExp -= 64;
    }
    /* Bring the most significant set bit to position 48 of zSig0
     * (113-bit significand position). */
    shiftCount = clz64(zSig0) - 15;
    if ( 0 <= shiftCount ) {
        zSig2 = 0;
        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    }
    else {
        shift128ExtraRightJamming(
            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
    }
    zExp -= shiftCount;
    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
}
/*----------------------------------------------------------------------------
| Returns the result of converting the 32-bit two's complement integer `a'
| to the extended double-precision floating-point format.  The conversion
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 int32_to_floatx80(int32_t a, float_status *status)
{
    bool zSign;
    uint32_t absA;
    int8_t shiftCount;
    uint64_t zSig;

    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
    zSign = ( a < 0 );
    absA = zSign ? - a : a;
    /* Left-justify the magnitude in a 64-bit significand; the exponent
     * 0x403E corresponds to a significand integer bit at position 63. */
    shiftCount = clz32(absA) + 32;
    zSig = absA;
    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
}
/*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit two's complement integer `a'
| to the extended double-precision floating-point format.  The conversion
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 int64_to_floatx80(int64_t a, float_status *status)
{
    bool zSign;
    uint64_t absA;
    int8_t shiftCount;

    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
    zSign = ( a < 0 );
    absA = zSign ? - a : a;
    /* Left-justify the magnitude; conversion from int64 is always exact
     * in the 64-bit extended significand. */
    shiftCount = clz64(absA);
    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
}
/*----------------------------------------------------------------------------
| Returns the result of converting the single-precision floating-point value
| `a' to the extended double-precision floating-point format.  The conversion
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 float32_to_floatx80(float32 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint32_t aSig;

    a = float32_squash_input_denormal(a, status);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    if ( aExp == 0xFF ) {
        if (aSig) {
            /* NaN: convert via the canonical form, then silence it,
             * since floatx80 always carries a quiet result here. */
            floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        return packFloatx80(aSign,
                            floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    aSig |= 0x00800000;  /* make the integer bit explicit */
    /* Rebias from float32 (127) to floatx80 (16383): 0x3F80 = 16383-127,
     * and shift the 24-bit significand up to the top of 64 bits. */
    return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
}
/*----------------------------------------------------------------------------
| Returns the remainder of the single-precision floating-point value `a'
| with respect to the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

float32 float32_rem(float32 a, float32 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint32_t aSig, bSig;
    uint32_t q;
    uint64_t aSig64, bSig64, q64;
    uint32_t alternateASig;
    int32_t sigMean;

    a = float32_squash_input_denormal(a, status);
    b = float32_squash_input_denormal(b, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    /* Special cases: NaN operands propagate; rem(inf, x) and rem(x, 0)
     * are invalid; rem(x, inf) == x. */
    if ( aExp == 0xFF ) {
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
            return propagateFloat32NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    if ( bExp == 0xFF ) {
        if ( bSig ) {
            return propagateFloat32NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float32_default_nan(status);
        }
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    aSig |= 0x00800000;
    bSig |= 0x00800000;
    if ( expDiff < 32 ) {
        /* Small exponent difference: a single 32/64-bit division step
         * produces the quotient. */
        aSig <<= 8;
        bSig <<= 8;
        if ( expDiff < 0 ) {
            if ( expDiff < -1 ) return a;   /* |a| < |b|/2: a is the remainder */
            aSig >>= 1;
        }
        q = ( bSig <= aSig );
        if ( q ) aSig -= bSig;
        if ( 0 < expDiff ) {
            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
            q >>= 32 - expDiff;
            bSig >>= 2;
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        }
        else {
            aSig >>= 2;
            bSig >>= 2;
        }
    }
    else {
        /* Large exponent difference: reduce iteratively with 64-bit
         * estimated divisions, 62 quotient bits per pass. */
        if ( bSig <= aSig ) aSig -= bSig;
        aSig64 = ( (uint64_t) aSig )<<40;
        bSig64 = ( (uint64_t) bSig )<<40;
        expDiff -= 64;
        while ( 0 < expDiff ) {
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;   /* estimate may be 2 too high */
            aSig64 = - ( ( bSig * q64 )<<38 );
            expDiff -= 62;
        }
        expDiff += 64;
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        q = q64>>( 64 - expDiff );
        bSig <<= 6;
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
    }
    /* Step past zero to find the remainder closest to zero (IEEE rem
     * rounds the quotient to nearest-even). */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int32_t) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int32_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
}
/*----------------------------------------------------------------------------
| Returns the binary exponential of the single-precision floating-point value
| `a'.  The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
|
| Uses the following identities:
|
| 1. -------------------------------------------------------------------------
|      x    x*ln(2)
|     2  = e
|
| 2. -------------------------------------------------------------------------
|                      2     3     4     5           n
|      x        x     x     x     x     x           x
|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
|               1!    2!    3!    4!    5!          n!
*----------------------------------------------------------------------------*/

/* Taylor-series coefficients 1/n! for n = 1..15, as float64 constants. */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /*  1 */
    const_float64( 0x3fe0000000000000ll ), /*  2 */
    const_float64( 0x3fc5555555555555ll ), /*  3 */
    const_float64( 0x3fa5555555555555ll ), /*  4 */
    const_float64( 0x3f81111111111111ll ), /*  5 */
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};
float32 float32_exp2(float32 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint32_t aSig;
    float64 r, x, xn;
    int i;

    a = float32_squash_input_denormal(a, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    if ( aExp == 0xFF) {
        if ( aSig ) {
            return propagateFloat32NaN(a, float32_zero, status);
        }
        /* 2^(-inf) = 0, 2^(+inf) = +inf */
        return (aSign) ? float32_zero : a;
    }
    if (aExp == 0) {
        if (aSig == 0) return float32_one;   /* 2^(+/-0) = 1, exactly */
    }

    float_raise(float_flag_inexact, status);

    /* ******************************* */
    /* using float64 for approximation */
    /* ******************************* */
    /* Compute e^(x*ln2) via the 15-term Taylor series above. */
    x = float32_to_float64(a, status);
    x = float64_mul(x, float64_ln2, status);

    xn = x;
    r = float64_one;
    for (i = 0 ; i < 15 ; i++) {
        float64 f;

        f = float64_mul(xn, float32_exp2_coefficients[i], status);
        r = float64_add(r, f, status);

        xn = float64_mul(xn, x, status);
    }

    return float64_to_float32(r, status);
}
/*----------------------------------------------------------------------------
| Returns the binary log of the single-precision floating-point value `a'.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float32 float32_log2(float32 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint32_t aSig, zSig, i;

    a = float32_squash_input_denormal(a, status);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );   /* log2(0) = -inf */
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* log2 of a negative number is invalid. */
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    if ( aExp == 0xFF ) {
        if (aSig) {
            return propagateFloat32NaN(a, float32_zero, status);
        }
        return a;   /* log2(+inf) = +inf */
    }

    aExp -= 0x7F;              /* unbiased exponent = integer part of log2 */
    aSig |= 0x00800000;
    zSign = aExp < 0;
    zSig = aExp << 23;

    /* Generate fraction bits one at a time: squaring doubles the log,
     * so each squaring shifts one result bit out of the significand. */
    for (i = 1 << 22; i > 0; i >>= 1) {
        aSig = ( (uint64_t)aSig * aSig ) >> 23;
        if ( aSig & 0x01000000 ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    if ( zSign )
        zSig = -zSig;

    return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
}
/*----------------------------------------------------------------------------
| Returns the result of converting the double-precision floating-point value
| `a' to the extended double-precision floating-point format.  The conversion
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 float64_to_floatx80(float64 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint64_t aSig;

    a = float64_squash_input_denormal(a, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    if ( aExp == 0x7FF ) {
        if (aSig) {
            /* NaN: convert via the canonical form and silence it. */
            floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        return packFloatx80(aSign,
                            floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    /* Rebias from float64 (1023) to floatx80 (16383): 0x3C00 = 16383-1023;
     * make the integer bit explicit and left-justify the significand. */
    return
        packFloatx80(
            aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
}
/*----------------------------------------------------------------------------
| Returns the remainder of the double-precision floating-point value `a'
| with respect to the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

float64 float64_rem(float64 a, float64 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint64_t aSig, bSig;
    uint64_t q, alternateASig;
    int64_t sigMean;

    a = float64_squash_input_denormal(a, status);
    b = float64_squash_input_denormal(b, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    /* Special cases: NaN operands propagate; rem(inf, x) and rem(x, 0)
     * are invalid; rem(x, inf) == x. */
    if ( aExp == 0x7FF ) {
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    if ( bExp == 0x7FF ) {
        if ( bSig ) {
            return propagateFloat64NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float64_default_nan(status);
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
    bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
    if ( expDiff < 0 ) {
        if ( expDiff < -1 ) return a;   /* |a| < |b|/2: a is the remainder */
        aSig >>= 1;
    }
    q = ( bSig <= aSig );
    if ( q ) aSig -= bSig;
    /* Reduce iteratively with estimated 128/64 divisions, 62 quotient
     * bits per pass. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;   /* estimate may be 2 too high */
        aSig = - ( ( bSig>>2 ) * q );
        expDiff -= 62;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        bSig >>= 2;
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
    }
    else {
        aSig >>= 2;
        bSig >>= 2;
    }
    /* Step past zero to find the remainder closest to zero (IEEE rem
     * rounds the quotient to nearest-even). */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int64_t) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int64_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
}
/*----------------------------------------------------------------------------
| Returns the binary log of the double-precision floating-point value `a'.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float64 float64_log2(float64 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint64_t aSig, aSig0, aSig1, zSig, i;

    a = float64_squash_input_denormal(a, status);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );   /* log2(0) = -inf */
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* log2 of a negative number is invalid. */
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    if ( aExp == 0x7FF ) {
        if (aSig) {
            return propagateFloat64NaN(a, float64_zero, status);
        }
        return a;   /* log2(+inf) = +inf */
    }

    aExp -= 0x3FF;             /* unbiased exponent = integer part of log2 */
    aSig |= UINT64_C(0x0010000000000000);
    zSign = aExp < 0;
    zSig = (uint64_t)aExp << 52;
    /* Generate fraction bits one at a time: squaring doubles the log,
     * so each squaring shifts one result bit out of the significand. */
    for (i = 1LL << 51; i > 0; i >>= 1) {
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
        if ( aSig & UINT64_C(0x0020000000000000) ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    if ( zSign )
        zSig = -zSig;
    return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
}
/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point value `a' to the 32-bit two's complement integer format.  The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic---which means in particular that the conversion
| is rounded according to the current rounding mode.  If `a' is a NaN, the
| largest positive integer is returned.  Otherwise, if the conversion
| overflows, the largest integer with the same sign as `a' is returned.
*----------------------------------------------------------------------------*/

int32_t floatx80_to_int32(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return 1 << 31;
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    /* NaN converts as if positive so the largest positive int results. */
    if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
    shiftCount = 0x4037 - aExp;
    if ( shiftCount <= 0 ) shiftCount = 1;   /* keep overflow detectable */
    shift64RightJamming( aSig, shiftCount, &aSig );
    return roundAndPackInt32(aSign, aSig, status);
}
/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point value `a' to the 32-bit two's complement integer format.  The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic, except that the conversion is always rounded
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
| Otherwise, if the conversion overflows, the largest integer with the same
| sign as `a' is returned.
*----------------------------------------------------------------------------*/

int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig, savedASig;
    int32_t z;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return 1 << 31;
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( 0x401E < aExp ) {
        /* Magnitude >= 2^31: overflow (NaN is treated as positive). */
        if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
        goto invalid;
    }
    else if ( aExp < 0x3FFF ) {
        /* Magnitude < 1: truncates to 0; inexact if nonzero. */
        if ( aExp || aSig ) {
            float_raise(float_flag_inexact, status);
        }
        return 0;
    }
    shiftCount = 0x403E - aExp;
    savedASig = aSig;
    aSig >>= shiftCount;
    z = aSig;
    if ( aSign ) z = - z;
    if ( ( z < 0 ) ^ aSign ) {
 invalid:
        float_raise(float_flag_invalid, status);
        return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
    }
    /* Inexact if any bits were shifted out by the truncation. */
    if ( ( aSig<<shiftCount ) != savedASig ) {
        float_raise(float_flag_inexact, status);
    }
    return z;
}
/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point value `a' to the 64-bit two's complement integer format.  The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic---which means in particular that the conversion
| is rounded according to the current rounding mode.  If `a' is a NaN,
| the largest positive integer is returned.  Otherwise, if the conversion
| overflows, the largest integer with the same sign as `a' is returned.
*----------------------------------------------------------------------------*/

int64_t floatx80_to_int64(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig, aSigExtra;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return 1ULL << 63;
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    shiftCount = 0x403E - aExp;
    if ( shiftCount <= 0 ) {
        if ( shiftCount ) {
            /* Magnitude >= 2^64: definite overflow (or NaN). */
            float_raise(float_flag_invalid, status);
            if (!aSign || floatx80_is_any_nan(a)) {
                return INT64_MAX;
            }
            return INT64_MIN;
        }
        aSigExtra = 0;
    }
    else {
        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
    }
    return roundAndPackInt64(aSign, aSig, aSigExtra, status);
}
/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point value `a' to the 64-bit two's complement integer format.  The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic, except that the conversion is always rounded
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
| Otherwise, if the conversion overflows, the largest integer with the same
| sign as `a' is returned.
*----------------------------------------------------------------------------*/

int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig;
    int64_t z;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return 1ULL << 63;
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    shiftCount = aExp - 0x403E;
    if ( 0 <= shiftCount ) {
        /* Magnitude >= 2^63: only -2^63 itself converts exactly. */
        aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
        if ( ( a.high != 0xC03E ) || aSig ) {
            float_raise(float_flag_invalid, status);
            if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
                return INT64_MAX;
            }
        }
        return INT64_MIN;
    }
    else if ( aExp < 0x3FFF ) {
        /* Magnitude < 1: truncates to 0; inexact if nonzero. */
        if ( aExp | aSig ) {
            float_raise(float_flag_inexact, status);
        }
        return 0;
    }
    z = aSig>>( - shiftCount );
    /* Inexact if any truncated bits were nonzero. */
    if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
        float_raise(float_flag_inexact, status);
    }
    if ( aSign ) z = - z;
    return z;
}
/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point value `a' to the single-precision floating-point format.  The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

float32 floatx80_to_float32(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( aSig<<1 ) ) {
            /* NaN: convert via the canonical form and silence it. */
            float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
                                             status);
            return float32_silence_nan(res, status);
        }
        return packFloat32( aSign, 0xFF, 0 );
    }
    /* Narrow the 64-bit significand to float32 width, jamming shifted-out
     * bits; 0x3F81 rebias accounts for the explicit integer bit. */
    shift64RightJamming( aSig, 33, &aSig );
    if ( aExp || aSig ) aExp -= 0x3F81;
    return roundAndPackFloat32(aSign, aExp, aSig, status);
}
/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point value `a' to the double-precision floating-point format.  The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

float64 floatx80_to_float64(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig, zSig;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( aSig<<1 ) ) {
            /* NaN: convert via the canonical form and silence it. */
            float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
                                             status);
            return float64_silence_nan(res, status);
        }
        return packFloat64( aSign, 0x7FF, 0 );
    }
    /* Narrow the 64-bit significand to float64 width, jamming the
     * shifted-out bit; 0x3C01 rebias accounts for the integer bit. */
    shift64RightJamming( aSig, 1, &zSig );
    if ( aExp || aSig ) aExp -= 0x3C01;
    return roundAndPackFloat64(aSign, aExp, zSig, status);
}
/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point value `a' to the quadruple-precision floating-point format.  The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

float128 floatx80_to_float128(floatx80 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint64_t aSig, zSig0, zSig1;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return float128_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
        /* NaN: convert via the canonical form and silence it. */
        float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
                                           status);
        return float128_silence_nan(res, status);
    }
    /* Widening is exact: drop the explicit integer bit (aSig<<1) and
     * position the remaining 63 fraction bits in the 112-bit field. */
    shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
    return packFloat128( aSign, aExp, zSig0, zSig1 );
}
/*----------------------------------------------------------------------------
| Rounds the extended double-precision floating-point value `a'
| to the precision provided by floatx80_rounding_precision and returns the
| result as an extended double-precision floating-point value.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_round(floatx80 a, float_status *status)
{
    /* Re-round the unpacked fields at the currently selected significand
     * precision (the extra-bits argument is 0: the input is exact). */
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                extractFloatx80Sign(a),
                                extractFloatx80Exp(a),
                                extractFloatx80Frac(a), 0, status);
}
/*----------------------------------------------------------------------------
| Rounds the extended double-precision floating-point value `a' to an integer,
| and returns the result as an extended quadruple-precision floating-point
| value.  The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t lastBitMask, roundBitsMask;
    floatx80 z;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aExp = extractFloatx80Exp( a );
    if ( 0x403E <= aExp ) {
        /* Already integral (magnitude >= 2^63), or NaN/infinity. */
        if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
            return propagateFloatx80NaN(a, a, status);
        }
        return a;
    }
    if ( aExp < 0x3FFF ) {
        /* Magnitude < 1: result is 0 or +/-1 depending on rounding mode. */
        if (    ( aExp == 0 )
             && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
            return a;
        }
        float_raise(float_flag_inexact, status);
        aSign = extractFloatx80Sign( a );
        switch (status->float_rounding_mode) {
        case float_round_nearest_even:
            /* Rounds to 1 only when strictly above one half. */
            if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
               ) {
                return
                    packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
            }
            break;
        case float_round_ties_away:
            if (aExp == 0x3FFE) {
                return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
            }
            break;
        case float_round_down:
            return
                  aSign ?
                      packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
                : packFloatx80( 0, 0, 0 );
        case float_round_up:
            return
                  aSign ? packFloatx80( 1, 0, 0 )
                : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));

        case float_round_to_zero:
            break;
        default:
            g_assert_not_reached();
        }
        return packFloatx80( aSign, 0, 0 );
    }
    /* General case: mask off the fraction bits below the units place. */
    lastBitMask = 1;
    lastBitMask <<= 0x403E - aExp;
    roundBitsMask = lastBitMask - 1;
    z = a;
    switch (status->float_rounding_mode) {
    case float_round_nearest_even:
        z.low += lastBitMask>>1;
        /* On a tie, clear the low result bit to round to even. */
        if ((z.low & roundBitsMask) == 0) {
            z.low &= ~lastBitMask;
        }
        break;
    case float_round_ties_away:
        z.low += lastBitMask >> 1;
        break;
    case float_round_to_zero:
        break;
    case float_round_up:
        if (!extractFloatx80Sign(z)) {
            z.low += roundBitsMask;
        }
        break;
    case float_round_down:
        if (extractFloatx80Sign(z)) {
            z.low += roundBitsMask;
        }
        break;
    default:
        abort();
    }
    z.low &= ~ roundBitsMask;
    if ( z.low == 0 ) {
        /* Significand carried out: bump the exponent and restore the
         * explicit integer bit. */
        ++z.high;
        z.low = UINT64_C(0x8000000000000000);
    }
    if (z.low != a.low) {
        float_raise(float_flag_inexact, status);
    }
    return z;
}
/*----------------------------------------------------------------------------
| Returns the result of adding the absolute values of the extended double-
| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
| negated before being returned.  `zSign' is ignored if the result is a NaN.
| The addition is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* |a| has the larger exponent: align b's significand to it. */
        if ( aExp == 0x7FFF ) {
            if ((uint64_t)(aSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        if ( bExp == 0 ) --expDiff;   /* subnormal exponent reads as one too low */
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* |b| has the larger exponent: align a's significand to it. */
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponents. */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
                /* At least one of the values is a pseudo-denormal,
                 * and there is a carry out of the result.  */
                zExp = 1;
                goto shiftRight1;
            }
            if (zSig0 == 0) {
                return packFloatx80(zSign, 0, 0);
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }
    zSig0 = aSig + bSig;
    /* If the top bit is set there was no carry and the sum is already
     * normalized. */
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    /* Carry out of the significand: shift right one and bump the
     * exponent, re-asserting the explicit integer bit. */
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= UINT64_C(0x8000000000000000);
    ++zExp;
 roundAndPack:
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
/*----------------------------------------------------------------------------
| Returns the result of subtracting the absolute values of the extended
| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
| difference is negated before being returned.  `zSign' is ignored if the
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
| Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponents. */
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* inf - inf is invalid. */
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        aExp = 1;
        bExp = 1;
    }
    zSig1 = 0;
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Exact cancellation: zero is negative only in round-down mode. */
    return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
 bExpBigger:
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return packFloatx80(zSign ^ 1, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) ++expDiff;   /* subnormal exponent reads as one too low */
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
 bBigger:
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
    zExp = bExp;
    zSign ^= 1;   /* |b| > |a| flips the result sign */
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) --expDiff;
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
 aBigger:
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
    zExp = aExp;
 normalizeRoundAndPack:
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         zSign, zExp, zSig0, zSig1, status);
}
5957 /*----------------------------------------------------------------------------
5958 | Returns the result of adding the extended double-precision floating-point
5959 | values `a' and `b'. The operation is performed according to the IEC/IEEE
5960 | Standard for Binary Floating-Point Arithmetic.
5961 *----------------------------------------------------------------------------*/
5963 floatx80
floatx80_add(floatx80 a
, floatx80 b
, float_status
*status
)
5967 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
5968 float_raise(float_flag_invalid
, status
);
5969 return floatx80_default_nan(status
);
5971 aSign
= extractFloatx80Sign( a
);
5972 bSign
= extractFloatx80Sign( b
);
5973 if ( aSign
== bSign
) {
5974 return addFloatx80Sigs(a
, b
, aSign
, status
);
5977 return subFloatx80Sigs(a
, b
, aSign
, status
);
5982 /*----------------------------------------------------------------------------
5983 | Returns the result of subtracting the extended double-precision floating-
5984 | point values `a' and `b'. The operation is performed according to the
5985 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5986 *----------------------------------------------------------------------------*/
5988 floatx80
floatx80_sub(floatx80 a
, floatx80 b
, float_status
*status
)
5992 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
5993 float_raise(float_flag_invalid
, status
);
5994 return floatx80_default_nan(status
);
5996 aSign
= extractFloatx80Sign( a
);
5997 bSign
= extractFloatx80Sign( b
);
5998 if ( aSign
== bSign
) {
5999 return subFloatx80Sigs(a
, b
, aSign
, status
);
6002 return addFloatx80Sigs(a
, b
, aSign
, status
);
6007 /*----------------------------------------------------------------------------
6008 | Returns the result of multiplying the extended double-precision floating-
6009 | point values `a' and `b'. The operation is performed according to the
6010 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6011 *----------------------------------------------------------------------------*/
6013 floatx80
floatx80_mul(floatx80 a
, floatx80 b
, float_status
*status
)
6015 bool aSign
, bSign
, zSign
;
6016 int32_t aExp
, bExp
, zExp
;
6017 uint64_t aSig
, bSig
, zSig0
, zSig1
;
6019 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
6020 float_raise(float_flag_invalid
, status
);
6021 return floatx80_default_nan(status
);
6023 aSig
= extractFloatx80Frac( a
);
6024 aExp
= extractFloatx80Exp( a
);
6025 aSign
= extractFloatx80Sign( a
);
6026 bSig
= extractFloatx80Frac( b
);
6027 bExp
= extractFloatx80Exp( b
);
6028 bSign
= extractFloatx80Sign( b
);
6029 zSign
= aSign
^ bSign
;
6030 if ( aExp
== 0x7FFF ) {
6031 if ( (uint64_t) ( aSig
<<1 )
6032 || ( ( bExp
== 0x7FFF ) && (uint64_t) ( bSig
<<1 ) ) ) {
6033 return propagateFloatx80NaN(a
, b
, status
);
6035 if ( ( bExp
| bSig
) == 0 ) goto invalid
;
6036 return packFloatx80(zSign
, floatx80_infinity_high
,
6037 floatx80_infinity_low
);
6039 if ( bExp
== 0x7FFF ) {
6040 if ((uint64_t)(bSig
<< 1)) {
6041 return propagateFloatx80NaN(a
, b
, status
);
6043 if ( ( aExp
| aSig
) == 0 ) {
6045 float_raise(float_flag_invalid
, status
);
6046 return floatx80_default_nan(status
);
6048 return packFloatx80(zSign
, floatx80_infinity_high
,
6049 floatx80_infinity_low
);
6052 if ( aSig
== 0 ) return packFloatx80( zSign
, 0, 0 );
6053 normalizeFloatx80Subnormal( aSig
, &aExp
, &aSig
);
6056 if ( bSig
== 0 ) return packFloatx80( zSign
, 0, 0 );
6057 normalizeFloatx80Subnormal( bSig
, &bExp
, &bSig
);
6059 zExp
= aExp
+ bExp
- 0x3FFE;
6060 mul64To128( aSig
, bSig
, &zSig0
, &zSig1
);
6061 if ( 0 < (int64_t) zSig0
) {
6062 shortShift128Left( zSig0
, zSig1
, 1, &zSig0
, &zSig1
);
6065 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
6066 zSign
, zExp
, zSig0
, zSig1
, status
);
6069 /*----------------------------------------------------------------------------
6070 | Returns the result of dividing the extended double-precision floating-point
6071 | value `a' by the corresponding value `b'. The operation is performed
6072 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6073 *----------------------------------------------------------------------------*/
6075 floatx80
floatx80_div(floatx80 a
, floatx80 b
, float_status
*status
)
6077 bool aSign
, bSign
, zSign
;
6078 int32_t aExp
, bExp
, zExp
;
6079 uint64_t aSig
, bSig
, zSig0
, zSig1
;
6080 uint64_t rem0
, rem1
, rem2
, term0
, term1
, term2
;
6082 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
6083 float_raise(float_flag_invalid
, status
);
6084 return floatx80_default_nan(status
);
6086 aSig
= extractFloatx80Frac( a
);
6087 aExp
= extractFloatx80Exp( a
);
6088 aSign
= extractFloatx80Sign( a
);
6089 bSig
= extractFloatx80Frac( b
);
6090 bExp
= extractFloatx80Exp( b
);
6091 bSign
= extractFloatx80Sign( b
);
6092 zSign
= aSign
^ bSign
;
6093 if ( aExp
== 0x7FFF ) {
6094 if ((uint64_t)(aSig
<< 1)) {
6095 return propagateFloatx80NaN(a
, b
, status
);
6097 if ( bExp
== 0x7FFF ) {
6098 if ((uint64_t)(bSig
<< 1)) {
6099 return propagateFloatx80NaN(a
, b
, status
);
6103 return packFloatx80(zSign
, floatx80_infinity_high
,
6104 floatx80_infinity_low
);
6106 if ( bExp
== 0x7FFF ) {
6107 if ((uint64_t)(bSig
<< 1)) {
6108 return propagateFloatx80NaN(a
, b
, status
);
6110 return packFloatx80( zSign
, 0, 0 );
6114 if ( ( aExp
| aSig
) == 0 ) {
6116 float_raise(float_flag_invalid
, status
);
6117 return floatx80_default_nan(status
);
6119 float_raise(float_flag_divbyzero
, status
);
6120 return packFloatx80(zSign
, floatx80_infinity_high
,
6121 floatx80_infinity_low
);
6123 normalizeFloatx80Subnormal( bSig
, &bExp
, &bSig
);
6126 if ( aSig
== 0 ) return packFloatx80( zSign
, 0, 0 );
6127 normalizeFloatx80Subnormal( aSig
, &aExp
, &aSig
);
6129 zExp
= aExp
- bExp
+ 0x3FFE;
6131 if ( bSig
<= aSig
) {
6132 shift128Right( aSig
, 0, 1, &aSig
, &rem1
);
6135 zSig0
= estimateDiv128To64( aSig
, rem1
, bSig
);
6136 mul64To128( bSig
, zSig0
, &term0
, &term1
);
6137 sub128( aSig
, rem1
, term0
, term1
, &rem0
, &rem1
);
6138 while ( (int64_t) rem0
< 0 ) {
6140 add128( rem0
, rem1
, 0, bSig
, &rem0
, &rem1
);
6142 zSig1
= estimateDiv128To64( rem1
, 0, bSig
);
6143 if ( (uint64_t) ( zSig1
<<1 ) <= 8 ) {
6144 mul64To128( bSig
, zSig1
, &term1
, &term2
);
6145 sub128( rem1
, 0, term1
, term2
, &rem1
, &rem2
);
6146 while ( (int64_t) rem1
< 0 ) {
6148 add128( rem1
, rem2
, 0, bSig
, &rem1
, &rem2
);
6150 zSig1
|= ( ( rem1
| rem2
) != 0 );
6152 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
6153 zSign
, zExp
, zSig0
, zSig1
, status
);
6156 /*----------------------------------------------------------------------------
6157 | Returns the remainder of the extended double-precision floating-point value
6158 | `a' with respect to the corresponding value `b'. The operation is performed
6159 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6160 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
6161 | the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6162 | the absolute value of the integer quotient.
6163 *----------------------------------------------------------------------------*/
6165 floatx80
floatx80_modrem(floatx80 a
, floatx80 b
, bool mod
, uint64_t *quotient
,
6166 float_status
*status
)
6169 int32_t aExp
, bExp
, expDiff
, aExpOrig
;
6170 uint64_t aSig0
, aSig1
, bSig
;
6171 uint64_t q
, term0
, term1
, alternateASig0
, alternateASig1
;
6174 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
6175 float_raise(float_flag_invalid
, status
);
6176 return floatx80_default_nan(status
);
6178 aSig0
= extractFloatx80Frac( a
);
6179 aExpOrig
= aExp
= extractFloatx80Exp( a
);
6180 aSign
= extractFloatx80Sign( a
);
6181 bSig
= extractFloatx80Frac( b
);
6182 bExp
= extractFloatx80Exp( b
);
6183 if ( aExp
== 0x7FFF ) {
6184 if ( (uint64_t) ( aSig0
<<1 )
6185 || ( ( bExp
== 0x7FFF ) && (uint64_t) ( bSig
<<1 ) ) ) {
6186 return propagateFloatx80NaN(a
, b
, status
);
6190 if ( bExp
== 0x7FFF ) {
6191 if ((uint64_t)(bSig
<< 1)) {
6192 return propagateFloatx80NaN(a
, b
, status
);
6194 if (aExp
== 0 && aSig0
>> 63) {
6196 * Pseudo-denormal argument must be returned in normalized
6199 return packFloatx80(aSign
, 1, aSig0
);
6206 float_raise(float_flag_invalid
, status
);
6207 return floatx80_default_nan(status
);
6209 normalizeFloatx80Subnormal( bSig
, &bExp
, &bSig
);
6212 if ( aSig0
== 0 ) return a
;
6213 normalizeFloatx80Subnormal( aSig0
, &aExp
, &aSig0
);
6216 expDiff
= aExp
- bExp
;
6218 if ( expDiff
< 0 ) {
6219 if ( mod
|| expDiff
< -1 ) {
6220 if (aExp
== 1 && aExpOrig
== 0) {
6222 * Pseudo-denormal argument must be returned in
6225 return packFloatx80(aSign
, aExp
, aSig0
);
6229 shift128Right( aSig0
, 0, 1, &aSig0
, &aSig1
);
6232 *quotient
= q
= ( bSig
<= aSig0
);
6233 if ( q
) aSig0
-= bSig
;
6235 while ( 0 < expDiff
) {
6236 q
= estimateDiv128To64( aSig0
, aSig1
, bSig
);
6237 q
= ( 2 < q
) ? q
- 2 : 0;
6238 mul64To128( bSig
, q
, &term0
, &term1
);
6239 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6240 shortShift128Left( aSig0
, aSig1
, 62, &aSig0
, &aSig1
);
6246 if ( 0 < expDiff
) {
6247 q
= estimateDiv128To64( aSig0
, aSig1
, bSig
);
6248 q
= ( 2 < q
) ? q
- 2 : 0;
6250 mul64To128( bSig
, q
<<( 64 - expDiff
), &term0
, &term1
);
6251 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6252 shortShift128Left( 0, bSig
, 64 - expDiff
, &term0
, &term1
);
6253 while ( le128( term0
, term1
, aSig0
, aSig1
) ) {
6255 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6258 *quotient
<<= expDiff
;
6269 sub128( term0
, term1
, aSig0
, aSig1
, &alternateASig0
, &alternateASig1
);
6270 if ( lt128( alternateASig0
, alternateASig1
, aSig0
, aSig1
)
6271 || ( eq128( alternateASig0
, alternateASig1
, aSig0
, aSig1
)
6274 aSig0
= alternateASig0
;
6275 aSig1
= alternateASig1
;
6281 normalizeRoundAndPackFloatx80(
6282 80, zSign
, bExp
+ expDiff
, aSig0
, aSig1
, status
);
6286 /*----------------------------------------------------------------------------
6287 | Returns the remainder of the extended double-precision floating-point value
6288 | `a' with respect to the corresponding value `b'. The operation is performed
6289 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6290 *----------------------------------------------------------------------------*/
6292 floatx80
floatx80_rem(floatx80 a
, floatx80 b
, float_status
*status
)
6295 return floatx80_modrem(a
, b
, false, "ient
, status
);
6298 /*----------------------------------------------------------------------------
6299 | Returns the remainder of the extended double-precision floating-point value
6300 | `a' with respect to the corresponding value `b', with the quotient truncated
6302 *----------------------------------------------------------------------------*/
6304 floatx80
floatx80_mod(floatx80 a
, floatx80 b
, float_status
*status
)
6307 return floatx80_modrem(a
, b
, true, "ient
, status
);
6310 /*----------------------------------------------------------------------------
6311 | Returns the square root of the extended double-precision floating-point
6312 | value `a'. The operation is performed according to the IEC/IEEE Standard
6313 | for Binary Floating-Point Arithmetic.
6314 *----------------------------------------------------------------------------*/
6316 floatx80
floatx80_sqrt(floatx80 a
, float_status
*status
)
6320 uint64_t aSig0
, aSig1
, zSig0
, zSig1
, doubleZSig0
;
6321 uint64_t rem0
, rem1
, rem2
, rem3
, term0
, term1
, term2
, term3
;
6323 if (floatx80_invalid_encoding(a
)) {
6324 float_raise(float_flag_invalid
, status
);
6325 return floatx80_default_nan(status
);
6327 aSig0
= extractFloatx80Frac( a
);
6328 aExp
= extractFloatx80Exp( a
);
6329 aSign
= extractFloatx80Sign( a
);
6330 if ( aExp
== 0x7FFF ) {
6331 if ((uint64_t)(aSig0
<< 1)) {
6332 return propagateFloatx80NaN(a
, a
, status
);
6334 if ( ! aSign
) return a
;
6338 if ( ( aExp
| aSig0
) == 0 ) return a
;
6340 float_raise(float_flag_invalid
, status
);
6341 return floatx80_default_nan(status
);
6344 if ( aSig0
== 0 ) return packFloatx80( 0, 0, 0 );
6345 normalizeFloatx80Subnormal( aSig0
, &aExp
, &aSig0
);
6347 zExp
= ( ( aExp
- 0x3FFF )>>1 ) + 0x3FFF;
6348 zSig0
= estimateSqrt32( aExp
, aSig0
>>32 );
6349 shift128Right( aSig0
, 0, 2 + ( aExp
& 1 ), &aSig0
, &aSig1
);
6350 zSig0
= estimateDiv128To64( aSig0
, aSig1
, zSig0
<<32 ) + ( zSig0
<<30 );
6351 doubleZSig0
= zSig0
<<1;
6352 mul64To128( zSig0
, zSig0
, &term0
, &term1
);
6353 sub128( aSig0
, aSig1
, term0
, term1
, &rem0
, &rem1
);
6354 while ( (int64_t) rem0
< 0 ) {
6357 add128( rem0
, rem1
, zSig0
>>63, doubleZSig0
| 1, &rem0
, &rem1
);
6359 zSig1
= estimateDiv128To64( rem1
, 0, doubleZSig0
);
6360 if ( ( zSig1
& UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
6361 if ( zSig1
== 0 ) zSig1
= 1;
6362 mul64To128( doubleZSig0
, zSig1
, &term1
, &term2
);
6363 sub128( rem1
, 0, term1
, term2
, &rem1
, &rem2
);
6364 mul64To128( zSig1
, zSig1
, &term2
, &term3
);
6365 sub192( rem1
, rem2
, 0, 0, term2
, term3
, &rem1
, &rem2
, &rem3
);
6366 while ( (int64_t) rem1
< 0 ) {
6368 shortShift128Left( 0, zSig1
, 1, &term2
, &term3
);
6370 term2
|= doubleZSig0
;
6371 add192( rem1
, rem2
, rem3
, 0, term2
, term3
, &rem1
, &rem2
, &rem3
);
6373 zSig1
|= ( ( rem1
| rem2
| rem3
) != 0 );
6375 shortShift128Left( 0, zSig1
, 1, &zSig0
, &zSig1
);
6376 zSig0
|= doubleZSig0
;
6377 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
6378 0, zExp
, zSig0
, zSig1
, status
);
6381 /*----------------------------------------------------------------------------
6382 | Returns the result of converting the quadruple-precision floating-point
6383 | value `a' to the extended double-precision floating-point format. The
6384 | conversion is performed according to the IEC/IEEE Standard for Binary
6385 | Floating-Point Arithmetic.
6386 *----------------------------------------------------------------------------*/
6388 floatx80
float128_to_floatx80(float128 a
, float_status
*status
)
6392 uint64_t aSig0
, aSig1
;
6394 aSig1
= extractFloat128Frac1( a
);
6395 aSig0
= extractFloat128Frac0( a
);
6396 aExp
= extractFloat128Exp( a
);
6397 aSign
= extractFloat128Sign( a
);
6398 if ( aExp
== 0x7FFF ) {
6399 if ( aSig0
| aSig1
) {
6400 floatx80 res
= commonNaNToFloatx80(float128ToCommonNaN(a
, status
),
6402 return floatx80_silence_nan(res
, status
);
6404 return packFloatx80(aSign
, floatx80_infinity_high
,
6405 floatx80_infinity_low
);
6408 if ( ( aSig0
| aSig1
) == 0 ) return packFloatx80( aSign
, 0, 0 );
6409 normalizeFloat128Subnormal( aSig0
, aSig1
, &aExp
, &aSig0
, &aSig1
);
6412 aSig0
|= UINT64_C(0x0001000000000000);
6414 shortShift128Left( aSig0
, aSig1
, 15, &aSig0
, &aSig1
);
6415 return roundAndPackFloatx80(80, aSign
, aExp
, aSig0
, aSig1
, status
);
6419 /*----------------------------------------------------------------------------
6420 | Returns the remainder of the quadruple-precision floating-point value `a'
6421 | with respect to the corresponding value `b'. The operation is performed
6422 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6423 *----------------------------------------------------------------------------*/
6425 float128
float128_rem(float128 a
, float128 b
, float_status
*status
)
6428 int32_t aExp
, bExp
, expDiff
;
6429 uint64_t aSig0
, aSig1
, bSig0
, bSig1
, q
, term0
, term1
, term2
;
6430 uint64_t allZero
, alternateASig0
, alternateASig1
, sigMean1
;
6433 aSig1
= extractFloat128Frac1( a
);
6434 aSig0
= extractFloat128Frac0( a
);
6435 aExp
= extractFloat128Exp( a
);
6436 aSign
= extractFloat128Sign( a
);
6437 bSig1
= extractFloat128Frac1( b
);
6438 bSig0
= extractFloat128Frac0( b
);
6439 bExp
= extractFloat128Exp( b
);
6440 if ( aExp
== 0x7FFF ) {
6441 if ( ( aSig0
| aSig1
)
6442 || ( ( bExp
== 0x7FFF ) && ( bSig0
| bSig1
) ) ) {
6443 return propagateFloat128NaN(a
, b
, status
);
6447 if ( bExp
== 0x7FFF ) {
6448 if (bSig0
| bSig1
) {
6449 return propagateFloat128NaN(a
, b
, status
);
6454 if ( ( bSig0
| bSig1
) == 0 ) {
6456 float_raise(float_flag_invalid
, status
);
6457 return float128_default_nan(status
);
6459 normalizeFloat128Subnormal( bSig0
, bSig1
, &bExp
, &bSig0
, &bSig1
);
6462 if ( ( aSig0
| aSig1
) == 0 ) return a
;
6463 normalizeFloat128Subnormal( aSig0
, aSig1
, &aExp
, &aSig0
, &aSig1
);
6465 expDiff
= aExp
- bExp
;
6466 if ( expDiff
< -1 ) return a
;
6468 aSig0
| UINT64_C(0x0001000000000000),
6470 15 - ( expDiff
< 0 ),
6475 bSig0
| UINT64_C(0x0001000000000000), bSig1
, 15, &bSig0
, &bSig1
);
6476 q
= le128( bSig0
, bSig1
, aSig0
, aSig1
);
6477 if ( q
) sub128( aSig0
, aSig1
, bSig0
, bSig1
, &aSig0
, &aSig1
);
6479 while ( 0 < expDiff
) {
6480 q
= estimateDiv128To64( aSig0
, aSig1
, bSig0
);
6481 q
= ( 4 < q
) ? q
- 4 : 0;
6482 mul128By64To192( bSig0
, bSig1
, q
, &term0
, &term1
, &term2
);
6483 shortShift192Left( term0
, term1
, term2
, 61, &term1
, &term2
, &allZero
);
6484 shortShift128Left( aSig0
, aSig1
, 61, &aSig0
, &allZero
);
6485 sub128( aSig0
, 0, term1
, term2
, &aSig0
, &aSig1
);
6488 if ( -64 < expDiff
) {
6489 q
= estimateDiv128To64( aSig0
, aSig1
, bSig0
);
6490 q
= ( 4 < q
) ? q
- 4 : 0;
6492 shift128Right( bSig0
, bSig1
, 12, &bSig0
, &bSig1
);
6494 if ( expDiff
< 0 ) {
6495 shift128Right( aSig0
, aSig1
, - expDiff
, &aSig0
, &aSig1
);
6498 shortShift128Left( aSig0
, aSig1
, expDiff
, &aSig0
, &aSig1
);
6500 mul128By64To192( bSig0
, bSig1
, q
, &term0
, &term1
, &term2
);
6501 sub128( aSig0
, aSig1
, term1
, term2
, &aSig0
, &aSig1
);
6504 shift128Right( aSig0
, aSig1
, 12, &aSig0
, &aSig1
);
6505 shift128Right( bSig0
, bSig1
, 12, &bSig0
, &bSig1
);
6508 alternateASig0
= aSig0
;
6509 alternateASig1
= aSig1
;
6511 sub128( aSig0
, aSig1
, bSig0
, bSig1
, &aSig0
, &aSig1
);
6512 } while ( 0 <= (int64_t) aSig0
);
6514 aSig0
, aSig1
, alternateASig0
, alternateASig1
, (uint64_t *)&sigMean0
, &sigMean1
);
6515 if ( ( sigMean0
< 0 )
6516 || ( ( ( sigMean0
| sigMean1
) == 0 ) && ( q
& 1 ) ) ) {
6517 aSig0
= alternateASig0
;
6518 aSig1
= alternateASig1
;
6520 zSign
= ( (int64_t) aSig0
< 0 );
6521 if ( zSign
) sub128( 0, 0, aSig0
, aSig1
, &aSig0
, &aSig1
);
6522 return normalizeRoundAndPackFloat128(aSign
^ zSign
, bExp
- 4, aSig0
, aSig1
,
6526 /*----------------------------------------------------------------------------
6527 | Returns the square root of the quadruple-precision floating-point value `a'.
6528 | The operation is performed according to the IEC/IEEE Standard for Binary
6529 | Floating-Point Arithmetic.
6530 *----------------------------------------------------------------------------*/
6532 float128
float128_sqrt(float128 a
, float_status
*status
)
6536 uint64_t aSig0
, aSig1
, zSig0
, zSig1
, zSig2
, doubleZSig0
;
6537 uint64_t rem0
, rem1
, rem2
, rem3
, term0
, term1
, term2
, term3
;
6539 aSig1
= extractFloat128Frac1( a
);
6540 aSig0
= extractFloat128Frac0( a
);
6541 aExp
= extractFloat128Exp( a
);
6542 aSign
= extractFloat128Sign( a
);
6543 if ( aExp
== 0x7FFF ) {
6544 if (aSig0
| aSig1
) {
6545 return propagateFloat128NaN(a
, a
, status
);
6547 if ( ! aSign
) return a
;
6551 if ( ( aExp
| aSig0
| aSig1
) == 0 ) return a
;
6553 float_raise(float_flag_invalid
, status
);
6554 return float128_default_nan(status
);
6557 if ( ( aSig0
| aSig1
) == 0 ) return packFloat128( 0, 0, 0, 0 );
6558 normalizeFloat128Subnormal( aSig0
, aSig1
, &aExp
, &aSig0
, &aSig1
);
6560 zExp
= ( ( aExp
- 0x3FFF )>>1 ) + 0x3FFE;
6561 aSig0
|= UINT64_C(0x0001000000000000);
6562 zSig0
= estimateSqrt32( aExp
, aSig0
>>17 );
6563 shortShift128Left( aSig0
, aSig1
, 13 - ( aExp
& 1 ), &aSig0
, &aSig1
);
6564 zSig0
= estimateDiv128To64( aSig0
, aSig1
, zSig0
<<32 ) + ( zSig0
<<30 );
6565 doubleZSig0
= zSig0
<<1;
6566 mul64To128( zSig0
, zSig0
, &term0
, &term1
);
6567 sub128( aSig0
, aSig1
, term0
, term1
, &rem0
, &rem1
);
6568 while ( (int64_t) rem0
< 0 ) {
6571 add128( rem0
, rem1
, zSig0
>>63, doubleZSig0
| 1, &rem0
, &rem1
);
6573 zSig1
= estimateDiv128To64( rem1
, 0, doubleZSig0
);
6574 if ( ( zSig1
& 0x1FFF ) <= 5 ) {
6575 if ( zSig1
== 0 ) zSig1
= 1;
6576 mul64To128( doubleZSig0
, zSig1
, &term1
, &term2
);
6577 sub128( rem1
, 0, term1
, term2
, &rem1
, &rem2
);
6578 mul64To128( zSig1
, zSig1
, &term2
, &term3
);
6579 sub192( rem1
, rem2
, 0, 0, term2
, term3
, &rem1
, &rem2
, &rem3
);
6580 while ( (int64_t) rem1
< 0 ) {
6582 shortShift128Left( 0, zSig1
, 1, &term2
, &term3
);
6584 term2
|= doubleZSig0
;
6585 add192( rem1
, rem2
, rem3
, 0, term2
, term3
, &rem1
, &rem2
, &rem3
);
6587 zSig1
|= ( ( rem1
| rem2
| rem3
) != 0 );
6589 shift128ExtraRightJamming( zSig0
, zSig1
, 0, 14, &zSig0
, &zSig1
, &zSig2
);
6590 return roundAndPackFloat128(0, zExp
, zSig0
, zSig1
, zSig2
, status
);
6594 static inline FloatRelation
6595 floatx80_compare_internal(floatx80 a
, floatx80 b
, bool is_quiet
,
6596 float_status
*status
)
6600 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
6601 float_raise(float_flag_invalid
, status
);
6602 return float_relation_unordered
;
6604 if (( ( extractFloatx80Exp( a
) == 0x7fff ) &&
6605 ( extractFloatx80Frac( a
)<<1 ) ) ||
6606 ( ( extractFloatx80Exp( b
) == 0x7fff ) &&
6607 ( extractFloatx80Frac( b
)<<1 ) )) {
6609 floatx80_is_signaling_nan(a
, status
) ||
6610 floatx80_is_signaling_nan(b
, status
)) {
6611 float_raise(float_flag_invalid
, status
);
6613 return float_relation_unordered
;
6615 aSign
= extractFloatx80Sign( a
);
6616 bSign
= extractFloatx80Sign( b
);
6617 if ( aSign
!= bSign
) {
6619 if ( ( ( (uint16_t) ( ( a
.high
| b
.high
) << 1 ) ) == 0) &&
6620 ( ( a
.low
| b
.low
) == 0 ) ) {
6622 return float_relation_equal
;
6624 return 1 - (2 * aSign
);
6627 /* Normalize pseudo-denormals before comparison. */
6628 if ((a
.high
& 0x7fff) == 0 && a
.low
& UINT64_C(0x8000000000000000)) {
6631 if ((b
.high
& 0x7fff) == 0 && b
.low
& UINT64_C(0x8000000000000000)) {
6634 if (a
.low
== b
.low
&& a
.high
== b
.high
) {
6635 return float_relation_equal
;
6637 return 1 - 2 * (aSign
^ ( lt128( a
.high
, a
.low
, b
.high
, b
.low
) ));
6642 FloatRelation
floatx80_compare(floatx80 a
, floatx80 b
, float_status
*status
)
6644 return floatx80_compare_internal(a
, b
, 0, status
);
6647 FloatRelation
floatx80_compare_quiet(floatx80 a
, floatx80 b
,
6648 float_status
*status
)
6650 return floatx80_compare_internal(a
, b
, 1, status
);
6653 static inline FloatRelation
6654 float128_compare_internal(float128 a
, float128 b
, bool is_quiet
,
6655 float_status
*status
)
6659 if (( ( extractFloat128Exp( a
) == 0x7fff ) &&
6660 ( extractFloat128Frac0( a
) | extractFloat128Frac1( a
) ) ) ||
6661 ( ( extractFloat128Exp( b
) == 0x7fff ) &&
6662 ( extractFloat128Frac0( b
) | extractFloat128Frac1( b
) ) )) {
6664 float128_is_signaling_nan(a
, status
) ||
6665 float128_is_signaling_nan(b
, status
)) {
6666 float_raise(float_flag_invalid
, status
);
6668 return float_relation_unordered
;
6670 aSign
= extractFloat128Sign( a
);
6671 bSign
= extractFloat128Sign( b
);
6672 if ( aSign
!= bSign
) {
6673 if ( ( ( ( a
.high
| b
.high
)<<1 ) | a
.low
| b
.low
) == 0 ) {
6675 return float_relation_equal
;
6677 return 1 - (2 * aSign
);
6680 if (a
.low
== b
.low
&& a
.high
== b
.high
) {
6681 return float_relation_equal
;
6683 return 1 - 2 * (aSign
^ ( lt128( a
.high
, a
.low
, b
.high
, b
.low
) ));
6688 FloatRelation
float128_compare(float128 a
, float128 b
, float_status
*status
)
6690 return float128_compare_internal(a
, b
, 0, status
);
6693 FloatRelation
float128_compare_quiet(float128 a
, float128 b
,
6694 float_status
*status
)
6696 return float128_compare_internal(a
, b
, 1, status
);
6699 floatx80
floatx80_scalbn(floatx80 a
, int n
, float_status
*status
)
6705 if (floatx80_invalid_encoding(a
)) {
6706 float_raise(float_flag_invalid
, status
);
6707 return floatx80_default_nan(status
);
6709 aSig
= extractFloatx80Frac( a
);
6710 aExp
= extractFloatx80Exp( a
);
6711 aSign
= extractFloatx80Sign( a
);
6713 if ( aExp
== 0x7FFF ) {
6715 return propagateFloatx80NaN(a
, a
, status
);
6729 } else if (n
< -0x10000) {
6734 return normalizeRoundAndPackFloatx80(status
->floatx80_rounding_precision
,
6735 aSign
, aExp
, aSig
, 0, status
);
6738 float128
float128_scalbn(float128 a
, int n
, float_status
*status
)
6742 uint64_t aSig0
, aSig1
;
6744 aSig1
= extractFloat128Frac1( a
);
6745 aSig0
= extractFloat128Frac0( a
);
6746 aExp
= extractFloat128Exp( a
);
6747 aSign
= extractFloat128Sign( a
);
6748 if ( aExp
== 0x7FFF ) {
6749 if ( aSig0
| aSig1
) {
6750 return propagateFloat128NaN(a
, a
, status
);
6755 aSig0
|= UINT64_C(0x0001000000000000);
6756 } else if (aSig0
== 0 && aSig1
== 0) {
6764 } else if (n
< -0x10000) {
6769 return normalizeRoundAndPackFloat128( aSign
, aExp
, aSig0
, aSig1
6774 static void __attribute__((constructor
)) softfloat_init(void)
6776 union_float64 ua
, ub
, uc
, ur
;
6778 if (QEMU_NO_HARDFLOAT
) {
6782 * Test that the host's FMA is not obviously broken. For example,
6783 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6784 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6786 ua
.s
= 0x0020000000000001ULL
;
6787 ub
.s
= 0x3ca0000000000000ULL
;
6788 uc
.s
= 0x0020000000000000ULL
;
6789 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
6790 if (ur
.s
!= 0x0020000000000001ULL
) {
6791 force_soft_fma
= true;