fpu/softfloat.c

   1 /*
   2  * QEMU float support
   3  *
   4  * The code in this source file is derived from release 2a of the SoftFloat
   5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6  * some later contributions) are provided under that license, as detailed below.
   7  * It has subsequently been modified by contributors to the QEMU Project,
   8  * so some portions are provided under:
   9  *  the SoftFloat-2a license
  10  *  the BSD license
  11  *  GPL-v2-or-later
  12  *
  13  * Any future contributions to this file after December 1st 2014 will be
  14  * taken to be licensed under the Softfloat-2a license unless specifically
  15  * indicated otherwise.
  16  */
  17
  18 /*
  19 ===============================================================================
  20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
  21 Arithmetic Package, Release 2a.
  22
  23 Written by John R. Hauser.  This work was made possible in part by the
  24 International Computer Science Institute, located at Suite 600, 1947 Center
  25 Street, Berkeley, California 94704.  Funding was partially provided by the
  26 National Science Foundation under grant MIP-9311980.  The original version
  27 of this code was written as part of a project to build a fixed-point vector
  28 processor in collaboration with the University of California at Berkeley,
  29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  31 arithmetic/SoftFloat.html'.
  32
  33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  38
  39 Derivative works are acceptable, even for commercial purposes, so long as
  40 (1) they include prominent notice that the work is derivative, and (2) they
  41 include prominent notice akin to these four paragraphs for those parts of
  42 this code that are retained.
  43
  44 ===============================================================================
  45 */
  46
  47 /* BSD licensing:
  48  * Copyright (c) 2006, Fabrice Bellard
  49  * All rights reserved.
  50  *
  51  * Redistribution and use in source and binary forms, with or without
  52  * modification, are permitted provided that the following conditions are met:
  53  *
  54  * 1. Redistributions of source code must retain the above copyright notice,
  55  * this list of conditions and the following disclaimer.
  56  *
  57  * 2. Redistributions in binary form must reproduce the above copyright notice,
  58  * this list of conditions and the following disclaimer in the documentation
  59  * and/or other materials provided with the distribution.
  60  *
  61  * 3. Neither the name of the copyright holder nor the names of its contributors
  62  * may be used to endorse or promote products derived from this software without
  63  * specific prior written permission.
  64  *
  65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  75  * THE POSSIBILITY OF SUCH DAMAGE.
  76  */
  77
  78 /* Portions of this work are licensed under the terms of the GNU GPL,
  79  * version 2 or later. See the COPYING file in the top-level directory.
  80  */
  81
  82 /* softfloat (and in particular the code in softfloat-specialize.h) is
  83  * target-dependent and needs the TARGET_* macros.
  84  */
  85 #include "qemu/osdep.h"
  86 #include <math.h>
  87 #include "qemu/bitops.h"
  88 #include "fpu/softfloat.h"
  89
  90 /* We only need stdlib for abort() */
  91
  92 /*----------------------------------------------------------------------------
  93 | Primitive arithmetic functions, including multi-word arithmetic, and
  94 | division and square root approximations.  (Can be specialized to target if
  95 | desired.)
  96 *----------------------------------------------------------------------------*/
  97 #include "fpu/softfloat-macros.h"
  98
  99 /*
 100  * Hardfloat
 101  *
 102  * Fast emulation of guest FP instructions is challenging for two reasons.
 103  * First, FP instruction semantics are similar but not identical, particularly
 104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
 105  * exception flags is not trivial: reading the host's flags register with a
 106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 107  * and trapping on every FP exception is not fast nor pleasant to work with.
 108  *
 109  * We address these challenges by leveraging the host FPU for a subset of the
 110  * operations. To do this we expand on the idea presented in this paper:
 111  *
 112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 114  *
 115  * The idea is thus to leverage the host FPU to (1) compute FP operations
 116  * and (2) identify whether FP exceptions occurred while avoiding
 117  * expensive exception flag register accesses.
 118  *
 119  * An important optimization shown in the paper is that given that exception
 120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 121  * This is particularly useful for the inexact flag, which is very frequently
 122  * raised in floating-point workloads.
 123  *
 124  * We optimize the code further by deferring to soft-fp whenever FP exception
 125  * detection might get hairy. Two examples: (1) when at least one operand is
 126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 127  * and the result is < the minimum normal.
 128  */
 129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
 130     static inline void name(soft_t *a, float_status *s)                 \
 131     {                                                                   \
 132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
 133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
 134                                      soft_t ## _is_neg(*a));            \
 135             float_raise(float_flag_input_denormal, s);                  \
 136         }                                                               \
 137     }
 138
 139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
 140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
 141 #undef GEN_INPUT_FLUSH__NOCHECK
 142
 143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
 144     static inline void name(soft_t *a, float_status *s) \
 145     {                                                   \
 146         if (likely(!s->flush_inputs_to_zero)) {         \
 147             return;                                     \
 148         }                                               \
 149         soft_t ## _input_flush__nocheck(a, s);          \
 150     }
 151
 152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
 153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
 154 #undef GEN_INPUT_FLUSH1
 155
 156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
 157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
 158     {                                                                   \
 159         if (likely(!s->flush_inputs_to_zero)) {                         \
 160             return;                                                     \
 161         }                                                               \
 162         soft_t ## _input_flush__nocheck(a, s);                          \
 163         soft_t ## _input_flush__nocheck(b, s);                          \
 164     }
 165
 166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
 167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
 168 #undef GEN_INPUT_FLUSH2
 169
 170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
 171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
 172     {                                                                   \
 173         if (likely(!s->flush_inputs_to_zero)) {                         \
 174             return;                                                     \
 175         }                                                               \
 176         soft_t ## _input_flush__nocheck(a, s);                          \
 177         soft_t ## _input_flush__nocheck(b, s);                          \
 178         soft_t ## _input_flush__nocheck(c, s);                          \
 179     }
 180
 181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
 182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
 183 #undef GEN_INPUT_FLUSH3
 184
 185 /*
 186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
 187  * hardfloat functions. Each combination of number of inputs and float size
 188  * gets its own value.
 189  */
 190 #if defined(__x86_64__)
 191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
 193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
 195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
 197 #else
 198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
 200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
 202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
 204 #endif
 205
 206 /*
 207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 208  * float{32,64}_is_infinity when !USE_FP.
 209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 211  */
 212 #if defined(__x86_64__) || defined(__aarch64__)
 213 # define QEMU_HARDFLOAT_USE_ISINF   1
 214 #else
 215 # define QEMU_HARDFLOAT_USE_ISINF   0
 216 #endif
 217
 218 /*
 219  * Some targets clear the FP flags before most FP operations. This prevents
 220  * the use of hardfloat, since hardfloat relies on the inexact flag being
 221  * already set.
 222  */
 223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
 224 # if defined(__FAST_MATH__)
 225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
 226     IEEE implementation
 227 # endif
 228 # define QEMU_NO_HARDFLOAT 1
 229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
 230 #else
 231 # define QEMU_NO_HARDFLOAT 0
 232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
 233 #endif
 234
 235 static inline bool can_use_fpu(const float_status *s)
 236 {
 237     if (QEMU_NO_HARDFLOAT) {
 238         return false;
 239     }
 240     return likely(s->float_exception_flags & float_flag_inexact &&
 241                   s->float_rounding_mode == float_round_nearest_even);
 242 }
 243
 244 /*
 245  * Hardfloat generation functions. Each operation can have two flavors:
 246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 247  * most condition checks, or native ones (e.g. fpclassify).
 248  *
 249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
 250  * compiler to propagate constants and inline everything into the callers.
 251  *
 252  * We only generate functions for operations with two inputs, since only
 253  * these are common enough to justify consolidating them into common code.
 254  */
 255
/*
 * Overlay of a softfloat float32 with a host float, so the same storage can
 * be handed either to softfloat routines (.s) or to host FP code (.h).
 */
typedef union {
    float32 s;
    float h;
} union_float32;

/* Same overlay for float64 / host double. */
typedef union {
    float64 s;
    double h;
} union_float64;

/* Predicates over a pair of operands, used as pre/post checks by *_gen2. */
typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

/* Two-operand softfloat implementations (take the float_status). */
typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
/* Two-operand host implementations (plain host arithmetic, no status). */
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);
 273
 274 /* 2-input is-zero-or-normal */
 275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
 276 {
 277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
 278         /*
 279          * Not using a temp variable for consecutive fpclassify calls ends up
 280          * generating faster code.
 281          */
 282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 284     }
 285     return float32_is_zero_or_normal(a.s) &&
 286            float32_is_zero_or_normal(b.s);
 287 }
 288
 289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
 290 {
 291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
 292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 294     }
 295     return float64_is_zero_or_normal(a.s) &&
 296            float64_is_zero_or_normal(b.s);
 297 }
 298
 299 /* 3-input is-zero-or-normal */
 300 static inline
 301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
 302 {
 303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
 304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 307     }
 308     return float32_is_zero_or_normal(a.s) &&
 309            float32_is_zero_or_normal(b.s) &&
 310            float32_is_zero_or_normal(c.s);
 311 }
 312
 313 static inline
 314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
 315 {
 316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
 317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 320     }
 321     return float64_is_zero_or_normal(a.s) &&
 322            float64_is_zero_or_normal(b.s) &&
 323            float64_is_zero_or_normal(c.s);
 324 }
 325
 326 static inline bool f32_is_inf(union_float32 a)
 327 {
 328     if (QEMU_HARDFLOAT_USE_ISINF) {
 329         return isinf(a.h);
 330     }
 331     return float32_is_infinity(a.s);
 332 }
 333
 334 static inline bool f64_is_inf(union_float64 a)
 335 {
 336     if (QEMU_HARDFLOAT_USE_ISINF) {
 337         return isinf(a.h);
 338     }
 339     return float64_is_infinity(a.s);
 340 }
 341
 342 static inline float32
 343 float32_gen2(float32 xa, float32 xb, float_status *s,
 344              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
 345              f32_check_fn pre, f32_check_fn post)
 346 {
 347     union_float32 ua, ub, ur;
 348
 349     ua.s = xa;
 350     ub.s = xb;
 351
 352     if (unlikely(!can_use_fpu(s))) {
 353         goto soft;
 354     }
 355
 356     float32_input_flush2(&ua.s, &ub.s, s);
 357     if (unlikely(!pre(ua, ub))) {
 358         goto soft;
 359     }
 360
 361     ur.h = hard(ua.h, ub.h);
 362     if (unlikely(f32_is_inf(ur))) {
 363         float_raise(float_flag_overflow, s);
 364     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
 365         goto soft;
 366     }
 367     return ur.s;
 368
 369  soft:
 370     return soft(ua.s, ub.s, s);
 371 }
 372
 373 static inline float64
 374 float64_gen2(float64 xa, float64 xb, float_status *s,
 375              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
 376              f64_check_fn pre, f64_check_fn post)
 377 {
 378     union_float64 ua, ub, ur;
 379
 380     ua.s = xa;
 381     ub.s = xb;
 382
 383     if (unlikely(!can_use_fpu(s))) {
 384         goto soft;
 385     }
 386
 387     float64_input_flush2(&ua.s, &ub.s, s);
 388     if (unlikely(!pre(ua, ub))) {
 389         goto soft;
 390     }
 391
 392     ur.h = hard(ua.h, ub.h);
 393     if (unlikely(f64_is_inf(ur))) {
 394         float_raise(float_flag_overflow, s);
 395     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
 396         goto soft;
 397     }
 398     return ur.s;
 399
 400  soft:
 401     return soft(ua.s, ub.s, s);
 402 }
 403
 404 /*----------------------------------------------------------------------------
 405 | Returns the fraction bits of the single-precision floating-point value `a'.
 406 *----------------------------------------------------------------------------*/
 407
 408 static inline uint32_t extractFloat32Frac(float32 a)
 409 {
 410     return float32_val(a) & 0x007FFFFF;
 411 }
 412
 413 /*----------------------------------------------------------------------------
 414 | Returns the exponent bits of the single-precision floating-point value `a'.
 415 *----------------------------------------------------------------------------*/
 416
 417 static inline int extractFloat32Exp(float32 a)
 418 {
 419     return (float32_val(a) >> 23) & 0xFF;
 420 }
 421
 422 /*----------------------------------------------------------------------------
 423 | Returns the sign bit of the single-precision floating-point value `a'.
 424 *----------------------------------------------------------------------------*/
 425
 426 static inline bool extractFloat32Sign(float32 a)
 427 {
 428     return float32_val(a) >> 31;
 429 }
 430
 431 /*----------------------------------------------------------------------------
 432 | Returns the fraction bits of the double-precision floating-point value `a'.
 433 *----------------------------------------------------------------------------*/
 434
 435 static inline uint64_t extractFloat64Frac(float64 a)
 436 {
 437     return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
 438 }
 439
 440 /*----------------------------------------------------------------------------
 441 | Returns the exponent bits of the double-precision floating-point value `a'.
 442 *----------------------------------------------------------------------------*/
 443
 444 static inline int extractFloat64Exp(float64 a)
 445 {
 446     return (float64_val(a) >> 52) & 0x7FF;
 447 }
 448
 449 /*----------------------------------------------------------------------------
 450 | Returns the sign bit of the double-precision floating-point value `a'.
 451 *----------------------------------------------------------------------------*/
 452
 453 static inline bool extractFloat64Sign(float64 a)
 454 {
 455     return float64_val(a) >> 63;
 456 }
 457
 458 /*
 459  * Classify a floating point number. Everything above float_class_qnan
 460  * is a NaN so cls >= float_class_qnan is any NaN.
 461  */
 462
 463 typedef enum __attribute__ ((__packed__)) {
 464     float_class_unclassified,
 465     float_class_zero,
 466     float_class_normal,
 467     float_class_inf,
 468     float_class_qnan,  /* all NaNs from here */
 469     float_class_snan,
 470 } FloatClass;
 471
 472 #define float_cmask(bit)  (1u << (bit))
 473
 474 enum {
 475     float_cmask_zero    = float_cmask(float_class_zero),
 476     float_cmask_normal  = float_cmask(float_class_normal),
 477     float_cmask_inf     = float_cmask(float_class_inf),
 478     float_cmask_qnan    = float_cmask(float_class_qnan),
 479     float_cmask_snan    = float_cmask(float_class_snan),
 480
 481     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
 482     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
 483 };
 484
 485
 486 /* Simple helpers for checking if, or what kind of, NaN we have */
 487 static inline __attribute__((unused)) bool is_nan(FloatClass c)
 488 {
 489     return unlikely(c >= float_class_qnan);
 490 }
 491
 492 static inline __attribute__((unused)) bool is_snan(FloatClass c)
 493 {
 494     return c == float_class_snan;
 495 }
 496
 497 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
 498 {
 499     return c == float_class_qnan;
 500 }
 501
 502 /*
 503  * Structure holding all of the decomposed parts of a float.
 504  * The exponent is unbiased and the fraction is normalized.
 505  *
 506  * The fraction words are stored in big-endian word ordering,
 507  * so that truncation from a larger format to a smaller format
 508  * can be done simply by ignoring subsequent elements.
 509  */
 510
/* Decomposed float whose fraction fits in a single 64-bit word. */
typedef struct {
    FloatClass cls;     /* classification of the value */
    bool sign;          /* sign bit */
    int32_t exp;        /* exponent */
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

/* Decomposed float with a two-word (128-bit) fraction. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;  /* most significant fraction word */
    uint64_t frac_lo;  /* least significant fraction word */
} FloatParts128;

/* Decomposed float with a four-word (256-bit) fraction. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;  /* most significant fraction word */
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;  /* least significant fraction word */
} FloatParts256;
 545
 546 /* These apply to the most significant word of each FloatPartsN. */
 547 #define DECOMPOSED_BINARY_POINT    63
 548 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
 549
 550 /* Structure holding all of the relevant parameters for a format.
 551  *   exp_size: the size of the exponent field
 552  *   exp_bias: the offset applied to the exponent field
 553  *   exp_max: the maximum normalised exponent
 554  *   frac_size: the size of the fraction field
 555  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 556  * The following are computed based the size of fraction
 557  *   frac_lsb: least significant bit of fraction
 558  *   frac_lsbm1: the bit below the least significant bit (for rounding)
 559  *   round_mask/roundeven_mask: masks used for rounding
 560  * The following optional modifiers are available:
 561  *   arm_althp: handle ARM Alternative Half Precision
 562  */
 563 typedef struct {
 564     int exp_size;
 565     int exp_bias;
 566     int exp_max;
 567     int frac_size;
 568     int frac_shift;
 569     uint64_t frac_lsb;
 570     uint64_t frac_lsbm1;
 571     uint64_t round_mask;
 572     uint64_t roundeven_mask;
 573     bool arm_althp;
 574 } FloatFmt;
 575
 576 /* Expand fields based on the size of exponent and fraction */
 577 #define FLOAT_PARAMS(E, F)                                           \
 578     .exp_size       = E,                                             \
 579     .exp_bias       = ((1 << E) - 1) >> 1,                           \
 580     .exp_max        = (1 << E) - 1,                                  \
 581     .frac_size      = F,                                             \
 582     .frac_shift     = (-F - 1) & 63,                                 \
 583     .frac_lsb       = 1ull << ((-F - 1) & 63),                       \
 584     .frac_lsbm1     = 1ull << ((-F - 2) & 63),                       \
 585     .round_mask     = (1ull << ((-F - 1) & 63)) - 1,                 \
 586     .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1
 587
/* float16: 5-bit exponent, 10-bit fraction. */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* Same field sizes as float16_params, with the arm_althp modifier set. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* bfloat16: 8-bit exponent, 7-bit fraction. */
static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

/* float32: 8-bit exponent, 23-bit fraction. */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* float64: 11-bit exponent, 52-bit fraction. */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

/* float128: 15-bit exponent, 112-bit fraction. */
static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};
 612
 613 /* Unpack a float to parts, but do not canonicalize.  */
 614 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
 615 {
 616     const int f_size = fmt->frac_size;
 617     const int e_size = fmt->exp_size;
 618
 619     *r = (FloatParts64) {
 620         .cls = float_class_unclassified,
 621         .sign = extract64(raw, f_size + e_size, 1),
 622         .exp = extract64(raw, f_size, e_size),
 623         .frac = extract64(raw, 0, f_size)
 624     };
 625 }
 626
 627 static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
 628 {
 629     unpack_raw64(p, &float16_params, f);
 630 }
 631
 632 static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
 633 {
 634     unpack_raw64(p, &bfloat16_params, f);
 635 }
 636
 637 static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
 638 {
 639     unpack_raw64(p, &float32_params, f);
 640 }
 641
 642 static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
 643 {
 644     unpack_raw64(p, &float64_params, f);
 645 }
 646
 647 static void float128_unpack_raw(FloatParts128 *p, float128 f)
 648 {
 649     const int f_size = float128_params.frac_size - 64;
 650     const int e_size = float128_params.exp_size;
 651
 652     *p = (FloatParts128) {
 653         .cls = float_class_unclassified,
 654         .sign = extract64(f.high, f_size + e_size, 1),
 655         .exp = extract64(f.high, f_size, e_size),
 656         .frac_hi = extract64(f.high, 0, f_size),
 657         .frac_lo = f.low,
 658     };
 659 }
 660
 661 /* Pack a float from parts, but do not canonicalize.  */
 662 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
 663 {
 664     const int f_size = fmt->frac_size;
 665     const int e_size = fmt->exp_size;
 666     uint64_t ret;
 667
 668     ret = (uint64_t)p->sign << (f_size + e_size);
 669     ret = deposit64(ret, f_size, e_size, p->exp);
 670     ret = deposit64(ret, 0, f_size, p->frac);
 671     return ret;
 672 }
 673
 674 static inline float16 float16_pack_raw(const FloatParts64 *p)
 675 {
 676     return make_float16(pack_raw64(p, &float16_params));
 677 }
 678
 679 static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
 680 {
 681     return pack_raw64(p, &bfloat16_params);
 682 }
 683
 684 static inline float32 float32_pack_raw(const FloatParts64 *p)
 685 {
 686     return make_float32(pack_raw64(p, &float32_params));
 687 }
 688
 689 static inline float64 float64_pack_raw(const FloatParts64 *p)
 690 {
 691     return make_float64(pack_raw64(p, &float64_params));
 692 }
 693
 694 static float128 float128_pack_raw(const FloatParts128 *p)
 695 {
 696     const int f_size = float128_params.frac_size - 64;
 697     const int e_size = float128_params.exp_size;
 698     uint64_t hi;
 699
 700     hi = (uint64_t)p->sign << (f_size + e_size);
 701     hi = deposit64(hi, f_size, e_size, p->exp);
 702     hi = deposit64(hi, 0, f_size, p->frac_hi);
 703     return make_float128(hi, p->frac_lo);
 704 }
 705
 706 /*----------------------------------------------------------------------------
 707 | Functions and definitions to determine:  (1) whether tininess for underflow
 708 | is detected before or after rounding by default, (2) what (if anything)
 709 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
 710 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
 711 | are propagated from function inputs to output.  These details are target-
 712 | specific.
 713 *----------------------------------------------------------------------------*/
 714 #include "softfloat-specialize.c.inc"
 715
/*
 * Name the parts64_NAME / parts128_NAME implementation matching P.
 * NOTE(review): QEMU_GENERIC is defined outside this file; presumably it
 * selects on the static type of P, with the last argument as the default
 * case - confirm against its definition.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

/* As above, with an additional parts256_NAME case for FloatParts256 *. */
#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)

/* Size-generic entry points: dispatch on P, then call with (P, S). */
#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
 725
/*
 * Forward declarations of the per-size NaN handling and (un)canonicalization
 * routines, each followed by its size-generic dispatch macro.  The
 * definitions are not part of this section of the file.
 */

/* return_nan: operates in place on a single operand. */
static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

/* pick_nan: two operands in, pointer to a parts structure out. */
static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

/* pick_nan_muladd: three-operand variant taking two class masks. */
static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

/* canonicalize: operates in place, driven by the format description. */
static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

/* uncanon: same signature as canonicalize. */
static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
 765
/*
 * Forward declarations of the per-size arithmetic routines, each followed by
 * its size-generic dispatch macro.  The definitions are not part of this
 * section of the file.
 */

/* add_normal / sub_normal also come in a 256-bit flavor. */
static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

/* addsub: one entry point for both operations, selected by `subtract'. */
static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

/* muladd: note that `flags' precedes the status in the argument list. */
static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)
 813
 814 /*
 815  * Helper functions for softfloat-parts.c.inc, per-size operations.
 816  */
 817
/*
 * Pick frac128_NAME when P is a FloatParts128 *, otherwise frac64_NAME.
 * QEMU_GENERIC is defined elsewhere; as used here it takes (type, result)
 * pairs followed by a default result.
 */
#define FRAC_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)

/* As above, with an additional FloatParts256 * -> frac256_NAME case. */
#define FRAC_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
                 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
 824
 825 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
 826 {
 827     return uadd64_overflow(a->frac, b->frac, &r->frac);
 828 }
 829
 830 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
 831 {
 832     bool c = 0;
 833     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 834     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 835     return c;
 836 }
 837
 838 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
 839 {
 840     bool c = 0;
 841     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 842     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
 843     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
 844     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 845     return c;
 846 }
 847
/* Type-generic add: picks frac64/128/256_add from the type of R. */
#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
 849
 850 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
 851 {
 852     return uadd64_overflow(a->frac, c, &r->frac);
 853 }
 854
 855 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
 856 {
 857     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
 858     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
 859 }
 860
/* Type-generic add-immediate: picks frac64/128_addi from the type of R. */
#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
 862
 863 static void frac64_allones(FloatParts64 *a)
 864 {
 865     a->frac = -1;
 866 }
 867
 868 static void frac128_allones(FloatParts128 *a)
 869 {
 870     a->frac_hi = a->frac_lo = -1;
 871 }
 872
/* Type-generic all-ones: picks frac64/128_allones from the type of A. */
#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
 874
 875 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
 876 {
 877     return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
 878 }
 879
 880 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
 881 {
 882     uint64_t ta = a->frac_hi, tb = b->frac_hi;
 883     if (ta == tb) {
 884         ta = a->frac_lo, tb = b->frac_lo;
 885         if (ta == tb) {
 886             return 0;
 887         }
 888     }
 889     return ta < tb ? -1 : 1;
 890 }
 891
/* Type-generic compare: picks frac64/128_cmp from the type of A. */
#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
 893
 894 static void frac64_clear(FloatParts64 *a)
 895 {
 896     a->frac = 0;
 897 }
 898
 899 static void frac128_clear(FloatParts128 *a)
 900 {
 901     a->frac_hi = a->frac_lo = 0;
 902 }
 903
/* Type-generic clear: picks frac64/128_clear from the type of A. */
#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
 905
 906 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
 907 {
 908     uint64_t n1, n0, r, q;
 909     bool ret;
 910
 911     /*
 912      * We want a 2*N / N-bit division to produce exactly an N-bit
 913      * result, so that we do not lose any precision and so that we
 914      * do not have to renormalize afterward.  If A.frac < B.frac,
 915      * then division would produce an (N-1)-bit result; shift A left
 916      * by one to produce the an N-bit result, and return true to
 917      * decrement the exponent to match.
 918      *
 919      * The udiv_qrnnd algorithm that we're using requires normalization,
 920      * i.e. the msb of the denominator must be set, which is already true.
 921      */
 922     ret = a->frac < b->frac;
 923     if (ret) {
 924         n0 = a->frac;
 925         n1 = 0;
 926     } else {
 927         n0 = a->frac >> 1;
 928         n1 = a->frac << 63;
 929     }
 930     q = udiv_qrnnd(&r, n0, n1, b->frac);
 931
 932     /* Set lsb if there is a remainder, to set inexact. */
 933     a->frac = q | (r != 0);
 934
 935     return ret;
 936 }
 937
/*
 * 128-bit counterpart of frac64_div: divide a's fraction by b's in
 * place.  The quotient is built one 64-bit word at a time (q0 then q1)
 * and written to a->frac_hi / a->frac_lo, with the lsb of the low word
 * set when a remainder is left over.
 *
 * Returns the result of lt128(a, b); when it is false the dividend is
 * shifted right by one bit before dividing.
 */
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        /* A >= B: halve the dividend (a0:a1 >>= 1). */
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}
 984
/* Type-generic divide: picks frac64/128_div from the type of A. */
#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
 986
 987 static bool frac64_eqz(FloatParts64 *a)
 988 {
 989     return a->frac == 0;
 990 }
 991
 992 static bool frac128_eqz(FloatParts128 *a)
 993 {
 994     return (a->frac_hi | a->frac_lo) == 0;
 995 }
 996
/* Type-generic zero test: picks frac64/128_eqz from the type of A. */
#define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
 998
 999 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1000 {
1001     mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1002 }
1003
1004 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1005 {
1006     mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1007                 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1008 }
1009
/*
 * Type-generic widening multiply.  Note the selection is made on the
 * type of the source operand A, not the (wider) result R.
 */
#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1011
1012 static void frac64_neg(FloatParts64 *a)
1013 {
1014     a->frac = -a->frac;
1015 }
1016
1017 static void frac128_neg(FloatParts128 *a)
1018 {
1019     bool c = 0;
1020     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1021     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1022 }
1023
1024 static void frac256_neg(FloatParts256 *a)
1025 {
1026     bool c = 0;
1027     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1028     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1029     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1030     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1031 }
1032
/* Type-generic negate: picks frac64/128/256_neg from the type of A. */
#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1034
1035 static int frac64_normalize(FloatParts64 *a)
1036 {
1037     if (a->frac) {
1038         int shift = clz64(a->frac);
1039         a->frac <<= shift;
1040         return shift;
1041     }
1042     return 64;
1043 }
1044
1045 static int frac128_normalize(FloatParts128 *a)
1046 {
1047     if (a->frac_hi) {
1048         int shl = clz64(a->frac_hi);
1049         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1050         a->frac_lo <<= shl;
1051         return shl;
1052     } else if (a->frac_lo) {
1053         int shl = clz64(a->frac_lo);
1054         a->frac_hi = a->frac_lo << shl;
1055         a->frac_lo = 0;
1056         return shl + 64;
1057     }
1058     return 128;
1059 }
1060
/*
 * Normalize the 256-bit fraction (words hi, hm, lm, lo) by shifting it
 * left, and return the total shift applied.  Whole zero words at the
 * top are skipped first (64 bits each), then the remaining shift is
 * clz64() of the new top word.  An all-zero fraction yields 256.
 */
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            /* Nothing to shift: leave the fraction untouched. */
            return 0;
        }
        ret = shl;
    } else {
        /* Move the first non-zero word up to the top position. */
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            /* Word move was sufficient; skip the bit shift. */
            goto done;
        }
        ret += shl;
    }

    /* Sub-word shift, here always by a non-zero amount. */
    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}
1107
/* Type-generic normalize: picks frac64/128/256_normalize from A's type. */
#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1109
1110 static void frac64_shl(FloatParts64 *a, int c)
1111 {
1112     a->frac <<= c;
1113 }
1114
1115 static void frac128_shl(FloatParts128 *a, int c)
1116 {
1117     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1118
1119     if (c & 64) {
1120         a0 = a1, a1 = 0;
1121     }
1122
1123     c &= 63;
1124     if (c) {
1125         a0 = shl_double(a0, a1, c);
1126         a1 = a1 << c;
1127     }
1128
1129     a->frac_hi = a0;
1130     a->frac_lo = a1;
1131 }
1132
/* Type-generic left shift: picks frac64/128_shl from the type of A. */
#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1134
1135 static void frac64_shr(FloatParts64 *a, int c)
1136 {
1137     a->frac >>= c;
1138 }
1139
1140 static void frac128_shr(FloatParts128 *a, int c)
1141 {
1142     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1143
1144     if (c & 64) {
1145         a1 = a0, a0 = 0;
1146     }
1147
1148     c &= 63;
1149     if (c) {
1150         a1 = shr_double(a0, a1, c);
1151         a0 = a0 >> c;
1152     }
1153
1154     a->frac_hi = a0;
1155     a->frac_lo = a1;
1156 }
1157
/* Type-generic right shift: picks frac64/128_shr from the type of A. */
#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1159
1160 static void frac64_shrjam(FloatParts64 *a, int c)
1161 {
1162     uint64_t a0 = a->frac;
1163
1164     if (likely(c != 0)) {
1165         if (likely(c < 64)) {
1166             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1167         } else {
1168             a0 = a0 != 0;
1169         }
1170         a->frac = a0;
1171     }
1172 }
1173
/*
 * Shift the 128-bit fraction right by c with "jamming": everything that
 * falls off the bottom is collected in 'sticky', and the lsb of the
 * result is set when sticky is non-zero.  c == 0 returns without
 * touching the fraction.
 */
static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        /* Whole-word shift: the old low word is entirely shifted out. */
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* c >= 128: every bit is shifted out. */
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    /* Sub-word shift by 1..63; gather the bits leaving a1 first. */
    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}
1205
/*
 * Shift the 256-bit fraction (words hi, hm, lm, lo) right by c with
 * "jamming": everything that falls off the bottom is collected in
 * 'sticky', and the lsb of the result is set when sticky is non-zero.
 * c == 0 returns without touching the fraction.
 */
static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        /* Whole-word moves: 128 bits first, then a further 64 bits. */
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* c >= 256: every bit is shifted out. */
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    /* Sub-word shift by 1..63; gather the bits leaving a3 first. */
    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}
1247
/* Type-generic shift-right-and-jam: picks frac64/128/256_shrjam. */
#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1249
1250 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1251 {
1252     return usub64_overflow(a->frac, b->frac, &r->frac);
1253 }
1254
1255 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1256 {
1257     bool c = 0;
1258     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1259     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1260     return c;
1261 }
1262
1263 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1264 {
1265     bool c = 0;
1266     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1267     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1268     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1269     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1270     return c;
1271 }
1272
/* Type-generic subtract: picks frac64/128/256_sub from the type of R. */
#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1274
1275 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1276 {
1277     r->frac = a->frac_hi | (a->frac_lo != 0);
1278 }
1279
1280 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1281 {
1282     r->frac_hi = a->frac_hi;
1283     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1284 }
1285
/* Type-generic truncate-and-jam, selected on the (narrow) result R. */
#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1287
1288 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1289 {
1290     r->frac_hi = a->frac;
1291     r->frac_lo = 0;
1292 }
1293
1294 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1295 {
1296     r->frac_hi = a->frac_hi;
1297     r->frac_hm = a->frac_lo;
1298     r->frac_lm = 0;
1299     r->frac_lo = 0;
1300 }
1301
/*
 * Type-generic widen.  Here A is the (wide) destination and B the
 * source; the selection is made on the type of B.
 */
#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1303
/*
 * Template instantiation.  The .c.inc files are included several times
 * with different N (and W) so that partsN(NAME) / FloatPartsN /
 * FloatPartsW name the size-specific identifiers, e.g. parts64_NAME and
 * FloatParts64 for N == 64 (glue() is defined elsewhere; presumably it
 * pastes its two arguments together).
 */
#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN    glue(FloatParts,N)
#define FloatPartsW    glue(FloatParts,W)

/* First pass: N = 64, W = 128. */
#define N 64
#define W 128

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

/* Second pass: N = 128, W = 256. */
#undef  N
#undef  W
#define N 128
#define W 256

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

/* Third pass: N = 256, add/sub template only; W is left undefined. */
#undef  N
#undef  W
#define N            256

#include "softfloat-parts-addsub.c.inc"

/* Drop the template helper macros again. */
#undef  N
#undef  W
#undef  partsN
#undef  FloatPartsN
#undef  FloatPartsW
1333
1334 /*
1335  * Pack/unpack routines with a specific FloatFmt.
1336  */
1337
1338 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1339                                       float_status *s, const FloatFmt *params)
1340 {
1341     float16_unpack_raw(p, f);
1342     parts_canonicalize(p, s, params);
1343 }
1344
1345 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1346                                      float_status *s)
1347 {
1348     float16a_unpack_canonical(p, f, s, &float16_params);
1349 }
1350
1351 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1352                                       float_status *s)
1353 {
1354     bfloat16_unpack_raw(p, f);
1355     parts_canonicalize(p, s, &bfloat16_params);
1356 }
1357
1358 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1359                                              float_status *s,
1360                                              const FloatFmt *params)
1361 {
1362     parts_uncanon(p, s, params);
1363     return float16_pack_raw(p);
1364 }
1365
1366 static float16 float16_round_pack_canonical(FloatParts64 *p,
1367                                             float_status *s)
1368 {
1369     return float16a_round_pack_canonical(p, s, &float16_params);
1370 }
1371
1372 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1373                                               float_status *s)
1374 {
1375     parts_uncanon(p, s, &bfloat16_params);
1376     return bfloat16_pack_raw(p);
1377 }
1378
1379 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1380                                      float_status *s)
1381 {
1382     float32_unpack_raw(p, f);
1383     parts_canonicalize(p, s, &float32_params);
1384 }
1385
1386 static float32 float32_round_pack_canonical(FloatParts64 *p,
1387                                             float_status *s)
1388 {
1389     parts_uncanon(p, s, &float32_params);
1390     return float32_pack_raw(p);
1391 }
1392
1393 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1394                                      float_status *s)
1395 {
1396     float64_unpack_raw(p, f);
1397     parts_canonicalize(p, s, &float64_params);
1398 }
1399
1400 static float64 float64_round_pack_canonical(FloatParts64 *p,
1401                                             float_status *s)
1402 {
1403     parts_uncanon(p, s, &float64_params);
1404     return float64_pack_raw(p);
1405 }
1406
1407 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1408                                       float_status *s)
1409 {
1410     float128_unpack_raw(p, f);
1411     parts_canonicalize(p, s, &float128_params);
1412 }
1413
1414 static float128 float128_round_pack_canonical(FloatParts128 *p,
1415                                               float_status *s)
1416 {
1417     parts_uncanon(p, s, &float128_params);
1418     return float128_pack_raw(p);
1419 }
1420
1421 /*
1422  * Addition and subtraction
1423  */
1424
1425 static float16 QEMU_FLATTEN
1426 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1427 {
1428     FloatParts64 pa, pb, *pr;
1429
1430     float16_unpack_canonical(&pa, a, status);
1431     float16_unpack_canonical(&pb, b, status);
1432     pr = parts_addsub(&pa, &pb, status, subtract);
1433
1434     return float16_round_pack_canonical(pr, status);
1435 }
1436
1437 float16 float16_add(float16 a, float16 b, float_status *status)
1438 {
1439     return float16_addsub(a, b, status, false);
1440 }
1441
1442 float16 float16_sub(float16 a, float16 b, float_status *status)
1443 {
1444     return float16_addsub(a, b, status, true);
1445 }
1446
1447 static float32 QEMU_SOFTFLOAT_ATTR
1448 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1449 {
1450     FloatParts64 pa, pb, *pr;
1451
1452     float32_unpack_canonical(&pa, a, status);
1453     float32_unpack_canonical(&pb, b, status);
1454     pr = parts_addsub(&pa, &pb, status, subtract);
1455
1456     return float32_round_pack_canonical(pr, status);
1457 }
1458
1459 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1460 {
1461     return soft_f32_addsub(a, b, status, false);
1462 }
1463
1464 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1465 {
1466     return soft_f32_addsub(a, b, status, true);
1467 }
1468
1469 static float64 QEMU_SOFTFLOAT_ATTR
1470 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1471 {
1472     FloatParts64 pa, pb, *pr;
1473
1474     float64_unpack_canonical(&pa, a, status);
1475     float64_unpack_canonical(&pb, b, status);
1476     pr = parts_addsub(&pa, &pb, status, subtract);
1477
1478     return float64_round_pack_canonical(pr, status);
1479 }
1480
1481 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1482 {
1483     return soft_f64_addsub(a, b, status, false);
1484 }
1485
1486 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1487 {
1488     return soft_f64_addsub(a, b, status, true);
1489 }
1490
/* Host single-precision addition. */
static float hard_f32_add(float a, float b)
{
    float sum = a + b;

    return sum;
}
1495
/* Host single-precision subtraction. */
static float hard_f32_sub(float a, float b)
{
    float diff = a - b;

    return diff;
}
1500
/* Host double-precision addition. */
static double hard_f64_add(double a, double b)
{
    double sum = a + b;

    return sum;
}
1505
/* Host double-precision subtraction. */
static double hard_f64_sub(double a, double b)
{
    double diff = a - b;

    return diff;
}
1510
1511 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1512 {
1513     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1514         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1515     }
1516     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1517 }
1518
1519 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1520 {
1521     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1522         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1523     } else {
1524         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1525     }
1526 }
1527
1528 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1529                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1530 {
1531     return float32_gen2(a, b, s, hard, soft,
1532                         f32_is_zon2, f32_addsubmul_post);
1533 }
1534
1535 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1536                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1537 {
1538     return float64_gen2(a, b, s, hard, soft,
1539                         f64_is_zon2, f64_addsubmul_post);
1540 }
1541
1542 float32 QEMU_FLATTEN
1543 float32_add(float32 a, float32 b, float_status *s)
1544 {
1545     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1546 }
1547
1548 float32 QEMU_FLATTEN
1549 float32_sub(float32 a, float32 b, float_status *s)
1550 {
1551     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1552 }
1553
1554 float64 QEMU_FLATTEN
1555 float64_add(float64 a, float64 b, float_status *s)
1556 {
1557     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1558 }
1559
1560 float64 QEMU_FLATTEN
1561 float64_sub(float64 a, float64 b, float_status *s)
1562 {
1563     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1564 }
1565
1566 static bfloat16 QEMU_FLATTEN
1567 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1568 {
1569     FloatParts64 pa, pb, *pr;
1570
1571     bfloat16_unpack_canonical(&pa, a, status);
1572     bfloat16_unpack_canonical(&pb, b, status);
1573     pr = parts_addsub(&pa, &pb, status, subtract);
1574
1575     return bfloat16_round_pack_canonical(pr, status);
1576 }
1577
1578 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1579 {
1580     return bfloat16_addsub(a, b, status, false);
1581 }
1582
1583 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1584 {
1585     return bfloat16_addsub(a, b, status, true);
1586 }
1587
1588 static float128 QEMU_FLATTEN
1589 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1590 {
1591     FloatParts128 pa, pb, *pr;
1592
1593     float128_unpack_canonical(&pa, a, status);
1594     float128_unpack_canonical(&pb, b, status);
1595     pr = parts_addsub(&pa, &pb, status, subtract);
1596
1597     return float128_round_pack_canonical(pr, status);
1598 }
1599
1600 float128 float128_add(float128 a, float128 b, float_status *status)
1601 {
1602     return float128_addsub(a, b, status, false);
1603 }
1604
1605 float128 float128_sub(float128 a, float128 b, float_status *status)
1606 {
1607     return float128_addsub(a, b, status, true);
1608 }
1609
1610 /*
1611  * Multiplication
1612  */
1613
1614 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
1615 {
1616     FloatParts64 pa, pb, *pr;
1617
1618     float16_unpack_canonical(&pa, a, status);
1619     float16_unpack_canonical(&pb, b, status);
1620     pr = parts_mul(&pa, &pb, status);
1621
1622     return float16_round_pack_canonical(pr, status);
1623 }
1624
1625 static float32 QEMU_SOFTFLOAT_ATTR
1626 soft_f32_mul(float32 a, float32 b, float_status *status)
1627 {
1628     FloatParts64 pa, pb, *pr;
1629
1630     float32_unpack_canonical(&pa, a, status);
1631     float32_unpack_canonical(&pb, b, status);
1632     pr = parts_mul(&pa, &pb, status);
1633
1634     return float32_round_pack_canonical(pr, status);
1635 }
1636
1637 static float64 QEMU_SOFTFLOAT_ATTR
1638 soft_f64_mul(float64 a, float64 b, float_status *status)
1639 {
1640     FloatParts64 pa, pb, *pr;
1641
1642     float64_unpack_canonical(&pa, a, status);
1643     float64_unpack_canonical(&pb, b, status);
1644     pr = parts_mul(&pa, &pb, status);
1645
1646     return float64_round_pack_canonical(pr, status);
1647 }
1648
/* Host single-precision multiplication. */
static float hard_f32_mul(float a, float b)
{
    float product = a * b;

    return product;
}
1653
/* Host double-precision multiplication. */
static double hard_f64_mul(double a, double b)
{
    double product = a * b;

    return product;
}
1658
1659 float32 QEMU_FLATTEN
1660 float32_mul(float32 a, float32 b, float_status *s)
1661 {
1662     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
1663                         f32_is_zon2, f32_addsubmul_post);
1664 }
1665
1666 float64 QEMU_FLATTEN
1667 float64_mul(float64 a, float64 b, float_status *s)
1668 {
1669     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
1670                         f64_is_zon2, f64_addsubmul_post);
1671 }
1672
1673 bfloat16 QEMU_FLATTEN
1674 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1675 {
1676     FloatParts64 pa, pb, *pr;
1677
1678     bfloat16_unpack_canonical(&pa, a, status);
1679     bfloat16_unpack_canonical(&pb, b, status);
1680     pr = parts_mul(&pa, &pb, status);
1681
1682     return bfloat16_round_pack_canonical(pr, status);
1683 }
1684
1685 float128 QEMU_FLATTEN
1686 float128_mul(float128 a, float128 b, float_status *status)
1687 {
1688     FloatParts128 pa, pb, *pr;
1689
1690     float128_unpack_canonical(&pa, a, status);
1691     float128_unpack_canonical(&pb, b, status);
1692     pr = parts_mul(&pa, &pb, status);
1693
1694     return float128_round_pack_canonical(pr, status);
1695 }
1696
1697 /*
1698  * Fused multiply-add
1699  */
1700
1701 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1702                                     int flags, float_status *status)
1703 {
1704     FloatParts64 pa, pb, pc, *pr;
1705
1706     float16_unpack_canonical(&pa, a, status);
1707     float16_unpack_canonical(&pb, b, status);
1708     float16_unpack_canonical(&pc, c, status);
1709     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1710
1711     return float16_round_pack_canonical(pr, status);
1712 }
1713
1714 static float32 QEMU_SOFTFLOAT_ATTR
1715 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1716                 float_status *status)
1717 {
1718     FloatParts64 pa, pb, pc, *pr;
1719
1720     float32_unpack_canonical(&pa, a, status);
1721     float32_unpack_canonical(&pb, b, status);
1722     float32_unpack_canonical(&pc, c, status);
1723     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1724
1725     return float32_round_pack_canonical(pr, status);
1726 }
1727
1728 static float64 QEMU_SOFTFLOAT_ATTR
1729 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1730                 float_status *status)
1731 {
1732     FloatParts64 pa, pb, pc, *pr;
1733
1734     float64_unpack_canonical(&pa, a, status);
1735     float64_unpack_canonical(&pb, b, status);
1736     float64_unpack_canonical(&pc, c, status);
1737     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1738
1739     return float64_round_pack_canonical(pr, status);
1740 }
1741
/*
 * When true, float32_muladd() and float64_muladd() bypass the host
 * fma()/fmaf() path and use the softfloat implementation.  It is not
 * assigned in this part of the file.
 */
static bool force_soft_fma;
1743
/*
 * float32 multiply-add with the float_muladd_* modifiers in 'flags'.
 *
 * Tries the host FPU first and falls back to soft_f32_muladd() whenever
 * the fast path cannot be used: can_use_fpu() fails, halve_result is
 * requested, an input fails the f32_is_zon3() check, force_soft_fma is
 * set, or the host result has magnitude <= FLT_MIN.
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    /* May rewrite ua/ub/uc in place before the operand check. */
    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        /* Build the signed-zero product by hand, then add c on the host. */
        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        /* Keep un-negated copies so the soft path sees the original inputs. */
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /* Tiny result: redo in softfloat with the original operands. */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
1814
1815 float64 QEMU_FLATTEN
1816 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1817 {
1818     union_float64 ua, ub, uc, ur;
1819
1820     ua.s = xa;
1821     ub.s = xb;
1822     uc.s = xc;
1823
1824     if (unlikely(!can_use_fpu(s))) {
1825         goto soft;
1826     }
1827     if (unlikely(flags & float_muladd_halve_result)) {
1828         goto soft;
1829     }
1830
1831     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1832     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1833         goto soft;
1834     }
1835
1836     if (unlikely(force_soft_fma)) {
1837         goto soft;
1838     }
1839
1840     /*
1841      * When (a || b) == 0, there's no need to check for under/over flow,
1842      * since we know the addend is (normal || 0) and the product is 0.
1843      */
1844     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1845         union_float64 up;
1846         bool prod_sign;
1847
1848         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1849         prod_sign ^= !!(flags & float_muladd_negate_product);
1850         up.s = float64_set_sign(float64_zero, prod_sign);
1851
1852         if (flags & float_muladd_negate_c) {
1853             uc.h = -uc.h;
1854         }
1855         ur.h = up.h + uc.h;
1856     } else {
1857         union_float64 ua_orig = ua;
1858         union_float64 uc_orig = uc;
1859
1860         if (flags & float_muladd_negate_product) {
1861             ua.h = -ua.h;
1862         }
1863         if (flags & float_muladd_negate_c) {
1864             uc.h = -uc.h;
1865         }
1866
1867         ur.h = fma(ua.h, ub.h, uc.h);
1868
1869         if (unlikely(f64_is_inf(ur))) {
1870             float_raise(float_flag_overflow, s);
1871         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
1872             ua = ua_orig;
1873             uc = uc_orig;
1874             goto soft;
1875         }
1876     }
1877     if (flags & float_muladd_negate_result) {
1878         return float64_chs(ur.s);
1879     }
1880     return ur.s;
1881
1882  soft:
1883     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1884 }
1885
1886 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1887                                       int flags, float_status *status)
1888 {
1889     FloatParts64 pa, pb, pc, *pr;
1890
1891     bfloat16_unpack_canonical(&pa, a, status);
1892     bfloat16_unpack_canonical(&pb, b, status);
1893     bfloat16_unpack_canonical(&pc, c, status);
1894     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1895
1896     return bfloat16_round_pack_canonical(pr, status);
1897 }
1898
1899 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
1900                                       int flags, float_status *status)
1901 {
1902     FloatParts128 pa, pb, pc, *pr;
1903
1904     float128_unpack_canonical(&pa, a, status);
1905     float128_unpack_canonical(&pb, b, status);
1906     float128_unpack_canonical(&pc, c, status);
1907     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1908
1909     return float128_round_pack_canonical(pr, status);
1910 }
1911
1912 /*
1913  * Division
1914  */
1915
1916 float16 float16_div(float16 a, float16 b, float_status *status)
1917 {
1918     FloatParts64 pa, pb, *pr;
1919
1920     float16_unpack_canonical(&pa, a, status);
1921     float16_unpack_canonical(&pb, b, status);
1922     pr = parts_div(&pa, &pb, status);
1923
1924     return float16_round_pack_canonical(pr, status);
1925 }
1926
1927 static float32 QEMU_SOFTFLOAT_ATTR
1928 soft_f32_div(float32 a, float32 b, float_status *status)
1929 {
1930     FloatParts64 pa, pb, *pr;
1931
1932     float32_unpack_canonical(&pa, a, status);
1933     float32_unpack_canonical(&pb, b, status);
1934     pr = parts_div(&pa, &pb, status);
1935
1936     return float32_round_pack_canonical(pr, status);
1937 }
1938
1939 static float64 QEMU_SOFTFLOAT_ATTR
1940 soft_f64_div(float64 a, float64 b, float_status *status)
1941 {
1942     FloatParts64 pa, pb, *pr;
1943
1944     float64_unpack_canonical(&pa, a, status);
1945     float64_unpack_canonical(&pb, b, status);
1946     pr = parts_div(&pa, &pb, status);
1947
1948     return float64_round_pack_canonical(pr, status);
1949 }
1950
/* Host single-precision division: returns a / b. */
static float hard_f32_div(float a, float b)
{
    const float quotient = a / b;

    return quotient;
}
1955
/* Host double-precision division: returns a / b. */
static double hard_f64_div(double a, double b)
{
    const double quotient = a / b;

    return quotient;
}
1960
1961 static bool f32_div_pre(union_float32 a, union_float32 b)
1962 {
1963     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1964         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1965                fpclassify(b.h) == FP_NORMAL;
1966     }
1967     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1968 }
1969
1970 static bool f64_div_pre(union_float64 a, union_float64 b)
1971 {
1972     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1973         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1974                fpclassify(b.h) == FP_NORMAL;
1975     }
1976     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1977 }
1978
1979 static bool f32_div_post(union_float32 a, union_float32 b)
1980 {
1981     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1982         return fpclassify(a.h) != FP_ZERO;
1983     }
1984     return !float32_is_zero(a.s);
1985 }
1986
1987 static bool f64_div_post(union_float64 a, union_float64 b)
1988 {
1989     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1990         return fpclassify(a.h) != FP_ZERO;
1991     }
1992     return !float64_is_zero(a.s);
1993 }
1994
1995 float32 QEMU_FLATTEN
1996 float32_div(float32 a, float32 b, float_status *s)
1997 {
1998     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
1999                         f32_div_pre, f32_div_post);
2000 }
2001
2002 float64 QEMU_FLATTEN
2003 float64_div(float64 a, float64 b, float_status *s)
2004 {
2005     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2006                         f64_div_pre, f64_div_post);
2007 }
2008
2009 bfloat16 QEMU_FLATTEN
2010 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2011 {
2012     FloatParts64 pa, pb, *pr;
2013
2014     bfloat16_unpack_canonical(&pa, a, status);
2015     bfloat16_unpack_canonical(&pb, b, status);
2016     pr = parts_div(&pa, &pb, status);
2017
2018     return bfloat16_round_pack_canonical(pr, status);
2019 }
2020
2021 float128 QEMU_FLATTEN
2022 float128_div(float128 a, float128 b, float_status *status)
2023 {
2024     FloatParts128 pa, pb, *pr;
2025
2026     float128_unpack_canonical(&pa, a, status);
2027     float128_unpack_canonical(&pb, b, status);
2028     pr = parts_div(&pa, &pb, status);
2029
2030     return float128_round_pack_canonical(pr, status);
2031 }
2032
2033 /*
2034  * Float to Float conversions
2035  *
2036  * Returns the result of converting one float format to another. The
2037  * conversion is performed according to the IEC/IEEE Standard for
2038  * Binary Floating-Point Arithmetic.
2039  *
2040  * The float_to_float helper only needs to take care of raising
2041  * invalid exceptions and handling the conversion on NaNs.
2042  */
2043
2044 static FloatParts64 float_to_float(FloatParts64 a, const FloatFmt *dstf,
2045                                  float_status *s)
2046 {
2047     if (dstf->arm_althp) {
2048         switch (a.cls) {
2049         case float_class_qnan:
2050         case float_class_snan:
2051             /* There is no NaN in the destination format.  Raise Invalid
2052              * and return a zero with the sign of the input NaN.
2053              */
2054             float_raise(float_flag_invalid, s);
2055             a.cls = float_class_zero;
2056             a.frac = 0;
2057             a.exp = 0;
2058             break;
2059
2060         case float_class_inf:
2061             /* There is no Inf in the destination format.  Raise Invalid
2062              * and return the maximum normal with the correct sign.
2063              */
2064             float_raise(float_flag_invalid, s);
2065             a.cls = float_class_normal;
2066             a.exp = dstf->exp_max;
2067             a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
2068             break;
2069
2070         default:
2071             break;
2072         }
2073     } else if (is_nan(a.cls)) {
2074         parts_return_nan(&a, s);
2075     }
2076     return a;
2077 }
2078
2079 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2080 {
2081     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2082     FloatParts64 pa, pr;
2083
2084     float16a_unpack_canonical(&pa, a, s, fmt16);
2085     pr = float_to_float(pa, &float32_params, s);
2086     return float32_round_pack_canonical(&pr, s);
2087 }
2088
2089 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2090 {
2091     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2092     FloatParts64 pa, pr;
2093
2094     float16a_unpack_canonical(&pa, a, s, fmt16);
2095     pr = float_to_float(pa, &float64_params, s);
2096     return float64_round_pack_canonical(&pr, s);
2097 }
2098
2099 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2100 {
2101     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2102     FloatParts64 pa, pr;
2103
2104     float32_unpack_canonical(&pa, a, s);
2105     pr = float_to_float(pa, fmt16, s);
2106     return float16a_round_pack_canonical(&pr, s, fmt16);
2107 }
2108
2109 static float64 QEMU_SOFTFLOAT_ATTR
2110 soft_float32_to_float64(float32 a, float_status *s)
2111 {
2112     FloatParts64 pa, pr;
2113
2114     float32_unpack_canonical(&pa, a, s);
2115     pr = float_to_float(pa, &float64_params, s);
2116     return float64_round_pack_canonical(&pr, s);
2117 }
2118
2119 float64 float32_to_float64(float32 a, float_status *s)
2120 {
2121     if (likely(float32_is_normal(a))) {
2122         /* Widening conversion can never produce inexact results.  */
2123         union_float32 uf;
2124         union_float64 ud;
2125         uf.s = a;
2126         ud.h = uf.h;
2127         return ud.s;
2128     } else if (float32_is_zero(a)) {
2129         return float64_set_sign(float64_zero, float32_is_neg(a));
2130     } else {
2131         return soft_float32_to_float64(a, s);
2132     }
2133 }
2134
2135 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2136 {
2137     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2138     FloatParts64 pa, pr;
2139
2140     float64_unpack_canonical(&pa, a, s);
2141     pr = float_to_float(pa, fmt16, s);
2142     return float16a_round_pack_canonical(&pr, s, fmt16);
2143 }
2144
2145 float32 float64_to_float32(float64 a, float_status *s)
2146 {
2147     FloatParts64 pa, pr;
2148
2149     float64_unpack_canonical(&pa, a, s);
2150     pr = float_to_float(pa, &float32_params, s);
2151     return float32_round_pack_canonical(&pr, s);
2152 }
2153
2154 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2155 {
2156     FloatParts64 pa, pr;
2157
2158     bfloat16_unpack_canonical(&pa, a, s);
2159     pr = float_to_float(pa, &float32_params, s);
2160     return float32_round_pack_canonical(&pr, s);
2161 }
2162
2163 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2164 {
2165     FloatParts64 pa, pr;
2166
2167     bfloat16_unpack_canonical(&pa, a, s);
2168     pr = float_to_float(pa, &float64_params, s);
2169     return float64_round_pack_canonical(&pr, s);
2170 }
2171
2172 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2173 {
2174     FloatParts64 pa, pr;
2175
2176     float32_unpack_canonical(&pa, a, s);
2177     pr = float_to_float(pa, &bfloat16_params, s);
2178     return bfloat16_round_pack_canonical(&pr, s);
2179 }
2180
2181 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2182 {
2183     FloatParts64 pa, pr;
2184
2185     float64_unpack_canonical(&pa, a, s);
2186     pr = float_to_float(pa, &bfloat16_params, s);
2187     return bfloat16_round_pack_canonical(&pr, s);
2188 }
2189
2190 /*
2191  * Rounds the floating-point value `a' to an integer, and returns the
2192  * result as a floating-point value. The operation is performed
2193  * according to the IEC/IEEE Standard for Binary Floating-Point
2194  * Arithmetic.
2195  */
2196
2197 static FloatParts64 round_to_int(FloatParts64 a, FloatRoundMode rmode,
2198                                int scale, float_status *s)
2199 {
2200     switch (a.cls) {
2201     case float_class_qnan:
2202     case float_class_snan:
2203         parts_return_nan(&a, s);
2204         break;
2205
2206     case float_class_zero:
2207     case float_class_inf:
2208         /* already "integral" */
2209         break;
2210
2211     case float_class_normal:
2212         scale = MIN(MAX(scale, -0x10000), 0x10000);
2213         a.exp += scale;
2214
2215         if (a.exp >= DECOMPOSED_BINARY_POINT) {
2216             /* already integral */
2217             break;
2218         }
2219         if (a.exp < 0) {
2220             bool one;
2221             /* all fractional */
2222             float_raise(float_flag_inexact, s);
2223             switch (rmode) {
2224             case float_round_nearest_even:
2225                 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
2226                 break;
2227             case float_round_ties_away:
2228                 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
2229                 break;
2230             case float_round_to_zero:
2231                 one = false;
2232                 break;
2233             case float_round_up:
2234                 one = !a.sign;
2235                 break;
2236             case float_round_down:
2237                 one = a.sign;
2238                 break;
2239             case float_round_to_odd:
2240                 one = true;
2241                 break;
2242             default:
2243                 g_assert_not_reached();
2244             }
2245
2246             if (one) {
2247                 a.frac = DECOMPOSED_IMPLICIT_BIT;
2248                 a.exp = 0;
2249             } else {
2250                 a.cls = float_class_zero;
2251             }
2252         } else {
2253             uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
2254             uint64_t frac_lsbm1 = frac_lsb >> 1;
2255             uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
2256             uint64_t rnd_mask = rnd_even_mask >> 1;
2257             uint64_t inc;
2258
2259             switch (rmode) {
2260             case float_round_nearest_even:
2261                 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
2262                 break;
2263             case float_round_ties_away:
2264                 inc = frac_lsbm1;
2265                 break;
2266             case float_round_to_zero:
2267                 inc = 0;
2268                 break;
2269             case float_round_up:
2270                 inc = a.sign ? 0 : rnd_mask;
2271                 break;
2272             case float_round_down:
2273                 inc = a.sign ? rnd_mask : 0;
2274                 break;
2275             case float_round_to_odd:
2276                 inc = a.frac & frac_lsb ? 0 : rnd_mask;
2277                 break;
2278             default:
2279                 g_assert_not_reached();
2280             }
2281
2282             if (a.frac & rnd_mask) {
2283                 float_raise(float_flag_inexact, s);
2284                 if (uadd64_overflow(a.frac, inc, &a.frac)) {
2285                     a.frac >>= 1;
2286                     a.frac |= DECOMPOSED_IMPLICIT_BIT;
2287                     a.exp++;
2288                 }
2289                 a.frac &= ~rnd_mask;
2290             }
2291         }
2292         break;
2293     default:
2294         g_assert_not_reached();
2295     }
2296     return a;
2297 }
2298
2299 float16 float16_round_to_int(float16 a, float_status *s)
2300 {
2301     FloatParts64 pa, pr;
2302
2303     float16_unpack_canonical(&pa, a, s);
2304     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2305     return float16_round_pack_canonical(&pr, s);
2306 }
2307
2308 float32 float32_round_to_int(float32 a, float_status *s)
2309 {
2310     FloatParts64 pa, pr;
2311
2312     float32_unpack_canonical(&pa, a, s);
2313     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2314     return float32_round_pack_canonical(&pr, s);
2315 }
2316
2317 float64 float64_round_to_int(float64 a, float_status *s)
2318 {
2319     FloatParts64 pa, pr;
2320
2321     float64_unpack_canonical(&pa, a, s);
2322     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2323     return float64_round_pack_canonical(&pr, s);
2324 }
2325
2326 /*
2327  * Rounds the bfloat16 value `a' to an integer, and returns the
2328  * result as a bfloat16 value.
2329  */
2330
2331 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2332 {
2333     FloatParts64 pa, pr;
2334
2335     bfloat16_unpack_canonical(&pa, a, s);
2336     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2337     return bfloat16_round_pack_canonical(&pr, s);
2338 }
2339
2340 /*
2341  * Returns the result of converting the floating-point value `a' to
2342  * the two's complement integer format. The conversion is performed
2343  * according to the IEC/IEEE Standard for Binary Floating-Point
2344  * Arithmetic---which means in particular that the conversion is
2345  * rounded according to the current rounding mode. If `a' is a NaN,
2346  * the largest positive integer is returned. Otherwise, if the
2347  * conversion overflows, the largest integer with the same sign as `a'
2348  * is returned.
2349 */
2350
2351 static int64_t round_to_int_and_pack(FloatParts64 in, FloatRoundMode rmode,
2352                                      int scale, int64_t min, int64_t max,
2353                                      float_status *s)
2354 {
2355     uint64_t r;
2356     int orig_flags = get_float_exception_flags(s);
2357     FloatParts64 p = round_to_int(in, rmode, scale, s);
2358
2359     switch (p.cls) {
2360     case float_class_snan:
2361     case float_class_qnan:
2362         s->float_exception_flags = orig_flags | float_flag_invalid;
2363         return max;
2364     case float_class_inf:
2365         s->float_exception_flags = orig_flags | float_flag_invalid;
2366         return p.sign ? min : max;
2367     case float_class_zero:
2368         return 0;
2369     case float_class_normal:
2370         if (p.exp <= DECOMPOSED_BINARY_POINT) {
2371             r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2372         } else {
2373             r = UINT64_MAX;
2374         }
2375         if (p.sign) {
2376             if (r <= -(uint64_t) min) {
2377                 return -r;
2378             } else {
2379                 s->float_exception_flags = orig_flags | float_flag_invalid;
2380                 return min;
2381             }
2382         } else {
2383             if (r <= max) {
2384                 return r;
2385             } else {
2386                 s->float_exception_flags = orig_flags | float_flag_invalid;
2387                 return max;
2388             }
2389         }
2390     default:
2391         g_assert_not_reached();
2392     }
2393 }
2394
2395 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2396                               float_status *s)
2397 {
2398     FloatParts64 p;
2399
2400     float16_unpack_canonical(&p, a, s);
2401     return round_to_int_and_pack(p, rmode, scale, INT8_MIN, INT8_MAX, s);
2402 }
2403
2404 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2405                                 float_status *s)
2406 {
2407     FloatParts64 p;
2408
2409     float16_unpack_canonical(&p, a, s);
2410     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2411 }
2412
2413 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2414                                 float_status *s)
2415 {
2416     FloatParts64 p;
2417
2418     float16_unpack_canonical(&p, a, s);
2419     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2420 }
2421
2422 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2423                                 float_status *s)
2424 {
2425     FloatParts64 p;
2426
2427     float16_unpack_canonical(&p, a, s);
2428     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2429 }
2430
2431 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2432                                 float_status *s)
2433 {
2434     FloatParts64 p;
2435
2436     float32_unpack_canonical(&p, a, s);
2437     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2438 }
2439
2440 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2441                                 float_status *s)
2442 {
2443     FloatParts64 p;
2444
2445     float32_unpack_canonical(&p, a, s);
2446     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2447 }
2448
2449 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2450                                 float_status *s)
2451 {
2452     FloatParts64 p;
2453
2454     float32_unpack_canonical(&p, a, s);
2455     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2456 }
2457
2458 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2459                                 float_status *s)
2460 {
2461     FloatParts64 p;
2462
2463     float64_unpack_canonical(&p, a, s);
2464     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2465 }
2466
2467 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2468                                 float_status *s)
2469 {
2470     FloatParts64 p;
2471
2472     float64_unpack_canonical(&p, a, s);
2473     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2474 }
2475
2476 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2477                                 float_status *s)
2478 {
2479     FloatParts64 p;
2480
2481     float64_unpack_canonical(&p, a, s);
2482     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2483 }
2484
2485 int8_t float16_to_int8(float16 a, float_status *s)
2486 {
2487     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2488 }
2489
2490 int16_t float16_to_int16(float16 a, float_status *s)
2491 {
2492     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2493 }
2494
2495 int32_t float16_to_int32(float16 a, float_status *s)
2496 {
2497     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2498 }
2499
2500 int64_t float16_to_int64(float16 a, float_status *s)
2501 {
2502     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2503 }
2504
2505 int16_t float32_to_int16(float32 a, float_status *s)
2506 {
2507     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2508 }
2509
2510 int32_t float32_to_int32(float32 a, float_status *s)
2511 {
2512     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2513 }
2514
2515 int64_t float32_to_int64(float32 a, float_status *s)
2516 {
2517     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2518 }
2519
2520 int16_t float64_to_int16(float64 a, float_status *s)
2521 {
2522     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2523 }
2524
2525 int32_t float64_to_int32(float64 a, float_status *s)
2526 {
2527     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2528 }
2529
2530 int64_t float64_to_int64(float64 a, float_status *s)
2531 {
2532     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2533 }
2534
2535 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2536 {
2537     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2538 }
2539
2540 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2541 {
2542     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2543 }
2544
2545 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2546 {
2547     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2548 }
2549
2550 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2551 {
2552     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2553 }
2554
2555 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2556 {
2557     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2558 }
2559
2560 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2561 {
2562     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2563 }
2564
2565 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2566 {
2567     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2568 }
2569
2570 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2571 {
2572     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2573 }
2574
2575 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2576 {
2577     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2578 }
2579
2580 /*
2581  * Returns the result of converting the floating-point value `a' to
2582  * the two's complement integer format.
2583  */
2584
2585 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2586                                  float_status *s)
2587 {
2588     FloatParts64 p;
2589
2590     bfloat16_unpack_canonical(&p, a, s);
2591     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2592 }
2593
2594 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2595                                  float_status *s)
2596 {
2597     FloatParts64 p;
2598
2599     bfloat16_unpack_canonical(&p, a, s);
2600     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2601 }
2602
2603 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2604                                  float_status *s)
2605 {
2606     FloatParts64 p;
2607
2608     bfloat16_unpack_canonical(&p, a, s);
2609     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2610 }
2611
2612 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2613 {
2614     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2615 }
2616
2617 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2618 {
2619     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2620 }
2621
2622 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2623 {
2624     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2625 }
2626
2627 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2628 {
2629     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2630 }
2631
2632 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2633 {
2634     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2635 }
2636
2637 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2638 {
2639     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2640 }
2641
/*
 *  Returns the result of converting the floating-point value `a' to
 *  the unsigned integer format. The conversion is performed according
 *  to the IEC/IEEE Standard for Binary Floating-Point
 *  Arithmetic---which means in particular that the conversion is
 *  rounded according to the current rounding mode. If `a' is a NaN,
 *  the largest unsigned integer is returned. Otherwise, if the
 *  conversion overflows, the largest unsigned integer is returned. If
 *  `a' is negative, the value is rounded first and zero is returned;
 *  negative values that do not round to zero raise the invalid
 *  exception flag.
 */
2654
2655 static uint64_t round_to_uint_and_pack(FloatParts64 in, FloatRoundMode rmode,
2656                                        int scale, uint64_t max,
2657                                        float_status *s)
2658 {
2659     int orig_flags = get_float_exception_flags(s);
2660     FloatParts64 p = round_to_int(in, rmode, scale, s);
2661     uint64_t r;
2662
2663     switch (p.cls) {
2664     case float_class_snan:
2665     case float_class_qnan:
2666         s->float_exception_flags = orig_flags | float_flag_invalid;
2667         return max;
2668     case float_class_inf:
2669         s->float_exception_flags = orig_flags | float_flag_invalid;
2670         return p.sign ? 0 : max;
2671     case float_class_zero:
2672         return 0;
2673     case float_class_normal:
2674         if (p.sign) {
2675             s->float_exception_flags = orig_flags | float_flag_invalid;
2676             return 0;
2677         }
2678
2679         if (p.exp <= DECOMPOSED_BINARY_POINT) {
2680             r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2681         } else {
2682             s->float_exception_flags = orig_flags | float_flag_invalid;
2683             return max;
2684         }
2685
2686         /* For uint64 this will never trip, but if p.exp is too large
2687          * to shift a decomposed fraction we shall have exited via the
2688          * 3rd leg above.
2689          */
2690         if (r > max) {
2691             s->float_exception_flags = orig_flags | float_flag_invalid;
2692             return max;
2693         }
2694         return r;
2695     default:
2696         g_assert_not_reached();
2697     }
2698 }
2699
2700 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2701                                 float_status *s)
2702 {
2703     FloatParts64 p;
2704
2705     float16_unpack_canonical(&p, a, s);
2706     return round_to_uint_and_pack(p, rmode, scale, UINT8_MAX, s);
2707 }
2708
2709 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2710                                   float_status *s)
2711 {
2712     FloatParts64 p;
2713
2714     float16_unpack_canonical(&p, a, s);
2715     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2716 }
2717
2718 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2719                                   float_status *s)
2720 {
2721     FloatParts64 p;
2722
2723     float16_unpack_canonical(&p, a, s);
2724     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2725 }
2726
2727 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2728                                   float_status *s)
2729 {
2730     FloatParts64 p;
2731
2732     float16_unpack_canonical(&p, a, s);
2733     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2734 }
2735
2736 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2737                                   float_status *s)
2738 {
2739     FloatParts64 p;
2740
2741     float32_unpack_canonical(&p, a, s);
2742     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2743 }
2744
2745 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2746                                   float_status *s)
2747 {
2748     FloatParts64 p;
2749
2750     float32_unpack_canonical(&p, a, s);
2751     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2752 }
2753
2754 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2755                                   float_status *s)
2756 {
2757     FloatParts64 p;
2758
2759     float32_unpack_canonical(&p, a, s);
2760     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2761 }
2762
2763 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2764                                   float_status *s)
2765 {
2766     FloatParts64 p;
2767
2768     float64_unpack_canonical(&p, a, s);
2769     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2770 }
2771
2772 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2773                                   float_status *s)
2774 {
2775     FloatParts64 p;
2776
2777     float64_unpack_canonical(&p, a, s);
2778     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2779 }
2780
2781 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2782                                   float_status *s)
2783 {
2784     FloatParts64 p;
2785
2786     float64_unpack_canonical(&p, a, s);
2787     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2788 }
2789
2790 uint8_t float16_to_uint8(float16 a, float_status *s)
2791 {
2792     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2793 }
2794
2795 uint16_t float16_to_uint16(float16 a, float_status *s)
2796 {
2797     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2798 }
2799
2800 uint32_t float16_to_uint32(float16 a, float_status *s)
2801 {
2802     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2803 }
2804
2805 uint64_t float16_to_uint64(float16 a, float_status *s)
2806 {
2807     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2808 }
2809
2810 uint16_t float32_to_uint16(float32 a, float_status *s)
2811 {
2812     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2813 }
2814
2815 uint32_t float32_to_uint32(float32 a, float_status *s)
2816 {
2817     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2818 }
2819
2820 uint64_t float32_to_uint64(float32 a, float_status *s)
2821 {
2822     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2823 }
2824
2825 uint16_t float64_to_uint16(float64 a, float_status *s)
2826 {
2827     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2828 }
2829
2830 uint32_t float64_to_uint32(float64 a, float_status *s)
2831 {
2832     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2833 }
2834
2835 uint64_t float64_to_uint64(float64 a, float_status *s)
2836 {
2837     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2838 }
2839
2840 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2841 {
2842     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2843 }
2844
2845 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2846 {
2847     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2848 }
2849
2850 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2851 {
2852     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2853 }
2854
2855 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2856 {
2857     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2858 }
2859
2860 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2861 {
2862     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2863 }
2864
2865 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2866 {
2867     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2868 }
2869
2870 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2871 {
2872     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2873 }
2874
2875 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2876 {
2877     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2878 }
2879
2880 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2881 {
2882     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2883 }
2884
2885 /*
2886  *  Returns the result of converting the bfloat16 value `a' to
2887  *  the unsigned integer format.
2888  */
2889
2890 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2891                                    int scale, float_status *s)
2892 {
2893     FloatParts64 p;
2894
2895     bfloat16_unpack_canonical(&p, a, s);
2896     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2897 }
2898
2899 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2900                                    int scale, float_status *s)
2901 {
2902     FloatParts64 p;
2903
2904     bfloat16_unpack_canonical(&p, a, s);
2905     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2906 }
2907
2908 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
2909                                    int scale, float_status *s)
2910 {
2911     FloatParts64 p;
2912
2913     bfloat16_unpack_canonical(&p, a, s);
2914     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2915 }
2916
2917 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
2918 {
2919     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2920 }
2921
2922 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
2923 {
2924     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2925 }
2926
2927 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
2928 {
2929     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2930 }
2931
2932 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
2933 {
2934     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2935 }
2936
2937 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
2938 {
2939     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2940 }
2941
2942 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
2943 {
2944     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2945 }
2946
2947 /*
2948  * Integer to float conversions
2949  *
2950  * Returns the result of converting the two's complement integer `a'
2951  * to the floating-point format. The conversion is performed according
2952  * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2953  */
2954
2955 static FloatParts64 int_to_float(int64_t a, int scale, float_status *status)
2956 {
2957     FloatParts64 r = { .sign = false };
2958
2959     if (a == 0) {
2960         r.cls = float_class_zero;
2961     } else {
2962         uint64_t f = a;
2963         int shift;
2964
2965         r.cls = float_class_normal;
2966         if (a < 0) {
2967             f = -f;
2968             r.sign = true;
2969         }
2970         shift = clz64(f);
2971         scale = MIN(MAX(scale, -0x10000), 0x10000);
2972
2973         r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2974         r.frac = f << shift;
2975     }
2976
2977     return r;
2978 }
2979
2980 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
2981 {
2982     FloatParts64 pa = int_to_float(a, scale, status);
2983     return float16_round_pack_canonical(&pa, status);
2984 }
2985
2986 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
2987 {
2988     return int64_to_float16_scalbn(a, scale, status);
2989 }
2990
2991 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
2992 {
2993     return int64_to_float16_scalbn(a, scale, status);
2994 }
2995
2996 float16 int64_to_float16(int64_t a, float_status *status)
2997 {
2998     return int64_to_float16_scalbn(a, 0, status);
2999 }
3000
3001 float16 int32_to_float16(int32_t a, float_status *status)
3002 {
3003     return int64_to_float16_scalbn(a, 0, status);
3004 }
3005
3006 float16 int16_to_float16(int16_t a, float_status *status)
3007 {
3008     return int64_to_float16_scalbn(a, 0, status);
3009 }
3010
3011 float16 int8_to_float16(int8_t a, float_status *status)
3012 {
3013     return int64_to_float16_scalbn(a, 0, status);
3014 }
3015
3016 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3017 {
3018     FloatParts64 pa = int_to_float(a, scale, status);
3019     return float32_round_pack_canonical(&pa, status);
3020 }
3021
3022 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3023 {
3024     return int64_to_float32_scalbn(a, scale, status);
3025 }
3026
3027 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3028 {
3029     return int64_to_float32_scalbn(a, scale, status);
3030 }
3031
3032 float32 int64_to_float32(int64_t a, float_status *status)
3033 {
3034     return int64_to_float32_scalbn(a, 0, status);
3035 }
3036
3037 float32 int32_to_float32(int32_t a, float_status *status)
3038 {
3039     return int64_to_float32_scalbn(a, 0, status);
3040 }
3041
3042 float32 int16_to_float32(int16_t a, float_status *status)
3043 {
3044     return int64_to_float32_scalbn(a, 0, status);
3045 }
3046
3047 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3048 {
3049     FloatParts64 pa = int_to_float(a, scale, status);
3050     return float64_round_pack_canonical(&pa, status);
3051 }
3052
3053 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3054 {
3055     return int64_to_float64_scalbn(a, scale, status);
3056 }
3057
3058 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3059 {
3060     return int64_to_float64_scalbn(a, scale, status);
3061 }
3062
3063 float64 int64_to_float64(int64_t a, float_status *status)
3064 {
3065     return int64_to_float64_scalbn(a, 0, status);
3066 }
3067
3068 float64 int32_to_float64(int32_t a, float_status *status)
3069 {
3070     return int64_to_float64_scalbn(a, 0, status);
3071 }
3072
3073 float64 int16_to_float64(int16_t a, float_status *status)
3074 {
3075     return int64_to_float64_scalbn(a, 0, status);
3076 }
3077
3078 /*
3079  * Returns the result of converting the two's complement integer `a'
3080  * to the bfloat16 format.
3081  */
3082
3083 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3084 {
3085     FloatParts64 pa = int_to_float(a, scale, status);
3086     return bfloat16_round_pack_canonical(&pa, status);
3087 }
3088
3089 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3090 {
3091     return int64_to_bfloat16_scalbn(a, scale, status);
3092 }
3093
3094 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3095 {
3096     return int64_to_bfloat16_scalbn(a, scale, status);
3097 }
3098
3099 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3100 {
3101     return int64_to_bfloat16_scalbn(a, 0, status);
3102 }
3103
3104 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3105 {
3106     return int64_to_bfloat16_scalbn(a, 0, status);
3107 }
3108
3109 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3110 {
3111     return int64_to_bfloat16_scalbn(a, 0, status);
3112 }
3113
3114 /*
3115  * Unsigned Integer to float conversions
3116  *
3117  * Returns the result of converting the unsigned integer `a' to the
3118  * floating-point format. The conversion is performed according to the
3119  * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3120  */
3121
3122 static FloatParts64 uint_to_float(uint64_t a, int scale, float_status *status)
3123 {
3124     FloatParts64 r = { .sign = false };
3125     int shift;
3126
3127     if (a == 0) {
3128         r.cls = float_class_zero;
3129     } else {
3130         scale = MIN(MAX(scale, -0x10000), 0x10000);
3131         shift = clz64(a);
3132         r.cls = float_class_normal;
3133         r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
3134         r.frac = a << shift;
3135     }
3136
3137     return r;
3138 }
3139
3140 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3141 {
3142     FloatParts64 pa = uint_to_float(a, scale, status);
3143     return float16_round_pack_canonical(&pa, status);
3144 }
3145
3146 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3147 {
3148     return uint64_to_float16_scalbn(a, scale, status);
3149 }
3150
3151 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3152 {
3153     return uint64_to_float16_scalbn(a, scale, status);
3154 }
3155
3156 float16 uint64_to_float16(uint64_t a, float_status *status)
3157 {
3158     return uint64_to_float16_scalbn(a, 0, status);
3159 }
3160
3161 float16 uint32_to_float16(uint32_t a, float_status *status)
3162 {
3163     return uint64_to_float16_scalbn(a, 0, status);
3164 }
3165
3166 float16 uint16_to_float16(uint16_t a, float_status *status)
3167 {
3168     return uint64_to_float16_scalbn(a, 0, status);
3169 }
3170
3171 float16 uint8_to_float16(uint8_t a, float_status *status)
3172 {
3173     return uint64_to_float16_scalbn(a, 0, status);
3174 }
3175
3176 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3177 {
3178     FloatParts64 pa = uint_to_float(a, scale, status);
3179     return float32_round_pack_canonical(&pa, status);
3180 }
3181
3182 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3183 {
3184     return uint64_to_float32_scalbn(a, scale, status);
3185 }
3186
3187 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3188 {
3189     return uint64_to_float32_scalbn(a, scale, status);
3190 }
3191
3192 float32 uint64_to_float32(uint64_t a, float_status *status)
3193 {
3194     return uint64_to_float32_scalbn(a, 0, status);
3195 }
3196
3197 float32 uint32_to_float32(uint32_t a, float_status *status)
3198 {
3199     return uint64_to_float32_scalbn(a, 0, status);
3200 }
3201
3202 float32 uint16_to_float32(uint16_t a, float_status *status)
3203 {
3204     return uint64_to_float32_scalbn(a, 0, status);
3205 }
3206
3207 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3208 {
3209     FloatParts64 pa = uint_to_float(a, scale, status);
3210     return float64_round_pack_canonical(&pa, status);
3211 }
3212
3213 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3214 {
3215     return uint64_to_float64_scalbn(a, scale, status);
3216 }
3217
3218 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3219 {
3220     return uint64_to_float64_scalbn(a, scale, status);
3221 }
3222
3223 float64 uint64_to_float64(uint64_t a, float_status *status)
3224 {
3225     return uint64_to_float64_scalbn(a, 0, status);
3226 }
3227
3228 float64 uint32_to_float64(uint32_t a, float_status *status)
3229 {
3230     return uint64_to_float64_scalbn(a, 0, status);
3231 }
3232
3233 float64 uint16_to_float64(uint16_t a, float_status *status)
3234 {
3235     return uint64_to_float64_scalbn(a, 0, status);
3236 }
3237
3238 /*
3239  * Returns the result of converting the unsigned integer `a' to the
3240  * bfloat16 format.
3241  */
3242
3243 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3244 {
3245     FloatParts64 pa = uint_to_float(a, scale, status);
3246     return bfloat16_round_pack_canonical(&pa, status);
3247 }
3248
3249 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3250 {
3251     return uint64_to_bfloat16_scalbn(a, scale, status);
3252 }
3253
3254 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3255 {
3256     return uint64_to_bfloat16_scalbn(a, scale, status);
3257 }
3258
3259 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3260 {
3261     return uint64_to_bfloat16_scalbn(a, 0, status);
3262 }
3263
3264 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3265 {
3266     return uint64_to_bfloat16_scalbn(a, 0, status);
3267 }
3268
3269 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3270 {
3271     return uint64_to_bfloat16_scalbn(a, 0, status);
3272 }
3273
3274 /* Float Min/Max */
3275 /* min() and max() functions. These can't be implemented as
3276  * 'compare and pick one input' because that would mishandle
3277  * NaNs and +0 vs -0.
3278  *
3279  * minnum() and maxnum() functions. These are similar to the min()
3280  * and max() functions but if one of the arguments is a QNaN and
3281  * the other is numerical then the numerical argument is returned.
3282  * SNaNs will get quietened before being returned.
3283  * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
3284  * and maxNum() operations. min() and max() are the typical min/max
3285  * semantics provided by many CPUs which predate that specification.
3286  *
3287  * minnummag() and maxnummag() functions correspond to minNumMag()
3288  * and maxNumMag() from IEEE 754-2008.
3289  */
3290 static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
3291                                 bool ieee, bool ismag, float_status *s)
3292 {
3293     if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
3294         if (ieee) {
3295             /* Takes two floating-point values `a' and `b', one of
3296              * which is a NaN, and returns the appropriate NaN
3297              * result. If either `a' or `b' is a signaling NaN,
3298              * the invalid exception is raised.
3299              */
3300             if (is_snan(a.cls) || is_snan(b.cls)) {
3301                 return *parts_pick_nan(&a, &b, s);
3302             } else if (is_nan(a.cls) && !is_nan(b.cls)) {
3303                 return b;
3304             } else if (is_nan(b.cls) && !is_nan(a.cls)) {
3305                 return a;
3306             }
3307         }
3308         return *parts_pick_nan(&a, &b, s);
3309     } else {
3310         int a_exp, b_exp;
3311
3312         switch (a.cls) {
3313         case float_class_normal:
3314             a_exp = a.exp;
3315             break;
3316         case float_class_inf:
3317             a_exp = INT_MAX;
3318             break;
3319         case float_class_zero:
3320             a_exp = INT_MIN;
3321             break;
3322         default:
3323             g_assert_not_reached();
3324             break;
3325         }
3326         switch (b.cls) {
3327         case float_class_normal:
3328             b_exp = b.exp;
3329             break;
3330         case float_class_inf:
3331             b_exp = INT_MAX;
3332             break;
3333         case float_class_zero:
3334             b_exp = INT_MIN;
3335             break;
3336         default:
3337             g_assert_not_reached();
3338             break;
3339         }
3340
3341         if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
3342             bool a_less = a_exp < b_exp;
3343             if (a_exp == b_exp) {
3344                 a_less = a.frac < b.frac;
3345             }
3346             return a_less ^ ismin ? b : a;
3347         }
3348
3349         if (a.sign == b.sign) {
3350             bool a_less = a_exp < b_exp;
3351             if (a_exp == b_exp) {
3352                 a_less = a.frac < b.frac;
3353             }
3354             return a.sign ^ a_less ^ ismin ? b : a;
3355         } else {
3356             return a.sign ^ ismin ? b : a;
3357         }
3358     }
3359 }
3360
3361 #define MINMAX(sz, name, ismin, isiee, ismag)                           \
3362 float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b,      \
3363                                      float_status *s)                   \
3364 {                                                                       \
3365     FloatParts64 pa, pb, pr;                                            \
3366     float ## sz ## _unpack_canonical(&pa, a, s);                        \
3367     float ## sz ## _unpack_canonical(&pb, b, s);                        \
3368     pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);                 \
3369     return float ## sz ## _round_pack_canonical(&pr, s);                \
3370 }
3371
3372 MINMAX(16, min, true, false, false)
3373 MINMAX(16, minnum, true, true, false)
3374 MINMAX(16, minnummag, true, true, true)
3375 MINMAX(16, max, false, false, false)
3376 MINMAX(16, maxnum, false, true, false)
3377 MINMAX(16, maxnummag, false, true, true)
3378
3379 MINMAX(32, min, true, false, false)
3380 MINMAX(32, minnum, true, true, false)
3381 MINMAX(32, minnummag, true, true, true)
3382 MINMAX(32, max, false, false, false)
3383 MINMAX(32, maxnum, false, true, false)
3384 MINMAX(32, maxnummag, false, true, true)
3385
3386 MINMAX(64, min, true, false, false)
3387 MINMAX(64, minnum, true, true, false)
3388 MINMAX(64, minnummag, true, true, true)
3389 MINMAX(64, max, false, false, false)
3390 MINMAX(64, maxnum, false, true, false)
3391 MINMAX(64, maxnummag, false, true, true)
3392
3393 #undef MINMAX
3394
3395 #define BF16_MINMAX(name, ismin, isiee, ismag)                          \
3396 bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s)     \
3397 {                                                                       \
3398     FloatParts64 pa, pb, pr;                                            \
3399     bfloat16_unpack_canonical(&pa, a, s);                               \
3400     bfloat16_unpack_canonical(&pb, b, s);                               \
3401     pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);                 \
3402     return bfloat16_round_pack_canonical(&pr, s);                       \
3403 }
3404
3405 BF16_MINMAX(min, true, false, false)
3406 BF16_MINMAX(minnum, true, true, false)
3407 BF16_MINMAX(minnummag, true, true, true)
3408 BF16_MINMAX(max, false, false, false)
3409 BF16_MINMAX(maxnum, false, true, false)
3410 BF16_MINMAX(maxnummag, false, true, true)
3411
3412 #undef BF16_MINMAX
3413
3414 /* Floating point compare */
3415 static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
3416                                     float_status *s)
3417 {
3418     if (is_nan(a.cls) || is_nan(b.cls)) {
3419         if (!is_quiet ||
3420             a.cls == float_class_snan ||
3421             b.cls == float_class_snan) {
3422             float_raise(float_flag_invalid, s);
3423         }
3424         return float_relation_unordered;
3425     }
3426
3427     if (a.cls == float_class_zero) {
3428         if (b.cls == float_class_zero) {
3429             return float_relation_equal;
3430         }
3431         return b.sign ? float_relation_greater : float_relation_less;
3432     } else if (b.cls == float_class_zero) {
3433         return a.sign ? float_relation_less : float_relation_greater;
3434     }
3435
3436     /* The only really important thing about infinity is its sign. If
3437      * both are infinities the sign marks the smallest of the two.
3438      */
3439     if (a.cls == float_class_inf) {
3440         if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
3441             return float_relation_equal;
3442         }
3443         return a.sign ? float_relation_less : float_relation_greater;
3444     } else if (b.cls == float_class_inf) {
3445         return b.sign ? float_relation_greater : float_relation_less;
3446     }
3447
3448     if (a.sign != b.sign) {
3449         return a.sign ? float_relation_less : float_relation_greater;
3450     }
3451
3452     if (a.exp == b.exp) {
3453         if (a.frac == b.frac) {
3454             return float_relation_equal;
3455         }
3456         if (a.sign) {
3457             return a.frac > b.frac ?
3458                 float_relation_less : float_relation_greater;
3459         } else {
3460             return a.frac > b.frac ?
3461                 float_relation_greater : float_relation_less;
3462         }
3463     } else {
3464         if (a.sign) {
3465             return a.exp > b.exp ? float_relation_less : float_relation_greater;
3466         } else {
3467             return a.exp > b.exp ? float_relation_greater : float_relation_less;
3468         }
3469     }
3470 }
3471
3472 #define COMPARE(name, attr, sz)                                         \
3473 static int attr                                                         \
3474 name(float ## sz a, float ## sz b, bool is_quiet, float_status *s)      \
3475 {                                                                       \
3476     FloatParts64 pa, pb;                                                \
3477     float ## sz ## _unpack_canonical(&pa, a, s);                        \
3478     float ## sz ## _unpack_canonical(&pb, b, s);                        \
3479     return compare_floats(pa, pb, is_quiet, s);                         \
3480 }
3481
3482 COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
3483 COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
3484 COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
3485
3486 #undef COMPARE
3487
3488 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
3489 {
3490     return soft_f16_compare(a, b, false, s);
3491 }
3492
3493 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
3494 {
3495     return soft_f16_compare(a, b, true, s);
3496 }
3497
3498 static FloatRelation QEMU_FLATTEN
3499 f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
3500 {
3501     union_float32 ua, ub;
3502
3503     ua.s = xa;
3504     ub.s = xb;
3505
3506     if (QEMU_NO_HARDFLOAT) {
3507         goto soft;
3508     }
3509
3510     float32_input_flush2(&ua.s, &ub.s, s);
3511     if (isgreaterequal(ua.h, ub.h)) {
3512         if (isgreater(ua.h, ub.h)) {
3513             return float_relation_greater;
3514         }
3515         return float_relation_equal;
3516     }
3517     if (likely(isless(ua.h, ub.h))) {
3518         return float_relation_less;
3519     }
3520     /* The only condition remaining is unordered.
3521      * Fall through to set flags.
3522      */
3523  soft:
3524     return soft_f32_compare(ua.s, ub.s, is_quiet, s);
3525 }
3526
3527 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
3528 {
3529     return f32_compare(a, b, false, s);
3530 }
3531
3532 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
3533 {
3534     return f32_compare(a, b, true, s);
3535 }
3536
3537 static FloatRelation QEMU_FLATTEN
3538 f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
3539 {
3540     union_float64 ua, ub;
3541
3542     ua.s = xa;
3543     ub.s = xb;
3544
3545     if (QEMU_NO_HARDFLOAT) {
3546         goto soft;
3547     }
3548
3549     float64_input_flush2(&ua.s, &ub.s, s);
3550     if (isgreaterequal(ua.h, ub.h)) {
3551         if (isgreater(ua.h, ub.h)) {
3552             return float_relation_greater;
3553         }
3554         return float_relation_equal;
3555     }
3556     if (likely(isless(ua.h, ub.h))) {
3557         return float_relation_less;
3558     }
3559     /* The only condition remaining is unordered.
3560      * Fall through to set flags.
3561      */
3562  soft:
3563     return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3564 }
3565
3566 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
3567 {
3568     return f64_compare(a, b, false, s);
3569 }
3570
3571 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
3572 {
3573     return f64_compare(a, b, true, s);
3574 }
3575
3576 static FloatRelation QEMU_FLATTEN
3577 soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
3578 {
3579     FloatParts64 pa, pb;
3580
3581     bfloat16_unpack_canonical(&pa, a, s);
3582     bfloat16_unpack_canonical(&pb, b, s);
3583     return compare_floats(pa, pb, is_quiet, s);
3584 }
3585
3586 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3587 {
3588     return soft_bf16_compare(a, b, false, s);
3589 }
3590
3591 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3592 {
3593     return soft_bf16_compare(a, b, true, s);
3594 }
3595
3596 /* Multiply A by 2 raised to the power N.  */
3597 static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
3598 {
3599     if (unlikely(is_nan(a.cls))) {
3600         parts_return_nan(&a, s);
3601     }
3602     if (a.cls == float_class_normal) {
3603         /* The largest float type (even though not supported by FloatParts64)
3604          * is float128, which has a 15 bit exponent.  Bounding N to 16 bits
3605          * still allows rounding to infinity, without allowing overflow
3606          * within the int32_t that backs FloatParts64.exp.
3607          */
3608         n = MIN(MAX(n, -0x10000), 0x10000);
3609         a.exp += n;
3610     }
3611     return a;
3612 }
3613
3614 float16 float16_scalbn(float16 a, int n, float_status *status)
3615 {
3616     FloatParts64 pa, pr;
3617
3618     float16_unpack_canonical(&pa, a, status);
3619     pr = scalbn_decomposed(pa, n, status);
3620     return float16_round_pack_canonical(&pr, status);
3621 }
3622
3623 float32 float32_scalbn(float32 a, int n, float_status *status)
3624 {
3625     FloatParts64 pa, pr;
3626
3627     float32_unpack_canonical(&pa, a, status);
3628     pr = scalbn_decomposed(pa, n, status);
3629     return float32_round_pack_canonical(&pr, status);
3630 }
3631
3632 float64 float64_scalbn(float64 a, int n, float_status *status)
3633 {
3634     FloatParts64 pa, pr;
3635
3636     float64_unpack_canonical(&pa, a, status);
3637     pr = scalbn_decomposed(pa, n, status);
3638     return float64_round_pack_canonical(&pr, status);
3639 }
3640
3641 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3642 {
3643     FloatParts64 pa, pr;
3644
3645     bfloat16_unpack_canonical(&pa, a, status);
3646     pr = scalbn_decomposed(pa, n, status);
3647     return bfloat16_round_pack_canonical(&pr, status);
3648 }
3649
3650 /*
3651  * Square Root
3652  *
3653  * The old softfloat code did an approximation step before zeroing in
3654  * on the final result. However, for simplicity we just compute the
3655  * square root by iterating down from the implicit bit to enough extra
3656  * bits to ensure we get a correctly rounded result.
3657  *
3658  * This does mean however the calculation is slower than before,
3659  * especially for 64 bit floats.
3660  */
3661
3662 static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
3663 {
3664     uint64_t a_frac, r_frac, s_frac;
3665     int bit, last_bit;
3666
3667     if (is_nan(a.cls)) {
3668         parts_return_nan(&a, s);
3669         return a;
3670     }
3671     if (a.cls == float_class_zero) {
3672         return a;  /* sqrt(+-0) = +-0 */
3673     }
3674     if (a.sign) {
3675         float_raise(float_flag_invalid, s);
3676         parts_default_nan(&a, s);
3677         return a;
3678     }
3679     if (a.cls == float_class_inf) {
3680         return a;  /* sqrt(+inf) = +inf */
3681     }
3682
3683     assert(a.cls == float_class_normal);
3684
3685     /* We need two overflow bits at the top. Adding room for that is a
3686      * right shift. If the exponent is odd, we can discard the low bit
3687      * by multiplying the fraction by 2; that's a left shift. Combine
3688      * those and we shift right by 1 if the exponent is odd, otherwise 2.
3689      */
3690     a_frac = a.frac >> (2 - (a.exp & 1));
3691     a.exp >>= 1;
3692
3693     /* Bit-by-bit computation of sqrt.  */
3694     r_frac = 0;
3695     s_frac = 0;
3696
3697     /* Iterate from implicit bit down to the 3 extra bits to compute a
3698      * properly rounded result. Remember we've inserted two more bits
3699      * at the top, so these positions are two less.
3700      */
3701     bit = DECOMPOSED_BINARY_POINT - 2;
3702     last_bit = MAX(p->frac_shift - 4, 0);
3703     do {
3704         uint64_t q = 1ULL << bit;
3705         uint64_t t_frac = s_frac + q;
3706         if (t_frac <= a_frac) {
3707             s_frac = t_frac + q;
3708             a_frac -= t_frac;
3709             r_frac += q;
3710         }
3711         a_frac <<= 1;
3712     } while (--bit >= last_bit);
3713
3714     /* Undo the right shift done above. If there is any remaining
3715      * fraction, the result is inexact. Set the sticky bit.
3716      */
3717     a.frac = (r_frac << 2) + (a_frac != 0);
3718
3719     return a;
3720 }
3721
3722 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
3723 {
3724     FloatParts64 pa, pr;
3725
3726     float16_unpack_canonical(&pa, a, status);
3727     pr = sqrt_float(pa, status, &float16_params);
3728     return float16_round_pack_canonical(&pr, status);
3729 }
3730
3731 static float32 QEMU_SOFTFLOAT_ATTR
3732 soft_f32_sqrt(float32 a, float_status *status)
3733 {
3734     FloatParts64 pa, pr;
3735
3736     float32_unpack_canonical(&pa, a, status);
3737     pr = sqrt_float(pa, status, &float32_params);
3738     return float32_round_pack_canonical(&pr, status);
3739 }
3740
3741 static float64 QEMU_SOFTFLOAT_ATTR
3742 soft_f64_sqrt(float64 a, float_status *status)
3743 {
3744     FloatParts64 pa, pr;
3745
3746     float64_unpack_canonical(&pa, a, status);
3747     pr = sqrt_float(pa, status, &float64_params);
3748     return float64_round_pack_canonical(&pr, status);
3749 }
3750
3751 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3752 {
3753     union_float32 ua, ur;
3754
3755     ua.s = xa;
3756     if (unlikely(!can_use_fpu(s))) {
3757         goto soft;
3758     }
3759
3760     float32_input_flush1(&ua.s, s);
3761     if (QEMU_HARDFLOAT_1F32_USE_FP) {
3762         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3763                        fpclassify(ua.h) == FP_ZERO) ||
3764                      signbit(ua.h))) {
3765             goto soft;
3766         }
3767     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3768                         float32_is_neg(ua.s))) {
3769         goto soft;
3770     }
3771     ur.h = sqrtf(ua.h);
3772     return ur.s;
3773
3774  soft:
3775     return soft_f32_sqrt(ua.s, s);
3776 }
3777
3778 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3779 {
3780     union_float64 ua, ur;
3781
3782     ua.s = xa;
3783     if (unlikely(!can_use_fpu(s))) {
3784         goto soft;
3785     }
3786
3787     float64_input_flush1(&ua.s, s);
3788     if (QEMU_HARDFLOAT_1F64_USE_FP) {
3789         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3790                        fpclassify(ua.h) == FP_ZERO) ||
3791                      signbit(ua.h))) {
3792             goto soft;
3793         }
3794     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3795                         float64_is_neg(ua.s))) {
3796         goto soft;
3797     }
3798     ur.h = sqrt(ua.h);
3799     return ur.s;
3800
3801  soft:
3802     return soft_f64_sqrt(ua.s, s);
3803 }
3804
3805 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3806 {
3807     FloatParts64 pa, pr;
3808
3809     bfloat16_unpack_canonical(&pa, a, status);
3810     pr = sqrt_float(pa, status, &bfloat16_params);
3811     return bfloat16_round_pack_canonical(&pr, status);
3812 }
3813
3814 /*----------------------------------------------------------------------------
3815 | The pattern for a default generated NaN.
3816 *----------------------------------------------------------------------------*/
3817
3818 float16 float16_default_nan(float_status *status)
3819 {
3820     FloatParts64 p;
3821
3822     parts_default_nan(&p, status);
3823     p.frac >>= float16_params.frac_shift;
3824     return float16_pack_raw(&p);
3825 }
3826
3827 float32 float32_default_nan(float_status *status)
3828 {
3829     FloatParts64 p;
3830
3831     parts_default_nan(&p, status);
3832     p.frac >>= float32_params.frac_shift;
3833     return float32_pack_raw(&p);
3834 }
3835
3836 float64 float64_default_nan(float_status *status)
3837 {
3838     FloatParts64 p;
3839
3840     parts_default_nan(&p, status);
3841     p.frac >>= float64_params.frac_shift;
3842     return float64_pack_raw(&p);
3843 }
3844
3845 float128 float128_default_nan(float_status *status)
3846 {
3847     FloatParts128 p;
3848
3849     parts_default_nan(&p, status);
3850     frac_shr(&p, float128_params.frac_shift);
3851     return float128_pack_raw(&p);
3852 }
3853
3854 bfloat16 bfloat16_default_nan(float_status *status)
3855 {
3856     FloatParts64 p;
3857
3858     parts_default_nan(&p, status);
3859     p.frac >>= bfloat16_params.frac_shift;
3860     return bfloat16_pack_raw(&p);
3861 }
3862
3863 /*----------------------------------------------------------------------------
3864 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3865 *----------------------------------------------------------------------------*/
3866
3867 float16 float16_silence_nan(float16 a, float_status *status)
3868 {
3869     FloatParts64 p;
3870
3871     float16_unpack_raw(&p, a);
3872     p.frac <<= float16_params.frac_shift;
3873     parts_silence_nan(&p, status);
3874     p.frac >>= float16_params.frac_shift;
3875     return float16_pack_raw(&p);
3876 }
3877
3878 float32 float32_silence_nan(float32 a, float_status *status)
3879 {
3880     FloatParts64 p;
3881
3882     float32_unpack_raw(&p, a);
3883     p.frac <<= float32_params.frac_shift;
3884     parts_silence_nan(&p, status);
3885     p.frac >>= float32_params.frac_shift;
3886     return float32_pack_raw(&p);
3887 }
3888
3889 float64 float64_silence_nan(float64 a, float_status *status)
3890 {
3891     FloatParts64 p;
3892
3893     float64_unpack_raw(&p, a);
3894     p.frac <<= float64_params.frac_shift;
3895     parts_silence_nan(&p, status);
3896     p.frac >>= float64_params.frac_shift;
3897     return float64_pack_raw(&p);
3898 }
3899
3900 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3901 {
3902     FloatParts64 p;
3903
3904     bfloat16_unpack_raw(&p, a);
3905     p.frac <<= bfloat16_params.frac_shift;
3906     parts_silence_nan(&p, status);
3907     p.frac >>= bfloat16_params.frac_shift;
3908     return bfloat16_pack_raw(&p);
3909 }
3910
3911 float128 float128_silence_nan(float128 a, float_status *status)
3912 {
3913     FloatParts128 p;
3914
3915     float128_unpack_raw(&p, a);
3916     frac_shl(&p, float128_params.frac_shift);
3917     parts_silence_nan(&p, status);
3918     frac_shr(&p, float128_params.frac_shift);
3919     return float128_pack_raw(&p);
3920 }
3921
3922 /*----------------------------------------------------------------------------
| If `a' is denormal and flush-inputs-to-zero mode is enabled then set the
| input-denormal exception and return a zero with the same sign as `a'.
| Otherwise just return the value.
3925 *----------------------------------------------------------------------------*/
3926
3927 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
3928 {
3929     if (p.exp == 0 && p.frac != 0) {
3930         float_raise(float_flag_input_denormal, status);
3931         return true;
3932     }
3933
3934     return false;
3935 }
3936
3937 float16 float16_squash_input_denormal(float16 a, float_status *status)
3938 {
3939     if (status->flush_inputs_to_zero) {
3940         FloatParts64 p;
3941
3942         float16_unpack_raw(&p, a);
3943         if (parts_squash_denormal(p, status)) {
3944             return float16_set_sign(float16_zero, p.sign);
3945         }
3946     }
3947     return a;
3948 }
3949
3950 float32 float32_squash_input_denormal(float32 a, float_status *status)
3951 {
3952     if (status->flush_inputs_to_zero) {
3953         FloatParts64 p;
3954
3955         float32_unpack_raw(&p, a);
3956         if (parts_squash_denormal(p, status)) {
3957             return float32_set_sign(float32_zero, p.sign);
3958         }
3959     }
3960     return a;
3961 }
3962
3963 float64 float64_squash_input_denormal(float64 a, float_status *status)
3964 {
3965     if (status->flush_inputs_to_zero) {
3966         FloatParts64 p;
3967
3968         float64_unpack_raw(&p, a);
3969         if (parts_squash_denormal(p, status)) {
3970             return float64_set_sign(float64_zero, p.sign);
3971         }
3972     }
3973     return a;
3974 }
3975
3976 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3977 {
3978     if (status->flush_inputs_to_zero) {
3979         FloatParts64 p;
3980
3981         bfloat16_unpack_raw(&p, a);
3982         if (parts_squash_denormal(p, status)) {
3983             return bfloat16_set_sign(bfloat16_zero, p.sign);
3984         }
3985     }
3986     return a;
3987 }
3988
3989 /*----------------------------------------------------------------------------
3990 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3991 | and 7, and returns the properly rounded 32-bit integer corresponding to the
3992 | input.  If `zSign' is 1, the input is negated before being converted to an
3993 | integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
3994 | is simply rounded to an integer, with the inexact exception raised if the
3995 | input cannot be represented exactly as an integer.  However, if the fixed-
3996 | point input is too large, the invalid exception is raised and the largest
3997 | positive or negative integer is returned.
3998 *----------------------------------------------------------------------------*/
3999
4000 static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
4001                                  float_status *status)
4002 {
4003     int8_t roundingMode;
4004     bool roundNearestEven;
4005     int8_t roundIncrement, roundBits;
4006     int32_t z;
4007
4008     roundingMode = status->float_rounding_mode;
4009     roundNearestEven = ( roundingMode == float_round_nearest_even );
4010     switch (roundingMode) {
4011     case float_round_nearest_even:
4012     case float_round_ties_away:
4013         roundIncrement = 0x40;
4014         break;
4015     case float_round_to_zero:
4016         roundIncrement = 0;
4017         break;
4018     case float_round_up:
4019         roundIncrement = zSign ? 0 : 0x7f;
4020         break;
4021     case float_round_down:
4022         roundIncrement = zSign ? 0x7f : 0;
4023         break;
4024     case float_round_to_odd:
4025         roundIncrement = absZ & 0x80 ? 0 : 0x7f;
4026         break;
4027     default:
4028         abort();
4029     }
4030     roundBits = absZ & 0x7F;
4031     absZ = ( absZ + roundIncrement )>>7;
4032     if (!(roundBits ^ 0x40) && roundNearestEven) {
4033         absZ &= ~1;
4034     }
4035     z = absZ;
4036     if ( zSign ) z = - z;
4037     if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
4038         float_raise(float_flag_invalid, status);
4039         return zSign ? INT32_MIN : INT32_MAX;
4040     }
4041     if (roundBits) {
4042         float_raise(float_flag_inexact, status);
4043     }
4044     return z;
4045
4046 }
4047
4048 /*----------------------------------------------------------------------------
4049 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4050 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4051 | and returns the properly rounded 64-bit integer corresponding to the input.
4052 | If `zSign' is 1, the input is negated before being converted to an integer.
4053 | Ordinarily, the fixed-point input is simply rounded to an integer, with
4054 | the inexact exception raised if the input cannot be represented exactly as
4055 | an integer.  However, if the fixed-point input is too large, the invalid
4056 | exception is raised and the largest positive or negative integer is
4057 | returned.
4058 *----------------------------------------------------------------------------*/
4059
4060 static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
4061                                float_status *status)
4062 {
4063     int8_t roundingMode;
4064     bool roundNearestEven, increment;
4065     int64_t z;
4066
4067     roundingMode = status->float_rounding_mode;
4068     roundNearestEven = ( roundingMode == float_round_nearest_even );
4069     switch (roundingMode) {
4070     case float_round_nearest_even:
4071     case float_round_ties_away:
4072         increment = ((int64_t) absZ1 < 0);
4073         break;
4074     case float_round_to_zero:
4075         increment = 0;
4076         break;
4077     case float_round_up:
4078         increment = !zSign && absZ1;
4079         break;
4080     case float_round_down:
4081         increment = zSign && absZ1;
4082         break;
4083     case float_round_to_odd:
4084         increment = !(absZ0 & 1) && absZ1;
4085         break;
4086     default:
4087         abort();
4088     }
4089     if ( increment ) {
4090         ++absZ0;
4091         if ( absZ0 == 0 ) goto overflow;
4092         if (!(absZ1 << 1) && roundNearestEven) {
4093             absZ0 &= ~1;
4094         }
4095     }
4096     z = absZ0;
4097     if ( zSign ) z = - z;
4098     if ( z && ( ( z < 0 ) ^ zSign ) ) {
4099  overflow:
4100         float_raise(float_flag_invalid, status);
4101         return zSign ? INT64_MIN : INT64_MAX;
4102     }
4103     if (absZ1) {
4104         float_raise(float_flag_inexact, status);
4105     }
4106     return z;
4107
4108 }
4109
4110 /*----------------------------------------------------------------------------
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
| `absZ1', with binary point between bits 63 and 64 (between the input words),
| and returns the properly rounded 64-bit unsigned integer corresponding to the
| input.  Ordinarily, the fixed-point input is simply rounded to an integer,
| with the inexact exception raised if the input cannot be represented exactly
| as an integer.  However, if the fixed-point input is too large, the invalid
| exception is raised and the largest unsigned integer is returned.  If
| `zSign' is 1 and the rounded magnitude is nonzero, the invalid exception is
| raised and zero is returned.
4118 *----------------------------------------------------------------------------*/
4119
4120 static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
4121                                 uint64_t absZ1, float_status *status)
4122 {
4123     int8_t roundingMode;
4124     bool roundNearestEven, increment;
4125
4126     roundingMode = status->float_rounding_mode;
4127     roundNearestEven = (roundingMode == float_round_nearest_even);
4128     switch (roundingMode) {
4129     case float_round_nearest_even:
4130     case float_round_ties_away:
4131         increment = ((int64_t)absZ1 < 0);
4132         break;
4133     case float_round_to_zero:
4134         increment = 0;
4135         break;
4136     case float_round_up:
4137         increment = !zSign && absZ1;
4138         break;
4139     case float_round_down:
4140         increment = zSign && absZ1;
4141         break;
4142     case float_round_to_odd:
4143         increment = !(absZ0 & 1) && absZ1;
4144         break;
4145     default:
4146         abort();
4147     }
4148     if (increment) {
4149         ++absZ0;
4150         if (absZ0 == 0) {
4151             float_raise(float_flag_invalid, status);
4152             return UINT64_MAX;
4153         }
4154         if (!(absZ1 << 1) && roundNearestEven) {
4155             absZ0 &= ~1;
4156         }
4157     }
4158
4159     if (zSign && absZ0) {
4160         float_raise(float_flag_invalid, status);
4161         return 0;
4162     }
4163
4164     if (absZ1) {
4165         float_raise(float_flag_inexact, status);
4166     }
4167     return absZ0;
4168 }
4169
4170 /*----------------------------------------------------------------------------
4171 | Normalizes the subnormal single-precision floating-point value represented
4172 | by the denormalized significand `aSig'.  The normalized exponent and
4173 | significand are stored at the locations pointed to by `zExpPtr' and
4174 | `zSigPtr', respectively.
4175 *----------------------------------------------------------------------------*/
4176
static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /*
     * Left shift that moves the leading set bit of aSig up to bit 23
     * (clz32() counts the zero bits above it; 8 of them must remain).
     */
    const int8_t leftShift = clz32(aSig) - 8;

    *zSigPtr = aSig << leftShift;
    *zExpPtr = 1 - leftShift;
}
4187
4188 /*----------------------------------------------------------------------------
4189 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4190 | and significand `zSig', and returns the proper single-precision floating-
4191 | point value corresponding to the abstract input.  Ordinarily, the abstract
4192 | value is simply rounded and packed into the single-precision format, with
4193 | the inexact exception raised if the abstract input cannot be represented
4194 | exactly.  However, if the abstract value is too large, the overflow and
4195 | inexact exceptions are raised and an infinity or maximal finite value is
4196 | returned.  If the abstract value is too small, the input value is rounded to
4197 | a subnormal number, and the underflow and inexact exceptions are raised if
4198 | the abstract input cannot be represented exactly as a subnormal single-
4199 | precision floating-point number.
4200 |     The input significand `zSig' has its binary point between bits 30
4201 | and 29, which is 7 bits to the left of the usual location.  This shifted
4202 | significand must be normalized or smaller.  If `zSig' is not normalized,
4203 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4204 | and it must not require rounding.  In the usual case that `zSig' is
4205 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4206 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4207 | Binary Floating-Point Arithmetic.
4208 *----------------------------------------------------------------------------*/
4209
4210 static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4211                                    float_status *status)
4212 {
4213     int8_t roundingMode;
4214     bool roundNearestEven;
4215     int8_t roundIncrement, roundBits;
4216     bool isTiny;
4217
4218     roundingMode = status->float_rounding_mode;
4219     roundNearestEven = ( roundingMode == float_round_nearest_even );
4220     switch (roundingMode) {
4221     case float_round_nearest_even:
4222     case float_round_ties_away:
4223         roundIncrement = 0x40;
4224         break;
4225     case float_round_to_zero:
4226         roundIncrement = 0;
4227         break;
4228     case float_round_up:
4229         roundIncrement = zSign ? 0 : 0x7f;
4230         break;
4231     case float_round_down:
4232         roundIncrement = zSign ? 0x7f : 0;
4233         break;
4234     case float_round_to_odd:
4235         roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4236         break;
4237     default:
4238         abort();
4239         break;
4240     }
4241     roundBits = zSig & 0x7F;
4242     if ( 0xFD <= (uint16_t) zExp ) {
4243         if (    ( 0xFD < zExp )
4244              || (    ( zExp == 0xFD )
4245                   && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
4246            ) {
4247             bool overflow_to_inf = roundingMode != float_round_to_odd &&
4248                                    roundIncrement != 0;
4249             float_raise(float_flag_overflow | float_flag_inexact, status);
4250             return packFloat32(zSign, 0xFF, -!overflow_to_inf);
4251         }
4252         if ( zExp < 0 ) {
4253             if (status->flush_to_zero) {
4254                 float_raise(float_flag_output_denormal, status);
4255                 return packFloat32(zSign, 0, 0);
4256             }
4257             isTiny = status->tininess_before_rounding
4258                   || (zExp < -1)
4259                   || (zSig + roundIncrement < 0x80000000);
4260             shift32RightJamming( zSig, - zExp, &zSig );
4261             zExp = 0;
4262             roundBits = zSig & 0x7F;
4263             if (isTiny && roundBits) {
4264                 float_raise(float_flag_underflow, status);
4265             }
4266             if (roundingMode == float_round_to_odd) {
4267                 /*
4268                  * For round-to-odd case, the roundIncrement depends on
4269                  * zSig which just changed.
4270                  */
4271                 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4272             }
4273         }
4274     }
4275     if (roundBits) {
4276         float_raise(float_flag_inexact, status);
4277     }
4278     zSig = ( zSig + roundIncrement )>>7;
4279     if (!(roundBits ^ 0x40) && roundNearestEven) {
4280         zSig &= ~1;
4281     }
4282     if ( zSig == 0 ) zExp = 0;
4283     return packFloat32( zSign, zExp, zSig );
4284
4285 }
4286
4287 /*----------------------------------------------------------------------------
4288 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4289 | and significand `zSig', and returns the proper single-precision floating-
4290 | point value corresponding to the abstract input.  This routine is just like
4291 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4292 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4293 | floating-point exponent.
4294 *----------------------------------------------------------------------------*/
4295
4296 static float32
4297  normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4298                               float_status *status)
4299 {
4300     int8_t shiftCount;
4301
4302     shiftCount = clz32(zSig) - 1;
4303     return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4304                                status);
4305
4306 }
4307
4308 /*----------------------------------------------------------------------------
4309 | Normalizes the subnormal double-precision floating-point value represented
4310 | by the denormalized significand `aSig'.  The normalized exponent and
4311 | significand are stored at the locations pointed to by `zExpPtr' and
4312 | `zSigPtr', respectively.
4313 *----------------------------------------------------------------------------*/
4314
static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /*
     * Left shift that moves the leading set bit of aSig up to bit 52
     * (clz64() counts the zero bits above it; 11 of them must remain).
     */
    const int8_t leftShift = clz64(aSig) - 11;

    *zSigPtr = aSig << leftShift;
    *zExpPtr = 1 - leftShift;
}
4325
4326 /*----------------------------------------------------------------------------
4327 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4328 | double-precision floating-point value, returning the result.  After being
4329 | shifted into the proper positions, the three fields are simply added
4330 | together to form the result.  This means that any integer portion of `zSig'
4331 | will be added into the exponent.  Since a properly normalized significand
4332 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4333 | than the desired result exponent whenever `zSig' is a complete, normalized
4334 | significand.
4335 *----------------------------------------------------------------------------*/
4336
4337 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4338 {
4339
4340     return make_float64(
4341         ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4342
4343 }
4344
4345 /*----------------------------------------------------------------------------
4346 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4347 | and significand `zSig', and returns the proper double-precision floating-
4348 | point value corresponding to the abstract input.  Ordinarily, the abstract
4349 | value is simply rounded and packed into the double-precision format, with
4350 | the inexact exception raised if the abstract input cannot be represented
4351 | exactly.  However, if the abstract value is too large, the overflow and
4352 | inexact exceptions are raised and an infinity or maximal finite value is
4353 | returned.  If the abstract value is too small, the input value is rounded to
4354 | a subnormal number, and the underflow and inexact exceptions are raised if
4355 | the abstract input cannot be represented exactly as a subnormal double-
4356 | precision floating-point number.
4357 |     The input significand `zSig' has its binary point between bits 62
4358 | and 61, which is 10 bits to the left of the usual location.  This shifted
4359 | significand must be normalized or smaller.  If `zSig' is not normalized,
4360 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4361 | and it must not require rounding.  In the usual case that `zSig' is
4362 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4363 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4364 | Binary Floating-Point Arithmetic.
4365 *----------------------------------------------------------------------------*/
4366
4367 static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4368                                    float_status *status)
4369 {
4370     int8_t roundingMode;
4371     bool roundNearestEven;
4372     int roundIncrement, roundBits;
4373     bool isTiny;
4374
4375     roundingMode = status->float_rounding_mode;
4376     roundNearestEven = ( roundingMode == float_round_nearest_even );
4377     switch (roundingMode) {
4378     case float_round_nearest_even:
4379     case float_round_ties_away:
4380         roundIncrement = 0x200;
4381         break;
4382     case float_round_to_zero:
4383         roundIncrement = 0;
4384         break;
4385     case float_round_up:
4386         roundIncrement = zSign ? 0 : 0x3ff;
4387         break;
4388     case float_round_down:
4389         roundIncrement = zSign ? 0x3ff : 0;
4390         break;
4391     case float_round_to_odd:
4392         roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4393         break;
4394     default:
4395         abort();
4396     }
4397     roundBits = zSig & 0x3FF;
4398     if ( 0x7FD <= (uint16_t) zExp ) {
4399         if (    ( 0x7FD < zExp )
4400              || (    ( zExp == 0x7FD )
4401                   && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
4402            ) {
4403             bool overflow_to_inf = roundingMode != float_round_to_odd &&
4404                                    roundIncrement != 0;
4405             float_raise(float_flag_overflow | float_flag_inexact, status);
4406             return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
4407         }
4408         if ( zExp < 0 ) {
4409             if (status->flush_to_zero) {
4410                 float_raise(float_flag_output_denormal, status);
4411                 return packFloat64(zSign, 0, 0);
4412             }
4413             isTiny = status->tininess_before_rounding
4414                   || (zExp < -1)
4415                   || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
4416             shift64RightJamming( zSig, - zExp, &zSig );
4417             zExp = 0;
4418             roundBits = zSig & 0x3FF;
4419             if (isTiny && roundBits) {
4420                 float_raise(float_flag_underflow, status);
4421             }
4422             if (roundingMode == float_round_to_odd) {
4423                 /*
4424                  * For round-to-odd case, the roundIncrement depends on
4425                  * zSig which just changed.
4426                  */
4427                 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4428             }
4429         }
4430     }
4431     if (roundBits) {
4432         float_raise(float_flag_inexact, status);
4433     }
4434     zSig = ( zSig + roundIncrement )>>10;
4435     if (!(roundBits ^ 0x200) && roundNearestEven) {
4436         zSig &= ~1;
4437     }
4438     if ( zSig == 0 ) zExp = 0;
4439     return packFloat64( zSign, zExp, zSig );
4440
4441 }
4442
4443 /*----------------------------------------------------------------------------
4444 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4445 | and significand `zSig', and returns the proper double-precision floating-
4446 | point value corresponding to the abstract input.  This routine is just like
4447 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4448 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4449 | floating-point exponent.
4450 *----------------------------------------------------------------------------*/
4451
4452 static float64
4453  normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4454                               float_status *status)
4455 {
4456     int8_t shiftCount;
4457
4458     shiftCount = clz64(zSig) - 1;
4459     return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4460                                status);
4461
4462 }
4463
4464 /*----------------------------------------------------------------------------
4465 | Normalizes the subnormal extended double-precision floating-point value
4466 | represented by the denormalized significand `aSig'.  The normalized exponent
4467 | and significand are stored at the locations pointed to by `zExpPtr' and
4468 | `zSigPtr', respectively.
4469 *----------------------------------------------------------------------------*/
4470
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /*
     * Left shift that brings the leading set bit of aSig up to bit 63.
     * NOTE(review): presumably callers never pass aSig == 0, since the
     * shift count would then equal the operand width -- confirm.
     */
    const int8_t leftShift = clz64(aSig);

    *zSigPtr = aSig << leftShift;
    *zExpPtr = 1 - leftShift;
}
4480
4481 /*----------------------------------------------------------------------------
4482 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4483 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4484 | and returns the proper extended double-precision floating-point value
4485 | corresponding to the abstract input.  Ordinarily, the abstract value is
4486 | rounded and packed into the extended double-precision format, with the
4487 | inexact exception raised if the abstract input cannot be represented
4488 | exactly.  However, if the abstract value is too large, the overflow and
4489 | inexact exceptions are raised and an infinity or maximal finite value is
4490 | returned.  If the abstract value is too small, the input value is rounded to
4491 | a subnormal number, and the underflow and inexact exceptions are raised if
4492 | the abstract input cannot be represented exactly as a subnormal extended
4493 | double-precision floating-point number.
4494 |     If `roundingPrecision' is 32 or 64, the result is rounded to the same
4495 | number of bits as single or double precision, respectively.  Otherwise, the
4496 | result is rounded to the full precision of the extended double-precision
4497 | format.
4498 |     The input significand must be normalized or smaller.  If the input
4499 | significand is not normalized, `zExp' must be 0; in that case, the result
4500 | returned is a subnormal number, and it must not require rounding.  The
4501 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4502 | Floating-Point Arithmetic.
4503 *----------------------------------------------------------------------------*/
4504
4505 floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
4506                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4507                               float_status *status)
4508 {
         /*
          * Rounds the abstract value (zSign, zExp, zSig0:zSig1) and packs it
          * with packFloatx80().
          *
          * roundingPrecision selects how much of zSig0 is kept:
          *   64 -> the low 11 bits of zSig0 are rounded away,
          *   32 -> the low 40 bits of zSig0 are rounded away,
          *   80 (or any other value) -> all of zSig0 is kept and the
          *   rounding decision is taken from zSig1.
          *
          * The rounding mode is read from status->float_rounding_mode; only
          * nearest-even, ties-away, to-zero, up and down are handled here,
          * any other mode reaches abort().  Exception flags are reported
          * through float_raise().
          */
4509     int8_t roundingMode;
4510     bool roundNearestEven, increment, isTiny;
4511     int64_t roundIncrement, roundMask, roundBits;
4512
4513     roundingMode = status->float_rounding_mode;
4514     roundNearestEven = ( roundingMode == float_round_nearest_even );
4515     if ( roundingPrecision == 80 ) goto precision80;
4516     if ( roundingPrecision == 64 ) {
             /* roundMask covers the discarded bits; roundIncrement is half
              * of the lowest kept bit. */
4517         roundIncrement = UINT64_C(0x0000000000000400);
4518         roundMask = UINT64_C(0x00000000000007FF);
4519     }
4520     else if ( roundingPrecision == 32 ) {
4521         roundIncrement = UINT64_C(0x0000008000000000);
4522         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4523     }
4524     else {
4525         goto precision80;
4526     }
         /* Reduced precision: zSig1 only matters as a sticky bit, so fold
          * "zSig1 is non-zero" into the lowest bit of zSig0. */
4527     zSig0 |= ( zSig1 != 0 );
4528     switch (roundingMode) {
4529     case float_round_nearest_even:
4530     case float_round_ties_away:
             /* keep the half-ulp increment chosen above */
4531         break;
4532     case float_round_to_zero:
4533         roundIncrement = 0;
4534         break;
4535     case float_round_up:
             /* adding the full mask rounds any non-zero remainder away
              * from zero; applied only for positive values here */
4536         roundIncrement = zSign ? 0 : roundMask;
4537         break;
4538     case float_round_down:
4539         roundIncrement = zSign ? roundMask : 0;
4540         break;
4541     default:
4542         abort();
4543     }
4544     roundBits = zSig0 & roundMask;
         /* The unsigned comparison is true when zExp <= 0 (zExp - 1 wraps to
          * a large value) or when zExp >= 0x7FFE. */
4545     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
             /* Overflow: exponent too large, or exponent is 0x7FFE and the
              * rounding addition would carry out of zSig0. */
4546         if (    ( 0x7FFE < zExp )
4547              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4548            ) {
                 /* Shared handler lives in the precision80 section below;
                  * roundMask is still set for this precision when we jump. */
4549             goto overflow;
4550         }
4551         if ( zExp <= 0 ) {
4552             if (status->flush_to_zero) {
                     /* flush-to-zero: return a signed zero instead of a
                      * denormal result */
4553                 float_raise(float_flag_output_denormal, status);
4554                 return packFloatx80(zSign, 0, 0);
4555             }
                 /* Tiny unless tininess is detected after rounding, zExp is
                  * exactly 0 and the rounding addition carries out. */
4556             isTiny = status->tininess_before_rounding
4557                   || (zExp < 0 )
4558                   || (zSig0 <= zSig0 + roundIncrement);
                 /* Shift right by 1 - zExp, jamming lost bits into the
                  * lowest bit, then recompute the discarded bits. */
4559             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4560             zExp = 0;
4561             roundBits = zSig0 & roundMask;
4562             if (isTiny && roundBits) {
4563                 float_raise(float_flag_underflow, status);
4564             }
4565             if (roundBits) {
4566                 float_raise(float_flag_inexact, status);
4567             }
4568             zSig0 += roundIncrement;
                 /* Top bit set after rounding: pack with exponent 1. */
4569             if ( (int64_t) zSig0 < 0 ) zExp = 1;
                 /* roundIncrement now denotes the lowest kept bit. */
4570             roundIncrement = roundMask + 1;
                 /* Exact tie in nearest-even mode: also clear the lowest
                  * kept bit so the result is even. */
4571             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4572                 roundMask |= roundIncrement;
4573             }
4574             zSig0 &= ~ roundMask;
4575             return packFloatx80( zSign, zExp, zSig0 );
4576         }
4577     }
4578     if (roundBits) {
4579         float_raise(float_flag_inexact, status);
4580     }
4581     zSig0 += roundIncrement;
         /* The addition wrapped around: bump the exponent and set the
          * significand to just its top bit. */
4582     if ( zSig0 < roundIncrement ) {
4583         ++zExp;
4584         zSig0 = UINT64_C(0x8000000000000000);
4585     }
4586     roundIncrement = roundMask + 1;
         /* Same tie-to-even adjustment as in the subnormal path above. */
4587     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4588         roundMask |= roundIncrement;
4589     }
4590     zSig0 &= ~ roundMask;
4591     if ( zSig0 == 0 ) zExp = 0;
4592     return packFloatx80( zSign, zExp, zSig0 );
4593  precision80:
         /* Full precision: zSig0 is kept whole; zSig1 decides rounding. */
4594     switch (roundingMode) {
4595     case float_round_nearest_even:
4596     case float_round_ties_away:
             /* increment when the top bit of zSig1 is set */
4597         increment = ((int64_t)zSig1 < 0);
4598         break;
4599     case float_round_to_zero:
4600         increment = 0;
4601         break;
4602     case float_round_up:
4603         increment = !zSign && zSig1;
4604         break;
4605     case float_round_down:
4606         increment = zSign && zSig1;
4607         break;
4608     default:
4609         abort();
4610     }
         /* True when zExp <= 0 or zExp >= 0x7FFE (see the same test above). */
4611     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4612         if (    ( 0x7FFE < zExp )
4613              || (    ( zExp == 0x7FFE )
4614                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4615                   && increment
4616                 )
4617            ) {
                 /* With roundMask == 0, ~roundMask below is all ones. */
4618             roundMask = 0;
4619  overflow:
                 /* Also entered by goto from the reduced-precision path. */
4620             float_raise(float_flag_overflow | float_flag_inexact, status);
                 /* Modes that round toward zero for this sign return the
                  * value with exponent 0x7FFE and significand ~roundMask;
                  * all other modes return infinity. */
4621             if (    ( roundingMode == float_round_to_zero )
4622                  || ( zSign && ( roundingMode == float_round_up ) )
4623                  || ( ! zSign && ( roundingMode == float_round_down ) )
4624                ) {
4625                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4626             }
4627             return packFloatx80(zSign,
4628                                 floatx80_infinity_high,
4629                                 floatx80_infinity_low);
4630         }
4631         if ( zExp <= 0 ) {
                 /* NOTE(review): unlike the reduced-precision path, this
                  * path does not consult status->flush_to_zero -- confirm
                  * that this is intended. */
4632             isTiny = status->tininess_before_rounding
4633                   || (zExp < 0)
4634                   || !increment
4635                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4636             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4637             zExp = 0;
4638             if (isTiny && zSig1) {
4639                 float_raise(float_flag_underflow, status);
4640             }
4641             if (zSig1) {
4642                 float_raise(float_flag_inexact, status);
4643             }
                 /* zSig1 changed in the shift, so redo the rounding
                  * decision on the shifted value. */
4644             switch (roundingMode) {
4645             case float_round_nearest_even:
4646             case float_round_ties_away:
4647                 increment = ((int64_t)zSig1 < 0);
4648                 break;
4649             case float_round_to_zero:
4650                 increment = 0;
4651                 break;
4652             case float_round_up:
4653                 increment = !zSign && zSig1;
4654                 break;
4655             case float_round_down:
4656                 increment = zSign && zSig1;
4657                 break;
4658             default:
4659                 abort();
4660             }
4661             if ( increment ) {
4662                 ++zSig0;
                     /* No bits below the top bit of zSig1 means an exact
                      * tie; in nearest-even mode force the result even. */
4663                 if (!(zSig1 << 1) && roundNearestEven) {
4664                     zSig0 &= ~1;
4665                 }
                     /* Top bit set after rounding: pack with exponent 1. */
4666                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4667             }
4668             return packFloatx80( zSign, zExp, zSig0 );
4669         }
4670     }
4671     if (zSig1) {
4672         float_raise(float_flag_inexact, status);
4673     }
4674     if ( increment ) {
4675         ++zSig0;
4676         if ( zSig0 == 0 ) {
                 /* Increment wrapped to zero: bump the exponent and set the
                  * significand to just its top bit. */
4677             ++zExp;
4678             zSig0 = UINT64_C(0x8000000000000000);
4679         }
4680         else {
                 /* Exact tie in nearest-even mode: clear the lowest bit. */
4681             if (!(zSig1 << 1) && roundNearestEven) {
4682                 zSig0 &= ~1;
4683             }
4684         }
4685     }
4686     else {
4687         if ( zSig0 == 0 ) zExp = 0;
4688     }
4689     return packFloatx80( zSign, zExp, zSig0 );
4690
4691 }
4692
4693 /*----------------------------------------------------------------------------
4694 | Takes an abstract floating-point value having sign `zSign', exponent
4695 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4696 | and returns the proper extended double-precision floating-point value
4697 | corresponding to the abstract input.  This routine is just like
4698 | `roundAndPackFloatx80' except that the input significand does not have to be
4699 | normalized.
4700 *----------------------------------------------------------------------------*/
4701
4702 floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4703                                        bool zSign, int32_t zExp,
4704                                        uint64_t zSig0, uint64_t zSig1,
4705                                        float_status *status)
4706 {
4707     int8_t shiftCount;
4708
4709     if ( zSig0 == 0 ) {
4710         zSig0 = zSig1;
4711         zSig1 = 0;
4712         zExp -= 64;
4713     }
4714     shiftCount = clz64(zSig0);
4715     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4716     zExp -= shiftCount;
4717     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4718                                 zSig0, zSig1, status);
4719
4720 }
4721
4722 /*----------------------------------------------------------------------------
4723 | Returns the least-significant 64 fraction bits of the quadruple-precision
4724 | floating-point value `a'.
4725 *----------------------------------------------------------------------------*/
4726
4727 static inline uint64_t extractFloat128Frac1( float128 a )
4728 {
4729
4730     return a.low;
4731
4732 }
4733
4734 /*----------------------------------------------------------------------------
4735 | Returns the most-significant 48 fraction bits of the quadruple-precision
4736 | floating-point value `a'.
4737 *----------------------------------------------------------------------------*/
4738
4739 static inline uint64_t extractFloat128Frac0( float128 a )
4740 {
4741
4742     return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4743
4744 }
4745
4746 /*----------------------------------------------------------------------------
4747 | Returns the exponent bits of the quadruple-precision floating-point value
4748 | `a'.
4749 *----------------------------------------------------------------------------*/
4750
4751 static inline int32_t extractFloat128Exp( float128 a )
4752 {
4753
4754     return ( a.high>>48 ) & 0x7FFF;
4755
4756 }
4757
4758 /*----------------------------------------------------------------------------
4759 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4760 *----------------------------------------------------------------------------*/
4761
4762 static inline bool extractFloat128Sign(float128 a)
4763 {
4764     return a.high >> 63;
4765 }
4766
4767 /*----------------------------------------------------------------------------
4768 | Normalizes the subnormal quadruple-precision floating-point value
4769 | represented by the denormalized significand formed by the concatenation of
4770 | `aSig0' and `aSig1'.  The normalized exponent is stored at the location
4771 | pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
4772 | significand are stored at the location pointed to by `zSig0Ptr', and the
4773 | least significant 64 bits of the normalized significand are stored at the
4774 | location pointed to by `zSig1Ptr'.
4775 *----------------------------------------------------------------------------*/
4776
static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shift;

    /* High word non-empty: one short 128-bit left shift is enough. */
    if (aSig0 != 0) {
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /*
     * High word empty: everything comes from the low word.  A negative
     * shift means the low word has fewer than 15 leading zeros, so part of
     * it goes right into the high word and the rest stays in the low word.
     */
    shift = clz64(aSig1) - 15;
    if (shift >= 0) {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    } else {
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    }
    *zExpPtr = -shift - 63;
}
4807
4808 /*----------------------------------------------------------------------------
4809 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4810 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4811 | floating-point value, returning the result.  After being shifted into the
4812 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4813 | added together to form the most significant 32 bits of the result.  This
4814 | means that any integer portion of `zSig0' will be added into the exponent.
4815 | Since a properly normalized significand will have an integer portion equal
4816 | to 1, the `zExp' input should be 1 less than the desired result exponent
4817 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4818 | significand.
4819 *----------------------------------------------------------------------------*/
4820
4821 static inline float128
4822 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4823 {
4824     float128 z;
4825
4826     z.low = zSig1;
4827     z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4828     return z;
4829 }
4830
4831 /*----------------------------------------------------------------------------
4832 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4833 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4834 | and `zSig2', and returns the proper quadruple-precision floating-point value
4835 | corresponding to the abstract input.  Ordinarily, the abstract value is
4836 | simply rounded and packed into the quadruple-precision format, with the
4837 | inexact exception raised if the abstract input cannot be represented
4838 | exactly.  However, if the abstract value is too large, the overflow and
4839 | inexact exceptions are raised and an infinity or maximal finite value is
4840 | returned.  If the abstract value is too small, the input value is rounded to
4841 | a subnormal number, and the underflow and inexact exceptions are raised if
4842 | the abstract input cannot be represented exactly as a subnormal quadruple-
4843 | precision floating-point number.
4844 |     The input significand must be normalized or smaller.  If the input
4845 | significand is not normalized, `zExp' must be 0; in that case, the result
4846 | returned is a subnormal number, and it must not require rounding.  In the
4847 | usual case that the input significand is normalized, `zExp' must be 1 less
4848 | than the ``true'' floating-point exponent.  The handling of underflow and
4849 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4850 *----------------------------------------------------------------------------*/
4851
4852 static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
4853                                      uint64_t zSig0, uint64_t zSig1,
4854                                      uint64_t zSig2, float_status *status)
4855 {
4856     int8_t roundingMode;
4857     bool roundNearestEven, increment, isTiny;
4858
4859     roundingMode = status->float_rounding_mode;
4860     roundNearestEven = ( roundingMode == float_round_nearest_even );
         /* Decide from the extension word zSig2 whether zSig0:zSig1 must be
          * incremented.  Unknown rounding modes reach abort(). */
4861     switch (roundingMode) {
4862     case float_round_nearest_even:
4863     case float_round_ties_away:
             /* increment when the top bit of zSig2 is set */
4864         increment = ((int64_t)zSig2 < 0);
4865         break;
4866     case float_round_to_zero:
4867         increment = 0;
4868         break;
4869     case float_round_up:
4870         increment = !zSign && zSig2;
4871         break;
4872     case float_round_down:
4873         increment = zSign && zSig2;
4874         break;
4875     case float_round_to_odd:
             /* inexact and currently even: increment to make it odd */
4876         increment = !(zSig1 & 0x1) && zSig2;
4877         break;
4878     default:
4879         abort();
4880     }
         /* The unsigned comparison is true when zExp >= 0x7FFD or when zExp
          * is negative (it wraps to a large value). */
4881     if ( 0x7FFD <= (uint32_t) zExp ) {
             /* Overflow: exponent too large, or exponent is 0x7FFD with an
              * all-ones significand that is about to be incremented. */
4882         if (    ( 0x7FFD < zExp )
4883              || (    ( zExp == 0x7FFD )
4884                   && eq128(
4885                          UINT64_C(0x0001FFFFFFFFFFFF),
4886                          UINT64_C(0xFFFFFFFFFFFFFFFF),
4887                          zSig0,
4888                          zSig1
4889                      )
4890                   && increment
4891                 )
4892            ) {
4893             float_raise(float_flag_overflow | float_flag_inexact, status);
                 /* to-zero, the directed mode pointing toward zero for this
                  * sign, and to-odd return exponent field 0x7FFE with an
                  * all-ones fraction; every other mode returns infinity. */
4894             if (    ( roundingMode == float_round_to_zero )
4895                  || ( zSign && ( roundingMode == float_round_up ) )
4896                  || ( ! zSign && ( roundingMode == float_round_down ) )
4897                  || (roundingMode == float_round_to_odd)
4898                ) {
4899                 return
4900                     packFloat128(
4901                         zSign,
4902                         0x7FFE,
4903                         UINT64_C(0x0000FFFFFFFFFFFF),
4904                         UINT64_C(0xFFFFFFFFFFFFFFFF)
4905                     );
4906             }
4907             return packFloat128( zSign, 0x7FFF, 0, 0 );
4908         }
4909         if ( zExp < 0 ) {
4910             if (status->flush_to_zero) {
                     /* flush-to-zero: return a signed zero instead of a
                      * denormal result */
4911                 float_raise(float_flag_output_denormal, status);
4912                 return packFloat128(zSign, 0, 0, 0);
4913             }
                 /* Tiny unless tininess is detected after rounding, zExp is
                  * exactly -1, and incrementing an all-ones significand
                  * would carry out. */
4914             isTiny = status->tininess_before_rounding
4915                   || (zExp < -1)
4916                   || !increment
4917                   || lt128(zSig0, zSig1,
4918                            UINT64_C(0x0001FFFFFFFFFFFF),
4919                            UINT64_C(0xFFFFFFFFFFFFFFFF));
                 /* Shift right by -zExp bits, with the bits shifted out
                  * collected in zSig2. */
4920             shift128ExtraRightJamming(
4921                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4922             zExp = 0;
4923             if (isTiny && zSig2) {
4924                 float_raise(float_flag_underflow, status);
4925             }
                 /* zSig2 changed in the shift, so redo the rounding
                  * decision on the shifted value. */
4926             switch (roundingMode) {
4927             case float_round_nearest_even:
4928             case float_round_ties_away:
4929                 increment = ((int64_t)zSig2 < 0);
4930                 break;
4931             case float_round_to_zero:
4932                 increment = 0;
4933                 break;
4934             case float_round_up:
4935                 increment = !zSign && zSig2;
4936                 break;
4937             case float_round_down:
4938                 increment = zSign && zSig2;
4939                 break;
4940             case float_round_to_odd:
4941                 increment = !(zSig1 & 0x1) && zSig2;
4942                 break;
4943             default:
4944                 abort();
4945             }
4946         }
4947     }
         /* Any non-zero extension bits mean the result is inexact. */
4948     if (zSig2) {
4949         float_raise(float_flag_inexact, status);
4950     }
4951     if ( increment ) {
4952         add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
             /* zSig2 + zSig2 == 0 holds when zSig2 has no bits below its top
              * bit, i.e. an exact tie when incrementing in nearest-even
              * mode; clear the lowest bit so the result is even. */
4953         if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4954             zSig1 &= ~1;
4955         }
4956     }
4957     else {
             /* A zero significand is packed with a zero exponent. */
4958         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4959     }
4960     return packFloat128( zSign, zExp, zSig0, zSig1 );
4961
4962 }
4963
4964 /*----------------------------------------------------------------------------
4965 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4966 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4967 | returns the proper quadruple-precision floating-point value corresponding
4968 | to the abstract input.  This routine is just like `roundAndPackFloat128'
4969 | except that the input significand has fewer bits and does not have to be
4970 | normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
4971 | point exponent.
4972 *----------------------------------------------------------------------------*/
4973
4974 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
4975                                               uint64_t zSig0, uint64_t zSig1,
4976                                               float_status *status)
4977 {
4978     int8_t shiftCount;
4979     uint64_t zSig2;
4980
4981     if ( zSig0 == 0 ) {
4982         zSig0 = zSig1;
4983         zSig1 = 0;
4984         zExp -= 64;
4985     }
4986     shiftCount = clz64(zSig0) - 15;
4987     if ( 0 <= shiftCount ) {
4988         zSig2 = 0;
4989         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4990     }
4991     else {
4992         shift128ExtraRightJamming(
4993             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4994     }
4995     zExp -= shiftCount;
4996     return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
4997
4998 }
4999
5000
5001 /*----------------------------------------------------------------------------
5002 | Returns the result of converting the 32-bit two's complement integer `a'
5003 | to the extended double-precision floating-point format.  The conversion
5004 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5005 | Arithmetic.
5006 *----------------------------------------------------------------------------*/
5007
5008 floatx80 int32_to_floatx80(int32_t a, float_status *status)
5009 {
5010     bool zSign;
5011     uint32_t absA;
5012     int8_t shiftCount;
5013     uint64_t zSig;
5014
5015     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5016     zSign = ( a < 0 );
5017     absA = zSign ? - a : a;
5018     shiftCount = clz32(absA) + 32;
5019     zSig = absA;
5020     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
5021
5022 }
5023
5024 /*----------------------------------------------------------------------------
5025 | Returns the result of converting the 32-bit two's complement integer `a' to
5026 | the quadruple-precision floating-point format.  The conversion is performed
5027 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5028 *----------------------------------------------------------------------------*/
5029
5030 float128 int32_to_float128(int32_t a, float_status *status)
5031 {
5032     bool zSign;
5033     uint32_t absA;
5034     int8_t shiftCount;
5035     uint64_t zSig0;
5036
5037     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
5038     zSign = ( a < 0 );
5039     absA = zSign ? - a : a;
5040     shiftCount = clz32(absA) + 17;
5041     zSig0 = absA;
5042     return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
5043
5044 }
5045
5046 /*----------------------------------------------------------------------------
5047 | Returns the result of converting the 64-bit two's complement integer `a'
5048 | to the extended double-precision floating-point format.  The conversion
5049 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5050 | Arithmetic.
5051 *----------------------------------------------------------------------------*/
5052
5053 floatx80 int64_to_floatx80(int64_t a, float_status *status)
5054 {
5055     bool zSign;
5056     uint64_t absA;
5057     int8_t shiftCount;
5058
5059     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5060     zSign = ( a < 0 );
5061     absA = zSign ? - a : a;
5062     shiftCount = clz64(absA);
5063     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
5064
5065 }
5066
5067 /*----------------------------------------------------------------------------
5068 | Returns the result of converting the 64-bit two's complement integer `a' to
5069 | the quadruple-precision floating-point format.  The conversion is performed
5070 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5071 *----------------------------------------------------------------------------*/
5072
5073 float128 int64_to_float128(int64_t a, float_status *status)
5074 {
5075     bool zSign;
5076     uint64_t absA;
5077     int8_t shiftCount;
5078     int32_t zExp;
5079     uint64_t zSig0, zSig1;
5080
5081     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
5082     zSign = ( a < 0 );
5083     absA = zSign ? - a : a;
5084     shiftCount = clz64(absA) + 49;
5085     zExp = 0x406E - shiftCount;
5086     if ( 64 <= shiftCount ) {
5087         zSig1 = 0;
5088         zSig0 = absA;
5089         shiftCount -= 64;
5090     }
5091     else {
5092         zSig1 = absA;
5093         zSig0 = 0;
5094     }
5095     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5096     return packFloat128( zSign, zExp, zSig0, zSig1 );
5097
5098 }
5099
5100 /*----------------------------------------------------------------------------
5101 | Returns the result of converting the 64-bit unsigned integer `a'
5102 | to the quadruple-precision floating-point format.  The conversion is performed
5103 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5104 *----------------------------------------------------------------------------*/
5105
5106 float128 uint64_to_float128(uint64_t a, float_status *status)
5107 {
5108     if (a == 0) {
5109         return float128_zero;
5110     }
5111     return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
5112 }
5113
5114 /*----------------------------------------------------------------------------
5115 | Returns the result of converting the single-precision floating-point value
5116 | `a' to the extended double-precision floating-point format.  The conversion
5117 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5118 | Arithmetic.
5119 *----------------------------------------------------------------------------*/
5120
5121 floatx80 float32_to_floatx80(float32 a, float_status *status)
5122 {
5123     bool aSign;
5124     int aExp;
5125     uint32_t aSig;
5126
5127     a = float32_squash_input_denormal(a, status);
5128     aSig = extractFloat32Frac( a );
5129     aExp = extractFloat32Exp( a );
5130     aSign = extractFloat32Sign( a );
5131     if ( aExp == 0xFF ) {
5132         if (aSig) {
5133             floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
5134                                                status);
5135             return floatx80_silence_nan(res, status);
5136         }
5137         return packFloatx80(aSign,
5138                             floatx80_infinity_high,
5139                             floatx80_infinity_low);
5140     }
5141     if ( aExp == 0 ) {
5142         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5143         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5144     }
5145     aSig |= 0x00800000;
5146     return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
5147
5148 }
5149
5150 /*----------------------------------------------------------------------------
5151 | Returns the result of converting the single-precision floating-point value
5152 | `a' to the quadruple-precision floating-point format.  The conversion is
5153 | performed according to the IEC/IEEE Standard for Binary Floating-Point
5154 | Arithmetic.
5155 *----------------------------------------------------------------------------*/
5156
5157 float128 float32_to_float128(float32 a, float_status *status)
5158 {
5159     bool aSign;
5160     int aExp;
5161     uint32_t aSig;
5162
5163     a = float32_squash_input_denormal(a, status);
5164     aSig = extractFloat32Frac( a );
5165     aExp = extractFloat32Exp( a );
5166     aSign = extractFloat32Sign( a );
5167     if ( aExp == 0xFF ) {
5168         if (aSig) {
5169             return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
5170         }
5171         return packFloat128( aSign, 0x7FFF, 0, 0 );
5172     }
5173     if ( aExp == 0 ) {
5174         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5175         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5176         --aExp;
5177     }
5178     return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
5179
5180 }
5181
5182 /*----------------------------------------------------------------------------
5183 | Returns the remainder of the single-precision floating-point value `a'
5184 | with respect to the corresponding value `b'.  The operation is performed
5185 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5186 *----------------------------------------------------------------------------*/
5187
5188 float32 float32_rem(float32 a, float32 b, float_status *status)
5189 {
5190     bool aSign, zSign;
5191     int aExp, bExp, expDiff;
5192     uint32_t aSig, bSig;
5193     uint32_t q;
5194     uint64_t aSig64, bSig64, q64;
5195     uint32_t alternateASig;
5196     int32_t sigMean;
5197     a = float32_squash_input_denormal(a, status);
5198     b = float32_squash_input_denormal(b, status);
5199
         /* Unpack both operands; the sign of b is never looked at. */
5200     aSig = extractFloat32Frac( a );
5201     aExp = extractFloat32Exp( a );
5202     aSign = extractFloat32Sign( a );
5203     bSig = extractFloat32Frac( b );
5204     bExp = extractFloat32Exp( b );
5205     if ( aExp == 0xFF ) {
             /* a is NaN or infinity.  If either operand is a NaN, propagate
              * it; otherwise a is infinite: invalid, default NaN. */
5206         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
5207             return propagateFloat32NaN(a, b, status);
5208         }
5209         float_raise(float_flag_invalid, status);
5210         return float32_default_nan(status);
5211     }
5212     if ( bExp == 0xFF ) {
             /* b is NaN (propagate) or infinity (result is a unchanged). */
5213         if (bSig) {
5214             return propagateFloat32NaN(a, b, status);
5215         }
5216         return a;
5217     }
5218     if ( bExp == 0 ) {
             /* b is zero (invalid, default NaN) or a subnormal to normalize. */
5219         if ( bSig == 0 ) {
5220             float_raise(float_flag_invalid, status);
5221             return float32_default_nan(status);
5222         }
5223         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
5224     }
5225     if ( aExp == 0 ) {
             /* a is zero (returned as is) or a subnormal to normalize. */
5226         if ( aSig == 0 ) return a;
5227         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5228     }
5229     expDiff = aExp - bExp;
         /* Set bit 23 in both significands. */
5230     aSig |= 0x00800000;
5231     bSig |= 0x00800000;
5232     if ( expDiff < 32 ) {
             /* Small exponent gap: work in 32-bit significands. */
5233         aSig <<= 8;
5234         bSig <<= 8;
5235         if ( expDiff < 0 ) {
                 /* a's exponent is at least two below b's: return a
                  * unchanged. */
5236             if ( expDiff < -1 ) return a;
5237             aSig >>= 1;
5238         }
             /* First quotient bit by direct comparison. */
5239         q = ( bSig <= aSig );
5240         if ( q ) aSig -= bSig;
5241         if ( 0 < expDiff ) {
                 /* Remaining quotient from one 64/32 division, reduced to
                  * expDiff bits, then the matching partial remainder. */
5242             q = ( ( (uint64_t) aSig )<<32 ) / bSig;
5243             q >>= 32 - expDiff;
5244             bSig >>= 2;
5245             aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5246         }
5247         else {
5248             aSig >>= 2;
5249             bSig >>= 2;
5250         }
5251     }
5252     else {
             /* Large exponent gap: reduce in 64-bit arithmetic, consuming
              * 62 bits of exponent difference per loop iteration. */
5253         if ( bSig <= aSig ) aSig -= bSig;
5254         aSig64 = ( (uint64_t) aSig )<<40;
5255         bSig64 = ( (uint64_t) bSig )<<40;
5256         expDiff -= 64;
5257         while ( 0 < expDiff ) {
5258             q64 = estimateDiv128To64( aSig64, 0, bSig64 );
                 /* Lower the quotient estimate by 2, clamping at 0. */
5259             q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5260             aSig64 = - ( ( bSig * q64 )<<38 );
5261             expDiff -= 62;
5262         }
5263         expDiff += 64;
5264         q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5265         q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5266         q = q64>>( 64 - expDiff );
5267         bSig <<= 6;
5268         aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5269     }
         /* Keep subtracting bSig until the remainder turns negative;
          * alternateASig holds the last non-negative remainder. */
5270     do {
5271         alternateASig = aSig;
5272         ++q;
5273         aSig -= bSig;
5274     } while ( 0 <= (int32_t) aSig );
         /* Choose between the negative remainder (aSig) and the non-negative
          * one (alternateASig): a negative sum means the non-negative one is
          * smaller in magnitude; on an exact tie it is taken when q is odd. */
5275     sigMean = aSig + alternateASig;
5276     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5277         aSig = alternateASig;
5278     }
         /* A negative remainder flips the result sign relative to a. */
5279     zSign = ( (int32_t) aSig < 0 );
5280     if ( zSign ) aSig = - aSig;
5281     return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
5282 }
5283
5284
5285
5286 /*----------------------------------------------------------------------------
5287 | Returns the binary exponential of the single-precision floating-point value
5288 | `a'. The operation is performed according to the IEC/IEEE Standard for
5289 | Binary Floating-Point Arithmetic.
5290 |
5291 | Uses the following identities:
5292 |
5293 | 1. -------------------------------------------------------------------------
5294 |      x    x*ln(2)
5295 |     2  = e
5296 |
5297 | 2. -------------------------------------------------------------------------
5298 |                      2     3     4     5           n
5299 |      x        x     x     x     x     x           x
5300 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5301 |               1!    2!    3!    4!    5!          n!
5302 *----------------------------------------------------------------------------*/
5303
/* Taylor-series coefficients for e^x: entry i (0-based) is 1/(i+1)!,
 * stored as a float64 bit pattern.  Used by float32_exp2(). */
5304 static const float64 float32_exp2_coefficients[15] =
5305 {
5306     const_float64( 0x3ff0000000000000ll ), /* 1/1!  */
5307     const_float64( 0x3fe0000000000000ll ), /* 1/2!  */
5308     const_float64( 0x3fc5555555555555ll ), /* 1/3!  */
5309     const_float64( 0x3fa5555555555555ll ), /* 1/4!  */
5310     const_float64( 0x3f81111111111111ll ), /* 1/5!  */
5311     const_float64( 0x3f56c16c16c16c17ll ), /* 1/6!  */
5312     const_float64( 0x3f2a01a01a01a01all ), /* 1/7!  */
5313     const_float64( 0x3efa01a01a01a01all ), /* 1/8!  */
5314     const_float64( 0x3ec71de3a556c734ll ), /* 1/9!  */
5315     const_float64( 0x3e927e4fb7789f5cll ), /* 1/10! */
5316     const_float64( 0x3e5ae64567f544e4ll ), /* 1/11! */
5317     const_float64( 0x3e21eed8eff8d898ll ), /* 1/12! */
5318     const_float64( 0x3de6124613a86d09ll ), /* 1/13! */
5319     const_float64( 0x3da93974a8c07c9dll ), /* 1/14! */
5320     const_float64( 0x3d6ae7f3e733b81fll ), /* 1/15! */
5321 };
5322
5323 float32 float32_exp2(float32 a, float_status *status)
5324 {
5325     bool aSign;
5326     int aExp;
5327     uint32_t aSig;
5328     float64 r, x, xn;
5329     int i;
5330     a = float32_squash_input_denormal(a, status);
5331
5332     aSig = extractFloat32Frac( a );
5333     aExp = extractFloat32Exp( a );
5334     aSign = extractFloat32Sign( a );
5335
5336     if ( aExp == 0xFF) {
5337         if (aSig) {
5338             return propagateFloat32NaN(a, float32_zero, status);
5339         }
5340         return (aSign) ? float32_zero : a;
5341     }
5342     if (aExp == 0) {
5343         if (aSig == 0) return float32_one;
5344     }
5345
5346     float_raise(float_flag_inexact, status);
5347
5348     /* ******************************* */
5349     /* using float64 for approximation */
5350     /* ******************************* */
5351     x = float32_to_float64(a, status);
5352     x = float64_mul(x, float64_ln2, status);
5353
5354     xn = x;
5355     r = float64_one;
5356     for (i = 0 ; i < 15 ; i++) {
5357         float64 f;
5358
5359         f = float64_mul(xn, float32_exp2_coefficients[i], status);
5360         r = float64_add(r, f, status);
5361
5362         xn = float64_mul(xn, x, status);
5363     }
5364
5365     return float64_to_float32(r, status);
5366 }
5367
5368 /*----------------------------------------------------------------------------
5369 | Returns the binary log of the single-precision floating-point value `a'.
5370 | The operation is performed according to the IEC/IEEE Standard for Binary
5371 | Floating-Point Arithmetic.
5372 *----------------------------------------------------------------------------*/
5373 float32 float32_log2(float32 a, float_status *status)
5374 {
5375     bool aSign, zSign;
5376     int aExp;
5377     uint32_t aSig, zSig, i;
5378
5379     a = float32_squash_input_denormal(a, status);
5380     aSig = extractFloat32Frac( a );
5381     aExp = extractFloat32Exp( a );
5382     aSign = extractFloat32Sign( a );
5383
5384     if ( aExp == 0 ) {
5385         if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5386         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5387     }
5388     if ( aSign ) {
5389         float_raise(float_flag_invalid, status);
5390         return float32_default_nan(status);
5391     }
5392     if ( aExp == 0xFF ) {
5393         if (aSig) {
5394             return propagateFloat32NaN(a, float32_zero, status);
5395         }
5396         return a;
5397     }
5398
5399     aExp -= 0x7F;
5400     aSig |= 0x00800000;
5401     zSign = aExp < 0;
5402     zSig = aExp << 23;
5403
5404     for (i = 1 << 22; i > 0; i >>= 1) {
5405         aSig = ( (uint64_t)aSig * aSig ) >> 23;
5406         if ( aSig & 0x01000000 ) {
5407             aSig >>= 1;
5408             zSig |= i;
5409         }
5410     }
5411
5412     if ( zSign )
5413         zSig = -zSig;
5414
5415     return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
5416 }
5417
5418 /*----------------------------------------------------------------------------
5419 | Returns the result of converting the double-precision floating-point value
5420 | `a' to the extended double-precision floating-point format.  The conversion
5421 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5422 | Arithmetic.
5423 *----------------------------------------------------------------------------*/
5424
5425 floatx80 float64_to_floatx80(float64 a, float_status *status)
5426 {
5427     bool aSign;
5428     int aExp;
5429     uint64_t aSig;
5430
5431     a = float64_squash_input_denormal(a, status);
5432     aSig = extractFloat64Frac( a );
5433     aExp = extractFloat64Exp( a );
5434     aSign = extractFloat64Sign( a );
5435     if ( aExp == 0x7FF ) {
5436         if (aSig) {
5437             floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5438                                                status);
5439             return floatx80_silence_nan(res, status);
5440         }
5441         return packFloatx80(aSign,
5442                             floatx80_infinity_high,
5443                             floatx80_infinity_low);
5444     }
5445     if ( aExp == 0 ) {
5446         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5447         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5448     }
5449     return
5450         packFloatx80(
5451             aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
5452
5453 }
5454
5455 /*----------------------------------------------------------------------------
5456 | Returns the result of converting the double-precision floating-point value
5457 | `a' to the quadruple-precision floating-point format.  The conversion is
5458 | performed according to the IEC/IEEE Standard for Binary Floating-Point
5459 | Arithmetic.
5460 *----------------------------------------------------------------------------*/
5461
5462 float128 float64_to_float128(float64 a, float_status *status)
5463 {
5464     bool aSign;
5465     int aExp;
5466     uint64_t aSig, zSig0, zSig1;
5467
5468     a = float64_squash_input_denormal(a, status);
5469     aSig = extractFloat64Frac( a );
5470     aExp = extractFloat64Exp( a );
5471     aSign = extractFloat64Sign( a );
5472     if ( aExp == 0x7FF ) {
5473         if (aSig) {
5474             return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
5475         }
5476         return packFloat128( aSign, 0x7FFF, 0, 0 );
5477     }
5478     if ( aExp == 0 ) {
5479         if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5480         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5481         --aExp;
5482     }
5483     shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
5484     return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
5485
5486 }
5487
5488
/*----------------------------------------------------------------------------
| Returns the remainder of the double-precision floating-point value `a'
| with respect to the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
| NaN operands are propagated.  An infinite `a' or a zero `b' raises the
| invalid flag and yields the default NaN.  An infinite `b' or a zero `a'
| returns `a' unchanged.
*----------------------------------------------------------------------------*/

float64 float64_rem(float64 a, float64 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint64_t aSig, bSig;
    uint64_t q, alternateASig;
    int64_t sigMean;

    a = float64_squash_input_denormal(a, status);
    b = float64_squash_input_denormal(b, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    /* `a' is a NaN or an infinity. */
    if ( aExp == 0x7FF ) {
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    /* `b' is a NaN or an infinity (and `a' is finite). */
    if ( bExp == 0x7FF ) {
        if (bSig) {
            return propagateFloat64NaN(a, b, status);
        }
        return a;
    }
    /* `b' is zero (invalid) or subnormal (normalize it). */
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float64_default_nan(status);
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    /* `a' is zero (returned as is) or subnormal (normalize it). */
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Set bit 52 in both significands and move them up to bit 63. */
    aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
    bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
    if ( expDiff < 0 ) {
        /* b's exponent exceeds a's by two or more: `a' is the result. */
        if ( expDiff < -1 ) return a;
        aSig >>= 1;
    }
    /* First quotient bit and the matching partial remainder. */
    q = ( bSig <= aSig );
    if ( q ) aSig -= bSig;
    expDiff -= 64;
    /* Reduce the exponent difference 62 bits at a time. */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        /* Lower the estimate by 2, clamping at zero. */
        q = ( 2 < q ) ? q - 2 : 0;
        aSig = - ( ( bSig>>2 ) * q );
        expDiff -= 62;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        /* Final partial step covering the remaining expDiff bits. */
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        bSig >>= 2;
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
    }
    else {
        aSig >>= 2;
        bSig >>= 2;
    }
    /*
     * Keep subtracting bSig until the remainder turns negative, remembering
     * the last non-negative value in alternateASig and counting in q.
     */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int64_t) aSig );
    /*
     * Choose between the negative remainder and the preceding non-negative
     * one: take the non-negative one when their sum is negative, or when the
     * sum is zero and q is odd.
     */
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    /* Convert to sign and magnitude; the result sign is relative to a's. */
    zSign = ( (int64_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);

}
5576
5577 /*----------------------------------------------------------------------------
5578 | Returns the binary log of the double-precision floating-point value `a'.
5579 | The operation is performed according to the IEC/IEEE Standard for Binary
5580 | Floating-Point Arithmetic.
5581 *----------------------------------------------------------------------------*/
5582 float64 float64_log2(float64 a, float_status *status)
5583 {
5584     bool aSign, zSign;
5585     int aExp;
5586     uint64_t aSig, aSig0, aSig1, zSig, i;
5587     a = float64_squash_input_denormal(a, status);
5588
5589     aSig = extractFloat64Frac( a );
5590     aExp = extractFloat64Exp( a );
5591     aSign = extractFloat64Sign( a );
5592
5593     if ( aExp == 0 ) {
5594         if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5595         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5596     }
5597     if ( aSign ) {
5598         float_raise(float_flag_invalid, status);
5599         return float64_default_nan(status);
5600     }
5601     if ( aExp == 0x7FF ) {
5602         if (aSig) {
5603             return propagateFloat64NaN(a, float64_zero, status);
5604         }
5605         return a;
5606     }
5607
5608     aExp -= 0x3FF;
5609     aSig |= UINT64_C(0x0010000000000000);
5610     zSign = aExp < 0;
5611     zSig = (uint64_t)aExp << 52;
5612     for (i = 1LL << 51; i > 0; i >>= 1) {
5613         mul64To128( aSig, aSig, &aSig0, &aSig1 );
5614         aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
5615         if ( aSig & UINT64_C(0x0020000000000000) ) {
5616             aSig >>= 1;
5617             zSig |= i;
5618         }
5619     }
5620
5621     if ( zSign )
5622         zSig = -zSig;
5623     return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
5624 }
5625
5626 /*----------------------------------------------------------------------------
5627 | Returns the result of converting the extended double-precision floating-
5628 | point value `a' to the 32-bit two's complement integer format.  The
5629 | conversion is performed according to the IEC/IEEE Standard for Binary
5630 | Floating-Point Arithmetic---which means in particular that the conversion
5631 | is rounded according to the current rounding mode.  If `a' is a NaN, the
5632 | largest positive integer is returned.  Otherwise, if the conversion
5633 | overflows, the largest integer with the same sign as `a' is returned.
5634 *----------------------------------------------------------------------------*/
5635
5636 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5637 {
5638     bool aSign;
5639     int32_t aExp, shiftCount;
5640     uint64_t aSig;
5641
5642     if (floatx80_invalid_encoding(a)) {
5643         float_raise(float_flag_invalid, status);
5644         return 1 << 31;
5645     }
5646     aSig = extractFloatx80Frac( a );
5647     aExp = extractFloatx80Exp( a );
5648     aSign = extractFloatx80Sign( a );
5649     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5650     shiftCount = 0x4037 - aExp;
5651     if ( shiftCount <= 0 ) shiftCount = 1;
5652     shift64RightJamming( aSig, shiftCount, &aSig );
5653     return roundAndPackInt32(aSign, aSig, status);
5654
5655 }
5656
5657 /*----------------------------------------------------------------------------
5658 | Returns the result of converting the extended double-precision floating-
5659 | point value `a' to the 32-bit two's complement integer format.  The
5660 | conversion is performed according to the IEC/IEEE Standard for Binary
5661 | Floating-Point Arithmetic, except that the conversion is always rounded
5662 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5663 | Otherwise, if the conversion overflows, the largest integer with the same
5664 | sign as `a' is returned.
5665 *----------------------------------------------------------------------------*/
5666
5667 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5668 {
5669     bool aSign;
5670     int32_t aExp, shiftCount;
5671     uint64_t aSig, savedASig;
5672     int32_t z;
5673
5674     if (floatx80_invalid_encoding(a)) {
5675         float_raise(float_flag_invalid, status);
5676         return 1 << 31;
5677     }
5678     aSig = extractFloatx80Frac( a );
5679     aExp = extractFloatx80Exp( a );
5680     aSign = extractFloatx80Sign( a );
5681     if ( 0x401E < aExp ) {
5682         if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5683         goto invalid;
5684     }
5685     else if ( aExp < 0x3FFF ) {
5686         if (aExp || aSig) {
5687             float_raise(float_flag_inexact, status);
5688         }
5689         return 0;
5690     }
5691     shiftCount = 0x403E - aExp;
5692     savedASig = aSig;
5693     aSig >>= shiftCount;
5694     z = aSig;
5695     if ( aSign ) z = - z;
5696     if ( ( z < 0 ) ^ aSign ) {
5697  invalid:
5698         float_raise(float_flag_invalid, status);
5699         return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5700     }
5701     if ( ( aSig<<shiftCount ) != savedASig ) {
5702         float_raise(float_flag_inexact, status);
5703     }
5704     return z;
5705
5706 }
5707
5708 /*----------------------------------------------------------------------------
5709 | Returns the result of converting the extended double-precision floating-
5710 | point value `a' to the 64-bit two's complement integer format.  The
5711 | conversion is performed according to the IEC/IEEE Standard for Binary
5712 | Floating-Point Arithmetic---which means in particular that the conversion
5713 | is rounded according to the current rounding mode.  If `a' is a NaN,
5714 | the largest positive integer is returned.  Otherwise, if the conversion
5715 | overflows, the largest integer with the same sign as `a' is returned.
5716 *----------------------------------------------------------------------------*/
5717
5718 int64_t floatx80_to_int64(floatx80 a, float_status *status)
5719 {
5720     bool aSign;
5721     int32_t aExp, shiftCount;
5722     uint64_t aSig, aSigExtra;
5723
5724     if (floatx80_invalid_encoding(a)) {
5725         float_raise(float_flag_invalid, status);
5726         return 1ULL << 63;
5727     }
5728     aSig = extractFloatx80Frac( a );
5729     aExp = extractFloatx80Exp( a );
5730     aSign = extractFloatx80Sign( a );
5731     shiftCount = 0x403E - aExp;
5732     if ( shiftCount <= 0 ) {
5733         if ( shiftCount ) {
5734             float_raise(float_flag_invalid, status);
5735             if (!aSign || floatx80_is_any_nan(a)) {
5736                 return INT64_MAX;
5737             }
5738             return INT64_MIN;
5739         }
5740         aSigExtra = 0;
5741     }
5742     else {
5743         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5744     }
5745     return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5746
5747 }
5748
5749 /*----------------------------------------------------------------------------
5750 | Returns the result of converting the extended double-precision floating-
5751 | point value `a' to the 64-bit two's complement integer format.  The
5752 | conversion is performed according to the IEC/IEEE Standard for Binary
5753 | Floating-Point Arithmetic, except that the conversion is always rounded
5754 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5755 | Otherwise, if the conversion overflows, the largest integer with the same
5756 | sign as `a' is returned.
5757 *----------------------------------------------------------------------------*/
5758
5759 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
5760 {
5761     bool aSign;
5762     int32_t aExp, shiftCount;
5763     uint64_t aSig;
5764     int64_t z;
5765
5766     if (floatx80_invalid_encoding(a)) {
5767         float_raise(float_flag_invalid, status);
5768         return 1ULL << 63;
5769     }
5770     aSig = extractFloatx80Frac( a );
5771     aExp = extractFloatx80Exp( a );
5772     aSign = extractFloatx80Sign( a );
5773     shiftCount = aExp - 0x403E;
5774     if ( 0 <= shiftCount ) {
5775         aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
5776         if ( ( a.high != 0xC03E ) || aSig ) {
5777             float_raise(float_flag_invalid, status);
5778             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5779                 return INT64_MAX;
5780             }
5781         }
5782         return INT64_MIN;
5783     }
5784     else if ( aExp < 0x3FFF ) {
5785         if (aExp | aSig) {
5786             float_raise(float_flag_inexact, status);
5787         }
5788         return 0;
5789     }
5790     z = aSig>>( - shiftCount );
5791     if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
5792         float_raise(float_flag_inexact, status);
5793     }
5794     if ( aSign ) z = - z;
5795     return z;
5796
5797 }
5798
5799 /*----------------------------------------------------------------------------
5800 | Returns the result of converting the extended double-precision floating-
5801 | point value `a' to the single-precision floating-point format.  The
5802 | conversion is performed according to the IEC/IEEE Standard for Binary
5803 | Floating-Point Arithmetic.
5804 *----------------------------------------------------------------------------*/
5805
5806 float32 floatx80_to_float32(floatx80 a, float_status *status)
5807 {
5808     bool aSign;
5809     int32_t aExp;
5810     uint64_t aSig;
5811
5812     if (floatx80_invalid_encoding(a)) {
5813         float_raise(float_flag_invalid, status);
5814         return float32_default_nan(status);
5815     }
5816     aSig = extractFloatx80Frac( a );
5817     aExp = extractFloatx80Exp( a );
5818     aSign = extractFloatx80Sign( a );
5819     if ( aExp == 0x7FFF ) {
5820         if ( (uint64_t) ( aSig<<1 ) ) {
5821             float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5822                                              status);
5823             return float32_silence_nan(res, status);
5824         }
5825         return packFloat32( aSign, 0xFF, 0 );
5826     }
5827     shift64RightJamming( aSig, 33, &aSig );
5828     if ( aExp || aSig ) aExp -= 0x3F81;
5829     return roundAndPackFloat32(aSign, aExp, aSig, status);
5830
5831 }
5832
5833 /*----------------------------------------------------------------------------
5834 | Returns the result of converting the extended double-precision floating-
5835 | point value `a' to the double-precision floating-point format.  The
5836 | conversion is performed according to the IEC/IEEE Standard for Binary
5837 | Floating-Point Arithmetic.
5838 *----------------------------------------------------------------------------*/
5839
5840 float64 floatx80_to_float64(floatx80 a, float_status *status)
5841 {
5842     bool aSign;
5843     int32_t aExp;
5844     uint64_t aSig, zSig;
5845
5846     if (floatx80_invalid_encoding(a)) {
5847         float_raise(float_flag_invalid, status);
5848         return float64_default_nan(status);
5849     }
5850     aSig = extractFloatx80Frac( a );
5851     aExp = extractFloatx80Exp( a );
5852     aSign = extractFloatx80Sign( a );
5853     if ( aExp == 0x7FFF ) {
5854         if ( (uint64_t) ( aSig<<1 ) ) {
5855             float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5856                                              status);
5857             return float64_silence_nan(res, status);
5858         }
5859         return packFloat64( aSign, 0x7FF, 0 );
5860     }
5861     shift64RightJamming( aSig, 1, &zSig );
5862     if ( aExp || aSig ) aExp -= 0x3C01;
5863     return roundAndPackFloat64(aSign, aExp, zSig, status);
5864
5865 }
5866
5867 /*----------------------------------------------------------------------------
5868 | Returns the result of converting the extended double-precision floating-
5869 | point value `a' to the quadruple-precision floating-point format.  The
5870 | conversion is performed according to the IEC/IEEE Standard for Binary
5871 | Floating-Point Arithmetic.
5872 *----------------------------------------------------------------------------*/
5873
5874 float128 floatx80_to_float128(floatx80 a, float_status *status)
5875 {
5876     bool aSign;
5877     int aExp;
5878     uint64_t aSig, zSig0, zSig1;
5879
5880     if (floatx80_invalid_encoding(a)) {
5881         float_raise(float_flag_invalid, status);
5882         return float128_default_nan(status);
5883     }
5884     aSig = extractFloatx80Frac( a );
5885     aExp = extractFloatx80Exp( a );
5886     aSign = extractFloatx80Sign( a );
5887     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5888         float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5889                                            status);
5890         return float128_silence_nan(res, status);
5891     }
5892     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5893     return packFloat128( aSign, aExp, zSig0, zSig1 );
5894
5895 }
5896
5897 /*----------------------------------------------------------------------------
5898 | Rounds the extended double-precision floating-point value `a'
5899 | to the precision provided by floatx80_rounding_precision and returns the
5900 | result as an extended double-precision floating-point value.
5901 | The operation is performed according to the IEC/IEEE Standard for Binary
5902 | Floating-Point Arithmetic.
5903 *----------------------------------------------------------------------------*/
5904
5905 floatx80 floatx80_round(floatx80 a, float_status *status)
5906 {
5907     return roundAndPackFloatx80(status->floatx80_rounding_precision,
5908                                 extractFloatx80Sign(a),
5909                                 extractFloatx80Exp(a),
5910                                 extractFloatx80Frac(a), 0, status);
5911 }
5912
5913 /*----------------------------------------------------------------------------
5914 | Rounds the extended double-precision floating-point value `a' to an integer,
5915 | and returns the result as an extended quadruple-precision floating-point
5916 | value.  The operation is performed according to the IEC/IEEE Standard for
5917 | Binary Floating-Point Arithmetic.
5918 *----------------------------------------------------------------------------*/
5919
5920 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5921 {
5922     bool aSign;
5923     int32_t aExp;
5924     uint64_t lastBitMask, roundBitsMask;
5925     floatx80 z;
5926
5927     if (floatx80_invalid_encoding(a)) {
5928         float_raise(float_flag_invalid, status);
5929         return floatx80_default_nan(status);
5930     }
5931     aExp = extractFloatx80Exp( a );
5932     if ( 0x403E <= aExp ) {
5933         if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5934             return propagateFloatx80NaN(a, a, status);
5935         }
5936         return a;
5937     }
5938     if ( aExp < 0x3FFF ) {
5939         if (    ( aExp == 0 )
5940              && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
5941             return a;
5942         }
5943         float_raise(float_flag_inexact, status);
5944         aSign = extractFloatx80Sign( a );
5945         switch (status->float_rounding_mode) {
5946          case float_round_nearest_even:
5947             if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5948                ) {
5949                 return
5950                     packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5951             }
5952             break;
5953         case float_round_ties_away:
5954             if (aExp == 0x3FFE) {
5955                 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5956             }
5957             break;
5958          case float_round_down:
5959             return
5960                   aSign ?
5961                       packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5962                 : packFloatx80( 0, 0, 0 );
5963          case float_round_up:
5964             return
5965                   aSign ? packFloatx80( 1, 0, 0 )
5966                 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5967
5968         case float_round_to_zero:
5969             break;
5970         default:
5971             g_assert_not_reached();
5972         }
5973         return packFloatx80( aSign, 0, 0 );
5974     }
5975     lastBitMask = 1;
5976     lastBitMask <<= 0x403E - aExp;
5977     roundBitsMask = lastBitMask - 1;
5978     z = a;
5979     switch (status->float_rounding_mode) {
5980     case float_round_nearest_even:
5981         z.low += lastBitMask>>1;
5982         if ((z.low & roundBitsMask) == 0) {
5983             z.low &= ~lastBitMask;
5984         }
5985         break;
5986     case float_round_ties_away:
5987         z.low += lastBitMask >> 1;
5988         break;
5989     case float_round_to_zero:
5990         break;
5991     case float_round_up:
5992         if (!extractFloatx80Sign(z)) {
5993             z.low += roundBitsMask;
5994         }
5995         break;
5996     case float_round_down:
5997         if (extractFloatx80Sign(z)) {
5998             z.low += roundBitsMask;
5999         }
6000         break;
6001     default:
6002         abort();
6003     }
6004     z.low &= ~ roundBitsMask;
6005     if ( z.low == 0 ) {
6006         ++z.high;
6007         z.low = UINT64_C(0x8000000000000000);
6008     }
6009     if (z.low != a.low) {
6010         float_raise(float_flag_inexact, status);
6011     }
6012     return z;
6013
6014 }
6015
/*----------------------------------------------------------------------------
| Returns the result of adding the absolute values of the extended double-
| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
| negated before being returned.  `zSign' is ignored if the result is a NaN.
| The addition is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
|
| The operand with the smaller exponent is shifted right to align it with
| the other one; the bits shifted out are collected in zSig1 and passed on
| to the rounding routine.
*----------------------------------------------------------------------------*/

static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    /* `a' has the larger exponent. */
    if ( 0 < expDiff ) {
        if ( aExp == 0x7FFF ) {
            /* `a' is a NaN (propagate) or an infinity (return it). */
            if ((uint64_t)(aSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        /* A zero exponent field in `b' reduces the shift by one. */
        if ( bExp == 0 ) --expDiff;
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    /* `b' has the larger exponent. */
    else if ( expDiff < 0 ) {
        if ( bExp == 0x7FFF ) {
            /* `b' is a NaN (propagate) or an infinity (signed by zSign). */
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        /* A zero exponent field in `a' reduces the shift by one. */
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    /* Equal exponents: no alignment shift is needed. */
    else {
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
                /* At least one of the values is a pseudo-denormal,
                 * and there is a carry out of the result.  */
                zExp = 1;
                goto shiftRight1;
            }
            if (zSig0 == 0) {
                return packFloatx80(zSign, 0, 0);
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }
    /*
     * Unequal exponents: if bit 63 of the sum is set it goes to rounding
     * as is; otherwise it falls through to the one-bit right shift below.
     */
    zSig0 = aSig + bSig;
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    /* Shift right one bit, force bit 63 on and bump the exponent. */
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= UINT64_C(0x8000000000000000);
    ++zExp;
 roundAndPack:
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6095
/*----------------------------------------------------------------------------
| Returns the result of subtracting the absolute values of the extended
| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
| difference is negated before being returned.  `zSign' is ignored if the
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
| Standard for Binary Floating-Point Arithmetic.
|
| The smaller operand is always subtracted from the larger one; when `b' is
| the larger, the result sign is flipped.  Two infinities raise the invalid
| flag and give the default NaN.  Equal finite operands give a zero whose
| sign is negative only in the round-down mode.
*----------------------------------------------------------------------------*/

static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponents from here down to the bExpBigger label. */
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Both operands are infinities. */
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    /* A zero exponent field is handled as exponent 1 for both operands. */
    if ( aExp == 0 ) {
        aExp = 1;
        bExp = 1;
    }
    zSig1 = 0;
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Identical magnitudes: zero, negative only when rounding down. */
    return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
 bExpBigger:
    if ( bExp == 0x7FFF ) {
        /* `b' is a NaN (propagate) or an infinity (result sign flipped). */
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return packFloatx80(zSign ^ 1, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    /* A zero exponent field in `a' reduces the alignment shift by one. */
    if ( aExp == 0 ) ++expDiff;
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
 bBigger:
    /* Result is b - a with the sign inverted. */
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
    zExp = bExp;
    zSign ^= 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0x7FFF ) {
        /* `a' is a NaN (propagate) or an infinity (return it). */
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a;
    }
    /* A zero exponent field in `b' reduces the alignment shift by one. */
    if ( bExp == 0 ) --expDiff;
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
 aBigger:
    /* Result is a - b with the sign unchanged. */
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
    zExp = aExp;
 normalizeRoundAndPack:
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         zSign, zExp, zSig0, zSig1, status);
}
6164
6165 /*----------------------------------------------------------------------------
6166 | Returns the result of adding the extended double-precision floating-point
6167 | values `a' and `b'.  The operation is performed according to the IEC/IEEE
6168 | Standard for Binary Floating-Point Arithmetic.
6169 *----------------------------------------------------------------------------*/
6170
6171 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
6172 {
6173     bool aSign, bSign;
6174
6175     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6176         float_raise(float_flag_invalid, status);
6177         return floatx80_default_nan(status);
6178     }
6179     aSign = extractFloatx80Sign( a );
6180     bSign = extractFloatx80Sign( b );
6181     if ( aSign == bSign ) {
6182         return addFloatx80Sigs(a, b, aSign, status);
6183     }
6184     else {
6185         return subFloatx80Sigs(a, b, aSign, status);
6186     }
6187
6188 }
6189
6190 /*----------------------------------------------------------------------------
6191 | Returns the result of subtracting the extended double-precision floating-
6192 | point values `a' and `b'.  The operation is performed according to the
6193 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6194 *----------------------------------------------------------------------------*/
6195
6196 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
6197 {
6198     bool aSign, bSign;
6199
6200     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6201         float_raise(float_flag_invalid, status);
6202         return floatx80_default_nan(status);
6203     }
6204     aSign = extractFloatx80Sign( a );
6205     bSign = extractFloatx80Sign( b );
6206     if ( aSign == bSign ) {
6207         return subFloatx80Sigs(a, b, aSign, status);
6208     }
6209     else {
6210         return addFloatx80Sigs(a, b, aSign, status);
6211     }
6212
6213 }
6214
6215 /*----------------------------------------------------------------------------
6216 | Returns the result of multiplying the extended double-precision floating-
6217 | point values `a' and `b'.  The operation is performed according to the
6218 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6219 *----------------------------------------------------------------------------*/
6220
6221 floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
6222 {
6223     bool aSign, bSign, zSign;
6224     int32_t aExp, bExp, zExp;
6225     uint64_t aSig, bSig, zSig0, zSig1;
6226
6227     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6228         float_raise(float_flag_invalid, status);
6229         return floatx80_default_nan(status);
6230     }
6231     aSig = extractFloatx80Frac( a );
6232     aExp = extractFloatx80Exp( a );
6233     aSign = extractFloatx80Sign( a );
6234     bSig = extractFloatx80Frac( b );
6235     bExp = extractFloatx80Exp( b );
6236     bSign = extractFloatx80Sign( b );
6237     zSign = aSign ^ bSign;
6238     if ( aExp == 0x7FFF ) {
6239         if (    (uint64_t) ( aSig<<1 )
6240              || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
6241             return propagateFloatx80NaN(a, b, status);
6242         }
6243         if ( ( bExp | bSig ) == 0 ) goto invalid;
6244         return packFloatx80(zSign, floatx80_infinity_high,
6245                                    floatx80_infinity_low);
6246     }
6247     if ( bExp == 0x7FFF ) {
6248         if ((uint64_t)(bSig << 1)) {
6249             return propagateFloatx80NaN(a, b, status);
6250         }
6251         if ( ( aExp | aSig ) == 0 ) {
6252  invalid:
6253             float_raise(float_flag_invalid, status);
6254             return floatx80_default_nan(status);
6255         }
6256         return packFloatx80(zSign, floatx80_infinity_high,
6257                                    floatx80_infinity_low);
6258     }
6259     if ( aExp == 0 ) {
6260         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6261         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6262     }
6263     if ( bExp == 0 ) {
6264         if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
6265         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6266     }
6267     zExp = aExp + bExp - 0x3FFE;
6268     mul64To128( aSig, bSig, &zSig0, &zSig1 );
6269     if ( 0 < (int64_t) zSig0 ) {
6270         shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6271         --zExp;
6272     }
6273     return roundAndPackFloatx80(status->floatx80_rounding_precision,
6274                                 zSign, zExp, zSig0, zSig1, status);
6275 }
6276
/*----------------------------------------------------------------------------
| Returns the result of dividing the extended double-precision floating-point
| value `a' by the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
| Cases handled before the significand division:
|   - an invalid encoding of either operand raises the invalid exception and
|     returns the default NaN;
|   - a NaN operand is passed to propagateFloatx80NaN();
|   - infinity / infinity and zero / zero raise the invalid exception and
|     return the default NaN;
|   - infinity / finite returns an infinity, finite / infinity returns a zero;
|   - nonzero / zero raises the divide-by-zero exception and returns an
|     infinity.
| Infinite, zero and rounded results carry the exclusive-or of the operand
| signs.
*----------------------------------------------------------------------------*/

floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    uint64_t rem0, rem1, rem2, term0, term1, term2;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    /* `a' has the maximum exponent: it is a NaN or an infinity. */
    if ( aExp == 0x7FFF ) {
        /* Any bit set below the top significand bit is treated as a NaN. */
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            /* infinity / infinity */
            goto invalid;
        }
        /* infinity / finite */
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    /* `a' is finite here; `b' is a NaN or an infinity. */
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* finite / infinity */
        return packFloatx80( zSign, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* Division by zero: 0/0 is invalid, anything else is infinite. */
            if ( ( aExp | aSig ) == 0 ) {
 invalid:
                float_raise(float_flag_invalid, status);
                return floatx80_default_nan(status);
            }
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(zSign, floatx80_infinity_high,
                                       floatx80_infinity_low);
        }
        /* Nonzero divisor with a zero exponent field. */
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        /* zero / nonzero finite */
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    zExp = aExp - bExp + 0x3FFE;
    rem1 = 0;
    /*
     * When the dividend significand is not smaller than the divisor's, shift
     * the dividend right by one bit as the 128-bit pair (aSig, rem1) and
     * compensate by incrementing the result exponent.
     */
    if ( bSig <= aSig ) {
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
        ++zExp;
    }
    /*
     * High word of the quotient: start from an estimate, compute the
     * remainder (aSig, rem1) - bSig * zSig0, and step the estimate down
     * (adding bSig back) for as long as that remainder is negative.
     */
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
    mul64To128( bSig, zSig0, &term0, &term1 );
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
    }
    /* Low word of the quotient, estimated from the remaining remainder. */
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
    /*
     * Only when the estimate shifted left by one bit is at most 8 is it
     * corrected exactly in the same way as the high word; the lowest bit is
     * then set if any remainder is left over.
     */
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
        mul64To128( bSig, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
        }
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6363
/*----------------------------------------------------------------------------
| Returns the remainder of the extended double-precision floating-point value
| `a' with respect to the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
| if 'mod' is false; if 'mod' is true, return the remainder based on truncating
| the quotient toward zero instead.  '*quotient' is set to the low 64 bits of
| the absolute value of the integer quotient.
|
| '*quotient' is cleared before any other processing, so it is zero on every
| early return.  An invalid encoding, an infinite `a', or a zero `b' raises
| the invalid exception and returns the default NaN; NaN operands are passed
| to propagateFloatx80NaN().
*----------------------------------------------------------------------------*/

floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
                         float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff, aExpOrig;
    uint64_t aSig0, aSig1, bSig;
    uint64_t q, term0, term1, alternateASig0, alternateASig1;

    *quotient = 0;
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    /* Keep the exponent field as stored; aExp may be changed below. */
    aExpOrig = aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    /* `a' is a NaN or an infinity. */
    if ( aExp == 0x7FFF ) {
        if (    (uint64_t) ( aSig0<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Remainder of an infinity is invalid. */
        goto invalid;
    }
    /* `a' is finite; `b' is a NaN or an infinity. */
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Finite `a' with infinite `b': the result is `a' itself. */
        if (aExp == 0 && aSig0 >> 63) {
            /*
             * Pseudo-denormal argument must be returned in normalized
             * form.
             */
            return packFloatx80(aSign, 1, aSig0);
        }
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* Remainder with respect to zero is invalid. */
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        /* A zero `a' is returned unchanged. */
        if ( aSig0 == 0 ) return a;
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if ( expDiff < 0 ) {
        /*
         * `a' has the smaller exponent.  For 'mod', or when the exponents
         * differ by more than one, `a' is already the result.
         */
        if ( mod || expDiff < -1 ) {
            if (aExp == 1 && aExpOrig == 0) {
                /*
                 * Pseudo-denormal argument must be returned in
                 * normalized form.
                 */
                return packFloatx80(aSign, aExp, aSig0);
            }
            return a;
        }
        /*
         * Exponent difference of exactly -1 with 'mod' false: shift `a'
         * right by one bit into (aSig0, aSig1) and continue with a
         * difference of zero.
         */
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
        expDiff = 0;
    }
    /* First quotient bit: 1 if bSig fits into aSig0. */
    *quotient = q = ( bSig <= aSig0 );
    if ( q ) aSig0 -= bSig;
    /*
     * While more than 64 bits of exponent difference remain, produce 62
     * quotient bits per iteration.  The estimate is lowered by 2 (clamped at
     * 0) before it is used; the remainder is then shifted left by 62 bits
     * and '*quotient' accumulates the low 64 bits of the quotient.
     */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        mul64To128( bSig, q, &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        expDiff -= 62;
        *quotient <<= 62;
        *quotient += q;
    }
    expDiff += 64;
    /* Here expDiff is at most 64. */
    if ( 0 < expDiff ) {
        /*
         * Final step for the remaining expDiff bits: keep only the top
         * expDiff bits of the lowered estimate, subtract the matching
         * multiple of bSig, then correct q upward while the shifted divisor
         * (term0, term1) still fits into the remainder.
         */
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
            ++q;
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        }
        /*
         * Shifting a 64-bit value by 64 is undefined in C, so that case
         * clears the accumulated quotient explicitly.
         */
        if (expDiff < 64) {
            *quotient <<= expDiff;
        } else {
            *quotient = 0;
        }
        *quotient += q;
    }
    else {
        /* No final step: the divisor used below is bSig in the high word. */
        term1 = 0;
        term0 = bSig;
    }
    if (!mod) {
        /*
         * The alternate remainder is (term0, term1) minus the current
         * remainder.  It is chosen when it is the smaller of the two, or on
         * a tie when q is odd; the result sign is then flipped and the
         * quotient incremented.
         */
        sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
        if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
                || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
                        && ( q & 1 ) )
            ) {
            aSig0 = alternateASig0;
            aSig1 = alternateASig1;
            zSign = ! zSign;
            ++*quotient;
        }
    }
    /* The result is always packed with a precision argument of 80. */
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1, status);

}
6493
6494 /*----------------------------------------------------------------------------
6495 | Returns the remainder of the extended double-precision floating-point value
6496 | `a' with respect to the corresponding value `b'.  The operation is performed
6497 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6498 *----------------------------------------------------------------------------*/
6499
6500 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6501 {
6502     uint64_t quotient;
6503     return floatx80_modrem(a, b, false, &quotient, status);
6504 }
6505
6506 /*----------------------------------------------------------------------------
6507 | Returns the remainder of the extended double-precision floating-point value
6508 | `a' with respect to the corresponding value `b', with the quotient truncated
6509 | toward zero.
6510 *----------------------------------------------------------------------------*/
6511
6512 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6513 {
6514     uint64_t quotient;
6515     return floatx80_modrem(a, b, true, &quotient, status);
6516 }
6517
/*----------------------------------------------------------------------------
| Returns the square root of the extended double-precision floating-point
| value `a'.  The operation is performed according to the IEC/IEEE Standard
| for Binary Floating-Point Arithmetic.
|
| An invalid encoding, negative infinity, or any negative value other than
| negative zero raises the invalid exception and returns the default NaN.
| A NaN is passed to propagateFloatx80NaN(); positive infinity and negative
| zero are returned unchanged; a positive zero returns a positive zero.
*----------------------------------------------------------------------------*/

floatx80 floatx80_sqrt(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    /* Maximum exponent: NaN or infinity. */
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig0 << 1)) {
            return propagateFloatx80NaN(a, a, status);
        }
        /* +infinity is its own square root; -infinity is invalid. */
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* Negative zero is returned as is; other negatives are invalid. */
        if ( ( aExp | aSig0 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    /* Halve the exponent after removing the 0x3FFF bias, then re-bias. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
    /* Initial estimate from the exponent and the top 32 significand bits. */
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
    /* Shift right by 2 or 3 bits depending on the low bit of the exponent. */
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
    /* Refine the estimate with one division by the initial estimate. */
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    /*
     * Remainder (aSig0, aSig1) - zSig0^2.  While it is negative, step zSig0
     * down by one and add 2*zSig0 + 1 back; zSig0>>63 supplies the bit of
     * 2*zSig0 that does not fit in doubleZSig0.
     */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Second result word, estimated from the remainder. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    /*
     * Only when the low 62 bits of the estimate are at most 5 is the exact
     * 192-bit remainder computed and the estimate corrected; the lowest bit
     * of zSig1 is then set if any remainder is left over.
     */
    if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    /* Shift zSig1 left by one bit and merge doubleZSig0 into the high word. */
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= doubleZSig0;
    /* The result is always packed with a positive sign. */
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                0, zExp, zSig0, zSig1, status);
}
6588
6589 /*----------------------------------------------------------------------------
6590 | Returns the result of converting the quadruple-precision floating-point
6591 | value `a' to the 32-bit two's complement integer format.  The conversion
6592 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6593 | Arithmetic---which means in particular that the conversion is rounded
6594 | according to the current rounding mode.  If `a' is a NaN, the largest
6595 | positive integer is returned.  Otherwise, if the conversion overflows, the
6596 | largest integer with the same sign as `a' is returned.
6597 *----------------------------------------------------------------------------*/
6598
6599 int32_t float128_to_int32(float128 a, float_status *status)
6600 {
6601     bool aSign;
6602     int32_t aExp, shiftCount;
6603     uint64_t aSig0, aSig1;
6604
6605     aSig1 = extractFloat128Frac1( a );
6606     aSig0 = extractFloat128Frac0( a );
6607     aExp = extractFloat128Exp( a );
6608     aSign = extractFloat128Sign( a );
6609     if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
6610     if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
6611     aSig0 |= ( aSig1 != 0 );
6612     shiftCount = 0x4028 - aExp;
6613     if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
6614     return roundAndPackInt32(aSign, aSig0, status);
6615
6616 }
6617
6618 /*----------------------------------------------------------------------------
6619 | Returns the result of converting the quadruple-precision floating-point
6620 | value `a' to the 32-bit two's complement integer format.  The conversion
6621 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6622 | Arithmetic, except that the conversion is always rounded toward zero.  If
6623 | `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
6624 | conversion overflows, the largest integer with the same sign as `a' is
6625 | returned.
6626 *----------------------------------------------------------------------------*/
6627
6628 int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
6629 {
6630     bool aSign;
6631     int32_t aExp, shiftCount;
6632     uint64_t aSig0, aSig1, savedASig;
6633     int32_t z;
6634
6635     aSig1 = extractFloat128Frac1( a );
6636     aSig0 = extractFloat128Frac0( a );
6637     aExp = extractFloat128Exp( a );
6638     aSign = extractFloat128Sign( a );
6639     aSig0 |= ( aSig1 != 0 );
6640     if ( 0x401E < aExp ) {
6641         if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
6642         goto invalid;
6643     }
6644     else if ( aExp < 0x3FFF ) {
6645         if (aExp || aSig0) {
6646             float_raise(float_flag_inexact, status);
6647         }
6648         return 0;
6649     }
6650     aSig0 |= UINT64_C(0x0001000000000000);
6651     shiftCount = 0x402F - aExp;
6652     savedASig = aSig0;
6653     aSig0 >>= shiftCount;
6654     z = aSig0;
6655     if ( aSign ) z = - z;
6656     if ( ( z < 0 ) ^ aSign ) {
6657  invalid:
6658         float_raise(float_flag_invalid, status);
6659         return aSign ? INT32_MIN : INT32_MAX;
6660     }
6661     if ( ( aSig0<<shiftCount ) != savedASig ) {
6662         float_raise(float_flag_inexact, status);
6663     }
6664     return z;
6665
6666 }
6667
6668 /*----------------------------------------------------------------------------
6669 | Returns the result of converting the quadruple-precision floating-point
6670 | value `a' to the 64-bit two's complement integer format.  The conversion
6671 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6672 | Arithmetic---which means in particular that the conversion is rounded
6673 | according to the current rounding mode.  If `a' is a NaN, the largest
6674 | positive integer is returned.  Otherwise, if the conversion overflows, the
6675 | largest integer with the same sign as `a' is returned.
6676 *----------------------------------------------------------------------------*/
6677
6678 int64_t float128_to_int64(float128 a, float_status *status)
6679 {
6680     bool aSign;
6681     int32_t aExp, shiftCount;
6682     uint64_t aSig0, aSig1;
6683
6684     aSig1 = extractFloat128Frac1( a );
6685     aSig0 = extractFloat128Frac0( a );
6686     aExp = extractFloat128Exp( a );
6687     aSign = extractFloat128Sign( a );
6688     if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
6689     shiftCount = 0x402F - aExp;
6690     if ( shiftCount <= 0 ) {
6691         if ( 0x403E < aExp ) {
6692             float_raise(float_flag_invalid, status);
6693             if (    ! aSign
6694                  || (    ( aExp == 0x7FFF )
6695                       && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
6696                     )
6697                ) {
6698                 return INT64_MAX;
6699             }
6700             return INT64_MIN;
6701         }
6702         shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6703     }
6704     else {
6705         shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6706     }
6707     return roundAndPackInt64(aSign, aSig0, aSig1, status);
6708
6709 }
6710
6711 /*----------------------------------------------------------------------------
6712 | Returns the result of converting the quadruple-precision floating-point
6713 | value `a' to the 64-bit two's complement integer format.  The conversion
6714 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6715 | Arithmetic, except that the conversion is always rounded toward zero.
6716 | If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
6717 | the conversion overflows, the largest integer with the same sign as `a' is
6718 | returned.
6719 *----------------------------------------------------------------------------*/
6720
6721 int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
6722 {
6723     bool aSign;
6724     int32_t aExp, shiftCount;
6725     uint64_t aSig0, aSig1;
6726     int64_t z;
6727
6728     aSig1 = extractFloat128Frac1( a );
6729     aSig0 = extractFloat128Frac0( a );
6730     aExp = extractFloat128Exp( a );
6731     aSign = extractFloat128Sign( a );
6732     if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
6733     shiftCount = aExp - 0x402F;
6734     if ( 0 < shiftCount ) {
6735         if ( 0x403E <= aExp ) {
6736             aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
6737             if (    ( a.high == UINT64_C(0xC03E000000000000) )
6738                  && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
6739                 if (aSig1) {
6740                     float_raise(float_flag_inexact, status);
6741                 }
6742             }
6743             else {
6744                 float_raise(float_flag_invalid, status);
6745                 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
6746                     return INT64_MAX;
6747                 }
6748             }
6749             return INT64_MIN;
6750         }
6751         z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
6752         if ( (uint64_t) ( aSig1<<shiftCount ) ) {
6753             float_raise(float_flag_inexact, status);
6754         }
6755     }
6756     else {
6757         if ( aExp < 0x3FFF ) {
6758             if ( aExp | aSig0 | aSig1 ) {
6759                 float_raise(float_flag_inexact, status);
6760             }
6761             return 0;
6762         }
6763         z = aSig0>>( - shiftCount );
6764         if (    aSig1
6765              || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
6766             float_raise(float_flag_inexact, status);
6767         }
6768     }
6769     if ( aSign ) z = - z;
6770     return z;
6771
6772 }
6773
6774 /*----------------------------------------------------------------------------
6775 | Returns the result of converting the quadruple-precision floating-point value
6776 | `a' to the 64-bit unsigned integer format.  The conversion is
6777 | performed according to the IEC/IEEE Standard for Binary Floating-Point
6778 | Arithmetic---which means in particular that the conversion is rounded
6779 | according to the current rounding mode.  If `a' is a NaN, the largest
6780 | positive integer is returned.  If the conversion overflows, the
6781 | largest unsigned integer is returned.  If 'a' is negative, the value is
6782 | rounded and zero is returned; negative values that do not round to zero
6783 | will raise the inexact exception.
6784 *----------------------------------------------------------------------------*/
6785
6786 uint64_t float128_to_uint64(float128 a, float_status *status)
6787 {
6788     bool aSign;
6789     int aExp;
6790     int shiftCount;
6791     uint64_t aSig0, aSig1;
6792
6793     aSig0 = extractFloat128Frac0(a);
6794     aSig1 = extractFloat128Frac1(a);
6795     aExp = extractFloat128Exp(a);
6796     aSign = extractFloat128Sign(a);
6797     if (aSign && (aExp > 0x3FFE)) {
6798         float_raise(float_flag_invalid, status);
6799         if (float128_is_any_nan(a)) {
6800             return UINT64_MAX;
6801         } else {
6802             return 0;
6803         }
6804     }
6805     if (aExp) {
6806         aSig0 |= UINT64_C(0x0001000000000000);
6807     }
6808     shiftCount = 0x402F - aExp;
6809     if (shiftCount <= 0) {
6810         if (0x403E < aExp) {
6811             float_raise(float_flag_invalid, status);
6812             return UINT64_MAX;
6813         }
6814         shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6815     } else {
6816         shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6817     }
6818     return roundAndPackUint64(aSign, aSig0, aSig1, status);
6819 }
6820
6821 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6822 {
6823     uint64_t v;
6824     signed char current_rounding_mode = status->float_rounding_mode;
6825
6826     set_float_rounding_mode(float_round_to_zero, status);
6827     v = float128_to_uint64(a, status);
6828     set_float_rounding_mode(current_rounding_mode, status);
6829
6830     return v;
6831 }
6832
6833 /*----------------------------------------------------------------------------
6834 | Returns the result of converting the quadruple-precision floating-point
6835 | value `a' to the 32-bit unsigned integer format.  The conversion
6836 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6837 | Arithmetic except that the conversion is always rounded toward zero.
6838 | If `a' is a NaN, the largest positive integer is returned.  Otherwise,
6839 | if the conversion overflows, the largest unsigned integer is returned.
6840 | If 'a' is negative, the value is rounded and zero is returned; negative
6841 | values that do not round to zero will raise the inexact exception.
6842 *----------------------------------------------------------------------------*/
6843
6844 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6845 {
6846     uint64_t v;
6847     uint32_t res;
6848     int old_exc_flags = get_float_exception_flags(status);
6849
6850     v = float128_to_uint64_round_to_zero(a, status);
6851     if (v > 0xffffffff) {
6852         res = 0xffffffff;
6853     } else {
6854         return v;
6855     }
6856     set_float_exception_flags(old_exc_flags, status);
6857     float_raise(float_flag_invalid, status);
6858     return res;
6859 }
6860
6861 /*----------------------------------------------------------------------------
6862 | Returns the result of converting the quadruple-precision floating-point value
6863 | `a' to the 32-bit unsigned integer format.  The conversion is
6864 | performed according to the IEC/IEEE Standard for Binary Floating-Point
6865 | Arithmetic---which means in particular that the conversion is rounded
6866 | according to the current rounding mode.  If `a' is a NaN, the largest
6867 | positive integer is returned.  If the conversion overflows, the
6868 | largest unsigned integer is returned.  If 'a' is negative, the value is
6869 | rounded and zero is returned; negative values that do not round to zero
6870 | will raise the inexact exception.
6871 *----------------------------------------------------------------------------*/
6872
6873 uint32_t float128_to_uint32(float128 a, float_status *status)
6874 {
6875     uint64_t v;
6876     uint32_t res;
6877     int old_exc_flags = get_float_exception_flags(status);
6878
6879     v = float128_to_uint64(a, status);
6880     if (v > 0xffffffff) {
6881         res = 0xffffffff;
6882     } else {
6883         return v;
6884     }
6885     set_float_exception_flags(old_exc_flags, status);
6886     float_raise(float_flag_invalid, status);
6887     return res;
6888 }
6889
6890 /*----------------------------------------------------------------------------
6891 | Returns the result of converting the quadruple-precision floating-point
6892 | value `a' to the single-precision floating-point format.  The conversion
6893 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6894 | Arithmetic.
6895 *----------------------------------------------------------------------------*/
6896
6897 float32 float128_to_float32(float128 a, float_status *status)
6898 {
6899     bool aSign;
6900     int32_t aExp;
6901     uint64_t aSig0, aSig1;
6902     uint32_t zSig;
6903
6904     aSig1 = extractFloat128Frac1( a );
6905     aSig0 = extractFloat128Frac0( a );
6906     aExp = extractFloat128Exp( a );
6907     aSign = extractFloat128Sign( a );
6908     if ( aExp == 0x7FFF ) {
6909         if ( aSig0 | aSig1 ) {
6910             return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
6911         }
6912         return packFloat32( aSign, 0xFF, 0 );
6913     }
6914     aSig0 |= ( aSig1 != 0 );
6915     shift64RightJamming( aSig0, 18, &aSig0 );
6916     zSig = aSig0;
6917     if ( aExp || zSig ) {
6918         zSig |= 0x40000000;
6919         aExp -= 0x3F81;
6920     }
6921     return roundAndPackFloat32(aSign, aExp, zSig, status);
6922
6923 }
6924
6925 /*----------------------------------------------------------------------------
6926 | Returns the result of converting the quadruple-precision floating-point
6927 | value `a' to the double-precision floating-point format.  The conversion
6928 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6929 | Arithmetic.
6930 *----------------------------------------------------------------------------*/
6931
6932 float64 float128_to_float64(float128 a, float_status *status)
6933 {
6934     bool aSign;
6935     int32_t aExp;
6936     uint64_t aSig0, aSig1;
6937
6938     aSig1 = extractFloat128Frac1( a );
6939     aSig0 = extractFloat128Frac0( a );
6940     aExp = extractFloat128Exp( a );
6941     aSign = extractFloat128Sign( a );
6942     if ( aExp == 0x7FFF ) {
6943         if ( aSig0 | aSig1 ) {
6944             return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
6945         }
6946         return packFloat64( aSign, 0x7FF, 0 );
6947     }
6948     shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6949     aSig0 |= ( aSig1 != 0 );
6950     if ( aExp || aSig0 ) {
6951         aSig0 |= UINT64_C(0x4000000000000000);
6952         aExp -= 0x3C01;
6953     }
6954     return roundAndPackFloat64(aSign, aExp, aSig0, status);
6955
6956 }
6957
6958 /*----------------------------------------------------------------------------
6959 | Returns the result of converting the quadruple-precision floating-point
6960 | value `a' to the extended double-precision floating-point format.  The
6961 | conversion is performed according to the IEC/IEEE Standard for Binary
6962 | Floating-Point Arithmetic.
6963 *----------------------------------------------------------------------------*/
6964
6965 floatx80 float128_to_floatx80(float128 a, float_status *status)
6966 {
6967     bool aSign;
6968     int32_t aExp;
6969     uint64_t aSig0, aSig1;
6970
6971     aSig1 = extractFloat128Frac1( a );
6972     aSig0 = extractFloat128Frac0( a );
6973     aExp = extractFloat128Exp( a );
6974     aSign = extractFloat128Sign( a );
6975     if ( aExp == 0x7FFF ) {
6976         if ( aSig0 | aSig1 ) {
6977             floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6978                                                status);
6979             return floatx80_silence_nan(res, status);
6980         }
6981         return packFloatx80(aSign, floatx80_infinity_high,
6982                                    floatx80_infinity_low);
6983     }
6984     if ( aExp == 0 ) {
6985         if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6986         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6987     }
6988     else {
6989         aSig0 |= UINT64_C(0x0001000000000000);
6990     }
6991     shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
6992     return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
6993
6994 }
6995
6996 /*----------------------------------------------------------------------------
6997 | Rounds the quadruple-precision floating-point value `a' to an integer, and
6998 | returns the result as a quadruple-precision floating-point value.  The
6999 | operation is performed according to the IEC/IEEE Standard for Binary
7000 | Floating-Point Arithmetic.
7001 *----------------------------------------------------------------------------*/
7002
7003 float128 float128_round_to_int(float128 a, float_status *status)
7004 {
7005     bool aSign;
7006     int32_t aExp;
7007     uint64_t lastBitMask, roundBitsMask;
7008     float128 z;
7009
7010     aExp = extractFloat128Exp( a );
7011     if ( 0x402F <= aExp ) {
7012         if ( 0x406F <= aExp ) {
7013             if (    ( aExp == 0x7FFF )
7014                  && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
7015                ) {
7016                 return propagateFloat128NaN(a, a, status);
7017             }
7018             return a;
7019         }
7020         lastBitMask = 1;
7021         lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
7022         roundBitsMask = lastBitMask - 1;
7023         z = a;
7024         switch (status->float_rounding_mode) {
7025         case float_round_nearest_even:
7026             if ( lastBitMask ) {
7027                 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
7028                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
7029             }
7030             else {
7031                 if ( (int64_t) z.low < 0 ) {
7032                     ++z.high;
7033                     if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
7034                 }
7035             }
7036             break;
7037         case float_round_ties_away:
7038             if (lastBitMask) {
7039                 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
7040             } else {
7041                 if ((int64_t) z.low < 0) {
7042                     ++z.high;
7043                 }
7044             }
7045             break;
7046         case float_round_to_zero:
7047             break;
7048         case float_round_up:
7049             if (!extractFloat128Sign(z)) {
7050                 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
7051             }
7052             break;
7053         case float_round_down:
7054             if (extractFloat128Sign(z)) {
7055                 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
7056             }
7057             break;
7058         case float_round_to_odd:
7059             /*
7060              * Note that if lastBitMask == 0, the last bit is the lsb
7061              * of high, and roundBitsMask == -1.
7062              */
7063             if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
7064                 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
7065             }
7066             break;
7067         default:
7068             abort();
7069         }
7070         z.low &= ~ roundBitsMask;
7071     }
7072     else {
7073         if ( aExp < 0x3FFF ) {
7074             if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
7075             float_raise(float_flag_inexact, status);
7076             aSign = extractFloat128Sign( a );
7077             switch (status->float_rounding_mode) {
7078             case float_round_nearest_even:
7079                 if (    ( aExp == 0x3FFE )
7080                      && (   extractFloat128Frac0( a )
7081                           | extractFloat128Frac1( a ) )
7082                    ) {
7083                     return packFloat128( aSign, 0x3FFF, 0, 0 );
7084                 }
7085                 break;
7086             case float_round_ties_away:
7087                 if (aExp == 0x3FFE) {
7088                     return packFloat128(aSign, 0x3FFF, 0, 0);
7089                 }
7090                 break;
7091             case float_round_down:
7092                 return
7093                       aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
7094                     : packFloat128( 0, 0, 0, 0 );
7095             case float_round_up:
7096                 return
7097                       aSign ? packFloat128( 1, 0, 0, 0 )
7098                     : packFloat128( 0, 0x3FFF, 0, 0 );
7099
7100             case float_round_to_odd:
7101                 return packFloat128(aSign, 0x3FFF, 0, 0);
7102
7103             case float_round_to_zero:
7104                 break;
7105             }
7106             return packFloat128( aSign, 0, 0, 0 );
7107         }
7108         lastBitMask = 1;
7109         lastBitMask <<= 0x402F - aExp;
7110         roundBitsMask = lastBitMask - 1;
7111         z.low = 0;
7112         z.high = a.high;
7113         switch (status->float_rounding_mode) {
7114         case float_round_nearest_even:
7115             z.high += lastBitMask>>1;
7116             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
7117                 z.high &= ~ lastBitMask;
7118             }
7119             break;
7120         case float_round_ties_away:
7121             z.high += lastBitMask>>1;
7122             break;
7123         case float_round_to_zero:
7124             break;
7125         case float_round_up:
7126             if (!extractFloat128Sign(z)) {
7127                 z.high |= ( a.low != 0 );
7128                 z.high += roundBitsMask;
7129             }
7130             break;
7131         case float_round_down:
7132             if (extractFloat128Sign(z)) {
7133                 z.high |= (a.low != 0);
7134                 z.high += roundBitsMask;
7135             }
7136             break;
7137         case float_round_to_odd:
7138             if ((z.high & lastBitMask) == 0) {
7139                 z.high |= (a.low != 0);
7140                 z.high += roundBitsMask;
7141             }
7142             break;
7143         default:
7144             abort();
7145         }
7146         z.high &= ~ roundBitsMask;
7147     }
7148     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
7149         float_raise(float_flag_inexact, status);
7150     }
7151     return z;
7152
7153 }
7154
7155 /*----------------------------------------------------------------------------
7156 | Returns the remainder of the quadruple-precision floating-point value `a'
7157 | with respect to the corresponding value `b'.  The operation is performed
7158 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7159 *----------------------------------------------------------------------------*/
7160
7161 float128 float128_rem(float128 a, float128 b, float_status *status)
7162 {
7163     bool aSign, zSign;
7164     int32_t aExp, bExp, expDiff;
7165     uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
7166     uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
7167     int64_t sigMean0;
7168
7169     aSig1 = extractFloat128Frac1( a );
7170     aSig0 = extractFloat128Frac0( a );
7171     aExp = extractFloat128Exp( a );
7172     aSign = extractFloat128Sign( a );
7173     bSig1 = extractFloat128Frac1( b );
7174     bSig0 = extractFloat128Frac0( b );
7175     bExp = extractFloat128Exp( b );
7176     if ( aExp == 0x7FFF ) {
7177         if (    ( aSig0 | aSig1 )
7178              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
7179             return propagateFloat128NaN(a, b, status);
7180         }
7181         goto invalid;
7182     }
7183     if ( bExp == 0x7FFF ) {
7184         if (bSig0 | bSig1) {
7185             return propagateFloat128NaN(a, b, status);
7186         }
7187         return a;
7188     }
7189     if ( bExp == 0 ) {
7190         if ( ( bSig0 | bSig1 ) == 0 ) {
7191  invalid:
7192             float_raise(float_flag_invalid, status);
7193             return float128_default_nan(status);
7194         }
7195         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7196     }
7197     if ( aExp == 0 ) {
7198         if ( ( aSig0 | aSig1 ) == 0 ) return a;
7199         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7200     }
7201     expDiff = aExp - bExp;
7202     if ( expDiff < -1 ) return a;
7203     shortShift128Left(
7204         aSig0 | UINT64_C(0x0001000000000000),
7205         aSig1,
7206         15 - ( expDiff < 0 ),
7207         &aSig0,
7208         &aSig1
7209     );
7210     shortShift128Left(
7211         bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
7212     q = le128( bSig0, bSig1, aSig0, aSig1 );
7213     if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7214     expDiff -= 64;
7215     while ( 0 < expDiff ) {
7216         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7217         q = ( 4 < q ) ? q - 4 : 0;
7218         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7219         shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
7220         shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
7221         sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
7222         expDiff -= 61;
7223     }
7224     if ( -64 < expDiff ) {
7225         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7226         q = ( 4 < q ) ? q - 4 : 0;
7227         q >>= - expDiff;
7228         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7229         expDiff += 52;
7230         if ( expDiff < 0 ) {
7231             shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7232         }
7233         else {
7234             shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
7235         }
7236         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7237         sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
7238     }
7239     else {
7240         shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
7241         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7242     }
7243     do {
7244         alternateASig0 = aSig0;
7245         alternateASig1 = aSig1;
7246         ++q;
7247         sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7248     } while ( 0 <= (int64_t) aSig0 );
7249     add128(
7250         aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
7251     if (    ( sigMean0 < 0 )
7252          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
7253         aSig0 = alternateASig0;
7254         aSig1 = alternateASig1;
7255     }
7256     zSign = ( (int64_t) aSig0 < 0 );
7257     if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
7258     return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
7259                                          status);
7260 }
7261
7262 /*----------------------------------------------------------------------------
7263 | Returns the square root of the quadruple-precision floating-point value `a'.
7264 | The operation is performed according to the IEC/IEEE Standard for Binary
7265 | Floating-Point Arithmetic.
7266 *----------------------------------------------------------------------------*/
7267
7268 float128 float128_sqrt(float128 a, float_status *status)
7269 {
7270     bool aSign;
7271     int32_t aExp, zExp;
7272     uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
7273     uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
7274
7275     aSig1 = extractFloat128Frac1( a );
7276     aSig0 = extractFloat128Frac0( a );
7277     aExp = extractFloat128Exp( a );
7278     aSign = extractFloat128Sign( a );
7279     if ( aExp == 0x7FFF ) {
7280         if (aSig0 | aSig1) {
7281             return propagateFloat128NaN(a, a, status);
7282         }
7283         if ( ! aSign ) return a;
7284         goto invalid;
7285     }
7286     if ( aSign ) {
7287         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
7288  invalid:
7289         float_raise(float_flag_invalid, status);
7290         return float128_default_nan(status);
7291     }
7292     if ( aExp == 0 ) {
7293         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
7294         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7295     }
7296     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
7297     aSig0 |= UINT64_C(0x0001000000000000);
7298     zSig0 = estimateSqrt32( aExp, aSig0>>17 );
7299     shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
7300     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
7301     doubleZSig0 = zSig0<<1;
7302     mul64To128( zSig0, zSig0, &term0, &term1 );
7303     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
7304     while ( (int64_t) rem0 < 0 ) {
7305         --zSig0;
7306         doubleZSig0 -= 2;
7307         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
7308     }
7309     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
7310     if ( ( zSig1 & 0x1FFF ) <= 5 ) {
7311         if ( zSig1 == 0 ) zSig1 = 1;
7312         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
7313         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
7314         mul64To128( zSig1, zSig1, &term2, &term3 );
7315         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
7316         while ( (int64_t) rem1 < 0 ) {
7317             --zSig1;
7318             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
7319             term3 |= 1;
7320             term2 |= doubleZSig0;
7321             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
7322         }
7323         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7324     }
7325     shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
7326     return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
7327
7328 }
7329
7330 static inline FloatRelation
7331 floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
7332                           float_status *status)
7333 {
7334     bool aSign, bSign;
7335
7336     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7337         float_raise(float_flag_invalid, status);
7338         return float_relation_unordered;
7339     }
7340     if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7341           ( extractFloatx80Frac( a )<<1 ) ) ||
7342         ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7343           ( extractFloatx80Frac( b )<<1 ) )) {
7344         if (!is_quiet ||
7345             floatx80_is_signaling_nan(a, status) ||
7346             floatx80_is_signaling_nan(b, status)) {
7347             float_raise(float_flag_invalid, status);
7348         }
7349         return float_relation_unordered;
7350     }
7351     aSign = extractFloatx80Sign( a );
7352     bSign = extractFloatx80Sign( b );
7353     if ( aSign != bSign ) {
7354
7355         if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7356              ( ( a.low | b.low ) == 0 ) ) {
7357             /* zero case */
7358             return float_relation_equal;
7359         } else {
7360             return 1 - (2 * aSign);
7361         }
7362     } else {
7363         /* Normalize pseudo-denormals before comparison.  */
7364         if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
7365             ++a.high;
7366         }
7367         if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
7368             ++b.high;
7369         }
7370         if (a.low == b.low && a.high == b.high) {
7371             return float_relation_equal;
7372         } else {
7373             return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7374         }
7375     }
7376 }
7377
7378 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
7379 {
7380     return floatx80_compare_internal(a, b, 0, status);
7381 }
7382
7383 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
7384                                      float_status *status)
7385 {
7386     return floatx80_compare_internal(a, b, 1, status);
7387 }
7388
7389 static inline FloatRelation
7390 float128_compare_internal(float128 a, float128 b, bool is_quiet,
7391                           float_status *status)
7392 {
7393     bool aSign, bSign;
7394
7395     if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7396           ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7397         ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7398           ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7399         if (!is_quiet ||
7400             float128_is_signaling_nan(a, status) ||
7401             float128_is_signaling_nan(b, status)) {
7402             float_raise(float_flag_invalid, status);
7403         }
7404         return float_relation_unordered;
7405     }
7406     aSign = extractFloat128Sign( a );
7407     bSign = extractFloat128Sign( b );
7408     if ( aSign != bSign ) {
7409         if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7410             /* zero case */
7411             return float_relation_equal;
7412         } else {
7413             return 1 - (2 * aSign);
7414         }
7415     } else {
7416         if (a.low == b.low && a.high == b.high) {
7417             return float_relation_equal;
7418         } else {
7419             return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7420         }
7421     }
7422 }
7423
7424 FloatRelation float128_compare(float128 a, float128 b, float_status *status)
7425 {
7426     return float128_compare_internal(a, b, 0, status);
7427 }
7428
7429 FloatRelation float128_compare_quiet(float128 a, float128 b,
7430                                      float_status *status)
7431 {
7432     return float128_compare_internal(a, b, 1, status);
7433 }
7434
7435 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
7436 {
7437     bool aSign;
7438     int32_t aExp;
7439     uint64_t aSig;
7440
7441     if (floatx80_invalid_encoding(a)) {
7442         float_raise(float_flag_invalid, status);
7443         return floatx80_default_nan(status);
7444     }
7445     aSig = extractFloatx80Frac( a );
7446     aExp = extractFloatx80Exp( a );
7447     aSign = extractFloatx80Sign( a );
7448
7449     if ( aExp == 0x7FFF ) {
7450         if ( aSig<<1 ) {
7451             return propagateFloatx80NaN(a, a, status);
7452         }
7453         return a;
7454     }
7455
7456     if (aExp == 0) {
7457         if (aSig == 0) {
7458             return a;
7459         }
7460         aExp++;
7461     }
7462
7463     if (n > 0x10000) {
7464         n = 0x10000;
7465     } else if (n < -0x10000) {
7466         n = -0x10000;
7467     }
7468
7469     aExp += n;
7470     return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7471                                          aSign, aExp, aSig, 0, status);
7472 }
7473
7474 float128 float128_scalbn(float128 a, int n, float_status *status)
7475 {
7476     bool aSign;
7477     int32_t aExp;
7478     uint64_t aSig0, aSig1;
7479
7480     aSig1 = extractFloat128Frac1( a );
7481     aSig0 = extractFloat128Frac0( a );
7482     aExp = extractFloat128Exp( a );
7483     aSign = extractFloat128Sign( a );
7484     if ( aExp == 0x7FFF ) {
7485         if ( aSig0 | aSig1 ) {
7486             return propagateFloat128NaN(a, a, status);
7487         }
7488         return a;
7489     }
7490     if (aExp != 0) {
7491         aSig0 |= UINT64_C(0x0001000000000000);
7492     } else if (aSig0 == 0 && aSig1 == 0) {
7493         return a;
7494     } else {
7495         aExp++;
7496     }
7497
7498     if (n > 0x10000) {
7499         n = 0x10000;
7500     } else if (n < -0x10000) {
7501         n = -0x10000;
7502     }
7503
7504     aExp += n - 1;
7505     return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
7506                                          , status);
7507
7508 }
7509
7510 static void __attribute__((constructor)) softfloat_init(void)
7511 {
7512     union_float64 ua, ub, uc, ur;
7513
7514     if (QEMU_NO_HARDFLOAT) {
7515         return;
7516     }
7517     /*
7518      * Test that the host's FMA is not obviously broken. For example,
7519      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
7520      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
7521      */
7522     ua.s = 0x0020000000000001ULL;
7523     ub.s = 0x3ca0000000000000ULL;
7524     uc.s = 0x0020000000000000ULL;
7525     ur.h = fma(ua.h, ub.h, uc.h);
7526     if (ur.s != 0x0020000000000001ULL) {
7527         force_soft_fma = true;
7528     }
7529 }