fpu/softfloat.c

   1 /*
   2  * QEMU float support
   3  *
   4  * The code in this source file is derived from release 2a of the SoftFloat
   5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6  * some later contributions) are provided under that license, as detailed below.
   7  * It has subsequently been modified by contributors to the QEMU Project,
   8  * so some portions are provided under:
   9  *  the SoftFloat-2a license
  10  *  the BSD license
  11  *  GPL-v2-or-later
  12  *
  13  * Any future contributions to this file after December 1st 2014 will be
  14  * taken to be licensed under the Softfloat-2a license unless specifically
  15  * indicated otherwise.
  16  */
  17
  18 /*
  19 ===============================================================================
  20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
  21 Arithmetic Package, Release 2a.
  22
  23 Written by John R. Hauser.  This work was made possible in part by the
  24 International Computer Science Institute, located at Suite 600, 1947 Center
  25 Street, Berkeley, California 94704.  Funding was partially provided by the
  26 National Science Foundation under grant MIP-9311980.  The original version
  27 of this code was written as part of a project to build a fixed-point vector
  28 processor in collaboration with the University of California at Berkeley,
  29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  31 arithmetic/SoftFloat.html'.
  32
  33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  38
  39 Derivative works are acceptable, even for commercial purposes, so long as
  40 (1) they include prominent notice that the work is derivative, and (2) they
  41 include prominent notice akin to these four paragraphs for those parts of
  42 this code that are retained.
  43
  44 ===============================================================================
  45 */
  46
  47 /* BSD licensing:
  48  * Copyright (c) 2006, Fabrice Bellard
  49  * All rights reserved.
  50  *
  51  * Redistribution and use in source and binary forms, with or without
  52  * modification, are permitted provided that the following conditions are met:
  53  *
  54  * 1. Redistributions of source code must retain the above copyright notice,
  55  * this list of conditions and the following disclaimer.
  56  *
  57  * 2. Redistributions in binary form must reproduce the above copyright notice,
  58  * this list of conditions and the following disclaimer in the documentation
  59  * and/or other materials provided with the distribution.
  60  *
  61  * 3. Neither the name of the copyright holder nor the names of its contributors
  62  * may be used to endorse or promote products derived from this software without
  63  * specific prior written permission.
  64  *
  65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  75  * THE POSSIBILITY OF SUCH DAMAGE.
  76  */
  77
  78 /* Portions of this work are licensed under the terms of the GNU GPL,
  79  * version 2 or later. See the COPYING file in the top-level directory.
  80  */
  81
  82 /* softfloat (and in particular the code in softfloat-specialize.h) is
  83  * target-dependent and needs the TARGET_* macros.
  84  */
  85 #include "qemu/osdep.h"
  86 #include <math.h>
  87 #include "qemu/bitops.h"
  88 #include "fpu/softfloat.h"
  89
  90 /* We only need stdlib for abort() */
  91
  92 /*----------------------------------------------------------------------------
  93 | Primitive arithmetic functions, including multi-word arithmetic, and
  94 | division and square root approximations.  (Can be specialized to target if
  95 | desired.)
  96 *----------------------------------------------------------------------------*/
  97 #include "fpu/softfloat-macros.h"
  98
  99 /*
 100  * Hardfloat
 101  *
 102  * Fast emulation of guest FP instructions is challenging for two reasons.
 103  * First, FP instruction semantics are similar but not identical, particularly
 104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
 105  * exception flags is not trivial: reading the host's flags register with a
 106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 107  * and trapping on every FP exception is not fast nor pleasant to work with.
 108  *
 109  * We address these challenges by leveraging the host FPU for a subset of the
 110  * operations. To do this we expand on the idea presented in this paper:
 111  *
 112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 114  *
 115  * The idea is thus to leverage the host FPU to (1) compute FP operations
 116  * and (2) identify whether FP exceptions occurred while avoiding
 117  * expensive exception flag register accesses.
 118  *
 119  * An important optimization shown in the paper is that given that exception
 120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 121  * This is particularly useful for the inexact flag, which is very frequently
 122  * raised in floating-point workloads.
 123  *
 124  * We optimize the code further by deferring to soft-fp whenever FP exception
 125  * detection might get hairy. Two examples: (1) when at least one operand is
 126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 127  * and the result is < the minimum normal.
 128  */
 129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
 130     static inline void name(soft_t *a, float_status *s)                 \
 131     {                                                                   \
 132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
 133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
 134                                      soft_t ## _is_neg(*a));            \
 135             float_raise(float_flag_input_denormal, s);                  \
 136         }                                                               \
 137     }
 138
 139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
 140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
 141 #undef GEN_INPUT_FLUSH__NOCHECK
 142
 143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
 144     static inline void name(soft_t *a, float_status *s) \
 145     {                                                   \
 146         if (likely(!s->flush_inputs_to_zero)) {         \
 147             return;                                     \
 148         }                                               \
 149         soft_t ## _input_flush__nocheck(a, s);          \
 150     }
 151
 152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
 153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
 154 #undef GEN_INPUT_FLUSH1
 155
 156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
 157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
 158     {                                                                   \
 159         if (likely(!s->flush_inputs_to_zero)) {                         \
 160             return;                                                     \
 161         }                                                               \
 162         soft_t ## _input_flush__nocheck(a, s);                          \
 163         soft_t ## _input_flush__nocheck(b, s);                          \
 164     }
 165
 166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
 167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
 168 #undef GEN_INPUT_FLUSH2
 169
 170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
 171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
 172     {                                                                   \
 173         if (likely(!s->flush_inputs_to_zero)) {                         \
 174             return;                                                     \
 175         }                                                               \
 176         soft_t ## _input_flush__nocheck(a, s);                          \
 177         soft_t ## _input_flush__nocheck(b, s);                          \
 178         soft_t ## _input_flush__nocheck(c, s);                          \
 179     }
 180
 181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
 182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
 183 #undef GEN_INPUT_FLUSH3
 184
 185 /*
 186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
 187  * hardfloat functions. Each combination of number of inputs and float size
 188  * gets its own value.
 189  */
 190 #if defined(__x86_64__)
 191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
 193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
 195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
 197 #else
 198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
 200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
 202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
 204 #endif
 205
 206 /*
 207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 208  * float{32,64}_is_infinity when !USE_FP.
 209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 211  */
 212 #if defined(__x86_64__) || defined(__aarch64__)
 213 # define QEMU_HARDFLOAT_USE_ISINF   1
 214 #else
 215 # define QEMU_HARDFLOAT_USE_ISINF   0
 216 #endif
 217
 218 /*
 219  * Some targets clear the FP flags before most FP operations. This prevents
 220  * the use of hardfloat, since hardfloat relies on the inexact flag being
 221  * already set.
 222  */
 223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
 224 # if defined(__FAST_MATH__)
 225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
 226     IEEE implementation
 227 # endif
 228 # define QEMU_NO_HARDFLOAT 1
 229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
 230 #else
 231 # define QEMU_NO_HARDFLOAT 0
 232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
 233 #endif
 234
 235 static inline bool can_use_fpu(const float_status *s)
 236 {
 237     if (QEMU_NO_HARDFLOAT) {
 238         return false;
 239     }
 240     return likely(s->float_exception_flags & float_flag_inexact &&
 241                   s->float_rounding_mode == float_round_nearest_even);
 242 }
 243
 244 /*
 245  * Hardfloat generation functions. Each operation can have two flavors:
 246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 247  * most condition checks, or native ones (e.g. fpclassify).
 248  *
 249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
 250  * compiler to propagate constants and inline everything into the callers.
 251  *
 252  * We only generate functions for operations with two inputs, since only
 253  * these are common enough to justify consolidating them into common code.
 254  */
 255
/*
 * Overlays of a softfloat value with the host floating-point type of the
 * same width, so one object can be handed to either implementation.
 */
typedef union {
    float32 s;  /* softfloat view */
    float h;    /* host FPU view */
} union_float32;

typedef union {
    float64 s;  /* softfloat view */
    double h;   /* host FPU view */
} union_float64;
 265
/* Predicates over the two operands (used as the "pre"/"post" checks). */
typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

/* Two-input operations: softfloat flavour (takes the status) ... */
typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
/* ... and host FPU flavour (operates on native float/double only). */
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);
 273
 274 /* 2-input is-zero-or-normal */
 275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
 276 {
 277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
 278         /*
 279          * Not using a temp variable for consecutive fpclassify calls ends up
 280          * generating faster code.
 281          */
 282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 284     }
 285     return float32_is_zero_or_normal(a.s) &&
 286            float32_is_zero_or_normal(b.s);
 287 }
 288
 289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
 290 {
 291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
 292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 294     }
 295     return float64_is_zero_or_normal(a.s) &&
 296            float64_is_zero_or_normal(b.s);
 297 }
 298
 299 /* 3-input is-zero-or-normal */
 300 static inline
 301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
 302 {
 303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
 304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 307     }
 308     return float32_is_zero_or_normal(a.s) &&
 309            float32_is_zero_or_normal(b.s) &&
 310            float32_is_zero_or_normal(c.s);
 311 }
 312
 313 static inline
 314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
 315 {
 316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
 317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 320     }
 321     return float64_is_zero_or_normal(a.s) &&
 322            float64_is_zero_or_normal(b.s) &&
 323            float64_is_zero_or_normal(c.s);
 324 }
 325
 326 static inline bool f32_is_inf(union_float32 a)
 327 {
 328     if (QEMU_HARDFLOAT_USE_ISINF) {
 329         return isinf(a.h);
 330     }
 331     return float32_is_infinity(a.s);
 332 }
 333
 334 static inline bool f64_is_inf(union_float64 a)
 335 {
 336     if (QEMU_HARDFLOAT_USE_ISINF) {
 337         return isinf(a.h);
 338     }
 339     return float64_is_infinity(a.s);
 340 }
 341
 342 static inline float32
 343 float32_gen2(float32 xa, float32 xb, float_status *s,
 344              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
 345              f32_check_fn pre, f32_check_fn post)
 346 {
 347     union_float32 ua, ub, ur;
 348
 349     ua.s = xa;
 350     ub.s = xb;
 351
 352     if (unlikely(!can_use_fpu(s))) {
 353         goto soft;
 354     }
 355
 356     float32_input_flush2(&ua.s, &ub.s, s);
 357     if (unlikely(!pre(ua, ub))) {
 358         goto soft;
 359     }
 360
 361     ur.h = hard(ua.h, ub.h);
 362     if (unlikely(f32_is_inf(ur))) {
 363         float_raise(float_flag_overflow, s);
 364     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
 365         goto soft;
 366     }
 367     return ur.s;
 368
 369  soft:
 370     return soft(ua.s, ub.s, s);
 371 }
 372
 373 static inline float64
 374 float64_gen2(float64 xa, float64 xb, float_status *s,
 375              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
 376              f64_check_fn pre, f64_check_fn post)
 377 {
 378     union_float64 ua, ub, ur;
 379
 380     ua.s = xa;
 381     ub.s = xb;
 382
 383     if (unlikely(!can_use_fpu(s))) {
 384         goto soft;
 385     }
 386
 387     float64_input_flush2(&ua.s, &ub.s, s);
 388     if (unlikely(!pre(ua, ub))) {
 389         goto soft;
 390     }
 391
 392     ur.h = hard(ua.h, ub.h);
 393     if (unlikely(f64_is_inf(ur))) {
 394         float_raise(float_flag_overflow, s);
 395     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
 396         goto soft;
 397     }
 398     return ur.s;
 399
 400  soft:
 401     return soft(ua.s, ub.s, s);
 402 }
 403
 404 /*----------------------------------------------------------------------------
 405 | Returns the fraction bits of the single-precision floating-point value `a'.
 406 *----------------------------------------------------------------------------*/
 407
 408 static inline uint32_t extractFloat32Frac(float32 a)
 409 {
 410     return float32_val(a) & 0x007FFFFF;
 411 }
 412
 413 /*----------------------------------------------------------------------------
 414 | Returns the exponent bits of the single-precision floating-point value `a'.
 415 *----------------------------------------------------------------------------*/
 416
 417 static inline int extractFloat32Exp(float32 a)
 418 {
 419     return (float32_val(a) >> 23) & 0xFF;
 420 }
 421
 422 /*----------------------------------------------------------------------------
 423 | Returns the sign bit of the single-precision floating-point value `a'.
 424 *----------------------------------------------------------------------------*/
 425
 426 static inline bool extractFloat32Sign(float32 a)
 427 {
 428     return float32_val(a) >> 31;
 429 }
 430
 431 /*----------------------------------------------------------------------------
 432 | Returns the fraction bits of the double-precision floating-point value `a'.
 433 *----------------------------------------------------------------------------*/
 434
 435 static inline uint64_t extractFloat64Frac(float64 a)
 436 {
 437     return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
 438 }
 439
 440 /*----------------------------------------------------------------------------
 441 | Returns the exponent bits of the double-precision floating-point value `a'.
 442 *----------------------------------------------------------------------------*/
 443
 444 static inline int extractFloat64Exp(float64 a)
 445 {
 446     return (float64_val(a) >> 52) & 0x7FF;
 447 }
 448
 449 /*----------------------------------------------------------------------------
 450 | Returns the sign bit of the double-precision floating-point value `a'.
 451 *----------------------------------------------------------------------------*/
 452
 453 static inline bool extractFloat64Sign(float64 a)
 454 {
 455     return float64_val(a) >> 63;
 456 }
 457
 458 /*
 * Classify a floating point number. Everything from float_class_qnan
 * upward is a NaN, so cls >= float_class_qnan matches any NaN.
 461  */
 462
/*
 * The enumerator order matters: the NaN classes are last so that a single
 * ">= float_class_qnan" comparison identifies any NaN.
 */
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,  /* set by the raw unpack routines */
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
 471
/* Bit mask with only bit number `bit' set; used with FloatClass values. */
#define float_cmask(bit)  (1u << (bit))

/* One mask bit per FloatClass value, plus two combined masks. */
enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    /* Either a zero or an infinity. */
    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    /* Either kind of NaN. */
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
 484
/* Flags for parts_minmax; the values are distinct bits and may be OR-ed. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
};
 494
 495 /* Simple helpers for checking if, or what kind of, NaN we have */
 496 static inline __attribute__((unused)) bool is_nan(FloatClass c)
 497 {
 498     return unlikely(c >= float_class_qnan);
 499 }
 500
 501 static inline __attribute__((unused)) bool is_snan(FloatClass c)
 502 {
 503     return c == float_class_snan;
 504 }
 505
 506 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
 507 {
 508     return c == float_class_qnan;
 509 }
 510
 511 /*
 512  * Structure holding all of the decomposed parts of a float.
 513  * The exponent is unbiased and the fraction is normalized.
 514  *
 515  * The fraction words are stored in big-endian word ordering,
 516  * so that truncation from a larger format to a smaller format
 517  * can be done simply by ignoring subsequent elements.
 518  */
 519
/* Decomposed float whose fraction fits in a single 64-bit word. */
typedef struct {
    FloatClass cls;     /* classification of the value */
    bool sign;          /* sign bit */
    int32_t exp;        /* exponent */
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;
 536
/* Decomposed float with a two-word (128-bit) fraction. */
typedef struct {
    FloatClass cls;     /* classification of the value */
    bool sign;          /* sign bit */
    int32_t exp;        /* exponent */
    uint64_t frac_hi;   /* most significant fraction word */
    uint64_t frac_lo;   /* least significant fraction word */
} FloatParts128;
 544
/* Decomposed float with a four-word (256-bit) fraction. */
typedef struct {
    FloatClass cls;     /* classification of the value */
    bool sign;          /* sign bit */
    int32_t exp;        /* exponent */
    uint64_t frac_hi;   /* most significant fraction word */
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;   /* least significant fraction word */
} FloatParts256;
 554
/*
 * These apply to the most significant word of each FloatPartsN:
 * DECOMPOSED_BINARY_POINT is a bit index within that word and
 * DECOMPOSED_IMPLICIT_BIT is the mask with only that bit (bit 63) set.
 */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
 558
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction
 *   round_mask: bits below lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    uint64_t round_mask;
} FloatFmt;
 579
 580 /* Expand fields based on the size of exponent and fraction */
 581 #define FLOAT_PARAMS_(E)                                \
 582     .exp_size       = E,                                \
 583     .exp_bias       = ((1 << E) - 1) >> 1,              \
 584     .exp_max        = (1 << E) - 1
 585
 586 #define FLOAT_PARAMS(E, F)                              \
 587     FLOAT_PARAMS_(E),                                   \
 588     .frac_size      = F,                                \
 589     .frac_shift     = (-F - 1) & 63,                    \
 590     .round_mask     = (1ull << ((-F - 1) & 63)) - 1
 591
/* float16: 5-bit exponent field, 10-bit fraction field. */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* Same field sizes as float16_params, with the arm_althp modifier set. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* bfloat16: 8-bit exponent field, 7-bit fraction field. */
static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

/* float32: 8-bit exponent field, 23-bit fraction field. */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* float64: 11-bit exponent field, 52-bit fraction field. */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

/* float128: 15-bit exponent field, 112-bit fraction field. */
static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};
 616
 617 #define FLOATX80_PARAMS(R)              \
 618     FLOAT_PARAMS_(15),                  \
 619     .frac_size = R == 64 ? 63 : R,      \
 620     .frac_shift = 0,                    \
 621     .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
 622
/*
 * floatx80 format parameters, indexed by the floatx80_precision_* value:
 * rounding precision of 23, 52 or 64 bits respectively.
 */
static const FloatFmt floatx80_params[3] = {
    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
    [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
};
 628
 629 /* Unpack a float to parts, but do not canonicalize.  */
 630 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
 631 {
 632     const int f_size = fmt->frac_size;
 633     const int e_size = fmt->exp_size;
 634
 635     *r = (FloatParts64) {
 636         .cls = float_class_unclassified,
 637         .sign = extract64(raw, f_size + e_size, 1),
 638         .exp = extract64(raw, f_size, e_size),
 639         .frac = extract64(raw, 0, f_size)
 640     };
 641 }
 642
 643 static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
 644 {
 645     unpack_raw64(p, &float16_params, f);
 646 }
 647
 648 static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
 649 {
 650     unpack_raw64(p, &bfloat16_params, f);
 651 }
 652
 653 static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
 654 {
 655     unpack_raw64(p, &float32_params, f);
 656 }
 657
 658 static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
 659 {
 660     unpack_raw64(p, &float64_params, f);
 661 }
 662
 663 static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
 664 {
 665     *p = (FloatParts128) {
 666         .cls = float_class_unclassified,
 667         .sign = extract32(f.high, 15, 1),
 668         .exp = extract32(f.high, 0, 15),
 669         .frac_hi = f.low
 670     };
 671 }
 672
 673 static void float128_unpack_raw(FloatParts128 *p, float128 f)
 674 {
 675     const int f_size = float128_params.frac_size - 64;
 676     const int e_size = float128_params.exp_size;
 677
 678     *p = (FloatParts128) {
 679         .cls = float_class_unclassified,
 680         .sign = extract64(f.high, f_size + e_size, 1),
 681         .exp = extract64(f.high, f_size, e_size),
 682         .frac_hi = extract64(f.high, 0, f_size),
 683         .frac_lo = f.low,
 684     };
 685 }
 686
 687 /* Pack a float from parts, but do not canonicalize.  */
 688 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
 689 {
 690     const int f_size = fmt->frac_size;
 691     const int e_size = fmt->exp_size;
 692     uint64_t ret;
 693
 694     ret = (uint64_t)p->sign << (f_size + e_size);
 695     ret = deposit64(ret, f_size, e_size, p->exp);
 696     ret = deposit64(ret, 0, f_size, p->frac);
 697     return ret;
 698 }
 699
 700 static inline float16 float16_pack_raw(const FloatParts64 *p)
 701 {
 702     return make_float16(pack_raw64(p, &float16_params));
 703 }
 704
 705 static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
 706 {
 707     return pack_raw64(p, &bfloat16_params);
 708 }
 709
 710 static inline float32 float32_pack_raw(const FloatParts64 *p)
 711 {
 712     return make_float32(pack_raw64(p, &float32_params));
 713 }
 714
 715 static inline float64 float64_pack_raw(const FloatParts64 *p)
 716 {
 717     return make_float64(pack_raw64(p, &float64_params));
 718 }
 719
 720 static float128 float128_pack_raw(const FloatParts128 *p)
 721 {
 722     const int f_size = float128_params.frac_size - 64;
 723     const int e_size = float128_params.exp_size;
 724     uint64_t hi;
 725
 726     hi = (uint64_t)p->sign << (f_size + e_size);
 727     hi = deposit64(hi, f_size, e_size, p->exp);
 728     hi = deposit64(hi, 0, f_size, p->frac_hi);
 729     return make_float128(hi, p->frac_lo);
 730 }
 731
 732 /*----------------------------------------------------------------------------
 733 | Functions and definitions to determine:  (1) whether tininess for underflow
 734 | is detected before or after rounding by default, (2) what (if anything)
 735 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
 736 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
 737 | are propagated from function inputs to output.  These details are target-
 738 | specific.
 739 *----------------------------------------------------------------------------*/
 740 #include "softfloat-specialize.c.inc"
 741
/*
 * Select the parts<N>_NAME function for argument P via QEMU_GENERIC:
 * parts128_NAME is paired with FloatParts128 *, and parts64_NAME is the
 * remaining choice.
 * NOTE(review): presumably a compile-time dispatch on the static type of P;
 * confirm against the definition of QEMU_GENERIC.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

/* As above, with an additional parts256_NAME choice for FloatParts256 *. */
#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)

/* Size-generic entry points; P picks the 64- or 128-bit implementation. */
#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
 751
/*
 * Forward declarations of the NaN-handling routines (the definitions are
 * not in this part of the file), each followed by the size-generic macro
 * that selects the 64- or 128-bit variant from its first argument.
 */
static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

/* Three-operand variant; ab_mask/abc_mask are presumably float_cmask sets. */
static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
 775
/*
 * Forward declarations of the canonicalize / uncanonicalize routines (the
 * definitions are not in this part of the file).  Each takes the parts to
 * modify in place, the float status and the format description, and is
 * followed by the size-generic macro that selects the 64- or 128-bit
 * variant from its first argument.
 */
static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
                                   const FloatFmt *fmt);
static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
                                    const FloatFmt *fmt);

#define parts_uncanon_normal(A, S, F) \
    PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
 799
/*
 * add_normal / sub_normal: two-operand helpers without a float_status
 * argument.  These are the only parts_* operations declared here that also
 * have a 256-bit variant.  sub_normal additionally returns a bool.
 */
static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

/* Width-generic front end over the 64/128/256-bit variants. */
#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

/* Width-generic front end over the 64/128/256-bit variants. */
#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

/*
 * addsub: one entry point for both addition and subtraction, selected by
 * the 'subtract' flag.  Returns a pointer to the result parts.
 */
static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

/* Width-generic front end; Z carries the subtract flag. */
#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

/* mul: two operands in, pointer to the result parts out. */
static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

/* Width-generic front end; A is the argument given to PARTS_GENERIC_64_128. */
#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

/*
 * muladd: three operands plus an integer 'flags' argument.
 * NOTE(review): the flag values are not visible in this part of the file.
 */
static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

/* Width-generic front end; Z carries the flags argument. */
#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

/* div: two operands in, pointer to the result parts out. */
static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

/* Width-generic front end; A is the argument given to PARTS_GENERIC_64_128. */
#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

/* sqrt: operates in place on A and needs the target FloatFmt. */
static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);

/* Width-generic front end; A is the argument given to PARTS_GENERIC_64_128. */
#define parts_sqrt(A, S, F) \
    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
 853
/*
 * round_to_int_normal: in-place helper taking a rounding mode, a scale and
 * a fraction size, and returning a bool.
 * NOTE(review): the meaning of the bool result is not visible here.
 */
static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

/* Width-generic front end; argument order is (parts, rmode, scale, frac_size). */
#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

/* round_to_int: in-place variant that also takes float_status and FloatFmt. */
static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

/* Width-generic front end; argument order is (parts, rmode, scale, status, fmt). */
#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

/*
 * float_to_sint: returns an int64_t; the caller supplies min and max.
 * NOTE(review): presumably the result is limited to [min, max] -- confirm
 * at the definition.
 */
static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);

/* Width-generic front end; argument order is (parts, rmode, scale, min, max, status). */
#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

/* float_to_uint: unsigned counterpart; only an upper bound is passed. */
static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

/* Width-generic front end; argument order is (parts, rmode, scale, max, status). */
#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

/* sint_to_float: fills P from the signed 64-bit integer A and a scale. */
static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
                                  int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
                                   int scale, float_status *s);

/* Width-generic front end; argument order is (parts, integer, scale, status). */
#define parts_sint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)

/* uint_to_float: fills P from the unsigned 64-bit integer A and a scale. */
static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

/* Width-generic front end; argument order is (parts, integer, scale, status). */
#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
 907
/*
 * minmax: a single entry point parameterised by an integer 'flags'.
 * NOTE(review): the flag values are not visible in this part of the file.
 */
static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

/* Width-generic front end; F carries the flags argument. */
#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)

/*
 * compare: returns an int and takes a bool 'q'.
 * NOTE(review): q presumably selects the quiet comparison -- confirm.
 */
static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
                           float_status *s, bool q);
static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
                            float_status *s, bool q);

/* Width-generic front end; Q carries the bool argument. */
#define parts_compare(A, B, S, Q) \
    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)

/* scalbn: in-place operation on A with an integer argument n. */
static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);

/* Width-generic front end; A is the argument given to PARTS_GENERIC_64_128. */
#define parts_scalbn(A, N, S) \
    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
 929
 930 /*
 931  * Helper functions for softfloat-parts.c.inc, per-size operations.
 932  */
 933
/*
 * Select the per-width frac helper for NAME based on P.
 * The association list pairs FloatParts128 * with frac128_NAME and leaves
 * frac64_NAME as the final, unpaired entry.
 * NOTE(review): QEMU_GENERIC presumably dispatches on the static type of P
 * with the last entry as the default -- confirm against its definition.
 */
#define FRAC_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)

/* As above, with an additional FloatParts256 * -> frac256_NAME entry. */
#define FRAC_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
                 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
 940
 941 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
 942 {
 943     return uadd64_overflow(a->frac, b->frac, &r->frac);
 944 }
 945
 946 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
 947 {
 948     bool c = 0;
 949     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 950     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 951     return c;
 952 }
 953
 954 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
 955 {
 956     bool c = 0;
 957     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 958     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
 959     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
 960     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 961     return c;
 962 }
 963
 964 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
 965
 966 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
 967 {
 968     return uadd64_overflow(a->frac, c, &r->frac);
 969 }
 970
 971 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
 972 {
 973     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
 974     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
 975 }
 976
 977 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
 978
 979 static void frac64_allones(FloatParts64 *a)
 980 {
 981     a->frac = -1;
 982 }
 983
 984 static void frac128_allones(FloatParts128 *a)
 985 {
 986     a->frac_hi = a->frac_lo = -1;
 987 }
 988
 989 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
 990
 991 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
 992 {
 993     return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
 994 }
 995
 996 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
 997 {
 998     uint64_t ta = a->frac_hi, tb = b->frac_hi;
 999     if (ta == tb) {
1000         ta = a->frac_lo, tb = b->frac_lo;
1001         if (ta == tb) {
1002             return 0;
1003         }
1004     }
1005     return ta < tb ? -1 : 1;
1006 }
1007
1008 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
1009
1010 static void frac64_clear(FloatParts64 *a)
1011 {
1012     a->frac = 0;
1013 }
1014
1015 static void frac128_clear(FloatParts128 *a)
1016 {
1017     a->frac_hi = a->frac_lo = 0;
1018 }
1019
1020 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
1021
1022 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
1023 {
1024     uint64_t n1, n0, r, q;
1025     bool ret;
1026
1027     /*
1028      * We want a 2*N / N-bit division to produce exactly an N-bit
1029      * result, so that we do not lose any precision and so that we
1030      * do not have to renormalize afterward.  If A.frac < B.frac,
1031      * then division would produce an (N-1)-bit result; shift A left
1032      * by one to produce the an N-bit result, and return true to
1033      * decrement the exponent to match.
1034      *
1035      * The udiv_qrnnd algorithm that we're using requires normalization,
1036      * i.e. the msb of the denominator must be set, which is already true.
1037      */
1038     ret = a->frac < b->frac;
1039     if (ret) {
1040         n0 = a->frac;
1041         n1 = 0;
1042     } else {
1043         n0 = a->frac >> 1;
1044         n1 = a->frac << 63;
1045     }
1046     q = udiv_qrnnd(&r, n0, n1, b->frac);
1047
1048     /* Set lsb if there is a remainder, to set inexact. */
1049     a->frac = q | (r != 0);
1050
1051     return ret;
1052 }
1053
/*
 * frac128_div: A.frac = A.frac / B.frac for 128-bit fractions, produced as
 * two 64-bit quotient words (q0 high, q1 low) with a sticky lsb.
 *
 * Returns the result of lt128() on the incoming fractions; when that is
 * false, A is shifted right by one bit before dividing.
 */
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    /* Remember the comparison result; it is the function's return value. */
    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    /* A non-zero top word r0 marks a remainder that is still negative. */
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    /* Same correction as above, one word further down. */
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

/* Width-generic front end over frac64_div and frac128_div. */
#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1102
1103 static bool frac64_eqz(FloatParts64 *a)
1104 {
1105     return a->frac == 0;
1106 }
1107
1108 static bool frac128_eqz(FloatParts128 *a)
1109 {
1110     return (a->frac_hi | a->frac_lo) == 0;
1111 }
1112
1113 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1114
/*
 * frac_mulw: widening multiply.  The result R is the next wider FloatParts
 * type than the operands A and B.
 *
 * 64-bit variant: mulu64() is given A.frac and B.frac and writes its two
 * output words through &r->frac_lo and &r->frac_hi.
 */
static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

/*
 * 128-bit variant: mul128To256() receives both operands as (hi, lo) pairs
 * and writes four output words, passed in the order hi, hm, lm, lo.
 */
static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

/* Note that dispatch is on the operand A, not on the wider result R. */
#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1127
1128 static void frac64_neg(FloatParts64 *a)
1129 {
1130     a->frac = -a->frac;
1131 }
1132
1133 static void frac128_neg(FloatParts128 *a)
1134 {
1135     bool c = 0;
1136     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1137     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1138 }
1139
1140 static void frac256_neg(FloatParts256 *a)
1141 {
1142     bool c = 0;
1143     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1144     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1145     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1146     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1147 }
1148
1149 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1150
1151 static int frac64_normalize(FloatParts64 *a)
1152 {
1153     if (a->frac) {
1154         int shift = clz64(a->frac);
1155         a->frac <<= shift;
1156         return shift;
1157     }
1158     return 64;
1159 }
1160
1161 static int frac128_normalize(FloatParts128 *a)
1162 {
1163     if (a->frac_hi) {
1164         int shl = clz64(a->frac_hi);
1165         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1166         a->frac_lo <<= shl;
1167         return shl;
1168     } else if (a->frac_lo) {
1169         int shl = clz64(a->frac_lo);
1170         a->frac_hi = a->frac_lo << shl;
1171         a->frac_lo = 0;
1172         return shl + 64;
1173     }
1174     return 128;
1175 }
1176
/*
 * frac256_normalize: shift the 256-bit fraction left by its number of
 * leading zero bits, first by whole 64-bit words and then by the clz64()
 * count of the new top word.
 * Returns the total shift applied, or 256 for a zero fraction.
 */
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            /* Nothing to shift; the stored fraction is left untouched. */
            return 0;
        }
        ret = shl;
    } else {
        /* Top word is zero: move the first non-zero word into a0. */
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            /* Word move alone was enough; skip the bit shift but store. */
            goto done;
        }
        ret += shl;
    }

    /* Remaining sub-word shift; shl is non-zero on this path. */
    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

/* Width-generic front end over the 64/128/256-bit normalize variants. */
#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1225
1226 static void frac64_shl(FloatParts64 *a, int c)
1227 {
1228     a->frac <<= c;
1229 }
1230
1231 static void frac128_shl(FloatParts128 *a, int c)
1232 {
1233     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1234
1235     if (c & 64) {
1236         a0 = a1, a1 = 0;
1237     }
1238
1239     c &= 63;
1240     if (c) {
1241         a0 = shl_double(a0, a1, c);
1242         a1 = a1 << c;
1243     }
1244
1245     a->frac_hi = a0;
1246     a->frac_lo = a1;
1247 }
1248
1249 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1250
1251 static void frac64_shr(FloatParts64 *a, int c)
1252 {
1253     a->frac >>= c;
1254 }
1255
1256 static void frac128_shr(FloatParts128 *a, int c)
1257 {
1258     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1259
1260     if (c & 64) {
1261         a1 = a0, a0 = 0;
1262     }
1263
1264     c &= 63;
1265     if (c) {
1266         a1 = shr_double(a0, a1, c);
1267         a0 = a0 >> c;
1268     }
1269
1270     a->frac_hi = a0;
1271     a->frac_lo = a1;
1272 }
1273
1274 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1275
1276 static void frac64_shrjam(FloatParts64 *a, int c)
1277 {
1278     uint64_t a0 = a->frac;
1279
1280     if (likely(c != 0)) {
1281         if (likely(c < 64)) {
1282             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1283         } else {
1284             a0 = a0 != 0;
1285         }
1286         a->frac = a0;
1287     }
1288 }
1289
1290 static void frac128_shrjam(FloatParts128 *a, int c)
1291 {
1292     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1293     uint64_t sticky = 0;
1294
1295     if (unlikely(c == 0)) {
1296         return;
1297     } else if (likely(c < 64)) {
1298         /* nothing */
1299     } else if (likely(c < 128)) {
1300         sticky = a1;
1301         a1 = a0;
1302         a0 = 0;
1303         c &= 63;
1304         if (c == 0) {
1305             goto done;
1306         }
1307     } else {
1308         sticky = a0 | a1;
1309         a0 = a1 = 0;
1310         goto done;
1311     }
1312
1313     sticky |= shr_double(a1, 0, c);
1314     a1 = shr_double(a0, a1, c);
1315     a0 = a0 >> c;
1316
1317  done:
1318     a->frac_lo = a1 | (sticky != 0);
1319     a->frac_hi = a0;
1320 }
1321
1322 static void frac256_shrjam(FloatParts256 *a, int c)
1323 {
1324     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1325     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1326     uint64_t sticky = 0;
1327
1328     if (unlikely(c == 0)) {
1329         return;
1330     } else if (likely(c < 64)) {
1331         /* nothing */
1332     } else if (likely(c < 256)) {
1333         if (unlikely(c & 128)) {
1334             sticky |= a2 | a3;
1335             a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1336         }
1337         if (unlikely(c & 64)) {
1338             sticky |= a3;
1339             a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1340         }
1341         c &= 63;
1342         if (c == 0) {
1343             goto done;
1344         }
1345     } else {
1346         sticky = a0 | a1 | a2 | a3;
1347         a0 = a1 = a2 = a3 = 0;
1348         goto done;
1349     }
1350
1351     sticky |= shr_double(a3, 0, c);
1352     a3 = shr_double(a2, a3, c);
1353     a2 = shr_double(a1, a2, c);
1354     a1 = shr_double(a0, a1, c);
1355     a0 = a0 >> c;
1356
1357  done:
1358     a->frac_lo = a3 | (sticky != 0);
1359     a->frac_lm = a2;
1360     a->frac_hm = a1;
1361     a->frac_hi = a0;
1362 }
1363
1364 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1365
1366 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1367 {
1368     return usub64_overflow(a->frac, b->frac, &r->frac);
1369 }
1370
1371 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1372 {
1373     bool c = 0;
1374     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1375     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1376     return c;
1377 }
1378
1379 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1380 {
1381     bool c = 0;
1382     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1383     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1384     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1385     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1386     return c;
1387 }
1388
1389 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1390
1391 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1392 {
1393     r->frac = a->frac_hi | (a->frac_lo != 0);
1394 }
1395
1396 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1397 {
1398     r->frac_hi = a->frac_hi;
1399     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1400 }
1401
1402 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1403
/*
 * frac_widen: copy fraction A into the next wider FloatParts type R.
 * A's words land in the most significant words of R; the remaining low
 * words of R are zeroed.  Only the fraction fields of R are written.
 */
static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

/* Macro arguments are (wide result, narrow source); dispatch is on the source. */
#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1419
/*
 * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licensed.
 */
1425 static const uint16_t rsqrt_tab[128] = {
1426     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1427     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1428     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1429     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1430     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1431     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1432     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1433     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1434     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1435     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1436     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1437     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1438     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1439     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1440     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1441     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1442 };
1443
/*
 * Instantiate the .c.inc templates once per FloatParts width.
 *
 * N is the width being instantiated and W the next wider one.
 * NOTE(review): glue() presumably pastes tokens, so that with N == 64
 * partsN(foo) names parts64_foo and FloatPartsN names FloatParts64.
 */
#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN    glue(FloatParts,N)
#define FloatPartsW    glue(FloatParts,W)

/* First pass: N = 64, W = 128; both templates. */
#define N 64
#define W 128

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

/* Second pass: N = 128, W = 256; both templates. */
#undef  N
#undef  W
#define N 128
#define W 256

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

/* Third pass: N = 256 with no W; only the add/sub template is included. */
#undef  N
#undef  W
#define N            256

#include "softfloat-parts-addsub.c.inc"

/* Drop the template parameters so they cannot leak into later code. */
#undef  N
#undef  W
#undef  partsN
#undef  FloatPartsN
#undef  FloatPartsW
1473
1474 /*
1475  * Pack/unpack routines with a specific FloatFmt.
1476  */
1477
/*
 * Unpack the half-precision value F into P and canonicalize it according
 * to the caller-supplied format PARAMS.
 */
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}
1484
/* As float16a_unpack_canonical(), with the format fixed to float16_params. */
static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}
1490
/* Unpack the bfloat16 value F into P and canonicalize it with bfloat16_params. */
static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}
1497
/*
 * Convert canonical parts P back to a half-precision value: run
 * parts_uncanon() with the caller-supplied format PARAMS (P is modified
 * in place), then pack the raw fields.
 */
static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}
1505
/* As float16a_round_pack_canonical(), with the format fixed to float16_params. */
static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}
1511
/*
 * Convert canonical parts P back to a bfloat16: parts_uncanon() with
 * bfloat16_params (P is modified in place), then pack the raw fields.
 */
static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}
1518
/* Unpack the float32 value F into P and canonicalize it with float32_params. */
static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}
1525
/*
 * Convert canonical parts P back to a float32: parts_uncanon() with
 * float32_params (P is modified in place), then pack the raw fields.
 */
static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}
1532
/* Unpack the float64 value F into P and canonicalize it with float64_params. */
static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}
1539
/*
 * Convert canonical parts P back to a float64: parts_uncanon() with
 * float64_params (P is modified in place), then pack the raw fields.
 */
static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}
1546
/*
 * Unpack the float128 value F into the 128-bit parts P and canonicalize
 * it with float128_params.
 */
static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}
1553
/*
 * Convert canonical 128-bit parts P back to a float128: parts_uncanon()
 * with float128_params (P is modified in place), then pack the raw fields.
 */
static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}
1560
1561 /* Returns false if the encoding is invalid. */
1562 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1563                                       float_status *s)
1564 {
1565     /* Ensure rounding precision is set before beginning. */
1566     switch (s->floatx80_rounding_precision) {
1567     case floatx80_precision_x:
1568     case floatx80_precision_d:
1569     case floatx80_precision_s:
1570         break;
1571     default:
1572         g_assert_not_reached();
1573     }
1574
1575     if (unlikely(floatx80_invalid_encoding(f))) {
1576         float_raise(float_flag_invalid, s);
1577         return false;
1578     }
1579
1580     floatx80_unpack_raw(p, f);
1581
1582     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1583         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1584     } else {
1585         /* The explicit integer bit is ignored, after invalid checks. */
1586         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1587         p->cls = (p->frac_hi == 0 ? float_class_inf
1588                   : parts_is_snan_frac(p->frac_hi, s)
1589                   ? float_class_snan : float_class_qnan);
1590     }
1591     return true;
1592 }
1593
1594 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1595                                               float_status *s)
1596 {
1597     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1598     uint64_t frac;
1599     int exp;
1600
1601     switch (p->cls) {
1602     case float_class_normal:
1603         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1604             parts_uncanon_normal(p, s, fmt);
1605             frac = p->frac_hi;
1606             exp = p->exp;
1607         } else {
1608             FloatParts64 p64;
1609
1610             p64.sign = p->sign;
1611             p64.exp = p->exp;
1612             frac_truncjam(&p64, p);
1613             parts_uncanon_normal(&p64, s, fmt);
1614             frac = p64.frac;
1615             exp = p64.exp;
1616         }
1617         if (exp != fmt->exp_max) {
1618             break;
1619         }
1620         /* rounded to inf -- fall through to set frac correctly */
1621
1622     case float_class_inf:
1623         /* x86 and m68k differ in the setting of the integer bit. */
1624         frac = floatx80_infinity_low;
1625         exp = fmt->exp_max;
1626         break;
1627
1628     case float_class_zero:
1629         frac = 0;
1630         exp = 0;
1631         break;
1632
1633     case float_class_snan:
1634     case float_class_qnan:
1635         /* NaNs have the integer bit set. */
1636         frac = p->frac_hi | (1ull << 63);
1637         exp = fmt->exp_max;
1638         break;
1639
1640     default:
1641         g_assert_not_reached();
1642     }
1643
1644     return packFloatx80(p->sign, exp, frac);
1645 }
1646
1647 /*
1648  * Addition and subtraction
1649  */
1650
1651 static float16 QEMU_FLATTEN
1652 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1653 {
1654     FloatParts64 pa, pb, *pr;
1655
1656     float16_unpack_canonical(&pa, a, status);
1657     float16_unpack_canonical(&pb, b, status);
1658     pr = parts_addsub(&pa, &pb, status, subtract);
1659
1660     return float16_round_pack_canonical(pr, status);
1661 }
1662
/* Half-precision a + b: float16_addsub() with subtract == false. */
float16 float16_add(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, false);
}
1667
/* Half-precision a - b: float16_addsub() with subtract == true. */
float16 float16_sub(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, true);
}
1672
1673 static float32 QEMU_SOFTFLOAT_ATTR
1674 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1675 {
1676     FloatParts64 pa, pb, *pr;
1677
1678     float32_unpack_canonical(&pa, a, status);
1679     float32_unpack_canonical(&pb, b, status);
1680     pr = parts_addsub(&pa, &pb, status, subtract);
1681
1682     return float32_round_pack_canonical(pr, status);
1683 }
1684
/* Software float32 a + b: soft_f32_addsub() with subtract == false. */
static float32 soft_f32_add(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, false);
}
1689
/* Software float32 a - b: soft_f32_addsub() with subtract == true. */
static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, true);
}
1694
1695 static float64 QEMU_SOFTFLOAT_ATTR
1696 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1697 {
1698     FloatParts64 pa, pb, *pr;
1699
1700     float64_unpack_canonical(&pa, a, status);
1701     float64_unpack_canonical(&pb, b, status);
1702     pr = parts_addsub(&pa, &pb, status, subtract);
1703
1704     return float64_round_pack_canonical(pr, status);
1705 }
1706
/* Software float64 a + b: soft_f64_addsub() with subtract == false. */
static float64 soft_f64_add(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, false);
}
1711
/* Software float64 a - b: soft_f64_addsub() with subtract == true. */
static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, true);
}
1716
/* Host-FPU single-precision addition: returns a + b. */
static float hard_f32_add(float a, float b)
{
    float sum = a + b;

    return sum;
}
1721
/* Host-FPU single-precision subtraction: returns a - b. */
static float hard_f32_sub(float a, float b)
{
    float diff = a - b;

    return diff;
}
1726
/* Host-FPU double-precision addition: returns a + b. */
static double hard_f64_add(double a, double b)
{
    double sum = a + b;

    return sum;
}
1731
/* Host-FPU double-precision subtraction: returns a - b. */
static double hard_f64_sub(double a, double b)
{
    double diff = a - b;

    return diff;
}
1736
1737 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1738 {
1739     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1740         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1741     }
1742     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1743 }
1744
1745 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1746 {
1747     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1748         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1749     } else {
1750         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1751     }
1752 }
1753
/*
 * Shared float32 add/sub entry: forwards the operands and the
 * caller-chosen hard and soft implementations to float32_gen2(), together
 * with f32_is_zon2 and f32_addsubmul_post.
 * NOTE(review): the last two arguments are presumably the pre- and
 * post-condition checks for taking the hardfloat path -- confirm against
 * float32_gen2().
 */
static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}
1760
1761 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1762                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1763 {
1764     return float64_gen2(a, b, s, hard, soft,
1765                         f64_is_zon2, f64_addsubmul_post);
1766 }
1767
1768 float32 QEMU_FLATTEN
1769 float32_add(float32 a, float32 b, float_status *s)
1770 {
1771     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1772 }
1773
1774 float32 QEMU_FLATTEN
1775 float32_sub(float32 a, float32 b, float_status *s)
1776 {
1777     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1778 }
1779
1780 float64 QEMU_FLATTEN
1781 float64_add(float64 a, float64 b, float_status *s)
1782 {
1783     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1784 }
1785
1786 float64 QEMU_FLATTEN
1787 float64_sub(float64 a, float64 b, float_status *s)
1788 {
1789     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1790 }
1791
1792 static bfloat16 QEMU_FLATTEN
1793 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1794 {
1795     FloatParts64 pa, pb, *pr;
1796
1797     bfloat16_unpack_canonical(&pa, a, status);
1798     bfloat16_unpack_canonical(&pb, b, status);
1799     pr = parts_addsub(&pa, &pb, status, subtract);
1800
1801     return bfloat16_round_pack_canonical(pr, status);
1802 }
1803
1804 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1805 {
1806     return bfloat16_addsub(a, b, status, false);
1807 }
1808
1809 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1810 {
1811     return bfloat16_addsub(a, b, status, true);
1812 }
1813
1814 static float128 QEMU_FLATTEN
1815 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1816 {
1817     FloatParts128 pa, pb, *pr;
1818
1819     float128_unpack_canonical(&pa, a, status);
1820     float128_unpack_canonical(&pb, b, status);
1821     pr = parts_addsub(&pa, &pb, status, subtract);
1822
1823     return float128_round_pack_canonical(pr, status);
1824 }
1825
1826 float128 float128_add(float128 a, float128 b, float_status *status)
1827 {
1828     return float128_addsub(a, b, status, false);
1829 }
1830
1831 float128 float128_sub(float128 a, float128 b, float_status *status)
1832 {
1833     return float128_addsub(a, b, status, true);
1834 }
1835
1836 static floatx80 QEMU_FLATTEN
1837 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
1838 {
1839     FloatParts128 pa, pb, *pr;
1840
1841     if (!floatx80_unpack_canonical(&pa, a, status) ||
1842         !floatx80_unpack_canonical(&pb, b, status)) {
1843         return floatx80_default_nan(status);
1844     }
1845
1846     pr = parts_addsub(&pa, &pb, status, subtract);
1847     return floatx80_round_pack_canonical(pr, status);
1848 }
1849
1850 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
1851 {
1852     return floatx80_addsub(a, b, status, false);
1853 }
1854
1855 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
1856 {
1857     return floatx80_addsub(a, b, status, true);
1858 }
1859
1860 /*
1861  * Multiplication
1862  */
1863
1864 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
1865 {
1866     FloatParts64 pa, pb, *pr;
1867
1868     float16_unpack_canonical(&pa, a, status);
1869     float16_unpack_canonical(&pb, b, status);
1870     pr = parts_mul(&pa, &pb, status);
1871
1872     return float16_round_pack_canonical(pr, status);
1873 }
1874
1875 static float32 QEMU_SOFTFLOAT_ATTR
1876 soft_f32_mul(float32 a, float32 b, float_status *status)
1877 {
1878     FloatParts64 pa, pb, *pr;
1879
1880     float32_unpack_canonical(&pa, a, status);
1881     float32_unpack_canonical(&pb, b, status);
1882     pr = parts_mul(&pa, &pb, status);
1883
1884     return float32_round_pack_canonical(pr, status);
1885 }
1886
1887 static float64 QEMU_SOFTFLOAT_ATTR
1888 soft_f64_mul(float64 a, float64 b, float_status *status)
1889 {
1890     FloatParts64 pa, pb, *pr;
1891
1892     float64_unpack_canonical(&pa, a, status);
1893     float64_unpack_canonical(&pb, b, status);
1894     pr = parts_mul(&pa, &pb, status);
1895
1896     return float64_round_pack_canonical(pr, status);
1897 }
1898
/* Host-FPU single-precision multiplication: returns a * b. */
static float hard_f32_mul(float a, float b)
{
    const float product = a * b;

    return product;
}
1903
/* Host-FPU double-precision multiplication: returns a * b. */
static double hard_f64_mul(double a, double b)
{
    const double product = a * b;

    return product;
}
1908
1909 float32 QEMU_FLATTEN
1910 float32_mul(float32 a, float32 b, float_status *s)
1911 {
1912     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
1913                         f32_is_zon2, f32_addsubmul_post);
1914 }
1915
1916 float64 QEMU_FLATTEN
1917 float64_mul(float64 a, float64 b, float_status *s)
1918 {
1919     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
1920                         f64_is_zon2, f64_addsubmul_post);
1921 }
1922
1923 bfloat16 QEMU_FLATTEN
1924 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1925 {
1926     FloatParts64 pa, pb, *pr;
1927
1928     bfloat16_unpack_canonical(&pa, a, status);
1929     bfloat16_unpack_canonical(&pb, b, status);
1930     pr = parts_mul(&pa, &pb, status);
1931
1932     return bfloat16_round_pack_canonical(pr, status);
1933 }
1934
1935 float128 QEMU_FLATTEN
1936 float128_mul(float128 a, float128 b, float_status *status)
1937 {
1938     FloatParts128 pa, pb, *pr;
1939
1940     float128_unpack_canonical(&pa, a, status);
1941     float128_unpack_canonical(&pb, b, status);
1942     pr = parts_mul(&pa, &pb, status);
1943
1944     return float128_round_pack_canonical(pr, status);
1945 }
1946
1947 floatx80 QEMU_FLATTEN
1948 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
1949 {
1950     FloatParts128 pa, pb, *pr;
1951
1952     if (!floatx80_unpack_canonical(&pa, a, status) ||
1953         !floatx80_unpack_canonical(&pb, b, status)) {
1954         return floatx80_default_nan(status);
1955     }
1956
1957     pr = parts_mul(&pa, &pb, status);
1958     return floatx80_round_pack_canonical(pr, status);
1959 }
1960
1961 /*
1962  * Fused multiply-add
1963  */
1964
1965 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1966                                     int flags, float_status *status)
1967 {
1968     FloatParts64 pa, pb, pc, *pr;
1969
1970     float16_unpack_canonical(&pa, a, status);
1971     float16_unpack_canonical(&pb, b, status);
1972     float16_unpack_canonical(&pc, c, status);
1973     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1974
1975     return float16_round_pack_canonical(pr, status);
1976 }
1977
1978 static float32 QEMU_SOFTFLOAT_ATTR
1979 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1980                 float_status *status)
1981 {
1982     FloatParts64 pa, pb, pc, *pr;
1983
1984     float32_unpack_canonical(&pa, a, status);
1985     float32_unpack_canonical(&pb, b, status);
1986     float32_unpack_canonical(&pc, c, status);
1987     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1988
1989     return float32_round_pack_canonical(pr, status);
1990 }
1991
1992 static float64 QEMU_SOFTFLOAT_ATTR
1993 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1994                 float_status *status)
1995 {
1996     FloatParts64 pa, pb, pc, *pr;
1997
1998     float64_unpack_canonical(&pa, a, status);
1999     float64_unpack_canonical(&pb, b, status);
2000     float64_unpack_canonical(&pc, c, status);
2001     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2002
2003     return float64_round_pack_canonical(pr, status);
2004 }
2005
/*
 * When true, float32_muladd() and float64_muladd() skip the host
 * fmaf()/fma() fast path and use the softfloat implementation instead.
 * NOTE(review): nothing in this part of the file assigns it; presumably
 * it is set once during initialisation elsewhere -- confirm.
 */
static bool force_soft_fma;
2007
/*
 * float32 fused multiply-add with a host-FPU fast path.
 *
 * The softfloat implementation (soft_f32_muladd) is used when any of the
 * following holds:
 *  - can_use_fpu(s) is false;
 *  - @flags requests float_muladd_halve_result;
 *  - after float32_input_flush3(), f32_is_zon3() rejects the operands;
 *  - force_soft_fma is set;
 *  - the host fmaf() result has magnitude <= FLT_MIN.
 * Otherwise the result is computed with host arithmetic; an infinite
 * fmaf() result raises float_flag_overflow.
 *
 * Note that the soft path receives ua/ub/uc, i.e. the operands as they
 * stand after float32_input_flush3() whenever that call has been reached.
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* Halving the result is only implemented by the soft path. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        /* Build the signed-zero product by hand, honouring negate_product. */
        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        /* Saved so the soft path sees the operands before negation. */
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /* Undo the negations: the soft path applies @flags itself. */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
2078
2079 float64 QEMU_FLATTEN
2080 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2081 {
2082     union_float64 ua, ub, uc, ur;
2083
2084     ua.s = xa;
2085     ub.s = xb;
2086     uc.s = xc;
2087
2088     if (unlikely(!can_use_fpu(s))) {
2089         goto soft;
2090     }
2091     if (unlikely(flags & float_muladd_halve_result)) {
2092         goto soft;
2093     }
2094
2095     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2096     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2097         goto soft;
2098     }
2099
2100     if (unlikely(force_soft_fma)) {
2101         goto soft;
2102     }
2103
2104     /*
2105      * When (a || b) == 0, there's no need to check for under/over flow,
2106      * since we know the addend is (normal || 0) and the product is 0.
2107      */
2108     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2109         union_float64 up;
2110         bool prod_sign;
2111
2112         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2113         prod_sign ^= !!(flags & float_muladd_negate_product);
2114         up.s = float64_set_sign(float64_zero, prod_sign);
2115
2116         if (flags & float_muladd_negate_c) {
2117             uc.h = -uc.h;
2118         }
2119         ur.h = up.h + uc.h;
2120     } else {
2121         union_float64 ua_orig = ua;
2122         union_float64 uc_orig = uc;
2123
2124         if (flags & float_muladd_negate_product) {
2125             ua.h = -ua.h;
2126         }
2127         if (flags & float_muladd_negate_c) {
2128             uc.h = -uc.h;
2129         }
2130
2131         ur.h = fma(ua.h, ub.h, uc.h);
2132
2133         if (unlikely(f64_is_inf(ur))) {
2134             float_raise(float_flag_overflow, s);
2135         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2136             ua = ua_orig;
2137             uc = uc_orig;
2138             goto soft;
2139         }
2140     }
2141     if (flags & float_muladd_negate_result) {
2142         return float64_chs(ur.s);
2143     }
2144     return ur.s;
2145
2146  soft:
2147     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2148 }
2149
2150 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2151                                       int flags, float_status *status)
2152 {
2153     FloatParts64 pa, pb, pc, *pr;
2154
2155     bfloat16_unpack_canonical(&pa, a, status);
2156     bfloat16_unpack_canonical(&pb, b, status);
2157     bfloat16_unpack_canonical(&pc, c, status);
2158     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2159
2160     return bfloat16_round_pack_canonical(pr, status);
2161 }
2162
2163 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2164                                       int flags, float_status *status)
2165 {
2166     FloatParts128 pa, pb, pc, *pr;
2167
2168     float128_unpack_canonical(&pa, a, status);
2169     float128_unpack_canonical(&pb, b, status);
2170     float128_unpack_canonical(&pc, c, status);
2171     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2172
2173     return float128_round_pack_canonical(pr, status);
2174 }
2175
2176 /*
2177  * Division
2178  */
2179
2180 float16 float16_div(float16 a, float16 b, float_status *status)
2181 {
2182     FloatParts64 pa, pb, *pr;
2183
2184     float16_unpack_canonical(&pa, a, status);
2185     float16_unpack_canonical(&pb, b, status);
2186     pr = parts_div(&pa, &pb, status);
2187
2188     return float16_round_pack_canonical(pr, status);
2189 }
2190
2191 static float32 QEMU_SOFTFLOAT_ATTR
2192 soft_f32_div(float32 a, float32 b, float_status *status)
2193 {
2194     FloatParts64 pa, pb, *pr;
2195
2196     float32_unpack_canonical(&pa, a, status);
2197     float32_unpack_canonical(&pb, b, status);
2198     pr = parts_div(&pa, &pb, status);
2199
2200     return float32_round_pack_canonical(pr, status);
2201 }
2202
2203 static float64 QEMU_SOFTFLOAT_ATTR
2204 soft_f64_div(float64 a, float64 b, float_status *status)
2205 {
2206     FloatParts64 pa, pb, *pr;
2207
2208     float64_unpack_canonical(&pa, a, status);
2209     float64_unpack_canonical(&pb, b, status);
2210     pr = parts_div(&pa, &pb, status);
2211
2212     return float64_round_pack_canonical(pr, status);
2213 }
2214
/* Host-FPU single-precision division: returns a / b. */
static float hard_f32_div(float a, float b)
{
    const float quotient = a / b;

    return quotient;
}
2219
/* Host-FPU double-precision division: returns a / b. */
static double hard_f64_div(double a, double b)
{
    const double quotient = a / b;

    return quotient;
}
2224
2225 static bool f32_div_pre(union_float32 a, union_float32 b)
2226 {
2227     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2228         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2229                fpclassify(b.h) == FP_NORMAL;
2230     }
2231     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2232 }
2233
2234 static bool f64_div_pre(union_float64 a, union_float64 b)
2235 {
2236     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2237         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2238                fpclassify(b.h) == FP_NORMAL;
2239     }
2240     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2241 }
2242
2243 static bool f32_div_post(union_float32 a, union_float32 b)
2244 {
2245     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2246         return fpclassify(a.h) != FP_ZERO;
2247     }
2248     return !float32_is_zero(a.s);
2249 }
2250
2251 static bool f64_div_post(union_float64 a, union_float64 b)
2252 {
2253     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2254         return fpclassify(a.h) != FP_ZERO;
2255     }
2256     return !float64_is_zero(a.s);
2257 }
2258
2259 float32 QEMU_FLATTEN
2260 float32_div(float32 a, float32 b, float_status *s)
2261 {
2262     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2263                         f32_div_pre, f32_div_post);
2264 }
2265
2266 float64 QEMU_FLATTEN
2267 float64_div(float64 a, float64 b, float_status *s)
2268 {
2269     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2270                         f64_div_pre, f64_div_post);
2271 }
2272
2273 bfloat16 QEMU_FLATTEN
2274 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2275 {
2276     FloatParts64 pa, pb, *pr;
2277
2278     bfloat16_unpack_canonical(&pa, a, status);
2279     bfloat16_unpack_canonical(&pb, b, status);
2280     pr = parts_div(&pa, &pb, status);
2281
2282     return bfloat16_round_pack_canonical(pr, status);
2283 }
2284
2285 float128 QEMU_FLATTEN
2286 float128_div(float128 a, float128 b, float_status *status)
2287 {
2288     FloatParts128 pa, pb, *pr;
2289
2290     float128_unpack_canonical(&pa, a, status);
2291     float128_unpack_canonical(&pb, b, status);
2292     pr = parts_div(&pa, &pb, status);
2293
2294     return float128_round_pack_canonical(pr, status);
2295 }
2296
2297 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2298 {
2299     FloatParts128 pa, pb, *pr;
2300
2301     if (!floatx80_unpack_canonical(&pa, a, status) ||
2302         !floatx80_unpack_canonical(&pb, b, status)) {
2303         return floatx80_default_nan(status);
2304     }
2305
2306     pr = parts_div(&pa, &pb, status);
2307     return floatx80_round_pack_canonical(pr, status);
2308 }
2309
2310 /*
2311  * Float to Float conversions
2312  *
2313  * Returns the result of converting one float format to another. The
2314  * conversion is performed according to the IEC/IEEE Standard for
2315  * Binary Floating-Point Arithmetic.
2316  *
2317  * Usually this only needs to take care of raising invalid exceptions
2318  * and handling the conversion on NaNs.
2319  */
2320
2321 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2322 {
2323     switch (a->cls) {
2324     case float_class_qnan:
2325     case float_class_snan:
2326         /*
2327          * There is no NaN in the destination format.  Raise Invalid
2328          * and return a zero with the sign of the input NaN.
2329          */
2330         float_raise(float_flag_invalid, s);
2331         a->cls = float_class_zero;
2332         break;
2333
2334     case float_class_inf:
2335         /*
2336          * There is no Inf in the destination format.  Raise Invalid
2337          * and return the maximum normal with the correct sign.
2338          */
2339         float_raise(float_flag_invalid, s);
2340         a->cls = float_class_normal;
2341         a->exp = float16_params_ahp.exp_max;
2342         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2343                                   float16_params_ahp.frac_size + 1);
2344         break;
2345
2346     case float_class_normal:
2347     case float_class_zero:
2348         break;
2349
2350     default:
2351         g_assert_not_reached();
2352     }
2353 }
2354
2355 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2356 {
2357     if (is_nan(a->cls)) {
2358         parts_return_nan(a, s);
2359     }
2360 }
2361
2362 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2363 {
2364     if (is_nan(a->cls)) {
2365         parts_return_nan(a, s);
2366     }
2367 }
2368
/*
 * Generic front end: forwards (P, S) to the function selected by
 * PARTS_GENERIC_64_128(float_to_float, P).
 * NOTE(review): presumably that resolves to parts64_float_to_float or
 * parts128_float_to_float from the pointer type of P -- confirm against
 * the PARTS_GENERIC_64_128 definition.
 */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2371
2372 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2373                                         float_status *s)
2374 {
2375     a->cls = b->cls;
2376     a->sign = b->sign;
2377     a->exp = b->exp;
2378
2379     if (a->cls == float_class_normal) {
2380         frac_truncjam(a, b);
2381     } else if (is_nan(a->cls)) {
2382         /* Discard the low bits of the NaN. */
2383         a->frac = b->frac_hi;
2384         parts_return_nan(a, s);
2385     }
2386 }
2387
2388 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2389                                        float_status *s)
2390 {
2391     a->cls = b->cls;
2392     a->sign = b->sign;
2393     a->exp = b->exp;
2394     frac_widen(a, b);
2395
2396     if (is_nan(a->cls)) {
2397         parts_return_nan(a, s);
2398     }
2399 }
2400
2401 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2402 {
2403     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2404     FloatParts64 p;
2405
2406     float16a_unpack_canonical(&p, a, s, fmt16);
2407     parts_float_to_float(&p, s);
2408     return float32_round_pack_canonical(&p, s);
2409 }
2410
2411 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2412 {
2413     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2414     FloatParts64 p;
2415
2416     float16a_unpack_canonical(&p, a, s, fmt16);
2417     parts_float_to_float(&p, s);
2418     return float64_round_pack_canonical(&p, s);
2419 }
2420
2421 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2422 {
2423     FloatParts64 p;
2424     const FloatFmt *fmt;
2425
2426     float32_unpack_canonical(&p, a, s);
2427     if (ieee) {
2428         parts_float_to_float(&p, s);
2429         fmt = &float16_params;
2430     } else {
2431         parts_float_to_ahp(&p, s);
2432         fmt = &float16_params_ahp;
2433     }
2434     return float16a_round_pack_canonical(&p, s, fmt);
2435 }
2436
2437 static float64 QEMU_SOFTFLOAT_ATTR
2438 soft_float32_to_float64(float32 a, float_status *s)
2439 {
2440     FloatParts64 p;
2441
2442     float32_unpack_canonical(&p, a, s);
2443     parts_float_to_float(&p, s);
2444     return float64_round_pack_canonical(&p, s);
2445 }
2446
2447 float64 float32_to_float64(float32 a, float_status *s)
2448 {
2449     if (likely(float32_is_normal(a))) {
2450         /* Widening conversion can never produce inexact results.  */
2451         union_float32 uf;
2452         union_float64 ud;
2453         uf.s = a;
2454         ud.h = uf.h;
2455         return ud.s;
2456     } else if (float32_is_zero(a)) {
2457         return float64_set_sign(float64_zero, float32_is_neg(a));
2458     } else {
2459         return soft_float32_to_float64(a, s);
2460     }
2461 }
2462
2463 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2464 {
2465     FloatParts64 p;
2466     const FloatFmt *fmt;
2467
2468     float64_unpack_canonical(&p, a, s);
2469     if (ieee) {
2470         parts_float_to_float(&p, s);
2471         fmt = &float16_params;
2472     } else {
2473         parts_float_to_ahp(&p, s);
2474         fmt = &float16_params_ahp;
2475     }
2476     return float16a_round_pack_canonical(&p, s, fmt);
2477 }
2478
2479 float32 float64_to_float32(float64 a, float_status *s)
2480 {
2481     FloatParts64 p;
2482
2483     float64_unpack_canonical(&p, a, s);
2484     parts_float_to_float(&p, s);
2485     return float32_round_pack_canonical(&p, s);
2486 }
2487
2488 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2489 {
2490     FloatParts64 p;
2491
2492     bfloat16_unpack_canonical(&p, a, s);
2493     parts_float_to_float(&p, s);
2494     return float32_round_pack_canonical(&p, s);
2495 }
2496
2497 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2498 {
2499     FloatParts64 p;
2500
2501     bfloat16_unpack_canonical(&p, a, s);
2502     parts_float_to_float(&p, s);
2503     return float64_round_pack_canonical(&p, s);
2504 }
2505
2506 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2507 {
2508     FloatParts64 p;
2509
2510     float32_unpack_canonical(&p, a, s);
2511     parts_float_to_float(&p, s);
2512     return bfloat16_round_pack_canonical(&p, s);
2513 }
2514
2515 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2516 {
2517     FloatParts64 p;
2518
2519     float64_unpack_canonical(&p, a, s);
2520     parts_float_to_float(&p, s);
2521     return bfloat16_round_pack_canonical(&p, s);
2522 }
2523
2524 float32 float128_to_float32(float128 a, float_status *s)
2525 {
2526     FloatParts64 p64;
2527     FloatParts128 p128;
2528
2529     float128_unpack_canonical(&p128, a, s);
2530     parts_float_to_float_narrow(&p64, &p128, s);
2531     return float32_round_pack_canonical(&p64, s);
2532 }
2533
2534 float64 float128_to_float64(float128 a, float_status *s)
2535 {
2536     FloatParts64 p64;
2537     FloatParts128 p128;
2538
2539     float128_unpack_canonical(&p128, a, s);
2540     parts_float_to_float_narrow(&p64, &p128, s);
2541     return float64_round_pack_canonical(&p64, s);
2542 }
2543
2544 float128 float32_to_float128(float32 a, float_status *s)
2545 {
2546     FloatParts64 p64;
2547     FloatParts128 p128;
2548
2549     float32_unpack_canonical(&p64, a, s);
2550     parts_float_to_float_widen(&p128, &p64, s);
2551     return float128_round_pack_canonical(&p128, s);
2552 }
2553
2554 float128 float64_to_float128(float64 a, float_status *s)
2555 {
2556     FloatParts64 p64;
2557     FloatParts128 p128;
2558
2559     float64_unpack_canonical(&p64, a, s);
2560     parts_float_to_float_widen(&p128, &p64, s);
2561     return float128_round_pack_canonical(&p128, s);
2562 }
2563
2564 /*
2565  * Round to integral value
2566  */
2567
2568 float16 float16_round_to_int(float16 a, float_status *s)
2569 {
2570     FloatParts64 p;
2571
2572     float16_unpack_canonical(&p, a, s);
2573     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2574     return float16_round_pack_canonical(&p, s);
2575 }
2576
2577 float32 float32_round_to_int(float32 a, float_status *s)
2578 {
2579     FloatParts64 p;
2580
2581     float32_unpack_canonical(&p, a, s);
2582     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2583     return float32_round_pack_canonical(&p, s);
2584 }
2585
2586 float64 float64_round_to_int(float64 a, float_status *s)
2587 {
2588     FloatParts64 p;
2589
2590     float64_unpack_canonical(&p, a, s);
2591     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2592     return float64_round_pack_canonical(&p, s);
2593 }
2594
2595 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2596 {
2597     FloatParts64 p;
2598
2599     bfloat16_unpack_canonical(&p, a, s);
2600     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2601     return bfloat16_round_pack_canonical(&p, s);
2602 }
2603
2604 float128 float128_round_to_int(float128 a, float_status *s)
2605 {
2606     FloatParts128 p;
2607
2608     float128_unpack_canonical(&p, a, s);
2609     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
2610     return float128_round_pack_canonical(&p, s);
2611 }
2612
2613 /*
2614  * Floating-point to signed integer conversions
2615  */
2616
2617 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2618                               float_status *s)
2619 {
2620     FloatParts64 p;
2621
2622     float16_unpack_canonical(&p, a, s);
2623     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
2624 }
2625
2626 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2627                                 float_status *s)
2628 {
2629     FloatParts64 p;
2630
2631     float16_unpack_canonical(&p, a, s);
2632     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2633 }
2634
2635 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2636                                 float_status *s)
2637 {
2638     FloatParts64 p;
2639
2640     float16_unpack_canonical(&p, a, s);
2641     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2642 }
2643
2644 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2645                                 float_status *s)
2646 {
2647     FloatParts64 p;
2648
2649     float16_unpack_canonical(&p, a, s);
2650     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2651 }
2652
2653 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2654                                 float_status *s)
2655 {
2656     FloatParts64 p;
2657
2658     float32_unpack_canonical(&p, a, s);
2659     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2660 }
2661
2662 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2663                                 float_status *s)
2664 {
2665     FloatParts64 p;
2666
2667     float32_unpack_canonical(&p, a, s);
2668     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2669 }
2670
2671 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2672                                 float_status *s)
2673 {
2674     FloatParts64 p;
2675
2676     float32_unpack_canonical(&p, a, s);
2677     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2678 }
2679
2680 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2681                                 float_status *s)
2682 {
2683     FloatParts64 p;
2684
2685     float64_unpack_canonical(&p, a, s);
2686     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2687 }
2688
2689 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2690                                 float_status *s)
2691 {
2692     FloatParts64 p;
2693
2694     float64_unpack_canonical(&p, a, s);
2695     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2696 }
2697
2698 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2699                                 float_status *s)
2700 {
2701     FloatParts64 p;
2702
2703     float64_unpack_canonical(&p, a, s);
2704     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2705 }
2706
2707 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2708                                  float_status *s)
2709 {
2710     FloatParts64 p;
2711
2712     bfloat16_unpack_canonical(&p, a, s);
2713     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2714 }
2715
2716 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2717                                  float_status *s)
2718 {
2719     FloatParts64 p;
2720
2721     bfloat16_unpack_canonical(&p, a, s);
2722     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2723 }
2724
2725 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2726                                  float_status *s)
2727 {
2728     FloatParts64 p;
2729
2730     bfloat16_unpack_canonical(&p, a, s);
2731     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2732 }
2733
2734 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
2735                                         int scale, float_status *s)
2736 {
2737     FloatParts128 p;
2738
2739     float128_unpack_canonical(&p, a, s);
2740     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2741 }
2742
2743 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
2744                                         int scale, float_status *s)
2745 {
2746     FloatParts128 p;
2747
2748     float128_unpack_canonical(&p, a, s);
2749     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2750 }
2751
2752 int8_t float16_to_int8(float16 a, float_status *s)
2753 {
2754     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2755 }
2756
2757 int16_t float16_to_int16(float16 a, float_status *s)
2758 {
2759     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2760 }
2761
2762 int32_t float16_to_int32(float16 a, float_status *s)
2763 {
2764     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2765 }
2766
2767 int64_t float16_to_int64(float16 a, float_status *s)
2768 {
2769     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2770 }
2771
2772 int16_t float32_to_int16(float32 a, float_status *s)
2773 {
2774     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2775 }
2776
2777 int32_t float32_to_int32(float32 a, float_status *s)
2778 {
2779     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2780 }
2781
2782 int64_t float32_to_int64(float32 a, float_status *s)
2783 {
2784     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2785 }
2786
2787 int16_t float64_to_int16(float64 a, float_status *s)
2788 {
2789     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2790 }
2791
2792 int32_t float64_to_int32(float64 a, float_status *s)
2793 {
2794     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2795 }
2796
2797 int64_t float64_to_int64(float64 a, float_status *s)
2798 {
2799     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2800 }
2801
2802 int32_t float128_to_int32(float128 a, float_status *s)
2803 {
2804     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2805 }
2806
2807 int64_t float128_to_int64(float128 a, float_status *s)
2808 {
2809     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2810 }
2811
2812 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2813 {
2814     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2815 }
2816
2817 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2818 {
2819     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2820 }
2821
2822 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2823 {
2824     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2825 }
2826
2827 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2828 {
2829     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2830 }
2831
2832 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2833 {
2834     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2835 }
2836
2837 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2838 {
2839     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2840 }
2841
2842 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2843 {
2844     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2845 }
2846
2847 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2848 {
2849     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2850 }
2851
2852 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2853 {
2854     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2855 }
2856
2857 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
2858 {
2859     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
2860 }
2861
2862 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
2863 {
2864     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
2865 }
2866
2867 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2868 {
2869     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2870 }
2871
2872 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2873 {
2874     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2875 }
2876
2877 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2878 {
2879     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2880 }
2881
2882 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2883 {
2884     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2885 }
2886
2887 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2888 {
2889     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2890 }
2891
2892 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2893 {
2894     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2895 }
2896
2897 /*
2898  * Floating-point to unsigned integer conversions
2899  */
2900
2901 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2902                                 float_status *s)
2903 {
2904     FloatParts64 p;
2905
2906     float16_unpack_canonical(&p, a, s);
2907     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
2908 }
2909
2910 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2911                                   float_status *s)
2912 {
2913     FloatParts64 p;
2914
2915     float16_unpack_canonical(&p, a, s);
2916     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2917 }
2918
2919 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2920                                   float_status *s)
2921 {
2922     FloatParts64 p;
2923
2924     float16_unpack_canonical(&p, a, s);
2925     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2926 }
2927
2928 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2929                                   float_status *s)
2930 {
2931     FloatParts64 p;
2932
2933     float16_unpack_canonical(&p, a, s);
2934     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2935 }
2936
2937 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2938                                   float_status *s)
2939 {
2940     FloatParts64 p;
2941
2942     float32_unpack_canonical(&p, a, s);
2943     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2944 }
2945
2946 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2947                                   float_status *s)
2948 {
2949     FloatParts64 p;
2950
2951     float32_unpack_canonical(&p, a, s);
2952     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2953 }
2954
2955 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2956                                   float_status *s)
2957 {
2958     FloatParts64 p;
2959
2960     float32_unpack_canonical(&p, a, s);
2961     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2962 }
2963
2964 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2965                                   float_status *s)
2966 {
2967     FloatParts64 p;
2968
2969     float64_unpack_canonical(&p, a, s);
2970     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2971 }
2972
2973 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2974                                   float_status *s)
2975 {
2976     FloatParts64 p;
2977
2978     float64_unpack_canonical(&p, a, s);
2979     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2980 }
2981
2982 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2983                                   float_status *s)
2984 {
2985     FloatParts64 p;
2986
2987     float64_unpack_canonical(&p, a, s);
2988     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2989 }
2990
2991 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2992                                    int scale, float_status *s)
2993 {
2994     FloatParts64 p;
2995
2996     bfloat16_unpack_canonical(&p, a, s);
2997     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2998 }
2999
3000 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3001                                    int scale, float_status *s)
3002 {
3003     FloatParts64 p;
3004
3005     bfloat16_unpack_canonical(&p, a, s);
3006     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3007 }
3008
3009 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3010                                    int scale, float_status *s)
3011 {
3012     FloatParts64 p;
3013
3014     bfloat16_unpack_canonical(&p, a, s);
3015     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3016 }
3017
3018 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3019                                           int scale, float_status *s)
3020 {
3021     FloatParts128 p;
3022
3023     float128_unpack_canonical(&p, a, s);
3024     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3025 }
3026
3027 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3028                                           int scale, float_status *s)
3029 {
3030     FloatParts128 p;
3031
3032     float128_unpack_canonical(&p, a, s);
3033     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3034 }
3035
3036 uint8_t float16_to_uint8(float16 a, float_status *s)
3037 {
3038     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3039 }
3040
3041 uint16_t float16_to_uint16(float16 a, float_status *s)
3042 {
3043     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3044 }
3045
3046 uint32_t float16_to_uint32(float16 a, float_status *s)
3047 {
3048     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3049 }
3050
3051 uint64_t float16_to_uint64(float16 a, float_status *s)
3052 {
3053     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3054 }
3055
3056 uint16_t float32_to_uint16(float32 a, float_status *s)
3057 {
3058     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3059 }
3060
3061 uint32_t float32_to_uint32(float32 a, float_status *s)
3062 {
3063     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3064 }
3065
3066 uint64_t float32_to_uint64(float32 a, float_status *s)
3067 {
3068     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3069 }
3070
3071 uint16_t float64_to_uint16(float64 a, float_status *s)
3072 {
3073     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3074 }
3075
3076 uint32_t float64_to_uint32(float64 a, float_status *s)
3077 {
3078     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3079 }
3080
3081 uint64_t float64_to_uint64(float64 a, float_status *s)
3082 {
3083     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3084 }
3085
3086 uint32_t float128_to_uint32(float128 a, float_status *s)
3087 {
3088     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3089 }
3090
3091 uint64_t float128_to_uint64(float128 a, float_status *s)
3092 {
3093     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3094 }
3095
3096 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3097 {
3098     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3099 }
3100
3101 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3102 {
3103     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3104 }
3105
3106 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3107 {
3108     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3109 }
3110
3111 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3112 {
3113     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3114 }
3115
3116 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3117 {
3118     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3119 }
3120
3121 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3122 {
3123     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3124 }
3125
3126 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3127 {
3128     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3129 }
3130
3131 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3132 {
3133     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3134 }
3135
3136 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3137 {
3138     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3139 }
3140
3141 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3142 {
3143     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3144 }
3145
3146 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3147 {
3148     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3149 }
3150
3151 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3152 {
3153     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3154 }
3155
3156 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3157 {
3158     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3159 }
3160
3161 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3162 {
3163     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3164 }
3165
3166 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3167 {
3168     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3169 }
3170
3171 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3172 {
3173     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3174 }
3175
3176 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3177 {
3178     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3179 }
3180
3181 /*
3182  * Signed integer to floating-point conversions
3183  */
3184
3185 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3186 {
3187     FloatParts64 p;
3188
3189     parts_sint_to_float(&p, a, scale, status);
3190     return float16_round_pack_canonical(&p, status);
3191 }
3192
3193 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3194 {
3195     return int64_to_float16_scalbn(a, scale, status);
3196 }
3197
3198 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3199 {
3200     return int64_to_float16_scalbn(a, scale, status);
3201 }
3202
3203 float16 int64_to_float16(int64_t a, float_status *status)
3204 {
3205     return int64_to_float16_scalbn(a, 0, status);
3206 }
3207
3208 float16 int32_to_float16(int32_t a, float_status *status)
3209 {
3210     return int64_to_float16_scalbn(a, 0, status);
3211 }
3212
3213 float16 int16_to_float16(int16_t a, float_status *status)
3214 {
3215     return int64_to_float16_scalbn(a, 0, status);
3216 }
3217
3218 float16 int8_to_float16(int8_t a, float_status *status)
3219 {
3220     return int64_to_float16_scalbn(a, 0, status);
3221 }
3222
3223 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3224 {
3225     FloatParts64 p;
3226
3227     parts64_sint_to_float(&p, a, scale, status);
3228     return float32_round_pack_canonical(&p, status);
3229 }
3230
3231 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3232 {
3233     return int64_to_float32_scalbn(a, scale, status);
3234 }
3235
3236 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3237 {
3238     return int64_to_float32_scalbn(a, scale, status);
3239 }
3240
3241 float32 int64_to_float32(int64_t a, float_status *status)
3242 {
3243     return int64_to_float32_scalbn(a, 0, status);
3244 }
3245
3246 float32 int32_to_float32(int32_t a, float_status *status)
3247 {
3248     return int64_to_float32_scalbn(a, 0, status);
3249 }
3250
3251 float32 int16_to_float32(int16_t a, float_status *status)
3252 {
3253     return int64_to_float32_scalbn(a, 0, status);
3254 }
3255
3256 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3257 {
3258     FloatParts64 p;
3259
3260     parts_sint_to_float(&p, a, scale, status);
3261     return float64_round_pack_canonical(&p, status);
3262 }
3263
3264 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3265 {
3266     return int64_to_float64_scalbn(a, scale, status);
3267 }
3268
3269 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3270 {
3271     return int64_to_float64_scalbn(a, scale, status);
3272 }
3273
3274 float64 int64_to_float64(int64_t a, float_status *status)
3275 {
3276     return int64_to_float64_scalbn(a, 0, status);
3277 }
3278
3279 float64 int32_to_float64(int32_t a, float_status *status)
3280 {
3281     return int64_to_float64_scalbn(a, 0, status);
3282 }
3283
3284 float64 int16_to_float64(int16_t a, float_status *status)
3285 {
3286     return int64_to_float64_scalbn(a, 0, status);
3287 }
3288
3289 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3290 {
3291     FloatParts64 p;
3292
3293     parts_sint_to_float(&p, a, scale, status);
3294     return bfloat16_round_pack_canonical(&p, status);
3295 }
3296
3297 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3298 {
3299     return int64_to_bfloat16_scalbn(a, scale, status);
3300 }
3301
3302 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3303 {
3304     return int64_to_bfloat16_scalbn(a, scale, status);
3305 }
3306
3307 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3308 {
3309     return int64_to_bfloat16_scalbn(a, 0, status);
3310 }
3311
3312 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3313 {
3314     return int64_to_bfloat16_scalbn(a, 0, status);
3315 }
3316
3317 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3318 {
3319     return int64_to_bfloat16_scalbn(a, 0, status);
3320 }
3321
3322 float128 int64_to_float128(int64_t a, float_status *status)
3323 {
3324     FloatParts128 p;
3325
3326     parts_sint_to_float(&p, a, 0, status);
3327     return float128_round_pack_canonical(&p, status);
3328 }
3329
3330 float128 int32_to_float128(int32_t a, float_status *status)
3331 {
3332     return int64_to_float128(a, status);
3333 }
3334
3335 /*
3336  * Unsigned Integer to floating-point conversions
3337  */
3338
3339 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3340 {
3341     FloatParts64 p;
3342
3343     parts_uint_to_float(&p, a, scale, status);
3344     return float16_round_pack_canonical(&p, status);
3345 }
3346
3347 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3348 {
3349     return uint64_to_float16_scalbn(a, scale, status);
3350 }
3351
3352 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3353 {
3354     return uint64_to_float16_scalbn(a, scale, status);
3355 }
3356
3357 float16 uint64_to_float16(uint64_t a, float_status *status)
3358 {
3359     return uint64_to_float16_scalbn(a, 0, status);
3360 }
3361
3362 float16 uint32_to_float16(uint32_t a, float_status *status)
3363 {
3364     return uint64_to_float16_scalbn(a, 0, status);
3365 }
3366
3367 float16 uint16_to_float16(uint16_t a, float_status *status)
3368 {
3369     return uint64_to_float16_scalbn(a, 0, status);
3370 }
3371
3372 float16 uint8_to_float16(uint8_t a, float_status *status)
3373 {
3374     return uint64_to_float16_scalbn(a, 0, status);
3375 }
3376
3377 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3378 {
3379     FloatParts64 p;
3380
3381     parts_uint_to_float(&p, a, scale, status);
3382     return float32_round_pack_canonical(&p, status);
3383 }
3384
3385 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3386 {
3387     return uint64_to_float32_scalbn(a, scale, status);
3388 }
3389
3390 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3391 {
3392     return uint64_to_float32_scalbn(a, scale, status);
3393 }
3394
3395 float32 uint64_to_float32(uint64_t a, float_status *status)
3396 {
3397     return uint64_to_float32_scalbn(a, 0, status);
3398 }
3399
3400 float32 uint32_to_float32(uint32_t a, float_status *status)
3401 {
3402     return uint64_to_float32_scalbn(a, 0, status);
3403 }
3404
3405 float32 uint16_to_float32(uint16_t a, float_status *status)
3406 {
3407     return uint64_to_float32_scalbn(a, 0, status);
3408 }
3409
3410 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3411 {
3412     FloatParts64 p;
3413
3414     parts_uint_to_float(&p, a, scale, status);
3415     return float64_round_pack_canonical(&p, status);
3416 }
3417
3418 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3419 {
3420     return uint64_to_float64_scalbn(a, scale, status);
3421 }
3422
3423 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3424 {
3425     return uint64_to_float64_scalbn(a, scale, status);
3426 }
3427
3428 float64 uint64_to_float64(uint64_t a, float_status *status)
3429 {
3430     return uint64_to_float64_scalbn(a, 0, status);
3431 }
3432
3433 float64 uint32_to_float64(uint32_t a, float_status *status)
3434 {
3435     return uint64_to_float64_scalbn(a, 0, status);
3436 }
3437
3438 float64 uint16_to_float64(uint16_t a, float_status *status)
3439 {
3440     return uint64_to_float64_scalbn(a, 0, status);
3441 }
3442
3443 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3444 {
3445     FloatParts64 p;
3446
3447     parts_uint_to_float(&p, a, scale, status);
3448     return bfloat16_round_pack_canonical(&p, status);
3449 }
3450
3451 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3452 {
3453     return uint64_to_bfloat16_scalbn(a, scale, status);
3454 }
3455
3456 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3457 {
3458     return uint64_to_bfloat16_scalbn(a, scale, status);
3459 }
3460
3461 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3462 {
3463     return uint64_to_bfloat16_scalbn(a, 0, status);
3464 }
3465
3466 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3467 {
3468     return uint64_to_bfloat16_scalbn(a, 0, status);
3469 }
3470
3471 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3472 {
3473     return uint64_to_bfloat16_scalbn(a, 0, status);
3474 }
3475
3476 float128 uint64_to_float128(uint64_t a, float_status *status)
3477 {
3478     FloatParts128 p;
3479
3480     parts_uint_to_float(&p, a, 0, status);
3481     return float128_round_pack_canonical(&p, status);
3482 }
3483
3484 /*
3485  * Minimum and maximum
3486  */
3487
3488 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
3489 {
3490     FloatParts64 pa, pb, *pr;
3491
3492     float16_unpack_canonical(&pa, a, s);
3493     float16_unpack_canonical(&pb, b, s);
3494     pr = parts_minmax(&pa, &pb, s, flags);
3495
3496     return float16_round_pack_canonical(pr, s);
3497 }
3498
3499 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
3500                                 float_status *s, int flags)
3501 {
3502     FloatParts64 pa, pb, *pr;
3503
3504     bfloat16_unpack_canonical(&pa, a, s);
3505     bfloat16_unpack_canonical(&pb, b, s);
3506     pr = parts_minmax(&pa, &pb, s, flags);
3507
3508     return bfloat16_round_pack_canonical(pr, s);
3509 }
3510
3511 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
3512 {
3513     FloatParts64 pa, pb, *pr;
3514
3515     float32_unpack_canonical(&pa, a, s);
3516     float32_unpack_canonical(&pb, b, s);
3517     pr = parts_minmax(&pa, &pb, s, flags);
3518
3519     return float32_round_pack_canonical(pr, s);
3520 }
3521
3522 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
3523 {
3524     FloatParts64 pa, pb, *pr;
3525
3526     float64_unpack_canonical(&pa, a, s);
3527     float64_unpack_canonical(&pb, b, s);
3528     pr = parts_minmax(&pa, &pb, s, flags);
3529
3530     return float64_round_pack_canonical(pr, s);
3531 }
3532
3533 static float128 float128_minmax(float128 a, float128 b,
3534                                 float_status *s, int flags)
3535 {
3536     FloatParts128 pa, pb, *pr;
3537
3538     float128_unpack_canonical(&pa, a, s);
3539     float128_unpack_canonical(&pb, b, s);
3540     pr = parts_minmax(&pa, &pb, s, flags);
3541
3542     return float128_round_pack_canonical(pr, s);
3543 }
3544
3545 #define MINMAX_1(type, name, flags) \
3546     type type##_##name(type a, type b, float_status *s) \
3547     { return type##_minmax(a, b, s, flags); }
3548
3549 #define MINMAX_2(type) \
3550     MINMAX_1(type, max, 0)                                      \
3551     MINMAX_1(type, maxnum, minmax_isnum)                        \
3552     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)      \
3553     MINMAX_1(type, min, minmax_ismin)                           \
3554     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)         \
3555     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3556
3557 MINMAX_2(float16)
3558 MINMAX_2(bfloat16)
3559 MINMAX_2(float32)
3560 MINMAX_2(float64)
3561 MINMAX_2(float128)
3562
3563 #undef MINMAX_1
3564 #undef MINMAX_2
3565
3566 /*
3567  * Floating point compare
3568  */
3569
3570 static FloatRelation QEMU_FLATTEN
3571 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
3572 {
3573     FloatParts64 pa, pb;
3574
3575     float16_unpack_canonical(&pa, a, s);
3576     float16_unpack_canonical(&pb, b, s);
3577     return parts_compare(&pa, &pb, s, is_quiet);
3578 }
3579
3580 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
3581 {
3582     return float16_do_compare(a, b, s, false);
3583 }
3584
3585 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
3586 {
3587     return float16_do_compare(a, b, s, true);
3588 }
3589
3590 static FloatRelation QEMU_SOFTFLOAT_ATTR
3591 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
3592 {
3593     FloatParts64 pa, pb;
3594
3595     float32_unpack_canonical(&pa, a, s);
3596     float32_unpack_canonical(&pb, b, s);
3597     return parts_compare(&pa, &pb, s, is_quiet);
3598 }
3599
3600 static FloatRelation QEMU_FLATTEN
3601 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
3602 {
3603     union_float32 ua, ub;
3604
3605     ua.s = xa;
3606     ub.s = xb;
3607
3608     if (QEMU_NO_HARDFLOAT) {
3609         goto soft;
3610     }
3611
3612     float32_input_flush2(&ua.s, &ub.s, s);
3613     if (isgreaterequal(ua.h, ub.h)) {
3614         if (isgreater(ua.h, ub.h)) {
3615             return float_relation_greater;
3616         }
3617         return float_relation_equal;
3618     }
3619     if (likely(isless(ua.h, ub.h))) {
3620         return float_relation_less;
3621     }
3622     /*
3623      * The only condition remaining is unordered.
3624      * Fall through to set flags.
3625      */
3626  soft:
3627     return float32_do_compare(ua.s, ub.s, s, is_quiet);
3628 }
3629
3630 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
3631 {
3632     return float32_hs_compare(a, b, s, false);
3633 }
3634
3635 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
3636 {
3637     return float32_hs_compare(a, b, s, true);
3638 }
3639
3640 static FloatRelation QEMU_SOFTFLOAT_ATTR
3641 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
3642 {
3643     FloatParts64 pa, pb;
3644
3645     float64_unpack_canonical(&pa, a, s);
3646     float64_unpack_canonical(&pb, b, s);
3647     return parts_compare(&pa, &pb, s, is_quiet);
3648 }
3649
3650 static FloatRelation QEMU_FLATTEN
3651 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
3652 {
3653     union_float64 ua, ub;
3654
3655     ua.s = xa;
3656     ub.s = xb;
3657
3658     if (QEMU_NO_HARDFLOAT) {
3659         goto soft;
3660     }
3661
3662     float64_input_flush2(&ua.s, &ub.s, s);
3663     if (isgreaterequal(ua.h, ub.h)) {
3664         if (isgreater(ua.h, ub.h)) {
3665             return float_relation_greater;
3666         }
3667         return float_relation_equal;
3668     }
3669     if (likely(isless(ua.h, ub.h))) {
3670         return float_relation_less;
3671     }
3672     /*
3673      * The only condition remaining is unordered.
3674      * Fall through to set flags.
3675      */
3676  soft:
3677     return float64_do_compare(ua.s, ub.s, s, is_quiet);
3678 }
3679
3680 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
3681 {
3682     return float64_hs_compare(a, b, s, false);
3683 }
3684
3685 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
3686 {
3687     return float64_hs_compare(a, b, s, true);
3688 }
3689
3690 static FloatRelation QEMU_FLATTEN
3691 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
3692 {
3693     FloatParts64 pa, pb;
3694
3695     bfloat16_unpack_canonical(&pa, a, s);
3696     bfloat16_unpack_canonical(&pb, b, s);
3697     return parts_compare(&pa, &pb, s, is_quiet);
3698 }
3699
3700 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3701 {
3702     return bfloat16_do_compare(a, b, s, false);
3703 }
3704
3705 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3706 {
3707     return bfloat16_do_compare(a, b, s, true);
3708 }
3709
3710 static FloatRelation QEMU_FLATTEN
3711 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
3712 {
3713     FloatParts128 pa, pb;
3714
3715     float128_unpack_canonical(&pa, a, s);
3716     float128_unpack_canonical(&pb, b, s);
3717     return parts_compare(&pa, &pb, s, is_quiet);
3718 }
3719
3720 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
3721 {
3722     return float128_do_compare(a, b, s, false);
3723 }
3724
3725 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
3726 {
3727     return float128_do_compare(a, b, s, true);
3728 }
3729
3730 /*
3731  * Scale by 2**N
3732  */
3733
3734 float16 float16_scalbn(float16 a, int n, float_status *status)
3735 {
3736     FloatParts64 p;
3737
3738     float16_unpack_canonical(&p, a, status);
3739     parts_scalbn(&p, n, status);
3740     return float16_round_pack_canonical(&p, status);
3741 }
3742
3743 float32 float32_scalbn(float32 a, int n, float_status *status)
3744 {
3745     FloatParts64 p;
3746
3747     float32_unpack_canonical(&p, a, status);
3748     parts_scalbn(&p, n, status);
3749     return float32_round_pack_canonical(&p, status);
3750 }
3751
3752 float64 float64_scalbn(float64 a, int n, float_status *status)
3753 {
3754     FloatParts64 p;
3755
3756     float64_unpack_canonical(&p, a, status);
3757     parts_scalbn(&p, n, status);
3758     return float64_round_pack_canonical(&p, status);
3759 }
3760
3761 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3762 {
3763     FloatParts64 p;
3764
3765     bfloat16_unpack_canonical(&p, a, status);
3766     parts_scalbn(&p, n, status);
3767     return bfloat16_round_pack_canonical(&p, status);
3768 }
3769
3770 float128 float128_scalbn(float128 a, int n, float_status *status)
3771 {
3772     FloatParts128 p;
3773
3774     float128_unpack_canonical(&p, a, status);
3775     parts_scalbn(&p, n, status);
3776     return float128_round_pack_canonical(&p, status);
3777 }
3778
3779 /*
3780  * Square Root
3781  */
3782
3783 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
3784 {
3785     FloatParts64 p;
3786
3787     float16_unpack_canonical(&p, a, status);
3788     parts_sqrt(&p, status, &float16_params);
3789     return float16_round_pack_canonical(&p, status);
3790 }
3791
3792 static float32 QEMU_SOFTFLOAT_ATTR
3793 soft_f32_sqrt(float32 a, float_status *status)
3794 {
3795     FloatParts64 p;
3796
3797     float32_unpack_canonical(&p, a, status);
3798     parts_sqrt(&p, status, &float32_params);
3799     return float32_round_pack_canonical(&p, status);
3800 }
3801
3802 static float64 QEMU_SOFTFLOAT_ATTR
3803 soft_f64_sqrt(float64 a, float_status *status)
3804 {
3805     FloatParts64 p;
3806
3807     float64_unpack_canonical(&p, a, status);
3808     parts_sqrt(&p, status, &float64_params);
3809     return float64_round_pack_canonical(&p, status);
3810 }
3811
3812 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3813 {
3814     union_float32 ua, ur;
3815
3816     ua.s = xa;
3817     if (unlikely(!can_use_fpu(s))) {
3818         goto soft;
3819     }
3820
3821     float32_input_flush1(&ua.s, s);
3822     if (QEMU_HARDFLOAT_1F32_USE_FP) {
3823         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3824                        fpclassify(ua.h) == FP_ZERO) ||
3825                      signbit(ua.h))) {
3826             goto soft;
3827         }
3828     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3829                         float32_is_neg(ua.s))) {
3830         goto soft;
3831     }
3832     ur.h = sqrtf(ua.h);
3833     return ur.s;
3834
3835  soft:
3836     return soft_f32_sqrt(ua.s, s);
3837 }
3838
3839 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3840 {
3841     union_float64 ua, ur;
3842
3843     ua.s = xa;
3844     if (unlikely(!can_use_fpu(s))) {
3845         goto soft;
3846     }
3847
3848     float64_input_flush1(&ua.s, s);
3849     if (QEMU_HARDFLOAT_1F64_USE_FP) {
3850         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3851                        fpclassify(ua.h) == FP_ZERO) ||
3852                      signbit(ua.h))) {
3853             goto soft;
3854         }
3855     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3856                         float64_is_neg(ua.s))) {
3857         goto soft;
3858     }
3859     ur.h = sqrt(ua.h);
3860     return ur.s;
3861
3862  soft:
3863     return soft_f64_sqrt(ua.s, s);
3864 }
3865
3866 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3867 {
3868     FloatParts64 p;
3869
3870     bfloat16_unpack_canonical(&p, a, status);
3871     parts_sqrt(&p, status, &bfloat16_params);
3872     return bfloat16_round_pack_canonical(&p, status);
3873 }
3874
3875 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
3876 {
3877     FloatParts128 p;
3878
3879     float128_unpack_canonical(&p, a, status);
3880     parts_sqrt(&p, status, &float128_params);
3881     return float128_round_pack_canonical(&p, status);
3882 }
3883
3884 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
3885 {
3886     FloatParts128 p;
3887
3888     if (!floatx80_unpack_canonical(&p, a, s)) {
3889         return floatx80_default_nan(s);
3890     }
3891     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
3892     return floatx80_round_pack_canonical(&p, s);
3893 }
3894
3895 /*----------------------------------------------------------------------------
3896 | The pattern for a default generated NaN.
3897 *----------------------------------------------------------------------------*/
3898
3899 float16 float16_default_nan(float_status *status)
3900 {
3901     FloatParts64 p;
3902
3903     parts_default_nan(&p, status);
3904     p.frac >>= float16_params.frac_shift;
3905     return float16_pack_raw(&p);
3906 }
3907
3908 float32 float32_default_nan(float_status *status)
3909 {
3910     FloatParts64 p;
3911
3912     parts_default_nan(&p, status);
3913     p.frac >>= float32_params.frac_shift;
3914     return float32_pack_raw(&p);
3915 }
3916
3917 float64 float64_default_nan(float_status *status)
3918 {
3919     FloatParts64 p;
3920
3921     parts_default_nan(&p, status);
3922     p.frac >>= float64_params.frac_shift;
3923     return float64_pack_raw(&p);
3924 }
3925
3926 float128 float128_default_nan(float_status *status)
3927 {
3928     FloatParts128 p;
3929
3930     parts_default_nan(&p, status);
3931     frac_shr(&p, float128_params.frac_shift);
3932     return float128_pack_raw(&p);
3933 }
3934
3935 bfloat16 bfloat16_default_nan(float_status *status)
3936 {
3937     FloatParts64 p;
3938
3939     parts_default_nan(&p, status);
3940     p.frac >>= bfloat16_params.frac_shift;
3941     return bfloat16_pack_raw(&p);
3942 }
3943
3944 /*----------------------------------------------------------------------------
3945 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3946 *----------------------------------------------------------------------------*/
3947
3948 float16 float16_silence_nan(float16 a, float_status *status)
3949 {
3950     FloatParts64 p;
3951
3952     float16_unpack_raw(&p, a);
3953     p.frac <<= float16_params.frac_shift;
3954     parts_silence_nan(&p, status);
3955     p.frac >>= float16_params.frac_shift;
3956     return float16_pack_raw(&p);
3957 }
3958
3959 float32 float32_silence_nan(float32 a, float_status *status)
3960 {
3961     FloatParts64 p;
3962
3963     float32_unpack_raw(&p, a);
3964     p.frac <<= float32_params.frac_shift;
3965     parts_silence_nan(&p, status);
3966     p.frac >>= float32_params.frac_shift;
3967     return float32_pack_raw(&p);
3968 }
3969
3970 float64 float64_silence_nan(float64 a, float_status *status)
3971 {
3972     FloatParts64 p;
3973
3974     float64_unpack_raw(&p, a);
3975     p.frac <<= float64_params.frac_shift;
3976     parts_silence_nan(&p, status);
3977     p.frac >>= float64_params.frac_shift;
3978     return float64_pack_raw(&p);
3979 }
3980
3981 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3982 {
3983     FloatParts64 p;
3984
3985     bfloat16_unpack_raw(&p, a);
3986     p.frac <<= bfloat16_params.frac_shift;
3987     parts_silence_nan(&p, status);
3988     p.frac >>= bfloat16_params.frac_shift;
3989     return bfloat16_pack_raw(&p);
3990 }
3991
3992 float128 float128_silence_nan(float128 a, float_status *status)
3993 {
3994     FloatParts128 p;
3995
3996     float128_unpack_raw(&p, a);
3997     frac_shl(&p, float128_params.frac_shift);
3998     parts_silence_nan(&p, status);
3999     frac_shr(&p, float128_params.frac_shift);
4000     return float128_pack_raw(&p);
4001 }
4002
/*----------------------------------------------------------------------------
| If `a' is denormal and flush_inputs_to_zero is set in `status', raise the
| input-denormal exception and return a zero carrying the sign of `a'.
| Otherwise return `a' unchanged.
*----------------------------------------------------------------------------*/
4007
4008 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4009 {
4010     if (p.exp == 0 && p.frac != 0) {
4011         float_raise(float_flag_input_denormal, status);
4012         return true;
4013     }
4014
4015     return false;
4016 }
4017
4018 float16 float16_squash_input_denormal(float16 a, float_status *status)
4019 {
4020     if (status->flush_inputs_to_zero) {
4021         FloatParts64 p;
4022
4023         float16_unpack_raw(&p, a);
4024         if (parts_squash_denormal(p, status)) {
4025             return float16_set_sign(float16_zero, p.sign);
4026         }
4027     }
4028     return a;
4029 }
4030
4031 float32 float32_squash_input_denormal(float32 a, float_status *status)
4032 {
4033     if (status->flush_inputs_to_zero) {
4034         FloatParts64 p;
4035
4036         float32_unpack_raw(&p, a);
4037         if (parts_squash_denormal(p, status)) {
4038             return float32_set_sign(float32_zero, p.sign);
4039         }
4040     }
4041     return a;
4042 }
4043
4044 float64 float64_squash_input_denormal(float64 a, float_status *status)
4045 {
4046     if (status->flush_inputs_to_zero) {
4047         FloatParts64 p;
4048
4049         float64_unpack_raw(&p, a);
4050         if (parts_squash_denormal(p, status)) {
4051             return float64_set_sign(float64_zero, p.sign);
4052         }
4053     }
4054     return a;
4055 }
4056
4057 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4058 {
4059     if (status->flush_inputs_to_zero) {
4060         FloatParts64 p;
4061
4062         bfloat16_unpack_raw(&p, a);
4063         if (parts_squash_denormal(p, status)) {
4064             return bfloat16_set_sign(bfloat16_zero, p.sign);
4065         }
4066     }
4067     return a;
4068 }
4069
4070 /*----------------------------------------------------------------------------
4071 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
4072 | and 7, and returns the properly rounded 32-bit integer corresponding to the
4073 | input.  If `zSign' is 1, the input is negated before being converted to an
4074 | integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
4075 | is simply rounded to an integer, with the inexact exception raised if the
4076 | input cannot be represented exactly as an integer.  However, if the fixed-
4077 | point input is too large, the invalid exception is raised and the largest
4078 | positive or negative integer is returned.
4079 *----------------------------------------------------------------------------*/
4080
4081 static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
4082                                  float_status *status)
4083 {
4084     int8_t roundingMode;
4085     bool roundNearestEven;
4086     int8_t roundIncrement, roundBits;
4087     int32_t z;
4088
4089     roundingMode = status->float_rounding_mode;
4090     roundNearestEven = ( roundingMode == float_round_nearest_even );
4091     switch (roundingMode) {
4092     case float_round_nearest_even:
4093     case float_round_ties_away:
4094         roundIncrement = 0x40;
4095         break;
4096     case float_round_to_zero:
4097         roundIncrement = 0;
4098         break;
4099     case float_round_up:
4100         roundIncrement = zSign ? 0 : 0x7f;
4101         break;
4102     case float_round_down:
4103         roundIncrement = zSign ? 0x7f : 0;
4104         break;
4105     case float_round_to_odd:
4106         roundIncrement = absZ & 0x80 ? 0 : 0x7f;
4107         break;
4108     default:
4109         abort();
4110     }
4111     roundBits = absZ & 0x7F;
4112     absZ = ( absZ + roundIncrement )>>7;
4113     if (!(roundBits ^ 0x40) && roundNearestEven) {
4114         absZ &= ~1;
4115     }
4116     z = absZ;
4117     if ( zSign ) z = - z;
4118     if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
4119         float_raise(float_flag_invalid, status);
4120         return zSign ? INT32_MIN : INT32_MAX;
4121     }
4122     if (roundBits) {
4123         float_raise(float_flag_inexact, status);
4124     }
4125     return z;
4126
4127 }
4128
4129 /*----------------------------------------------------------------------------
4130 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4131 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4132 | and returns the properly rounded 64-bit integer corresponding to the input.
4133 | If `zSign' is 1, the input is negated before being converted to an integer.
4134 | Ordinarily, the fixed-point input is simply rounded to an integer, with
4135 | the inexact exception raised if the input cannot be represented exactly as
4136 | an integer.  However, if the fixed-point input is too large, the invalid
4137 | exception is raised and the largest positive or negative integer is
4138 | returned.
4139 *----------------------------------------------------------------------------*/
4140
4141 static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
4142                                float_status *status)
4143 {
4144     int8_t roundingMode;
4145     bool roundNearestEven, increment;
4146     int64_t z;
4147
4148     roundingMode = status->float_rounding_mode;
4149     roundNearestEven = ( roundingMode == float_round_nearest_even );
4150     switch (roundingMode) {
4151     case float_round_nearest_even:
4152     case float_round_ties_away:
4153         increment = ((int64_t) absZ1 < 0);
4154         break;
4155     case float_round_to_zero:
4156         increment = 0;
4157         break;
4158     case float_round_up:
4159         increment = !zSign && absZ1;
4160         break;
4161     case float_round_down:
4162         increment = zSign && absZ1;
4163         break;
4164     case float_round_to_odd:
4165         increment = !(absZ0 & 1) && absZ1;
4166         break;
4167     default:
4168         abort();
4169     }
4170     if ( increment ) {
4171         ++absZ0;
4172         if ( absZ0 == 0 ) goto overflow;
4173         if (!(absZ1 << 1) && roundNearestEven) {
4174             absZ0 &= ~1;
4175         }
4176     }
4177     z = absZ0;
4178     if ( zSign ) z = - z;
4179     if ( z && ( ( z < 0 ) ^ zSign ) ) {
4180  overflow:
4181         float_raise(float_flag_invalid, status);
4182         return zSign ? INT64_MIN : INT64_MAX;
4183     }
4184     if (absZ1) {
4185         float_raise(float_flag_inexact, status);
4186     }
4187     return z;
4188
4189 }
4190
4191 /*----------------------------------------------------------------------------
4192 | Normalizes the subnormal single-precision floating-point value represented
4193 | by the denormalized significand `aSig'.  The normalized exponent and
4194 | significand are stored at the locations pointed to by `zExpPtr' and
4195 | `zSigPtr', respectively.
4196 *----------------------------------------------------------------------------*/
4197
static void
normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* Left shift that leaves exactly 8 leading zero bits in the result. */
    int8_t shift = clz32(aSig) - 8;

    *zExpPtr = 1 - shift;
    *zSigPtr = aSig << shift;
}
4208
4209 /*----------------------------------------------------------------------------
4210 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4211 | and significand `zSig', and returns the proper single-precision floating-
4212 | point value corresponding to the abstract input.  Ordinarily, the abstract
4213 | value is simply rounded and packed into the single-precision format, with
4214 | the inexact exception raised if the abstract input cannot be represented
4215 | exactly.  However, if the abstract value is too large, the overflow and
4216 | inexact exceptions are raised and an infinity or maximal finite value is
4217 | returned.  If the abstract value is too small, the input value is rounded to
4218 | a subnormal number, and the underflow and inexact exceptions are raised if
4219 | the abstract input cannot be represented exactly as a subnormal single-
4220 | precision floating-point number.
4221 |     The input significand `zSig' has its binary point between bits 30
4222 | and 29, which is 7 bits to the left of the usual location.  This shifted
4223 | significand must be normalized or smaller.  If `zSig' is not normalized,
4224 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4225 | and it must not require rounding.  In the usual case that `zSig' is
4226 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4227 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4228 | Binary Floating-Point Arithmetic.
4229 *----------------------------------------------------------------------------*/
4230
4231 static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4232                                    float_status *status)
4233 {
4234     int8_t roundingMode;
4235     bool roundNearestEven;
4236     int8_t roundIncrement, roundBits;
4237     bool isTiny;
4238
4239     roundingMode = status->float_rounding_mode;
4240     roundNearestEven = ( roundingMode == float_round_nearest_even );
4241     switch (roundingMode) {
4242     case float_round_nearest_even:
4243     case float_round_ties_away:
4244         roundIncrement = 0x40;
4245         break;
4246     case float_round_to_zero:
4247         roundIncrement = 0;
4248         break;
4249     case float_round_up:
4250         roundIncrement = zSign ? 0 : 0x7f;
4251         break;
4252     case float_round_down:
4253         roundIncrement = zSign ? 0x7f : 0;
4254         break;
4255     case float_round_to_odd:
4256         roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4257         break;
4258     default:
4259         abort();
4260         break;
4261     }
4262     roundBits = zSig & 0x7F;
4263     if ( 0xFD <= (uint16_t) zExp ) {
4264         if (    ( 0xFD < zExp )
4265              || (    ( zExp == 0xFD )
4266                   && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
4267            ) {
4268             bool overflow_to_inf = roundingMode != float_round_to_odd &&
4269                                    roundIncrement != 0;
4270             float_raise(float_flag_overflow | float_flag_inexact, status);
4271             return packFloat32(zSign, 0xFF, -!overflow_to_inf);
4272         }
4273         if ( zExp < 0 ) {
4274             if (status->flush_to_zero) {
4275                 float_raise(float_flag_output_denormal, status);
4276                 return packFloat32(zSign, 0, 0);
4277             }
4278             isTiny = status->tininess_before_rounding
4279                   || (zExp < -1)
4280                   || (zSig + roundIncrement < 0x80000000);
4281             shift32RightJamming( zSig, - zExp, &zSig );
4282             zExp = 0;
4283             roundBits = zSig & 0x7F;
4284             if (isTiny && roundBits) {
4285                 float_raise(float_flag_underflow, status);
4286             }
4287             if (roundingMode == float_round_to_odd) {
4288                 /*
4289                  * For round-to-odd case, the roundIncrement depends on
4290                  * zSig which just changed.
4291                  */
4292                 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4293             }
4294         }
4295     }
4296     if (roundBits) {
4297         float_raise(float_flag_inexact, status);
4298     }
4299     zSig = ( zSig + roundIncrement )>>7;
4300     if (!(roundBits ^ 0x40) && roundNearestEven) {
4301         zSig &= ~1;
4302     }
4303     if ( zSig == 0 ) zExp = 0;
4304     return packFloat32( zSign, zExp, zSig );
4305
4306 }
4307
4308 /*----------------------------------------------------------------------------
4309 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4310 | and significand `zSig', and returns the proper single-precision floating-
4311 | point value corresponding to the abstract input.  This routine is just like
4312 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4313 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4314 | floating-point exponent.
4315 *----------------------------------------------------------------------------*/
4316
4317 static float32
4318  normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4319                               float_status *status)
4320 {
4321     int8_t shiftCount;
4322
4323     shiftCount = clz32(zSig) - 1;
4324     return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4325                                status);
4326
4327 }
4328
4329 /*----------------------------------------------------------------------------
4330 | Normalizes the subnormal double-precision floating-point value represented
4331 | by the denormalized significand `aSig'.  The normalized exponent and
4332 | significand are stored at the locations pointed to by `zExpPtr' and
4333 | `zSigPtr', respectively.
4334 *----------------------------------------------------------------------------*/
4335
static void
normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /* Left shift that leaves exactly 11 leading zero bits in the result. */
    int8_t shift = clz64(aSig) - 11;

    *zExpPtr = 1 - shift;
    *zSigPtr = aSig << shift;
}
4346
4347 /*----------------------------------------------------------------------------
4348 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4349 | double-precision floating-point value, returning the result.  After being
4350 | shifted into the proper positions, the three fields are simply added
4351 | together to form the result.  This means that any integer portion of `zSig'
4352 | will be added into the exponent.  Since a properly normalized significand
4353 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4354 | than the desired result exponent whenever `zSig' is a complete, normalized
4355 | significand.
4356 *----------------------------------------------------------------------------*/
4357
4358 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4359 {
4360
4361     return make_float64(
4362         ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4363
4364 }
4365
4366 /*----------------------------------------------------------------------------
4367 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4368 | and significand `zSig', and returns the proper double-precision floating-
4369 | point value corresponding to the abstract input.  Ordinarily, the abstract
4370 | value is simply rounded and packed into the double-precision format, with
4371 | the inexact exception raised if the abstract input cannot be represented
4372 | exactly.  However, if the abstract value is too large, the overflow and
4373 | inexact exceptions are raised and an infinity or maximal finite value is
4374 | returned.  If the abstract value is too small, the input value is rounded to
4375 | a subnormal number, and the underflow and inexact exceptions are raised if
4376 | the abstract input cannot be represented exactly as a subnormal double-
4377 | precision floating-point number.
4378 |     The input significand `zSig' has its binary point between bits 62
4379 | and 61, which is 10 bits to the left of the usual location.  This shifted
4380 | significand must be normalized or smaller.  If `zSig' is not normalized,
4381 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4382 | and it must not require rounding.  In the usual case that `zSig' is
4383 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4384 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4385 | Binary Floating-Point Arithmetic.
4386 *----------------------------------------------------------------------------*/
4387
4388 static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4389                                    float_status *status)
4390 {
4391     int8_t roundingMode;
4392     bool roundNearestEven;
4393     int roundIncrement, roundBits;
4394     bool isTiny;
4395
4396     roundingMode = status->float_rounding_mode;
4397     roundNearestEven = ( roundingMode == float_round_nearest_even );
4398     switch (roundingMode) {
4399     case float_round_nearest_even:
4400     case float_round_ties_away:
4401         roundIncrement = 0x200;
4402         break;
4403     case float_round_to_zero:
4404         roundIncrement = 0;
4405         break;
4406     case float_round_up:
4407         roundIncrement = zSign ? 0 : 0x3ff;
4408         break;
4409     case float_round_down:
4410         roundIncrement = zSign ? 0x3ff : 0;
4411         break;
4412     case float_round_to_odd:
4413         roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4414         break;
4415     default:
4416         abort();
4417     }
4418     roundBits = zSig & 0x3FF;
4419     if ( 0x7FD <= (uint16_t) zExp ) {
4420         if (    ( 0x7FD < zExp )
4421              || (    ( zExp == 0x7FD )
4422                   && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
4423            ) {
4424             bool overflow_to_inf = roundingMode != float_round_to_odd &&
4425                                    roundIncrement != 0;
4426             float_raise(float_flag_overflow | float_flag_inexact, status);
4427             return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
4428         }
4429         if ( zExp < 0 ) {
4430             if (status->flush_to_zero) {
4431                 float_raise(float_flag_output_denormal, status);
4432                 return packFloat64(zSign, 0, 0);
4433             }
4434             isTiny = status->tininess_before_rounding
4435                   || (zExp < -1)
4436                   || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
4437             shift64RightJamming( zSig, - zExp, &zSig );
4438             zExp = 0;
4439             roundBits = zSig & 0x3FF;
4440             if (isTiny && roundBits) {
4441                 float_raise(float_flag_underflow, status);
4442             }
4443             if (roundingMode == float_round_to_odd) {
4444                 /*
4445                  * For round-to-odd case, the roundIncrement depends on
4446                  * zSig which just changed.
4447                  */
4448                 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4449             }
4450         }
4451     }
4452     if (roundBits) {
4453         float_raise(float_flag_inexact, status);
4454     }
4455     zSig = ( zSig + roundIncrement )>>10;
4456     if (!(roundBits ^ 0x200) && roundNearestEven) {
4457         zSig &= ~1;
4458     }
4459     if ( zSig == 0 ) zExp = 0;
4460     return packFloat64( zSign, zExp, zSig );
4461
4462 }
4463
4464 /*----------------------------------------------------------------------------
4465 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4466 | and significand `zSig', and returns the proper double-precision floating-
4467 | point value corresponding to the abstract input.  This routine is just like
4468 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4469 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4470 | floating-point exponent.
4471 *----------------------------------------------------------------------------*/
4472
4473 static float64
4474  normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4475                               float_status *status)
4476 {
4477     int8_t shiftCount;
4478
4479     shiftCount = clz64(zSig) - 1;
4480     return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4481                                status);
4482
4483 }
4484
4485 /*----------------------------------------------------------------------------
4486 | Normalizes the subnormal extended double-precision floating-point value
4487 | represented by the denormalized significand `aSig'.  The normalized exponent
4488 | and significand are stored at the locations pointed to by `zExpPtr' and
4489 | `zSigPtr', respectively.
4490 *----------------------------------------------------------------------------*/
4491
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* Shift the leading set bit of aSig up to bit 63. */
    int8_t shift = clz64(aSig);

    *zExpPtr = 1 - shift;
    *zSigPtr = aSig << shift;
}
4501
4502 /*----------------------------------------------------------------------------
4503 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4504 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4505 | and returns the proper extended double-precision floating-point value
4506 | corresponding to the abstract input.  Ordinarily, the abstract value is
4507 | rounded and packed into the extended double-precision format, with the
4508 | inexact exception raised if the abstract input cannot be represented
4509 | exactly.  However, if the abstract value is too large, the overflow and
4510 | inexact exceptions are raised and an infinity or maximal finite value is
4511 | returned.  If the abstract value is too small, the input value is rounded to
4512 | a subnormal number, and the underflow and inexact exceptions are raised if
4513 | the abstract input cannot be represented exactly as a subnormal extended
4514 | double-precision floating-point number.
4515 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4516 | the result is rounded to the same number of bits as single or double
4517 | precision, respectively.  Otherwise, the result is rounded to the full
4518 | precision of the extended double-precision format.
4519 |     The input significand must be normalized or smaller.  If the input
4520 | significand is not normalized, `zExp' must be 0; in that case, the result
4521 | returned is a subnormal number, and it must not require rounding.  The
4522 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4523 | Floating-Point Arithmetic.
4524 *----------------------------------------------------------------------------*/
4525
4526 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4527                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4528                               float_status *status)
4529 {
4530     FloatRoundMode roundingMode;
4531     bool roundNearestEven, increment, isTiny;
4532     int64_t roundIncrement, roundMask, roundBits;
4533
4534     roundingMode = status->float_rounding_mode;
4535     roundNearestEven = ( roundingMode == float_round_nearest_even );
4536     switch (roundingPrecision) {
4537     case floatx80_precision_x:
4538         goto precision80;
4539     case floatx80_precision_d:
4540         roundIncrement = UINT64_C(0x0000000000000400);
4541         roundMask = UINT64_C(0x00000000000007FF);
4542         break;
4543     case floatx80_precision_s:
4544         roundIncrement = UINT64_C(0x0000008000000000);
4545         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4546         break;
4547     default:
4548         g_assert_not_reached();
4549     }
4550     zSig0 |= ( zSig1 != 0 );
4551     switch (roundingMode) {
4552     case float_round_nearest_even:
4553     case float_round_ties_away:
4554         break;
4555     case float_round_to_zero:
4556         roundIncrement = 0;
4557         break;
4558     case float_round_up:
4559         roundIncrement = zSign ? 0 : roundMask;
4560         break;
4561     case float_round_down:
4562         roundIncrement = zSign ? roundMask : 0;
4563         break;
4564     default:
4565         abort();
4566     }
4567     roundBits = zSig0 & roundMask;
4568     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4569         if (    ( 0x7FFE < zExp )
4570              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4571            ) {
4572             goto overflow;
4573         }
4574         if ( zExp <= 0 ) {
4575             if (status->flush_to_zero) {
4576                 float_raise(float_flag_output_denormal, status);
4577                 return packFloatx80(zSign, 0, 0);
4578             }
4579             isTiny = status->tininess_before_rounding
4580                   || (zExp < 0 )
4581                   || (zSig0 <= zSig0 + roundIncrement);
4582             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4583             zExp = 0;
4584             roundBits = zSig0 & roundMask;
4585             if (isTiny && roundBits) {
4586                 float_raise(float_flag_underflow, status);
4587             }
4588             if (roundBits) {
4589                 float_raise(float_flag_inexact, status);
4590             }
4591             zSig0 += roundIncrement;
4592             if ( (int64_t) zSig0 < 0 ) zExp = 1;
4593             roundIncrement = roundMask + 1;
4594             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4595                 roundMask |= roundIncrement;
4596             }
4597             zSig0 &= ~ roundMask;
4598             return packFloatx80( zSign, zExp, zSig0 );
4599         }
4600     }
4601     if (roundBits) {
4602         float_raise(float_flag_inexact, status);
4603     }
4604     zSig0 += roundIncrement;
4605     if ( zSig0 < roundIncrement ) {
4606         ++zExp;
4607         zSig0 = UINT64_C(0x8000000000000000);
4608     }
4609     roundIncrement = roundMask + 1;
4610     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4611         roundMask |= roundIncrement;
4612     }
4613     zSig0 &= ~ roundMask;
4614     if ( zSig0 == 0 ) zExp = 0;
4615     return packFloatx80( zSign, zExp, zSig0 );
4616  precision80:
4617     switch (roundingMode) {
4618     case float_round_nearest_even:
4619     case float_round_ties_away:
4620         increment = ((int64_t)zSig1 < 0);
4621         break;
4622     case float_round_to_zero:
4623         increment = 0;
4624         break;
4625     case float_round_up:
4626         increment = !zSign && zSig1;
4627         break;
4628     case float_round_down:
4629         increment = zSign && zSig1;
4630         break;
4631     default:
4632         abort();
4633     }
4634     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4635         if (    ( 0x7FFE < zExp )
4636              || (    ( zExp == 0x7FFE )
4637                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4638                   && increment
4639                 )
4640            ) {
4641             roundMask = 0;
4642  overflow:
4643             float_raise(float_flag_overflow | float_flag_inexact, status);
4644             if (    ( roundingMode == float_round_to_zero )
4645                  || ( zSign && ( roundingMode == float_round_up ) )
4646                  || ( ! zSign && ( roundingMode == float_round_down ) )
4647                ) {
4648                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4649             }
4650             return packFloatx80(zSign,
4651                                 floatx80_infinity_high,
4652                                 floatx80_infinity_low);
4653         }
4654         if ( zExp <= 0 ) {
4655             isTiny = status->tininess_before_rounding
4656                   || (zExp < 0)
4657                   || !increment
4658                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4659             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4660             zExp = 0;
4661             if (isTiny && zSig1) {
4662                 float_raise(float_flag_underflow, status);
4663             }
4664             if (zSig1) {
4665                 float_raise(float_flag_inexact, status);
4666             }
4667             switch (roundingMode) {
4668             case float_round_nearest_even:
4669             case float_round_ties_away:
4670                 increment = ((int64_t)zSig1 < 0);
4671                 break;
4672             case float_round_to_zero:
4673                 increment = 0;
4674                 break;
4675             case float_round_up:
4676                 increment = !zSign && zSig1;
4677                 break;
4678             case float_round_down:
4679                 increment = zSign && zSig1;
4680                 break;
4681             default:
4682                 abort();
4683             }
4684             if ( increment ) {
4685                 ++zSig0;
4686                 if (!(zSig1 << 1) && roundNearestEven) {
4687                     zSig0 &= ~1;
4688                 }
4689                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4690             }
4691             return packFloatx80( zSign, zExp, zSig0 );
4692         }
4693     }
4694     if (zSig1) {
4695         float_raise(float_flag_inexact, status);
4696     }
4697     if ( increment ) {
4698         ++zSig0;
4699         if ( zSig0 == 0 ) {
4700             ++zExp;
4701             zSig0 = UINT64_C(0x8000000000000000);
4702         }
4703         else {
4704             if (!(zSig1 << 1) && roundNearestEven) {
4705                 zSig0 &= ~1;
4706             }
4707         }
4708     }
4709     else {
4710         if ( zSig0 == 0 ) zExp = 0;
4711     }
4712     return packFloatx80( zSign, zExp, zSig0 );
4713
4714 }
4715
4716 /*----------------------------------------------------------------------------
4717 | Takes an abstract floating-point value having sign `zSign', exponent
4718 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4719 | and returns the proper extended double-precision floating-point value
4720 | corresponding to the abstract input.  This routine is just like
4721 | `roundAndPackFloatx80' except that the input significand does not have to be
4722 | normalized.
4723 *----------------------------------------------------------------------------*/
4724
4725 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
4726                                        bool zSign, int32_t zExp,
4727                                        uint64_t zSig0, uint64_t zSig1,
4728                                        float_status *status)
4729 {
4730     int8_t shiftCount;
4731
4732     if ( zSig0 == 0 ) {
4733         zSig0 = zSig1;
4734         zSig1 = 0;
4735         zExp -= 64;
4736     }
4737     shiftCount = clz64(zSig0);
4738     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4739     zExp -= shiftCount;
4740     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4741                                 zSig0, zSig1, status);
4742
4743 }
4744
4745 /*----------------------------------------------------------------------------
4746 | Returns the least-significant 64 fraction bits of the quadruple-precision
4747 | floating-point value `a'.
4748 *----------------------------------------------------------------------------*/
4749
4750 static inline uint64_t extractFloat128Frac1( float128 a )
4751 {
4752
4753     return a.low;
4754
4755 }
4756
4757 /*----------------------------------------------------------------------------
4758 | Returns the most-significant 48 fraction bits of the quadruple-precision
4759 | floating-point value `a'.
4760 *----------------------------------------------------------------------------*/
4761
4762 static inline uint64_t extractFloat128Frac0( float128 a )
4763 {
4764
4765     return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4766
4767 }
4768
4769 /*----------------------------------------------------------------------------
4770 | Returns the exponent bits of the quadruple-precision floating-point value
4771 | `a'.
4772 *----------------------------------------------------------------------------*/
4773
4774 static inline int32_t extractFloat128Exp( float128 a )
4775 {
4776
4777     return ( a.high>>48 ) & 0x7FFF;
4778
4779 }
4780
4781 /*----------------------------------------------------------------------------
4782 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4783 *----------------------------------------------------------------------------*/
4784
4785 static inline bool extractFloat128Sign(float128 a)
4786 {
4787     return a.high >> 63;
4788 }
4789
4790 /*----------------------------------------------------------------------------
4791 | Normalizes the subnormal quadruple-precision floating-point value
4792 | represented by the denormalized significand formed by the concatenation of
4793 | `aSig0' and `aSig1'.  The normalized exponent is stored at the location
4794 | pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
4795 | significand are stored at the location pointed to by `zSig0Ptr', and the
4796 | least significant 64 bits of the normalized significand are stored at the
4797 | location pointed to by `zSig1Ptr'.
4798 *----------------------------------------------------------------------------*/
4799
static void normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1,
                                       int32_t *zExpPtr,
                                       uint64_t *zSig0Ptr,
                                       uint64_t *zSig1Ptr)
{
    int8_t leftShift;

    if (aSig0 != 0) {
        /*
         * The leading 1 is in the high word: shift the 128-bit pair left so
         * that it ends up 15 bits below the top of the high word.
         */
        leftShift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, leftShift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - leftShift;
        return;
    }

    /*
     * The high word is empty, so the low word supplies the whole significand.
     * The distance is measured as if the low word already sat in the high
     * position; the extra 64 bits are folded into the exponent below.
     */
    leftShift = clz64(aSig1) - 15;
    if (leftShift >= 0) {
        *zSig0Ptr = aSig1 << leftShift;
        *zSig1Ptr = 0;
    } else {
        /* Fewer than 15 leading zeros: the value straddles both words. */
        *zSig0Ptr = aSig1 >> (-leftShift);
        *zSig1Ptr = aSig1 << (leftShift & 63);
    }
    *zExpPtr = -leftShift - 63;
}
4830
4831 /*----------------------------------------------------------------------------
4832 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4833 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4834 | floating-point value, returning the result.  After being shifted into the
4835 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
| added together to form the most significant 64 bits of the result.  This
4837 | means that any integer portion of `zSig0' will be added into the exponent.
4838 | Since a properly normalized significand will have an integer portion equal
4839 | to 1, the `zExp' input should be 1 less than the desired result exponent
4840 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4841 | significand.
4842 *----------------------------------------------------------------------------*/
4843
4844 static inline float128
4845 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4846 {
4847     float128 z;
4848
4849     z.low = zSig1;
4850     z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4851     return z;
4852 }
4853
4854 /*----------------------------------------------------------------------------
4855 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4856 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4857 | and `zSig2', and returns the proper quadruple-precision floating-point value
4858 | corresponding to the abstract input.  Ordinarily, the abstract value is
4859 | simply rounded and packed into the quadruple-precision format, with the
4860 | inexact exception raised if the abstract input cannot be represented
4861 | exactly.  However, if the abstract value is too large, the overflow and
4862 | inexact exceptions are raised and an infinity or maximal finite value is
4863 | returned.  If the abstract value is too small, the input value is rounded to
4864 | a subnormal number, and the underflow and inexact exceptions are raised if
4865 | the abstract input cannot be represented exactly as a subnormal quadruple-
4866 | precision floating-point number.
4867 |     The input significand must be normalized or smaller.  If the input
4868 | significand is not normalized, `zExp' must be 0; in that case, the result
4869 | returned is a subnormal number, and it must not require rounding.  In the
4870 | usual case that the input significand is normalized, `zExp' must be 1 less
4871 | than the ``true'' floating-point exponent.  The handling of underflow and
4872 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4873 *----------------------------------------------------------------------------*/
4874
4875 static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
4876                                      uint64_t zSig0, uint64_t zSig1,
4877                                      uint64_t zSig2, float_status *status)
4878 {
4879     int8_t roundingMode;
4880     bool roundNearestEven, increment, isTiny;
4881
4882     roundingMode = status->float_rounding_mode;
4883     roundNearestEven = ( roundingMode == float_round_nearest_even );
4884     switch (roundingMode) {
4885     case float_round_nearest_even:
4886     case float_round_ties_away:
4887         increment = ((int64_t)zSig2 < 0);
4888         break;
4889     case float_round_to_zero:
4890         increment = 0;
4891         break;
4892     case float_round_up:
4893         increment = !zSign && zSig2;
4894         break;
4895     case float_round_down:
4896         increment = zSign && zSig2;
4897         break;
4898     case float_round_to_odd:
4899         increment = !(zSig1 & 0x1) && zSig2;
4900         break;
4901     default:
4902         abort();
4903     }
4904     if ( 0x7FFD <= (uint32_t) zExp ) {
4905         if (    ( 0x7FFD < zExp )
4906              || (    ( zExp == 0x7FFD )
4907                   && eq128(
4908                          UINT64_C(0x0001FFFFFFFFFFFF),
4909                          UINT64_C(0xFFFFFFFFFFFFFFFF),
4910                          zSig0,
4911                          zSig1
4912                      )
4913                   && increment
4914                 )
4915            ) {
4916             float_raise(float_flag_overflow | float_flag_inexact, status);
4917             if (    ( roundingMode == float_round_to_zero )
4918                  || ( zSign && ( roundingMode == float_round_up ) )
4919                  || ( ! zSign && ( roundingMode == float_round_down ) )
4920                  || (roundingMode == float_round_to_odd)
4921                ) {
4922                 return
4923                     packFloat128(
4924                         zSign,
4925                         0x7FFE,
4926                         UINT64_C(0x0000FFFFFFFFFFFF),
4927                         UINT64_C(0xFFFFFFFFFFFFFFFF)
4928                     );
4929             }
4930             return packFloat128( zSign, 0x7FFF, 0, 0 );
4931         }
4932         if ( zExp < 0 ) {
4933             if (status->flush_to_zero) {
4934                 float_raise(float_flag_output_denormal, status);
4935                 return packFloat128(zSign, 0, 0, 0);
4936             }
4937             isTiny = status->tininess_before_rounding
4938                   || (zExp < -1)
4939                   || !increment
4940                   || lt128(zSig0, zSig1,
4941                            UINT64_C(0x0001FFFFFFFFFFFF),
4942                            UINT64_C(0xFFFFFFFFFFFFFFFF));
4943             shift128ExtraRightJamming(
4944                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4945             zExp = 0;
4946             if (isTiny && zSig2) {
4947                 float_raise(float_flag_underflow, status);
4948             }
4949             switch (roundingMode) {
4950             case float_round_nearest_even:
4951             case float_round_ties_away:
4952                 increment = ((int64_t)zSig2 < 0);
4953                 break;
4954             case float_round_to_zero:
4955                 increment = 0;
4956                 break;
4957             case float_round_up:
4958                 increment = !zSign && zSig2;
4959                 break;
4960             case float_round_down:
4961                 increment = zSign && zSig2;
4962                 break;
4963             case float_round_to_odd:
4964                 increment = !(zSig1 & 0x1) && zSig2;
4965                 break;
4966             default:
4967                 abort();
4968             }
4969         }
4970     }
4971     if (zSig2) {
4972         float_raise(float_flag_inexact, status);
4973     }
4974     if ( increment ) {
4975         add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
4976         if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4977             zSig1 &= ~1;
4978         }
4979     }
4980     else {
4981         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4982     }
4983     return packFloat128( zSign, zExp, zSig0, zSig1 );
4984
4985 }
4986
4987 /*----------------------------------------------------------------------------
4988 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4989 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4990 | returns the proper quadruple-precision floating-point value corresponding
4991 | to the abstract input.  This routine is just like `roundAndPackFloat128'
4992 | except that the input significand has fewer bits and does not have to be
4993 | normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
4994 | point exponent.
4995 *----------------------------------------------------------------------------*/
4996
4997 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
4998                                               uint64_t zSig0, uint64_t zSig1,
4999                                               float_status *status)
5000 {
5001     int8_t shiftCount;
5002     uint64_t zSig2;
5003
5004     if ( zSig0 == 0 ) {
5005         zSig0 = zSig1;
5006         zSig1 = 0;
5007         zExp -= 64;
5008     }
5009     shiftCount = clz64(zSig0) - 15;
5010     if ( 0 <= shiftCount ) {
5011         zSig2 = 0;
5012         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5013     }
5014     else {
5015         shift128ExtraRightJamming(
5016             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
5017     }
5018     zExp -= shiftCount;
5019     return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
5020
5021 }
5022
5023
5024 /*----------------------------------------------------------------------------
5025 | Returns the result of converting the 32-bit two's complement integer `a'
5026 | to the extended double-precision floating-point format.  The conversion
5027 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5028 | Arithmetic.
5029 *----------------------------------------------------------------------------*/
5030
5031 floatx80 int32_to_floatx80(int32_t a, float_status *status)
5032 {
5033     bool zSign;
5034     uint32_t absA;
5035     int8_t shiftCount;
5036     uint64_t zSig;
5037
5038     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5039     zSign = ( a < 0 );
5040     absA = zSign ? - a : a;
5041     shiftCount = clz32(absA) + 32;
5042     zSig = absA;
5043     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
5044
5045 }
5046
5047 /*----------------------------------------------------------------------------
5048 | Returns the result of converting the 64-bit two's complement integer `a'
5049 | to the extended double-precision floating-point format.  The conversion
5050 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5051 | Arithmetic.
5052 *----------------------------------------------------------------------------*/
5053
5054 floatx80 int64_to_floatx80(int64_t a, float_status *status)
5055 {
5056     bool zSign;
5057     uint64_t absA;
5058     int8_t shiftCount;
5059
5060     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5061     zSign = ( a < 0 );
5062     absA = zSign ? - a : a;
5063     shiftCount = clz64(absA);
5064     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
5065
5066 }
5067
5068 /*----------------------------------------------------------------------------
5069 | Returns the result of converting the single-precision floating-point value
5070 | `a' to the extended double-precision floating-point format.  The conversion
5071 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5072 | Arithmetic.
5073 *----------------------------------------------------------------------------*/
5074
5075 floatx80 float32_to_floatx80(float32 a, float_status *status)
5076 {
5077     bool aSign;
5078     int aExp;
5079     uint32_t aSig;
5080
5081     a = float32_squash_input_denormal(a, status);
5082     aSig = extractFloat32Frac( a );
5083     aExp = extractFloat32Exp( a );
5084     aSign = extractFloat32Sign( a );
5085     if ( aExp == 0xFF ) {
5086         if (aSig) {
5087             floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
5088                                                status);
5089             return floatx80_silence_nan(res, status);
5090         }
5091         return packFloatx80(aSign,
5092                             floatx80_infinity_high,
5093                             floatx80_infinity_low);
5094     }
5095     if ( aExp == 0 ) {
5096         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5097         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5098     }
5099     aSig |= 0x00800000;
5100     return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
5101
5102 }
5103
5104 /*----------------------------------------------------------------------------
5105 | Returns the remainder of the single-precision floating-point value `a'
5106 | with respect to the corresponding value `b'.  The operation is performed
5107 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5108 *----------------------------------------------------------------------------*/
5109
float32 float32_rem(float32 a, float32 b, float_status *status)
{
    /*
     * Computes the remainder of `a' with respect to `b': the quotient is
     * rounded to the nearest integer (ties to even, see the end of the
     * function) and the result carries the sign aSign ^ zSign.
     */
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint32_t aSig, bSig;
    uint32_t q;
    uint64_t aSig64, bSig64, q64;
    uint32_t alternateASig;
    int32_t sigMean;
    a = float32_squash_input_denormal(a, status);
    b = float32_squash_input_denormal(b, status);

    /* The sign of `b' is never extracted; only its magnitude is used. */
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    if ( aExp == 0xFF ) {
        /* `a' is a NaN or an infinity. */
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
            /* At least one operand is a NaN. */
            return propagateFloat32NaN(a, b, status);
        }
        /* Infinite `a' with a non-NaN `b': invalid operation. */
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    if ( bExp == 0xFF ) {
        if (bSig) {
            return propagateFloat32NaN(a, b, status);
        }
        /* Finite `a', infinite `b': `a' is returned unchanged. */
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* Remainder with respect to zero: invalid operation. */
            float_raise(float_flag_invalid, status);
            return float32_default_nan(status);
        }
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        /* A zero `a' is returned unchanged. */
        if ( aSig == 0 ) return a;
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the implicit leading 1 explicit in both significands. */
    aSig |= 0x00800000;
    bSig |= 0x00800000;
    if ( expDiff < 32 ) {
        /* Small exponent gap: stay in 32 bits, leading 1 moved to bit 31. */
        aSig <<= 8;
        bSig <<= 8;
        if ( expDiff < 0 ) {
            /* `a' is below half of `b': it already is the remainder. */
            if ( expDiff < -1 ) return a;
            /* expDiff == -1: halve aSig to put it on the scale of bSig. */
            aSig >>= 1;
        }
        /* Leading quotient bit. */
        q = ( bSig <= aSig );
        if ( q ) aSig -= bSig;
        if ( 0 < expDiff ) {
            /* Remaining quotient bits come from one 64-by-32-bit division. */
            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
            q >>= 32 - expDiff;
            bSig >>= 2;
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        }
        else {
            /* Two bits of headroom for the signed arithmetic further down. */
            aSig >>= 2;
            bSig >>= 2;
        }
    }
    else {
        /* Large exponent gap: reduce in 64-bit arithmetic, 62 bits a pass. */
        if ( bSig <= aSig ) aSig -= bSig;
        aSig64 = ( (uint64_t) aSig )<<40;
        bSig64 = ( (uint64_t) bSig )<<40;
        expDiff -= 64;
        while ( 0 < expDiff ) {
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
            /*
             * The estimate is lowered by 2, clamped at 0 -- presumably so it
             * never exceeds the true quotient; confirm against the contract
             * of estimateDiv128To64.
             */
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
            aSig64 = - ( ( bSig * q64 )<<38 );
            expDiff -= 62;
        }
        expDiff += 64;
        /* Final partial step with the remaining quotient bits. */
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        q = q64>>( 64 - expDiff );
        bSig <<= 6;
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
    }
    /*
     * Subtract bSig until the remainder turns negative.  alternateASig keeps
     * the last non-negative remainder; aSig ends as the first negative one.
     */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int32_t) aSig );
    /*
     * Choose the candidate of smaller magnitude.  On an exact tie (sum is
     * zero) the parity of q decides, which keeps the quotient even.
     */
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    /* Turn the signed remainder into sign plus magnitude. */
    zSign = ( (int32_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
}
5205
5206
5207
5208 /*----------------------------------------------------------------------------
5209 | Returns the binary exponential of the single-precision floating-point value
5210 | `a'. The operation is performed according to the IEC/IEEE Standard for
5211 | Binary Floating-Point Arithmetic.
5212 |
5213 | Uses the following identities:
5214 |
5215 | 1. -------------------------------------------------------------------------
5216 |      x    x*ln(2)
5217 |     2  = e
5218 |
5219 | 2. -------------------------------------------------------------------------
5220 |                      2     3     4     5           n
5221 |      x        x     x     x     x     x           x
5222 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5223 |               1!    2!    3!    4!    5!          n!
5224 *----------------------------------------------------------------------------*/
5225
/*
 * Coefficients of the series for e^x, stored as float64 bit patterns:
 * entry i (0-based) holds 1/(i+1)!, i.e. the factor applied to x^(i+1).
 * The constant term 1 of the series is not part of the table.
 */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /* 1/1!  */
    const_float64( 0x3fe0000000000000ll ), /* 1/2!  */
    const_float64( 0x3fc5555555555555ll ), /* 1/3!  */
    const_float64( 0x3fa5555555555555ll ), /* 1/4!  */
    const_float64( 0x3f81111111111111ll ), /* 1/5!  */
    const_float64( 0x3f56c16c16c16c17ll ), /* 1/6!  */
    const_float64( 0x3f2a01a01a01a01all ), /* 1/7!  */
    const_float64( 0x3efa01a01a01a01all ), /* 1/8!  */
    const_float64( 0x3ec71de3a556c734ll ), /* 1/9!  */
    const_float64( 0x3e927e4fb7789f5cll ), /* 1/10! */
    const_float64( 0x3e5ae64567f544e4ll ), /* 1/11! */
    const_float64( 0x3e21eed8eff8d898ll ), /* 1/12! */
    const_float64( 0x3de6124613a86d09ll ), /* 1/13! */
    const_float64( 0x3da93974a8c07c9dll ), /* 1/14! */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 1/15! */
};
5244
5245 float32 float32_exp2(float32 a, float_status *status)
5246 {
5247     bool aSign;
5248     int aExp;
5249     uint32_t aSig;
5250     float64 r, x, xn;
5251     int i;
5252     a = float32_squash_input_denormal(a, status);
5253
5254     aSig = extractFloat32Frac( a );
5255     aExp = extractFloat32Exp( a );
5256     aSign = extractFloat32Sign( a );
5257
5258     if ( aExp == 0xFF) {
5259         if (aSig) {
5260             return propagateFloat32NaN(a, float32_zero, status);
5261         }
5262         return (aSign) ? float32_zero : a;
5263     }
5264     if (aExp == 0) {
5265         if (aSig == 0) return float32_one;
5266     }
5267
5268     float_raise(float_flag_inexact, status);
5269
5270     /* ******************************* */
5271     /* using float64 for approximation */
5272     /* ******************************* */
5273     x = float32_to_float64(a, status);
5274     x = float64_mul(x, float64_ln2, status);
5275
5276     xn = x;
5277     r = float64_one;
5278     for (i = 0 ; i < 15 ; i++) {
5279         float64 f;
5280
5281         f = float64_mul(xn, float32_exp2_coefficients[i], status);
5282         r = float64_add(r, f, status);
5283
5284         xn = float64_mul(xn, x, status);
5285     }
5286
5287     return float64_to_float32(r, status);
5288 }
5289
5290 /*----------------------------------------------------------------------------
5291 | Returns the binary log of the single-precision floating-point value `a'.
5292 | The operation is performed according to the IEC/IEEE Standard for Binary
5293 | Floating-Point Arithmetic.
5294 *----------------------------------------------------------------------------*/
5295 float32 float32_log2(float32 a, float_status *status)
5296 {
5297     bool aSign, zSign;
5298     int aExp;
5299     uint32_t aSig, zSig, i;
5300
5301     a = float32_squash_input_denormal(a, status);
5302     aSig = extractFloat32Frac( a );
5303     aExp = extractFloat32Exp( a );
5304     aSign = extractFloat32Sign( a );
5305
5306     if ( aExp == 0 ) {
5307         if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5308         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5309     }
5310     if ( aSign ) {
5311         float_raise(float_flag_invalid, status);
5312         return float32_default_nan(status);
5313     }
5314     if ( aExp == 0xFF ) {
5315         if (aSig) {
5316             return propagateFloat32NaN(a, float32_zero, status);
5317         }
5318         return a;
5319     }
5320
5321     aExp -= 0x7F;
5322     aSig |= 0x00800000;
5323     zSign = aExp < 0;
5324     zSig = aExp << 23;
5325
5326     for (i = 1 << 22; i > 0; i >>= 1) {
5327         aSig = ( (uint64_t)aSig * aSig ) >> 23;
5328         if ( aSig & 0x01000000 ) {
5329             aSig >>= 1;
5330             zSig |= i;
5331         }
5332     }
5333
5334     if ( zSign )
5335         zSig = -zSig;
5336
5337     return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
5338 }
5339
5340 /*----------------------------------------------------------------------------
5341 | Returns the result of converting the double-precision floating-point value
5342 | `a' to the extended double-precision floating-point format.  The conversion
5343 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5344 | Arithmetic.
5345 *----------------------------------------------------------------------------*/
5346
5347 floatx80 float64_to_floatx80(float64 a, float_status *status)
5348 {
5349     bool aSign;
5350     int aExp;
5351     uint64_t aSig;
5352
5353     a = float64_squash_input_denormal(a, status);
5354     aSig = extractFloat64Frac( a );
5355     aExp = extractFloat64Exp( a );
5356     aSign = extractFloat64Sign( a );
5357     if ( aExp == 0x7FF ) {
5358         if (aSig) {
5359             floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5360                                                status);
5361             return floatx80_silence_nan(res, status);
5362         }
5363         return packFloatx80(aSign,
5364                             floatx80_infinity_high,
5365                             floatx80_infinity_low);
5366     }
5367     if ( aExp == 0 ) {
5368         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5369         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5370     }
5371     return
5372         packFloatx80(
5373             aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
5374
5375 }
5376
5377 /*----------------------------------------------------------------------------
5378 | Returns the remainder of the double-precision floating-point value `a'
5379 | with respect to the corresponding value `b'.  The operation is performed
5380 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5381 *----------------------------------------------------------------------------*/
5382
float64 float64_rem(float64 a, float64 b, float_status *status)
{
    /*
     * Computes the remainder of `a' with respect to `b': the quotient is
     * rounded to the nearest integer (ties to even, see the end of the
     * function) and the result carries the sign aSign ^ zSign.
     */
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint64_t aSig, bSig;
    uint64_t q, alternateASig;
    int64_t sigMean;

    a = float64_squash_input_denormal(a, status);
    b = float64_squash_input_denormal(b, status);
    /* The sign of `b' is never extracted; only its magnitude is used. */
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    if ( aExp == 0x7FF ) {
        /* `a' is a NaN or an infinity. */
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            /* At least one operand is a NaN. */
            return propagateFloat64NaN(a, b, status);
        }
        /* Infinite `a' with a non-NaN `b': invalid operation. */
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    if ( bExp == 0x7FF ) {
        if (bSig) {
            return propagateFloat64NaN(a, b, status);
        }
        /* Finite `a', infinite `b': `a' is returned unchanged. */
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* Remainder with respect to zero: invalid operation. */
            float_raise(float_flag_invalid, status);
            return float64_default_nan(status);
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        /* A zero `a' is returned unchanged. */
        if ( aSig == 0 ) return a;
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the implicit leading 1 explicit and move it up to bit 63. */
    aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
    bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
    if ( expDiff < 0 ) {
        /* `a' is below half of `b': it already is the remainder. */
        if ( expDiff < -1 ) return a;
        /* expDiff == -1: halve aSig to put it on the scale of bSig. */
        aSig >>= 1;
    }
    /* Leading quotient bit. */
    q = ( bSig <= aSig );
    if ( q ) aSig -= bSig;
    /* Reduce the exponent gap 62 quotient bits per pass. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        /*
         * The estimate is lowered by 2, clamped at 0 -- presumably so it
         * never exceeds the true quotient; confirm against the contract of
         * estimateDiv128To64.
         */
        q = ( 2 < q ) ? q - 2 : 0;
        aSig = - ( ( bSig>>2 ) * q );
        expDiff -= 62;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        /* Final partial step with the remaining quotient bits. */
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        bSig >>= 2;
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
    }
    else {
        /* Two bits of headroom for the signed arithmetic further down. */
        aSig >>= 2;
        bSig >>= 2;
    }
    /*
     * Subtract bSig until the remainder turns negative.  alternateASig keeps
     * the last non-negative remainder; aSig ends as the first negative one.
     */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int64_t) aSig );
    /*
     * Choose the candidate of smaller magnitude.  On an exact tie (sum is
     * zero) the parity of q decides, which keeps the quotient even.
     */
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    /* Turn the signed remainder into sign plus magnitude. */
    zSign = ( (int64_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);

}
5464
5465 /*----------------------------------------------------------------------------
5466 | Returns the binary log of the double-precision floating-point value `a'.
5467 | The operation is performed according to the IEC/IEEE Standard for Binary
5468 | Floating-Point Arithmetic.
5469 *----------------------------------------------------------------------------*/
5470 float64 float64_log2(float64 a, float_status *status)
5471 {
5472     bool aSign, zSign;
5473     int aExp;
5474     uint64_t aSig, aSig0, aSig1, zSig, i;
5475     a = float64_squash_input_denormal(a, status);
5476
5477     aSig = extractFloat64Frac( a );
5478     aExp = extractFloat64Exp( a );
5479     aSign = extractFloat64Sign( a );
5480
5481     if ( aExp == 0 ) {
5482         if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5483         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5484     }
5485     if ( aSign ) {
5486         float_raise(float_flag_invalid, status);
5487         return float64_default_nan(status);
5488     }
5489     if ( aExp == 0x7FF ) {
5490         if (aSig) {
5491             return propagateFloat64NaN(a, float64_zero, status);
5492         }
5493         return a;
5494     }
5495
5496     aExp -= 0x3FF;
5497     aSig |= UINT64_C(0x0010000000000000);
5498     zSign = aExp < 0;
5499     zSig = (uint64_t)aExp << 52;
5500     for (i = 1LL << 51; i > 0; i >>= 1) {
5501         mul64To128( aSig, aSig, &aSig0, &aSig1 );
5502         aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
5503         if ( aSig & UINT64_C(0x0020000000000000) ) {
5504             aSig >>= 1;
5505             zSig |= i;
5506         }
5507     }
5508
5509     if ( zSign )
5510         zSig = -zSig;
5511     return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
5512 }
5513
5514 /*----------------------------------------------------------------------------
5515 | Returns the result of converting the extended double-precision floating-
5516 | point value `a' to the 32-bit two's complement integer format.  The
5517 | conversion is performed according to the IEC/IEEE Standard for Binary
5518 | Floating-Point Arithmetic---which means in particular that the conversion
5519 | is rounded according to the current rounding mode.  If `a' is a NaN, the
5520 | largest positive integer is returned.  Otherwise, if the conversion
5521 | overflows, the largest integer with the same sign as `a' is returned.
5522 *----------------------------------------------------------------------------*/
5523
5524 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5525 {
5526     bool aSign;
5527     int32_t aExp, shiftCount;
5528     uint64_t aSig;
5529
5530     if (floatx80_invalid_encoding(a)) {
5531         float_raise(float_flag_invalid, status);
5532         return 1 << 31;
5533     }
5534     aSig = extractFloatx80Frac( a );
5535     aExp = extractFloatx80Exp( a );
5536     aSign = extractFloatx80Sign( a );
5537     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5538     shiftCount = 0x4037 - aExp;
5539     if ( shiftCount <= 0 ) shiftCount = 1;
5540     shift64RightJamming( aSig, shiftCount, &aSig );
5541     return roundAndPackInt32(aSign, aSig, status);
5542
5543 }
5544
5545 /*----------------------------------------------------------------------------
5546 | Returns the result of converting the extended double-precision floating-
5547 | point value `a' to the 32-bit two's complement integer format.  The
5548 | conversion is performed according to the IEC/IEEE Standard for Binary
5549 | Floating-Point Arithmetic, except that the conversion is always rounded
5550 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5551 | Otherwise, if the conversion overflows, the largest integer with the same
5552 | sign as `a' is returned.
5553 *----------------------------------------------------------------------------*/
5554
5555 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5556 {
5557     bool aSign;
5558     int32_t aExp, shiftCount;
5559     uint64_t aSig, savedASig;
5560     int32_t z;
5561
5562     if (floatx80_invalid_encoding(a)) {
5563         float_raise(float_flag_invalid, status);
5564         return 1 << 31;
5565     }
5566     aSig = extractFloatx80Frac( a );
5567     aExp = extractFloatx80Exp( a );
5568     aSign = extractFloatx80Sign( a );
5569     if ( 0x401E < aExp ) {
5570         if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5571         goto invalid;
5572     }
5573     else if ( aExp < 0x3FFF ) {
5574         if (aExp || aSig) {
5575             float_raise(float_flag_inexact, status);
5576         }
5577         return 0;
5578     }
5579     shiftCount = 0x403E - aExp;
5580     savedASig = aSig;
5581     aSig >>= shiftCount;
5582     z = aSig;
5583     if ( aSign ) z = - z;
5584     if ( ( z < 0 ) ^ aSign ) {
5585  invalid:
5586         float_raise(float_flag_invalid, status);
5587         return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5588     }
5589     if ( ( aSig<<shiftCount ) != savedASig ) {
5590         float_raise(float_flag_inexact, status);
5591     }
5592     return z;
5593
5594 }
5595
5596 /*----------------------------------------------------------------------------
5597 | Returns the result of converting the extended double-precision floating-
5598 | point value `a' to the 64-bit two's complement integer format.  The
5599 | conversion is performed according to the IEC/IEEE Standard for Binary
5600 | Floating-Point Arithmetic---which means in particular that the conversion
5601 | is rounded according to the current rounding mode.  If `a' is a NaN,
5602 | the largest positive integer is returned.  Otherwise, if the conversion
5603 | overflows, the largest integer with the same sign as `a' is returned.
5604 *----------------------------------------------------------------------------*/
5605
5606 int64_t floatx80_to_int64(floatx80 a, float_status *status)
5607 {
5608     bool aSign;
5609     int32_t aExp, shiftCount;
5610     uint64_t aSig, aSigExtra;
5611
5612     if (floatx80_invalid_encoding(a)) {
5613         float_raise(float_flag_invalid, status);
5614         return 1ULL << 63;
5615     }
5616     aSig = extractFloatx80Frac( a );
5617     aExp = extractFloatx80Exp( a );
5618     aSign = extractFloatx80Sign( a );
5619     shiftCount = 0x403E - aExp;
5620     if ( shiftCount <= 0 ) {
5621         if ( shiftCount ) {
5622             float_raise(float_flag_invalid, status);
5623             if (!aSign || floatx80_is_any_nan(a)) {
5624                 return INT64_MAX;
5625             }
5626             return INT64_MIN;
5627         }
5628         aSigExtra = 0;
5629     }
5630     else {
5631         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5632     }
5633     return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5634
5635 }
5636
5637 /*----------------------------------------------------------------------------
5638 | Returns the result of converting the extended double-precision floating-
5639 | point value `a' to the 64-bit two's complement integer format.  The
5640 | conversion is performed according to the IEC/IEEE Standard for Binary
5641 | Floating-Point Arithmetic, except that the conversion is always rounded
5642 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5643 | Otherwise, if the conversion overflows, the largest integer with the same
5644 | sign as `a' is returned.
5645 *----------------------------------------------------------------------------*/
5646
5647 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
5648 {
5649     bool aSign;
5650     int32_t aExp, shiftCount;
5651     uint64_t aSig;
5652     int64_t z;
5653
5654     if (floatx80_invalid_encoding(a)) {
5655         float_raise(float_flag_invalid, status);
5656         return 1ULL << 63;
5657     }
5658     aSig = extractFloatx80Frac( a );
5659     aExp = extractFloatx80Exp( a );
5660     aSign = extractFloatx80Sign( a );
5661     shiftCount = aExp - 0x403E;
5662     if ( 0 <= shiftCount ) {
5663         aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
5664         if ( ( a.high != 0xC03E ) || aSig ) {
5665             float_raise(float_flag_invalid, status);
5666             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5667                 return INT64_MAX;
5668             }
5669         }
5670         return INT64_MIN;
5671     }
5672     else if ( aExp < 0x3FFF ) {
5673         if (aExp | aSig) {
5674             float_raise(float_flag_inexact, status);
5675         }
5676         return 0;
5677     }
5678     z = aSig>>( - shiftCount );
5679     if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
5680         float_raise(float_flag_inexact, status);
5681     }
5682     if ( aSign ) z = - z;
5683     return z;
5684
5685 }
5686
5687 /*----------------------------------------------------------------------------
5688 | Returns the result of converting the extended double-precision floating-
5689 | point value `a' to the single-precision floating-point format.  The
5690 | conversion is performed according to the IEC/IEEE Standard for Binary
5691 | Floating-Point Arithmetic.
5692 *----------------------------------------------------------------------------*/
5693
5694 float32 floatx80_to_float32(floatx80 a, float_status *status)
5695 {
5696     bool aSign;
5697     int32_t aExp;
5698     uint64_t aSig;
5699
5700     if (floatx80_invalid_encoding(a)) {
5701         float_raise(float_flag_invalid, status);
5702         return float32_default_nan(status);
5703     }
5704     aSig = extractFloatx80Frac( a );
5705     aExp = extractFloatx80Exp( a );
5706     aSign = extractFloatx80Sign( a );
5707     if ( aExp == 0x7FFF ) {
5708         if ( (uint64_t) ( aSig<<1 ) ) {
5709             float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5710                                              status);
5711             return float32_silence_nan(res, status);
5712         }
5713         return packFloat32( aSign, 0xFF, 0 );
5714     }
5715     shift64RightJamming( aSig, 33, &aSig );
5716     if ( aExp || aSig ) aExp -= 0x3F81;
5717     return roundAndPackFloat32(aSign, aExp, aSig, status);
5718
5719 }
5720
5721 /*----------------------------------------------------------------------------
5722 | Returns the result of converting the extended double-precision floating-
5723 | point value `a' to the double-precision floating-point format.  The
5724 | conversion is performed according to the IEC/IEEE Standard for Binary
5725 | Floating-Point Arithmetic.
5726 *----------------------------------------------------------------------------*/
5727
5728 float64 floatx80_to_float64(floatx80 a, float_status *status)
5729 {
5730     bool aSign;
5731     int32_t aExp;
5732     uint64_t aSig, zSig;
5733
5734     if (floatx80_invalid_encoding(a)) {
5735         float_raise(float_flag_invalid, status);
5736         return float64_default_nan(status);
5737     }
5738     aSig = extractFloatx80Frac( a );
5739     aExp = extractFloatx80Exp( a );
5740     aSign = extractFloatx80Sign( a );
5741     if ( aExp == 0x7FFF ) {
5742         if ( (uint64_t) ( aSig<<1 ) ) {
5743             float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5744                                              status);
5745             return float64_silence_nan(res, status);
5746         }
5747         return packFloat64( aSign, 0x7FF, 0 );
5748     }
5749     shift64RightJamming( aSig, 1, &zSig );
5750     if ( aExp || aSig ) aExp -= 0x3C01;
5751     return roundAndPackFloat64(aSign, aExp, zSig, status);
5752
5753 }
5754
5755 /*----------------------------------------------------------------------------
5756 | Returns the result of converting the extended double-precision floating-
5757 | point value `a' to the quadruple-precision floating-point format.  The
5758 | conversion is performed according to the IEC/IEEE Standard for Binary
5759 | Floating-Point Arithmetic.
5760 *----------------------------------------------------------------------------*/
5761
5762 float128 floatx80_to_float128(floatx80 a, float_status *status)
5763 {
5764     bool aSign;
5765     int aExp;
5766     uint64_t aSig, zSig0, zSig1;
5767
5768     if (floatx80_invalid_encoding(a)) {
5769         float_raise(float_flag_invalid, status);
5770         return float128_default_nan(status);
5771     }
5772     aSig = extractFloatx80Frac( a );
5773     aExp = extractFloatx80Exp( a );
5774     aSign = extractFloatx80Sign( a );
5775     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5776         float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5777                                            status);
5778         return float128_silence_nan(res, status);
5779     }
5780     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5781     return packFloat128( aSign, aExp, zSig0, zSig1 );
5782
5783 }
5784
5785 /*----------------------------------------------------------------------------
5786 | Rounds the extended double-precision floating-point value `a'
5787 | to the precision provided by floatx80_rounding_precision and returns the
5788 | result as an extended double-precision floating-point value.
5789 | The operation is performed according to the IEC/IEEE Standard for Binary
5790 | Floating-Point Arithmetic.
5791 *----------------------------------------------------------------------------*/
5792
5793 floatx80 floatx80_round(floatx80 a, float_status *status)
5794 {
5795     FloatParts128 p;
5796
5797     if (!floatx80_unpack_canonical(&p, a, status)) {
5798         return floatx80_default_nan(status);
5799     }
5800     return floatx80_round_pack_canonical(&p, status);
5801 }
5802
5803 /*----------------------------------------------------------------------------
5804 | Rounds the extended double-precision floating-point value `a' to an integer,
5805 | and returns the result as an extended quadruple-precision floating-point
5806 | value.  The operation is performed according to the IEC/IEEE Standard for
5807 | Binary Floating-Point Arithmetic.
5808 *----------------------------------------------------------------------------*/
5809
5810 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5811 {
5812     bool aSign;
5813     int32_t aExp;
5814     uint64_t lastBitMask, roundBitsMask;
5815     floatx80 z;
5816
5817     if (floatx80_invalid_encoding(a)) {
5818         float_raise(float_flag_invalid, status);
5819         return floatx80_default_nan(status);
5820     }
5821     aExp = extractFloatx80Exp( a );
5822     if ( 0x403E <= aExp ) {
5823         if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5824             return propagateFloatx80NaN(a, a, status);
5825         }
5826         return a;
5827     }
5828     if ( aExp < 0x3FFF ) {
5829         if (    ( aExp == 0 )
5830              && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
5831             return a;
5832         }
5833         float_raise(float_flag_inexact, status);
5834         aSign = extractFloatx80Sign( a );
5835         switch (status->float_rounding_mode) {
5836          case float_round_nearest_even:
5837             if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5838                ) {
5839                 return
5840                     packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5841             }
5842             break;
5843         case float_round_ties_away:
5844             if (aExp == 0x3FFE) {
5845                 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5846             }
5847             break;
5848          case float_round_down:
5849             return
5850                   aSign ?
5851                       packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5852                 : packFloatx80( 0, 0, 0 );
5853          case float_round_up:
5854             return
5855                   aSign ? packFloatx80( 1, 0, 0 )
5856                 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5857
5858         case float_round_to_zero:
5859             break;
5860         default:
5861             g_assert_not_reached();
5862         }
5863         return packFloatx80( aSign, 0, 0 );
5864     }
5865     lastBitMask = 1;
5866     lastBitMask <<= 0x403E - aExp;
5867     roundBitsMask = lastBitMask - 1;
5868     z = a;
5869     switch (status->float_rounding_mode) {
5870     case float_round_nearest_even:
5871         z.low += lastBitMask>>1;
5872         if ((z.low & roundBitsMask) == 0) {
5873             z.low &= ~lastBitMask;
5874         }
5875         break;
5876     case float_round_ties_away:
5877         z.low += lastBitMask >> 1;
5878         break;
5879     case float_round_to_zero:
5880         break;
5881     case float_round_up:
5882         if (!extractFloatx80Sign(z)) {
5883             z.low += roundBitsMask;
5884         }
5885         break;
5886     case float_round_down:
5887         if (extractFloatx80Sign(z)) {
5888             z.low += roundBitsMask;
5889         }
5890         break;
5891     default:
5892         abort();
5893     }
5894     z.low &= ~ roundBitsMask;
5895     if ( z.low == 0 ) {
5896         ++z.high;
5897         z.low = UINT64_C(0x8000000000000000);
5898     }
5899     if (z.low != a.low) {
5900         float_raise(float_flag_inexact, status);
5901     }
5902     return z;
5903
5904 }
5905
5906 /*----------------------------------------------------------------------------
5907 | Returns the remainder of the extended double-precision floating-point value
5908 | `a' with respect to the corresponding value `b'.  The operation is performed
5909 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
5910 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
5911 | the quotient toward zero instead.  '*quotient' is set to the low 64 bits of
5912 | the absolute value of the integer quotient.
5913 *----------------------------------------------------------------------------*/
5914
5915 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
5916                          float_status *status)
5917 {
5918     bool aSign, zSign;
5919     int32_t aExp, bExp, expDiff, aExpOrig;
5920     uint64_t aSig0, aSig1, bSig;
5921     uint64_t q, term0, term1, alternateASig0, alternateASig1;
5922
5923     *quotient = 0;
5924     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5925         float_raise(float_flag_invalid, status);
5926         return floatx80_default_nan(status);
5927     }
5928     aSig0 = extractFloatx80Frac( a );
5929     aExpOrig = aExp = extractFloatx80Exp( a );
5930     aSign = extractFloatx80Sign( a );
5931     bSig = extractFloatx80Frac( b );
5932     bExp = extractFloatx80Exp( b );
5933     if ( aExp == 0x7FFF ) {
5934         if (    (uint64_t) ( aSig0<<1 )
5935              || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
5936             return propagateFloatx80NaN(a, b, status);
5937         }
5938         goto invalid;
5939     }
5940     if ( bExp == 0x7FFF ) {
5941         if ((uint64_t)(bSig << 1)) {
5942             return propagateFloatx80NaN(a, b, status);
5943         }
5944         if (aExp == 0 && aSig0 >> 63) {
5945             /*
5946              * Pseudo-denormal argument must be returned in normalized
5947              * form.
5948              */
5949             return packFloatx80(aSign, 1, aSig0);
5950         }
5951         return a;
5952     }
5953     if ( bExp == 0 ) {
5954         if ( bSig == 0 ) {
5955  invalid:
5956             float_raise(float_flag_invalid, status);
5957             return floatx80_default_nan(status);
5958         }
5959         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5960     }
5961     if ( aExp == 0 ) {
5962         if ( aSig0 == 0 ) return a;
5963         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5964     }
5965     zSign = aSign;
5966     expDiff = aExp - bExp;
5967     aSig1 = 0;
5968     if ( expDiff < 0 ) {
5969         if ( mod || expDiff < -1 ) {
5970             if (aExp == 1 && aExpOrig == 0) {
5971                 /*
5972                  * Pseudo-denormal argument must be returned in
5973                  * normalized form.
5974                  */
5975                 return packFloatx80(aSign, aExp, aSig0);
5976             }
5977             return a;
5978         }
5979         shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
5980         expDiff = 0;
5981     }
5982     *quotient = q = ( bSig <= aSig0 );
5983     if ( q ) aSig0 -= bSig;
5984     expDiff -= 64;
5985     while ( 0 < expDiff ) {
5986         q = estimateDiv128To64( aSig0, aSig1, bSig );
5987         q = ( 2 < q ) ? q - 2 : 0;
5988         mul64To128( bSig, q, &term0, &term1 );
5989         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5990         shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
5991         expDiff -= 62;
5992         *quotient <<= 62;
5993         *quotient += q;
5994     }
5995     expDiff += 64;
5996     if ( 0 < expDiff ) {
5997         q = estimateDiv128To64( aSig0, aSig1, bSig );
5998         q = ( 2 < q ) ? q - 2 : 0;
5999         q >>= 64 - expDiff;
6000         mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6001         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6002         shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6003         while ( le128( term0, term1, aSig0, aSig1 ) ) {
6004             ++q;
6005             sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6006         }
6007         if (expDiff < 64) {
6008             *quotient <<= expDiff;
6009         } else {
6010             *quotient = 0;
6011         }
6012         *quotient += q;
6013     }
6014     else {
6015         term1 = 0;
6016         term0 = bSig;
6017     }
6018     if (!mod) {
6019         sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6020         if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6021                 || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6022                         && ( q & 1 ) )
6023             ) {
6024             aSig0 = alternateASig0;
6025             aSig1 = alternateASig1;
6026             zSign = ! zSign;
6027             ++*quotient;
6028         }
6029     }
6030     return
6031         normalizeRoundAndPackFloatx80(
6032             floatx80_precision_x, zSign, bExp + expDiff, aSig0, aSig1, status);
6033
6034 }
6035
6036 /*----------------------------------------------------------------------------
6037 | Returns the remainder of the extended double-precision floating-point value
6038 | `a' with respect to the corresponding value `b'.  The operation is performed
6039 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6040 *----------------------------------------------------------------------------*/
6041
6042 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6043 {
6044     uint64_t quotient;
6045     return floatx80_modrem(a, b, false, &quotient, status);
6046 }
6047
6048 /*----------------------------------------------------------------------------
6049 | Returns the remainder of the extended double-precision floating-point value
6050 | `a' with respect to the corresponding value `b', with the quotient truncated
6051 | toward zero.
6052 *----------------------------------------------------------------------------*/
6053
6054 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6055 {
6056     uint64_t quotient;
6057     return floatx80_modrem(a, b, true, &quotient, status);
6058 }
6059
6060 /*----------------------------------------------------------------------------
6061 | Returns the result of converting the quadruple-precision floating-point
6062 | value `a' to the extended double-precision floating-point format.  The
6063 | conversion is performed according to the IEC/IEEE Standard for Binary
6064 | Floating-Point Arithmetic.
6065 *----------------------------------------------------------------------------*/
6066
6067 floatx80 float128_to_floatx80(float128 a, float_status *status)
6068 {
6069     bool aSign;
6070     int32_t aExp;
6071     uint64_t aSig0, aSig1;
6072
6073     aSig1 = extractFloat128Frac1( a );
6074     aSig0 = extractFloat128Frac0( a );
6075     aExp = extractFloat128Exp( a );
6076     aSign = extractFloat128Sign( a );
6077     if ( aExp == 0x7FFF ) {
6078         if ( aSig0 | aSig1 ) {
6079             floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6080                                                status);
6081             return floatx80_silence_nan(res, status);
6082         }
6083         return packFloatx80(aSign, floatx80_infinity_high,
6084                                    floatx80_infinity_low);
6085     }
6086     if ( aExp == 0 ) {
6087         if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6088         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6089     }
6090     else {
6091         aSig0 |= UINT64_C(0x0001000000000000);
6092     }
6093     shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
6094     return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
6095
6096 }
6097
6098 /*----------------------------------------------------------------------------
6099 | Returns the remainder of the quadruple-precision floating-point value `a'
6100 | with respect to the corresponding value `b'.  The operation is performed
6101 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6102 *----------------------------------------------------------------------------*/
6103
6104 float128 float128_rem(float128 a, float128 b, float_status *status)
6105 {
6106     bool aSign, zSign;
6107     int32_t aExp, bExp, expDiff;
6108     uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6109     uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6110     int64_t sigMean0;
6111
6112     aSig1 = extractFloat128Frac1( a );
6113     aSig0 = extractFloat128Frac0( a );
6114     aExp = extractFloat128Exp( a );
6115     aSign = extractFloat128Sign( a );
6116     bSig1 = extractFloat128Frac1( b );
6117     bSig0 = extractFloat128Frac0( b );
6118     bExp = extractFloat128Exp( b );
6119     if ( aExp == 0x7FFF ) {
6120         if (    ( aSig0 | aSig1 )
6121              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6122             return propagateFloat128NaN(a, b, status);
6123         }
6124         goto invalid;
6125     }
6126     if ( bExp == 0x7FFF ) {
6127         if (bSig0 | bSig1) {
6128             return propagateFloat128NaN(a, b, status);
6129         }
6130         return a;
6131     }
6132     if ( bExp == 0 ) {
6133         if ( ( bSig0 | bSig1 ) == 0 ) {
6134  invalid:
6135             float_raise(float_flag_invalid, status);
6136             return float128_default_nan(status);
6137         }
6138         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6139     }
6140     if ( aExp == 0 ) {
6141         if ( ( aSig0 | aSig1 ) == 0 ) return a;
6142         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6143     }
6144     expDiff = aExp - bExp;
6145     if ( expDiff < -1 ) return a;
6146     shortShift128Left(
6147         aSig0 | UINT64_C(0x0001000000000000),
6148         aSig1,
6149         15 - ( expDiff < 0 ),
6150         &aSig0,
6151         &aSig1
6152     );
6153     shortShift128Left(
6154         bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
6155     q = le128( bSig0, bSig1, aSig0, aSig1 );
6156     if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6157     expDiff -= 64;
6158     while ( 0 < expDiff ) {
6159         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6160         q = ( 4 < q ) ? q - 4 : 0;
6161         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6162         shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6163         shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6164         sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6165         expDiff -= 61;
6166     }
6167     if ( -64 < expDiff ) {
6168         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6169         q = ( 4 < q ) ? q - 4 : 0;
6170         q >>= - expDiff;
6171         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6172         expDiff += 52;
6173         if ( expDiff < 0 ) {
6174             shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6175         }
6176         else {
6177             shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6178         }
6179         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6180         sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6181     }
6182     else {
6183         shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6184         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6185     }
6186     do {
6187         alternateASig0 = aSig0;
6188         alternateASig1 = aSig1;
6189         ++q;
6190         sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6191     } while ( 0 <= (int64_t) aSig0 );
6192     add128(
6193         aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
6194     if (    ( sigMean0 < 0 )
6195          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6196         aSig0 = alternateASig0;
6197         aSig1 = alternateASig1;
6198     }
6199     zSign = ( (int64_t) aSig0 < 0 );
6200     if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
6201     return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6202                                          status);
6203 }
6204
6205 static inline FloatRelation
6206 floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
6207                           float_status *status)
6208 {
6209     bool aSign, bSign;
6210
6211     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6212         float_raise(float_flag_invalid, status);
6213         return float_relation_unordered;
6214     }
6215     if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6216           ( extractFloatx80Frac( a )<<1 ) ) ||
6217         ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6218           ( extractFloatx80Frac( b )<<1 ) )) {
6219         if (!is_quiet ||
6220             floatx80_is_signaling_nan(a, status) ||
6221             floatx80_is_signaling_nan(b, status)) {
6222             float_raise(float_flag_invalid, status);
6223         }
6224         return float_relation_unordered;
6225     }
6226     aSign = extractFloatx80Sign( a );
6227     bSign = extractFloatx80Sign( b );
6228     if ( aSign != bSign ) {
6229
6230         if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6231              ( ( a.low | b.low ) == 0 ) ) {
6232             /* zero case */
6233             return float_relation_equal;
6234         } else {
6235             return 1 - (2 * aSign);
6236         }
6237     } else {
6238         /* Normalize pseudo-denormals before comparison.  */
6239         if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
6240             ++a.high;
6241         }
6242         if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
6243             ++b.high;
6244         }
6245         if (a.low == b.low && a.high == b.high) {
6246             return float_relation_equal;
6247         } else {
6248             return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6249         }
6250     }
6251 }
6252
6253 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
6254 {
6255     return floatx80_compare_internal(a, b, 0, status);
6256 }
6257
6258 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
6259                                      float_status *status)
6260 {
6261     return floatx80_compare_internal(a, b, 1, status);
6262 }
6263
6264 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
6265 {
6266     bool aSign;
6267     int32_t aExp;
6268     uint64_t aSig;
6269
6270     if (floatx80_invalid_encoding(a)) {
6271         float_raise(float_flag_invalid, status);
6272         return floatx80_default_nan(status);
6273     }
6274     aSig = extractFloatx80Frac( a );
6275     aExp = extractFloatx80Exp( a );
6276     aSign = extractFloatx80Sign( a );
6277
6278     if ( aExp == 0x7FFF ) {
6279         if ( aSig<<1 ) {
6280             return propagateFloatx80NaN(a, a, status);
6281         }
6282         return a;
6283     }
6284
6285     if (aExp == 0) {
6286         if (aSig == 0) {
6287             return a;
6288         }
6289         aExp++;
6290     }
6291
6292     if (n > 0x10000) {
6293         n = 0x10000;
6294     } else if (n < -0x10000) {
6295         n = -0x10000;
6296     }
6297
6298     aExp += n;
6299     return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
6300                                          aSign, aExp, aSig, 0, status);
6301 }
6302
6303 static void __attribute__((constructor)) softfloat_init(void)
6304 {
6305     union_float64 ua, ub, uc, ur;
6306
6307     if (QEMU_NO_HARDFLOAT) {
6308         return;
6309     }
6310     /*
6311      * Test that the host's FMA is not obviously broken. For example,
6312      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6313      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6314      */
6315     ua.s = 0x0020000000000001ULL;
6316     ub.s = 0x3ca0000000000000ULL;
6317     uc.s = 0x0020000000000000ULL;
6318     ur.h = fma(ua.h, ub.h, uc.h);
6319     if (ur.s != 0x0020000000000001ULL) {
6320         force_soft_fma = true;
6321     }
6322 }