4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
44 ===============================================================================
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
85 #include "qemu/osdep.h"
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
90 /* We only need stdlib for abort() */
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 float_raise(float_flag_input_denormal, s); \
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck
, float32
)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck
, float64
)
141 #undef GEN_INPUT_FLUSH__NOCHECK
143 #define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
146 if (likely(!s->flush_inputs_to_zero)) { \
149 soft_t ## _input_flush__nocheck(a, s); \
152 GEN_INPUT_FLUSH1(float32_input_flush1
, float32
)
153 GEN_INPUT_FLUSH1(float64_input_flush1
, float64
)
154 #undef GEN_INPUT_FLUSH1
156 #define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
159 if (likely(!s->flush_inputs_to_zero)) { \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
166 GEN_INPUT_FLUSH2(float32_input_flush2
, float32
)
167 GEN_INPUT_FLUSH2(float64_input_flush2
, float64
)
168 #undef GEN_INPUT_FLUSH2
170 #define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
173 if (likely(!s->flush_inputs_to_zero)) { \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
181 GEN_INPUT_FLUSH3(float32_input_flush3
, float32
)
182 GEN_INPUT_FLUSH3(float64_input_flush3
, float64
)
183 #undef GEN_INPUT_FLUSH3
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
235 static inline bool can_use_fpu(const float_status
*s
)
237 if (QEMU_NO_HARDFLOAT
) {
240 return likely(s
->float_exception_flags
& float_flag_inexact
&&
241 s
->float_rounding_mode
== float_round_nearest_even
);
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
266 typedef bool (*f32_check_fn
)(union_float32 a
, union_float32 b
);
267 typedef bool (*f64_check_fn
)(union_float64 a
, union_float64 b
);
269 typedef float32 (*soft_f32_op2_fn
)(float32 a
, float32 b
, float_status
*s
);
270 typedef float64 (*soft_f64_op2_fn
)(float64 a
, float64 b
, float_status
*s
);
271 typedef float (*hard_f32_op2_fn
)(float a
, float b
);
272 typedef double (*hard_f64_op2_fn
)(double a
, double b
);
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a
, union_float32 b
)
277 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
282 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
283 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
285 return float32_is_zero_or_normal(a
.s
) &&
286 float32_is_zero_or_normal(b
.s
);
289 static inline bool f64_is_zon2(union_float64 a
, union_float64 b
)
291 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
292 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
293 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
295 return float64_is_zero_or_normal(a
.s
) &&
296 float64_is_zero_or_normal(b
.s
);
299 /* 3-input is-zero-or-normal */
301 bool f32_is_zon3(union_float32 a
, union_float32 b
, union_float32 c
)
303 if (QEMU_HARDFLOAT_3F32_USE_FP
) {
304 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
305 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
306 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
308 return float32_is_zero_or_normal(a
.s
) &&
309 float32_is_zero_or_normal(b
.s
) &&
310 float32_is_zero_or_normal(c
.s
);
314 bool f64_is_zon3(union_float64 a
, union_float64 b
, union_float64 c
)
316 if (QEMU_HARDFLOAT_3F64_USE_FP
) {
317 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
318 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
319 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
321 return float64_is_zero_or_normal(a
.s
) &&
322 float64_is_zero_or_normal(b
.s
) &&
323 float64_is_zero_or_normal(c
.s
);
326 static inline bool f32_is_inf(union_float32 a
)
328 if (QEMU_HARDFLOAT_USE_ISINF
) {
331 return float32_is_infinity(a
.s
);
334 static inline bool f64_is_inf(union_float64 a
)
336 if (QEMU_HARDFLOAT_USE_ISINF
) {
339 return float64_is_infinity(a
.s
);
342 static inline float32
343 float32_gen2(float32 xa
, float32 xb
, float_status
*s
,
344 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
,
345 f32_check_fn pre
, f32_check_fn post
)
347 union_float32 ua
, ub
, ur
;
352 if (unlikely(!can_use_fpu(s
))) {
356 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
357 if (unlikely(!pre(ua
, ub
))) {
361 ur
.h
= hard(ua
.h
, ub
.h
);
362 if (unlikely(f32_is_inf(ur
))) {
363 float_raise(float_flag_overflow
, s
);
364 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
) && post(ua
, ub
)) {
370 return soft(ua
.s
, ub
.s
, s
);
373 static inline float64
374 float64_gen2(float64 xa
, float64 xb
, float_status
*s
,
375 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
,
376 f64_check_fn pre
, f64_check_fn post
)
378 union_float64 ua
, ub
, ur
;
383 if (unlikely(!can_use_fpu(s
))) {
387 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
388 if (unlikely(!pre(ua
, ub
))) {
392 ur
.h
= hard(ua
.h
, ub
.h
);
393 if (unlikely(f64_is_inf(ur
))) {
394 float_raise(float_flag_overflow
, s
);
395 } else if (unlikely(fabs(ur
.h
) <= DBL_MIN
) && post(ua
, ub
)) {
401 return soft(ua
.s
, ub
.s
, s
);
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
408 static inline uint32_t extractFloat32Frac(float32 a
)
410 return float32_val(a
) & 0x007FFFFF;
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
417 static inline int extractFloat32Exp(float32 a
)
419 return (float32_val(a
) >> 23) & 0xFF;
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
426 static inline bool extractFloat32Sign(float32 a
)
428 return float32_val(a
) >> 31;
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
435 static inline uint64_t extractFloat64Frac(float64 a
)
437 return float64_val(a
) & UINT64_C(0x000FFFFFFFFFFFFF);
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
444 static inline int extractFloat64Exp(float64 a
)
446 return (float64_val(a
) >> 52) & 0x7FF;
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
453 static inline bool extractFloat64Sign(float64 a
)
455 return float64_val(a
) >> 63;
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

#define float_cmask(bit)  (1u << (bit))

/* Bit masks over FloatClass, for testing several classes at once. */
enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
485 /* Flags for parts_minmax. */
487 /* Set for minimum; clear for maximum. */
489 /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
491 /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
495 /* Simple helpers for checking if, or what kind of, NaN we have */
496 static inline __attribute__((unused
)) bool is_nan(FloatClass c
)
498 return unlikely(c
>= float_class_qnan
);
501 static inline __attribute__((unused
)) bool is_snan(FloatClass c
)
503 return c
== float_class_snan
;
506 static inline __attribute__((unused
)) bool is_qnan(FloatClass c
)
508 return c
== float_class_qnan
;
512 * Structure holding all of the decomposed parts of a float.
513 * The exponent is unbiased and the fraction is normalized.
515 * The fraction words are stored in big-endian word ordering,
516 * so that truncation from a larger format to a smaller format
517 * can be done simply by ignoring subsequent elements.
525 /* Routines that know the structure may reference the singular name. */
528 * Routines expanded with multiple structures reference "hi" and "lo"
529 * depending on the operation. In FloatParts64, "hi" and "lo" are
530 * both the same word and aliased here.
550 uint64_t frac_hm
; /* high-middle */
551 uint64_t frac_lm
; /* low-middle */
555 /* These apply to the most significant word of each FloatPartsN. */
556 #define DECOMPOSED_BINARY_POINT 63
557 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
559 /* Structure holding all of the relevant parameters for a format.
560 * exp_size: the size of the exponent field
561 * exp_bias: the offset applied to the exponent field
562 * exp_max: the maximum normalised exponent
563 * frac_size: the size of the fraction field
564 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
565 * The following are computed based the size of fraction
566 * round_mask: bits below lsb which must be rounded
567 * The following optional modifiers are available:
568 * arm_althp: handle ARM Alternative Half Precision
/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                                \
    .exp_size       = E,                                \
    .exp_bias       = ((1 << E) - 1) >> 1,              \
    .exp_max        = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size      = F,                                \
    .frac_shift     = (-F - 1) & 63,                    \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1
592 static const FloatFmt float16_params
= {
596 static const FloatFmt float16_params_ahp
= {
601 static const FloatFmt bfloat16_params
= {
605 static const FloatFmt float32_params
= {
609 static const FloatFmt float64_params
= {
613 static const FloatFmt float128_params
= {
614 FLOAT_PARAMS(15, 112)
617 #define FLOATX80_PARAMS(R) \
619 .frac_size = R == 64 ? 63 : R, \
621 .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
623 static const FloatFmt floatx80_params
[3] = {
624 [floatx80_precision_s
] = { FLOATX80_PARAMS(23) },
625 [floatx80_precision_d
] = { FLOATX80_PARAMS(52) },
626 [floatx80_precision_x
] = { FLOATX80_PARAMS(64) },
629 /* Unpack a float to parts, but do not canonicalize. */
630 static void unpack_raw64(FloatParts64
*r
, const FloatFmt
*fmt
, uint64_t raw
)
632 const int f_size
= fmt
->frac_size
;
633 const int e_size
= fmt
->exp_size
;
635 *r
= (FloatParts64
) {
636 .cls
= float_class_unclassified
,
637 .sign
= extract64(raw
, f_size
+ e_size
, 1),
638 .exp
= extract64(raw
, f_size
, e_size
),
639 .frac
= extract64(raw
, 0, f_size
)
643 static inline void float16_unpack_raw(FloatParts64
*p
, float16 f
)
645 unpack_raw64(p
, &float16_params
, f
);
648 static inline void bfloat16_unpack_raw(FloatParts64
*p
, bfloat16 f
)
650 unpack_raw64(p
, &bfloat16_params
, f
);
653 static inline void float32_unpack_raw(FloatParts64
*p
, float32 f
)
655 unpack_raw64(p
, &float32_params
, f
);
658 static inline void float64_unpack_raw(FloatParts64
*p
, float64 f
)
660 unpack_raw64(p
, &float64_params
, f
);
663 static void floatx80_unpack_raw(FloatParts128
*p
, floatx80 f
)
665 *p
= (FloatParts128
) {
666 .cls
= float_class_unclassified
,
667 .sign
= extract32(f
.high
, 15, 1),
668 .exp
= extract32(f
.high
, 0, 15),
673 static void float128_unpack_raw(FloatParts128
*p
, float128 f
)
675 const int f_size
= float128_params
.frac_size
- 64;
676 const int e_size
= float128_params
.exp_size
;
678 *p
= (FloatParts128
) {
679 .cls
= float_class_unclassified
,
680 .sign
= extract64(f
.high
, f_size
+ e_size
, 1),
681 .exp
= extract64(f
.high
, f_size
, e_size
),
682 .frac_hi
= extract64(f
.high
, 0, f_size
),
687 /* Pack a float from parts, but do not canonicalize. */
688 static uint64_t pack_raw64(const FloatParts64
*p
, const FloatFmt
*fmt
)
690 const int f_size
= fmt
->frac_size
;
691 const int e_size
= fmt
->exp_size
;
694 ret
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
695 ret
= deposit64(ret
, f_size
, e_size
, p
->exp
);
696 ret
= deposit64(ret
, 0, f_size
, p
->frac
);
700 static inline float16
float16_pack_raw(const FloatParts64
*p
)
702 return make_float16(pack_raw64(p
, &float16_params
));
705 static inline bfloat16
bfloat16_pack_raw(const FloatParts64
*p
)
707 return pack_raw64(p
, &bfloat16_params
);
710 static inline float32
float32_pack_raw(const FloatParts64
*p
)
712 return make_float32(pack_raw64(p
, &float32_params
));
715 static inline float64
float64_pack_raw(const FloatParts64
*p
)
717 return make_float64(pack_raw64(p
, &float64_params
));
720 static float128
float128_pack_raw(const FloatParts128
*p
)
722 const int f_size
= float128_params
.frac_size
- 64;
723 const int e_size
= float128_params
.exp_size
;
726 hi
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
727 hi
= deposit64(hi
, f_size
, e_size
, p
->exp
);
728 hi
= deposit64(hi
, 0, f_size
, p
->frac_hi
);
729 return make_float128(hi
, p
->frac_lo
);
732 /*----------------------------------------------------------------------------
733 | Functions and definitions to determine: (1) whether tininess for underflow
734 | is detected before or after rounding by default, (2) what (if anything)
735 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
736 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
737 | are propagated from function inputs to output. These details are target-
739 *----------------------------------------------------------------------------*/
740 #include "softfloat-specialize.c.inc"
742 #define PARTS_GENERIC_64_128(NAME, P) \
743 QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
745 #define PARTS_GENERIC_64_128_256(NAME, P) \
746 QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
747 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
749 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
750 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
752 static void parts64_return_nan(FloatParts64
*a
, float_status
*s
);
753 static void parts128_return_nan(FloatParts128
*a
, float_status
*s
);
755 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
757 static FloatParts64
*parts64_pick_nan(FloatParts64
*a
, FloatParts64
*b
,
759 static FloatParts128
*parts128_pick_nan(FloatParts128
*a
, FloatParts128
*b
,
762 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
764 static FloatParts64
*parts64_pick_nan_muladd(FloatParts64
*a
, FloatParts64
*b
,
765 FloatParts64
*c
, float_status
*s
,
766 int ab_mask
, int abc_mask
);
767 static FloatParts128
*parts128_pick_nan_muladd(FloatParts128
*a
,
771 int ab_mask
, int abc_mask
);
773 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
774 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
776 static void parts64_canonicalize(FloatParts64
*p
, float_status
*status
,
777 const FloatFmt
*fmt
);
778 static void parts128_canonicalize(FloatParts128
*p
, float_status
*status
,
779 const FloatFmt
*fmt
);
781 #define parts_canonicalize(A, S, F) \
782 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
784 static void parts64_uncanon_normal(FloatParts64
*p
, float_status
*status
,
785 const FloatFmt
*fmt
);
786 static void parts128_uncanon_normal(FloatParts128
*p
, float_status
*status
,
787 const FloatFmt
*fmt
);
789 #define parts_uncanon_normal(A, S, F) \
790 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
792 static void parts64_uncanon(FloatParts64
*p
, float_status
*status
,
793 const FloatFmt
*fmt
);
794 static void parts128_uncanon(FloatParts128
*p
, float_status
*status
,
795 const FloatFmt
*fmt
);
797 #define parts_uncanon(A, S, F) \
798 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
800 static void parts64_add_normal(FloatParts64
*a
, FloatParts64
*b
);
801 static void parts128_add_normal(FloatParts128
*a
, FloatParts128
*b
);
802 static void parts256_add_normal(FloatParts256
*a
, FloatParts256
*b
);
804 #define parts_add_normal(A, B) \
805 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
807 static bool parts64_sub_normal(FloatParts64
*a
, FloatParts64
*b
);
808 static bool parts128_sub_normal(FloatParts128
*a
, FloatParts128
*b
);
809 static bool parts256_sub_normal(FloatParts256
*a
, FloatParts256
*b
);
811 #define parts_sub_normal(A, B) \
812 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
814 static FloatParts64
*parts64_addsub(FloatParts64
*a
, FloatParts64
*b
,
815 float_status
*s
, bool subtract
);
816 static FloatParts128
*parts128_addsub(FloatParts128
*a
, FloatParts128
*b
,
817 float_status
*s
, bool subtract
);
819 #define parts_addsub(A, B, S, Z) \
820 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
822 static FloatParts64
*parts64_mul(FloatParts64
*a
, FloatParts64
*b
,
824 static FloatParts128
*parts128_mul(FloatParts128
*a
, FloatParts128
*b
,
827 #define parts_mul(A, B, S) \
828 PARTS_GENERIC_64_128(mul, A)(A, B, S)
830 static FloatParts64
*parts64_muladd(FloatParts64
*a
, FloatParts64
*b
,
831 FloatParts64
*c
, int flags
,
833 static FloatParts128
*parts128_muladd(FloatParts128
*a
, FloatParts128
*b
,
834 FloatParts128
*c
, int flags
,
837 #define parts_muladd(A, B, C, Z, S) \
838 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
840 static FloatParts64
*parts64_div(FloatParts64
*a
, FloatParts64
*b
,
842 static FloatParts128
*parts128_div(FloatParts128
*a
, FloatParts128
*b
,
845 #define parts_div(A, B, S) \
846 PARTS_GENERIC_64_128(div, A)(A, B, S)
848 static void parts64_sqrt(FloatParts64
*a
, float_status
*s
, const FloatFmt
*f
);
849 static void parts128_sqrt(FloatParts128
*a
, float_status
*s
, const FloatFmt
*f
);
851 #define parts_sqrt(A, S, F) \
852 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
854 static bool parts64_round_to_int_normal(FloatParts64
*a
, FloatRoundMode rm
,
855 int scale
, int frac_size
);
856 static bool parts128_round_to_int_normal(FloatParts128
*a
, FloatRoundMode r
,
857 int scale
, int frac_size
);
859 #define parts_round_to_int_normal(A, R, C, F) \
860 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
862 static void parts64_round_to_int(FloatParts64
*a
, FloatRoundMode rm
,
863 int scale
, float_status
*s
,
864 const FloatFmt
*fmt
);
865 static void parts128_round_to_int(FloatParts128
*a
, FloatRoundMode r
,
866 int scale
, float_status
*s
,
867 const FloatFmt
*fmt
);
869 #define parts_round_to_int(A, R, C, S, F) \
870 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
872 static int64_t parts64_float_to_sint(FloatParts64
*p
, FloatRoundMode rmode
,
873 int scale
, int64_t min
, int64_t max
,
875 static int64_t parts128_float_to_sint(FloatParts128
*p
, FloatRoundMode rmode
,
876 int scale
, int64_t min
, int64_t max
,
879 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
880 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
882 static uint64_t parts64_float_to_uint(FloatParts64
*p
, FloatRoundMode rmode
,
883 int scale
, uint64_t max
,
885 static uint64_t parts128_float_to_uint(FloatParts128
*p
, FloatRoundMode rmode
,
886 int scale
, uint64_t max
,
889 #define parts_float_to_uint(P, R, Z, M, S) \
890 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
892 static void parts64_sint_to_float(FloatParts64
*p
, int64_t a
,
893 int scale
, float_status
*s
);
894 static void parts128_sint_to_float(FloatParts128
*p
, int64_t a
,
895 int scale
, float_status
*s
);
897 #define parts_sint_to_float(P, I, Z, S) \
898 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
900 static void parts64_uint_to_float(FloatParts64
*p
, uint64_t a
,
901 int scale
, float_status
*s
);
902 static void parts128_uint_to_float(FloatParts128
*p
, uint64_t a
,
903 int scale
, float_status
*s
);
905 #define parts_uint_to_float(P, I, Z, S) \
906 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
908 static FloatParts64
*parts64_minmax(FloatParts64
*a
, FloatParts64
*b
,
909 float_status
*s
, int flags
);
910 static FloatParts128
*parts128_minmax(FloatParts128
*a
, FloatParts128
*b
,
911 float_status
*s
, int flags
);
913 #define parts_minmax(A, B, S, F) \
914 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
916 static int parts64_compare(FloatParts64
*a
, FloatParts64
*b
,
917 float_status
*s
, bool q
);
918 static int parts128_compare(FloatParts128
*a
, FloatParts128
*b
,
919 float_status
*s
, bool q
);
921 #define parts_compare(A, B, S, Q) \
922 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
924 static void parts64_scalbn(FloatParts64
*a
, int n
, float_status
*s
);
925 static void parts128_scalbn(FloatParts128
*a
, int n
, float_status
*s
);
927 #define parts_scalbn(A, N, S) \
928 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
931 * Helper functions for softfloat-parts.c.inc, per-size operations.
934 #define FRAC_GENERIC_64_128(NAME, P) \
935 QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
937 #define FRAC_GENERIC_64_128_256(NAME, P) \
938 QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
939 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
941 static bool frac64_add(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
943 return uadd64_overflow(a
->frac
, b
->frac
, &r
->frac
);
946 static bool frac128_add(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
949 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
950 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
954 static bool frac256_add(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
957 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
958 r
->frac_lm
= uadd64_carry(a
->frac_lm
, b
->frac_lm
, &c
);
959 r
->frac_hm
= uadd64_carry(a
->frac_hm
, b
->frac_hm
, &c
);
960 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
964 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
966 static bool frac64_addi(FloatParts64
*r
, FloatParts64
*a
, uint64_t c
)
968 return uadd64_overflow(a
->frac
, c
, &r
->frac
);
971 static bool frac128_addi(FloatParts128
*r
, FloatParts128
*a
, uint64_t c
)
973 c
= uadd64_overflow(a
->frac_lo
, c
, &r
->frac_lo
);
974 return uadd64_overflow(a
->frac_hi
, c
, &r
->frac_hi
);
977 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
979 static void frac64_allones(FloatParts64
*a
)
984 static void frac128_allones(FloatParts128
*a
)
986 a
->frac_hi
= a
->frac_lo
= -1;
989 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
991 static int frac64_cmp(FloatParts64
*a
, FloatParts64
*b
)
993 return a
->frac
== b
->frac
? 0 : a
->frac
< b
->frac
? -1 : 1;
996 static int frac128_cmp(FloatParts128
*a
, FloatParts128
*b
)
998 uint64_t ta
= a
->frac_hi
, tb
= b
->frac_hi
;
1000 ta
= a
->frac_lo
, tb
= b
->frac_lo
;
1005 return ta
< tb
? -1 : 1;
1008 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
1010 static void frac64_clear(FloatParts64
*a
)
1015 static void frac128_clear(FloatParts128
*a
)
1017 a
->frac_hi
= a
->frac_lo
= 0;
1020 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
1022 static bool frac64_div(FloatParts64
*a
, FloatParts64
*b
)
1024 uint64_t n1
, n0
, r
, q
;
1028 * We want a 2*N / N-bit division to produce exactly an N-bit
1029 * result, so that we do not lose any precision and so that we
1030 * do not have to renormalize afterward. If A.frac < B.frac,
1031 * then division would produce an (N-1)-bit result; shift A left
1032 * by one to produce the an N-bit result, and return true to
1033 * decrement the exponent to match.
1035 * The udiv_qrnnd algorithm that we're using requires normalization,
1036 * i.e. the msb of the denominator must be set, which is already true.
1038 ret
= a
->frac
< b
->frac
;
1046 q
= udiv_qrnnd(&r
, n0
, n1
, b
->frac
);
1048 /* Set lsb if there is a remainder, to set inexact. */
1049 a
->frac
= q
| (r
!= 0);
1054 static bool frac128_div(FloatParts128
*a
, FloatParts128
*b
)
1056 uint64_t q0
, q1
, a0
, a1
, b0
, b1
;
1057 uint64_t r0
, r1
, r2
, r3
, t0
, t1
, t2
, t3
;
1060 a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1061 b0
= b
->frac_hi
, b1
= b
->frac_lo
;
1063 ret
= lt128(a0
, a1
, b0
, b1
);
1065 a1
= shr_double(a0
, a1
, 1);
1069 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1070 q0
= estimateDiv128To64(a0
, a1
, b0
);
1073 * Estimate is high because B1 was not included (unless B1 == 0).
1074 * Reduce quotient and increase remainder until remainder is non-negative.
1075 * This loop will execute 0 to 2 times.
1077 mul128By64To192(b0
, b1
, q0
, &t0
, &t1
, &t2
);
1078 sub192(a0
, a1
, 0, t0
, t1
, t2
, &r0
, &r1
, &r2
);
1081 add192(r0
, r1
, r2
, 0, b0
, b1
, &r0
, &r1
, &r2
);
1084 /* Repeat using the remainder, producing a second word of quotient. */
1085 q1
= estimateDiv128To64(r1
, r2
, b0
);
1086 mul128By64To192(b0
, b1
, q1
, &t1
, &t2
, &t3
);
1087 sub192(r1
, r2
, 0, t1
, t2
, t3
, &r1
, &r2
, &r3
);
1090 add192(r1
, r2
, r3
, 0, b0
, b1
, &r1
, &r2
, &r3
);
1093 /* Any remainder indicates inexact; set sticky bit. */
1094 q1
|= (r2
| r3
) != 0;
1101 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1103 static bool frac64_eqz(FloatParts64
*a
)
1105 return a
->frac
== 0;
1108 static bool frac128_eqz(FloatParts128
*a
)
1110 return (a
->frac_hi
| a
->frac_lo
) == 0;
1113 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1115 static void frac64_mulw(FloatParts128
*r
, FloatParts64
*a
, FloatParts64
*b
)
1117 mulu64(&r
->frac_lo
, &r
->frac_hi
, a
->frac
, b
->frac
);
1120 static void frac128_mulw(FloatParts256
*r
, FloatParts128
*a
, FloatParts128
*b
)
1122 mul128To256(a
->frac_hi
, a
->frac_lo
, b
->frac_hi
, b
->frac_lo
,
1123 &r
->frac_hi
, &r
->frac_hm
, &r
->frac_lm
, &r
->frac_lo
);
1126 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1128 static void frac64_neg(FloatParts64
*a
)
1133 static void frac128_neg(FloatParts128
*a
)
1136 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1137 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1140 static void frac256_neg(FloatParts256
*a
)
1143 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1144 a
->frac_lm
= usub64_borrow(0, a
->frac_lm
, &c
);
1145 a
->frac_hm
= usub64_borrow(0, a
->frac_hm
, &c
);
1146 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1149 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1151 static int frac64_normalize(FloatParts64
*a
)
1154 int shift
= clz64(a
->frac
);
1161 static int frac128_normalize(FloatParts128
*a
)
1164 int shl
= clz64(a
->frac_hi
);
1165 a
->frac_hi
= shl_double(a
->frac_hi
, a
->frac_lo
, shl
);
1168 } else if (a
->frac_lo
) {
1169 int shl
= clz64(a
->frac_lo
);
1170 a
->frac_hi
= a
->frac_lo
<< shl
;
1177 static int frac256_normalize(FloatParts256
*a
)
1179 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1180 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1192 a0
= a1
, a1
= a2
, a2
= a3
, a3
= 0;
1195 a0
= a2
, a1
= a3
, a2
= 0, a3
= 0;
1198 a0
= a3
, a1
= 0, a2
= 0, a3
= 0;
1201 a0
= 0, a1
= 0, a2
= 0, a3
= 0;
1211 a0
= shl_double(a0
, a1
, shl
);
1212 a1
= shl_double(a1
, a2
, shl
);
1213 a2
= shl_double(a2
, a3
, shl
);
1224 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1226 static void frac64_shl(FloatParts64
*a
, int c
)
1231 static void frac128_shl(FloatParts128
*a
, int c
)
1233 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1241 a0
= shl_double(a0
, a1
, c
);
1249 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1251 static void frac64_shr(FloatParts64
*a
, int c
)
1256 static void frac128_shr(FloatParts128
*a
, int c
)
1258 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1266 a1
= shr_double(a0
, a1
, c
);
1274 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1276 static void frac64_shrjam(FloatParts64
*a
, int c
)
1278 uint64_t a0
= a
->frac
;
1280 if (likely(c
!= 0)) {
1281 if (likely(c
< 64)) {
1282 a0
= (a0
>> c
) | (shr_double(a0
, 0, c
) != 0);
1290 static void frac128_shrjam(FloatParts128
*a
, int c
)
1292 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1293 uint64_t sticky
= 0;
1295 if (unlikely(c
== 0)) {
1297 } else if (likely(c
< 64)) {
1299 } else if (likely(c
< 128)) {
1313 sticky
|= shr_double(a1
, 0, c
);
1314 a1
= shr_double(a0
, a1
, c
);
1318 a
->frac_lo
= a1
| (sticky
!= 0);
1322 static void frac256_shrjam(FloatParts256
*a
, int c
)
1324 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1325 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1326 uint64_t sticky
= 0;
1328 if (unlikely(c
== 0)) {
1330 } else if (likely(c
< 64)) {
1332 } else if (likely(c
< 256)) {
1333 if (unlikely(c
& 128)) {
1335 a3
= a1
, a2
= a0
, a1
= 0, a0
= 0;
1337 if (unlikely(c
& 64)) {
1339 a3
= a2
, a2
= a1
, a1
= a0
, a0
= 0;
1346 sticky
= a0
| a1
| a2
| a3
;
1347 a0
= a1
= a2
= a3
= 0;
1351 sticky
|= shr_double(a3
, 0, c
);
1352 a3
= shr_double(a2
, a3
, c
);
1353 a2
= shr_double(a1
, a2
, c
);
1354 a1
= shr_double(a0
, a1
, c
);
1358 a
->frac_lo
= a3
| (sticky
!= 0);
1364 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1366 static bool frac64_sub(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
1368 return usub64_overflow(a
->frac
, b
->frac
, &r
->frac
);
1371 static bool frac128_sub(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
1374 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1375 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1379 static bool frac256_sub(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
1382 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1383 r
->frac_lm
= usub64_borrow(a
->frac_lm
, b
->frac_lm
, &c
);
1384 r
->frac_hm
= usub64_borrow(a
->frac_hm
, b
->frac_hm
, &c
);
1385 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1389 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1391 static void frac64_truncjam(FloatParts64
*r
, FloatParts128
*a
)
1393 r
->frac
= a
->frac_hi
| (a
->frac_lo
!= 0);
1396 static void frac128_truncjam(FloatParts128
*r
, FloatParts256
*a
)
1398 r
->frac_hi
= a
->frac_hi
;
1399 r
->frac_lo
= a
->frac_hm
| ((a
->frac_lm
| a
->frac_lo
) != 0);
1402 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1404 static void frac64_widen(FloatParts128
*r
, FloatParts64
*a
)
1406 r
->frac_hi
= a
->frac
;
1410 static void frac128_widen(FloatParts256
*r
, FloatParts128
*a
)
1412 r
->frac_hi
= a
->frac_hi
;
1413 r
->frac_hm
= a
->frac_lo
;
1418 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1421 * Reciprocal sqrt table. 1 bit of exponent, 6-bits of mantessa.
1422 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1423 * and thus MIT licenced.
/*
 * Reciprocal sqrt lookup table: 1 bit of exponent, 6 bits of mantissa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licenced.
 */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};
1444 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1445 #define FloatPartsN glue(FloatParts,N)
1446 #define FloatPartsW glue(FloatParts,W)
1451 #include "softfloat-parts-addsub.c.inc"
1452 #include "softfloat-parts.c.inc"
1459 #include "softfloat-parts-addsub.c.inc"
1460 #include "softfloat-parts.c.inc"
1466 #include "softfloat-parts-addsub.c.inc"
1475 * Pack/unpack routines with a specific FloatFmt.
1478 static void float16a_unpack_canonical(FloatParts64
*p
, float16 f
,
1479 float_status
*s
, const FloatFmt
*params
)
1481 float16_unpack_raw(p
, f
);
1482 parts_canonicalize(p
, s
, params
);
1485 static void float16_unpack_canonical(FloatParts64
*p
, float16 f
,
1488 float16a_unpack_canonical(p
, f
, s
, &float16_params
);
1491 static void bfloat16_unpack_canonical(FloatParts64
*p
, bfloat16 f
,
1494 bfloat16_unpack_raw(p
, f
);
1495 parts_canonicalize(p
, s
, &bfloat16_params
);
1498 static float16
float16a_round_pack_canonical(FloatParts64
*p
,
1500 const FloatFmt
*params
)
1502 parts_uncanon(p
, s
, params
);
1503 return float16_pack_raw(p
);
1506 static float16
float16_round_pack_canonical(FloatParts64
*p
,
1509 return float16a_round_pack_canonical(p
, s
, &float16_params
);
1512 static bfloat16
bfloat16_round_pack_canonical(FloatParts64
*p
,
1515 parts_uncanon(p
, s
, &bfloat16_params
);
1516 return bfloat16_pack_raw(p
);
1519 static void float32_unpack_canonical(FloatParts64
*p
, float32 f
,
1522 float32_unpack_raw(p
, f
);
1523 parts_canonicalize(p
, s
, &float32_params
);
1526 static float32
float32_round_pack_canonical(FloatParts64
*p
,
1529 parts_uncanon(p
, s
, &float32_params
);
1530 return float32_pack_raw(p
);
1533 static void float64_unpack_canonical(FloatParts64
*p
, float64 f
,
1536 float64_unpack_raw(p
, f
);
1537 parts_canonicalize(p
, s
, &float64_params
);
1540 static float64
float64_round_pack_canonical(FloatParts64
*p
,
1543 parts_uncanon(p
, s
, &float64_params
);
1544 return float64_pack_raw(p
);
1547 static void float128_unpack_canonical(FloatParts128
*p
, float128 f
,
1550 float128_unpack_raw(p
, f
);
1551 parts_canonicalize(p
, s
, &float128_params
);
1554 static float128
float128_round_pack_canonical(FloatParts128
*p
,
1557 parts_uncanon(p
, s
, &float128_params
);
1558 return float128_pack_raw(p
);
1561 /* Returns false if the encoding is invalid. */
1562 static bool floatx80_unpack_canonical(FloatParts128
*p
, floatx80 f
,
1565 /* Ensure rounding precision is set before beginning. */
1566 switch (s
->floatx80_rounding_precision
) {
1567 case floatx80_precision_x
:
1568 case floatx80_precision_d
:
1569 case floatx80_precision_s
:
1572 g_assert_not_reached();
1575 if (unlikely(floatx80_invalid_encoding(f
))) {
1576 float_raise(float_flag_invalid
, s
);
1580 floatx80_unpack_raw(p
, f
);
1582 if (likely(p
->exp
!= floatx80_params
[floatx80_precision_x
].exp_max
)) {
1583 parts_canonicalize(p
, s
, &floatx80_params
[floatx80_precision_x
]);
1585 /* The explicit integer bit is ignored, after invalid checks. */
1586 p
->frac_hi
&= MAKE_64BIT_MASK(0, 63);
1587 p
->cls
= (p
->frac_hi
== 0 ? float_class_inf
1588 : parts_is_snan_frac(p
->frac_hi
, s
)
1589 ? float_class_snan
: float_class_qnan
);
1594 static floatx80
floatx80_round_pack_canonical(FloatParts128
*p
,
1597 const FloatFmt
*fmt
= &floatx80_params
[s
->floatx80_rounding_precision
];
1602 case float_class_normal
:
1603 if (s
->floatx80_rounding_precision
== floatx80_precision_x
) {
1604 parts_uncanon_normal(p
, s
, fmt
);
1612 frac_truncjam(&p64
, p
);
1613 parts_uncanon_normal(&p64
, s
, fmt
);
1617 if (exp
!= fmt
->exp_max
) {
1620 /* rounded to inf -- fall through to set frac correctly */
1622 case float_class_inf
:
1623 /* x86 and m68k differ in the setting of the integer bit. */
1624 frac
= floatx80_infinity_low
;
1628 case float_class_zero
:
1633 case float_class_snan
:
1634 case float_class_qnan
:
1635 /* NaNs have the integer bit set. */
1636 frac
= p
->frac_hi
| (1ull << 63);
1641 g_assert_not_reached();
1644 return packFloatx80(p
->sign
, exp
, frac
);
1648 * Addition and subtraction
1651 static float16 QEMU_FLATTEN
1652 float16_addsub(float16 a
, float16 b
, float_status
*status
, bool subtract
)
1654 FloatParts64 pa
, pb
, *pr
;
1656 float16_unpack_canonical(&pa
, a
, status
);
1657 float16_unpack_canonical(&pb
, b
, status
);
1658 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1660 return float16_round_pack_canonical(pr
, status
);
1663 float16
float16_add(float16 a
, float16 b
, float_status
*status
)
1665 return float16_addsub(a
, b
, status
, false);
1668 float16
float16_sub(float16 a
, float16 b
, float_status
*status
)
1670 return float16_addsub(a
, b
, status
, true);
1673 static float32 QEMU_SOFTFLOAT_ATTR
1674 soft_f32_addsub(float32 a
, float32 b
, float_status
*status
, bool subtract
)
1676 FloatParts64 pa
, pb
, *pr
;
1678 float32_unpack_canonical(&pa
, a
, status
);
1679 float32_unpack_canonical(&pb
, b
, status
);
1680 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1682 return float32_round_pack_canonical(pr
, status
);
1685 static float32
soft_f32_add(float32 a
, float32 b
, float_status
*status
)
1687 return soft_f32_addsub(a
, b
, status
, false);
1690 static float32
soft_f32_sub(float32 a
, float32 b
, float_status
*status
)
1692 return soft_f32_addsub(a
, b
, status
, true);
1695 static float64 QEMU_SOFTFLOAT_ATTR
1696 soft_f64_addsub(float64 a
, float64 b
, float_status
*status
, bool subtract
)
1698 FloatParts64 pa
, pb
, *pr
;
1700 float64_unpack_canonical(&pa
, a
, status
);
1701 float64_unpack_canonical(&pb
, b
, status
);
1702 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1704 return float64_round_pack_canonical(pr
, status
);
1707 static float64
soft_f64_add(float64 a
, float64 b
, float_status
*status
)
1709 return soft_f64_addsub(a
, b
, status
, false);
1712 static float64
soft_f64_sub(float64 a
, float64 b
, float_status
*status
)
1714 return soft_f64_addsub(a
, b
, status
, true);
1717 static float hard_f32_add(float a
, float b
)
1722 static float hard_f32_sub(float a
, float b
)
1727 static double hard_f64_add(double a
, double b
)
1732 static double hard_f64_sub(double a
, double b
)
1737 static bool f32_addsubmul_post(union_float32 a
, union_float32 b
)
1739 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
1740 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1742 return !(float32_is_zero(a
.s
) && float32_is_zero(b
.s
));
1745 static bool f64_addsubmul_post(union_float64 a
, union_float64 b
)
1747 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
1748 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1750 return !(float64_is_zero(a
.s
) && float64_is_zero(b
.s
));
1754 static float32
float32_addsub(float32 a
, float32 b
, float_status
*s
,
1755 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
)
1757 return float32_gen2(a
, b
, s
, hard
, soft
,
1758 f32_is_zon2
, f32_addsubmul_post
);
1761 static float64
float64_addsub(float64 a
, float64 b
, float_status
*s
,
1762 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
)
1764 return float64_gen2(a
, b
, s
, hard
, soft
,
1765 f64_is_zon2
, f64_addsubmul_post
);
1768 float32 QEMU_FLATTEN
1769 float32_add(float32 a
, float32 b
, float_status
*s
)
1771 return float32_addsub(a
, b
, s
, hard_f32_add
, soft_f32_add
);
1774 float32 QEMU_FLATTEN
1775 float32_sub(float32 a
, float32 b
, float_status
*s
)
1777 return float32_addsub(a
, b
, s
, hard_f32_sub
, soft_f32_sub
);
1780 float64 QEMU_FLATTEN
1781 float64_add(float64 a
, float64 b
, float_status
*s
)
1783 return float64_addsub(a
, b
, s
, hard_f64_add
, soft_f64_add
);
1786 float64 QEMU_FLATTEN
1787 float64_sub(float64 a
, float64 b
, float_status
*s
)
1789 return float64_addsub(a
, b
, s
, hard_f64_sub
, soft_f64_sub
);
1792 static bfloat16 QEMU_FLATTEN
1793 bfloat16_addsub(bfloat16 a
, bfloat16 b
, float_status
*status
, bool subtract
)
1795 FloatParts64 pa
, pb
, *pr
;
1797 bfloat16_unpack_canonical(&pa
, a
, status
);
1798 bfloat16_unpack_canonical(&pb
, b
, status
);
1799 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1801 return bfloat16_round_pack_canonical(pr
, status
);
1804 bfloat16
bfloat16_add(bfloat16 a
, bfloat16 b
, float_status
*status
)
1806 return bfloat16_addsub(a
, b
, status
, false);
1809 bfloat16
bfloat16_sub(bfloat16 a
, bfloat16 b
, float_status
*status
)
1811 return bfloat16_addsub(a
, b
, status
, true);
1814 static float128 QEMU_FLATTEN
1815 float128_addsub(float128 a
, float128 b
, float_status
*status
, bool subtract
)
1817 FloatParts128 pa
, pb
, *pr
;
1819 float128_unpack_canonical(&pa
, a
, status
);
1820 float128_unpack_canonical(&pb
, b
, status
);
1821 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1823 return float128_round_pack_canonical(pr
, status
);
1826 float128
float128_add(float128 a
, float128 b
, float_status
*status
)
1828 return float128_addsub(a
, b
, status
, false);
1831 float128
float128_sub(float128 a
, float128 b
, float_status
*status
)
1833 return float128_addsub(a
, b
, status
, true);
1836 static floatx80 QEMU_FLATTEN
1837 floatx80_addsub(floatx80 a
, floatx80 b
, float_status
*status
, bool subtract
)
1839 FloatParts128 pa
, pb
, *pr
;
1841 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
1842 !floatx80_unpack_canonical(&pb
, b
, status
)) {
1843 return floatx80_default_nan(status
);
1846 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1847 return floatx80_round_pack_canonical(pr
, status
);
1850 floatx80
floatx80_add(floatx80 a
, floatx80 b
, float_status
*status
)
1852 return floatx80_addsub(a
, b
, status
, false);
1855 floatx80
floatx80_sub(floatx80 a
, floatx80 b
, float_status
*status
)
1857 return floatx80_addsub(a
, b
, status
, true);
1864 float16 QEMU_FLATTEN
float16_mul(float16 a
, float16 b
, float_status
*status
)
1866 FloatParts64 pa
, pb
, *pr
;
1868 float16_unpack_canonical(&pa
, a
, status
);
1869 float16_unpack_canonical(&pb
, b
, status
);
1870 pr
= parts_mul(&pa
, &pb
, status
);
1872 return float16_round_pack_canonical(pr
, status
);
1875 static float32 QEMU_SOFTFLOAT_ATTR
1876 soft_f32_mul(float32 a
, float32 b
, float_status
*status
)
1878 FloatParts64 pa
, pb
, *pr
;
1880 float32_unpack_canonical(&pa
, a
, status
);
1881 float32_unpack_canonical(&pb
, b
, status
);
1882 pr
= parts_mul(&pa
, &pb
, status
);
1884 return float32_round_pack_canonical(pr
, status
);
1887 static float64 QEMU_SOFTFLOAT_ATTR
1888 soft_f64_mul(float64 a
, float64 b
, float_status
*status
)
1890 FloatParts64 pa
, pb
, *pr
;
1892 float64_unpack_canonical(&pa
, a
, status
);
1893 float64_unpack_canonical(&pb
, b
, status
);
1894 pr
= parts_mul(&pa
, &pb
, status
);
1896 return float64_round_pack_canonical(pr
, status
);
1899 static float hard_f32_mul(float a
, float b
)
1904 static double hard_f64_mul(double a
, double b
)
1909 float32 QEMU_FLATTEN
1910 float32_mul(float32 a
, float32 b
, float_status
*s
)
1912 return float32_gen2(a
, b
, s
, hard_f32_mul
, soft_f32_mul
,
1913 f32_is_zon2
, f32_addsubmul_post
);
1916 float64 QEMU_FLATTEN
1917 float64_mul(float64 a
, float64 b
, float_status
*s
)
1919 return float64_gen2(a
, b
, s
, hard_f64_mul
, soft_f64_mul
,
1920 f64_is_zon2
, f64_addsubmul_post
);
1923 bfloat16 QEMU_FLATTEN
1924 bfloat16_mul(bfloat16 a
, bfloat16 b
, float_status
*status
)
1926 FloatParts64 pa
, pb
, *pr
;
1928 bfloat16_unpack_canonical(&pa
, a
, status
);
1929 bfloat16_unpack_canonical(&pb
, b
, status
);
1930 pr
= parts_mul(&pa
, &pb
, status
);
1932 return bfloat16_round_pack_canonical(pr
, status
);
1935 float128 QEMU_FLATTEN
1936 float128_mul(float128 a
, float128 b
, float_status
*status
)
1938 FloatParts128 pa
, pb
, *pr
;
1940 float128_unpack_canonical(&pa
, a
, status
);
1941 float128_unpack_canonical(&pb
, b
, status
);
1942 pr
= parts_mul(&pa
, &pb
, status
);
1944 return float128_round_pack_canonical(pr
, status
);
1947 floatx80 QEMU_FLATTEN
1948 floatx80_mul(floatx80 a
, floatx80 b
, float_status
*status
)
1950 FloatParts128 pa
, pb
, *pr
;
1952 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
1953 !floatx80_unpack_canonical(&pb
, b
, status
)) {
1954 return floatx80_default_nan(status
);
1957 pr
= parts_mul(&pa
, &pb
, status
);
1958 return floatx80_round_pack_canonical(pr
, status
);
1962 * Fused multiply-add
1965 float16 QEMU_FLATTEN
float16_muladd(float16 a
, float16 b
, float16 c
,
1966 int flags
, float_status
*status
)
1968 FloatParts64 pa
, pb
, pc
, *pr
;
1970 float16_unpack_canonical(&pa
, a
, status
);
1971 float16_unpack_canonical(&pb
, b
, status
);
1972 float16_unpack_canonical(&pc
, c
, status
);
1973 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1975 return float16_round_pack_canonical(pr
, status
);
1978 static float32 QEMU_SOFTFLOAT_ATTR
1979 soft_f32_muladd(float32 a
, float32 b
, float32 c
, int flags
,
1980 float_status
*status
)
1982 FloatParts64 pa
, pb
, pc
, *pr
;
1984 float32_unpack_canonical(&pa
, a
, status
);
1985 float32_unpack_canonical(&pb
, b
, status
);
1986 float32_unpack_canonical(&pc
, c
, status
);
1987 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1989 return float32_round_pack_canonical(pr
, status
);
1992 static float64 QEMU_SOFTFLOAT_ATTR
1993 soft_f64_muladd(float64 a
, float64 b
, float64 c
, int flags
,
1994 float_status
*status
)
1996 FloatParts64 pa
, pb
, pc
, *pr
;
1998 float64_unpack_canonical(&pa
, a
, status
);
1999 float64_unpack_canonical(&pb
, b
, status
);
2000 float64_unpack_canonical(&pc
, c
, status
);
2001 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2003 return float64_round_pack_canonical(pr
, status
);
2006 static bool force_soft_fma
;
2008 float32 QEMU_FLATTEN
2009 float32_muladd(float32 xa
, float32 xb
, float32 xc
, int flags
, float_status
*s
)
2011 union_float32 ua
, ub
, uc
, ur
;
2017 if (unlikely(!can_use_fpu(s
))) {
2020 if (unlikely(flags
& float_muladd_halve_result
)) {
2024 float32_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2025 if (unlikely(!f32_is_zon3(ua
, ub
, uc
))) {
2029 if (unlikely(force_soft_fma
)) {
2034 * When (a || b) == 0, there's no need to check for under/over flow,
2035 * since we know the addend is (normal || 0) and the product is 0.
2037 if (float32_is_zero(ua
.s
) || float32_is_zero(ub
.s
)) {
2041 prod_sign
= float32_is_neg(ua
.s
) ^ float32_is_neg(ub
.s
);
2042 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2043 up
.s
= float32_set_sign(float32_zero
, prod_sign
);
2045 if (flags
& float_muladd_negate_c
) {
2050 union_float32 ua_orig
= ua
;
2051 union_float32 uc_orig
= uc
;
2053 if (flags
& float_muladd_negate_product
) {
2056 if (flags
& float_muladd_negate_c
) {
2060 ur
.h
= fmaf(ua
.h
, ub
.h
, uc
.h
);
2062 if (unlikely(f32_is_inf(ur
))) {
2063 float_raise(float_flag_overflow
, s
);
2064 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
)) {
2070 if (flags
& float_muladd_negate_result
) {
2071 return float32_chs(ur
.s
);
2076 return soft_f32_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2079 float64 QEMU_FLATTEN
2080 float64_muladd(float64 xa
, float64 xb
, float64 xc
, int flags
, float_status
*s
)
2082 union_float64 ua
, ub
, uc
, ur
;
2088 if (unlikely(!can_use_fpu(s
))) {
2091 if (unlikely(flags
& float_muladd_halve_result
)) {
2095 float64_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2096 if (unlikely(!f64_is_zon3(ua
, ub
, uc
))) {
2100 if (unlikely(force_soft_fma
)) {
2105 * When (a || b) == 0, there's no need to check for under/over flow,
2106 * since we know the addend is (normal || 0) and the product is 0.
2108 if (float64_is_zero(ua
.s
) || float64_is_zero(ub
.s
)) {
2112 prod_sign
= float64_is_neg(ua
.s
) ^ float64_is_neg(ub
.s
);
2113 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2114 up
.s
= float64_set_sign(float64_zero
, prod_sign
);
2116 if (flags
& float_muladd_negate_c
) {
2121 union_float64 ua_orig
= ua
;
2122 union_float64 uc_orig
= uc
;
2124 if (flags
& float_muladd_negate_product
) {
2127 if (flags
& float_muladd_negate_c
) {
2131 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
2133 if (unlikely(f64_is_inf(ur
))) {
2134 float_raise(float_flag_overflow
, s
);
2135 } else if (unlikely(fabs(ur
.h
) <= FLT_MIN
)) {
2141 if (flags
& float_muladd_negate_result
) {
2142 return float64_chs(ur
.s
);
2147 return soft_f64_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2150 bfloat16 QEMU_FLATTEN
bfloat16_muladd(bfloat16 a
, bfloat16 b
, bfloat16 c
,
2151 int flags
, float_status
*status
)
2153 FloatParts64 pa
, pb
, pc
, *pr
;
2155 bfloat16_unpack_canonical(&pa
, a
, status
);
2156 bfloat16_unpack_canonical(&pb
, b
, status
);
2157 bfloat16_unpack_canonical(&pc
, c
, status
);
2158 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2160 return bfloat16_round_pack_canonical(pr
, status
);
2163 float128 QEMU_FLATTEN
float128_muladd(float128 a
, float128 b
, float128 c
,
2164 int flags
, float_status
*status
)
2166 FloatParts128 pa
, pb
, pc
, *pr
;
2168 float128_unpack_canonical(&pa
, a
, status
);
2169 float128_unpack_canonical(&pb
, b
, status
);
2170 float128_unpack_canonical(&pc
, c
, status
);
2171 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2173 return float128_round_pack_canonical(pr
, status
);
2180 float16
float16_div(float16 a
, float16 b
, float_status
*status
)
2182 FloatParts64 pa
, pb
, *pr
;
2184 float16_unpack_canonical(&pa
, a
, status
);
2185 float16_unpack_canonical(&pb
, b
, status
);
2186 pr
= parts_div(&pa
, &pb
, status
);
2188 return float16_round_pack_canonical(pr
, status
);
2191 static float32 QEMU_SOFTFLOAT_ATTR
2192 soft_f32_div(float32 a
, float32 b
, float_status
*status
)
2194 FloatParts64 pa
, pb
, *pr
;
2196 float32_unpack_canonical(&pa
, a
, status
);
2197 float32_unpack_canonical(&pb
, b
, status
);
2198 pr
= parts_div(&pa
, &pb
, status
);
2200 return float32_round_pack_canonical(pr
, status
);
2203 static float64 QEMU_SOFTFLOAT_ATTR
2204 soft_f64_div(float64 a
, float64 b
, float_status
*status
)
2206 FloatParts64 pa
, pb
, *pr
;
2208 float64_unpack_canonical(&pa
, a
, status
);
2209 float64_unpack_canonical(&pb
, b
, status
);
2210 pr
= parts_div(&pa
, &pb
, status
);
2212 return float64_round_pack_canonical(pr
, status
);
2215 static float hard_f32_div(float a
, float b
)
2220 static double hard_f64_div(double a
, double b
)
2225 static bool f32_div_pre(union_float32 a
, union_float32 b
)
2227 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2228 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2229 fpclassify(b
.h
) == FP_NORMAL
;
2231 return float32_is_zero_or_normal(a
.s
) && float32_is_normal(b
.s
);
2234 static bool f64_div_pre(union_float64 a
, union_float64 b
)
2236 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2237 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2238 fpclassify(b
.h
) == FP_NORMAL
;
2240 return float64_is_zero_or_normal(a
.s
) && float64_is_normal(b
.s
);
2243 static bool f32_div_post(union_float32 a
, union_float32 b
)
2245 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2246 return fpclassify(a
.h
) != FP_ZERO
;
2248 return !float32_is_zero(a
.s
);
2251 static bool f64_div_post(union_float64 a
, union_float64 b
)
2253 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2254 return fpclassify(a
.h
) != FP_ZERO
;
2256 return !float64_is_zero(a
.s
);
2259 float32 QEMU_FLATTEN
2260 float32_div(float32 a
, float32 b
, float_status
*s
)
2262 return float32_gen2(a
, b
, s
, hard_f32_div
, soft_f32_div
,
2263 f32_div_pre
, f32_div_post
);
2266 float64 QEMU_FLATTEN
2267 float64_div(float64 a
, float64 b
, float_status
*s
)
2269 return float64_gen2(a
, b
, s
, hard_f64_div
, soft_f64_div
,
2270 f64_div_pre
, f64_div_post
);
2273 bfloat16 QEMU_FLATTEN
2274 bfloat16_div(bfloat16 a
, bfloat16 b
, float_status
*status
)
2276 FloatParts64 pa
, pb
, *pr
;
2278 bfloat16_unpack_canonical(&pa
, a
, status
);
2279 bfloat16_unpack_canonical(&pb
, b
, status
);
2280 pr
= parts_div(&pa
, &pb
, status
);
2282 return bfloat16_round_pack_canonical(pr
, status
);
2285 float128 QEMU_FLATTEN
2286 float128_div(float128 a
, float128 b
, float_status
*status
)
2288 FloatParts128 pa
, pb
, *pr
;
2290 float128_unpack_canonical(&pa
, a
, status
);
2291 float128_unpack_canonical(&pb
, b
, status
);
2292 pr
= parts_div(&pa
, &pb
, status
);
2294 return float128_round_pack_canonical(pr
, status
);
2297 floatx80
floatx80_div(floatx80 a
, floatx80 b
, float_status
*status
)
2299 FloatParts128 pa
, pb
, *pr
;
2301 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2302 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2303 return floatx80_default_nan(status
);
2306 pr
= parts_div(&pa
, &pb
, status
);
2307 return floatx80_round_pack_canonical(pr
, status
);
2311 * Float to Float conversions
2313 * Returns the result of converting one float format to another. The
2314 * conversion is performed according to the IEC/IEEE Standard for
2315 * Binary Floating-Point Arithmetic.
2317 * Usually this only needs to take care of raising invalid exceptions
2318 * and handling the conversion on NaNs.
2321 static void parts_float_to_ahp(FloatParts64
*a
, float_status
*s
)
2324 case float_class_qnan
:
2325 case float_class_snan
:
2327 * There is no NaN in the destination format. Raise Invalid
2328 * and return a zero with the sign of the input NaN.
2330 float_raise(float_flag_invalid
, s
);
2331 a
->cls
= float_class_zero
;
2334 case float_class_inf
:
2336 * There is no Inf in the destination format. Raise Invalid
2337 * and return the maximum normal with the correct sign.
2339 float_raise(float_flag_invalid
, s
);
2340 a
->cls
= float_class_normal
;
2341 a
->exp
= float16_params_ahp
.exp_max
;
2342 a
->frac
= MAKE_64BIT_MASK(float16_params_ahp
.frac_shift
,
2343 float16_params_ahp
.frac_size
+ 1);
2346 case float_class_normal
:
2347 case float_class_zero
:
2351 g_assert_not_reached();
2355 static void parts64_float_to_float(FloatParts64
*a
, float_status
*s
)
2357 if (is_nan(a
->cls
)) {
2358 parts_return_nan(a
, s
);
2362 static void parts128_float_to_float(FloatParts128
*a
, float_status
*s
)
2364 if (is_nan(a
->cls
)) {
2365 parts_return_nan(a
, s
);
2369 #define parts_float_to_float(P, S) \
2370 PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2372 static void parts_float_to_float_narrow(FloatParts64
*a
, FloatParts128
*b
,
2379 if (a
->cls
== float_class_normal
) {
2380 frac_truncjam(a
, b
);
2381 } else if (is_nan(a
->cls
)) {
2382 /* Discard the low bits of the NaN. */
2383 a
->frac
= b
->frac_hi
;
2384 parts_return_nan(a
, s
);
2388 static void parts_float_to_float_widen(FloatParts128
*a
, FloatParts64
*b
,
2396 if (is_nan(a
->cls
)) {
2397 parts_return_nan(a
, s
);
2401 float32
float16_to_float32(float16 a
, bool ieee
, float_status
*s
)
2403 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2406 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2407 parts_float_to_float(&p
, s
);
2408 return float32_round_pack_canonical(&p
, s
);
2411 float64
float16_to_float64(float16 a
, bool ieee
, float_status
*s
)
2413 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2416 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2417 parts_float_to_float(&p
, s
);
2418 return float64_round_pack_canonical(&p
, s
);
2421 float16
float32_to_float16(float32 a
, bool ieee
, float_status
*s
)
2424 const FloatFmt
*fmt
;
2426 float32_unpack_canonical(&p
, a
, s
);
2428 parts_float_to_float(&p
, s
);
2429 fmt
= &float16_params
;
2431 parts_float_to_ahp(&p
, s
);
2432 fmt
= &float16_params_ahp
;
2434 return float16a_round_pack_canonical(&p
, s
, fmt
);
2437 static float64 QEMU_SOFTFLOAT_ATTR
2438 soft_float32_to_float64(float32 a
, float_status
*s
)
2442 float32_unpack_canonical(&p
, a
, s
);
2443 parts_float_to_float(&p
, s
);
2444 return float64_round_pack_canonical(&p
, s
);
/*
 * Convert float32 to float64.  Normal inputs take a fast path (a
 * widening conversion can never be inexact); zero is handled inline;
 * every other class (inf, NaN, subnormal) falls back to the generic
 * softfloat path via soft_float32_to_float64().
 *
 * NOTE(review): original lines 2451-2455 -- the body of the normal
 * fast path -- are missing from this extraction; presumably the
 * exact widening itself.  Confirm against upstream before relying
 * on this text.
 */
2447 float64
float32_to_float64(float32 a
, float_status
*s
)
2449 if (likely(float32_is_normal(a
))) {
2450 /* Widening conversion can never produce inexact results. */
2456 } else if (float32_is_zero(a
)) {
2457 return float64_set_sign(float64_zero
, float32_is_neg(a
));
2459 return soft_float32_to_float64(a
, s
);
2463 float16
float64_to_float16(float64 a
, bool ieee
, float_status
*s
)
2466 const FloatFmt
*fmt
;
2468 float64_unpack_canonical(&p
, a
, s
);
2470 parts_float_to_float(&p
, s
);
2471 fmt
= &float16_params
;
2473 parts_float_to_ahp(&p
, s
);
2474 fmt
= &float16_params_ahp
;
2476 return float16a_round_pack_canonical(&p
, s
, fmt
);
2479 float32
float64_to_float32(float64 a
, float_status
*s
)
2483 float64_unpack_canonical(&p
, a
, s
);
2484 parts_float_to_float(&p
, s
);
2485 return float32_round_pack_canonical(&p
, s
);
2488 float32
bfloat16_to_float32(bfloat16 a
, float_status
*s
)
2492 bfloat16_unpack_canonical(&p
, a
, s
);
2493 parts_float_to_float(&p
, s
);
2494 return float32_round_pack_canonical(&p
, s
);
2497 float64
bfloat16_to_float64(bfloat16 a
, float_status
*s
)
2501 bfloat16_unpack_canonical(&p
, a
, s
);
2502 parts_float_to_float(&p
, s
);
2503 return float64_round_pack_canonical(&p
, s
);
2506 bfloat16
float32_to_bfloat16(float32 a
, float_status
*s
)
2510 float32_unpack_canonical(&p
, a
, s
);
2511 parts_float_to_float(&p
, s
);
2512 return bfloat16_round_pack_canonical(&p
, s
);
2515 bfloat16
float64_to_bfloat16(float64 a
, float_status
*s
)
2519 float64_unpack_canonical(&p
, a
, s
);
2520 parts_float_to_float(&p
, s
);
2521 return bfloat16_round_pack_canonical(&p
, s
);
2524 float32
float128_to_float32(float128 a
, float_status
*s
)
2529 float128_unpack_canonical(&p128
, a
, s
);
2530 parts_float_to_float_narrow(&p64
, &p128
, s
);
2531 return float32_round_pack_canonical(&p64
, s
);
2534 float64
float128_to_float64(float128 a
, float_status
*s
)
2539 float128_unpack_canonical(&p128
, a
, s
);
2540 parts_float_to_float_narrow(&p64
, &p128
, s
);
2541 return float64_round_pack_canonical(&p64
, s
);
2544 float128
float32_to_float128(float32 a
, float_status
*s
)
2549 float32_unpack_canonical(&p64
, a
, s
);
2550 parts_float_to_float_widen(&p128
, &p64
, s
);
2551 return float128_round_pack_canonical(&p128
, s
);
2554 float128
float64_to_float128(float64 a
, float_status
*s
)
2559 float64_unpack_canonical(&p64
, a
, s
);
2560 parts_float_to_float_widen(&p128
, &p64
, s
);
2561 return float128_round_pack_canonical(&p128
, s
);
2564 float32
floatx80_to_float32(floatx80 a
, float_status
*s
)
2569 if (floatx80_unpack_canonical(&p128
, a
, s
)) {
2570 parts_float_to_float_narrow(&p64
, &p128
, s
);
2572 parts_default_nan(&p64
, s
);
2574 return float32_round_pack_canonical(&p64
, s
);
2577 float64
floatx80_to_float64(floatx80 a
, float_status
*s
)
2582 if (floatx80_unpack_canonical(&p128
, a
, s
)) {
2583 parts_float_to_float_narrow(&p64
, &p128
, s
);
2585 parts_default_nan(&p64
, s
);
2587 return float64_round_pack_canonical(&p64
, s
);
2590 float128
floatx80_to_float128(floatx80 a
, float_status
*s
)
2594 if (floatx80_unpack_canonical(&p
, a
, s
)) {
2595 parts_float_to_float(&p
, s
);
2597 parts_default_nan(&p
, s
);
2599 return float128_round_pack_canonical(&p
, s
);
2602 floatx80
float32_to_floatx80(float32 a
, float_status
*s
)
2607 float32_unpack_canonical(&p64
, a
, s
);
2608 parts_float_to_float_widen(&p128
, &p64
, s
);
2609 return floatx80_round_pack_canonical(&p128
, s
);
2612 floatx80
float64_to_floatx80(float64 a
, float_status
*s
)
2617 float64_unpack_canonical(&p64
, a
, s
);
2618 parts_float_to_float_widen(&p128
, &p64
, s
);
2619 return floatx80_round_pack_canonical(&p128
, s
);
2622 floatx80
float128_to_floatx80(float128 a
, float_status
*s
)
2626 float128_unpack_canonical(&p
, a
, s
);
2627 parts_float_to_float(&p
, s
);
2628 return floatx80_round_pack_canonical(&p
, s
);
2632 * Round to integral value
2635 float16
float16_round_to_int(float16 a
, float_status
*s
)
2639 float16_unpack_canonical(&p
, a
, s
);
2640 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float16_params
);
2641 return float16_round_pack_canonical(&p
, s
);
2644 float32
float32_round_to_int(float32 a
, float_status
*s
)
2648 float32_unpack_canonical(&p
, a
, s
);
2649 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float32_params
);
2650 return float32_round_pack_canonical(&p
, s
);
2653 float64
float64_round_to_int(float64 a
, float_status
*s
)
2657 float64_unpack_canonical(&p
, a
, s
);
2658 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float64_params
);
2659 return float64_round_pack_canonical(&p
, s
);
2662 bfloat16
bfloat16_round_to_int(bfloat16 a
, float_status
*s
)
2666 bfloat16_unpack_canonical(&p
, a
, s
);
2667 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &bfloat16_params
);
2668 return bfloat16_round_pack_canonical(&p
, s
);
2671 float128
float128_round_to_int(float128 a
, float_status
*s
)
2675 float128_unpack_canonical(&p
, a
, s
);
2676 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float128_params
);
2677 return float128_round_pack_canonical(&p
, s
);
2680 floatx80
floatx80_round_to_int(floatx80 a
, float_status
*status
)
2684 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
2685 return floatx80_default_nan(status
);
2688 parts_round_to_int(&p
, status
->float_rounding_mode
, 0, status
,
2689 &floatx80_params
[status
->floatx80_rounding_precision
]);
2690 return floatx80_round_pack_canonical(&p
, status
);
2694 * Floating-point to signed integer conversions
2697 int8_t float16_to_int8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2702 float16_unpack_canonical(&p
, a
, s
);
2703 return parts_float_to_sint(&p
, rmode
, scale
, INT8_MIN
, INT8_MAX
, s
);
2706 int16_t float16_to_int16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2711 float16_unpack_canonical(&p
, a
, s
);
2712 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2715 int32_t float16_to_int32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2720 float16_unpack_canonical(&p
, a
, s
);
2721 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2724 int64_t float16_to_int64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2729 float16_unpack_canonical(&p
, a
, s
);
2730 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2733 int16_t float32_to_int16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2738 float32_unpack_canonical(&p
, a
, s
);
2739 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2742 int32_t float32_to_int32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2747 float32_unpack_canonical(&p
, a
, s
);
2748 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2751 int64_t float32_to_int64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2756 float32_unpack_canonical(&p
, a
, s
);
2757 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2760 int16_t float64_to_int16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2765 float64_unpack_canonical(&p
, a
, s
);
2766 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2769 int32_t float64_to_int32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2774 float64_unpack_canonical(&p
, a
, s
);
2775 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2778 int64_t float64_to_int64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2783 float64_unpack_canonical(&p
, a
, s
);
2784 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2787 int16_t bfloat16_to_int16_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2792 bfloat16_unpack_canonical(&p
, a
, s
);
2793 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2796 int32_t bfloat16_to_int32_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2801 bfloat16_unpack_canonical(&p
, a
, s
);
2802 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2805 int64_t bfloat16_to_int64_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2810 bfloat16_unpack_canonical(&p
, a
, s
);
2811 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2814 static int32_t float128_to_int32_scalbn(float128 a
, FloatRoundMode rmode
,
2815 int scale
, float_status
*s
)
2819 float128_unpack_canonical(&p
, a
, s
);
2820 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2823 static int64_t float128_to_int64_scalbn(float128 a
, FloatRoundMode rmode
,
2824 int scale
, float_status
*s
)
2828 float128_unpack_canonical(&p
, a
, s
);
2829 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2832 static int32_t floatx80_to_int32_scalbn(floatx80 a
, FloatRoundMode rmode
,
2833 int scale
, float_status
*s
)
2837 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
2838 parts_default_nan(&p
, s
);
2840 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2843 static int64_t floatx80_to_int64_scalbn(floatx80 a
, FloatRoundMode rmode
,
2844 int scale
, float_status
*s
)
2848 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
2849 parts_default_nan(&p
, s
);
2851 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2854 int8_t float16_to_int8(float16 a
, float_status
*s
)
2856 return float16_to_int8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2859 int16_t float16_to_int16(float16 a
, float_status
*s
)
2861 return float16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2864 int32_t float16_to_int32(float16 a
, float_status
*s
)
2866 return float16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2869 int64_t float16_to_int64(float16 a
, float_status
*s
)
2871 return float16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2874 int16_t float32_to_int16(float32 a
, float_status
*s
)
2876 return float32_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2879 int32_t float32_to_int32(float32 a
, float_status
*s
)
2881 return float32_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2884 int64_t float32_to_int64(float32 a
, float_status
*s
)
2886 return float32_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2889 int16_t float64_to_int16(float64 a
, float_status
*s
)
2891 return float64_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2894 int32_t float64_to_int32(float64 a
, float_status
*s
)
2896 return float64_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2899 int64_t float64_to_int64(float64 a
, float_status
*s
)
2901 return float64_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2904 int32_t float128_to_int32(float128 a
, float_status
*s
)
2906 return float128_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2909 int64_t float128_to_int64(float128 a
, float_status
*s
)
2911 return float128_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2914 int32_t floatx80_to_int32(floatx80 a
, float_status
*s
)
2916 return floatx80_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2919 int64_t floatx80_to_int64(floatx80 a
, float_status
*s
)
2921 return floatx80_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2924 int16_t float16_to_int16_round_to_zero(float16 a
, float_status
*s
)
2926 return float16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2929 int32_t float16_to_int32_round_to_zero(float16 a
, float_status
*s
)
2931 return float16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2934 int64_t float16_to_int64_round_to_zero(float16 a
, float_status
*s
)
2936 return float16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2939 int16_t float32_to_int16_round_to_zero(float32 a
, float_status
*s
)
2941 return float32_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2944 int32_t float32_to_int32_round_to_zero(float32 a
, float_status
*s
)
2946 return float32_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2949 int64_t float32_to_int64_round_to_zero(float32 a
, float_status
*s
)
2951 return float32_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2954 int16_t float64_to_int16_round_to_zero(float64 a
, float_status
*s
)
2956 return float64_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2959 int32_t float64_to_int32_round_to_zero(float64 a
, float_status
*s
)
2961 return float64_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2964 int64_t float64_to_int64_round_to_zero(float64 a
, float_status
*s
)
2966 return float64_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2969 int32_t float128_to_int32_round_to_zero(float128 a
, float_status
*s
)
2971 return float128_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2974 int64_t float128_to_int64_round_to_zero(float128 a
, float_status
*s
)
2976 return float128_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2979 int32_t floatx80_to_int32_round_to_zero(floatx80 a
, float_status
*s
)
2981 return floatx80_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2984 int64_t floatx80_to_int64_round_to_zero(floatx80 a
, float_status
*s
)
2986 return floatx80_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2989 int16_t bfloat16_to_int16(bfloat16 a
, float_status
*s
)
2991 return bfloat16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2994 int32_t bfloat16_to_int32(bfloat16 a
, float_status
*s
)
2996 return bfloat16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2999 int64_t bfloat16_to_int64(bfloat16 a
, float_status
*s
)
3001 return bfloat16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3004 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a
, float_status
*s
)
3006 return bfloat16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3009 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a
, float_status
*s
)
3011 return bfloat16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3014 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a
, float_status
*s
)
3016 return bfloat16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3020 * Floating-point to unsigned integer conversions
3023 uint8_t float16_to_uint8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3028 float16_unpack_canonical(&p
, a
, s
);
3029 return parts_float_to_uint(&p
, rmode
, scale
, UINT8_MAX
, s
);
3032 uint16_t float16_to_uint16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3037 float16_unpack_canonical(&p
, a
, s
);
3038 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3041 uint32_t float16_to_uint32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3046 float16_unpack_canonical(&p
, a
, s
);
3047 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3050 uint64_t float16_to_uint64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3055 float16_unpack_canonical(&p
, a
, s
);
3056 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3059 uint16_t float32_to_uint16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3064 float32_unpack_canonical(&p
, a
, s
);
3065 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3068 uint32_t float32_to_uint32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3073 float32_unpack_canonical(&p
, a
, s
);
3074 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3077 uint64_t float32_to_uint64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3082 float32_unpack_canonical(&p
, a
, s
);
3083 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3086 uint16_t float64_to_uint16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3091 float64_unpack_canonical(&p
, a
, s
);
3092 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3095 uint32_t float64_to_uint32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3100 float64_unpack_canonical(&p
, a
, s
);
3101 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3104 uint64_t float64_to_uint64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3109 float64_unpack_canonical(&p
, a
, s
);
3110 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3113 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3114 int scale
, float_status
*s
)
3118 bfloat16_unpack_canonical(&p
, a
, s
);
3119 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3122 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3123 int scale
, float_status
*s
)
3127 bfloat16_unpack_canonical(&p
, a
, s
);
3128 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3131 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3132 int scale
, float_status
*s
)
3136 bfloat16_unpack_canonical(&p
, a
, s
);
3137 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3140 static uint32_t float128_to_uint32_scalbn(float128 a
, FloatRoundMode rmode
,
3141 int scale
, float_status
*s
)
3145 float128_unpack_canonical(&p
, a
, s
);
3146 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3149 static uint64_t float128_to_uint64_scalbn(float128 a
, FloatRoundMode rmode
,
3150 int scale
, float_status
*s
)
3154 float128_unpack_canonical(&p
, a
, s
);
3155 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3158 uint8_t float16_to_uint8(float16 a
, float_status
*s
)
3160 return float16_to_uint8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3163 uint16_t float16_to_uint16(float16 a
, float_status
*s
)
3165 return float16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3168 uint32_t float16_to_uint32(float16 a
, float_status
*s
)
3170 return float16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3173 uint64_t float16_to_uint64(float16 a
, float_status
*s
)
3175 return float16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3178 uint16_t float32_to_uint16(float32 a
, float_status
*s
)
3180 return float32_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3183 uint32_t float32_to_uint32(float32 a
, float_status
*s
)
3185 return float32_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3188 uint64_t float32_to_uint64(float32 a
, float_status
*s
)
3190 return float32_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3193 uint16_t float64_to_uint16(float64 a
, float_status
*s
)
3195 return float64_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3198 uint32_t float64_to_uint32(float64 a
, float_status
*s
)
3200 return float64_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3203 uint64_t float64_to_uint64(float64 a
, float_status
*s
)
3205 return float64_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3208 uint32_t float128_to_uint32(float128 a
, float_status
*s
)
3210 return float128_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3213 uint64_t float128_to_uint64(float128 a
, float_status
*s
)
3215 return float128_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3218 uint16_t float16_to_uint16_round_to_zero(float16 a
, float_status
*s
)
3220 return float16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3223 uint32_t float16_to_uint32_round_to_zero(float16 a
, float_status
*s
)
3225 return float16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3228 uint64_t float16_to_uint64_round_to_zero(float16 a
, float_status
*s
)
3230 return float16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3233 uint16_t float32_to_uint16_round_to_zero(float32 a
, float_status
*s
)
3235 return float32_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3238 uint32_t float32_to_uint32_round_to_zero(float32 a
, float_status
*s
)
3240 return float32_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3243 uint64_t float32_to_uint64_round_to_zero(float32 a
, float_status
*s
)
3245 return float32_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3248 uint16_t float64_to_uint16_round_to_zero(float64 a
, float_status
*s
)
3250 return float64_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3253 uint32_t float64_to_uint32_round_to_zero(float64 a
, float_status
*s
)
3255 return float64_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3258 uint64_t float64_to_uint64_round_to_zero(float64 a
, float_status
*s
)
3260 return float64_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3263 uint32_t float128_to_uint32_round_to_zero(float128 a
, float_status
*s
)
3265 return float128_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3268 uint64_t float128_to_uint64_round_to_zero(float128 a
, float_status
*s
)
3270 return float128_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3273 uint16_t bfloat16_to_uint16(bfloat16 a
, float_status
*s
)
3275 return bfloat16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3278 uint32_t bfloat16_to_uint32(bfloat16 a
, float_status
*s
)
3280 return bfloat16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3283 uint64_t bfloat16_to_uint64(bfloat16 a
, float_status
*s
)
3285 return bfloat16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3288 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a
, float_status
*s
)
3290 return bfloat16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3293 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a
, float_status
*s
)
3295 return bfloat16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3298 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a
, float_status
*s
)
3300 return bfloat16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3304 * Signed integer to floating-point conversions
3307 float16
int64_to_float16_scalbn(int64_t a
, int scale
, float_status
*status
)
3311 parts_sint_to_float(&p
, a
, scale
, status
);
3312 return float16_round_pack_canonical(&p
, status
);
3315 float16
int32_to_float16_scalbn(int32_t a
, int scale
, float_status
*status
)
3317 return int64_to_float16_scalbn(a
, scale
, status
);
3320 float16
int16_to_float16_scalbn(int16_t a
, int scale
, float_status
*status
)
3322 return int64_to_float16_scalbn(a
, scale
, status
);
3325 float16
int64_to_float16(int64_t a
, float_status
*status
)
3327 return int64_to_float16_scalbn(a
, 0, status
);
3330 float16
int32_to_float16(int32_t a
, float_status
*status
)
3332 return int64_to_float16_scalbn(a
, 0, status
);
3335 float16
int16_to_float16(int16_t a
, float_status
*status
)
3337 return int64_to_float16_scalbn(a
, 0, status
);
3340 float16
int8_to_float16(int8_t a
, float_status
*status
)
3342 return int64_to_float16_scalbn(a
, 0, status
);
3345 float32
int64_to_float32_scalbn(int64_t a
, int scale
, float_status
*status
)
3349 parts64_sint_to_float(&p
, a
, scale
, status
);
3350 return float32_round_pack_canonical(&p
, status
);
3353 float32
int32_to_float32_scalbn(int32_t a
, int scale
, float_status
*status
)
3355 return int64_to_float32_scalbn(a
, scale
, status
);
3358 float32
int16_to_float32_scalbn(int16_t a
, int scale
, float_status
*status
)
3360 return int64_to_float32_scalbn(a
, scale
, status
);
3363 float32
int64_to_float32(int64_t a
, float_status
*status
)
3365 return int64_to_float32_scalbn(a
, 0, status
);
3368 float32
int32_to_float32(int32_t a
, float_status
*status
)
3370 return int64_to_float32_scalbn(a
, 0, status
);
3373 float32
int16_to_float32(int16_t a
, float_status
*status
)
3375 return int64_to_float32_scalbn(a
, 0, status
);
3378 float64
int64_to_float64_scalbn(int64_t a
, int scale
, float_status
*status
)
3382 parts_sint_to_float(&p
, a
, scale
, status
);
3383 return float64_round_pack_canonical(&p
, status
);
3386 float64
int32_to_float64_scalbn(int32_t a
, int scale
, float_status
*status
)
3388 return int64_to_float64_scalbn(a
, scale
, status
);
3391 float64
int16_to_float64_scalbn(int16_t a
, int scale
, float_status
*status
)
3393 return int64_to_float64_scalbn(a
, scale
, status
);
3396 float64
int64_to_float64(int64_t a
, float_status
*status
)
3398 return int64_to_float64_scalbn(a
, 0, status
);
3401 float64
int32_to_float64(int32_t a
, float_status
*status
)
3403 return int64_to_float64_scalbn(a
, 0, status
);
3406 float64
int16_to_float64(int16_t a
, float_status
*status
)
3408 return int64_to_float64_scalbn(a
, 0, status
);
3411 bfloat16
int64_to_bfloat16_scalbn(int64_t a
, int scale
, float_status
*status
)
3415 parts_sint_to_float(&p
, a
, scale
, status
);
3416 return bfloat16_round_pack_canonical(&p
, status
);
3419 bfloat16
int32_to_bfloat16_scalbn(int32_t a
, int scale
, float_status
*status
)
3421 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3424 bfloat16
int16_to_bfloat16_scalbn(int16_t a
, int scale
, float_status
*status
)
3426 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3429 bfloat16
int64_to_bfloat16(int64_t a
, float_status
*status
)
3431 return int64_to_bfloat16_scalbn(a
, 0, status
);
3434 bfloat16
int32_to_bfloat16(int32_t a
, float_status
*status
)
3436 return int64_to_bfloat16_scalbn(a
, 0, status
);
3439 bfloat16
int16_to_bfloat16(int16_t a
, float_status
*status
)
3441 return int64_to_bfloat16_scalbn(a
, 0, status
);
3444 float128
int64_to_float128(int64_t a
, float_status
*status
)
3448 parts_sint_to_float(&p
, a
, 0, status
);
3449 return float128_round_pack_canonical(&p
, status
);
3452 float128
int32_to_float128(int32_t a
, float_status
*status
)
3454 return int64_to_float128(a
, status
);
3457 floatx80
int64_to_floatx80(int64_t a
, float_status
*status
)
3461 parts_sint_to_float(&p
, a
, 0, status
);
3462 return floatx80_round_pack_canonical(&p
, status
);
3465 floatx80
int32_to_floatx80(int32_t a
, float_status
*status
)
3467 return int64_to_floatx80(a
, status
);
3471 * Unsigned Integer to floating-point conversions
3474 float16
uint64_to_float16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3478 parts_uint_to_float(&p
, a
, scale
, status
);
3479 return float16_round_pack_canonical(&p
, status
);
3482 float16
uint32_to_float16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3484 return uint64_to_float16_scalbn(a
, scale
, status
);
3487 float16
uint16_to_float16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3489 return uint64_to_float16_scalbn(a
, scale
, status
);
3492 float16
uint64_to_float16(uint64_t a
, float_status
*status
)
3494 return uint64_to_float16_scalbn(a
, 0, status
);
3497 float16
uint32_to_float16(uint32_t a
, float_status
*status
)
3499 return uint64_to_float16_scalbn(a
, 0, status
);
3502 float16
uint16_to_float16(uint16_t a
, float_status
*status
)
3504 return uint64_to_float16_scalbn(a
, 0, status
);
3507 float16
uint8_to_float16(uint8_t a
, float_status
*status
)
3509 return uint64_to_float16_scalbn(a
, 0, status
);
3512 float32
uint64_to_float32_scalbn(uint64_t a
, int scale
, float_status
*status
)
3516 parts_uint_to_float(&p
, a
, scale
, status
);
3517 return float32_round_pack_canonical(&p
, status
);
3520 float32
uint32_to_float32_scalbn(uint32_t a
, int scale
, float_status
*status
)
3522 return uint64_to_float32_scalbn(a
, scale
, status
);
3525 float32
uint16_to_float32_scalbn(uint16_t a
, int scale
, float_status
*status
)
3527 return uint64_to_float32_scalbn(a
, scale
, status
);
3530 float32
uint64_to_float32(uint64_t a
, float_status
*status
)
3532 return uint64_to_float32_scalbn(a
, 0, status
);
3535 float32
uint32_to_float32(uint32_t a
, float_status
*status
)
3537 return uint64_to_float32_scalbn(a
, 0, status
);
3540 float32
uint16_to_float32(uint16_t a
, float_status
*status
)
3542 return uint64_to_float32_scalbn(a
, 0, status
);
3545 float64
uint64_to_float64_scalbn(uint64_t a
, int scale
, float_status
*status
)
3549 parts_uint_to_float(&p
, a
, scale
, status
);
3550 return float64_round_pack_canonical(&p
, status
);
3553 float64
uint32_to_float64_scalbn(uint32_t a
, int scale
, float_status
*status
)
3555 return uint64_to_float64_scalbn(a
, scale
, status
);
3558 float64
uint16_to_float64_scalbn(uint16_t a
, int scale
, float_status
*status
)
3560 return uint64_to_float64_scalbn(a
, scale
, status
);
3563 float64
uint64_to_float64(uint64_t a
, float_status
*status
)
3565 return uint64_to_float64_scalbn(a
, 0, status
);
3568 float64
uint32_to_float64(uint32_t a
, float_status
*status
)
3570 return uint64_to_float64_scalbn(a
, 0, status
);
3573 float64
uint16_to_float64(uint16_t a
, float_status
*status
)
3575 return uint64_to_float64_scalbn(a
, 0, status
);
3578 bfloat16
uint64_to_bfloat16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3582 parts_uint_to_float(&p
, a
, scale
, status
);
3583 return bfloat16_round_pack_canonical(&p
, status
);
3586 bfloat16
uint32_to_bfloat16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3588 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3591 bfloat16
uint16_to_bfloat16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3593 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3596 bfloat16
uint64_to_bfloat16(uint64_t a
, float_status
*status
)
3598 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3601 bfloat16
uint32_to_bfloat16(uint32_t a
, float_status
*status
)
3603 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3606 bfloat16
uint16_to_bfloat16(uint16_t a
, float_status
*status
)
3608 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3611 float128
uint64_to_float128(uint64_t a
, float_status
*status
)
3615 parts_uint_to_float(&p
, a
, 0, status
);
3616 return float128_round_pack_canonical(&p
, status
);
3620 * Minimum and maximum
3623 static float16
float16_minmax(float16 a
, float16 b
, float_status
*s
, int flags
)
3625 FloatParts64 pa
, pb
, *pr
;
3627 float16_unpack_canonical(&pa
, a
, s
);
3628 float16_unpack_canonical(&pb
, b
, s
);
3629 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3631 return float16_round_pack_canonical(pr
, s
);
3634 static bfloat16
bfloat16_minmax(bfloat16 a
, bfloat16 b
,
3635 float_status
*s
, int flags
)
3637 FloatParts64 pa
, pb
, *pr
;
3639 bfloat16_unpack_canonical(&pa
, a
, s
);
3640 bfloat16_unpack_canonical(&pb
, b
, s
);
3641 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3643 return bfloat16_round_pack_canonical(pr
, s
);
3646 static float32
float32_minmax(float32 a
, float32 b
, float_status
*s
, int flags
)
3648 FloatParts64 pa
, pb
, *pr
;
3650 float32_unpack_canonical(&pa
, a
, s
);
3651 float32_unpack_canonical(&pb
, b
, s
);
3652 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3654 return float32_round_pack_canonical(pr
, s
);
3657 static float64
float64_minmax(float64 a
, float64 b
, float_status
*s
, int flags
)
3659 FloatParts64 pa
, pb
, *pr
;
3661 float64_unpack_canonical(&pa
, a
, s
);
3662 float64_unpack_canonical(&pb
, b
, s
);
3663 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3665 return float64_round_pack_canonical(pr
, s
);
3668 static float128
float128_minmax(float128 a
, float128 b
,
3669 float_status
*s
, int flags
)
3671 FloatParts128 pa
, pb
, *pr
;
3673 float128_unpack_canonical(&pa
, a
, s
);
3674 float128_unpack_canonical(&pb
, b
, s
);
3675 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3677 return float128_round_pack_canonical(pr
, s
);
3680 #define MINMAX_1(type, name, flags) \
3681 type type##_##name(type a, type b, float_status *s) \
3682 { return type##_minmax(a, b, s, flags); }
3684 #define MINMAX_2(type) \
3685 MINMAX_1(type, max, 0) \
3686 MINMAX_1(type, maxnum, minmax_isnum) \
3687 MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
3688 MINMAX_1(type, min, minmax_ismin) \
3689 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
3690 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3702 * Floating point compare
3705 static FloatRelation QEMU_FLATTEN
3706 float16_do_compare(float16 a
, float16 b
, float_status
*s
, bool is_quiet
)
3708 FloatParts64 pa
, pb
;
3710 float16_unpack_canonical(&pa
, a
, s
);
3711 float16_unpack_canonical(&pb
, b
, s
);
3712 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3715 FloatRelation
float16_compare(float16 a
, float16 b
, float_status
*s
)
3717 return float16_do_compare(a
, b
, s
, false);
3720 FloatRelation
float16_compare_quiet(float16 a
, float16 b
, float_status
*s
)
3722 return float16_do_compare(a
, b
, s
, true);
3725 static FloatRelation QEMU_SOFTFLOAT_ATTR
3726 float32_do_compare(float32 a
, float32 b
, float_status
*s
, bool is_quiet
)
3728 FloatParts64 pa
, pb
;
3730 float32_unpack_canonical(&pa
, a
, s
);
3731 float32_unpack_canonical(&pb
, b
, s
);
3732 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3735 static FloatRelation QEMU_FLATTEN
3736 float32_hs_compare(float32 xa
, float32 xb
, float_status
*s
, bool is_quiet
)
3738 union_float32 ua
, ub
;
3743 if (QEMU_NO_HARDFLOAT
) {
3747 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
3748 if (isgreaterequal(ua
.h
, ub
.h
)) {
3749 if (isgreater(ua
.h
, ub
.h
)) {
3750 return float_relation_greater
;
3752 return float_relation_equal
;
3754 if (likely(isless(ua
.h
, ub
.h
))) {
3755 return float_relation_less
;
3758 * The only condition remaining is unordered.
3759 * Fall through to set flags.
3762 return float32_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
3765 FloatRelation
float32_compare(float32 a
, float32 b
, float_status
*s
)
3767 return float32_hs_compare(a
, b
, s
, false);
3770 FloatRelation
float32_compare_quiet(float32 a
, float32 b
, float_status
*s
)
3772 return float32_hs_compare(a
, b
, s
, true);
3775 static FloatRelation QEMU_SOFTFLOAT_ATTR
3776 float64_do_compare(float64 a
, float64 b
, float_status
*s
, bool is_quiet
)
3778 FloatParts64 pa
, pb
;
3780 float64_unpack_canonical(&pa
, a
, s
);
3781 float64_unpack_canonical(&pb
, b
, s
);
3782 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3785 static FloatRelation QEMU_FLATTEN
3786 float64_hs_compare(float64 xa
, float64 xb
, float_status
*s
, bool is_quiet
)
3788 union_float64 ua
, ub
;
3793 if (QEMU_NO_HARDFLOAT
) {
3797 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
3798 if (isgreaterequal(ua
.h
, ub
.h
)) {
3799 if (isgreater(ua
.h
, ub
.h
)) {
3800 return float_relation_greater
;
3802 return float_relation_equal
;
3804 if (likely(isless(ua
.h
, ub
.h
))) {
3805 return float_relation_less
;
3808 * The only condition remaining is unordered.
3809 * Fall through to set flags.
3812 return float64_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
3815 FloatRelation
float64_compare(float64 a
, float64 b
, float_status
*s
)
3817 return float64_hs_compare(a
, b
, s
, false);
3820 FloatRelation
float64_compare_quiet(float64 a
, float64 b
, float_status
*s
)
3822 return float64_hs_compare(a
, b
, s
, true);
3825 static FloatRelation QEMU_FLATTEN
3826 bfloat16_do_compare(bfloat16 a
, bfloat16 b
, float_status
*s
, bool is_quiet
)
3828 FloatParts64 pa
, pb
;
3830 bfloat16_unpack_canonical(&pa
, a
, s
);
3831 bfloat16_unpack_canonical(&pb
, b
, s
);
3832 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3835 FloatRelation
bfloat16_compare(bfloat16 a
, bfloat16 b
, float_status
*s
)
3837 return bfloat16_do_compare(a
, b
, s
, false);
3840 FloatRelation
bfloat16_compare_quiet(bfloat16 a
, bfloat16 b
, float_status
*s
)
3842 return bfloat16_do_compare(a
, b
, s
, true);
3845 static FloatRelation QEMU_FLATTEN
3846 float128_do_compare(float128 a
, float128 b
, float_status
*s
, bool is_quiet
)
3848 FloatParts128 pa
, pb
;
3850 float128_unpack_canonical(&pa
, a
, s
);
3851 float128_unpack_canonical(&pb
, b
, s
);
3852 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3855 FloatRelation
float128_compare(float128 a
, float128 b
, float_status
*s
)
3857 return float128_do_compare(a
, b
, s
, false);
3860 FloatRelation
float128_compare_quiet(float128 a
, float128 b
, float_status
*s
)
3862 return float128_do_compare(a
, b
, s
, true);
3869 float16
float16_scalbn(float16 a
, int n
, float_status
*status
)
3873 float16_unpack_canonical(&p
, a
, status
);
3874 parts_scalbn(&p
, n
, status
);
3875 return float16_round_pack_canonical(&p
, status
);
3878 float32
float32_scalbn(float32 a
, int n
, float_status
*status
)
3882 float32_unpack_canonical(&p
, a
, status
);
3883 parts_scalbn(&p
, n
, status
);
3884 return float32_round_pack_canonical(&p
, status
);
3887 float64
float64_scalbn(float64 a
, int n
, float_status
*status
)
3891 float64_unpack_canonical(&p
, a
, status
);
3892 parts_scalbn(&p
, n
, status
);
3893 return float64_round_pack_canonical(&p
, status
);
3896 bfloat16
bfloat16_scalbn(bfloat16 a
, int n
, float_status
*status
)
3900 bfloat16_unpack_canonical(&p
, a
, status
);
3901 parts_scalbn(&p
, n
, status
);
3902 return bfloat16_round_pack_canonical(&p
, status
);
3905 float128
float128_scalbn(float128 a
, int n
, float_status
*status
)
3909 float128_unpack_canonical(&p
, a
, status
);
3910 parts_scalbn(&p
, n
, status
);
3911 return float128_round_pack_canonical(&p
, status
);
3914 floatx80
floatx80_scalbn(floatx80 a
, int n
, float_status
*status
)
3918 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
3919 return floatx80_default_nan(status
);
3921 parts_scalbn(&p
, n
, status
);
3922 return floatx80_round_pack_canonical(&p
, status
);
3929 float16 QEMU_FLATTEN
float16_sqrt(float16 a
, float_status
*status
)
3933 float16_unpack_canonical(&p
, a
, status
);
3934 parts_sqrt(&p
, status
, &float16_params
);
3935 return float16_round_pack_canonical(&p
, status
);
3938 static float32 QEMU_SOFTFLOAT_ATTR
3939 soft_f32_sqrt(float32 a
, float_status
*status
)
3943 float32_unpack_canonical(&p
, a
, status
);
3944 parts_sqrt(&p
, status
, &float32_params
);
3945 return float32_round_pack_canonical(&p
, status
);
3948 static float64 QEMU_SOFTFLOAT_ATTR
3949 soft_f64_sqrt(float64 a
, float_status
*status
)
3953 float64_unpack_canonical(&p
, a
, status
);
3954 parts_sqrt(&p
, status
, &float64_params
);
3955 return float64_round_pack_canonical(&p
, status
);
3958 float32 QEMU_FLATTEN
float32_sqrt(float32 xa
, float_status
*s
)
3960 union_float32 ua
, ur
;
3963 if (unlikely(!can_use_fpu(s
))) {
3967 float32_input_flush1(&ua
.s
, s
);
3968 if (QEMU_HARDFLOAT_1F32_USE_FP
) {
3969 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
3970 fpclassify(ua
.h
) == FP_ZERO
) ||
3974 } else if (unlikely(!float32_is_zero_or_normal(ua
.s
) ||
3975 float32_is_neg(ua
.s
))) {
3982 return soft_f32_sqrt(ua
.s
, s
);
3985 float64 QEMU_FLATTEN
float64_sqrt(float64 xa
, float_status
*s
)
3987 union_float64 ua
, ur
;
3990 if (unlikely(!can_use_fpu(s
))) {
3994 float64_input_flush1(&ua
.s
, s
);
3995 if (QEMU_HARDFLOAT_1F64_USE_FP
) {
3996 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
3997 fpclassify(ua
.h
) == FP_ZERO
) ||
4001 } else if (unlikely(!float64_is_zero_or_normal(ua
.s
) ||
4002 float64_is_neg(ua
.s
))) {
4009 return soft_f64_sqrt(ua
.s
, s
);
4012 bfloat16 QEMU_FLATTEN
bfloat16_sqrt(bfloat16 a
, float_status
*status
)
4016 bfloat16_unpack_canonical(&p
, a
, status
);
4017 parts_sqrt(&p
, status
, &bfloat16_params
);
4018 return bfloat16_round_pack_canonical(&p
, status
);
4021 float128 QEMU_FLATTEN
float128_sqrt(float128 a
, float_status
*status
)
4025 float128_unpack_canonical(&p
, a
, status
);
4026 parts_sqrt(&p
, status
, &float128_params
);
4027 return float128_round_pack_canonical(&p
, status
);
4030 floatx80
floatx80_sqrt(floatx80 a
, float_status
*s
)
4034 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
4035 return floatx80_default_nan(s
);
4037 parts_sqrt(&p
, s
, &floatx80_params
[s
->floatx80_rounding_precision
]);
4038 return floatx80_round_pack_canonical(&p
, s
);
4041 /*----------------------------------------------------------------------------
4042 | The pattern for a default generated NaN.
4043 *----------------------------------------------------------------------------*/
4045 float16
float16_default_nan(float_status
*status
)
4049 parts_default_nan(&p
, status
);
4050 p
.frac
>>= float16_params
.frac_shift
;
4051 return float16_pack_raw(&p
);
4054 float32
float32_default_nan(float_status
*status
)
4058 parts_default_nan(&p
, status
);
4059 p
.frac
>>= float32_params
.frac_shift
;
4060 return float32_pack_raw(&p
);
4063 float64
float64_default_nan(float_status
*status
)
4067 parts_default_nan(&p
, status
);
4068 p
.frac
>>= float64_params
.frac_shift
;
4069 return float64_pack_raw(&p
);
4072 float128
float128_default_nan(float_status
*status
)
4076 parts_default_nan(&p
, status
);
4077 frac_shr(&p
, float128_params
.frac_shift
);
4078 return float128_pack_raw(&p
);
4081 bfloat16
bfloat16_default_nan(float_status
*status
)
4085 parts_default_nan(&p
, status
);
4086 p
.frac
>>= bfloat16_params
.frac_shift
;
4087 return bfloat16_pack_raw(&p
);
4090 /*----------------------------------------------------------------------------
4091 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4092 *----------------------------------------------------------------------------*/
4094 float16
float16_silence_nan(float16 a
, float_status
*status
)
4098 float16_unpack_raw(&p
, a
);
4099 p
.frac
<<= float16_params
.frac_shift
;
4100 parts_silence_nan(&p
, status
);
4101 p
.frac
>>= float16_params
.frac_shift
;
4102 return float16_pack_raw(&p
);
4105 float32
float32_silence_nan(float32 a
, float_status
*status
)
4109 float32_unpack_raw(&p
, a
);
4110 p
.frac
<<= float32_params
.frac_shift
;
4111 parts_silence_nan(&p
, status
);
4112 p
.frac
>>= float32_params
.frac_shift
;
4113 return float32_pack_raw(&p
);
4116 float64
float64_silence_nan(float64 a
, float_status
*status
)
4120 float64_unpack_raw(&p
, a
);
4121 p
.frac
<<= float64_params
.frac_shift
;
4122 parts_silence_nan(&p
, status
);
4123 p
.frac
>>= float64_params
.frac_shift
;
4124 return float64_pack_raw(&p
);
4127 bfloat16
bfloat16_silence_nan(bfloat16 a
, float_status
*status
)
4131 bfloat16_unpack_raw(&p
, a
);
4132 p
.frac
<<= bfloat16_params
.frac_shift
;
4133 parts_silence_nan(&p
, status
);
4134 p
.frac
>>= bfloat16_params
.frac_shift
;
4135 return bfloat16_pack_raw(&p
);
4138 float128
float128_silence_nan(float128 a
, float_status
*status
)
4142 float128_unpack_raw(&p
, a
);
4143 frac_shl(&p
, float128_params
.frac_shift
);
4144 parts_silence_nan(&p
, status
);
4145 frac_shr(&p
, float128_params
.frac_shift
);
4146 return float128_pack_raw(&p
);
4149 /*----------------------------------------------------------------------------
4150 | If `a' is denormal and we are in flush-to-zero mode then set the
4151 | input-denormal exception and return zero. Otherwise just return the value.
4152 *----------------------------------------------------------------------------*/
4154 static bool parts_squash_denormal(FloatParts64 p
, float_status
*status
)
4156 if (p
.exp
== 0 && p
.frac
!= 0) {
4157 float_raise(float_flag_input_denormal
, status
);
4164 float16
float16_squash_input_denormal(float16 a
, float_status
*status
)
4166 if (status
->flush_inputs_to_zero
) {
4169 float16_unpack_raw(&p
, a
);
4170 if (parts_squash_denormal(p
, status
)) {
4171 return float16_set_sign(float16_zero
, p
.sign
);
4177 float32
float32_squash_input_denormal(float32 a
, float_status
*status
)
4179 if (status
->flush_inputs_to_zero
) {
4182 float32_unpack_raw(&p
, a
);
4183 if (parts_squash_denormal(p
, status
)) {
4184 return float32_set_sign(float32_zero
, p
.sign
);
4190 float64
float64_squash_input_denormal(float64 a
, float_status
*status
)
4192 if (status
->flush_inputs_to_zero
) {
4195 float64_unpack_raw(&p
, a
);
4196 if (parts_squash_denormal(p
, status
)) {
4197 return float64_set_sign(float64_zero
, p
.sign
);
4203 bfloat16
bfloat16_squash_input_denormal(bfloat16 a
, float_status
*status
)
4205 if (status
->flush_inputs_to_zero
) {
4208 bfloat16_unpack_raw(&p
, a
);
4209 if (parts_squash_denormal(p
, status
)) {
4210 return bfloat16_set_sign(bfloat16_zero
, p
.sign
);
4216 /*----------------------------------------------------------------------------
4217 | Normalizes the subnormal single-precision floating-point value represented
4218 | by the denormalized significand `aSig'. The normalized exponent and
4219 | significand are stored at the locations pointed to by `zExpPtr' and
4220 | `zSigPtr', respectively.
4221 *----------------------------------------------------------------------------*/
/*
 * Normalize a subnormal float32 significand: shift the leading 1 into
 * bit 23 and return the matching (negative or zero) exponent.
 * (Return type and local declaration restored.)
 */
static void
normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    int8_t shiftCount;

    /* 8 = 31 - 23: align the most-significant set bit with bit 23. */
    shiftCount = clz32(aSig) - 8;
    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4234 /*----------------------------------------------------------------------------
4235 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4236 | and significand `zSig', and returns the proper single-precision floating-
4237 | point value corresponding to the abstract input. Ordinarily, the abstract
4238 | value is simply rounded and packed into the single-precision format, with
4239 | the inexact exception raised if the abstract input cannot be represented
4240 | exactly. However, if the abstract value is too large, the overflow and
4241 | inexact exceptions are raised and an infinity or maximal finite value is
4242 | returned. If the abstract value is too small, the input value is rounded to
4243 | a subnormal number, and the underflow and inexact exceptions are raised if
4244 | the abstract input cannot be represented exactly as a subnormal single-
4245 | precision floating-point number.
4246 | The input significand `zSig' has its binary point between bits 30
4247 | and 29, which is 7 bits to the left of the usual location. This shifted
4248 | significand must be normalized or smaller. If `zSig' is not normalized,
4249 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4250 | and it must not require rounding. In the usual case that `zSig' is
4251 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4252 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4253 | Binary Floating-Point Arithmetic.
4254 *----------------------------------------------------------------------------*/
4256 static float32
roundAndPackFloat32(bool zSign
, int zExp
, uint32_t zSig
,
4257 float_status
*status
)
4259 int8_t roundingMode
;
4260 bool roundNearestEven
;
4261 int8_t roundIncrement
, roundBits
;
4264 roundingMode
= status
->float_rounding_mode
;
4265 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4266 switch (roundingMode
) {
4267 case float_round_nearest_even
:
4268 case float_round_ties_away
:
4269 roundIncrement
= 0x40;
4271 case float_round_to_zero
:
4274 case float_round_up
:
4275 roundIncrement
= zSign
? 0 : 0x7f;
4277 case float_round_down
:
4278 roundIncrement
= zSign
? 0x7f : 0;
4280 case float_round_to_odd
:
4281 roundIncrement
= zSig
& 0x80 ? 0 : 0x7f;
4287 roundBits
= zSig
& 0x7F;
4288 if ( 0xFD <= (uint16_t) zExp
) {
4289 if ( ( 0xFD < zExp
)
4290 || ( ( zExp
== 0xFD )
4291 && ( (int32_t) ( zSig
+ roundIncrement
) < 0 ) )
4293 bool overflow_to_inf
= roundingMode
!= float_round_to_odd
&&
4294 roundIncrement
!= 0;
4295 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4296 return packFloat32(zSign
, 0xFF, -!overflow_to_inf
);
4299 if (status
->flush_to_zero
) {
4300 float_raise(float_flag_output_denormal
, status
);
4301 return packFloat32(zSign
, 0, 0);
4303 isTiny
= status
->tininess_before_rounding
4305 || (zSig
+ roundIncrement
< 0x80000000);
4306 shift32RightJamming( zSig
, - zExp
, &zSig
);
4308 roundBits
= zSig
& 0x7F;
4309 if (isTiny
&& roundBits
) {
4310 float_raise(float_flag_underflow
, status
);
4312 if (roundingMode
== float_round_to_odd
) {
4314 * For round-to-odd case, the roundIncrement depends on
4315 * zSig which just changed.
4317 roundIncrement
= zSig
& 0x80 ? 0 : 0x7f;
4322 float_raise(float_flag_inexact
, status
);
4324 zSig
= ( zSig
+ roundIncrement
)>>7;
4325 if (!(roundBits
^ 0x40) && roundNearestEven
) {
4328 if ( zSig
== 0 ) zExp
= 0;
4329 return packFloat32( zSign
, zExp
, zSig
);
4333 /*----------------------------------------------------------------------------
4334 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4335 | and significand `zSig', and returns the proper single-precision floating-
4336 | point value corresponding to the abstract input. This routine is just like
4337 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4338 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4339 | floating-point exponent.
4340 *----------------------------------------------------------------------------*/
4343 normalizeRoundAndPackFloat32(bool zSign
, int zExp
, uint32_t zSig
,
4344 float_status
*status
)
4348 shiftCount
= clz32(zSig
) - 1;
4349 return roundAndPackFloat32(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
,
4354 /*----------------------------------------------------------------------------
4355 | Normalizes the subnormal double-precision floating-point value represented
4356 | by the denormalized significand `aSig'. The normalized exponent and
4357 | significand are stored at the locations pointed to by `zExpPtr' and
4358 | `zSigPtr', respectively.
4359 *----------------------------------------------------------------------------*/
/*
 * Normalize a subnormal float64 significand: shift the leading 1 into
 * bit 52 and return the matching exponent.  (Decls restored.)
 */
static void
normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    int8_t shiftCount;

    /* 11 = 63 - 52: align the most-significant set bit with bit 52. */
    shiftCount = clz64(aSig) - 11;
    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4372 /*----------------------------------------------------------------------------
4373 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4374 | double-precision floating-point value, returning the result. After being
4375 | shifted into the proper positions, the three fields are simply added
4376 | together to form the result. This means that any integer portion of `zSig'
4377 | will be added into the exponent. Since a properly normalized significand
4378 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4379 | than the desired result exponent whenever `zSig' is a complete, normalized
4381 *----------------------------------------------------------------------------*/
4383 static inline float64
packFloat64(bool zSign
, int zExp
, uint64_t zSig
)
4386 return make_float64(
4387 ( ( (uint64_t) zSign
)<<63 ) + ( ( (uint64_t) zExp
)<<52 ) + zSig
);
4391 /*----------------------------------------------------------------------------
4392 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4393 | and significand `zSig', and returns the proper double-precision floating-
4394 | point value corresponding to the abstract input. Ordinarily, the abstract
4395 | value is simply rounded and packed into the double-precision format, with
4396 | the inexact exception raised if the abstract input cannot be represented
4397 | exactly. However, if the abstract value is too large, the overflow and
4398 | inexact exceptions are raised and an infinity or maximal finite value is
4399 | returned. If the abstract value is too small, the input value is rounded to
4400 | a subnormal number, and the underflow and inexact exceptions are raised if
4401 | the abstract input cannot be represented exactly as a subnormal double-
4402 | precision floating-point number.
4403 | The input significand `zSig' has its binary point between bits 62
4404 | and 61, which is 10 bits to the left of the usual location. This shifted
4405 | significand must be normalized or smaller. If `zSig' is not normalized,
4406 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4407 | and it must not require rounding. In the usual case that `zSig' is
4408 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4409 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4410 | Binary Floating-Point Arithmetic.
4411 *----------------------------------------------------------------------------*/
4413 static float64
roundAndPackFloat64(bool zSign
, int zExp
, uint64_t zSig
,
4414 float_status
*status
)
4416 int8_t roundingMode
;
4417 bool roundNearestEven
;
4418 int roundIncrement
, roundBits
;
4421 roundingMode
= status
->float_rounding_mode
;
4422 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4423 switch (roundingMode
) {
4424 case float_round_nearest_even
:
4425 case float_round_ties_away
:
4426 roundIncrement
= 0x200;
4428 case float_round_to_zero
:
4431 case float_round_up
:
4432 roundIncrement
= zSign
? 0 : 0x3ff;
4434 case float_round_down
:
4435 roundIncrement
= zSign
? 0x3ff : 0;
4437 case float_round_to_odd
:
4438 roundIncrement
= (zSig
& 0x400) ? 0 : 0x3ff;
4443 roundBits
= zSig
& 0x3FF;
4444 if ( 0x7FD <= (uint16_t) zExp
) {
4445 if ( ( 0x7FD < zExp
)
4446 || ( ( zExp
== 0x7FD )
4447 && ( (int64_t) ( zSig
+ roundIncrement
) < 0 ) )
4449 bool overflow_to_inf
= roundingMode
!= float_round_to_odd
&&
4450 roundIncrement
!= 0;
4451 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4452 return packFloat64(zSign
, 0x7FF, -(!overflow_to_inf
));
4455 if (status
->flush_to_zero
) {
4456 float_raise(float_flag_output_denormal
, status
);
4457 return packFloat64(zSign
, 0, 0);
4459 isTiny
= status
->tininess_before_rounding
4461 || (zSig
+ roundIncrement
< UINT64_C(0x8000000000000000));
4462 shift64RightJamming( zSig
, - zExp
, &zSig
);
4464 roundBits
= zSig
& 0x3FF;
4465 if (isTiny
&& roundBits
) {
4466 float_raise(float_flag_underflow
, status
);
4468 if (roundingMode
== float_round_to_odd
) {
4470 * For round-to-odd case, the roundIncrement depends on
4471 * zSig which just changed.
4473 roundIncrement
= (zSig
& 0x400) ? 0 : 0x3ff;
4478 float_raise(float_flag_inexact
, status
);
4480 zSig
= ( zSig
+ roundIncrement
)>>10;
4481 if (!(roundBits
^ 0x200) && roundNearestEven
) {
4484 if ( zSig
== 0 ) zExp
= 0;
4485 return packFloat64( zSign
, zExp
, zSig
);
4489 /*----------------------------------------------------------------------------
4490 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4491 | and significand `zSig', and returns the proper double-precision floating-
4492 | point value corresponding to the abstract input. This routine is just like
4493 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4494 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4495 | floating-point exponent.
4496 *----------------------------------------------------------------------------*/
4499 normalizeRoundAndPackFloat64(bool zSign
, int zExp
, uint64_t zSig
,
4500 float_status
*status
)
4504 shiftCount
= clz64(zSig
) - 1;
4505 return roundAndPackFloat64(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
,
4510 /*----------------------------------------------------------------------------
4511 | Normalizes the subnormal extended double-precision floating-point value
4512 | represented by the denormalized significand `aSig'. The normalized exponent
4513 | and significand are stored at the locations pointed to by `zExpPtr' and
4514 | `zSigPtr', respectively.
4515 *----------------------------------------------------------------------------*/
/*
 * Normalize a subnormal floatx80 significand: shift the leading 1 into
 * bit 63 and return the matching exponent.  (Missing parameter line and
 * local declaration restored.)
 */
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t shiftCount;

    shiftCount = clz64(aSig);
    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4527 /*----------------------------------------------------------------------------
4528 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4529 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4530 | and returns the proper extended double-precision floating-point value
4531 | corresponding to the abstract input. Ordinarily, the abstract value is
4532 | rounded and packed into the extended double-precision format, with the
4533 | inexact exception raised if the abstract input cannot be represented
4534 | exactly. However, if the abstract value is too large, the overflow and
4535 | inexact exceptions are raised and an infinity or maximal finite value is
4536 | returned. If the abstract value is too small, the input value is rounded to
4537 | a subnormal number, and the underflow and inexact exceptions are raised if
4538 | the abstract input cannot be represented exactly as a subnormal extended
4539 | double-precision floating-point number.
4540 | If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4541 | the result is rounded to the same number of bits as single or double
4542 | precision, respectively. Otherwise, the result is rounded to the full
4543 | precision of the extended double-precision format.
4544 | The input significand must be normalized or smaller. If the input
4545 | significand is not normalized, `zExp' must be 0; in that case, the result
4546 | returned is a subnormal number, and it must not require rounding. The
4547 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4548 | Floating-Point Arithmetic.
4549 *----------------------------------------------------------------------------*/
4551 floatx80
roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
, bool zSign
,
4552 int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
,
4553 float_status
*status
)
4555 FloatRoundMode roundingMode
;
4556 bool roundNearestEven
, increment
, isTiny
;
4557 int64_t roundIncrement
, roundMask
, roundBits
;
4559 roundingMode
= status
->float_rounding_mode
;
4560 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4561 switch (roundingPrecision
) {
4562 case floatx80_precision_x
:
4564 case floatx80_precision_d
:
4565 roundIncrement
= UINT64_C(0x0000000000000400);
4566 roundMask
= UINT64_C(0x00000000000007FF);
4568 case floatx80_precision_s
:
4569 roundIncrement
= UINT64_C(0x0000008000000000);
4570 roundMask
= UINT64_C(0x000000FFFFFFFFFF);
4573 g_assert_not_reached();
4575 zSig0
|= ( zSig1
!= 0 );
4576 switch (roundingMode
) {
4577 case float_round_nearest_even
:
4578 case float_round_ties_away
:
4580 case float_round_to_zero
:
4583 case float_round_up
:
4584 roundIncrement
= zSign
? 0 : roundMask
;
4586 case float_round_down
:
4587 roundIncrement
= zSign
? roundMask
: 0;
4592 roundBits
= zSig0
& roundMask
;
4593 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4594 if ( ( 0x7FFE < zExp
)
4595 || ( ( zExp
== 0x7FFE ) && ( zSig0
+ roundIncrement
< zSig0
) )
4600 if (status
->flush_to_zero
) {
4601 float_raise(float_flag_output_denormal
, status
);
4602 return packFloatx80(zSign
, 0, 0);
4604 isTiny
= status
->tininess_before_rounding
4606 || (zSig0
<= zSig0
+ roundIncrement
);
4607 shift64RightJamming( zSig0
, 1 - zExp
, &zSig0
);
4609 roundBits
= zSig0
& roundMask
;
4610 if (isTiny
&& roundBits
) {
4611 float_raise(float_flag_underflow
, status
);
4614 float_raise(float_flag_inexact
, status
);
4616 zSig0
+= roundIncrement
;
4617 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
4618 roundIncrement
= roundMask
+ 1;
4619 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4620 roundMask
|= roundIncrement
;
4622 zSig0
&= ~ roundMask
;
4623 return packFloatx80( zSign
, zExp
, zSig0
);
4627 float_raise(float_flag_inexact
, status
);
4629 zSig0
+= roundIncrement
;
4630 if ( zSig0
< roundIncrement
) {
4632 zSig0
= UINT64_C(0x8000000000000000);
4634 roundIncrement
= roundMask
+ 1;
4635 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4636 roundMask
|= roundIncrement
;
4638 zSig0
&= ~ roundMask
;
4639 if ( zSig0
== 0 ) zExp
= 0;
4640 return packFloatx80( zSign
, zExp
, zSig0
);
4642 switch (roundingMode
) {
4643 case float_round_nearest_even
:
4644 case float_round_ties_away
:
4645 increment
= ((int64_t)zSig1
< 0);
4647 case float_round_to_zero
:
4650 case float_round_up
:
4651 increment
= !zSign
&& zSig1
;
4653 case float_round_down
:
4654 increment
= zSign
&& zSig1
;
4659 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4660 if ( ( 0x7FFE < zExp
)
4661 || ( ( zExp
== 0x7FFE )
4662 && ( zSig0
== UINT64_C(0xFFFFFFFFFFFFFFFF) )
4668 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4669 if ( ( roundingMode
== float_round_to_zero
)
4670 || ( zSign
&& ( roundingMode
== float_round_up
) )
4671 || ( ! zSign
&& ( roundingMode
== float_round_down
) )
4673 return packFloatx80( zSign
, 0x7FFE, ~ roundMask
);
4675 return packFloatx80(zSign
,
4676 floatx80_infinity_high
,
4677 floatx80_infinity_low
);
4680 isTiny
= status
->tininess_before_rounding
4683 || (zSig0
< UINT64_C(0xFFFFFFFFFFFFFFFF));
4684 shift64ExtraRightJamming( zSig0
, zSig1
, 1 - zExp
, &zSig0
, &zSig1
);
4686 if (isTiny
&& zSig1
) {
4687 float_raise(float_flag_underflow
, status
);
4690 float_raise(float_flag_inexact
, status
);
4692 switch (roundingMode
) {
4693 case float_round_nearest_even
:
4694 case float_round_ties_away
:
4695 increment
= ((int64_t)zSig1
< 0);
4697 case float_round_to_zero
:
4700 case float_round_up
:
4701 increment
= !zSign
&& zSig1
;
4703 case float_round_down
:
4704 increment
= zSign
&& zSig1
;
4711 if (!(zSig1
<< 1) && roundNearestEven
) {
4714 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
4716 return packFloatx80( zSign
, zExp
, zSig0
);
4720 float_raise(float_flag_inexact
, status
);
4726 zSig0
= UINT64_C(0x8000000000000000);
4729 if (!(zSig1
<< 1) && roundNearestEven
) {
4735 if ( zSig0
== 0 ) zExp
= 0;
4737 return packFloatx80( zSign
, zExp
, zSig0
);
4741 /*----------------------------------------------------------------------------
4742 | Takes an abstract floating-point value having sign `zSign', exponent
4743 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4744 | and returns the proper extended double-precision floating-point value
4745 | corresponding to the abstract input. This routine is just like
4746 | `roundAndPackFloatx80' except that the input significand does not have to be
4748 *----------------------------------------------------------------------------*/
4750 floatx80
normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
,
4751 bool zSign
, int32_t zExp
,
4752 uint64_t zSig0
, uint64_t zSig1
,
4753 float_status
*status
)
4762 shiftCount
= clz64(zSig0
);
4763 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
4765 return roundAndPackFloatx80(roundingPrecision
, zSign
, zExp
,
4766 zSig0
, zSig1
, status
);
4770 /*----------------------------------------------------------------------------
4771 | Returns the least-significant 64 fraction bits of the quadruple-precision
4772 | floating-point value `a'.
4773 *----------------------------------------------------------------------------*/
4775 static inline uint64_t extractFloat128Frac1( float128 a
)
4782 /*----------------------------------------------------------------------------
4783 | Returns the most-significant 48 fraction bits of the quadruple-precision
4784 | floating-point value `a'.
4785 *----------------------------------------------------------------------------*/
4787 static inline uint64_t extractFloat128Frac0( float128 a
)
4790 return a
.high
& UINT64_C(0x0000FFFFFFFFFFFF);
4794 /*----------------------------------------------------------------------------
4795 | Returns the exponent bits of the quadruple-precision floating-point value
4797 *----------------------------------------------------------------------------*/
4799 static inline int32_t extractFloat128Exp( float128 a
)
4802 return ( a
.high
>>48 ) & 0x7FFF;
4806 /*----------------------------------------------------------------------------
4807 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4808 *----------------------------------------------------------------------------*/
4810 static inline bool extractFloat128Sign(float128 a
)
4812 return a
.high
>> 63;
4815 /*----------------------------------------------------------------------------
4816 | Normalizes the subnormal quadruple-precision floating-point value
4817 | represented by the denormalized significand formed by the concatenation of
4818 | `aSig0' and `aSig1'. The normalized exponent is stored at the location
4819 | pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4820 | significand are stored at the location pointed to by `zSig0Ptr', and the
4821 | least significant 64 bits of the normalized significand are stored at the
4822 | location pointed to by `zSig1Ptr'.
4823 *----------------------------------------------------------------------------*/
/*
 * Normalize a subnormal float128 significand held in aSig0:aSig1,
 * producing the exponent and a significand whose leading 1 sits at
 * bit 48 of the high word.  (The dropped parameter list, declaration
 * and else-branch statements restored per SoftFloat-2a.)
 */
static void
normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1,
                           int32_t *zExpPtr,
                           uint64_t *zSig0Ptr, uint64_t *zSig1Ptr)
{
    int8_t shiftCount;

    if ( aSig0 == 0 ) {
        /* High word empty: promote the low word, >=64-bit shift. */
        shiftCount = clz64(aSig1) - 15;
        if ( shiftCount < 0 ) {
            *zSig0Ptr = aSig1 >> ( - shiftCount );
            *zSig1Ptr = aSig1 << ( shiftCount & 63 );
        }
        else {
            *zSig0Ptr = aSig1 << shiftCount;
            *zSig1Ptr = 0;
        }
        *zExpPtr = - shiftCount - 63;
    }
    else {
        shiftCount = clz64(aSig0) - 15;
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
        *zExpPtr = 1 - shiftCount;
    }
}
4856 /*----------------------------------------------------------------------------
4857 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4858 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4859 | floating-point value, returning the result. After being shifted into the
4860 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4861 | added together to form the most significant 32 bits of the result. This
4862 | means that any integer portion of `zSig0' will be added into the exponent.
4863 | Since a properly normalized significand will have an integer portion equal
4864 | to 1, the `zExp' input should be 1 less than the desired result exponent
4865 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4867 *----------------------------------------------------------------------------*/
4869 static inline float128
4870 packFloat128(bool zSign
, int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
)
4875 z
.high
= ((uint64_t)zSign
<< 63) + ((uint64_t)zExp
<< 48) + zSig0
;
4879 /*----------------------------------------------------------------------------
4880 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4881 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4882 | and `zSig2', and returns the proper quadruple-precision floating-point value
4883 | corresponding to the abstract input. Ordinarily, the abstract value is
4884 | simply rounded and packed into the quadruple-precision format, with the
4885 | inexact exception raised if the abstract input cannot be represented
4886 | exactly. However, if the abstract value is too large, the overflow and
4887 | inexact exceptions are raised and an infinity or maximal finite value is
4888 | returned. If the abstract value is too small, the input value is rounded to
4889 | a subnormal number, and the underflow and inexact exceptions are raised if
4890 | the abstract input cannot be represented exactly as a subnormal quadruple-
4891 | precision floating-point number.
4892 | The input significand must be normalized or smaller. If the input
4893 | significand is not normalized, `zExp' must be 0; in that case, the result
4894 | returned is a subnormal number, and it must not require rounding. In the
4895 | usual case that the input significand is normalized, `zExp' must be 1 less
4896 | than the ``true'' floating-point exponent. The handling of underflow and
4897 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4898 *----------------------------------------------------------------------------*/
4900 static float128
roundAndPackFloat128(bool zSign
, int32_t zExp
,
4901 uint64_t zSig0
, uint64_t zSig1
,
4902 uint64_t zSig2
, float_status
*status
)
4904 int8_t roundingMode
;
4905 bool roundNearestEven
, increment
, isTiny
;
4907 roundingMode
= status
->float_rounding_mode
;
4908 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4909 switch (roundingMode
) {
4910 case float_round_nearest_even
:
4911 case float_round_ties_away
:
4912 increment
= ((int64_t)zSig2
< 0);
4914 case float_round_to_zero
:
4917 case float_round_up
:
4918 increment
= !zSign
&& zSig2
;
4920 case float_round_down
:
4921 increment
= zSign
&& zSig2
;
4923 case float_round_to_odd
:
4924 increment
= !(zSig1
& 0x1) && zSig2
;
4929 if ( 0x7FFD <= (uint32_t) zExp
) {
4930 if ( ( 0x7FFD < zExp
)
4931 || ( ( zExp
== 0x7FFD )
4933 UINT64_C(0x0001FFFFFFFFFFFF),
4934 UINT64_C(0xFFFFFFFFFFFFFFFF),
4941 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4942 if ( ( roundingMode
== float_round_to_zero
)
4943 || ( zSign
&& ( roundingMode
== float_round_up
) )
4944 || ( ! zSign
&& ( roundingMode
== float_round_down
) )
4945 || (roundingMode
== float_round_to_odd
)
4951 UINT64_C(0x0000FFFFFFFFFFFF),
4952 UINT64_C(0xFFFFFFFFFFFFFFFF)
4955 return packFloat128( zSign
, 0x7FFF, 0, 0 );
4958 if (status
->flush_to_zero
) {
4959 float_raise(float_flag_output_denormal
, status
);
4960 return packFloat128(zSign
, 0, 0, 0);
4962 isTiny
= status
->tininess_before_rounding
4965 || lt128(zSig0
, zSig1
,
4966 UINT64_C(0x0001FFFFFFFFFFFF),
4967 UINT64_C(0xFFFFFFFFFFFFFFFF));
4968 shift128ExtraRightJamming(
4969 zSig0
, zSig1
, zSig2
, - zExp
, &zSig0
, &zSig1
, &zSig2
);
4971 if (isTiny
&& zSig2
) {
4972 float_raise(float_flag_underflow
, status
);
4974 switch (roundingMode
) {
4975 case float_round_nearest_even
:
4976 case float_round_ties_away
:
4977 increment
= ((int64_t)zSig2
< 0);
4979 case float_round_to_zero
:
4982 case float_round_up
:
4983 increment
= !zSign
&& zSig2
;
4985 case float_round_down
:
4986 increment
= zSign
&& zSig2
;
4988 case float_round_to_odd
:
4989 increment
= !(zSig1
& 0x1) && zSig2
;
4997 float_raise(float_flag_inexact
, status
);
5000 add128( zSig0
, zSig1
, 0, 1, &zSig0
, &zSig1
);
5001 if ((zSig2
+ zSig2
== 0) && roundNearestEven
) {
5006 if ( ( zSig0
| zSig1
) == 0 ) zExp
= 0;
5008 return packFloat128( zSign
, zExp
, zSig0
, zSig1
);
5012 /*----------------------------------------------------------------------------
5013 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
5014 | and significand formed by the concatenation of `zSig0' and `zSig1', and
5015 | returns the proper quadruple-precision floating-point value corresponding
5016 | to the abstract input. This routine is just like `roundAndPackFloat128'
5017 | except that the input significand has fewer bits and does not have to be
5018 | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
5020 *----------------------------------------------------------------------------*/
5022 static float128
normalizeRoundAndPackFloat128(bool zSign
, int32_t zExp
,
5023 uint64_t zSig0
, uint64_t zSig1
,
5024 float_status
*status
)
5034 shiftCount
= clz64(zSig0
) - 15;
5035 if ( 0 <= shiftCount
) {
5037 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
5040 shift128ExtraRightJamming(
5041 zSig0
, zSig1
, 0, - shiftCount
, &zSig0
, &zSig1
, &zSig2
);
5044 return roundAndPackFloat128(zSign
, zExp
, zSig0
, zSig1
, zSig2
, status
);
5048 /*----------------------------------------------------------------------------
5049 | Returns the remainder of the single-precision floating-point value `a'
5050 | with respect to the corresponding value `b'. The operation is performed
5051 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5052 *----------------------------------------------------------------------------*/
5054 float32
float32_rem(float32 a
, float32 b
, float_status
*status
)
5057 int aExp
, bExp
, expDiff
;
5058 uint32_t aSig
, bSig
;
5060 uint64_t aSig64
, bSig64
, q64
;
5061 uint32_t alternateASig
;
5063 a
= float32_squash_input_denormal(a
, status
);
5064 b
= float32_squash_input_denormal(b
, status
);
5066 aSig
= extractFloat32Frac( a
);
5067 aExp
= extractFloat32Exp( a
);
5068 aSign
= extractFloat32Sign( a
);
5069 bSig
= extractFloat32Frac( b
);
5070 bExp
= extractFloat32Exp( b
);
5071 if ( aExp
== 0xFF ) {
5072 if ( aSig
|| ( ( bExp
== 0xFF ) && bSig
) ) {
5073 return propagateFloat32NaN(a
, b
, status
);
5075 float_raise(float_flag_invalid
, status
);
5076 return float32_default_nan(status
);
5078 if ( bExp
== 0xFF ) {
5080 return propagateFloat32NaN(a
, b
, status
);
5086 float_raise(float_flag_invalid
, status
);
5087 return float32_default_nan(status
);
5089 normalizeFloat32Subnormal( bSig
, &bExp
, &bSig
);
5092 if ( aSig
== 0 ) return a
;
5093 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5095 expDiff
= aExp
- bExp
;
5098 if ( expDiff
< 32 ) {
5101 if ( expDiff
< 0 ) {
5102 if ( expDiff
< -1 ) return a
;
5105 q
= ( bSig
<= aSig
);
5106 if ( q
) aSig
-= bSig
;
5107 if ( 0 < expDiff
) {
5108 q
= ( ( (uint64_t) aSig
)<<32 ) / bSig
;
5111 aSig
= ( ( aSig
>>1 )<<( expDiff
- 1 ) ) - bSig
* q
;
5119 if ( bSig
<= aSig
) aSig
-= bSig
;
5120 aSig64
= ( (uint64_t) aSig
)<<40;
5121 bSig64
= ( (uint64_t) bSig
)<<40;
5123 while ( 0 < expDiff
) {
5124 q64
= estimateDiv128To64( aSig64
, 0, bSig64
);
5125 q64
= ( 2 < q64
) ? q64
- 2 : 0;
5126 aSig64
= - ( ( bSig
* q64
)<<38 );
5130 q64
= estimateDiv128To64( aSig64
, 0, bSig64
);
5131 q64
= ( 2 < q64
) ? q64
- 2 : 0;
5132 q
= q64
>>( 64 - expDiff
);
5134 aSig
= ( ( aSig64
>>33 )<<( expDiff
- 1 ) ) - bSig
* q
;
5137 alternateASig
= aSig
;
5140 } while ( 0 <= (int32_t) aSig
);
5141 sigMean
= aSig
+ alternateASig
;
5142 if ( ( sigMean
< 0 ) || ( ( sigMean
== 0 ) && ( q
& 1 ) ) ) {
5143 aSig
= alternateASig
;
5145 zSign
= ( (int32_t) aSig
< 0 );
5146 if ( zSign
) aSig
= - aSig
;
5147 return normalizeRoundAndPackFloat32(aSign
^ zSign
, bExp
, aSig
, status
);
5152 /*----------------------------------------------------------------------------
5153 | Returns the binary exponential of the single-precision floating-point value
5154 | `a'. The operation is performed according to the IEC/IEEE Standard for
5155 | Binary Floating-Point Arithmetic.
5157 | Uses the following identities:
5159 | 1. -------------------------------------------------------------------------
5163 | 2. -------------------------------------------------------------------------
5166 | e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5168 *----------------------------------------------------------------------------*/
5170 static const float64 float32_exp2_coefficients
[15] =
5172 const_float64( 0x3ff0000000000000ll
), /* 1 */
5173 const_float64( 0x3fe0000000000000ll
), /* 2 */
5174 const_float64( 0x3fc5555555555555ll
), /* 3 */
5175 const_float64( 0x3fa5555555555555ll
), /* 4 */
5176 const_float64( 0x3f81111111111111ll
), /* 5 */
5177 const_float64( 0x3f56c16c16c16c17ll
), /* 6 */
5178 const_float64( 0x3f2a01a01a01a01all
), /* 7 */
5179 const_float64( 0x3efa01a01a01a01all
), /* 8 */
5180 const_float64( 0x3ec71de3a556c734ll
), /* 9 */
5181 const_float64( 0x3e927e4fb7789f5cll
), /* 10 */
5182 const_float64( 0x3e5ae64567f544e4ll
), /* 11 */
5183 const_float64( 0x3e21eed8eff8d898ll
), /* 12 */
5184 const_float64( 0x3de6124613a86d09ll
), /* 13 */
5185 const_float64( 0x3da93974a8c07c9dll
), /* 14 */
5186 const_float64( 0x3d6ae7f3e733b81fll
), /* 15 */
5189 float32
float32_exp2(float32 a
, float_status
*status
)
5196 a
= float32_squash_input_denormal(a
, status
);
5198 aSig
= extractFloat32Frac( a
);
5199 aExp
= extractFloat32Exp( a
);
5200 aSign
= extractFloat32Sign( a
);
5202 if ( aExp
== 0xFF) {
5204 return propagateFloat32NaN(a
, float32_zero
, status
);
5206 return (aSign
) ? float32_zero
: a
;
5209 if (aSig
== 0) return float32_one
;
5212 float_raise(float_flag_inexact
, status
);
5214 /* ******************************* */
5215 /* using float64 for approximation */
5216 /* ******************************* */
5217 x
= float32_to_float64(a
, status
);
5218 x
= float64_mul(x
, float64_ln2
, status
);
5222 for (i
= 0 ; i
< 15 ; i
++) {
5225 f
= float64_mul(xn
, float32_exp2_coefficients
[i
], status
);
5226 r
= float64_add(r
, f
, status
);
5228 xn
= float64_mul(xn
, x
, status
);
5231 return float64_to_float32(r
, status
);
5234 /*----------------------------------------------------------------------------
5235 | Returns the binary log of the single-precision floating-point value `a'.
5236 | The operation is performed according to the IEC/IEEE Standard for Binary
5237 | Floating-Point Arithmetic.
5238 *----------------------------------------------------------------------------*/
5239 float32
float32_log2(float32 a
, float_status
*status
)
5243 uint32_t aSig
, zSig
, i
;
5245 a
= float32_squash_input_denormal(a
, status
);
5246 aSig
= extractFloat32Frac( a
);
5247 aExp
= extractFloat32Exp( a
);
5248 aSign
= extractFloat32Sign( a
);
5251 if ( aSig
== 0 ) return packFloat32( 1, 0xFF, 0 );
5252 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5255 float_raise(float_flag_invalid
, status
);
5256 return float32_default_nan(status
);
5258 if ( aExp
== 0xFF ) {
5260 return propagateFloat32NaN(a
, float32_zero
, status
);
5270 for (i
= 1 << 22; i
> 0; i
>>= 1) {
5271 aSig
= ( (uint64_t)aSig
* aSig
) >> 23;
5272 if ( aSig
& 0x01000000 ) {
5281 return normalizeRoundAndPackFloat32(zSign
, 0x85, zSig
, status
);
5284 /*----------------------------------------------------------------------------
5285 | Returns the remainder of the double-precision floating-point value `a'
5286 | with respect to the corresponding value `b'. The operation is performed
5287 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5288 *----------------------------------------------------------------------------*/
5290 float64
float64_rem(float64 a
, float64 b
, float_status
*status
)
5293 int aExp
, bExp
, expDiff
;
5294 uint64_t aSig
, bSig
;
5295 uint64_t q
, alternateASig
;
5298 a
= float64_squash_input_denormal(a
, status
);
5299 b
= float64_squash_input_denormal(b
, status
);
5300 aSig
= extractFloat64Frac( a
);
5301 aExp
= extractFloat64Exp( a
);
5302 aSign
= extractFloat64Sign( a
);
5303 bSig
= extractFloat64Frac( b
);
5304 bExp
= extractFloat64Exp( b
);
5305 if ( aExp
== 0x7FF ) {
5306 if ( aSig
|| ( ( bExp
== 0x7FF ) && bSig
) ) {
5307 return propagateFloat64NaN(a
, b
, status
);
5309 float_raise(float_flag_invalid
, status
);
5310 return float64_default_nan(status
);
5312 if ( bExp
== 0x7FF ) {
5314 return propagateFloat64NaN(a
, b
, status
);
5320 float_raise(float_flag_invalid
, status
);
5321 return float64_default_nan(status
);
5323 normalizeFloat64Subnormal( bSig
, &bExp
, &bSig
);
5326 if ( aSig
== 0 ) return a
;
5327 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5329 expDiff
= aExp
- bExp
;
5330 aSig
= (aSig
| UINT64_C(0x0010000000000000)) << 11;
5331 bSig
= (bSig
| UINT64_C(0x0010000000000000)) << 11;
5332 if ( expDiff
< 0 ) {
5333 if ( expDiff
< -1 ) return a
;
5336 q
= ( bSig
<= aSig
);
5337 if ( q
) aSig
-= bSig
;
5339 while ( 0 < expDiff
) {
5340 q
= estimateDiv128To64( aSig
, 0, bSig
);
5341 q
= ( 2 < q
) ? q
- 2 : 0;
5342 aSig
= - ( ( bSig
>>2 ) * q
);
5346 if ( 0 < expDiff
) {
5347 q
= estimateDiv128To64( aSig
, 0, bSig
);
5348 q
= ( 2 < q
) ? q
- 2 : 0;
5351 aSig
= ( ( aSig
>>1 )<<( expDiff
- 1 ) ) - bSig
* q
;
5358 alternateASig
= aSig
;
5361 } while ( 0 <= (int64_t) aSig
);
5362 sigMean
= aSig
+ alternateASig
;
5363 if ( ( sigMean
< 0 ) || ( ( sigMean
== 0 ) && ( q
& 1 ) ) ) {
5364 aSig
= alternateASig
;
5366 zSign
= ( (int64_t) aSig
< 0 );
5367 if ( zSign
) aSig
= - aSig
;
5368 return normalizeRoundAndPackFloat64(aSign
^ zSign
, bExp
, aSig
, status
);
5372 /*----------------------------------------------------------------------------
5373 | Returns the binary log of the double-precision floating-point value `a'.
5374 | The operation is performed according to the IEC/IEEE Standard for Binary
5375 | Floating-Point Arithmetic.
5376 *----------------------------------------------------------------------------*/
5377 float64
float64_log2(float64 a
, float_status
*status
)
5381 uint64_t aSig
, aSig0
, aSig1
, zSig
, i
;
5382 a
= float64_squash_input_denormal(a
, status
);
5384 aSig
= extractFloat64Frac( a
);
5385 aExp
= extractFloat64Exp( a
);
5386 aSign
= extractFloat64Sign( a
);
5389 if ( aSig
== 0 ) return packFloat64( 1, 0x7FF, 0 );
5390 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5393 float_raise(float_flag_invalid
, status
);
5394 return float64_default_nan(status
);
5396 if ( aExp
== 0x7FF ) {
5398 return propagateFloat64NaN(a
, float64_zero
, status
);
5404 aSig
|= UINT64_C(0x0010000000000000);
5406 zSig
= (uint64_t)aExp
<< 52;
5407 for (i
= 1LL << 51; i
> 0; i
>>= 1) {
5408 mul64To128( aSig
, aSig
, &aSig0
, &aSig1
);
5409 aSig
= ( aSig0
<< 12 ) | ( aSig1
>> 52 );
5410 if ( aSig
& UINT64_C(0x0020000000000000) ) {
5418 return normalizeRoundAndPackFloat64(zSign
, 0x408, zSig
, status
);
5421 /*----------------------------------------------------------------------------
5422 | Rounds the extended double-precision floating-point value `a'
5423 | to the precision provided by floatx80_rounding_precision and returns the
5424 | result as an extended double-precision floating-point value.
5425 | The operation is performed according to the IEC/IEEE Standard for Binary
5426 | Floating-Point Arithmetic.
5427 *----------------------------------------------------------------------------*/
5429 floatx80
floatx80_round(floatx80 a
, float_status
*status
)
5433 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
5434 return floatx80_default_nan(status
);
5436 return floatx80_round_pack_canonical(&p
, status
);
5439 /*----------------------------------------------------------------------------
5440 | Returns the remainder of the extended double-precision floating-point value
5441 | `a' with respect to the corresponding value `b'. The operation is performed
5442 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
5443 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
5444 | the quotient toward zero instead. '*quotient' is set to the low 64 bits of
5445 | the absolute value of the integer quotient.
5446 *----------------------------------------------------------------------------*/
5448 floatx80
floatx80_modrem(floatx80 a
, floatx80 b
, bool mod
, uint64_t *quotient
,
5449 float_status
*status
)
5452 int32_t aExp
, bExp
, expDiff
, aExpOrig
;
5453 uint64_t aSig0
, aSig1
, bSig
;
5454 uint64_t q
, term0
, term1
, alternateASig0
, alternateASig1
;
5457 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
5458 float_raise(float_flag_invalid
, status
);
5459 return floatx80_default_nan(status
);
5461 aSig0
= extractFloatx80Frac( a
);
5462 aExpOrig
= aExp
= extractFloatx80Exp( a
);
5463 aSign
= extractFloatx80Sign( a
);
5464 bSig
= extractFloatx80Frac( b
);
5465 bExp
= extractFloatx80Exp( b
);
5466 if ( aExp
== 0x7FFF ) {
5467 if ( (uint64_t) ( aSig0
<<1 )
5468 || ( ( bExp
== 0x7FFF ) && (uint64_t) ( bSig
<<1 ) ) ) {
5469 return propagateFloatx80NaN(a
, b
, status
);
5473 if ( bExp
== 0x7FFF ) {
5474 if ((uint64_t)(bSig
<< 1)) {
5475 return propagateFloatx80NaN(a
, b
, status
);
5477 if (aExp
== 0 && aSig0
>> 63) {
5479 * Pseudo-denormal argument must be returned in normalized
5482 return packFloatx80(aSign
, 1, aSig0
);
5489 float_raise(float_flag_invalid
, status
);
5490 return floatx80_default_nan(status
);
5492 normalizeFloatx80Subnormal( bSig
, &bExp
, &bSig
);
5495 if ( aSig0
== 0 ) return a
;
5496 normalizeFloatx80Subnormal( aSig0
, &aExp
, &aSig0
);
5499 expDiff
= aExp
- bExp
;
5501 if ( expDiff
< 0 ) {
5502 if ( mod
|| expDiff
< -1 ) {
5503 if (aExp
== 1 && aExpOrig
== 0) {
5505 * Pseudo-denormal argument must be returned in
5508 return packFloatx80(aSign
, aExp
, aSig0
);
5512 shift128Right( aSig0
, 0, 1, &aSig0
, &aSig1
);
5515 *quotient
= q
= ( bSig
<= aSig0
);
5516 if ( q
) aSig0
-= bSig
;
5518 while ( 0 < expDiff
) {
5519 q
= estimateDiv128To64( aSig0
, aSig1
, bSig
);
5520 q
= ( 2 < q
) ? q
- 2 : 0;
5521 mul64To128( bSig
, q
, &term0
, &term1
);
5522 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
5523 shortShift128Left( aSig0
, aSig1
, 62, &aSig0
, &aSig1
);
5529 if ( 0 < expDiff
) {
5530 q
= estimateDiv128To64( aSig0
, aSig1
, bSig
);
5531 q
= ( 2 < q
) ? q
- 2 : 0;
5533 mul64To128( bSig
, q
<<( 64 - expDiff
), &term0
, &term1
);
5534 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
5535 shortShift128Left( 0, bSig
, 64 - expDiff
, &term0
, &term1
);
5536 while ( le128( term0
, term1
, aSig0
, aSig1
) ) {
5538 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
5541 *quotient
<<= expDiff
;
5552 sub128( term0
, term1
, aSig0
, aSig1
, &alternateASig0
, &alternateASig1
);
5553 if ( lt128( alternateASig0
, alternateASig1
, aSig0
, aSig1
)
5554 || ( eq128( alternateASig0
, alternateASig1
, aSig0
, aSig1
)
5557 aSig0
= alternateASig0
;
5558 aSig1
= alternateASig1
;
5564 normalizeRoundAndPackFloatx80(
5565 floatx80_precision_x
, zSign
, bExp
+ expDiff
, aSig0
, aSig1
, status
);
5569 /*----------------------------------------------------------------------------
5570 | Returns the remainder of the extended double-precision floating-point value
5571 | `a' with respect to the corresponding value `b'. The operation is performed
5572 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5573 *----------------------------------------------------------------------------*/
5575 floatx80
floatx80_rem(floatx80 a
, floatx80 b
, float_status
*status
)
5578 return floatx80_modrem(a
, b
, false, "ient
, status
);
5581 /*----------------------------------------------------------------------------
5582 | Returns the remainder of the extended double-precision floating-point value
5583 | `a' with respect to the corresponding value `b', with the quotient truncated
5585 *----------------------------------------------------------------------------*/
5587 floatx80
floatx80_mod(floatx80 a
, floatx80 b
, float_status
*status
)
5590 return floatx80_modrem(a
, b
, true, "ient
, status
);
5593 /*----------------------------------------------------------------------------
5594 | Returns the remainder of the quadruple-precision floating-point value `a'
5595 | with respect to the corresponding value `b'. The operation is performed
5596 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5597 *----------------------------------------------------------------------------*/
5599 float128
float128_rem(float128 a
, float128 b
, float_status
*status
)
5602 int32_t aExp
, bExp
, expDiff
;
5603 uint64_t aSig0
, aSig1
, bSig0
, bSig1
, q
, term0
, term1
, term2
;
5604 uint64_t allZero
, alternateASig0
, alternateASig1
, sigMean1
;
5607 aSig1
= extractFloat128Frac1( a
);
5608 aSig0
= extractFloat128Frac0( a
);
5609 aExp
= extractFloat128Exp( a
);
5610 aSign
= extractFloat128Sign( a
);
5611 bSig1
= extractFloat128Frac1( b
);
5612 bSig0
= extractFloat128Frac0( b
);
5613 bExp
= extractFloat128Exp( b
);
5614 if ( aExp
== 0x7FFF ) {
5615 if ( ( aSig0
| aSig1
)
5616 || ( ( bExp
== 0x7FFF ) && ( bSig0
| bSig1
) ) ) {
5617 return propagateFloat128NaN(a
, b
, status
);
5621 if ( bExp
== 0x7FFF ) {
5622 if (bSig0
| bSig1
) {
5623 return propagateFloat128NaN(a
, b
, status
);
5628 if ( ( bSig0
| bSig1
) == 0 ) {
5630 float_raise(float_flag_invalid
, status
);
5631 return float128_default_nan(status
);
5633 normalizeFloat128Subnormal( bSig0
, bSig1
, &bExp
, &bSig0
, &bSig1
);
5636 if ( ( aSig0
| aSig1
) == 0 ) return a
;
5637 normalizeFloat128Subnormal( aSig0
, aSig1
, &aExp
, &aSig0
, &aSig1
);
5639 expDiff
= aExp
- bExp
;
5640 if ( expDiff
< -1 ) return a
;
5642 aSig0
| UINT64_C(0x0001000000000000),
5644 15 - ( expDiff
< 0 ),
5649 bSig0
| UINT64_C(0x0001000000000000), bSig1
, 15, &bSig0
, &bSig1
);
5650 q
= le128( bSig0
, bSig1
, aSig0
, aSig1
);
5651 if ( q
) sub128( aSig0
, aSig1
, bSig0
, bSig1
, &aSig0
, &aSig1
);
5653 while ( 0 < expDiff
) {
5654 q
= estimateDiv128To64( aSig0
, aSig1
, bSig0
);
5655 q
= ( 4 < q
) ? q
- 4 : 0;
5656 mul128By64To192( bSig0
, bSig1
, q
, &term0
, &term1
, &term2
);
5657 shortShift192Left( term0
, term1
, term2
, 61, &term1
, &term2
, &allZero
);
5658 shortShift128Left( aSig0
, aSig1
, 61, &aSig0
, &allZero
);
5659 sub128( aSig0
, 0, term1
, term2
, &aSig0
, &aSig1
);
5662 if ( -64 < expDiff
) {
5663 q
= estimateDiv128To64( aSig0
, aSig1
, bSig0
);
5664 q
= ( 4 < q
) ? q
- 4 : 0;
5666 shift128Right( bSig0
, bSig1
, 12, &bSig0
, &bSig1
);
5668 if ( expDiff
< 0 ) {
5669 shift128Right( aSig0
, aSig1
, - expDiff
, &aSig0
, &aSig1
);
5672 shortShift128Left( aSig0
, aSig1
, expDiff
, &aSig0
, &aSig1
);
5674 mul128By64To192( bSig0
, bSig1
, q
, &term0
, &term1
, &term2
);
5675 sub128( aSig0
, aSig1
, term1
, term2
, &aSig0
, &aSig1
);
5678 shift128Right( aSig0
, aSig1
, 12, &aSig0
, &aSig1
);
5679 shift128Right( bSig0
, bSig1
, 12, &bSig0
, &bSig1
);
5682 alternateASig0
= aSig0
;
5683 alternateASig1
= aSig1
;
5685 sub128( aSig0
, aSig1
, bSig0
, bSig1
, &aSig0
, &aSig1
);
5686 } while ( 0 <= (int64_t) aSig0
);
5688 aSig0
, aSig1
, alternateASig0
, alternateASig1
, (uint64_t *)&sigMean0
, &sigMean1
);
5689 if ( ( sigMean0
< 0 )
5690 || ( ( ( sigMean0
| sigMean1
) == 0 ) && ( q
& 1 ) ) ) {
5691 aSig0
= alternateASig0
;
5692 aSig1
= alternateASig1
;
5694 zSign
= ( (int64_t) aSig0
< 0 );
5695 if ( zSign
) sub128( 0, 0, aSig0
, aSig1
, &aSig0
, &aSig1
);
5696 return normalizeRoundAndPackFloat128(aSign
^ zSign
, bExp
- 4, aSig0
, aSig1
,
5700 static inline FloatRelation
5701 floatx80_compare_internal(floatx80 a
, floatx80 b
, bool is_quiet
,
5702 float_status
*status
)
5706 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
5707 float_raise(float_flag_invalid
, status
);
5708 return float_relation_unordered
;
5710 if (( ( extractFloatx80Exp( a
) == 0x7fff ) &&
5711 ( extractFloatx80Frac( a
)<<1 ) ) ||
5712 ( ( extractFloatx80Exp( b
) == 0x7fff ) &&
5713 ( extractFloatx80Frac( b
)<<1 ) )) {
5715 floatx80_is_signaling_nan(a
, status
) ||
5716 floatx80_is_signaling_nan(b
, status
)) {
5717 float_raise(float_flag_invalid
, status
);
5719 return float_relation_unordered
;
5721 aSign
= extractFloatx80Sign( a
);
5722 bSign
= extractFloatx80Sign( b
);
5723 if ( aSign
!= bSign
) {
5725 if ( ( ( (uint16_t) ( ( a
.high
| b
.high
) << 1 ) ) == 0) &&
5726 ( ( a
.low
| b
.low
) == 0 ) ) {
5728 return float_relation_equal
;
5730 return 1 - (2 * aSign
);
5733 /* Normalize pseudo-denormals before comparison. */
5734 if ((a
.high
& 0x7fff) == 0 && a
.low
& UINT64_C(0x8000000000000000)) {
5737 if ((b
.high
& 0x7fff) == 0 && b
.low
& UINT64_C(0x8000000000000000)) {
5740 if (a
.low
== b
.low
&& a
.high
== b
.high
) {
5741 return float_relation_equal
;
5743 return 1 - 2 * (aSign
^ ( lt128( a
.high
, a
.low
, b
.high
, b
.low
) ));
5748 FloatRelation
floatx80_compare(floatx80 a
, floatx80 b
, float_status
*status
)
5750 return floatx80_compare_internal(a
, b
, 0, status
);
5753 FloatRelation
floatx80_compare_quiet(floatx80 a
, floatx80 b
,
5754 float_status
*status
)
5756 return floatx80_compare_internal(a
, b
, 1, status
);
5759 static void __attribute__((constructor
)) softfloat_init(void)
5761 union_float64 ua
, ub
, uc
, ur
;
5763 if (QEMU_NO_HARDFLOAT
) {
5767 * Test that the host's FMA is not obviously broken. For example,
5768 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5769 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5771 ua
.s
= 0x0020000000000001ULL
;
5772 ub
.s
= 0x3ca0000000000000ULL
;
5773 uc
.s
= 0x0020000000000000ULL
;
5774 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
5775 if (ur
.s
!= 0x0020000000000001ULL
) {
5776 force_soft_fma
= true;