fpu/softfloat.c

   1 /*
   2  * QEMU float support
   3  *
   4  * The code in this source file is derived from release 2a of the SoftFloat
   5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6  * some later contributions) are provided under that license, as detailed below.
   7  * It has subsequently been modified by contributors to the QEMU Project,
   8  * so some portions are provided under:
   9  *  the SoftFloat-2a license
  10  *  the BSD license
  11  *  GPL-v2-or-later
  12  *
  13  * Any future contributions to this file after December 1st 2014 will be
  14  * taken to be licensed under the Softfloat-2a license unless specifically
  15  * indicated otherwise.
  16  */
  17
  18 /*
  19 ===============================================================================
  20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
  21 Arithmetic Package, Release 2a.
  22
  23 Written by John R. Hauser.  This work was made possible in part by the
  24 International Computer Science Institute, located at Suite 600, 1947 Center
  25 Street, Berkeley, California 94704.  Funding was partially provided by the
  26 National Science Foundation under grant MIP-9311980.  The original version
  27 of this code was written as part of a project to build a fixed-point vector
  28 processor in collaboration with the University of California at Berkeley,
  29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  31 arithmetic/SoftFloat.html'.
  32
  33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  38
  39 Derivative works are acceptable, even for commercial purposes, so long as
  40 (1) they include prominent notice that the work is derivative, and (2) they
  41 include prominent notice akin to these four paragraphs for those parts of
  42 this code that are retained.
  43
  44 ===============================================================================
  45 */
  46
  47 /* BSD licensing:
  48  * Copyright (c) 2006, Fabrice Bellard
  49  * All rights reserved.
  50  *
  51  * Redistribution and use in source and binary forms, with or without
  52  * modification, are permitted provided that the following conditions are met:
  53  *
  54  * 1. Redistributions of source code must retain the above copyright notice,
  55  * this list of conditions and the following disclaimer.
  56  *
  57  * 2. Redistributions in binary form must reproduce the above copyright notice,
  58  * this list of conditions and the following disclaimer in the documentation
  59  * and/or other materials provided with the distribution.
  60  *
  61  * 3. Neither the name of the copyright holder nor the names of its contributors
  62  * may be used to endorse or promote products derived from this software without
  63  * specific prior written permission.
  64  *
  65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  75  * THE POSSIBILITY OF SUCH DAMAGE.
  76  */
  77
  78 /* Portions of this work are licensed under the terms of the GNU GPL,
  79  * version 2 or later. See the COPYING file in the top-level directory.
  80  */
  81
  82 /* softfloat (and in particular the code in softfloat-specialize.h) is
  83  * target-dependent and needs the TARGET_* macros.
  84  */
  85 #include "qemu/osdep.h"
  86 #include <math.h>
  87 #include "qemu/bitops.h"
  88 #include "fpu/softfloat.h"
  89
  90 /* We only need stdlib for abort() */
  91
  92 /*----------------------------------------------------------------------------
  93 | Primitive arithmetic functions, including multi-word arithmetic, and
  94 | division and square root approximations.  (Can be specialized to target if
  95 | desired.)
  96 *----------------------------------------------------------------------------*/
  97 #include "fpu/softfloat-macros.h"
  98
  99 /*
 100  * Hardfloat
 101  *
 102  * Fast emulation of guest FP instructions is challenging for two reasons.
 103  * First, FP instruction semantics are similar but not identical, particularly
 104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
 105  * exception flags is not trivial: reading the host's flags register with a
 106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 107  * and trapping on every FP exception is not fast nor pleasant to work with.
 108  *
 109  * We address these challenges by leveraging the host FPU for a subset of the
 110  * operations. To do this we expand on the idea presented in this paper:
 111  *
 112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 114  *
 115  * The idea is thus to leverage the host FPU to (1) compute FP operations
 116  * and (2) identify whether FP exceptions occurred while avoiding
 117  * expensive exception flag register accesses.
 118  *
 119  * An important optimization shown in the paper is that given that exception
 120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 121  * This is particularly useful for the inexact flag, which is very frequently
 122  * raised in floating-point workloads.
 123  *
 124  * We optimize the code further by deferring to soft-fp whenever FP exception
 125  * detection might get hairy. Two examples: (1) when at least one operand is
 126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 127  * and the result is < the minimum normal.
 128  */
 129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
 130     static inline void name(soft_t *a, float_status *s)                 \
 131     {                                                                   \
 132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
 133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
 134                                      soft_t ## _is_neg(*a));            \
 135             float_raise(float_flag_input_denormal, s);                  \
 136         }                                                               \
 137     }
 138
 139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
 140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
 141 #undef GEN_INPUT_FLUSH__NOCHECK
 142
 143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
 144     static inline void name(soft_t *a, float_status *s) \
 145     {                                                   \
 146         if (likely(!s->flush_inputs_to_zero)) {         \
 147             return;                                     \
 148         }                                               \
 149         soft_t ## _input_flush__nocheck(a, s);          \
 150     }
 151
 152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
 153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
 154 #undef GEN_INPUT_FLUSH1
 155
 156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
 157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
 158     {                                                                   \
 159         if (likely(!s->flush_inputs_to_zero)) {                         \
 160             return;                                                     \
 161         }                                                               \
 162         soft_t ## _input_flush__nocheck(a, s);                          \
 163         soft_t ## _input_flush__nocheck(b, s);                          \
 164     }
 165
 166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
 167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
 168 #undef GEN_INPUT_FLUSH2
 169
 170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
 171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
 172     {                                                                   \
 173         if (likely(!s->flush_inputs_to_zero)) {                         \
 174             return;                                                     \
 175         }                                                               \
 176         soft_t ## _input_flush__nocheck(a, s);                          \
 177         soft_t ## _input_flush__nocheck(b, s);                          \
 178         soft_t ## _input_flush__nocheck(c, s);                          \
 179     }
 180
 181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
 182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
 183 #undef GEN_INPUT_FLUSH3
 184
 185 /*
 186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
 187  * hardfloat functions. Each combination of number of inputs and float size
 188  * gets its own value.
 189  */
 190 #if defined(__x86_64__)
 191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
 193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
 195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
 197 #else
 198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
 200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
 202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
 204 #endif
 205
 206 /*
 207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 208  * float{32,64}_is_infinity when !USE_FP.
 209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 211  */
 212 #if defined(__x86_64__) || defined(__aarch64__)
 213 # define QEMU_HARDFLOAT_USE_ISINF   1
 214 #else
 215 # define QEMU_HARDFLOAT_USE_ISINF   0
 216 #endif
 217
 218 /*
 219  * Some targets clear the FP flags before most FP operations. This prevents
 220  * the use of hardfloat, since hardfloat relies on the inexact flag being
 221  * already set.
 222  */
 223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
 224 # if defined(__FAST_MATH__)
 225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
 226     IEEE implementation
 227 # endif
 228 # define QEMU_NO_HARDFLOAT 1
 229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
 230 #else
 231 # define QEMU_NO_HARDFLOAT 0
 232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
 233 #endif
 234
 235 static inline bool can_use_fpu(const float_status *s)
 236 {
 237     if (QEMU_NO_HARDFLOAT) {
 238         return false;
 239     }
 240     return likely(s->float_exception_flags & float_flag_inexact &&
 241                   s->float_rounding_mode == float_round_nearest_even);
 242 }
 243
 244 /*
 245  * Hardfloat generation functions. Each operation can have two flavors:
 246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 247  * most condition checks, or native ones (e.g. fpclassify).
 248  *
 249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
 250  * compiler to propagate constants and inline everything into the callers.
 251  *
 252  * We only generate functions for operations with two inputs, since only
 253  * these are common enough to justify consolidating them into common code.
 254  */
 255
 256 typedef union {
 257     float32 s;
 258     float h;
 259 } union_float32;
 260
 261 typedef union {
 262     float64 s;
 263     double h;
 264 } union_float64;
 265
 266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
 267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
 268
 269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
 270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
 271 typedef float   (*hard_f32_op2_fn)(float a, float b);
 272 typedef double  (*hard_f64_op2_fn)(double a, double b);
 273
 274 /* 2-input is-zero-or-normal */
 275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
 276 {
 277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
 278         /*
 279          * Not using a temp variable for consecutive fpclassify calls ends up
 280          * generating faster code.
 281          */
 282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 284     }
 285     return float32_is_zero_or_normal(a.s) &&
 286            float32_is_zero_or_normal(b.s);
 287 }
 288
 289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
 290 {
 291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
 292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 294     }
 295     return float64_is_zero_or_normal(a.s) &&
 296            float64_is_zero_or_normal(b.s);
 297 }
 298
 299 /* 3-input is-zero-or-normal */
 300 static inline
 301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
 302 {
 303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
 304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 307     }
 308     return float32_is_zero_or_normal(a.s) &&
 309            float32_is_zero_or_normal(b.s) &&
 310            float32_is_zero_or_normal(c.s);
 311 }
 312
 313 static inline
 314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
 315 {
 316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
 317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 320     }
 321     return float64_is_zero_or_normal(a.s) &&
 322            float64_is_zero_or_normal(b.s) &&
 323            float64_is_zero_or_normal(c.s);
 324 }
 325
 326 static inline bool f32_is_inf(union_float32 a)
 327 {
 328     if (QEMU_HARDFLOAT_USE_ISINF) {
 329         return isinf(a.h);
 330     }
 331     return float32_is_infinity(a.s);
 332 }
 333
 334 static inline bool f64_is_inf(union_float64 a)
 335 {
 336     if (QEMU_HARDFLOAT_USE_ISINF) {
 337         return isinf(a.h);
 338     }
 339     return float64_is_infinity(a.s);
 340 }
 341
 342 static inline float32
 343 float32_gen2(float32 xa, float32 xb, float_status *s,
 344              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
 345              f32_check_fn pre, f32_check_fn post)
 346 {
 347     union_float32 ua, ub, ur;
 348
 349     ua.s = xa;
 350     ub.s = xb;
 351
 352     if (unlikely(!can_use_fpu(s))) {
 353         goto soft;
 354     }
 355
 356     float32_input_flush2(&ua.s, &ub.s, s);
 357     if (unlikely(!pre(ua, ub))) {
 358         goto soft;
 359     }
 360
 361     ur.h = hard(ua.h, ub.h);
 362     if (unlikely(f32_is_inf(ur))) {
 363         float_raise(float_flag_overflow, s);
 364     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
 365         goto soft;
 366     }
 367     return ur.s;
 368
 369  soft:
 370     return soft(ua.s, ub.s, s);
 371 }
 372
 373 static inline float64
 374 float64_gen2(float64 xa, float64 xb, float_status *s,
 375              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
 376              f64_check_fn pre, f64_check_fn post)
 377 {
 378     union_float64 ua, ub, ur;
 379
 380     ua.s = xa;
 381     ub.s = xb;
 382
 383     if (unlikely(!can_use_fpu(s))) {
 384         goto soft;
 385     }
 386
 387     float64_input_flush2(&ua.s, &ub.s, s);
 388     if (unlikely(!pre(ua, ub))) {
 389         goto soft;
 390     }
 391
 392     ur.h = hard(ua.h, ub.h);
 393     if (unlikely(f64_is_inf(ur))) {
 394         float_raise(float_flag_overflow, s);
 395     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
 396         goto soft;
 397     }
 398     return ur.s;
 399
 400  soft:
 401     return soft(ua.s, ub.s, s);
 402 }
 403
 404 /*
 405  * Classify a floating point number. Everything above float_class_qnan
 406  * is a NaN so cls >= float_class_qnan is any NaN.
 407  */
 408
 409 typedef enum __attribute__ ((__packed__)) {
 410     float_class_unclassified,
 411     float_class_zero,
 412     float_class_normal,
 413     float_class_inf,
 414     float_class_qnan,  /* all NaNs from here */
 415     float_class_snan,
 416 } FloatClass;
 417
 418 #define float_cmask(bit)  (1u << (bit))
 419
 420 enum {
 421     float_cmask_zero    = float_cmask(float_class_zero),
 422     float_cmask_normal  = float_cmask(float_class_normal),
 423     float_cmask_inf     = float_cmask(float_class_inf),
 424     float_cmask_qnan    = float_cmask(float_class_qnan),
 425     float_cmask_snan    = float_cmask(float_class_snan),
 426
 427     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
 428     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
 429 };
 430
 431 /* Flags for parts_minmax. */
 432 enum {
 433     /* Set for minimum; clear for maximum. */
 434     minmax_ismin = 1,
 435     /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
 436     minmax_isnum = 2,
 437     /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
 438     minmax_ismag = 4,
 439     /*
 440      * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
 441      * operations.
 442      */
 443     minmax_isnumber = 8,
 444 };
 445
 446 /* Simple helpers for checking if, or what kind of, NaN we have */
 447 static inline __attribute__((unused)) bool is_nan(FloatClass c)
 448 {
 449     return unlikely(c >= float_class_qnan);
 450 }
 451
 452 static inline __attribute__((unused)) bool is_snan(FloatClass c)
 453 {
 454     return c == float_class_snan;
 455 }
 456
 457 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
 458 {
 459     return c == float_class_qnan;
 460 }
 461
 462 /*
 463  * Structure holding all of the decomposed parts of a float.
 464  * The exponent is unbiased and the fraction is normalized.
 465  *
 466  * The fraction words are stored in big-endian word ordering,
 467  * so that truncation from a larger format to a smaller format
 468  * can be done simply by ignoring subsequent elements.
 469  */
 470
 471 typedef struct {
 472     FloatClass cls;
 473     bool sign;
 474     int32_t exp;
 475     union {
 476         /* Routines that know the structure may reference the singular name. */
 477         uint64_t frac;
 478         /*
 479          * Routines expanded with multiple structures reference "hi" and "lo"
 480          * depending on the operation.  In FloatParts64, "hi" and "lo" are
 481          * both the same word and aliased here.
 482          */
 483         uint64_t frac_hi;
 484         uint64_t frac_lo;
 485     };
 486 } FloatParts64;
 487
 488 typedef struct {
 489     FloatClass cls;
 490     bool sign;
 491     int32_t exp;
 492     uint64_t frac_hi;
 493     uint64_t frac_lo;
 494 } FloatParts128;
 495
 496 typedef struct {
 497     FloatClass cls;
 498     bool sign;
 499     int32_t exp;
 500     uint64_t frac_hi;
 501     uint64_t frac_hm;  /* high-middle */
 502     uint64_t frac_lm;  /* low-middle */
 503     uint64_t frac_lo;
 504 } FloatParts256;
 505
 506 /* These apply to the most significant word of each FloatPartsN. */
 507 #define DECOMPOSED_BINARY_POINT    63
 508 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
 509
 510 /* Structure holding all of the relevant parameters for a format.
 511  *   exp_size: the size of the exponent field
 512  *   exp_bias: the offset applied to the exponent field
 513  *   exp_max: the maximum normalised exponent
 514  *   frac_size: the size of the fraction field
 515  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 516  * The following are computed based the size of fraction
 517  *   round_mask: bits below lsb which must be rounded
 518  * The following optional modifiers are available:
 519  *   arm_althp: handle ARM Alternative Half Precision
 520  */
 521 typedef struct {
 522     int exp_size;
 523     int exp_bias;
 524     int exp_re_bias;
 525     int exp_max;
 526     int frac_size;
 527     int frac_shift;
 528     bool arm_althp;
 529     uint64_t round_mask;
 530 } FloatFmt;
 531
 532 /* Expand fields based on the size of exponent and fraction */
 533 #define FLOAT_PARAMS_(E)                                \
 534     .exp_size       = E,                                \
 535     .exp_bias       = ((1 << E) - 1) >> 1,              \
 536     .exp_re_bias    = (1 << (E - 1)) + (1 << (E - 2)),  \
 537     .exp_max        = (1 << E) - 1
 538
 539 #define FLOAT_PARAMS(E, F)                              \
 540     FLOAT_PARAMS_(E),                                   \
 541     .frac_size      = F,                                \
 542     .frac_shift     = (-F - 1) & 63,                    \
 543     .round_mask     = (1ull << ((-F - 1) & 63)) - 1
 544
 545 static const FloatFmt float16_params = {
 546     FLOAT_PARAMS(5, 10)
 547 };
 548
 549 static const FloatFmt float16_params_ahp = {
 550     FLOAT_PARAMS(5, 10),
 551     .arm_althp = true
 552 };
 553
 554 static const FloatFmt bfloat16_params = {
 555     FLOAT_PARAMS(8, 7)
 556 };
 557
 558 static const FloatFmt float32_params = {
 559     FLOAT_PARAMS(8, 23)
 560 };
 561
 562 static const FloatFmt float64_params = {
 563     FLOAT_PARAMS(11, 52)
 564 };
 565
 566 static const FloatFmt float128_params = {
 567     FLOAT_PARAMS(15, 112)
 568 };
 569
 570 #define FLOATX80_PARAMS(R)              \
 571     FLOAT_PARAMS_(15),                  \
 572     .frac_size = R == 64 ? 63 : R,      \
 573     .frac_shift = 0,                    \
 574     .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
 575
 576 static const FloatFmt floatx80_params[3] = {
 577     [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
 578     [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
 579     [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
 580 };
 581
 582 /* Unpack a float to parts, but do not canonicalize.  */
 583 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
 584 {
 585     const int f_size = fmt->frac_size;
 586     const int e_size = fmt->exp_size;
 587
 588     *r = (FloatParts64) {
 589         .cls = float_class_unclassified,
 590         .sign = extract64(raw, f_size + e_size, 1),
 591         .exp = extract64(raw, f_size, e_size),
 592         .frac = extract64(raw, 0, f_size)
 593     };
 594 }
 595
 596 static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
 597 {
 598     unpack_raw64(p, &float16_params, f);
 599 }
 600
 601 static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
 602 {
 603     unpack_raw64(p, &bfloat16_params, f);
 604 }
 605
 606 static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
 607 {
 608     unpack_raw64(p, &float32_params, f);
 609 }
 610
 611 static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
 612 {
 613     unpack_raw64(p, &float64_params, f);
 614 }
 615
 616 static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
 617 {
 618     *p = (FloatParts128) {
 619         .cls = float_class_unclassified,
 620         .sign = extract32(f.high, 15, 1),
 621         .exp = extract32(f.high, 0, 15),
 622         .frac_hi = f.low
 623     };
 624 }
 625
 626 static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
 627 {
 628     const int f_size = float128_params.frac_size - 64;
 629     const int e_size = float128_params.exp_size;
 630
 631     *p = (FloatParts128) {
 632         .cls = float_class_unclassified,
 633         .sign = extract64(f.high, f_size + e_size, 1),
 634         .exp = extract64(f.high, f_size, e_size),
 635         .frac_hi = extract64(f.high, 0, f_size),
 636         .frac_lo = f.low,
 637     };
 638 }
 639
 640 /* Pack a float from parts, but do not canonicalize.  */
 641 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
 642 {
 643     const int f_size = fmt->frac_size;
 644     const int e_size = fmt->exp_size;
 645     uint64_t ret;
 646
 647     ret = (uint64_t)p->sign << (f_size + e_size);
 648     ret = deposit64(ret, f_size, e_size, p->exp);
 649     ret = deposit64(ret, 0, f_size, p->frac);
 650     return ret;
 651 }
 652
 653 static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
 654 {
 655     return make_float16(pack_raw64(p, &float16_params));
 656 }
 657
 658 static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
 659 {
 660     return pack_raw64(p, &bfloat16_params);
 661 }
 662
 663 static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
 664 {
 665     return make_float32(pack_raw64(p, &float32_params));
 666 }
 667
 668 static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
 669 {
 670     return make_float64(pack_raw64(p, &float64_params));
 671 }
 672
 673 static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
 674 {
 675     const int f_size = float128_params.frac_size - 64;
 676     const int e_size = float128_params.exp_size;
 677     uint64_t hi;
 678
 679     hi = (uint64_t)p->sign << (f_size + e_size);
 680     hi = deposit64(hi, f_size, e_size, p->exp);
 681     hi = deposit64(hi, 0, f_size, p->frac_hi);
 682     return make_float128(hi, p->frac_lo);
 683 }
 684
 685 /*----------------------------------------------------------------------------
 686 | Functions and definitions to determine:  (1) whether tininess for underflow
 687 | is detected before or after rounding by default, (2) what (if anything)
 688 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
 689 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
 690 | are propagated from function inputs to output.  These details are target-
 691 | specific.
 692 *----------------------------------------------------------------------------*/
 693 #include "softfloat-specialize.c.inc"
 694
 695 #define PARTS_GENERIC_64_128(NAME, P) \
 696     _Generic((P), FloatParts64 *: parts64_##NAME, \
 697                   FloatParts128 *: parts128_##NAME)
 698
 699 #define PARTS_GENERIC_64_128_256(NAME, P) \
 700     _Generic((P), FloatParts64 *: parts64_##NAME, \
 701                   FloatParts128 *: parts128_##NAME, \
 702                   FloatParts256 *: parts256_##NAME)
 703
 704 #define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
 705 #define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
 706
 707 static void parts64_return_nan(FloatParts64 *a, float_status *s);
 708 static void parts128_return_nan(FloatParts128 *a, float_status *s);
 709
 710 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
 711
 712 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
 713                                       float_status *s);
 714 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
 715                                         float_status *s);
 716
 717 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
 718
 719 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
 720                                              FloatParts64 *c, float_status *s,
 721                                              int ab_mask, int abc_mask);
 722 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
 723                                                FloatParts128 *b,
 724                                                FloatParts128 *c,
 725                                                float_status *s,
 726                                                int ab_mask, int abc_mask);
 727
 728 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
 729     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
 730
 731 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
 732                                  const FloatFmt *fmt);
 733 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
 734                                   const FloatFmt *fmt);
 735
 736 #define parts_canonicalize(A, S, F) \
 737     PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
 738
 739 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
 740                                    const FloatFmt *fmt);
 741 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
 742                                     const FloatFmt *fmt);
 743
 744 #define parts_uncanon_normal(A, S, F) \
 745     PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
 746
 747 static void parts64_uncanon(FloatParts64 *p, float_status *status,
 748                             const FloatFmt *fmt);
 749 static void parts128_uncanon(FloatParts128 *p, float_status *status,
 750                              const FloatFmt *fmt);
 751
 752 #define parts_uncanon(A, S, F) \
 753     PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
 754
 755 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
 756 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
 757 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
 758
 759 #define parts_add_normal(A, B) \
 760     PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
 761
 762 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
 763 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
 764 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
 765
 766 #define parts_sub_normal(A, B) \
 767     PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
 768
 769 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
 770                                     float_status *s, bool subtract);
 771 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
 772                                       float_status *s, bool subtract);
 773
 774 #define parts_addsub(A, B, S, Z) \
 775     PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
 776
 777 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
 778                                  float_status *s);
 779 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
 780                                    float_status *s);
 781
 782 #define parts_mul(A, B, S) \
 783     PARTS_GENERIC_64_128(mul, A)(A, B, S)
 784
 785 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
 786                                     FloatParts64 *c, int flags,
 787                                     float_status *s);
 788 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
 789                                       FloatParts128 *c, int flags,
 790                                       float_status *s);
 791
 792 #define parts_muladd(A, B, C, Z, S) \
 793     PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
 794
 795 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
 796                                  float_status *s);
 797 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
 798                                    float_status *s);
 799
 800 #define parts_div(A, B, S) \
 801     PARTS_GENERIC_64_128(div, A)(A, B, S)
 802
 803 static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
 804                                     uint64_t *mod_quot, float_status *s);
 805 static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
 806                                       uint64_t *mod_quot, float_status *s);
 807
 808 #define parts_modrem(A, B, Q, S) \
 809     PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
 810
 811 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
 812 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
 813
 814 #define parts_sqrt(A, S, F) \
 815     PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
 816
 817 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
 818                                         int scale, int frac_size);
 819 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
 820                                          int scale, int frac_size);
 821
 822 #define parts_round_to_int_normal(A, R, C, F) \
 823     PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
 824
 825 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
 826                                  int scale, float_status *s,
 827                                  const FloatFmt *fmt);
 828 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
 829                                   int scale, float_status *s,
 830                                   const FloatFmt *fmt);
 831
 832 #define parts_round_to_int(A, R, C, S, F) \
 833     PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
 834
 835 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
 836                                      int scale, int64_t min, int64_t max,
 837                                      float_status *s);
 838 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
 839                                      int scale, int64_t min, int64_t max,
 840                                      float_status *s);
 841
 842 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
 843     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
 844
 845 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
 846                                       int scale, uint64_t max,
 847                                       float_status *s);
 848 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
 849                                        int scale, uint64_t max,
 850                                        float_status *s);
 851
 852 #define parts_float_to_uint(P, R, Z, M, S) \
 853     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
 854
 855 static int64_t parts64_float_to_sint_modulo(FloatParts64 *p,
 856                                             FloatRoundMode rmode,
 857                                             int bitsm1, float_status *s);
 858 static int64_t parts128_float_to_sint_modulo(FloatParts128 *p,
 859                                              FloatRoundMode rmode,
 860                                              int bitsm1, float_status *s);
 861
 862 #define parts_float_to_sint_modulo(P, R, M, S) \
 863     PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S)
 864
 865 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
 866                                   int scale, float_status *s);
 867 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
 868                                    int scale, float_status *s);
 869
 870 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
 871     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
 872
 873 #define parts_sint_to_float(P, I, Z, S) \
 874     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
 875
 876 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
 877                                   int scale, float_status *s);
 878 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
 879                                    int scale, float_status *s);
 880
 881 #define parts_uint_to_float(P, I, Z, S) \
 882     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
 883
 884 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
 885                                     float_status *s, int flags);
 886 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
 887                                       float_status *s, int flags);
 888
 889 #define parts_minmax(A, B, S, F) \
 890     PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
 891
 892 static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
 893                                      float_status *s, bool q);
 894 static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
 895                                       float_status *s, bool q);
 896
 897 #define parts_compare(A, B, S, Q) \
 898     PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
 899
 900 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
 901 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
 902
 903 #define parts_scalbn(A, N, S) \
 904     PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
 905
 906 static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
 907 static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
 908
 909 #define parts_log2(A, S, F) \
 910     PARTS_GENERIC_64_128(log2, A)(A, S, F)
 911
 912 /*
 913  * Helper functions for softfloat-parts.c.inc, per-size operations.
 914  */
 915
 916 #define FRAC_GENERIC_64_128(NAME, P) \
 917     _Generic((P), FloatParts64 *: frac64_##NAME, \
 918                   FloatParts128 *: frac128_##NAME)
 919
 920 #define FRAC_GENERIC_64_128_256(NAME, P) \
 921     _Generic((P), FloatParts64 *: frac64_##NAME, \
 922                   FloatParts128 *: frac128_##NAME, \
 923                   FloatParts256 *: frac256_##NAME)
 924
 925 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
 926 {
 927     return uadd64_overflow(a->frac, b->frac, &r->frac);
 928 }
 929
 930 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
 931 {
 932     bool c = 0;
 933     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 934     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 935     return c;
 936 }
 937
 938 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
 939 {
 940     bool c = 0;
 941     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 942     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
 943     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
 944     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 945     return c;
 946 }
 947
 948 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
 949
 950 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
 951 {
 952     return uadd64_overflow(a->frac, c, &r->frac);
 953 }
 954
 955 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
 956 {
 957     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
 958     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
 959 }
 960
 961 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
 962
 963 static void frac64_allones(FloatParts64 *a)
 964 {
 965     a->frac = -1;
 966 }
 967
 968 static void frac128_allones(FloatParts128 *a)
 969 {
 970     a->frac_hi = a->frac_lo = -1;
 971 }
 972
 973 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
 974
 975 static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
 976 {
 977     return (a->frac == b->frac ? float_relation_equal
 978             : a->frac < b->frac ? float_relation_less
 979             : float_relation_greater);
 980 }
 981
 982 static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
 983 {
 984     uint64_t ta = a->frac_hi, tb = b->frac_hi;
 985     if (ta == tb) {
 986         ta = a->frac_lo, tb = b->frac_lo;
 987         if (ta == tb) {
 988             return float_relation_equal;
 989         }
 990     }
 991     return ta < tb ? float_relation_less : float_relation_greater;
 992 }
 993
 994 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
 995
 996 static void frac64_clear(FloatParts64 *a)
 997 {
 998     a->frac = 0;
 999 }
1000
1001 static void frac128_clear(FloatParts128 *a)
1002 {
1003     a->frac_hi = a->frac_lo = 0;
1004 }
1005
1006 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
1007
1008 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
1009 {
1010     uint64_t n1, n0, r, q;
1011     bool ret;
1012
1013     /*
1014      * We want a 2*N / N-bit division to produce exactly an N-bit
1015      * result, so that we do not lose any precision and so that we
1016      * do not have to renormalize afterward.  If A.frac < B.frac,
1017      * then division would produce an (N-1)-bit result; shift A left
1018      * by one to produce the an N-bit result, and return true to
1019      * decrement the exponent to match.
1020      *
1021      * The udiv_qrnnd algorithm that we're using requires normalization,
1022      * i.e. the msb of the denominator must be set, which is already true.
1023      */
1024     ret = a->frac < b->frac;
1025     if (ret) {
1026         n0 = a->frac;
1027         n1 = 0;
1028     } else {
1029         n0 = a->frac >> 1;
1030         n1 = a->frac << 63;
1031     }
1032     q = udiv_qrnnd(&r, n0, n1, b->frac);
1033
1034     /* Set lsb if there is a remainder, to set inexact. */
1035     a->frac = q | (r != 0);
1036
1037     return ret;
1038 }
1039
1040 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1041 {
1042     uint64_t q0, q1, a0, a1, b0, b1;
1043     uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1044     bool ret = false;
1045
1046     a0 = a->frac_hi, a1 = a->frac_lo;
1047     b0 = b->frac_hi, b1 = b->frac_lo;
1048
1049     ret = lt128(a0, a1, b0, b1);
1050     if (!ret) {
1051         a1 = shr_double(a0, a1, 1);
1052         a0 = a0 >> 1;
1053     }
1054
1055     /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1056     q0 = estimateDiv128To64(a0, a1, b0);
1057
1058     /*
1059      * Estimate is high because B1 was not included (unless B1 == 0).
1060      * Reduce quotient and increase remainder until remainder is non-negative.
1061      * This loop will execute 0 to 2 times.
1062      */
1063     mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1064     sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1065     while (r0 != 0) {
1066         q0--;
1067         add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1068     }
1069
1070     /* Repeat using the remainder, producing a second word of quotient. */
1071     q1 = estimateDiv128To64(r1, r2, b0);
1072     mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1073     sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1074     while (r1 != 0) {
1075         q1--;
1076         add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1077     }
1078
1079     /* Any remainder indicates inexact; set sticky bit. */
1080     q1 |= (r2 | r3) != 0;
1081
1082     a->frac_hi = q0;
1083     a->frac_lo = q1;
1084     return ret;
1085 }
1086
1087 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1088
1089 static bool frac64_eqz(FloatParts64 *a)
1090 {
1091     return a->frac == 0;
1092 }
1093
1094 static bool frac128_eqz(FloatParts128 *a)
1095 {
1096     return (a->frac_hi | a->frac_lo) == 0;
1097 }
1098
1099 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1100
1101 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1102 {
1103     mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1104 }
1105
1106 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1107 {
1108     mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1109                 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1110 }
1111
1112 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1113
1114 static void frac64_neg(FloatParts64 *a)
1115 {
1116     a->frac = -a->frac;
1117 }
1118
1119 static void frac128_neg(FloatParts128 *a)
1120 {
1121     bool c = 0;
1122     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1123     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1124 }
1125
1126 static void frac256_neg(FloatParts256 *a)
1127 {
1128     bool c = 0;
1129     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1130     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1131     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1132     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1133 }
1134
1135 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1136
1137 static int frac64_normalize(FloatParts64 *a)
1138 {
1139     if (a->frac) {
1140         int shift = clz64(a->frac);
1141         a->frac <<= shift;
1142         return shift;
1143     }
1144     return 64;
1145 }
1146
1147 static int frac128_normalize(FloatParts128 *a)
1148 {
1149     if (a->frac_hi) {
1150         int shl = clz64(a->frac_hi);
1151         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1152         a->frac_lo <<= shl;
1153         return shl;
1154     } else if (a->frac_lo) {
1155         int shl = clz64(a->frac_lo);
1156         a->frac_hi = a->frac_lo << shl;
1157         a->frac_lo = 0;
1158         return shl + 64;
1159     }
1160     return 128;
1161 }
1162
1163 static int frac256_normalize(FloatParts256 *a)
1164 {
1165     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1166     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1167     int ret, shl;
1168
1169     if (likely(a0)) {
1170         shl = clz64(a0);
1171         if (shl == 0) {
1172             return 0;
1173         }
1174         ret = shl;
1175     } else {
1176         if (a1) {
1177             ret = 64;
1178             a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1179         } else if (a2) {
1180             ret = 128;
1181             a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1182         } else if (a3) {
1183             ret = 192;
1184             a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1185         } else {
1186             ret = 256;
1187             a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1188             goto done;
1189         }
1190         shl = clz64(a0);
1191         if (shl == 0) {
1192             goto done;
1193         }
1194         ret += shl;
1195     }
1196
1197     a0 = shl_double(a0, a1, shl);
1198     a1 = shl_double(a1, a2, shl);
1199     a2 = shl_double(a2, a3, shl);
1200     a3 <<= shl;
1201
1202  done:
1203     a->frac_hi = a0;
1204     a->frac_hm = a1;
1205     a->frac_lm = a2;
1206     a->frac_lo = a3;
1207     return ret;
1208 }
1209
1210 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1211
1212 static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
1213 {
1214     uint64_t a0, a1, b0, t0, t1, q, quot;
1215     int exp_diff = a->exp - b->exp;
1216     int shift;
1217
1218     a0 = a->frac;
1219     a1 = 0;
1220
1221     if (exp_diff < -1) {
1222         if (mod_quot) {
1223             *mod_quot = 0;
1224         }
1225         return;
1226     }
1227     if (exp_diff == -1) {
1228         a0 >>= 1;
1229         exp_diff = 0;
1230     }
1231
1232     b0 = b->frac;
1233     quot = q = b0 <= a0;
1234     if (q) {
1235         a0 -= b0;
1236     }
1237
1238     exp_diff -= 64;
1239     while (exp_diff > 0) {
1240         q = estimateDiv128To64(a0, a1, b0);
1241         q = q > 2 ? q - 2 : 0;
1242         mul64To128(b0, q, &t0, &t1);
1243         sub128(a0, a1, t0, t1, &a0, &a1);
1244         shortShift128Left(a0, a1, 62, &a0, &a1);
1245         exp_diff -= 62;
1246         quot = (quot << 62) + q;
1247     }
1248
1249     exp_diff += 64;
1250     if (exp_diff > 0) {
1251         q = estimateDiv128To64(a0, a1, b0);
1252         q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
1253         mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
1254         sub128(a0, a1, t0, t1, &a0, &a1);
1255         shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
1256         while (le128(t0, t1, a0, a1)) {
1257             ++q;
1258             sub128(a0, a1, t0, t1, &a0, &a1);
1259         }
1260         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1261     } else {
1262         t0 = b0;
1263         t1 = 0;
1264     }
1265
1266     if (mod_quot) {
1267         *mod_quot = quot;
1268     } else {
1269         sub128(t0, t1, a0, a1, &t0, &t1);
1270         if (lt128(t0, t1, a0, a1) ||
1271             (eq128(t0, t1, a0, a1) && (q & 1))) {
1272             a0 = t0;
1273             a1 = t1;
1274             a->sign = !a->sign;
1275         }
1276     }
1277
1278     if (likely(a0)) {
1279         shift = clz64(a0);
1280         shortShift128Left(a0, a1, shift, &a0, &a1);
1281     } else if (likely(a1)) {
1282         shift = clz64(a1);
1283         a0 = a1 << shift;
1284         a1 = 0;
1285         shift += 64;
1286     } else {
1287         a->cls = float_class_zero;
1288         return;
1289     }
1290
1291     a->exp = b->exp + exp_diff - shift;
1292     a->frac = a0 | (a1 != 0);
1293 }
1294
1295 static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
1296                            uint64_t *mod_quot)
1297 {
1298     uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
1299     int exp_diff = a->exp - b->exp;
1300     int shift;
1301
1302     a0 = a->frac_hi;
1303     a1 = a->frac_lo;
1304     a2 = 0;
1305
1306     if (exp_diff < -1) {
1307         if (mod_quot) {
1308             *mod_quot = 0;
1309         }
1310         return;
1311     }
1312     if (exp_diff == -1) {
1313         shift128Right(a0, a1, 1, &a0, &a1);
1314         exp_diff = 0;
1315     }
1316
1317     b0 = b->frac_hi;
1318     b1 = b->frac_lo;
1319
1320     quot = q = le128(b0, b1, a0, a1);
1321     if (q) {
1322         sub128(a0, a1, b0, b1, &a0, &a1);
1323     }
1324
1325     exp_diff -= 64;
1326     while (exp_diff > 0) {
1327         q = estimateDiv128To64(a0, a1, b0);
1328         q = q > 4 ? q - 4 : 0;
1329         mul128By64To192(b0, b1, q, &t0, &t1, &t2);
1330         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1331         shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
1332         exp_diff -= 61;
1333         quot = (quot << 61) + q;
1334     }
1335
1336     exp_diff += 64;
1337     if (exp_diff > 0) {
1338         q = estimateDiv128To64(a0, a1, b0);
1339         q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
1340         mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
1341         sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1342         shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
1343         while (le192(t0, t1, t2, a0, a1, a2)) {
1344             ++q;
1345             sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1346         }
1347         quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1348     } else {
1349         t0 = b0;
1350         t1 = b1;
1351         t2 = 0;
1352     }
1353
1354     if (mod_quot) {
1355         *mod_quot = quot;
1356     } else {
1357         sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
1358         if (lt192(t0, t1, t2, a0, a1, a2) ||
1359             (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
1360             a0 = t0;
1361             a1 = t1;
1362             a2 = t2;
1363             a->sign = !a->sign;
1364         }
1365     }
1366
1367     if (likely(a0)) {
1368         shift = clz64(a0);
1369         shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
1370     } else if (likely(a1)) {
1371         shift = clz64(a1);
1372         shortShift128Left(a1, a2, shift, &a0, &a1);
1373         a2 = 0;
1374         shift += 64;
1375     } else if (likely(a2)) {
1376         shift = clz64(a2);
1377         a0 = a2 << shift;
1378         a1 = a2 = 0;
1379         shift += 128;
1380     } else {
1381         a->cls = float_class_zero;
1382         return;
1383     }
1384
1385     a->exp = b->exp + exp_diff - shift;
1386     a->frac_hi = a0;
1387     a->frac_lo = a1 | (a2 != 0);
1388 }
1389
1390 #define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1391
1392 static void frac64_shl(FloatParts64 *a, int c)
1393 {
1394     a->frac <<= c;
1395 }
1396
1397 static void frac128_shl(FloatParts128 *a, int c)
1398 {
1399     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1400
1401     if (c & 64) {
1402         a0 = a1, a1 = 0;
1403     }
1404
1405     c &= 63;
1406     if (c) {
1407         a0 = shl_double(a0, a1, c);
1408         a1 = a1 << c;
1409     }
1410
1411     a->frac_hi = a0;
1412     a->frac_lo = a1;
1413 }
1414
1415 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1416
1417 static void frac64_shr(FloatParts64 *a, int c)
1418 {
1419     a->frac >>= c;
1420 }
1421
1422 static void frac128_shr(FloatParts128 *a, int c)
1423 {
1424     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1425
1426     if (c & 64) {
1427         a1 = a0, a0 = 0;
1428     }
1429
1430     c &= 63;
1431     if (c) {
1432         a1 = shr_double(a0, a1, c);
1433         a0 = a0 >> c;
1434     }
1435
1436     a->frac_hi = a0;
1437     a->frac_lo = a1;
1438 }
1439
1440 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1441
1442 static void frac64_shrjam(FloatParts64 *a, int c)
1443 {
1444     uint64_t a0 = a->frac;
1445
1446     if (likely(c != 0)) {
1447         if (likely(c < 64)) {
1448             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1449         } else {
1450             a0 = a0 != 0;
1451         }
1452         a->frac = a0;
1453     }
1454 }
1455
1456 static void frac128_shrjam(FloatParts128 *a, int c)
1457 {
1458     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1459     uint64_t sticky = 0;
1460
1461     if (unlikely(c == 0)) {
1462         return;
1463     } else if (likely(c < 64)) {
1464         /* nothing */
1465     } else if (likely(c < 128)) {
1466         sticky = a1;
1467         a1 = a0;
1468         a0 = 0;
1469         c &= 63;
1470         if (c == 0) {
1471             goto done;
1472         }
1473     } else {
1474         sticky = a0 | a1;
1475         a0 = a1 = 0;
1476         goto done;
1477     }
1478
1479     sticky |= shr_double(a1, 0, c);
1480     a1 = shr_double(a0, a1, c);
1481     a0 = a0 >> c;
1482
1483  done:
1484     a->frac_lo = a1 | (sticky != 0);
1485     a->frac_hi = a0;
1486 }
1487
1488 static void frac256_shrjam(FloatParts256 *a, int c)
1489 {
1490     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1491     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1492     uint64_t sticky = 0;
1493
1494     if (unlikely(c == 0)) {
1495         return;
1496     } else if (likely(c < 64)) {
1497         /* nothing */
1498     } else if (likely(c < 256)) {
1499         if (unlikely(c & 128)) {
1500             sticky |= a2 | a3;
1501             a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1502         }
1503         if (unlikely(c & 64)) {
1504             sticky |= a3;
1505             a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1506         }
1507         c &= 63;
1508         if (c == 0) {
1509             goto done;
1510         }
1511     } else {
1512         sticky = a0 | a1 | a2 | a3;
1513         a0 = a1 = a2 = a3 = 0;
1514         goto done;
1515     }
1516
1517     sticky |= shr_double(a3, 0, c);
1518     a3 = shr_double(a2, a3, c);
1519     a2 = shr_double(a1, a2, c);
1520     a1 = shr_double(a0, a1, c);
1521     a0 = a0 >> c;
1522
1523  done:
1524     a->frac_lo = a3 | (sticky != 0);
1525     a->frac_lm = a2;
1526     a->frac_hm = a1;
1527     a->frac_hi = a0;
1528 }
1529
1530 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1531
1532 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1533 {
1534     return usub64_overflow(a->frac, b->frac, &r->frac);
1535 }
1536
1537 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1538 {
1539     bool c = 0;
1540     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1541     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1542     return c;
1543 }
1544
1545 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1546 {
1547     bool c = 0;
1548     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1549     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1550     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1551     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1552     return c;
1553 }
1554
1555 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1556
1557 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1558 {
1559     r->frac = a->frac_hi | (a->frac_lo != 0);
1560 }
1561
1562 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1563 {
1564     r->frac_hi = a->frac_hi;
1565     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1566 }
1567
1568 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1569
1570 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1571 {
1572     r->frac_hi = a->frac;
1573     r->frac_lo = 0;
1574 }
1575
1576 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1577 {
1578     r->frac_hi = a->frac_hi;
1579     r->frac_hm = a->frac_lo;
1580     r->frac_lm = 0;
1581     r->frac_lo = 0;
1582 }
1583
1584 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1585
1586 /*
1587  * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantessa.
1588  * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1589  * and thus MIT licenced.
1590  */
1591 static const uint16_t rsqrt_tab[128] = {
1592     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1593     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1594     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1595     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1596     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1597     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1598     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1599     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1600     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1601     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1602     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1603     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1604     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1605     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1606     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1607     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1608 };
1609
1610 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1611 #define FloatPartsN    glue(FloatParts,N)
1612 #define FloatPartsW    glue(FloatParts,W)
1613
1614 #define N 64
1615 #define W 128
1616
1617 #include "softfloat-parts-addsub.c.inc"
1618 #include "softfloat-parts.c.inc"
1619
1620 #undef  N
1621 #undef  W
1622 #define N 128
1623 #define W 256
1624
1625 #include "softfloat-parts-addsub.c.inc"
1626 #include "softfloat-parts.c.inc"
1627
1628 #undef  N
1629 #undef  W
1630 #define N            256
1631
1632 #include "softfloat-parts-addsub.c.inc"
1633
1634 #undef  N
1635 #undef  W
1636 #undef  partsN
1637 #undef  FloatPartsN
1638 #undef  FloatPartsW
1639
1640 /*
1641  * Pack/unpack routines with a specific FloatFmt.
1642  */
1643
1644 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1645                                       float_status *s, const FloatFmt *params)
1646 {
1647     float16_unpack_raw(p, f);
1648     parts_canonicalize(p, s, params);
1649 }
1650
1651 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1652                                      float_status *s)
1653 {
1654     float16a_unpack_canonical(p, f, s, &float16_params);
1655 }
1656
1657 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1658                                       float_status *s)
1659 {
1660     bfloat16_unpack_raw(p, f);
1661     parts_canonicalize(p, s, &bfloat16_params);
1662 }
1663
1664 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1665                                              float_status *s,
1666                                              const FloatFmt *params)
1667 {
1668     parts_uncanon(p, s, params);
1669     return float16_pack_raw(p);
1670 }
1671
1672 static float16 float16_round_pack_canonical(FloatParts64 *p,
1673                                             float_status *s)
1674 {
1675     return float16a_round_pack_canonical(p, s, &float16_params);
1676 }
1677
1678 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1679                                               float_status *s)
1680 {
1681     parts_uncanon(p, s, &bfloat16_params);
1682     return bfloat16_pack_raw(p);
1683 }
1684
1685 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1686                                      float_status *s)
1687 {
1688     float32_unpack_raw(p, f);
1689     parts_canonicalize(p, s, &float32_params);
1690 }
1691
1692 static float32 float32_round_pack_canonical(FloatParts64 *p,
1693                                             float_status *s)
1694 {
1695     parts_uncanon(p, s, &float32_params);
1696     return float32_pack_raw(p);
1697 }
1698
1699 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1700                                      float_status *s)
1701 {
1702     float64_unpack_raw(p, f);
1703     parts_canonicalize(p, s, &float64_params);
1704 }
1705
1706 static float64 float64_round_pack_canonical(FloatParts64 *p,
1707                                             float_status *s)
1708 {
1709     parts_uncanon(p, s, &float64_params);
1710     return float64_pack_raw(p);
1711 }
1712
1713 static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1714                                                float_status *s)
1715 {
1716     parts_uncanon(p, s, &float32_params);
1717
1718     /*
1719      * In parts_uncanon, we placed the fraction for float32 at the lsb.
1720      * We need to adjust the fraction higher so that the least N bits are
1721      * zero, and the fraction is adjacent to the float64 implicit bit.
1722      */
1723     switch (p->cls) {
1724     case float_class_normal:
1725         if (unlikely(p->exp == 0)) {
1726             /*
1727              * The result is denormal for float32, but can be represented
1728              * in normalized form for float64.  Adjust, per canonicalize.
1729              */
1730             int shift = frac_normalize(p);
1731             p->exp = (float32_params.frac_shift -
1732                       float32_params.exp_bias - shift + 1 +
1733                       float64_params.exp_bias);
1734             frac_shr(p, float64_params.frac_shift);
1735         } else {
1736             frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1737             p->exp += float64_params.exp_bias - float32_params.exp_bias;
1738         }
1739         break;
1740     case float_class_snan:
1741     case float_class_qnan:
1742         frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1743         p->exp = float64_params.exp_max;
1744         break;
1745     case float_class_inf:
1746         p->exp = float64_params.exp_max;
1747         break;
1748     case float_class_zero:
1749         break;
1750     default:
1751         g_assert_not_reached();
1752     }
1753
1754     return float64_pack_raw(p);
1755 }
1756
1757 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1758                                       float_status *s)
1759 {
1760     float128_unpack_raw(p, f);
1761     parts_canonicalize(p, s, &float128_params);
1762 }
1763
1764 static float128 float128_round_pack_canonical(FloatParts128 *p,
1765                                               float_status *s)
1766 {
1767     parts_uncanon(p, s, &float128_params);
1768     return float128_pack_raw(p);
1769 }
1770
1771 /* Returns false if the encoding is invalid. */
1772 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1773                                       float_status *s)
1774 {
1775     /* Ensure rounding precision is set before beginning. */
1776     switch (s->floatx80_rounding_precision) {
1777     case floatx80_precision_x:
1778     case floatx80_precision_d:
1779     case floatx80_precision_s:
1780         break;
1781     default:
1782         g_assert_not_reached();
1783     }
1784
1785     if (unlikely(floatx80_invalid_encoding(f))) {
1786         float_raise(float_flag_invalid, s);
1787         return false;
1788     }
1789
1790     floatx80_unpack_raw(p, f);
1791
1792     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1793         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1794     } else {
1795         /* The explicit integer bit is ignored, after invalid checks. */
1796         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1797         p->cls = (p->frac_hi == 0 ? float_class_inf
1798                   : parts_is_snan_frac(p->frac_hi, s)
1799                   ? float_class_snan : float_class_qnan);
1800     }
1801     return true;
1802 }
1803
1804 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1805                                               float_status *s)
1806 {
1807     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1808     uint64_t frac;
1809     int exp;
1810
1811     switch (p->cls) {
1812     case float_class_normal:
1813         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1814             parts_uncanon_normal(p, s, fmt);
1815             frac = p->frac_hi;
1816             exp = p->exp;
1817         } else {
1818             FloatParts64 p64;
1819
1820             p64.sign = p->sign;
1821             p64.exp = p->exp;
1822             frac_truncjam(&p64, p);
1823             parts_uncanon_normal(&p64, s, fmt);
1824             frac = p64.frac;
1825             exp = p64.exp;
1826         }
1827         if (exp != fmt->exp_max) {
1828             break;
1829         }
1830         /* rounded to inf -- fall through to set frac correctly */
1831
1832     case float_class_inf:
1833         /* x86 and m68k differ in the setting of the integer bit. */
1834         frac = floatx80_infinity_low;
1835         exp = fmt->exp_max;
1836         break;
1837
1838     case float_class_zero:
1839         frac = 0;
1840         exp = 0;
1841         break;
1842
1843     case float_class_snan:
1844     case float_class_qnan:
1845         /* NaNs have the integer bit set. */
1846         frac = p->frac_hi | (1ull << 63);
1847         exp = fmt->exp_max;
1848         break;
1849
1850     default:
1851         g_assert_not_reached();
1852     }
1853
1854     return packFloatx80(p->sign, exp, frac);
1855 }
1856
1857 /*
1858  * Addition and subtraction
1859  */
1860
1861 static float16 QEMU_FLATTEN
1862 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1863 {
1864     FloatParts64 pa, pb, *pr;
1865
1866     float16_unpack_canonical(&pa, a, status);
1867     float16_unpack_canonical(&pb, b, status);
1868     pr = parts_addsub(&pa, &pb, status, subtract);
1869
1870     return float16_round_pack_canonical(pr, status);
1871 }
1872
1873 float16 float16_add(float16 a, float16 b, float_status *status)
1874 {
1875     return float16_addsub(a, b, status, false);
1876 }
1877
1878 float16 float16_sub(float16 a, float16 b, float_status *status)
1879 {
1880     return float16_addsub(a, b, status, true);
1881 }
1882
1883 static float32 QEMU_SOFTFLOAT_ATTR
1884 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1885 {
1886     FloatParts64 pa, pb, *pr;
1887
1888     float32_unpack_canonical(&pa, a, status);
1889     float32_unpack_canonical(&pb, b, status);
1890     pr = parts_addsub(&pa, &pb, status, subtract);
1891
1892     return float32_round_pack_canonical(pr, status);
1893 }
1894
1895 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1896 {
1897     return soft_f32_addsub(a, b, status, false);
1898 }
1899
1900 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1901 {
1902     return soft_f32_addsub(a, b, status, true);
1903 }
1904
1905 static float64 QEMU_SOFTFLOAT_ATTR
1906 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1907 {
1908     FloatParts64 pa, pb, *pr;
1909
1910     float64_unpack_canonical(&pa, a, status);
1911     float64_unpack_canonical(&pb, b, status);
1912     pr = parts_addsub(&pa, &pb, status, subtract);
1913
1914     return float64_round_pack_canonical(pr, status);
1915 }
1916
1917 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1918 {
1919     return soft_f64_addsub(a, b, status, false);
1920 }
1921
1922 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1923 {
1924     return soft_f64_addsub(a, b, status, true);
1925 }
1926
1927 static float hard_f32_add(float a, float b)
1928 {
1929     return a + b;
1930 }
1931
1932 static float hard_f32_sub(float a, float b)
1933 {
1934     return a - b;
1935 }
1936
1937 static double hard_f64_add(double a, double b)
1938 {
1939     return a + b;
1940 }
1941
1942 static double hard_f64_sub(double a, double b)
1943 {
1944     return a - b;
1945 }
1946
1947 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1948 {
1949     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1950         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1951     }
1952     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1953 }
1954
1955 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1956 {
1957     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1958         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1959     } else {
1960         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1961     }
1962 }
1963
1964 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1965                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1966 {
1967     return float32_gen2(a, b, s, hard, soft,
1968                         f32_is_zon2, f32_addsubmul_post);
1969 }
1970
1971 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1972                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1973 {
1974     return float64_gen2(a, b, s, hard, soft,
1975                         f64_is_zon2, f64_addsubmul_post);
1976 }
1977
1978 float32 QEMU_FLATTEN
1979 float32_add(float32 a, float32 b, float_status *s)
1980 {
1981     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1982 }
1983
1984 float32 QEMU_FLATTEN
1985 float32_sub(float32 a, float32 b, float_status *s)
1986 {
1987     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1988 }
1989
1990 float64 QEMU_FLATTEN
1991 float64_add(float64 a, float64 b, float_status *s)
1992 {
1993     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1994 }
1995
1996 float64 QEMU_FLATTEN
1997 float64_sub(float64 a, float64 b, float_status *s)
1998 {
1999     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
2000 }
2001
2002 static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
2003                                  bool subtract)
2004 {
2005     FloatParts64 pa, pb, *pr;
2006
2007     float64_unpack_canonical(&pa, a, status);
2008     float64_unpack_canonical(&pb, b, status);
2009     pr = parts_addsub(&pa, &pb, status, subtract);
2010
2011     return float64r32_round_pack_canonical(pr, status);
2012 }
2013
2014 float64 float64r32_add(float64 a, float64 b, float_status *status)
2015 {
2016     return float64r32_addsub(a, b, status, false);
2017 }
2018
2019 float64 float64r32_sub(float64 a, float64 b, float_status *status)
2020 {
2021     return float64r32_addsub(a, b, status, true);
2022 }
2023
2024 static bfloat16 QEMU_FLATTEN
2025 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2026 {
2027     FloatParts64 pa, pb, *pr;
2028
2029     bfloat16_unpack_canonical(&pa, a, status);
2030     bfloat16_unpack_canonical(&pb, b, status);
2031     pr = parts_addsub(&pa, &pb, status, subtract);
2032
2033     return bfloat16_round_pack_canonical(pr, status);
2034 }
2035
2036 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
2037 {
2038     return bfloat16_addsub(a, b, status, false);
2039 }
2040
2041 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2042 {
2043     return bfloat16_addsub(a, b, status, true);
2044 }
2045
2046 static float128 QEMU_FLATTEN
2047 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2048 {
2049     FloatParts128 pa, pb, *pr;
2050
2051     float128_unpack_canonical(&pa, a, status);
2052     float128_unpack_canonical(&pb, b, status);
2053     pr = parts_addsub(&pa, &pb, status, subtract);
2054
2055     return float128_round_pack_canonical(pr, status);
2056 }
2057
2058 float128 float128_add(float128 a, float128 b, float_status *status)
2059 {
2060     return float128_addsub(a, b, status, false);
2061 }
2062
2063 float128 float128_sub(float128 a, float128 b, float_status *status)
2064 {
2065     return float128_addsub(a, b, status, true);
2066 }
2067
2068 static floatx80 QEMU_FLATTEN
2069 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2070 {
2071     FloatParts128 pa, pb, *pr;
2072
2073     if (!floatx80_unpack_canonical(&pa, a, status) ||
2074         !floatx80_unpack_canonical(&pb, b, status)) {
2075         return floatx80_default_nan(status);
2076     }
2077
2078     pr = parts_addsub(&pa, &pb, status, subtract);
2079     return floatx80_round_pack_canonical(pr, status);
2080 }
2081
2082 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2083 {
2084     return floatx80_addsub(a, b, status, false);
2085 }
2086
2087 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2088 {
2089     return floatx80_addsub(a, b, status, true);
2090 }
2091
2092 /*
2093  * Multiplication
2094  */
2095
2096 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2097 {
2098     FloatParts64 pa, pb, *pr;
2099
2100     float16_unpack_canonical(&pa, a, status);
2101     float16_unpack_canonical(&pb, b, status);
2102     pr = parts_mul(&pa, &pb, status);
2103
2104     return float16_round_pack_canonical(pr, status);
2105 }
2106
2107 static float32 QEMU_SOFTFLOAT_ATTR
2108 soft_f32_mul(float32 a, float32 b, float_status *status)
2109 {
2110     FloatParts64 pa, pb, *pr;
2111
2112     float32_unpack_canonical(&pa, a, status);
2113     float32_unpack_canonical(&pb, b, status);
2114     pr = parts_mul(&pa, &pb, status);
2115
2116     return float32_round_pack_canonical(pr, status);
2117 }
2118
2119 static float64 QEMU_SOFTFLOAT_ATTR
2120 soft_f64_mul(float64 a, float64 b, float_status *status)
2121 {
2122     FloatParts64 pa, pb, *pr;
2123
2124     float64_unpack_canonical(&pa, a, status);
2125     float64_unpack_canonical(&pb, b, status);
2126     pr = parts_mul(&pa, &pb, status);
2127
2128     return float64_round_pack_canonical(pr, status);
2129 }
2130
2131 static float hard_f32_mul(float a, float b)
2132 {
2133     return a * b;
2134 }
2135
2136 static double hard_f64_mul(double a, double b)
2137 {
2138     return a * b;
2139 }
2140
2141 float32 QEMU_FLATTEN
2142 float32_mul(float32 a, float32 b, float_status *s)
2143 {
2144     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2145                         f32_is_zon2, f32_addsubmul_post);
2146 }
2147
2148 float64 QEMU_FLATTEN
2149 float64_mul(float64 a, float64 b, float_status *s)
2150 {
2151     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2152                         f64_is_zon2, f64_addsubmul_post);
2153 }
2154
2155 float64 float64r32_mul(float64 a, float64 b, float_status *status)
2156 {
2157     FloatParts64 pa, pb, *pr;
2158
2159     float64_unpack_canonical(&pa, a, status);
2160     float64_unpack_canonical(&pb, b, status);
2161     pr = parts_mul(&pa, &pb, status);
2162
2163     return float64r32_round_pack_canonical(pr, status);
2164 }
2165
2166 bfloat16 QEMU_FLATTEN
2167 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2168 {
2169     FloatParts64 pa, pb, *pr;
2170
2171     bfloat16_unpack_canonical(&pa, a, status);
2172     bfloat16_unpack_canonical(&pb, b, status);
2173     pr = parts_mul(&pa, &pb, status);
2174
2175     return bfloat16_round_pack_canonical(pr, status);
2176 }
2177
2178 float128 QEMU_FLATTEN
2179 float128_mul(float128 a, float128 b, float_status *status)
2180 {
2181     FloatParts128 pa, pb, *pr;
2182
2183     float128_unpack_canonical(&pa, a, status);
2184     float128_unpack_canonical(&pb, b, status);
2185     pr = parts_mul(&pa, &pb, status);
2186
2187     return float128_round_pack_canonical(pr, status);
2188 }
2189
2190 floatx80 QEMU_FLATTEN
2191 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2192 {
2193     FloatParts128 pa, pb, *pr;
2194
2195     if (!floatx80_unpack_canonical(&pa, a, status) ||
2196         !floatx80_unpack_canonical(&pb, b, status)) {
2197         return floatx80_default_nan(status);
2198     }
2199
2200     pr = parts_mul(&pa, &pb, status);
2201     return floatx80_round_pack_canonical(pr, status);
2202 }
2203
2204 /*
2205  * Fused multiply-add
2206  */
2207
2208 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2209                                     int flags, float_status *status)
2210 {
2211     FloatParts64 pa, pb, pc, *pr;
2212
2213     float16_unpack_canonical(&pa, a, status);
2214     float16_unpack_canonical(&pb, b, status);
2215     float16_unpack_canonical(&pc, c, status);
2216     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2217
2218     return float16_round_pack_canonical(pr, status);
2219 }
2220
2221 static float32 QEMU_SOFTFLOAT_ATTR
2222 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2223                 float_status *status)
2224 {
2225     FloatParts64 pa, pb, pc, *pr;
2226
2227     float32_unpack_canonical(&pa, a, status);
2228     float32_unpack_canonical(&pb, b, status);
2229     float32_unpack_canonical(&pc, c, status);
2230     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2231
2232     return float32_round_pack_canonical(pr, status);
2233 }
2234
2235 static float64 QEMU_SOFTFLOAT_ATTR
2236 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2237                 float_status *status)
2238 {
2239     FloatParts64 pa, pb, pc, *pr;
2240
2241     float64_unpack_canonical(&pa, a, status);
2242     float64_unpack_canonical(&pb, b, status);
2243     float64_unpack_canonical(&pc, c, status);
2244     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2245
2246     return float64_round_pack_canonical(pr, status);
2247 }
2248
2249 static bool force_soft_fma;
2250
2251 float32 QEMU_FLATTEN
2252 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2253 {
2254     union_float32 ua, ub, uc, ur;
2255
2256     ua.s = xa;
2257     ub.s = xb;
2258     uc.s = xc;
2259
2260     if (unlikely(!can_use_fpu(s))) {
2261         goto soft;
2262     }
2263     if (unlikely(flags & float_muladd_halve_result)) {
2264         goto soft;
2265     }
2266
2267     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2268     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2269         goto soft;
2270     }
2271
2272     if (unlikely(force_soft_fma)) {
2273         goto soft;
2274     }
2275
2276     /*
2277      * When (a || b) == 0, there's no need to check for under/over flow,
2278      * since we know the addend is (normal || 0) and the product is 0.
2279      */
2280     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2281         union_float32 up;
2282         bool prod_sign;
2283
2284         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2285         prod_sign ^= !!(flags & float_muladd_negate_product);
2286         up.s = float32_set_sign(float32_zero, prod_sign);
2287
2288         if (flags & float_muladd_negate_c) {
2289             uc.h = -uc.h;
2290         }
2291         ur.h = up.h + uc.h;
2292     } else {
2293         union_float32 ua_orig = ua;
2294         union_float32 uc_orig = uc;
2295
2296         if (flags & float_muladd_negate_product) {
2297             ua.h = -ua.h;
2298         }
2299         if (flags & float_muladd_negate_c) {
2300             uc.h = -uc.h;
2301         }
2302
2303         ur.h = fmaf(ua.h, ub.h, uc.h);
2304
2305         if (unlikely(f32_is_inf(ur))) {
2306             float_raise(float_flag_overflow, s);
2307         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2308             ua = ua_orig;
2309             uc = uc_orig;
2310             goto soft;
2311         }
2312     }
2313     if (flags & float_muladd_negate_result) {
2314         return float32_chs(ur.s);
2315     }
2316     return ur.s;
2317
2318  soft:
2319     return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2320 }
2321
2322 float64 QEMU_FLATTEN
2323 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2324 {
2325     union_float64 ua, ub, uc, ur;
2326
2327     ua.s = xa;
2328     ub.s = xb;
2329     uc.s = xc;
2330
2331     if (unlikely(!can_use_fpu(s))) {
2332         goto soft;
2333     }
2334     if (unlikely(flags & float_muladd_halve_result)) {
2335         goto soft;
2336     }
2337
2338     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2339     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2340         goto soft;
2341     }
2342
2343     if (unlikely(force_soft_fma)) {
2344         goto soft;
2345     }
2346
2347     /*
2348      * When (a || b) == 0, there's no need to check for under/over flow,
2349      * since we know the addend is (normal || 0) and the product is 0.
2350      */
2351     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2352         union_float64 up;
2353         bool prod_sign;
2354
2355         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2356         prod_sign ^= !!(flags & float_muladd_negate_product);
2357         up.s = float64_set_sign(float64_zero, prod_sign);
2358
2359         if (flags & float_muladd_negate_c) {
2360             uc.h = -uc.h;
2361         }
2362         ur.h = up.h + uc.h;
2363     } else {
2364         union_float64 ua_orig = ua;
2365         union_float64 uc_orig = uc;
2366
2367         if (flags & float_muladd_negate_product) {
2368             ua.h = -ua.h;
2369         }
2370         if (flags & float_muladd_negate_c) {
2371             uc.h = -uc.h;
2372         }
2373
2374         ur.h = fma(ua.h, ub.h, uc.h);
2375
2376         if (unlikely(f64_is_inf(ur))) {
2377             float_raise(float_flag_overflow, s);
2378         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2379             ua = ua_orig;
2380             uc = uc_orig;
2381             goto soft;
2382         }
2383     }
2384     if (flags & float_muladd_negate_result) {
2385         return float64_chs(ur.s);
2386     }
2387     return ur.s;
2388
2389  soft:
2390     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2391 }
2392
2393 float64 float64r32_muladd(float64 a, float64 b, float64 c,
2394                           int flags, float_status *status)
2395 {
2396     FloatParts64 pa, pb, pc, *pr;
2397
2398     float64_unpack_canonical(&pa, a, status);
2399     float64_unpack_canonical(&pb, b, status);
2400     float64_unpack_canonical(&pc, c, status);
2401     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2402
2403     return float64r32_round_pack_canonical(pr, status);
2404 }
2405
2406 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2407                                       int flags, float_status *status)
2408 {
2409     FloatParts64 pa, pb, pc, *pr;
2410
2411     bfloat16_unpack_canonical(&pa, a, status);
2412     bfloat16_unpack_canonical(&pb, b, status);
2413     bfloat16_unpack_canonical(&pc, c, status);
2414     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2415
2416     return bfloat16_round_pack_canonical(pr, status);
2417 }
2418
2419 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2420                                       int flags, float_status *status)
2421 {
2422     FloatParts128 pa, pb, pc, *pr;
2423
2424     float128_unpack_canonical(&pa, a, status);
2425     float128_unpack_canonical(&pb, b, status);
2426     float128_unpack_canonical(&pc, c, status);
2427     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2428
2429     return float128_round_pack_canonical(pr, status);
2430 }
2431
2432 /*
2433  * Division
2434  */
2435
2436 float16 float16_div(float16 a, float16 b, float_status *status)
2437 {
2438     FloatParts64 pa, pb, *pr;
2439
2440     float16_unpack_canonical(&pa, a, status);
2441     float16_unpack_canonical(&pb, b, status);
2442     pr = parts_div(&pa, &pb, status);
2443
2444     return float16_round_pack_canonical(pr, status);
2445 }
2446
2447 static float32 QEMU_SOFTFLOAT_ATTR
2448 soft_f32_div(float32 a, float32 b, float_status *status)
2449 {
2450     FloatParts64 pa, pb, *pr;
2451
2452     float32_unpack_canonical(&pa, a, status);
2453     float32_unpack_canonical(&pb, b, status);
2454     pr = parts_div(&pa, &pb, status);
2455
2456     return float32_round_pack_canonical(pr, status);
2457 }
2458
2459 static float64 QEMU_SOFTFLOAT_ATTR
2460 soft_f64_div(float64 a, float64 b, float_status *status)
2461 {
2462     FloatParts64 pa, pb, *pr;
2463
2464     float64_unpack_canonical(&pa, a, status);
2465     float64_unpack_canonical(&pb, b, status);
2466     pr = parts_div(&pa, &pb, status);
2467
2468     return float64_round_pack_canonical(pr, status);
2469 }
2470
2471 static float hard_f32_div(float a, float b)
2472 {
2473     return a / b;
2474 }
2475
2476 static double hard_f64_div(double a, double b)
2477 {
2478     return a / b;
2479 }
2480
2481 static bool f32_div_pre(union_float32 a, union_float32 b)
2482 {
2483     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2484         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2485                fpclassify(b.h) == FP_NORMAL;
2486     }
2487     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2488 }
2489
2490 static bool f64_div_pre(union_float64 a, union_float64 b)
2491 {
2492     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2493         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2494                fpclassify(b.h) == FP_NORMAL;
2495     }
2496     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2497 }
2498
2499 static bool f32_div_post(union_float32 a, union_float32 b)
2500 {
2501     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2502         return fpclassify(a.h) != FP_ZERO;
2503     }
2504     return !float32_is_zero(a.s);
2505 }
2506
2507 static bool f64_div_post(union_float64 a, union_float64 b)
2508 {
2509     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2510         return fpclassify(a.h) != FP_ZERO;
2511     }
2512     return !float64_is_zero(a.s);
2513 }
2514
2515 float32 QEMU_FLATTEN
2516 float32_div(float32 a, float32 b, float_status *s)
2517 {
2518     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2519                         f32_div_pre, f32_div_post);
2520 }
2521
2522 float64 QEMU_FLATTEN
2523 float64_div(float64 a, float64 b, float_status *s)
2524 {
2525     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2526                         f64_div_pre, f64_div_post);
2527 }
2528
2529 float64 float64r32_div(float64 a, float64 b, float_status *status)
2530 {
2531     FloatParts64 pa, pb, *pr;
2532
2533     float64_unpack_canonical(&pa, a, status);
2534     float64_unpack_canonical(&pb, b, status);
2535     pr = parts_div(&pa, &pb, status);
2536
2537     return float64r32_round_pack_canonical(pr, status);
2538 }
2539
2540 bfloat16 QEMU_FLATTEN
2541 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2542 {
2543     FloatParts64 pa, pb, *pr;
2544
2545     bfloat16_unpack_canonical(&pa, a, status);
2546     bfloat16_unpack_canonical(&pb, b, status);
2547     pr = parts_div(&pa, &pb, status);
2548
2549     return bfloat16_round_pack_canonical(pr, status);
2550 }
2551
2552 float128 QEMU_FLATTEN
2553 float128_div(float128 a, float128 b, float_status *status)
2554 {
2555     FloatParts128 pa, pb, *pr;
2556
2557     float128_unpack_canonical(&pa, a, status);
2558     float128_unpack_canonical(&pb, b, status);
2559     pr = parts_div(&pa, &pb, status);
2560
2561     return float128_round_pack_canonical(pr, status);
2562 }
2563
2564 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2565 {
2566     FloatParts128 pa, pb, *pr;
2567
2568     if (!floatx80_unpack_canonical(&pa, a, status) ||
2569         !floatx80_unpack_canonical(&pb, b, status)) {
2570         return floatx80_default_nan(status);
2571     }
2572
2573     pr = parts_div(&pa, &pb, status);
2574     return floatx80_round_pack_canonical(pr, status);
2575 }
2576
2577 /*
2578  * Remainder
2579  */
2580
2581 float32 float32_rem(float32 a, float32 b, float_status *status)
2582 {
2583     FloatParts64 pa, pb, *pr;
2584
2585     float32_unpack_canonical(&pa, a, status);
2586     float32_unpack_canonical(&pb, b, status);
2587     pr = parts_modrem(&pa, &pb, NULL, status);
2588
2589     return float32_round_pack_canonical(pr, status);
2590 }
2591
2592 float64 float64_rem(float64 a, float64 b, float_status *status)
2593 {
2594     FloatParts64 pa, pb, *pr;
2595
2596     float64_unpack_canonical(&pa, a, status);
2597     float64_unpack_canonical(&pb, b, status);
2598     pr = parts_modrem(&pa, &pb, NULL, status);
2599
2600     return float64_round_pack_canonical(pr, status);
2601 }
2602
2603 float128 float128_rem(float128 a, float128 b, float_status *status)
2604 {
2605     FloatParts128 pa, pb, *pr;
2606
2607     float128_unpack_canonical(&pa, a, status);
2608     float128_unpack_canonical(&pb, b, status);
2609     pr = parts_modrem(&pa, &pb, NULL, status);
2610
2611     return float128_round_pack_canonical(pr, status);
2612 }
2613
2614 /*
2615  * Returns the remainder of the extended double-precision floating-point value
2616  * `a' with respect to the corresponding value `b'.
2617  * If 'mod' is false, the operation is performed according to the IEC/IEEE
2618  * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
2619  * the remainder based on truncating the quotient toward zero instead and
2620  * *quotient is set to the low 64 bits of the absolute value of the integer
2621  * quotient.
2622  */
2623 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2624                          uint64_t *quotient, float_status *status)
2625 {
2626     FloatParts128 pa, pb, *pr;
2627
2628     *quotient = 0;
2629     if (!floatx80_unpack_canonical(&pa, a, status) ||
2630         !floatx80_unpack_canonical(&pb, b, status)) {
2631         return floatx80_default_nan(status);
2632     }
2633     pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2634
2635     return floatx80_round_pack_canonical(pr, status);
2636 }
2637
2638 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2639 {
2640     uint64_t quotient;
2641     return floatx80_modrem(a, b, false, &quotient, status);
2642 }
2643
2644 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2645 {
2646     uint64_t quotient;
2647     return floatx80_modrem(a, b, true, &quotient, status);
2648 }
2649
2650 /*
2651  * Float to Float conversions
2652  *
2653  * Returns the result of converting one float format to another. The
2654  * conversion is performed according to the IEC/IEEE Standard for
2655  * Binary Floating-Point Arithmetic.
2656  *
2657  * Usually this only needs to take care of raising invalid exceptions
2658  * and handling the conversion on NaNs.
2659  */
2660
2661 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2662 {
2663     switch (a->cls) {
2664     case float_class_snan:
2665         float_raise(float_flag_invalid_snan, s);
2666         /* fall through */
2667     case float_class_qnan:
2668         /*
2669          * There is no NaN in the destination format.  Raise Invalid
2670          * and return a zero with the sign of the input NaN.
2671          */
2672         float_raise(float_flag_invalid, s);
2673         a->cls = float_class_zero;
2674         break;
2675
2676     case float_class_inf:
2677         /*
2678          * There is no Inf in the destination format.  Raise Invalid
2679          * and return the maximum normal with the correct sign.
2680          */
2681         float_raise(float_flag_invalid, s);
2682         a->cls = float_class_normal;
2683         a->exp = float16_params_ahp.exp_max;
2684         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2685                                   float16_params_ahp.frac_size + 1);
2686         break;
2687
2688     case float_class_normal:
2689     case float_class_zero:
2690         break;
2691
2692     default:
2693         g_assert_not_reached();
2694     }
2695 }
2696
2697 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2698 {
2699     if (is_nan(a->cls)) {
2700         parts_return_nan(a, s);
2701     }
2702 }
2703
2704 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2705 {
2706     if (is_nan(a->cls)) {
2707         parts_return_nan(a, s);
2708     }
2709 }
2710
2711 #define parts_float_to_float(P, S) \
2712     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2713
2714 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2715                                         float_status *s)
2716 {
2717     a->cls = b->cls;
2718     a->sign = b->sign;
2719     a->exp = b->exp;
2720
2721     if (a->cls == float_class_normal) {
2722         frac_truncjam(a, b);
2723     } else if (is_nan(a->cls)) {
2724         /* Discard the low bits of the NaN. */
2725         a->frac = b->frac_hi;
2726         parts_return_nan(a, s);
2727     }
2728 }
2729
2730 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2731                                        float_status *s)
2732 {
2733     a->cls = b->cls;
2734     a->sign = b->sign;
2735     a->exp = b->exp;
2736     frac_widen(a, b);
2737
2738     if (is_nan(a->cls)) {
2739         parts_return_nan(a, s);
2740     }
2741 }
2742
2743 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2744 {
2745     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2746     FloatParts64 p;
2747
2748     float16a_unpack_canonical(&p, a, s, fmt16);
2749     parts_float_to_float(&p, s);
2750     return float32_round_pack_canonical(&p, s);
2751 }
2752
2753 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2754 {
2755     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2756     FloatParts64 p;
2757
2758     float16a_unpack_canonical(&p, a, s, fmt16);
2759     parts_float_to_float(&p, s);
2760     return float64_round_pack_canonical(&p, s);
2761 }
2762
2763 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2764 {
2765     FloatParts64 p;
2766     const FloatFmt *fmt;
2767
2768     float32_unpack_canonical(&p, a, s);
2769     if (ieee) {
2770         parts_float_to_float(&p, s);
2771         fmt = &float16_params;
2772     } else {
2773         parts_float_to_ahp(&p, s);
2774         fmt = &float16_params_ahp;
2775     }
2776     return float16a_round_pack_canonical(&p, s, fmt);
2777 }
2778
2779 static float64 QEMU_SOFTFLOAT_ATTR
2780 soft_float32_to_float64(float32 a, float_status *s)
2781 {
2782     FloatParts64 p;
2783
2784     float32_unpack_canonical(&p, a, s);
2785     parts_float_to_float(&p, s);
2786     return float64_round_pack_canonical(&p, s);
2787 }
2788
2789 float64 float32_to_float64(float32 a, float_status *s)
2790 {
2791     if (likely(float32_is_normal(a))) {
2792         /* Widening conversion can never produce inexact results.  */
2793         union_float32 uf;
2794         union_float64 ud;
2795         uf.s = a;
2796         ud.h = uf.h;
2797         return ud.s;
2798     } else if (float32_is_zero(a)) {
2799         return float64_set_sign(float64_zero, float32_is_neg(a));
2800     } else {
2801         return soft_float32_to_float64(a, s);
2802     }
2803 }
2804
2805 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2806 {
2807     FloatParts64 p;
2808     const FloatFmt *fmt;
2809
2810     float64_unpack_canonical(&p, a, s);
2811     if (ieee) {
2812         parts_float_to_float(&p, s);
2813         fmt = &float16_params;
2814     } else {
2815         parts_float_to_ahp(&p, s);
2816         fmt = &float16_params_ahp;
2817     }
2818     return float16a_round_pack_canonical(&p, s, fmt);
2819 }
2820
2821 float32 float64_to_float32(float64 a, float_status *s)
2822 {
2823     FloatParts64 p;
2824
2825     float64_unpack_canonical(&p, a, s);
2826     parts_float_to_float(&p, s);
2827     return float32_round_pack_canonical(&p, s);
2828 }
2829
2830 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2831 {
2832     FloatParts64 p;
2833
2834     bfloat16_unpack_canonical(&p, a, s);
2835     parts_float_to_float(&p, s);
2836     return float32_round_pack_canonical(&p, s);
2837 }
2838
2839 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2840 {
2841     FloatParts64 p;
2842
2843     bfloat16_unpack_canonical(&p, a, s);
2844     parts_float_to_float(&p, s);
2845     return float64_round_pack_canonical(&p, s);
2846 }
2847
2848 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2849 {
2850     FloatParts64 p;
2851
2852     float32_unpack_canonical(&p, a, s);
2853     parts_float_to_float(&p, s);
2854     return bfloat16_round_pack_canonical(&p, s);
2855 }
2856
2857 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2858 {
2859     FloatParts64 p;
2860
2861     float64_unpack_canonical(&p, a, s);
2862     parts_float_to_float(&p, s);
2863     return bfloat16_round_pack_canonical(&p, s);
2864 }
2865
2866 float32 float128_to_float32(float128 a, float_status *s)
2867 {
2868     FloatParts64 p64;
2869     FloatParts128 p128;
2870
2871     float128_unpack_canonical(&p128, a, s);
2872     parts_float_to_float_narrow(&p64, &p128, s);
2873     return float32_round_pack_canonical(&p64, s);
2874 }
2875
2876 float64 float128_to_float64(float128 a, float_status *s)
2877 {
2878     FloatParts64 p64;
2879     FloatParts128 p128;
2880
2881     float128_unpack_canonical(&p128, a, s);
2882     parts_float_to_float_narrow(&p64, &p128, s);
2883     return float64_round_pack_canonical(&p64, s);
2884 }
2885
2886 float128 float32_to_float128(float32 a, float_status *s)
2887 {
2888     FloatParts64 p64;
2889     FloatParts128 p128;
2890
2891     float32_unpack_canonical(&p64, a, s);
2892     parts_float_to_float_widen(&p128, &p64, s);
2893     return float128_round_pack_canonical(&p128, s);
2894 }
2895
2896 float128 float64_to_float128(float64 a, float_status *s)
2897 {
2898     FloatParts64 p64;
2899     FloatParts128 p128;
2900
2901     float64_unpack_canonical(&p64, a, s);
2902     parts_float_to_float_widen(&p128, &p64, s);
2903     return float128_round_pack_canonical(&p128, s);
2904 }
2905
2906 float32 floatx80_to_float32(floatx80 a, float_status *s)
2907 {
2908     FloatParts64 p64;
2909     FloatParts128 p128;
2910
2911     if (floatx80_unpack_canonical(&p128, a, s)) {
2912         parts_float_to_float_narrow(&p64, &p128, s);
2913     } else {
2914         parts_default_nan(&p64, s);
2915     }
2916     return float32_round_pack_canonical(&p64, s);
2917 }
2918
2919 float64 floatx80_to_float64(floatx80 a, float_status *s)
2920 {
2921     FloatParts64 p64;
2922     FloatParts128 p128;
2923
2924     if (floatx80_unpack_canonical(&p128, a, s)) {
2925         parts_float_to_float_narrow(&p64, &p128, s);
2926     } else {
2927         parts_default_nan(&p64, s);
2928     }
2929     return float64_round_pack_canonical(&p64, s);
2930 }
2931
2932 float128 floatx80_to_float128(floatx80 a, float_status *s)
2933 {
2934     FloatParts128 p;
2935
2936     if (floatx80_unpack_canonical(&p, a, s)) {
2937         parts_float_to_float(&p, s);
2938     } else {
2939         parts_default_nan(&p, s);
2940     }
2941     return float128_round_pack_canonical(&p, s);
2942 }
2943
2944 floatx80 float32_to_floatx80(float32 a, float_status *s)
2945 {
2946     FloatParts64 p64;
2947     FloatParts128 p128;
2948
2949     float32_unpack_canonical(&p64, a, s);
2950     parts_float_to_float_widen(&p128, &p64, s);
2951     return floatx80_round_pack_canonical(&p128, s);
2952 }
2953
2954 floatx80 float64_to_floatx80(float64 a, float_status *s)
2955 {
2956     FloatParts64 p64;
2957     FloatParts128 p128;
2958
2959     float64_unpack_canonical(&p64, a, s);
2960     parts_float_to_float_widen(&p128, &p64, s);
2961     return floatx80_round_pack_canonical(&p128, s);
2962 }
2963
2964 floatx80 float128_to_floatx80(float128 a, float_status *s)
2965 {
2966     FloatParts128 p;
2967
2968     float128_unpack_canonical(&p, a, s);
2969     parts_float_to_float(&p, s);
2970     return floatx80_round_pack_canonical(&p, s);
2971 }
2972
2973 /*
2974  * Round to integral value
2975  */
2976
2977 float16 float16_round_to_int(float16 a, float_status *s)
2978 {
2979     FloatParts64 p;
2980
2981     float16_unpack_canonical(&p, a, s);
2982     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2983     return float16_round_pack_canonical(&p, s);
2984 }
2985
2986 float32 float32_round_to_int(float32 a, float_status *s)
2987 {
2988     FloatParts64 p;
2989
2990     float32_unpack_canonical(&p, a, s);
2991     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2992     return float32_round_pack_canonical(&p, s);
2993 }
2994
2995 float64 float64_round_to_int(float64 a, float_status *s)
2996 {
2997     FloatParts64 p;
2998
2999     float64_unpack_canonical(&p, a, s);
3000     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
3001     return float64_round_pack_canonical(&p, s);
3002 }
3003
3004 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
3005 {
3006     FloatParts64 p;
3007
3008     bfloat16_unpack_canonical(&p, a, s);
3009     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
3010     return bfloat16_round_pack_canonical(&p, s);
3011 }
3012
3013 float128 float128_round_to_int(float128 a, float_status *s)
3014 {
3015     FloatParts128 p;
3016
3017     float128_unpack_canonical(&p, a, s);
3018     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3019     return float128_round_pack_canonical(&p, s);
3020 }
3021
3022 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3023 {
3024     FloatParts128 p;
3025
3026     if (!floatx80_unpack_canonical(&p, a, status)) {
3027         return floatx80_default_nan(status);
3028     }
3029
3030     parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3031                        &floatx80_params[status->floatx80_rounding_precision]);
3032     return floatx80_round_pack_canonical(&p, status);
3033 }
3034
3035 /*
3036  * Floating-point to signed integer conversions
3037  */
3038
3039 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3040                               float_status *s)
3041 {
3042     FloatParts64 p;
3043
3044     float16_unpack_canonical(&p, a, s);
3045     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3046 }
3047
3048 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3049                                 float_status *s)
3050 {
3051     FloatParts64 p;
3052
3053     float16_unpack_canonical(&p, a, s);
3054     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3055 }
3056
3057 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3058                                 float_status *s)
3059 {
3060     FloatParts64 p;
3061
3062     float16_unpack_canonical(&p, a, s);
3063     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3064 }
3065
3066 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3067                                 float_status *s)
3068 {
3069     FloatParts64 p;
3070
3071     float16_unpack_canonical(&p, a, s);
3072     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3073 }
3074
3075 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3076                                 float_status *s)
3077 {
3078     FloatParts64 p;
3079
3080     float32_unpack_canonical(&p, a, s);
3081     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3082 }
3083
3084 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3085                                 float_status *s)
3086 {
3087     FloatParts64 p;
3088
3089     float32_unpack_canonical(&p, a, s);
3090     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3091 }
3092
3093 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3094                                 float_status *s)
3095 {
3096     FloatParts64 p;
3097
3098     float32_unpack_canonical(&p, a, s);
3099     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3100 }
3101
3102 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3103                                 float_status *s)
3104 {
3105     FloatParts64 p;
3106
3107     float64_unpack_canonical(&p, a, s);
3108     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3109 }
3110
3111 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3112                                 float_status *s)
3113 {
3114     FloatParts64 p;
3115
3116     float64_unpack_canonical(&p, a, s);
3117     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3118 }
3119
3120 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3121                                 float_status *s)
3122 {
3123     FloatParts64 p;
3124
3125     float64_unpack_canonical(&p, a, s);
3126     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3127 }
3128
3129 int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3130                                float_status *s)
3131 {
3132     FloatParts64 p;
3133
3134     bfloat16_unpack_canonical(&p, a, s);
3135     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3136 }
3137
3138 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3139                                  float_status *s)
3140 {
3141     FloatParts64 p;
3142
3143     bfloat16_unpack_canonical(&p, a, s);
3144     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3145 }
3146
3147 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3148                                  float_status *s)
3149 {
3150     FloatParts64 p;
3151
3152     bfloat16_unpack_canonical(&p, a, s);
3153     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3154 }
3155
3156 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3157                                  float_status *s)
3158 {
3159     FloatParts64 p;
3160
3161     bfloat16_unpack_canonical(&p, a, s);
3162     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3163 }
3164
3165 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3166                                         int scale, float_status *s)
3167 {
3168     FloatParts128 p;
3169
3170     float128_unpack_canonical(&p, a, s);
3171     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3172 }
3173
3174 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3175                                         int scale, float_status *s)
3176 {
3177     FloatParts128 p;
3178
3179     float128_unpack_canonical(&p, a, s);
3180     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3181 }
3182
3183 static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3184                                         int scale, float_status *s)
3185 {
3186     int flags = 0;
3187     Int128 r;
3188     FloatParts128 p;
3189
3190     float128_unpack_canonical(&p, a, s);
3191
3192     switch (p.cls) {
3193     case float_class_snan:
3194         flags |= float_flag_invalid_snan;
3195         /* fall through */
3196     case float_class_qnan:
3197         flags |= float_flag_invalid;
3198         r = UINT128_MAX;
3199         break;
3200
3201     case float_class_inf:
3202         flags = float_flag_invalid | float_flag_invalid_cvti;
3203         r = p.sign ? INT128_MIN : INT128_MAX;
3204         break;
3205
3206     case float_class_zero:
3207         return int128_zero();
3208
3209     case float_class_normal:
3210         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3211             flags = float_flag_inexact;
3212         }
3213
3214         if (p.exp < 127) {
3215             int shift = 127 - p.exp;
3216             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3217             if (p.sign) {
3218                 r = int128_neg(r);
3219             }
3220         } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3221                    p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3222             r = INT128_MIN;
3223         } else {
3224             flags = float_flag_invalid | float_flag_invalid_cvti;
3225             r = p.sign ? INT128_MIN : INT128_MAX;
3226         }
3227         break;
3228
3229     default:
3230         g_assert_not_reached();
3231     }
3232
3233     float_raise(flags, s);
3234     return r;
3235 }
3236
3237 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3238                                         int scale, float_status *s)
3239 {
3240     FloatParts128 p;
3241
3242     if (!floatx80_unpack_canonical(&p, a, s)) {
3243         parts_default_nan(&p, s);
3244     }
3245     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3246 }
3247
3248 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3249                                         int scale, float_status *s)
3250 {
3251     FloatParts128 p;
3252
3253     if (!floatx80_unpack_canonical(&p, a, s)) {
3254         parts_default_nan(&p, s);
3255     }
3256     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3257 }
3258
3259 int8_t float16_to_int8(float16 a, float_status *s)
3260 {
3261     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3262 }
3263
3264 int16_t float16_to_int16(float16 a, float_status *s)
3265 {
3266     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3267 }
3268
3269 int32_t float16_to_int32(float16 a, float_status *s)
3270 {
3271     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3272 }
3273
3274 int64_t float16_to_int64(float16 a, float_status *s)
3275 {
3276     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3277 }
3278
3279 int16_t float32_to_int16(float32 a, float_status *s)
3280 {
3281     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3282 }
3283
3284 int32_t float32_to_int32(float32 a, float_status *s)
3285 {
3286     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3287 }
3288
3289 int64_t float32_to_int64(float32 a, float_status *s)
3290 {
3291     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3292 }
3293
3294 int16_t float64_to_int16(float64 a, float_status *s)
3295 {
3296     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3297 }
3298
3299 int32_t float64_to_int32(float64 a, float_status *s)
3300 {
3301     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3302 }
3303
3304 int64_t float64_to_int64(float64 a, float_status *s)
3305 {
3306     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3307 }
3308
3309 int32_t float128_to_int32(float128 a, float_status *s)
3310 {
3311     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3312 }
3313
3314 int64_t float128_to_int64(float128 a, float_status *s)
3315 {
3316     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3317 }
3318
3319 Int128 float128_to_int128(float128 a, float_status *s)
3320 {
3321     return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3322 }
3323
3324 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3325 {
3326     return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3327 }
3328
3329 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3330 {
3331     return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3332 }
3333
3334 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3335 {
3336     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3337 }
3338
3339 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3340 {
3341     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3342 }
3343
3344 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3345 {
3346     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3347 }
3348
3349 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3350 {
3351     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3352 }
3353
3354 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3355 {
3356     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3357 }
3358
3359 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3360 {
3361     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3362 }
3363
3364 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3365 {
3366     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3367 }
3368
3369 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3370 {
3371     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3372 }
3373
3374 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3375 {
3376     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3377 }
3378
3379 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3380 {
3381     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3382 }
3383
3384 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3385 {
3386     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3387 }
3388
3389 Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3390 {
3391     return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3392 }
3393
3394 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3395 {
3396     return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3397 }
3398
3399 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3400 {
3401     return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3402 }
3403
3404 int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
3405 {
3406     return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3407 }
3408
3409 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3410 {
3411     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3412 }
3413
3414 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3415 {
3416     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3417 }
3418
3419 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3420 {
3421     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3422 }
3423
3424 int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
3425 {
3426     return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
3427 }
3428
3429 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3430 {
3431     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3432 }
3433
3434 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3435 {
3436     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3437 }
3438
3439 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3440 {
3441     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3442 }
3443
3444 int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode,
3445                                 float_status *s)
3446 {
3447     FloatParts64 p;
3448
3449     float64_unpack_canonical(&p, a, s);
3450     return parts_float_to_sint_modulo(&p, rmode, 31, s);
3451 }
3452
3453 int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode,
3454                                 float_status *s)
3455 {
3456     FloatParts64 p;
3457
3458     float64_unpack_canonical(&p, a, s);
3459     return parts_float_to_sint_modulo(&p, rmode, 63, s);
3460 }
3461
3462 /*
3463  * Floating-point to unsigned integer conversions
3464  */
3465
3466 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3467                                 float_status *s)
3468 {
3469     FloatParts64 p;
3470
3471     float16_unpack_canonical(&p, a, s);
3472     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3473 }
3474
3475 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3476                                   float_status *s)
3477 {
3478     FloatParts64 p;
3479
3480     float16_unpack_canonical(&p, a, s);
3481     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3482 }
3483
3484 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3485                                   float_status *s)
3486 {
3487     FloatParts64 p;
3488
3489     float16_unpack_canonical(&p, a, s);
3490     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3491 }
3492
3493 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3494                                   float_status *s)
3495 {
3496     FloatParts64 p;
3497
3498     float16_unpack_canonical(&p, a, s);
3499     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3500 }
3501
3502 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3503                                   float_status *s)
3504 {
3505     FloatParts64 p;
3506
3507     float32_unpack_canonical(&p, a, s);
3508     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3509 }
3510
3511 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3512                                   float_status *s)
3513 {
3514     FloatParts64 p;
3515
3516     float32_unpack_canonical(&p, a, s);
3517     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3518 }
3519
3520 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3521                                   float_status *s)
3522 {
3523     FloatParts64 p;
3524
3525     float32_unpack_canonical(&p, a, s);
3526     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3527 }
3528
3529 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3530                                   float_status *s)
3531 {
3532     FloatParts64 p;
3533
3534     float64_unpack_canonical(&p, a, s);
3535     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3536 }
3537
3538 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3539                                   float_status *s)
3540 {
3541     FloatParts64 p;
3542
3543     float64_unpack_canonical(&p, a, s);
3544     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3545 }
3546
3547 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3548                                   float_status *s)
3549 {
3550     FloatParts64 p;
3551
3552     float64_unpack_canonical(&p, a, s);
3553     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3554 }
3555
3556 uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
3557                                  int scale, float_status *s)
3558 {
3559     FloatParts64 p;
3560
3561     bfloat16_unpack_canonical(&p, a, s);
3562     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3563 }
3564
3565 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3566                                    int scale, float_status *s)
3567 {
3568     FloatParts64 p;
3569
3570     bfloat16_unpack_canonical(&p, a, s);
3571     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3572 }
3573
3574 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3575                                    int scale, float_status *s)
3576 {
3577     FloatParts64 p;
3578
3579     bfloat16_unpack_canonical(&p, a, s);
3580     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3581 }
3582
3583 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3584                                    int scale, float_status *s)
3585 {
3586     FloatParts64 p;
3587
3588     bfloat16_unpack_canonical(&p, a, s);
3589     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3590 }
3591
3592 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3593                                           int scale, float_status *s)
3594 {
3595     FloatParts128 p;
3596
3597     float128_unpack_canonical(&p, a, s);
3598     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3599 }
3600
3601 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3602                                           int scale, float_status *s)
3603 {
3604     FloatParts128 p;
3605
3606     float128_unpack_canonical(&p, a, s);
3607     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3608 }
3609
3610 static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3611                                          int scale, float_status *s)
3612 {
3613     int flags = 0;
3614     Int128 r;
3615     FloatParts128 p;
3616
3617     float128_unpack_canonical(&p, a, s);
3618
3619     switch (p.cls) {
3620     case float_class_snan:
3621         flags |= float_flag_invalid_snan;
3622         /* fall through */
3623     case float_class_qnan:
3624         flags |= float_flag_invalid;
3625         r = UINT128_MAX;
3626         break;
3627
3628     case float_class_inf:
3629         flags = float_flag_invalid | float_flag_invalid_cvti;
3630         r = p.sign ? int128_zero() : UINT128_MAX;
3631         break;
3632
3633     case float_class_zero:
3634         return int128_zero();
3635
3636     case float_class_normal:
3637         if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3638             flags = float_flag_inexact;
3639             if (p.cls == float_class_zero) {
3640                 r = int128_zero();
3641                 break;
3642             }
3643         }
3644
3645         if (p.sign) {
3646             flags = float_flag_invalid | float_flag_invalid_cvti;
3647             r = int128_zero();
3648         } else if (p.exp <= 127) {
3649             int shift = 127 - p.exp;
3650             r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3651         } else {
3652             flags = float_flag_invalid | float_flag_invalid_cvti;
3653             r = UINT128_MAX;
3654         }
3655         break;
3656
3657     default:
3658         g_assert_not_reached();
3659     }
3660
3661     float_raise(flags, s);
3662     return r;
3663 }
3664
3665 uint8_t float16_to_uint8(float16 a, float_status *s)
3666 {
3667     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3668 }
3669
3670 uint16_t float16_to_uint16(float16 a, float_status *s)
3671 {
3672     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3673 }
3674
3675 uint32_t float16_to_uint32(float16 a, float_status *s)
3676 {
3677     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3678 }
3679
3680 uint64_t float16_to_uint64(float16 a, float_status *s)
3681 {
3682     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3683 }
3684
3685 uint16_t float32_to_uint16(float32 a, float_status *s)
3686 {
3687     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3688 }
3689
3690 uint32_t float32_to_uint32(float32 a, float_status *s)
3691 {
3692     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3693 }
3694
3695 uint64_t float32_to_uint64(float32 a, float_status *s)
3696 {
3697     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3698 }
3699
3700 uint16_t float64_to_uint16(float64 a, float_status *s)
3701 {
3702     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3703 }
3704
3705 uint32_t float64_to_uint32(float64 a, float_status *s)
3706 {
3707     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3708 }
3709
3710 uint64_t float64_to_uint64(float64 a, float_status *s)
3711 {
3712     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3713 }
3714
3715 uint32_t float128_to_uint32(float128 a, float_status *s)
3716 {
3717     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3718 }
3719
3720 uint64_t float128_to_uint64(float128 a, float_status *s)
3721 {
3722     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3723 }
3724
3725 Int128 float128_to_uint128(float128 a, float_status *s)
3726 {
3727     return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3728 }
3729
3730 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3731 {
3732     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3733 }
3734
3735 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3736 {
3737     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3738 }
3739
3740 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3741 {
3742     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3743 }
3744
3745 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3746 {
3747     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3748 }
3749
3750 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3751 {
3752     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3753 }
3754
3755 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3756 {
3757     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3758 }
3759
3760 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3761 {
3762     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3763 }
3764
3765 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3766 {
3767     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3768 }
3769
3770 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3771 {
3772     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3773 }
3774
3775 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3776 {
3777     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3778 }
3779
3780 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3781 {
3782     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3783 }
3784
3785 Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3786 {
3787     return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3788 }
3789
3790 uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
3791 {
3792     return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3793 }
3794
3795 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3796 {
3797     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3798 }
3799
3800 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3801 {
3802     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3803 }
3804
3805 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3806 {
3807     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3808 }
3809
3810 uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
3811 {
3812     return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
3813 }
3814
3815 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3816 {
3817     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3818 }
3819
3820 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3821 {
3822     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3823 }
3824
3825 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3826 {
3827     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3828 }
3829
3830 /*
3831  * Signed integer to floating-point conversions
3832  */
3833
3834 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3835 {
3836     FloatParts64 p;
3837
3838     parts_sint_to_float(&p, a, scale, status);
3839     return float16_round_pack_canonical(&p, status);
3840 }
3841
3842 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3843 {
3844     return int64_to_float16_scalbn(a, scale, status);
3845 }
3846
3847 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3848 {
3849     return int64_to_float16_scalbn(a, scale, status);
3850 }
3851
3852 float16 int64_to_float16(int64_t a, float_status *status)
3853 {
3854     return int64_to_float16_scalbn(a, 0, status);
3855 }
3856
3857 float16 int32_to_float16(int32_t a, float_status *status)
3858 {
3859     return int64_to_float16_scalbn(a, 0, status);
3860 }
3861
3862 float16 int16_to_float16(int16_t a, float_status *status)
3863 {
3864     return int64_to_float16_scalbn(a, 0, status);
3865 }
3866
3867 float16 int8_to_float16(int8_t a, float_status *status)
3868 {
3869     return int64_to_float16_scalbn(a, 0, status);
3870 }
3871
3872 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3873 {
3874     FloatParts64 p;
3875
3876     /* Without scaling, there are no overflow concerns. */
3877     if (likely(scale == 0) && can_use_fpu(status)) {
3878         union_float32 ur;
3879         ur.h = a;
3880         return ur.s;
3881     }
3882
3883     parts64_sint_to_float(&p, a, scale, status);
3884     return float32_round_pack_canonical(&p, status);
3885 }
3886
3887 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3888 {
3889     return int64_to_float32_scalbn(a, scale, status);
3890 }
3891
3892 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3893 {
3894     return int64_to_float32_scalbn(a, scale, status);
3895 }
3896
3897 float32 int64_to_float32(int64_t a, float_status *status)
3898 {
3899     return int64_to_float32_scalbn(a, 0, status);
3900 }
3901
3902 float32 int32_to_float32(int32_t a, float_status *status)
3903 {
3904     return int64_to_float32_scalbn(a, 0, status);
3905 }
3906
3907 float32 int16_to_float32(int16_t a, float_status *status)
3908 {
3909     return int64_to_float32_scalbn(a, 0, status);
3910 }
3911
3912 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3913 {
3914     FloatParts64 p;
3915
3916     /* Without scaling, there are no overflow concerns. */
3917     if (likely(scale == 0) && can_use_fpu(status)) {
3918         union_float64 ur;
3919         ur.h = a;
3920         return ur.s;
3921     }
3922
3923     parts_sint_to_float(&p, a, scale, status);
3924     return float64_round_pack_canonical(&p, status);
3925 }
3926
3927 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3928 {
3929     return int64_to_float64_scalbn(a, scale, status);
3930 }
3931
3932 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3933 {
3934     return int64_to_float64_scalbn(a, scale, status);
3935 }
3936
3937 float64 int64_to_float64(int64_t a, float_status *status)
3938 {
3939     return int64_to_float64_scalbn(a, 0, status);
3940 }
3941
3942 float64 int32_to_float64(int32_t a, float_status *status)
3943 {
3944     return int64_to_float64_scalbn(a, 0, status);
3945 }
3946
3947 float64 int16_to_float64(int16_t a, float_status *status)
3948 {
3949     return int64_to_float64_scalbn(a, 0, status);
3950 }
3951
3952 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3953 {
3954     FloatParts64 p;
3955
3956     parts_sint_to_float(&p, a, scale, status);
3957     return bfloat16_round_pack_canonical(&p, status);
3958 }
3959
3960 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3961 {
3962     return int64_to_bfloat16_scalbn(a, scale, status);
3963 }
3964
3965 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3966 {
3967     return int64_to_bfloat16_scalbn(a, scale, status);
3968 }
3969
3970 bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
3971 {
3972     return int64_to_bfloat16_scalbn(a, scale, status);
3973 }
3974
3975 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3976 {
3977     return int64_to_bfloat16_scalbn(a, 0, status);
3978 }
3979
3980 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3981 {
3982     return int64_to_bfloat16_scalbn(a, 0, status);
3983 }
3984
3985 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3986 {
3987     return int64_to_bfloat16_scalbn(a, 0, status);
3988 }
3989
3990 bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
3991 {
3992     return int64_to_bfloat16_scalbn(a, 0, status);
3993 }
3994
3995 float128 int128_to_float128(Int128 a, float_status *status)
3996 {
3997     FloatParts128 p = { };
3998     int shift;
3999
4000     if (int128_nz(a)) {
4001         p.cls = float_class_normal;
4002         if (!int128_nonneg(a)) {
4003             p.sign = true;
4004             a = int128_neg(a);
4005         }
4006
4007         shift = clz64(int128_gethi(a));
4008         if (shift == 64) {
4009             shift += clz64(int128_getlo(a));
4010         }
4011
4012         p.exp = 127 - shift;
4013         a = int128_lshift(a, shift);
4014
4015         p.frac_hi = int128_gethi(a);
4016         p.frac_lo = int128_getlo(a);
4017     } else {
4018         p.cls = float_class_zero;
4019     }
4020
4021     return float128_round_pack_canonical(&p, status);
4022 }
4023
4024 float128 int64_to_float128(int64_t a, float_status *status)
4025 {
4026     FloatParts128 p;
4027
4028     parts_sint_to_float(&p, a, 0, status);
4029     return float128_round_pack_canonical(&p, status);
4030 }
4031
4032 float128 int32_to_float128(int32_t a, float_status *status)
4033 {
4034     return int64_to_float128(a, status);
4035 }
4036
4037 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4038 {
4039     FloatParts128 p;
4040
4041     parts_sint_to_float(&p, a, 0, status);
4042     return floatx80_round_pack_canonical(&p, status);
4043 }
4044
4045 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4046 {
4047     return int64_to_floatx80(a, status);
4048 }
4049
4050 /*
4051  * Unsigned Integer to floating-point conversions
4052  */
4053
4054 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
4055 {
4056     FloatParts64 p;
4057
4058     parts_uint_to_float(&p, a, scale, status);
4059     return float16_round_pack_canonical(&p, status);
4060 }
4061
4062 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
4063 {
4064     return uint64_to_float16_scalbn(a, scale, status);
4065 }
4066
4067 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
4068 {
4069     return uint64_to_float16_scalbn(a, scale, status);
4070 }
4071
4072 float16 uint64_to_float16(uint64_t a, float_status *status)
4073 {
4074     return uint64_to_float16_scalbn(a, 0, status);
4075 }
4076
4077 float16 uint32_to_float16(uint32_t a, float_status *status)
4078 {
4079     return uint64_to_float16_scalbn(a, 0, status);
4080 }
4081
4082 float16 uint16_to_float16(uint16_t a, float_status *status)
4083 {
4084     return uint64_to_float16_scalbn(a, 0, status);
4085 }
4086
4087 float16 uint8_to_float16(uint8_t a, float_status *status)
4088 {
4089     return uint64_to_float16_scalbn(a, 0, status);
4090 }
4091
4092 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4093 {
4094     FloatParts64 p;
4095
4096     /* Without scaling, there are no overflow concerns. */
4097     if (likely(scale == 0) && can_use_fpu(status)) {
4098         union_float32 ur;
4099         ur.h = a;
4100         return ur.s;
4101     }
4102
4103     parts_uint_to_float(&p, a, scale, status);
4104     return float32_round_pack_canonical(&p, status);
4105 }
4106
4107 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4108 {
4109     return uint64_to_float32_scalbn(a, scale, status);
4110 }
4111
4112 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4113 {
4114     return uint64_to_float32_scalbn(a, scale, status);
4115 }
4116
4117 float32 uint64_to_float32(uint64_t a, float_status *status)
4118 {
4119     return uint64_to_float32_scalbn(a, 0, status);
4120 }
4121
4122 float32 uint32_to_float32(uint32_t a, float_status *status)
4123 {
4124     return uint64_to_float32_scalbn(a, 0, status);
4125 }
4126
4127 float32 uint16_to_float32(uint16_t a, float_status *status)
4128 {
4129     return uint64_to_float32_scalbn(a, 0, status);
4130 }
4131
4132 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4133 {
4134     FloatParts64 p;
4135
4136     /* Without scaling, there are no overflow concerns. */
4137     if (likely(scale == 0) && can_use_fpu(status)) {
4138         union_float64 ur;
4139         ur.h = a;
4140         return ur.s;
4141     }
4142
4143     parts_uint_to_float(&p, a, scale, status);
4144     return float64_round_pack_canonical(&p, status);
4145 }
4146
4147 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4148 {
4149     return uint64_to_float64_scalbn(a, scale, status);
4150 }
4151
4152 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4153 {
4154     return uint64_to_float64_scalbn(a, scale, status);
4155 }
4156
4157 float64 uint64_to_float64(uint64_t a, float_status *status)
4158 {
4159     return uint64_to_float64_scalbn(a, 0, status);
4160 }
4161
4162 float64 uint32_to_float64(uint32_t a, float_status *status)
4163 {
4164     return uint64_to_float64_scalbn(a, 0, status);
4165 }
4166
4167 float64 uint16_to_float64(uint16_t a, float_status *status)
4168 {
4169     return uint64_to_float64_scalbn(a, 0, status);
4170 }
4171
4172 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4173 {
4174     FloatParts64 p;
4175
4176     parts_uint_to_float(&p, a, scale, status);
4177     return bfloat16_round_pack_canonical(&p, status);
4178 }
4179
4180 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4181 {
4182     return uint64_to_bfloat16_scalbn(a, scale, status);
4183 }
4184
4185 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4186 {
4187     return uint64_to_bfloat16_scalbn(a, scale, status);
4188 }
4189
4190 bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
4191 {
4192     return uint64_to_bfloat16_scalbn(a, scale, status);
4193 }
4194
4195 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4196 {
4197     return uint64_to_bfloat16_scalbn(a, 0, status);
4198 }
4199
4200 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4201 {
4202     return uint64_to_bfloat16_scalbn(a, 0, status);
4203 }
4204
4205 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4206 {
4207     return uint64_to_bfloat16_scalbn(a, 0, status);
4208 }
4209
4210 bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
4211 {
4212     return uint64_to_bfloat16_scalbn(a, 0, status);
4213 }
4214
4215 float128 uint64_to_float128(uint64_t a, float_status *status)
4216 {
4217     FloatParts128 p;
4218
4219     parts_uint_to_float(&p, a, 0, status);
4220     return float128_round_pack_canonical(&p, status);
4221 }
4222
4223 float128 uint128_to_float128(Int128 a, float_status *status)
4224 {
4225     FloatParts128 p = { };
4226     int shift;
4227
4228     if (int128_nz(a)) {
4229         p.cls = float_class_normal;
4230
4231         shift = clz64(int128_gethi(a));
4232         if (shift == 64) {
4233             shift += clz64(int128_getlo(a));
4234         }
4235
4236         p.exp = 127 - shift;
4237         a = int128_lshift(a, shift);
4238
4239         p.frac_hi = int128_gethi(a);
4240         p.frac_lo = int128_getlo(a);
4241     } else {
4242         p.cls = float_class_zero;
4243     }
4244
4245     return float128_round_pack_canonical(&p, status);
4246 }
4247
4248 /*
4249  * Minimum and maximum
4250  */
4251
4252 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4253 {
4254     FloatParts64 pa, pb, *pr;
4255
4256     float16_unpack_canonical(&pa, a, s);
4257     float16_unpack_canonical(&pb, b, s);
4258     pr = parts_minmax(&pa, &pb, s, flags);
4259
4260     return float16_round_pack_canonical(pr, s);
4261 }
4262
4263 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4264                                 float_status *s, int flags)
4265 {
4266     FloatParts64 pa, pb, *pr;
4267
4268     bfloat16_unpack_canonical(&pa, a, s);
4269     bfloat16_unpack_canonical(&pb, b, s);
4270     pr = parts_minmax(&pa, &pb, s, flags);
4271
4272     return bfloat16_round_pack_canonical(pr, s);
4273 }
4274
4275 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4276 {
4277     FloatParts64 pa, pb, *pr;
4278
4279     float32_unpack_canonical(&pa, a, s);
4280     float32_unpack_canonical(&pb, b, s);
4281     pr = parts_minmax(&pa, &pb, s, flags);
4282
4283     return float32_round_pack_canonical(pr, s);
4284 }
4285
4286 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4287 {
4288     FloatParts64 pa, pb, *pr;
4289
4290     float64_unpack_canonical(&pa, a, s);
4291     float64_unpack_canonical(&pb, b, s);
4292     pr = parts_minmax(&pa, &pb, s, flags);
4293
4294     return float64_round_pack_canonical(pr, s);
4295 }
4296
4297 static float128 float128_minmax(float128 a, float128 b,
4298                                 float_status *s, int flags)
4299 {
4300     FloatParts128 pa, pb, *pr;
4301
4302     float128_unpack_canonical(&pa, a, s);
4303     float128_unpack_canonical(&pb, b, s);
4304     pr = parts_minmax(&pa, &pb, s, flags);
4305
4306     return float128_round_pack_canonical(pr, s);
4307 }
4308
4309 #define MINMAX_1(type, name, flags) \
4310     type type##_##name(type a, type b, float_status *s) \
4311     { return type##_minmax(a, b, s, flags); }
4312
4313 #define MINMAX_2(type) \
4314     MINMAX_1(type, max, 0)                                                \
4315     MINMAX_1(type, maxnum, minmax_isnum)                                  \
4316     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
4317     MINMAX_1(type, maximum_number, minmax_isnumber)                       \
4318     MINMAX_1(type, min, minmax_ismin)                                     \
4319     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
4320     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4321     MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
4322
4323 MINMAX_2(float16)
4324 MINMAX_2(bfloat16)
4325 MINMAX_2(float32)
4326 MINMAX_2(float64)
4327 MINMAX_2(float128)
4328
4329 #undef MINMAX_1
4330 #undef MINMAX_2
4331
4332 /*
4333  * Floating point compare
4334  */
4335
4336 static FloatRelation QEMU_FLATTEN
4337 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4338 {
4339     FloatParts64 pa, pb;
4340
4341     float16_unpack_canonical(&pa, a, s);
4342     float16_unpack_canonical(&pb, b, s);
4343     return parts_compare(&pa, &pb, s, is_quiet);
4344 }
4345
4346 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4347 {
4348     return float16_do_compare(a, b, s, false);
4349 }
4350
4351 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4352 {
4353     return float16_do_compare(a, b, s, true);
4354 }
4355
4356 static FloatRelation QEMU_SOFTFLOAT_ATTR
4357 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4358 {
4359     FloatParts64 pa, pb;
4360
4361     float32_unpack_canonical(&pa, a, s);
4362     float32_unpack_canonical(&pb, b, s);
4363     return parts_compare(&pa, &pb, s, is_quiet);
4364 }
4365
4366 static FloatRelation QEMU_FLATTEN
4367 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4368 {
4369     union_float32 ua, ub;
4370
4371     ua.s = xa;
4372     ub.s = xb;
4373
4374     if (QEMU_NO_HARDFLOAT) {
4375         goto soft;
4376     }
4377
4378     float32_input_flush2(&ua.s, &ub.s, s);
4379     if (isgreaterequal(ua.h, ub.h)) {
4380         if (isgreater(ua.h, ub.h)) {
4381             return float_relation_greater;
4382         }
4383         return float_relation_equal;
4384     }
4385     if (likely(isless(ua.h, ub.h))) {
4386         return float_relation_less;
4387     }
4388     /*
4389      * The only condition remaining is unordered.
4390      * Fall through to set flags.
4391      */
4392  soft:
4393     return float32_do_compare(ua.s, ub.s, s, is_quiet);
4394 }
4395
4396 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4397 {
4398     return float32_hs_compare(a, b, s, false);
4399 }
4400
4401 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4402 {
4403     return float32_hs_compare(a, b, s, true);
4404 }
4405
4406 static FloatRelation QEMU_SOFTFLOAT_ATTR
4407 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4408 {
4409     FloatParts64 pa, pb;
4410
4411     float64_unpack_canonical(&pa, a, s);
4412     float64_unpack_canonical(&pb, b, s);
4413     return parts_compare(&pa, &pb, s, is_quiet);
4414 }
4415
4416 static FloatRelation QEMU_FLATTEN
4417 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4418 {
4419     union_float64 ua, ub;
4420
4421     ua.s = xa;
4422     ub.s = xb;
4423
4424     if (QEMU_NO_HARDFLOAT) {
4425         goto soft;
4426     }
4427
4428     float64_input_flush2(&ua.s, &ub.s, s);
4429     if (isgreaterequal(ua.h, ub.h)) {
4430         if (isgreater(ua.h, ub.h)) {
4431             return float_relation_greater;
4432         }
4433         return float_relation_equal;
4434     }
4435     if (likely(isless(ua.h, ub.h))) {
4436         return float_relation_less;
4437     }
4438     /*
4439      * The only condition remaining is unordered.
4440      * Fall through to set flags.
4441      */
4442  soft:
4443     return float64_do_compare(ua.s, ub.s, s, is_quiet);
4444 }
4445
4446 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4447 {
4448     return float64_hs_compare(a, b, s, false);
4449 }
4450
4451 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4452 {
4453     return float64_hs_compare(a, b, s, true);
4454 }
4455
4456 static FloatRelation QEMU_FLATTEN
4457 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4458 {
4459     FloatParts64 pa, pb;
4460
4461     bfloat16_unpack_canonical(&pa, a, s);
4462     bfloat16_unpack_canonical(&pb, b, s);
4463     return parts_compare(&pa, &pb, s, is_quiet);
4464 }
4465
4466 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4467 {
4468     return bfloat16_do_compare(a, b, s, false);
4469 }
4470
4471 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4472 {
4473     return bfloat16_do_compare(a, b, s, true);
4474 }
4475
4476 static FloatRelation QEMU_FLATTEN
4477 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4478 {
4479     FloatParts128 pa, pb;
4480
4481     float128_unpack_canonical(&pa, a, s);
4482     float128_unpack_canonical(&pb, b, s);
4483     return parts_compare(&pa, &pb, s, is_quiet);
4484 }
4485
4486 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4487 {
4488     return float128_do_compare(a, b, s, false);
4489 }
4490
4491 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4492 {
4493     return float128_do_compare(a, b, s, true);
4494 }
4495
4496 static FloatRelation QEMU_FLATTEN
4497 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4498 {
4499     FloatParts128 pa, pb;
4500
4501     if (!floatx80_unpack_canonical(&pa, a, s) ||
4502         !floatx80_unpack_canonical(&pb, b, s)) {
4503         return float_relation_unordered;
4504     }
4505     return parts_compare(&pa, &pb, s, is_quiet);
4506 }
4507
4508 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4509 {
4510     return floatx80_do_compare(a, b, s, false);
4511 }
4512
4513 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4514 {
4515     return floatx80_do_compare(a, b, s, true);
4516 }
4517
4518 /*
4519  * Scale by 2**N
4520  */
4521
4522 float16 float16_scalbn(float16 a, int n, float_status *status)
4523 {
4524     FloatParts64 p;
4525
4526     float16_unpack_canonical(&p, a, status);
4527     parts_scalbn(&p, n, status);
4528     return float16_round_pack_canonical(&p, status);
4529 }
4530
4531 float32 float32_scalbn(float32 a, int n, float_status *status)
4532 {
4533     FloatParts64 p;
4534
4535     float32_unpack_canonical(&p, a, status);
4536     parts_scalbn(&p, n, status);
4537     return float32_round_pack_canonical(&p, status);
4538 }
4539
4540 float64 float64_scalbn(float64 a, int n, float_status *status)
4541 {
4542     FloatParts64 p;
4543
4544     float64_unpack_canonical(&p, a, status);
4545     parts_scalbn(&p, n, status);
4546     return float64_round_pack_canonical(&p, status);
4547 }
4548
4549 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4550 {
4551     FloatParts64 p;
4552
4553     bfloat16_unpack_canonical(&p, a, status);
4554     parts_scalbn(&p, n, status);
4555     return bfloat16_round_pack_canonical(&p, status);
4556 }
4557
4558 float128 float128_scalbn(float128 a, int n, float_status *status)
4559 {
4560     FloatParts128 p;
4561
4562     float128_unpack_canonical(&p, a, status);
4563     parts_scalbn(&p, n, status);
4564     return float128_round_pack_canonical(&p, status);
4565 }
4566
4567 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4568 {
4569     FloatParts128 p;
4570
4571     if (!floatx80_unpack_canonical(&p, a, status)) {
4572         return floatx80_default_nan(status);
4573     }
4574     parts_scalbn(&p, n, status);
4575     return floatx80_round_pack_canonical(&p, status);
4576 }
4577
4578 /*
4579  * Square Root
4580  */
4581
4582 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4583 {
4584     FloatParts64 p;
4585
4586     float16_unpack_canonical(&p, a, status);
4587     parts_sqrt(&p, status, &float16_params);
4588     return float16_round_pack_canonical(&p, status);
4589 }
4590
4591 static float32 QEMU_SOFTFLOAT_ATTR
4592 soft_f32_sqrt(float32 a, float_status *status)
4593 {
4594     FloatParts64 p;
4595
4596     float32_unpack_canonical(&p, a, status);
4597     parts_sqrt(&p, status, &float32_params);
4598     return float32_round_pack_canonical(&p, status);
4599 }
4600
4601 static float64 QEMU_SOFTFLOAT_ATTR
4602 soft_f64_sqrt(float64 a, float_status *status)
4603 {
4604     FloatParts64 p;
4605
4606     float64_unpack_canonical(&p, a, status);
4607     parts_sqrt(&p, status, &float64_params);
4608     return float64_round_pack_canonical(&p, status);
4609 }
4610
4611 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4612 {
4613     union_float32 ua, ur;
4614
4615     ua.s = xa;
4616     if (unlikely(!can_use_fpu(s))) {
4617         goto soft;
4618     }
4619
4620     float32_input_flush1(&ua.s, s);
4621     if (QEMU_HARDFLOAT_1F32_USE_FP) {
4622         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4623                        fpclassify(ua.h) == FP_ZERO) ||
4624                      signbit(ua.h))) {
4625             goto soft;
4626         }
4627     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4628                         float32_is_neg(ua.s))) {
4629         goto soft;
4630     }
4631     ur.h = sqrtf(ua.h);
4632     return ur.s;
4633
4634  soft:
4635     return soft_f32_sqrt(ua.s, s);
4636 }
4637
4638 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4639 {
4640     union_float64 ua, ur;
4641
4642     ua.s = xa;
4643     if (unlikely(!can_use_fpu(s))) {
4644         goto soft;
4645     }
4646
4647     float64_input_flush1(&ua.s, s);
4648     if (QEMU_HARDFLOAT_1F64_USE_FP) {
4649         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4650                        fpclassify(ua.h) == FP_ZERO) ||
4651                      signbit(ua.h))) {
4652             goto soft;
4653         }
4654     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4655                         float64_is_neg(ua.s))) {
4656         goto soft;
4657     }
4658     ur.h = sqrt(ua.h);
4659     return ur.s;
4660
4661  soft:
4662     return soft_f64_sqrt(ua.s, s);
4663 }
4664
4665 float64 float64r32_sqrt(float64 a, float_status *status)
4666 {
4667     FloatParts64 p;
4668
4669     float64_unpack_canonical(&p, a, status);
4670     parts_sqrt(&p, status, &float64_params);
4671     return float64r32_round_pack_canonical(&p, status);
4672 }
4673
4674 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4675 {
4676     FloatParts64 p;
4677
4678     bfloat16_unpack_canonical(&p, a, status);
4679     parts_sqrt(&p, status, &bfloat16_params);
4680     return bfloat16_round_pack_canonical(&p, status);
4681 }
4682
4683 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4684 {
4685     FloatParts128 p;
4686
4687     float128_unpack_canonical(&p, a, status);
4688     parts_sqrt(&p, status, &float128_params);
4689     return float128_round_pack_canonical(&p, status);
4690 }
4691
4692 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4693 {
4694     FloatParts128 p;
4695
4696     if (!floatx80_unpack_canonical(&p, a, s)) {
4697         return floatx80_default_nan(s);
4698     }
4699     parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4700     return floatx80_round_pack_canonical(&p, s);
4701 }
4702
4703 /*
4704  * log2
4705  */
4706 float32 float32_log2(float32 a, float_status *status)
4707 {
4708     FloatParts64 p;
4709
4710     float32_unpack_canonical(&p, a, status);
4711     parts_log2(&p, status, &float32_params);
4712     return float32_round_pack_canonical(&p, status);
4713 }
4714
4715 float64 float64_log2(float64 a, float_status *status)
4716 {
4717     FloatParts64 p;
4718
4719     float64_unpack_canonical(&p, a, status);
4720     parts_log2(&p, status, &float64_params);
4721     return float64_round_pack_canonical(&p, status);
4722 }
4723
4724 /*----------------------------------------------------------------------------
4725 | The pattern for a default generated NaN.
4726 *----------------------------------------------------------------------------*/
4727
4728 float16 float16_default_nan(float_status *status)
4729 {
4730     FloatParts64 p;
4731
4732     parts_default_nan(&p, status);
4733     p.frac >>= float16_params.frac_shift;
4734     return float16_pack_raw(&p);
4735 }
4736
4737 float32 float32_default_nan(float_status *status)
4738 {
4739     FloatParts64 p;
4740
4741     parts_default_nan(&p, status);
4742     p.frac >>= float32_params.frac_shift;
4743     return float32_pack_raw(&p);
4744 }
4745
4746 float64 float64_default_nan(float_status *status)
4747 {
4748     FloatParts64 p;
4749
4750     parts_default_nan(&p, status);
4751     p.frac >>= float64_params.frac_shift;
4752     return float64_pack_raw(&p);
4753 }
4754
4755 float128 float128_default_nan(float_status *status)
4756 {
4757     FloatParts128 p;
4758
4759     parts_default_nan(&p, status);
4760     frac_shr(&p, float128_params.frac_shift);
4761     return float128_pack_raw(&p);
4762 }
4763
4764 bfloat16 bfloat16_default_nan(float_status *status)
4765 {
4766     FloatParts64 p;
4767
4768     parts_default_nan(&p, status);
4769     p.frac >>= bfloat16_params.frac_shift;
4770     return bfloat16_pack_raw(&p);
4771 }
4772
4773 /*----------------------------------------------------------------------------
4774 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4775 *----------------------------------------------------------------------------*/
4776
4777 float16 float16_silence_nan(float16 a, float_status *status)
4778 {
4779     FloatParts64 p;
4780
4781     float16_unpack_raw(&p, a);
4782     p.frac <<= float16_params.frac_shift;
4783     parts_silence_nan(&p, status);
4784     p.frac >>= float16_params.frac_shift;
4785     return float16_pack_raw(&p);
4786 }
4787
4788 float32 float32_silence_nan(float32 a, float_status *status)
4789 {
4790     FloatParts64 p;
4791
4792     float32_unpack_raw(&p, a);
4793     p.frac <<= float32_params.frac_shift;
4794     parts_silence_nan(&p, status);
4795     p.frac >>= float32_params.frac_shift;
4796     return float32_pack_raw(&p);
4797 }
4798
4799 float64 float64_silence_nan(float64 a, float_status *status)
4800 {
4801     FloatParts64 p;
4802
4803     float64_unpack_raw(&p, a);
4804     p.frac <<= float64_params.frac_shift;
4805     parts_silence_nan(&p, status);
4806     p.frac >>= float64_params.frac_shift;
4807     return float64_pack_raw(&p);
4808 }
4809
4810 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4811 {
4812     FloatParts64 p;
4813
4814     bfloat16_unpack_raw(&p, a);
4815     p.frac <<= bfloat16_params.frac_shift;
4816     parts_silence_nan(&p, status);
4817     p.frac >>= bfloat16_params.frac_shift;
4818     return bfloat16_pack_raw(&p);
4819 }
4820
4821 float128 float128_silence_nan(float128 a, float_status *status)
4822 {
4823     FloatParts128 p;
4824
4825     float128_unpack_raw(&p, a);
4826     frac_shl(&p, float128_params.frac_shift);
4827     parts_silence_nan(&p, status);
4828     frac_shr(&p, float128_params.frac_shift);
4829     return float128_pack_raw(&p);
4830 }
4831
4832 /*----------------------------------------------------------------------------
4833 | If `a' is denormal and we are in flush-to-zero mode then set the
4834 | input-denormal exception and return zero. Otherwise just return the value.
4835 *----------------------------------------------------------------------------*/
4836
4837 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4838 {
4839     if (p.exp == 0 && p.frac != 0) {
4840         float_raise(float_flag_input_denormal, status);
4841         return true;
4842     }
4843
4844     return false;
4845 }
4846
4847 float16 float16_squash_input_denormal(float16 a, float_status *status)
4848 {
4849     if (status->flush_inputs_to_zero) {
4850         FloatParts64 p;
4851
4852         float16_unpack_raw(&p, a);
4853         if (parts_squash_denormal(p, status)) {
4854             return float16_set_sign(float16_zero, p.sign);
4855         }
4856     }
4857     return a;
4858 }
4859
4860 float32 float32_squash_input_denormal(float32 a, float_status *status)
4861 {
4862     if (status->flush_inputs_to_zero) {
4863         FloatParts64 p;
4864
4865         float32_unpack_raw(&p, a);
4866         if (parts_squash_denormal(p, status)) {
4867             return float32_set_sign(float32_zero, p.sign);
4868         }
4869     }
4870     return a;
4871 }
4872
4873 float64 float64_squash_input_denormal(float64 a, float_status *status)
4874 {
4875     if (status->flush_inputs_to_zero) {
4876         FloatParts64 p;
4877
4878         float64_unpack_raw(&p, a);
4879         if (parts_squash_denormal(p, status)) {
4880             return float64_set_sign(float64_zero, p.sign);
4881         }
4882     }
4883     return a;
4884 }
4885
4886 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4887 {
4888     if (status->flush_inputs_to_zero) {
4889         FloatParts64 p;
4890
4891         bfloat16_unpack_raw(&p, a);
4892         if (parts_squash_denormal(p, status)) {
4893             return bfloat16_set_sign(bfloat16_zero, p.sign);
4894         }
4895     }
4896     return a;
4897 }
4898
4899 /*----------------------------------------------------------------------------
4900 | Normalizes the subnormal extended double-precision floating-point value
4901 | represented by the denormalized significand `aSig'.  The normalized exponent
4902 | and significand are stored at the locations pointed to by `zExpPtr' and
4903 | `zSigPtr', respectively.
4904 *----------------------------------------------------------------------------*/
4905
4906 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4907                                 uint64_t *zSigPtr)
4908 {
4909     int8_t shiftCount;
4910
4911     shiftCount = clz64(aSig);
4912     *zSigPtr = aSig<<shiftCount;
4913     *zExpPtr = 1 - shiftCount;
4914 }
4915
4916 /*----------------------------------------------------------------------------
4917 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4918 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4919 | and returns the proper extended double-precision floating-point value
4920 | corresponding to the abstract input.  Ordinarily, the abstract value is
4921 | rounded and packed into the extended double-precision format, with the
4922 | inexact exception raised if the abstract input cannot be represented
4923 | exactly.  However, if the abstract value is too large, the overflow and
4924 | inexact exceptions are raised and an infinity or maximal finite value is
4925 | returned.  If the abstract value is too small, the input value is rounded to
4926 | a subnormal number, and the underflow and inexact exceptions are raised if
4927 | the abstract input cannot be represented exactly as a subnormal extended
4928 | double-precision floating-point number.
4929 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4930 | the result is rounded to the same number of bits as single or double
4931 | precision, respectively.  Otherwise, the result is rounded to the full
4932 | precision of the extended double-precision format.
4933 |     The input significand must be normalized or smaller.  If the input
4934 | significand is not normalized, `zExp' must be 0; in that case, the result
4935 | returned is a subnormal number, and it must not require rounding.  The
4936 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4937 | Floating-Point Arithmetic.
4938 *----------------------------------------------------------------------------*/
4939
4940 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4941                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4942                               float_status *status)
4943 {
4944     FloatRoundMode roundingMode;
4945     bool roundNearestEven, increment, isTiny;
4946     int64_t roundIncrement, roundMask, roundBits;
4947
4948     roundingMode = status->float_rounding_mode;
4949     roundNearestEven = ( roundingMode == float_round_nearest_even );
4950     switch (roundingPrecision) {
4951     case floatx80_precision_x:
4952         goto precision80;
4953     case floatx80_precision_d:
4954         roundIncrement = UINT64_C(0x0000000000000400);
4955         roundMask = UINT64_C(0x00000000000007FF);
4956         break;
4957     case floatx80_precision_s:
4958         roundIncrement = UINT64_C(0x0000008000000000);
4959         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4960         break;
4961     default:
4962         g_assert_not_reached();
4963     }
4964     zSig0 |= ( zSig1 != 0 );
4965     switch (roundingMode) {
4966     case float_round_nearest_even:
4967     case float_round_ties_away:
4968         break;
4969     case float_round_to_zero:
4970         roundIncrement = 0;
4971         break;
4972     case float_round_up:
4973         roundIncrement = zSign ? 0 : roundMask;
4974         break;
4975     case float_round_down:
4976         roundIncrement = zSign ? roundMask : 0;
4977         break;
4978     default:
4979         abort();
4980     }
4981     roundBits = zSig0 & roundMask;
4982     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4983         if (    ( 0x7FFE < zExp )
4984              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4985            ) {
4986             goto overflow;
4987         }
4988         if ( zExp <= 0 ) {
4989             if (status->flush_to_zero) {
4990                 float_raise(float_flag_output_denormal, status);
4991                 return packFloatx80(zSign, 0, 0);
4992             }
4993             isTiny = status->tininess_before_rounding
4994                   || (zExp < 0 )
4995                   || (zSig0 <= zSig0 + roundIncrement);
4996             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4997             zExp = 0;
4998             roundBits = zSig0 & roundMask;
4999             if (isTiny && roundBits) {
5000                 float_raise(float_flag_underflow, status);
5001             }
5002             if (roundBits) {
5003                 float_raise(float_flag_inexact, status);
5004             }
5005             zSig0 += roundIncrement;
5006             if ( (int64_t) zSig0 < 0 ) zExp = 1;
5007             roundIncrement = roundMask + 1;
5008             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5009                 roundMask |= roundIncrement;
5010             }
5011             zSig0 &= ~ roundMask;
5012             return packFloatx80( zSign, zExp, zSig0 );
5013         }
5014     }
5015     if (roundBits) {
5016         float_raise(float_flag_inexact, status);
5017     }
5018     zSig0 += roundIncrement;
5019     if ( zSig0 < roundIncrement ) {
5020         ++zExp;
5021         zSig0 = UINT64_C(0x8000000000000000);
5022     }
5023     roundIncrement = roundMask + 1;
5024     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5025         roundMask |= roundIncrement;
5026     }
5027     zSig0 &= ~ roundMask;
5028     if ( zSig0 == 0 ) zExp = 0;
5029     return packFloatx80( zSign, zExp, zSig0 );
5030  precision80:
5031     switch (roundingMode) {
5032     case float_round_nearest_even:
5033     case float_round_ties_away:
5034         increment = ((int64_t)zSig1 < 0);
5035         break;
5036     case float_round_to_zero:
5037         increment = 0;
5038         break;
5039     case float_round_up:
5040         increment = !zSign && zSig1;
5041         break;
5042     case float_round_down:
5043         increment = zSign && zSig1;
5044         break;
5045     default:
5046         abort();
5047     }
5048     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
5049         if (    ( 0x7FFE < zExp )
5050              || (    ( zExp == 0x7FFE )
5051                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
5052                   && increment
5053                 )
5054            ) {
5055             roundMask = 0;
5056  overflow:
5057             float_raise(float_flag_overflow | float_flag_inexact, status);
5058             if (    ( roundingMode == float_round_to_zero )
5059                  || ( zSign && ( roundingMode == float_round_up ) )
5060                  || ( ! zSign && ( roundingMode == float_round_down ) )
5061                ) {
5062                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
5063             }
5064             return packFloatx80(zSign,
5065                                 floatx80_infinity_high,
5066                                 floatx80_infinity_low);
5067         }
5068         if ( zExp <= 0 ) {
5069             isTiny = status->tininess_before_rounding
5070                   || (zExp < 0)
5071                   || !increment
5072                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
5073             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
5074             zExp = 0;
5075             if (isTiny && zSig1) {
5076                 float_raise(float_flag_underflow, status);
5077             }
5078             if (zSig1) {
5079                 float_raise(float_flag_inexact, status);
5080             }
5081             switch (roundingMode) {
5082             case float_round_nearest_even:
5083             case float_round_ties_away:
5084                 increment = ((int64_t)zSig1 < 0);
5085                 break;
5086             case float_round_to_zero:
5087                 increment = 0;
5088                 break;
5089             case float_round_up:
5090                 increment = !zSign && zSig1;
5091                 break;
5092             case float_round_down:
5093                 increment = zSign && zSig1;
5094                 break;
5095             default:
5096                 abort();
5097             }
5098             if ( increment ) {
5099                 ++zSig0;
5100                 if (!(zSig1 << 1) && roundNearestEven) {
5101                     zSig0 &= ~1;
5102                 }
5103                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
5104             }
5105             return packFloatx80( zSign, zExp, zSig0 );
5106         }
5107     }
5108     if (zSig1) {
5109         float_raise(float_flag_inexact, status);
5110     }
5111     if ( increment ) {
5112         ++zSig0;
5113         if ( zSig0 == 0 ) {
5114             ++zExp;
5115             zSig0 = UINT64_C(0x8000000000000000);
5116         }
5117         else {
5118             if (!(zSig1 << 1) && roundNearestEven) {
5119                 zSig0 &= ~1;
5120             }
5121         }
5122     }
5123     else {
5124         if ( zSig0 == 0 ) zExp = 0;
5125     }
5126     return packFloatx80( zSign, zExp, zSig0 );
5127
5128 }
5129
5130 /*----------------------------------------------------------------------------
5131 | Takes an abstract floating-point value having sign `zSign', exponent
5132 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5133 | and returns the proper extended double-precision floating-point value
5134 | corresponding to the abstract input.  This routine is just like
5135 | `roundAndPackFloatx80' except that the input significand does not have to be
5136 | normalized.
5137 *----------------------------------------------------------------------------*/
5138
5139 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
5140                                        bool zSign, int32_t zExp,
5141                                        uint64_t zSig0, uint64_t zSig1,
5142                                        float_status *status)
5143 {
5144     int8_t shiftCount;
5145
5146     if ( zSig0 == 0 ) {
5147         zSig0 = zSig1;
5148         zSig1 = 0;
5149         zExp -= 64;
5150     }
5151     shiftCount = clz64(zSig0);
5152     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5153     zExp -= shiftCount;
5154     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5155                                 zSig0, zSig1, status);
5156
5157 }
5158
5159 /*----------------------------------------------------------------------------
5160 | Returns the binary exponential of the single-precision floating-point value
5161 | `a'. The operation is performed according to the IEC/IEEE Standard for
5162 | Binary Floating-Point Arithmetic.
5163 |
5164 | Uses the following identities:
5165 |
5166 | 1. -------------------------------------------------------------------------
5167 |      x    x*ln(2)
5168 |     2  = e
5169 |
5170 | 2. -------------------------------------------------------------------------
5171 |                      2     3     4     5           n
5172 |      x        x     x     x     x     x           x
5173 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5174 |               1!    2!    3!    4!    5!          n!
5175 *----------------------------------------------------------------------------*/
5176
5177 static const float64 float32_exp2_coefficients[15] =
5178 {
5179     const_float64( 0x3ff0000000000000ll ), /*  1 */
5180     const_float64( 0x3fe0000000000000ll ), /*  2 */
5181     const_float64( 0x3fc5555555555555ll ), /*  3 */
5182     const_float64( 0x3fa5555555555555ll ), /*  4 */
5183     const_float64( 0x3f81111111111111ll ), /*  5 */
5184     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
5185     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
5186     const_float64( 0x3efa01a01a01a01all ), /*  8 */
5187     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
5188     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5189     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5190     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5191     const_float64( 0x3de6124613a86d09ll ), /* 13 */
5192     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5193     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5194 };
5195
5196 float32 float32_exp2(float32 a, float_status *status)
5197 {
5198     FloatParts64 xp, xnp, tp, rp;
5199     int i;
5200
5201     float32_unpack_canonical(&xp, a, status);
5202     if (unlikely(xp.cls != float_class_normal)) {
5203         switch (xp.cls) {
5204         case float_class_snan:
5205         case float_class_qnan:
5206             parts_return_nan(&xp, status);
5207             return float32_round_pack_canonical(&xp, status);
5208         case float_class_inf:
5209             return xp.sign ? float32_zero : a;
5210         case float_class_zero:
5211             return float32_one;
5212         default:
5213             break;
5214         }
5215         g_assert_not_reached();
5216     }
5217
5218     float_raise(float_flag_inexact, status);
5219
5220     float64_unpack_canonical(&tp, float64_ln2, status);
5221     xp = *parts_mul(&xp, &tp, status);
5222     xnp = xp;
5223
5224     float64_unpack_canonical(&rp, float64_one, status);
5225     for (i = 0 ; i < 15 ; i++) {
5226         float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
5227         rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
5228         xnp = *parts_mul(&xnp, &xp, status);
5229     }
5230
5231     return float32_round_pack_canonical(&rp, status);
5232 }
5233
5234 /*----------------------------------------------------------------------------
5235 | Rounds the extended double-precision floating-point value `a'
5236 | to the precision provided by floatx80_rounding_precision and returns the
5237 | result as an extended double-precision floating-point value.
5238 | The operation is performed according to the IEC/IEEE Standard for Binary
5239 | Floating-Point Arithmetic.
5240 *----------------------------------------------------------------------------*/
5241
5242 floatx80 floatx80_round(floatx80 a, float_status *status)
5243 {
5244     FloatParts128 p;
5245
5246     if (!floatx80_unpack_canonical(&p, a, status)) {
5247         return floatx80_default_nan(status);
5248     }
5249     return floatx80_round_pack_canonical(&p, status);
5250 }
5251
5252 static void __attribute__((constructor)) softfloat_init(void)
5253 {
5254     union_float64 ua, ub, uc, ur;
5255
5256     if (QEMU_NO_HARDFLOAT) {
5257         return;
5258     }
5259     /*
5260      * Test that the host's FMA is not obviously broken. For example,
5261      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5262      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5263      */
5264     ua.s = 0x0020000000000001ULL;
5265     ub.s = 0x3ca0000000000000ULL;
5266     uc.s = 0x0020000000000000ULL;
5267     ur.h = fma(ua.h, ub.h, uc.h);
5268     if (ur.s != 0x0020000000000001ULL) {
5269         force_soft_fma = true;
5270     }
5271 }