4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
44 ===============================================================================
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
85 #include "qemu/osdep.h"
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
90 /* We only need stdlib for abort() */
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
/*
 * Flush a denormal input to a same-signed zero, raising the
 * input-denormal exception flag.  The "__nocheck" variant does NOT
 * consult s->flush_inputs_to_zero; callers are expected to have
 * checked that already (see GEN_INPUT_FLUSH1/2/3 below).
 * NOTE(review): brace lines were missing from the mangled source and
 * have been reconstructed — confirm against upstream.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK
/*
 * One-operand flush-to-zero helper: no-op unless the status flags
 * request flushing denormal inputs, then defers to the nocheck variant.
 */
#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1
/*
 * Two-operand variant of the input flush helper: checks
 * flush_inputs_to_zero once, then flushes both operands.
 */
#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2
/*
 * Three-operand variant of the input flush helper (used for fused
 * multiply-add style operations).
 */
#define GEN_INPUT_FLUSH3(name, soft_t)                                       \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                        \
        if (likely(!s->flush_inputs_to_zero)) {                              \
            return;                                                          \
        }                                                                    \
        soft_t ## _input_flush__nocheck(a, s);                               \
        soft_t ## _input_flush__nocheck(b, s);                               \
        soft_t ## _input_flush__nocheck(c, s);                               \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF 1
215 # define QEMU_HARDFLOAT_USE_ISINF 0
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 # warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
/*
 * Return true if the host FPU fast path ("hardfloat") may be used for
 * this operation.  Hardfloat is usable only when the inexact flag is
 * already set (so we need not detect newly-raised inexact) and the
 * rounding mode matches the host default of round-to-nearest-even.
 * NOTE(review): the early-return body was missing from the mangled
 * source and has been reconstructed as `return false;` — confirm.
 */
static inline bool can_use_fpu(const float_status *s)
{
    if (QEMU_NO_HARDFLOAT) {
        return false;
    }
    return likely(s->float_exception_flags & float_flag_inexact &&
                  s->float_rounding_mode == float_round_nearest_even);
}
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
266 typedef bool (*f32_check_fn
)(union_float32 a
, union_float32 b
);
267 typedef bool (*f64_check_fn
)(union_float64 a
, union_float64 b
);
269 typedef float32 (*soft_f32_op2_fn
)(float32 a
, float32 b
, float_status
*s
);
270 typedef float64 (*soft_f64_op2_fn
)(float64 a
, float64 b
, float_status
*s
);
271 typedef float (*hard_f32_op2_fn
)(float a
, float b
);
272 typedef double (*hard_f64_op2_fn
)(double a
, double b
);
274 /* 2-input is-zero-or-normal */
/*
 * 2-input is-zero-or-normal: true iff both single-precision operands
 * are zero or normal (no denormal/inf/NaN), i.e. eligible for the
 * hardfloat fast path.
 */
static inline bool f32_is_zon2(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        /*
         * Not using a temp variable for consecutive fpclassify calls ends up
         * generating faster code.
         */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s);
}
/* As f32_is_zon2, but for double precision. */
static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        /* Consecutive fpclassify calls without a temp generate faster code. */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s);
}
299 /* 3-input is-zero-or-normal */
/*
 * 3-input is-zero-or-normal: true iff all three single-precision
 * operands are zero or normal (used for fused multiply-add).
 */
static inline
bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
{
    if (QEMU_HARDFLOAT_3F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s) &&
           float32_is_zero_or_normal(c.s);
}
/* As f32_is_zon3, but for double precision. */
static inline
bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
{
    if (QEMU_HARDFLOAT_3F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s) &&
           float64_is_zero_or_normal(c.s);
}
/*
 * Infinity checks, choosing between the host isinf() and the softfloat
 * predicate based on measured performance (see QEMU_HARDFLOAT_USE_ISINF).
 * NOTE(review): the `return isinf(a.h);` branch bodies were missing from
 * the mangled source and have been reconstructed — confirm upstream.
 */
static inline bool f32_is_inf(union_float32 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float32_is_infinity(a.s);
}

static inline bool f64_is_inf(union_float64 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float64_is_infinity(a.s);
}
/*
 * Generic two-operand single-precision wrapper: try the host FPU
 * ("hard") path when the status and operands permit, otherwise fall
 * back to the softfloat ("soft") implementation.
 *
 * @pre:  predicate on the inputs that must hold for the fast path
 * @post: predicate consulted when the hard result is suspiciously tiny
 *        (<= FLT_MIN), where underflow detection might be needed
 *
 * Overflow is detected by the hard result being infinite.
 * NOTE(review): the `ua.s = xa; ub.s = xb;`, `goto soft;` bodies, the
 * `return ur.s;` and the `soft:` label were missing from the mangled
 * source and have been reconstructed — confirm against upstream.
 */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
/*
 * As float32_gen2, but for double precision (fabs/DBL_MIN in place of
 * fabsf/FLT_MIN).  See float32_gen2 for the fast-path/fallback logic.
 * NOTE(review): assignment lines, `goto soft;` bodies, `return ur.s;`
 * and the `soft:` label were reconstructed from the mangled source.
 */
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
405 * Classify a floating point number. Everything above float_class_qnan
406 * is a NaN so cls >= float_class_qnan is any NaN.
409 typedef enum __attribute__ ((__packed__
)) {
410 float_class_unclassified
,
414 float_class_qnan
, /* all NaNs from here */
418 #define float_cmask(bit) (1u << (bit))
421 float_cmask_zero
= float_cmask(float_class_zero
),
422 float_cmask_normal
= float_cmask(float_class_normal
),
423 float_cmask_inf
= float_cmask(float_class_inf
),
424 float_cmask_qnan
= float_cmask(float_class_qnan
),
425 float_cmask_snan
= float_cmask(float_class_snan
),
427 float_cmask_infzero
= float_cmask_zero
| float_cmask_inf
,
428 float_cmask_anynan
= float_cmask_qnan
| float_cmask_snan
,
431 /* Flags for parts_minmax. */
433 /* Set for minimum; clear for maximum. */
435 /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
437 /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
441 /* Simple helpers for checking if, or what kind of, NaN we have */
/*
 * Simple helpers for checking if, or what kind of, NaN we have.
 * The FloatClass enum orders all NaN classes at and above
 * float_class_qnan, so a single >= comparison covers "any NaN".
 */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}
458 * Structure holding all of the decomposed parts of a float.
459 * The exponent is unbiased and the fraction is normalized.
461 * The fraction words are stored in big-endian word ordering,
462 * so that truncation from a larger format to a smaller format
463 * can be done simply by ignoring subsequent elements.
471 /* Routines that know the structure may reference the singular name. */
474 * Routines expanded with multiple structures reference "hi" and "lo"
475 * depending on the operation. In FloatParts64, "hi" and "lo" are
476 * both the same word and aliased here.
496 uint64_t frac_hm
; /* high-middle */
497 uint64_t frac_lm
; /* low-middle */
501 /* These apply to the most significant word of each FloatPartsN. */
502 #define DECOMPOSED_BINARY_POINT 63
503 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
505 /* Structure holding all of the relevant parameters for a format.
506 * exp_size: the size of the exponent field
507 * exp_bias: the offset applied to the exponent field
508 * exp_max: the maximum normalised exponent
509 * frac_size: the size of the fraction field
510 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
511 * The following are computed based the size of fraction
512 * round_mask: bits below lsb which must be rounded
513 * The following optional modifiers are available:
514 * arm_althp: handle ARM Alternative Half Precision
526 /* Expand fields based on the size of exponent and fraction */
527 #define FLOAT_PARAMS_(E) \
529 .exp_bias = ((1 << E) - 1) >> 1, \
530 .exp_max = (1 << E) - 1
532 #define FLOAT_PARAMS(E, F) \
535 .frac_shift = (-F - 1) & 63, \
536 .round_mask = (1ull << ((-F - 1) & 63)) - 1
538 static const FloatFmt float16_params
= {
542 static const FloatFmt float16_params_ahp
= {
547 static const FloatFmt bfloat16_params
= {
551 static const FloatFmt float32_params
= {
555 static const FloatFmt float64_params
= {
559 static const FloatFmt float128_params
= {
560 FLOAT_PARAMS(15, 112)
563 #define FLOATX80_PARAMS(R) \
565 .frac_size = R == 64 ? 63 : R, \
567 .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
569 static const FloatFmt floatx80_params
[3] = {
570 [floatx80_precision_s
] = { FLOATX80_PARAMS(23) },
571 [floatx80_precision_d
] = { FLOATX80_PARAMS(52) },
572 [floatx80_precision_x
] = { FLOATX80_PARAMS(64) },
575 /* Unpack a float to parts, but do not canonicalize. */
/*
 * Unpack a raw 64-bit-or-smaller float encoding into parts, but do not
 * canonicalize: sign/exponent/fraction are extracted per @fmt field
 * widths, the class is left unclassified, and the fraction keeps its
 * in-format position.
 */
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;

    *r = (FloatParts64) {
        .cls = float_class_unclassified,
        .sign = extract64(raw, f_size + e_size, 1),
        .exp = extract64(raw, f_size, e_size),
        .frac = extract64(raw, 0, f_size)
    };
}
/* Per-format raw-unpack wrappers over unpack_raw64. */
static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}
/*
 * Unpack an 80-bit extended float without canonicalizing.  The 16-bit
 * high word holds sign and exponent; the explicit 64-bit significand
 * goes into frac_hi.
 * NOTE(review): the `.frac_hi = f.low` initializer line was missing
 * from the mangled source and has been reconstructed — confirm.
 */
static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
{
    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract32(f.high, 15, 1),
        .exp = extract32(f.high, 0, 15),
        .frac_hi = f.low
    };
}
/*
 * Unpack a 128-bit float without canonicalizing.  The high 64-bit word
 * contains sign, exponent and the top of the fraction; the low word is
 * the remaining fraction bits.
 * NOTE(review): the `.frac_lo = f.low` initializer line was missing
 * from the mangled source and has been reconstructed — confirm.
 */
static void float128_unpack_raw(FloatParts128 *p, float128 f)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low
    };
}
633 /* Pack a float from parts, but do not canonicalize. */
/*
 * Pack parts back into a raw <=64-bit encoding, but do not
 * canonicalize: sign, exponent and fraction are deposited at the
 * positions dictated by @fmt.  Inverse of unpack_raw64.
 */
static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;
    uint64_t ret;

    ret = (uint64_t)p->sign << (f_size + e_size);
    ret = deposit64(ret, f_size, e_size, p->exp);
    ret = deposit64(ret, 0, f_size, p->frac);
    return ret;
}
/* Per-format raw-pack wrappers over pack_raw64. */
static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}
/*
 * Pack 128-bit parts back into the raw float128 encoding (no
 * canonicalization).  Inverse of float128_unpack_raw.
 */
static float128 float128_pack_raw(const FloatParts128 *p)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}
678 /*----------------------------------------------------------------------------
679 | Functions and definitions to determine: (1) whether tininess for underflow
680 | is detected before or after rounding by default, (2) what (if anything)
681 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
682 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
683 | are propagated from function inputs to output. These details are target-
685 *----------------------------------------------------------------------------*/
686 #include "softfloat-specialize.c.inc"
688 #define PARTS_GENERIC_64_128(NAME, P) \
689 QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
691 #define PARTS_GENERIC_64_128_256(NAME, P) \
692 QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
693 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
695 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
696 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
698 static void parts64_return_nan(FloatParts64
*a
, float_status
*s
);
699 static void parts128_return_nan(FloatParts128
*a
, float_status
*s
);
701 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
703 static FloatParts64
*parts64_pick_nan(FloatParts64
*a
, FloatParts64
*b
,
705 static FloatParts128
*parts128_pick_nan(FloatParts128
*a
, FloatParts128
*b
,
708 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
710 static FloatParts64
*parts64_pick_nan_muladd(FloatParts64
*a
, FloatParts64
*b
,
711 FloatParts64
*c
, float_status
*s
,
712 int ab_mask
, int abc_mask
);
713 static FloatParts128
*parts128_pick_nan_muladd(FloatParts128
*a
,
717 int ab_mask
, int abc_mask
);
719 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
720 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
722 static void parts64_canonicalize(FloatParts64
*p
, float_status
*status
,
723 const FloatFmt
*fmt
);
724 static void parts128_canonicalize(FloatParts128
*p
, float_status
*status
,
725 const FloatFmt
*fmt
);
727 #define parts_canonicalize(A, S, F) \
728 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
730 static void parts64_uncanon_normal(FloatParts64
*p
, float_status
*status
,
731 const FloatFmt
*fmt
);
732 static void parts128_uncanon_normal(FloatParts128
*p
, float_status
*status
,
733 const FloatFmt
*fmt
);
735 #define parts_uncanon_normal(A, S, F) \
736 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
738 static void parts64_uncanon(FloatParts64
*p
, float_status
*status
,
739 const FloatFmt
*fmt
);
740 static void parts128_uncanon(FloatParts128
*p
, float_status
*status
,
741 const FloatFmt
*fmt
);
743 #define parts_uncanon(A, S, F) \
744 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
746 static void parts64_add_normal(FloatParts64
*a
, FloatParts64
*b
);
747 static void parts128_add_normal(FloatParts128
*a
, FloatParts128
*b
);
748 static void parts256_add_normal(FloatParts256
*a
, FloatParts256
*b
);
750 #define parts_add_normal(A, B) \
751 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
753 static bool parts64_sub_normal(FloatParts64
*a
, FloatParts64
*b
);
754 static bool parts128_sub_normal(FloatParts128
*a
, FloatParts128
*b
);
755 static bool parts256_sub_normal(FloatParts256
*a
, FloatParts256
*b
);
757 #define parts_sub_normal(A, B) \
758 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
760 static FloatParts64
*parts64_addsub(FloatParts64
*a
, FloatParts64
*b
,
761 float_status
*s
, bool subtract
);
762 static FloatParts128
*parts128_addsub(FloatParts128
*a
, FloatParts128
*b
,
763 float_status
*s
, bool subtract
);
765 #define parts_addsub(A, B, S, Z) \
766 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
768 static FloatParts64
*parts64_mul(FloatParts64
*a
, FloatParts64
*b
,
770 static FloatParts128
*parts128_mul(FloatParts128
*a
, FloatParts128
*b
,
773 #define parts_mul(A, B, S) \
774 PARTS_GENERIC_64_128(mul, A)(A, B, S)
776 static FloatParts64
*parts64_muladd(FloatParts64
*a
, FloatParts64
*b
,
777 FloatParts64
*c
, int flags
,
779 static FloatParts128
*parts128_muladd(FloatParts128
*a
, FloatParts128
*b
,
780 FloatParts128
*c
, int flags
,
783 #define parts_muladd(A, B, C, Z, S) \
784 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
786 static FloatParts64
*parts64_div(FloatParts64
*a
, FloatParts64
*b
,
788 static FloatParts128
*parts128_div(FloatParts128
*a
, FloatParts128
*b
,
791 #define parts_div(A, B, S) \
792 PARTS_GENERIC_64_128(div, A)(A, B, S)
794 static FloatParts64
*parts64_modrem(FloatParts64
*a
, FloatParts64
*b
,
795 uint64_t *mod_quot
, float_status
*s
);
796 static FloatParts128
*parts128_modrem(FloatParts128
*a
, FloatParts128
*b
,
797 uint64_t *mod_quot
, float_status
*s
);
799 #define parts_modrem(A, B, Q, S) \
800 PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
802 static void parts64_sqrt(FloatParts64
*a
, float_status
*s
, const FloatFmt
*f
);
803 static void parts128_sqrt(FloatParts128
*a
, float_status
*s
, const FloatFmt
*f
);
805 #define parts_sqrt(A, S, F) \
806 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
808 static bool parts64_round_to_int_normal(FloatParts64
*a
, FloatRoundMode rm
,
809 int scale
, int frac_size
);
810 static bool parts128_round_to_int_normal(FloatParts128
*a
, FloatRoundMode r
,
811 int scale
, int frac_size
);
813 #define parts_round_to_int_normal(A, R, C, F) \
814 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
816 static void parts64_round_to_int(FloatParts64
*a
, FloatRoundMode rm
,
817 int scale
, float_status
*s
,
818 const FloatFmt
*fmt
);
819 static void parts128_round_to_int(FloatParts128
*a
, FloatRoundMode r
,
820 int scale
, float_status
*s
,
821 const FloatFmt
*fmt
);
823 #define parts_round_to_int(A, R, C, S, F) \
824 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
826 static int64_t parts64_float_to_sint(FloatParts64
*p
, FloatRoundMode rmode
,
827 int scale
, int64_t min
, int64_t max
,
829 static int64_t parts128_float_to_sint(FloatParts128
*p
, FloatRoundMode rmode
,
830 int scale
, int64_t min
, int64_t max
,
833 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
834 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
836 static uint64_t parts64_float_to_uint(FloatParts64
*p
, FloatRoundMode rmode
,
837 int scale
, uint64_t max
,
839 static uint64_t parts128_float_to_uint(FloatParts128
*p
, FloatRoundMode rmode
,
840 int scale
, uint64_t max
,
843 #define parts_float_to_uint(P, R, Z, M, S) \
844 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
846 static void parts64_sint_to_float(FloatParts64
*p
, int64_t a
,
847 int scale
, float_status
*s
);
848 static void parts128_sint_to_float(FloatParts128
*p
, int64_t a
,
849 int scale
, float_status
*s
);
851 #define parts_sint_to_float(P, I, Z, S) \
852 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
854 static void parts64_uint_to_float(FloatParts64
*p
, uint64_t a
,
855 int scale
, float_status
*s
);
856 static void parts128_uint_to_float(FloatParts128
*p
, uint64_t a
,
857 int scale
, float_status
*s
);
859 #define parts_uint_to_float(P, I, Z, S) \
860 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
862 static FloatParts64
*parts64_minmax(FloatParts64
*a
, FloatParts64
*b
,
863 float_status
*s
, int flags
);
864 static FloatParts128
*parts128_minmax(FloatParts128
*a
, FloatParts128
*b
,
865 float_status
*s
, int flags
);
867 #define parts_minmax(A, B, S, F) \
868 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
870 static int parts64_compare(FloatParts64
*a
, FloatParts64
*b
,
871 float_status
*s
, bool q
);
872 static int parts128_compare(FloatParts128
*a
, FloatParts128
*b
,
873 float_status
*s
, bool q
);
875 #define parts_compare(A, B, S, Q) \
876 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
878 static void parts64_scalbn(FloatParts64
*a
, int n
, float_status
*s
);
879 static void parts128_scalbn(FloatParts128
*a
, int n
, float_status
*s
);
881 #define parts_scalbn(A, N, S) \
882 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
884 static void parts64_log2(FloatParts64
*a
, float_status
*s
, const FloatFmt
*f
);
885 static void parts128_log2(FloatParts128
*a
, float_status
*s
, const FloatFmt
*f
);
887 #define parts_log2(A, S, F) \
888 PARTS_GENERIC_64_128(log2, A)(A, S, F)
891 * Helper functions for softfloat-parts.c.inc, per-size operations.
894 #define FRAC_GENERIC_64_128(NAME, P) \
895 QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
897 #define FRAC_GENERIC_64_128_256(NAME, P) \
898 QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
899 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
/*
 * Multi-word fraction addition: r = a + b, returning the final
 * carry-out.  The 128- and 256-bit variants chain uadd64_carry from
 * the least to the most significant word.
 * NOTE(review): the `bool c = 0;` declarations and `return c;` lines
 * were missing from the mangled source and have been reconstructed.
 */
static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
/*
 * Add a 64-bit immediate @c to the fraction: r = a + c, returning the
 * carry-out.  The 128-bit variant propagates the low-word carry into
 * the high word.
 */
static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
/*
 * Set every fraction bit (all-ones), used e.g. when rounding up to the
 * largest representable fraction.
 * NOTE(review): the frac64 body was missing from the mangled source
 * and has been reconstructed as `a->frac = -1;` — confirm.
 */
static void frac64_allones(FloatParts64 *a)
{
    a->frac = -1;
}

static void frac128_allones(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = -1;
}

#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
951 static int frac64_cmp(FloatParts64
*a
, FloatParts64
*b
)
953 return a
->frac
== b
->frac
? 0 : a
->frac
< b
->frac
? -1 : 1;
/*
 * Three-way comparison of 128-bit fractions: compare the high words
 * first, falling back to the low words only on a tie.
 * NOTE(review): the two `if (ta == tb)` guard lines and the `return 0;`
 * were missing from the mangled source and have been reconstructed.
 */
static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t ta = a->frac_hi, tb = b->frac_hi;

    if (ta == tb) {
        ta = a->frac_lo, tb = b->frac_lo;
        if (ta == tb) {
            return 0;
        }
    }
    return ta < tb ? -1 : 1;
}

#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
/*
 * Zero the fraction words.
 * NOTE(review): the frac64 body was missing from the mangled source
 * and has been reconstructed as `a->frac = 0;` — confirm.
 */
static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
/*
 * Divide fractions: a->frac = a->frac / b->frac, with the quotient's
 * lsb used as a sticky bit for any nonzero remainder (sets inexact
 * later).  Returns true when A < B, i.e. the caller must decrement the
 * result exponent by one to compensate for the pre-shift.
 * NOTE(review): the lines building the 128-bit dividend (n0:n1) were
 * missing from the mangled source; they are reconstructed here to be
 * consistent with the visible call udiv_qrnnd(&r, n0, n1, b->frac),
 * where n0 is the high word — confirm against upstream.
 */
static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce the an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}
/*
 * 128-bit fraction division via two rounds of 128/64 -> 64 estimated
 * division, each followed by a correction loop.  As with frac64_div,
 * returns true when A < B so the caller can adjust the exponent, and
 * the quotient lsb is used as a sticky bit for inexactness.
 * NOTE(review): the pre-shift guard, the correction `while` loops, the
 * final result stores and the return were missing from the mangled
 * source and have been reconstructed — confirm against upstream.
 */
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        /* Pre-shift A right by one so the quotient fits in 128 bits. */
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1063 static bool frac64_eqz(FloatParts64
*a
)
1065 return a
->frac
== 0;
1068 static bool frac128_eqz(FloatParts128
*a
)
1070 return (a
->frac_hi
| a
->frac_lo
) == 0;
1073 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1075 static void frac64_mulw(FloatParts128
*r
, FloatParts64
*a
, FloatParts64
*b
)
1077 mulu64(&r
->frac_lo
, &r
->frac_hi
, a
->frac
, b
->frac
);
1080 static void frac128_mulw(FloatParts256
*r
, FloatParts128
*a
, FloatParts128
*b
)
1082 mul128To256(a
->frac_hi
, a
->frac_lo
, b
->frac_hi
, b
->frac_lo
,
1083 &r
->frac_hi
, &r
->frac_hm
, &r
->frac_lm
, &r
->frac_lo
);
1086 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1088 static void frac64_neg(FloatParts64
*a
)
1093 static void frac128_neg(FloatParts128
*a
)
1096 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1097 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1100 static void frac256_neg(FloatParts256
*a
)
1103 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1104 a
->frac_lm
= usub64_borrow(0, a
->frac_lm
, &c
);
1105 a
->frac_hm
= usub64_borrow(0, a
->frac_hm
, &c
);
1106 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1109 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1111 static int frac64_normalize(FloatParts64
*a
)
1114 int shift
= clz64(a
->frac
);
1121 static int frac128_normalize(FloatParts128
*a
)
1124 int shl
= clz64(a
->frac_hi
);
1125 a
->frac_hi
= shl_double(a
->frac_hi
, a
->frac_lo
, shl
);
1128 } else if (a
->frac_lo
) {
1129 int shl
= clz64(a
->frac_lo
);
1130 a
->frac_hi
= a
->frac_lo
<< shl
;
1137 static int frac256_normalize(FloatParts256
*a
)
1139 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1140 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1152 a0
= a1
, a1
= a2
, a2
= a3
, a3
= 0;
1155 a0
= a2
, a1
= a3
, a2
= 0, a3
= 0;
1158 a0
= a3
, a1
= 0, a2
= 0, a3
= 0;
1161 a0
= 0, a1
= 0, a2
= 0, a3
= 0;
1171 a0
= shl_double(a0
, a1
, shl
);
1172 a1
= shl_double(a1
, a2
, shl
);
1173 a2
= shl_double(a2
, a3
, shl
);
1184 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1186 static void frac64_modrem(FloatParts64
*a
, FloatParts64
*b
, uint64_t *mod_quot
)
1188 uint64_t a0
, a1
, b0
, t0
, t1
, q
, quot
;
1189 int exp_diff
= a
->exp
- b
->exp
;
1195 if (exp_diff
< -1) {
1201 if (exp_diff
== -1) {
1207 quot
= q
= b0
<= a0
;
1213 while (exp_diff
> 0) {
1214 q
= estimateDiv128To64(a0
, a1
, b0
);
1215 q
= q
> 2 ? q
- 2 : 0;
1216 mul64To128(b0
, q
, &t0
, &t1
);
1217 sub128(a0
, a1
, t0
, t1
, &a0
, &a1
);
1218 shortShift128Left(a0
, a1
, 62, &a0
, &a1
);
1220 quot
= (quot
<< 62) + q
;
1225 q
= estimateDiv128To64(a0
, a1
, b0
);
1226 q
= q
> 2 ? (q
- 2) >> (64 - exp_diff
) : 0;
1227 mul64To128(b0
, q
<< (64 - exp_diff
), &t0
, &t1
);
1228 sub128(a0
, a1
, t0
, t1
, &a0
, &a1
);
1229 shortShift128Left(0, b0
, 64 - exp_diff
, &t0
, &t1
);
1230 while (le128(t0
, t1
, a0
, a1
)) {
1232 sub128(a0
, a1
, t0
, t1
, &a0
, &a1
);
1234 quot
= (exp_diff
< 64 ? quot
<< exp_diff
: 0) + q
;
1243 sub128(t0
, t1
, a0
, a1
, &t0
, &t1
);
1244 if (lt128(t0
, t1
, a0
, a1
) ||
1245 (eq128(t0
, t1
, a0
, a1
) && (q
& 1))) {
1254 shortShift128Left(a0
, a1
, shift
, &a0
, &a1
);
1255 } else if (likely(a1
)) {
1261 a
->cls
= float_class_zero
;
1265 a
->exp
= b
->exp
+ exp_diff
- shift
;
1266 a
->frac
= a0
| (a1
!= 0);
1269 static void frac128_modrem(FloatParts128
*a
, FloatParts128
*b
,
1272 uint64_t a0
, a1
, a2
, b0
, b1
, t0
, t1
, t2
, q
, quot
;
1273 int exp_diff
= a
->exp
- b
->exp
;
1280 if (exp_diff
< -1) {
1286 if (exp_diff
== -1) {
1287 shift128Right(a0
, a1
, 1, &a0
, &a1
);
1294 quot
= q
= le128(b0
, b1
, a0
, a1
);
1296 sub128(a0
, a1
, b0
, b1
, &a0
, &a1
);
1300 while (exp_diff
> 0) {
1301 q
= estimateDiv128To64(a0
, a1
, b0
);
1302 q
= q
> 4 ? q
- 4 : 0;
1303 mul128By64To192(b0
, b1
, q
, &t0
, &t1
, &t2
);
1304 sub192(a0
, a1
, a2
, t0
, t1
, t2
, &a0
, &a1
, &a2
);
1305 shortShift192Left(a0
, a1
, a2
, 61, &a0
, &a1
, &a2
);
1307 quot
= (quot
<< 61) + q
;
1312 q
= estimateDiv128To64(a0
, a1
, b0
);
1313 q
= q
> 4 ? (q
- 4) >> (64 - exp_diff
) : 0;
1314 mul128By64To192(b0
, b1
, q
<< (64 - exp_diff
), &t0
, &t1
, &t2
);
1315 sub192(a0
, a1
, a2
, t0
, t1
, t2
, &a0
, &a1
, &a2
);
1316 shortShift192Left(0, b0
, b1
, 64 - exp_diff
, &t0
, &t1
, &t2
);
1317 while (le192(t0
, t1
, t2
, a0
, a1
, a2
)) {
1319 sub192(a0
, a1
, a2
, t0
, t1
, t2
, &a0
, &a1
, &a2
);
1321 quot
= (exp_diff
< 64 ? quot
<< exp_diff
: 0) + q
;
1331 sub192(t0
, t1
, t2
, a0
, a1
, a2
, &t0
, &t1
, &t2
);
1332 if (lt192(t0
, t1
, t2
, a0
, a1
, a2
) ||
1333 (eq192(t0
, t1
, t2
, a0
, a1
, a2
) && (q
& 1))) {
1343 shortShift192Left(a0
, a1
, a2
, shift
, &a0
, &a1
, &a2
);
1344 } else if (likely(a1
)) {
1346 shortShift128Left(a1
, a2
, shift
, &a0
, &a1
);
1349 } else if (likely(a2
)) {
1355 a
->cls
= float_class_zero
;
1359 a
->exp
= b
->exp
+ exp_diff
- shift
;
1361 a
->frac_lo
= a1
| (a2
!= 0);
1364 #define frac_modrem(A, B, Q) FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1366 static void frac64_shl(FloatParts64
*a
, int c
)
1371 static void frac128_shl(FloatParts128
*a
, int c
)
1373 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1381 a0
= shl_double(a0
, a1
, c
);
1389 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1391 static void frac64_shr(FloatParts64
*a
, int c
)
1396 static void frac128_shr(FloatParts128
*a
, int c
)
1398 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1406 a1
= shr_double(a0
, a1
, c
);
1414 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1416 static void frac64_shrjam(FloatParts64
*a
, int c
)
1418 uint64_t a0
= a
->frac
;
1420 if (likely(c
!= 0)) {
1421 if (likely(c
< 64)) {
1422 a0
= (a0
>> c
) | (shr_double(a0
, 0, c
) != 0);
1430 static void frac128_shrjam(FloatParts128
*a
, int c
)
1432 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1433 uint64_t sticky
= 0;
1435 if (unlikely(c
== 0)) {
1437 } else if (likely(c
< 64)) {
1439 } else if (likely(c
< 128)) {
1453 sticky
|= shr_double(a1
, 0, c
);
1454 a1
= shr_double(a0
, a1
, c
);
1458 a
->frac_lo
= a1
| (sticky
!= 0);
1462 static void frac256_shrjam(FloatParts256
*a
, int c
)
1464 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1465 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1466 uint64_t sticky
= 0;
1468 if (unlikely(c
== 0)) {
1470 } else if (likely(c
< 64)) {
1472 } else if (likely(c
< 256)) {
1473 if (unlikely(c
& 128)) {
1475 a3
= a1
, a2
= a0
, a1
= 0, a0
= 0;
1477 if (unlikely(c
& 64)) {
1479 a3
= a2
, a2
= a1
, a1
= a0
, a0
= 0;
1486 sticky
= a0
| a1
| a2
| a3
;
1487 a0
= a1
= a2
= a3
= 0;
1491 sticky
|= shr_double(a3
, 0, c
);
1492 a3
= shr_double(a2
, a3
, c
);
1493 a2
= shr_double(a1
, a2
, c
);
1494 a1
= shr_double(a0
, a1
, c
);
1498 a
->frac_lo
= a3
| (sticky
!= 0);
1504 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1506 static bool frac64_sub(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
1508 return usub64_overflow(a
->frac
, b
->frac
, &r
->frac
);
1511 static bool frac128_sub(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
1514 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1515 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1519 static bool frac256_sub(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
1522 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1523 r
->frac_lm
= usub64_borrow(a
->frac_lm
, b
->frac_lm
, &c
);
1524 r
->frac_hm
= usub64_borrow(a
->frac_hm
, b
->frac_hm
, &c
);
1525 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1529 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1531 static void frac64_truncjam(FloatParts64
*r
, FloatParts128
*a
)
1533 r
->frac
= a
->frac_hi
| (a
->frac_lo
!= 0);
1536 static void frac128_truncjam(FloatParts128
*r
, FloatParts256
*a
)
1538 r
->frac_hi
= a
->frac_hi
;
1539 r
->frac_lo
= a
->frac_hm
| ((a
->frac_lm
| a
->frac_lo
) != 0);
1542 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1544 static void frac64_widen(FloatParts128
*r
, FloatParts64
*a
)
1546 r
->frac_hi
= a
->frac
;
1550 static void frac128_widen(FloatParts256
*r
, FloatParts128
*a
)
1552 r
->frac_hi
= a
->frac_hi
;
1553 r
->frac_hm
= a
->frac_lo
;
1558 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1561 * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
1562 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1563 * and thus MIT licensed.
1565 static const uint16_t rsqrt_tab
[128] = {
1566 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1567 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1568 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1569 0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1570 0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1571 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1572 0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1573 0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1574 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1575 0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1576 0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1577 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1578 0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1579 0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1580 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1581 0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1584 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1585 #define FloatPartsN glue(FloatParts,N)
1586 #define FloatPartsW glue(FloatParts,W)
1591 #include "softfloat-parts-addsub.c.inc"
1592 #include "softfloat-parts.c.inc"
1599 #include "softfloat-parts-addsub.c.inc"
1600 #include "softfloat-parts.c.inc"
1606 #include "softfloat-parts-addsub.c.inc"
1615 * Pack/unpack routines with a specific FloatFmt.
1618 static void float16a_unpack_canonical(FloatParts64
*p
, float16 f
,
1619 float_status
*s
, const FloatFmt
*params
)
1621 float16_unpack_raw(p
, f
);
1622 parts_canonicalize(p
, s
, params
);
1625 static void float16_unpack_canonical(FloatParts64
*p
, float16 f
,
1628 float16a_unpack_canonical(p
, f
, s
, &float16_params
);
1631 static void bfloat16_unpack_canonical(FloatParts64
*p
, bfloat16 f
,
1634 bfloat16_unpack_raw(p
, f
);
1635 parts_canonicalize(p
, s
, &bfloat16_params
);
1638 static float16
float16a_round_pack_canonical(FloatParts64
*p
,
1640 const FloatFmt
*params
)
1642 parts_uncanon(p
, s
, params
);
1643 return float16_pack_raw(p
);
1646 static float16
float16_round_pack_canonical(FloatParts64
*p
,
1649 return float16a_round_pack_canonical(p
, s
, &float16_params
);
1652 static bfloat16
bfloat16_round_pack_canonical(FloatParts64
*p
,
1655 parts_uncanon(p
, s
, &bfloat16_params
);
1656 return bfloat16_pack_raw(p
);
1659 static void float32_unpack_canonical(FloatParts64
*p
, float32 f
,
1662 float32_unpack_raw(p
, f
);
1663 parts_canonicalize(p
, s
, &float32_params
);
1666 static float32
float32_round_pack_canonical(FloatParts64
*p
,
1669 parts_uncanon(p
, s
, &float32_params
);
1670 return float32_pack_raw(p
);
1673 static void float64_unpack_canonical(FloatParts64
*p
, float64 f
,
1676 float64_unpack_raw(p
, f
);
1677 parts_canonicalize(p
, s
, &float64_params
);
1680 static float64
float64_round_pack_canonical(FloatParts64
*p
,
1683 parts_uncanon(p
, s
, &float64_params
);
1684 return float64_pack_raw(p
);
1687 static void float128_unpack_canonical(FloatParts128
*p
, float128 f
,
1690 float128_unpack_raw(p
, f
);
1691 parts_canonicalize(p
, s
, &float128_params
);
1694 static float128
float128_round_pack_canonical(FloatParts128
*p
,
1697 parts_uncanon(p
, s
, &float128_params
);
1698 return float128_pack_raw(p
);
1701 /* Returns false if the encoding is invalid. */
1702 static bool floatx80_unpack_canonical(FloatParts128
*p
, floatx80 f
,
1705 /* Ensure rounding precision is set before beginning. */
1706 switch (s
->floatx80_rounding_precision
) {
1707 case floatx80_precision_x
:
1708 case floatx80_precision_d
:
1709 case floatx80_precision_s
:
1712 g_assert_not_reached();
1715 if (unlikely(floatx80_invalid_encoding(f
))) {
1716 float_raise(float_flag_invalid
, s
);
1720 floatx80_unpack_raw(p
, f
);
1722 if (likely(p
->exp
!= floatx80_params
[floatx80_precision_x
].exp_max
)) {
1723 parts_canonicalize(p
, s
, &floatx80_params
[floatx80_precision_x
]);
1725 /* The explicit integer bit is ignored, after invalid checks. */
1726 p
->frac_hi
&= MAKE_64BIT_MASK(0, 63);
1727 p
->cls
= (p
->frac_hi
== 0 ? float_class_inf
1728 : parts_is_snan_frac(p
->frac_hi
, s
)
1729 ? float_class_snan
: float_class_qnan
);
1734 static floatx80
floatx80_round_pack_canonical(FloatParts128
*p
,
1737 const FloatFmt
*fmt
= &floatx80_params
[s
->floatx80_rounding_precision
];
1742 case float_class_normal
:
1743 if (s
->floatx80_rounding_precision
== floatx80_precision_x
) {
1744 parts_uncanon_normal(p
, s
, fmt
);
1752 frac_truncjam(&p64
, p
);
1753 parts_uncanon_normal(&p64
, s
, fmt
);
1757 if (exp
!= fmt
->exp_max
) {
1760 /* rounded to inf -- fall through to set frac correctly */
1762 case float_class_inf
:
1763 /* x86 and m68k differ in the setting of the integer bit. */
1764 frac
= floatx80_infinity_low
;
1768 case float_class_zero
:
1773 case float_class_snan
:
1774 case float_class_qnan
:
1775 /* NaNs have the integer bit set. */
1776 frac
= p
->frac_hi
| (1ull << 63);
1781 g_assert_not_reached();
1784 return packFloatx80(p
->sign
, exp
, frac
);
1788 * Addition and subtraction
1791 static float16 QEMU_FLATTEN
1792 float16_addsub(float16 a
, float16 b
, float_status
*status
, bool subtract
)
1794 FloatParts64 pa
, pb
, *pr
;
1796 float16_unpack_canonical(&pa
, a
, status
);
1797 float16_unpack_canonical(&pb
, b
, status
);
1798 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1800 return float16_round_pack_canonical(pr
, status
);
1803 float16
float16_add(float16 a
, float16 b
, float_status
*status
)
1805 return float16_addsub(a
, b
, status
, false);
1808 float16
float16_sub(float16 a
, float16 b
, float_status
*status
)
1810 return float16_addsub(a
, b
, status
, true);
1813 static float32 QEMU_SOFTFLOAT_ATTR
1814 soft_f32_addsub(float32 a
, float32 b
, float_status
*status
, bool subtract
)
1816 FloatParts64 pa
, pb
, *pr
;
1818 float32_unpack_canonical(&pa
, a
, status
);
1819 float32_unpack_canonical(&pb
, b
, status
);
1820 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1822 return float32_round_pack_canonical(pr
, status
);
1825 static float32
soft_f32_add(float32 a
, float32 b
, float_status
*status
)
1827 return soft_f32_addsub(a
, b
, status
, false);
1830 static float32
soft_f32_sub(float32 a
, float32 b
, float_status
*status
)
1832 return soft_f32_addsub(a
, b
, status
, true);
1835 static float64 QEMU_SOFTFLOAT_ATTR
1836 soft_f64_addsub(float64 a
, float64 b
, float_status
*status
, bool subtract
)
1838 FloatParts64 pa
, pb
, *pr
;
1840 float64_unpack_canonical(&pa
, a
, status
);
1841 float64_unpack_canonical(&pb
, b
, status
);
1842 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1844 return float64_round_pack_canonical(pr
, status
);
1847 static float64
soft_f64_add(float64 a
, float64 b
, float_status
*status
)
1849 return soft_f64_addsub(a
, b
, status
, false);
1852 static float64
soft_f64_sub(float64 a
, float64 b
, float_status
*status
)
1854 return soft_f64_addsub(a
, b
, status
, true);
static float hard_f32_add(float a, float b)
{
    /* Host FPU addition for the hardfloat fast path. */
    return a + b;
}
static float hard_f32_sub(float a, float b)
{
    /* Host FPU subtraction for the hardfloat fast path. */
    return a - b;
}
static double hard_f64_add(double a, double b)
{
    /* Host FPU addition for the hardfloat fast path. */
    return a + b;
}
static double hard_f64_sub(double a, double b)
{
    /* Host FPU subtraction for the hardfloat fast path. */
    return a - b;
}
1877 static bool f32_addsubmul_post(union_float32 a
, union_float32 b
)
1879 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
1880 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1882 return !(float32_is_zero(a
.s
) && float32_is_zero(b
.s
));
1885 static bool f64_addsubmul_post(union_float64 a
, union_float64 b
)
1887 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
1888 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1890 return !(float64_is_zero(a
.s
) && float64_is_zero(b
.s
));
1894 static float32
float32_addsub(float32 a
, float32 b
, float_status
*s
,
1895 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
)
1897 return float32_gen2(a
, b
, s
, hard
, soft
,
1898 f32_is_zon2
, f32_addsubmul_post
);
1901 static float64
float64_addsub(float64 a
, float64 b
, float_status
*s
,
1902 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
)
1904 return float64_gen2(a
, b
, s
, hard
, soft
,
1905 f64_is_zon2
, f64_addsubmul_post
);
1908 float32 QEMU_FLATTEN
1909 float32_add(float32 a
, float32 b
, float_status
*s
)
1911 return float32_addsub(a
, b
, s
, hard_f32_add
, soft_f32_add
);
1914 float32 QEMU_FLATTEN
1915 float32_sub(float32 a
, float32 b
, float_status
*s
)
1917 return float32_addsub(a
, b
, s
, hard_f32_sub
, soft_f32_sub
);
1920 float64 QEMU_FLATTEN
1921 float64_add(float64 a
, float64 b
, float_status
*s
)
1923 return float64_addsub(a
, b
, s
, hard_f64_add
, soft_f64_add
);
1926 float64 QEMU_FLATTEN
1927 float64_sub(float64 a
, float64 b
, float_status
*s
)
1929 return float64_addsub(a
, b
, s
, hard_f64_sub
, soft_f64_sub
);
1932 static bfloat16 QEMU_FLATTEN
1933 bfloat16_addsub(bfloat16 a
, bfloat16 b
, float_status
*status
, bool subtract
)
1935 FloatParts64 pa
, pb
, *pr
;
1937 bfloat16_unpack_canonical(&pa
, a
, status
);
1938 bfloat16_unpack_canonical(&pb
, b
, status
);
1939 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1941 return bfloat16_round_pack_canonical(pr
, status
);
1944 bfloat16
bfloat16_add(bfloat16 a
, bfloat16 b
, float_status
*status
)
1946 return bfloat16_addsub(a
, b
, status
, false);
1949 bfloat16
bfloat16_sub(bfloat16 a
, bfloat16 b
, float_status
*status
)
1951 return bfloat16_addsub(a
, b
, status
, true);
1954 static float128 QEMU_FLATTEN
1955 float128_addsub(float128 a
, float128 b
, float_status
*status
, bool subtract
)
1957 FloatParts128 pa
, pb
, *pr
;
1959 float128_unpack_canonical(&pa
, a
, status
);
1960 float128_unpack_canonical(&pb
, b
, status
);
1961 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1963 return float128_round_pack_canonical(pr
, status
);
1966 float128
float128_add(float128 a
, float128 b
, float_status
*status
)
1968 return float128_addsub(a
, b
, status
, false);
1971 float128
float128_sub(float128 a
, float128 b
, float_status
*status
)
1973 return float128_addsub(a
, b
, status
, true);
1976 static floatx80 QEMU_FLATTEN
1977 floatx80_addsub(floatx80 a
, floatx80 b
, float_status
*status
, bool subtract
)
1979 FloatParts128 pa
, pb
, *pr
;
1981 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
1982 !floatx80_unpack_canonical(&pb
, b
, status
)) {
1983 return floatx80_default_nan(status
);
1986 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1987 return floatx80_round_pack_canonical(pr
, status
);
1990 floatx80
floatx80_add(floatx80 a
, floatx80 b
, float_status
*status
)
1992 return floatx80_addsub(a
, b
, status
, false);
1995 floatx80
floatx80_sub(floatx80 a
, floatx80 b
, float_status
*status
)
1997 return floatx80_addsub(a
, b
, status
, true);
2004 float16 QEMU_FLATTEN
float16_mul(float16 a
, float16 b
, float_status
*status
)
2006 FloatParts64 pa
, pb
, *pr
;
2008 float16_unpack_canonical(&pa
, a
, status
);
2009 float16_unpack_canonical(&pb
, b
, status
);
2010 pr
= parts_mul(&pa
, &pb
, status
);
2012 return float16_round_pack_canonical(pr
, status
);
2015 static float32 QEMU_SOFTFLOAT_ATTR
2016 soft_f32_mul(float32 a
, float32 b
, float_status
*status
)
2018 FloatParts64 pa
, pb
, *pr
;
2020 float32_unpack_canonical(&pa
, a
, status
);
2021 float32_unpack_canonical(&pb
, b
, status
);
2022 pr
= parts_mul(&pa
, &pb
, status
);
2024 return float32_round_pack_canonical(pr
, status
);
2027 static float64 QEMU_SOFTFLOAT_ATTR
2028 soft_f64_mul(float64 a
, float64 b
, float_status
*status
)
2030 FloatParts64 pa
, pb
, *pr
;
2032 float64_unpack_canonical(&pa
, a
, status
);
2033 float64_unpack_canonical(&pb
, b
, status
);
2034 pr
= parts_mul(&pa
, &pb
, status
);
2036 return float64_round_pack_canonical(pr
, status
);
static float hard_f32_mul(float a, float b)
{
    /* Host FPU multiply for the hardfloat fast path. */
    return a * b;
}
static double hard_f64_mul(double a, double b)
{
    /* Host FPU multiply for the hardfloat fast path. */
    return a * b;
}
2049 float32 QEMU_FLATTEN
2050 float32_mul(float32 a
, float32 b
, float_status
*s
)
2052 return float32_gen2(a
, b
, s
, hard_f32_mul
, soft_f32_mul
,
2053 f32_is_zon2
, f32_addsubmul_post
);
2056 float64 QEMU_FLATTEN
2057 float64_mul(float64 a
, float64 b
, float_status
*s
)
2059 return float64_gen2(a
, b
, s
, hard_f64_mul
, soft_f64_mul
,
2060 f64_is_zon2
, f64_addsubmul_post
);
2063 bfloat16 QEMU_FLATTEN
2064 bfloat16_mul(bfloat16 a
, bfloat16 b
, float_status
*status
)
2066 FloatParts64 pa
, pb
, *pr
;
2068 bfloat16_unpack_canonical(&pa
, a
, status
);
2069 bfloat16_unpack_canonical(&pb
, b
, status
);
2070 pr
= parts_mul(&pa
, &pb
, status
);
2072 return bfloat16_round_pack_canonical(pr
, status
);
2075 float128 QEMU_FLATTEN
2076 float128_mul(float128 a
, float128 b
, float_status
*status
)
2078 FloatParts128 pa
, pb
, *pr
;
2080 float128_unpack_canonical(&pa
, a
, status
);
2081 float128_unpack_canonical(&pb
, b
, status
);
2082 pr
= parts_mul(&pa
, &pb
, status
);
2084 return float128_round_pack_canonical(pr
, status
);
2087 floatx80 QEMU_FLATTEN
2088 floatx80_mul(floatx80 a
, floatx80 b
, float_status
*status
)
2090 FloatParts128 pa
, pb
, *pr
;
2092 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2093 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2094 return floatx80_default_nan(status
);
2097 pr
= parts_mul(&pa
, &pb
, status
);
2098 return floatx80_round_pack_canonical(pr
, status
);
2102 * Fused multiply-add
2105 float16 QEMU_FLATTEN
float16_muladd(float16 a
, float16 b
, float16 c
,
2106 int flags
, float_status
*status
)
2108 FloatParts64 pa
, pb
, pc
, *pr
;
2110 float16_unpack_canonical(&pa
, a
, status
);
2111 float16_unpack_canonical(&pb
, b
, status
);
2112 float16_unpack_canonical(&pc
, c
, status
);
2113 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2115 return float16_round_pack_canonical(pr
, status
);
2118 static float32 QEMU_SOFTFLOAT_ATTR
2119 soft_f32_muladd(float32 a
, float32 b
, float32 c
, int flags
,
2120 float_status
*status
)
2122 FloatParts64 pa
, pb
, pc
, *pr
;
2124 float32_unpack_canonical(&pa
, a
, status
);
2125 float32_unpack_canonical(&pb
, b
, status
);
2126 float32_unpack_canonical(&pc
, c
, status
);
2127 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2129 return float32_round_pack_canonical(pr
, status
);
2132 static float64 QEMU_SOFTFLOAT_ATTR
2133 soft_f64_muladd(float64 a
, float64 b
, float64 c
, int flags
,
2134 float_status
*status
)
2136 FloatParts64 pa
, pb
, pc
, *pr
;
2138 float64_unpack_canonical(&pa
, a
, status
);
2139 float64_unpack_canonical(&pb
, b
, status
);
2140 float64_unpack_canonical(&pc
, c
, status
);
2141 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2143 return float64_round_pack_canonical(pr
, status
);
2146 static bool force_soft_fma
;
2148 float32 QEMU_FLATTEN
2149 float32_muladd(float32 xa
, float32 xb
, float32 xc
, int flags
, float_status
*s
)
2151 union_float32 ua
, ub
, uc
, ur
;
2157 if (unlikely(!can_use_fpu(s
))) {
2160 if (unlikely(flags
& float_muladd_halve_result
)) {
2164 float32_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2165 if (unlikely(!f32_is_zon3(ua
, ub
, uc
))) {
2169 if (unlikely(force_soft_fma
)) {
2174 * When (a || b) == 0, there's no need to check for under/over flow,
2175 * since we know the addend is (normal || 0) and the product is 0.
2177 if (float32_is_zero(ua
.s
) || float32_is_zero(ub
.s
)) {
2181 prod_sign
= float32_is_neg(ua
.s
) ^ float32_is_neg(ub
.s
);
2182 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2183 up
.s
= float32_set_sign(float32_zero
, prod_sign
);
2185 if (flags
& float_muladd_negate_c
) {
2190 union_float32 ua_orig
= ua
;
2191 union_float32 uc_orig
= uc
;
2193 if (flags
& float_muladd_negate_product
) {
2196 if (flags
& float_muladd_negate_c
) {
2200 ur
.h
= fmaf(ua
.h
, ub
.h
, uc
.h
);
2202 if (unlikely(f32_is_inf(ur
))) {
2203 float_raise(float_flag_overflow
, s
);
2204 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
)) {
2210 if (flags
& float_muladd_negate_result
) {
2211 return float32_chs(ur
.s
);
2216 return soft_f32_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2219 float64 QEMU_FLATTEN
2220 float64_muladd(float64 xa
, float64 xb
, float64 xc
, int flags
, float_status
*s
)
2222 union_float64 ua
, ub
, uc
, ur
;
2228 if (unlikely(!can_use_fpu(s
))) {
2231 if (unlikely(flags
& float_muladd_halve_result
)) {
2235 float64_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2236 if (unlikely(!f64_is_zon3(ua
, ub
, uc
))) {
2240 if (unlikely(force_soft_fma
)) {
2245 * When (a || b) == 0, there's no need to check for under/over flow,
2246 * since we know the addend is (normal || 0) and the product is 0.
2248 if (float64_is_zero(ua
.s
) || float64_is_zero(ub
.s
)) {
2252 prod_sign
= float64_is_neg(ua
.s
) ^ float64_is_neg(ub
.s
);
2253 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2254 up
.s
= float64_set_sign(float64_zero
, prod_sign
);
2256 if (flags
& float_muladd_negate_c
) {
2261 union_float64 ua_orig
= ua
;
2262 union_float64 uc_orig
= uc
;
2264 if (flags
& float_muladd_negate_product
) {
2267 if (flags
& float_muladd_negate_c
) {
2271 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
2273 if (unlikely(f64_is_inf(ur
))) {
2274 float_raise(float_flag_overflow
, s
);
2275 } else if (unlikely(fabs(ur
.h
) <= FLT_MIN
)) {
2281 if (flags
& float_muladd_negate_result
) {
2282 return float64_chs(ur
.s
);
2287 return soft_f64_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2290 bfloat16 QEMU_FLATTEN
bfloat16_muladd(bfloat16 a
, bfloat16 b
, bfloat16 c
,
2291 int flags
, float_status
*status
)
2293 FloatParts64 pa
, pb
, pc
, *pr
;
2295 bfloat16_unpack_canonical(&pa
, a
, status
);
2296 bfloat16_unpack_canonical(&pb
, b
, status
);
2297 bfloat16_unpack_canonical(&pc
, c
, status
);
2298 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2300 return bfloat16_round_pack_canonical(pr
, status
);
2303 float128 QEMU_FLATTEN
float128_muladd(float128 a
, float128 b
, float128 c
,
2304 int flags
, float_status
*status
)
2306 FloatParts128 pa
, pb
, pc
, *pr
;
2308 float128_unpack_canonical(&pa
, a
, status
);
2309 float128_unpack_canonical(&pb
, b
, status
);
2310 float128_unpack_canonical(&pc
, c
, status
);
2311 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2313 return float128_round_pack_canonical(pr
, status
);
2320 float16
float16_div(float16 a
, float16 b
, float_status
*status
)
2322 FloatParts64 pa
, pb
, *pr
;
2324 float16_unpack_canonical(&pa
, a
, status
);
2325 float16_unpack_canonical(&pb
, b
, status
);
2326 pr
= parts_div(&pa
, &pb
, status
);
2328 return float16_round_pack_canonical(pr
, status
);
2331 static float32 QEMU_SOFTFLOAT_ATTR
2332 soft_f32_div(float32 a
, float32 b
, float_status
*status
)
2334 FloatParts64 pa
, pb
, *pr
;
2336 float32_unpack_canonical(&pa
, a
, status
);
2337 float32_unpack_canonical(&pb
, b
, status
);
2338 pr
= parts_div(&pa
, &pb
, status
);
2340 return float32_round_pack_canonical(pr
, status
);
2343 static float64 QEMU_SOFTFLOAT_ATTR
2344 soft_f64_div(float64 a
, float64 b
, float_status
*status
)
2346 FloatParts64 pa
, pb
, *pr
;
2348 float64_unpack_canonical(&pa
, a
, status
);
2349 float64_unpack_canonical(&pb
, b
, status
);
2350 pr
= parts_div(&pa
, &pb
, status
);
2352 return float64_round_pack_canonical(pr
, status
);
static float hard_f32_div(float a, float b)
{
    /* Host FPU divide for the hardfloat fast path. */
    return a / b;
}
static double hard_f64_div(double a, double b)
{
    /* Host FPU divide for the hardfloat fast path. */
    return a / b;
}
2365 static bool f32_div_pre(union_float32 a
, union_float32 b
)
2367 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2368 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2369 fpclassify(b
.h
) == FP_NORMAL
;
2371 return float32_is_zero_or_normal(a
.s
) && float32_is_normal(b
.s
);
2374 static bool f64_div_pre(union_float64 a
, union_float64 b
)
2376 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2377 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2378 fpclassify(b
.h
) == FP_NORMAL
;
2380 return float64_is_zero_or_normal(a
.s
) && float64_is_normal(b
.s
);
2383 static bool f32_div_post(union_float32 a
, union_float32 b
)
2385 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2386 return fpclassify(a
.h
) != FP_ZERO
;
2388 return !float32_is_zero(a
.s
);
2391 static bool f64_div_post(union_float64 a
, union_float64 b
)
2393 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2394 return fpclassify(a
.h
) != FP_ZERO
;
2396 return !float64_is_zero(a
.s
);
2399 float32 QEMU_FLATTEN
2400 float32_div(float32 a
, float32 b
, float_status
*s
)
2402 return float32_gen2(a
, b
, s
, hard_f32_div
, soft_f32_div
,
2403 f32_div_pre
, f32_div_post
);
2406 float64 QEMU_FLATTEN
2407 float64_div(float64 a
, float64 b
, float_status
*s
)
2409 return float64_gen2(a
, b
, s
, hard_f64_div
, soft_f64_div
,
2410 f64_div_pre
, f64_div_post
);
2413 bfloat16 QEMU_FLATTEN
2414 bfloat16_div(bfloat16 a
, bfloat16 b
, float_status
*status
)
2416 FloatParts64 pa
, pb
, *pr
;
2418 bfloat16_unpack_canonical(&pa
, a
, status
);
2419 bfloat16_unpack_canonical(&pb
, b
, status
);
2420 pr
= parts_div(&pa
, &pb
, status
);
2422 return bfloat16_round_pack_canonical(pr
, status
);
2425 float128 QEMU_FLATTEN
2426 float128_div(float128 a
, float128 b
, float_status
*status
)
2428 FloatParts128 pa
, pb
, *pr
;
2430 float128_unpack_canonical(&pa
, a
, status
);
2431 float128_unpack_canonical(&pb
, b
, status
);
2432 pr
= parts_div(&pa
, &pb
, status
);
2434 return float128_round_pack_canonical(pr
, status
);
2437 floatx80
floatx80_div(floatx80 a
, floatx80 b
, float_status
*status
)
2439 FloatParts128 pa
, pb
, *pr
;
2441 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2442 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2443 return floatx80_default_nan(status
);
2446 pr
= parts_div(&pa
, &pb
, status
);
2447 return floatx80_round_pack_canonical(pr
, status
);
2454 float32
float32_rem(float32 a
, float32 b
, float_status
*status
)
2456 FloatParts64 pa
, pb
, *pr
;
2458 float32_unpack_canonical(&pa
, a
, status
);
2459 float32_unpack_canonical(&pb
, b
, status
);
2460 pr
= parts_modrem(&pa
, &pb
, NULL
, status
);
2462 return float32_round_pack_canonical(pr
, status
);
2465 float64
float64_rem(float64 a
, float64 b
, float_status
*status
)
2467 FloatParts64 pa
, pb
, *pr
;
2469 float64_unpack_canonical(&pa
, a
, status
);
2470 float64_unpack_canonical(&pb
, b
, status
);
2471 pr
= parts_modrem(&pa
, &pb
, NULL
, status
);
2473 return float64_round_pack_canonical(pr
, status
);
2476 float128
float128_rem(float128 a
, float128 b
, float_status
*status
)
2478 FloatParts128 pa
, pb
, *pr
;
2480 float128_unpack_canonical(&pa
, a
, status
);
2481 float128_unpack_canonical(&pb
, b
, status
);
2482 pr
= parts_modrem(&pa
, &pb
, NULL
, status
);
2484 return float128_round_pack_canonical(pr
, status
);
2488 * Returns the remainder of the extended double-precision floating-point value
2489 * `a' with respect to the corresponding value `b'.
2490 * If 'mod' is false, the operation is performed according to the IEC/IEEE
2491 * Standard for Binary Floating-Point Arithmetic. If 'mod' is true, return
2492 * the remainder based on truncating the quotient toward zero instead and
2493 * *quotient is set to the low 64 bits of the absolute value of the integer
2496 floatx80
floatx80_modrem(floatx80 a
, floatx80 b
, bool mod
,
2497 uint64_t *quotient
, float_status
*status
)
2499 FloatParts128 pa
, pb
, *pr
;
2502 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2503 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2504 return floatx80_default_nan(status
);
2506 pr
= parts_modrem(&pa
, &pb
, mod
? quotient
: NULL
, status
);
2508 return floatx80_round_pack_canonical(pr
, status
);
2511 floatx80
floatx80_rem(floatx80 a
, floatx80 b
, float_status
*status
)
2514 return floatx80_modrem(a
, b
, false, "ient
, status
);
2517 floatx80
floatx80_mod(floatx80 a
, floatx80 b
, float_status
*status
)
2520 return floatx80_modrem(a
, b
, true, "ient
, status
);
2524 * Float to Float conversions
2526 * Returns the result of converting one float format to another. The
2527 * conversion is performed according to the IEC/IEEE Standard for
2528 * Binary Floating-Point Arithmetic.
2530 * Usually this only needs to take care of raising invalid exceptions
2531 * and handling the conversion on NaNs.
2534 static void parts_float_to_ahp(FloatParts64
*a
, float_status
*s
)
2537 case float_class_qnan
:
2538 case float_class_snan
:
2540 * There is no NaN in the destination format. Raise Invalid
2541 * and return a zero with the sign of the input NaN.
2543 float_raise(float_flag_invalid
, s
);
2544 a
->cls
= float_class_zero
;
2547 case float_class_inf
:
2549 * There is no Inf in the destination format. Raise Invalid
2550 * and return the maximum normal with the correct sign.
2552 float_raise(float_flag_invalid
, s
);
2553 a
->cls
= float_class_normal
;
2554 a
->exp
= float16_params_ahp
.exp_max
;
2555 a
->frac
= MAKE_64BIT_MASK(float16_params_ahp
.frac_shift
,
2556 float16_params_ahp
.frac_size
+ 1);
2559 case float_class_normal
:
2560 case float_class_zero
:
2564 g_assert_not_reached();
2568 static void parts64_float_to_float(FloatParts64
*a
, float_status
*s
)
2570 if (is_nan(a
->cls
)) {
2571 parts_return_nan(a
, s
);
2575 static void parts128_float_to_float(FloatParts128
*a
, float_status
*s
)
2577 if (is_nan(a
->cls
)) {
2578 parts_return_nan(a
, s
);
2582 #define parts_float_to_float(P, S) \
2583 PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2585 static void parts_float_to_float_narrow(FloatParts64
*a
, FloatParts128
*b
,
2592 if (a
->cls
== float_class_normal
) {
2593 frac_truncjam(a
, b
);
2594 } else if (is_nan(a
->cls
)) {
2595 /* Discard the low bits of the NaN. */
2596 a
->frac
= b
->frac_hi
;
2597 parts_return_nan(a
, s
);
2601 static void parts_float_to_float_widen(FloatParts128
*a
, FloatParts64
*b
,
2609 if (is_nan(a
->cls
)) {
2610 parts_return_nan(a
, s
);
2614 float32
float16_to_float32(float16 a
, bool ieee
, float_status
*s
)
2616 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2619 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2620 parts_float_to_float(&p
, s
);
2621 return float32_round_pack_canonical(&p
, s
);
2624 float64
float16_to_float64(float16 a
, bool ieee
, float_status
*s
)
2626 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2629 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2630 parts_float_to_float(&p
, s
);
2631 return float64_round_pack_canonical(&p
, s
);
2634 float16
float32_to_float16(float32 a
, bool ieee
, float_status
*s
)
2637 const FloatFmt
*fmt
;
2639 float32_unpack_canonical(&p
, a
, s
);
2641 parts_float_to_float(&p
, s
);
2642 fmt
= &float16_params
;
2644 parts_float_to_ahp(&p
, s
);
2645 fmt
= &float16_params_ahp
;
2647 return float16a_round_pack_canonical(&p
, s
, fmt
);
2650 static float64 QEMU_SOFTFLOAT_ATTR
2651 soft_float32_to_float64(float32 a
, float_status
*s
)
2655 float32_unpack_canonical(&p
, a
, s
);
2656 parts_float_to_float(&p
, s
);
2657 return float64_round_pack_canonical(&p
, s
);
2660 float64
float32_to_float64(float32 a
, float_status
*s
)
2662 if (likely(float32_is_normal(a
))) {
2663 /* Widening conversion can never produce inexact results. */
2669 } else if (float32_is_zero(a
)) {
2670 return float64_set_sign(float64_zero
, float32_is_neg(a
));
2672 return soft_float32_to_float64(a
, s
);
2676 float16
float64_to_float16(float64 a
, bool ieee
, float_status
*s
)
2679 const FloatFmt
*fmt
;
2681 float64_unpack_canonical(&p
, a
, s
);
2683 parts_float_to_float(&p
, s
);
2684 fmt
= &float16_params
;
2686 parts_float_to_ahp(&p
, s
);
2687 fmt
= &float16_params_ahp
;
2689 return float16a_round_pack_canonical(&p
, s
, fmt
);
2692 float32
float64_to_float32(float64 a
, float_status
*s
)
2696 float64_unpack_canonical(&p
, a
, s
);
2697 parts_float_to_float(&p
, s
);
2698 return float32_round_pack_canonical(&p
, s
);
2701 float32
bfloat16_to_float32(bfloat16 a
, float_status
*s
)
2705 bfloat16_unpack_canonical(&p
, a
, s
);
2706 parts_float_to_float(&p
, s
);
2707 return float32_round_pack_canonical(&p
, s
);
2710 float64
bfloat16_to_float64(bfloat16 a
, float_status
*s
)
2714 bfloat16_unpack_canonical(&p
, a
, s
);
2715 parts_float_to_float(&p
, s
);
2716 return float64_round_pack_canonical(&p
, s
);
2719 bfloat16
float32_to_bfloat16(float32 a
, float_status
*s
)
2723 float32_unpack_canonical(&p
, a
, s
);
2724 parts_float_to_float(&p
, s
);
2725 return bfloat16_round_pack_canonical(&p
, s
);
2728 bfloat16
float64_to_bfloat16(float64 a
, float_status
*s
)
2732 float64_unpack_canonical(&p
, a
, s
);
2733 parts_float_to_float(&p
, s
);
2734 return bfloat16_round_pack_canonical(&p
, s
);
2737 float32
float128_to_float32(float128 a
, float_status
*s
)
2742 float128_unpack_canonical(&p128
, a
, s
);
2743 parts_float_to_float_narrow(&p64
, &p128
, s
);
2744 return float32_round_pack_canonical(&p64
, s
);
2747 float64
float128_to_float64(float128 a
, float_status
*s
)
2752 float128_unpack_canonical(&p128
, a
, s
);
2753 parts_float_to_float_narrow(&p64
, &p128
, s
);
2754 return float64_round_pack_canonical(&p64
, s
);
2757 float128
float32_to_float128(float32 a
, float_status
*s
)
2762 float32_unpack_canonical(&p64
, a
, s
);
2763 parts_float_to_float_widen(&p128
, &p64
, s
);
2764 return float128_round_pack_canonical(&p128
, s
);
2767 float128
float64_to_float128(float64 a
, float_status
*s
)
2772 float64_unpack_canonical(&p64
, a
, s
);
2773 parts_float_to_float_widen(&p128
, &p64
, s
);
2774 return float128_round_pack_canonical(&p128
, s
);
2777 float32
floatx80_to_float32(floatx80 a
, float_status
*s
)
2782 if (floatx80_unpack_canonical(&p128
, a
, s
)) {
2783 parts_float_to_float_narrow(&p64
, &p128
, s
);
2785 parts_default_nan(&p64
, s
);
2787 return float32_round_pack_canonical(&p64
, s
);
2790 float64
floatx80_to_float64(floatx80 a
, float_status
*s
)
2795 if (floatx80_unpack_canonical(&p128
, a
, s
)) {
2796 parts_float_to_float_narrow(&p64
, &p128
, s
);
2798 parts_default_nan(&p64
, s
);
2800 return float64_round_pack_canonical(&p64
, s
);
2803 float128
floatx80_to_float128(floatx80 a
, float_status
*s
)
2807 if (floatx80_unpack_canonical(&p
, a
, s
)) {
2808 parts_float_to_float(&p
, s
);
2810 parts_default_nan(&p
, s
);
2812 return float128_round_pack_canonical(&p
, s
);
2815 floatx80
float32_to_floatx80(float32 a
, float_status
*s
)
2820 float32_unpack_canonical(&p64
, a
, s
);
2821 parts_float_to_float_widen(&p128
, &p64
, s
);
2822 return floatx80_round_pack_canonical(&p128
, s
);
2825 floatx80
float64_to_floatx80(float64 a
, float_status
*s
)
2830 float64_unpack_canonical(&p64
, a
, s
);
2831 parts_float_to_float_widen(&p128
, &p64
, s
);
2832 return floatx80_round_pack_canonical(&p128
, s
);
2835 floatx80
float128_to_floatx80(float128 a
, float_status
*s
)
2839 float128_unpack_canonical(&p
, a
, s
);
2840 parts_float_to_float(&p
, s
);
2841 return floatx80_round_pack_canonical(&p
, s
);
2845 * Round to integral value
2848 float16
float16_round_to_int(float16 a
, float_status
*s
)
2852 float16_unpack_canonical(&p
, a
, s
);
2853 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float16_params
);
2854 return float16_round_pack_canonical(&p
, s
);
2857 float32
float32_round_to_int(float32 a
, float_status
*s
)
2861 float32_unpack_canonical(&p
, a
, s
);
2862 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float32_params
);
2863 return float32_round_pack_canonical(&p
, s
);
2866 float64
float64_round_to_int(float64 a
, float_status
*s
)
2870 float64_unpack_canonical(&p
, a
, s
);
2871 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float64_params
);
2872 return float64_round_pack_canonical(&p
, s
);
2875 bfloat16
bfloat16_round_to_int(bfloat16 a
, float_status
*s
)
2879 bfloat16_unpack_canonical(&p
, a
, s
);
2880 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &bfloat16_params
);
2881 return bfloat16_round_pack_canonical(&p
, s
);
2884 float128
float128_round_to_int(float128 a
, float_status
*s
)
2888 float128_unpack_canonical(&p
, a
, s
);
2889 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float128_params
);
2890 return float128_round_pack_canonical(&p
, s
);
2893 floatx80
floatx80_round_to_int(floatx80 a
, float_status
*status
)
2897 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
2898 return floatx80_default_nan(status
);
2901 parts_round_to_int(&p
, status
->float_rounding_mode
, 0, status
,
2902 &floatx80_params
[status
->floatx80_rounding_precision
]);
2903 return floatx80_round_pack_canonical(&p
, status
);
2907 * Floating-point to signed integer conversions
2910 int8_t float16_to_int8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2915 float16_unpack_canonical(&p
, a
, s
);
2916 return parts_float_to_sint(&p
, rmode
, scale
, INT8_MIN
, INT8_MAX
, s
);
2919 int16_t float16_to_int16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2924 float16_unpack_canonical(&p
, a
, s
);
2925 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2928 int32_t float16_to_int32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2933 float16_unpack_canonical(&p
, a
, s
);
2934 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2937 int64_t float16_to_int64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2942 float16_unpack_canonical(&p
, a
, s
);
2943 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2946 int16_t float32_to_int16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2951 float32_unpack_canonical(&p
, a
, s
);
2952 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2955 int32_t float32_to_int32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2960 float32_unpack_canonical(&p
, a
, s
);
2961 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2964 int64_t float32_to_int64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2969 float32_unpack_canonical(&p
, a
, s
);
2970 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2973 int16_t float64_to_int16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2978 float64_unpack_canonical(&p
, a
, s
);
2979 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2982 int32_t float64_to_int32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2987 float64_unpack_canonical(&p
, a
, s
);
2988 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2991 int64_t float64_to_int64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2996 float64_unpack_canonical(&p
, a
, s
);
2997 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3000 int16_t bfloat16_to_int16_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
3005 bfloat16_unpack_canonical(&p
, a
, s
);
3006 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
3009 int32_t bfloat16_to_int32_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
3014 bfloat16_unpack_canonical(&p
, a
, s
);
3015 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3018 int64_t bfloat16_to_int64_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
3023 bfloat16_unpack_canonical(&p
, a
, s
);
3024 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3027 static int32_t float128_to_int32_scalbn(float128 a
, FloatRoundMode rmode
,
3028 int scale
, float_status
*s
)
3032 float128_unpack_canonical(&p
, a
, s
);
3033 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3036 static int64_t float128_to_int64_scalbn(float128 a
, FloatRoundMode rmode
,
3037 int scale
, float_status
*s
)
3041 float128_unpack_canonical(&p
, a
, s
);
3042 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3045 static int32_t floatx80_to_int32_scalbn(floatx80 a
, FloatRoundMode rmode
,
3046 int scale
, float_status
*s
)
3050 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
3051 parts_default_nan(&p
, s
);
3053 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3056 static int64_t floatx80_to_int64_scalbn(floatx80 a
, FloatRoundMode rmode
,
3057 int scale
, float_status
*s
)
3061 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
3062 parts_default_nan(&p
, s
);
3064 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3067 int8_t float16_to_int8(float16 a
, float_status
*s
)
3069 return float16_to_int8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3072 int16_t float16_to_int16(float16 a
, float_status
*s
)
3074 return float16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3077 int32_t float16_to_int32(float16 a
, float_status
*s
)
3079 return float16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3082 int64_t float16_to_int64(float16 a
, float_status
*s
)
3084 return float16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3087 int16_t float32_to_int16(float32 a
, float_status
*s
)
3089 return float32_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3092 int32_t float32_to_int32(float32 a
, float_status
*s
)
3094 return float32_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3097 int64_t float32_to_int64(float32 a
, float_status
*s
)
3099 return float32_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3102 int16_t float64_to_int16(float64 a
, float_status
*s
)
3104 return float64_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3107 int32_t float64_to_int32(float64 a
, float_status
*s
)
3109 return float64_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3112 int64_t float64_to_int64(float64 a
, float_status
*s
)
3114 return float64_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3117 int32_t float128_to_int32(float128 a
, float_status
*s
)
3119 return float128_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3122 int64_t float128_to_int64(float128 a
, float_status
*s
)
3124 return float128_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3127 int32_t floatx80_to_int32(floatx80 a
, float_status
*s
)
3129 return floatx80_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3132 int64_t floatx80_to_int64(floatx80 a
, float_status
*s
)
3134 return floatx80_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3137 int16_t float16_to_int16_round_to_zero(float16 a
, float_status
*s
)
3139 return float16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3142 int32_t float16_to_int32_round_to_zero(float16 a
, float_status
*s
)
3144 return float16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3147 int64_t float16_to_int64_round_to_zero(float16 a
, float_status
*s
)
3149 return float16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3152 int16_t float32_to_int16_round_to_zero(float32 a
, float_status
*s
)
3154 return float32_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3157 int32_t float32_to_int32_round_to_zero(float32 a
, float_status
*s
)
3159 return float32_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3162 int64_t float32_to_int64_round_to_zero(float32 a
, float_status
*s
)
3164 return float32_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3167 int16_t float64_to_int16_round_to_zero(float64 a
, float_status
*s
)
3169 return float64_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3172 int32_t float64_to_int32_round_to_zero(float64 a
, float_status
*s
)
3174 return float64_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3177 int64_t float64_to_int64_round_to_zero(float64 a
, float_status
*s
)
3179 return float64_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3182 int32_t float128_to_int32_round_to_zero(float128 a
, float_status
*s
)
3184 return float128_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3187 int64_t float128_to_int64_round_to_zero(float128 a
, float_status
*s
)
3189 return float128_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3192 int32_t floatx80_to_int32_round_to_zero(floatx80 a
, float_status
*s
)
3194 return floatx80_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3197 int64_t floatx80_to_int64_round_to_zero(floatx80 a
, float_status
*s
)
3199 return floatx80_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3202 int16_t bfloat16_to_int16(bfloat16 a
, float_status
*s
)
3204 return bfloat16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3207 int32_t bfloat16_to_int32(bfloat16 a
, float_status
*s
)
3209 return bfloat16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3212 int64_t bfloat16_to_int64(bfloat16 a
, float_status
*s
)
3214 return bfloat16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3217 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a
, float_status
*s
)
3219 return bfloat16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3222 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a
, float_status
*s
)
3224 return bfloat16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3227 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a
, float_status
*s
)
3229 return bfloat16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3233 * Floating-point to unsigned integer conversions
3236 uint8_t float16_to_uint8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3241 float16_unpack_canonical(&p
, a
, s
);
3242 return parts_float_to_uint(&p
, rmode
, scale
, UINT8_MAX
, s
);
3245 uint16_t float16_to_uint16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3250 float16_unpack_canonical(&p
, a
, s
);
3251 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3254 uint32_t float16_to_uint32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3259 float16_unpack_canonical(&p
, a
, s
);
3260 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3263 uint64_t float16_to_uint64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3268 float16_unpack_canonical(&p
, a
, s
);
3269 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3272 uint16_t float32_to_uint16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3277 float32_unpack_canonical(&p
, a
, s
);
3278 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3281 uint32_t float32_to_uint32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3286 float32_unpack_canonical(&p
, a
, s
);
3287 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3290 uint64_t float32_to_uint64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3295 float32_unpack_canonical(&p
, a
, s
);
3296 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3299 uint16_t float64_to_uint16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3304 float64_unpack_canonical(&p
, a
, s
);
3305 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3308 uint32_t float64_to_uint32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3313 float64_unpack_canonical(&p
, a
, s
);
3314 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3317 uint64_t float64_to_uint64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3322 float64_unpack_canonical(&p
, a
, s
);
3323 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3326 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3327 int scale
, float_status
*s
)
3331 bfloat16_unpack_canonical(&p
, a
, s
);
3332 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3335 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3336 int scale
, float_status
*s
)
3340 bfloat16_unpack_canonical(&p
, a
, s
);
3341 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3344 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3345 int scale
, float_status
*s
)
3349 bfloat16_unpack_canonical(&p
, a
, s
);
3350 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3353 static uint32_t float128_to_uint32_scalbn(float128 a
, FloatRoundMode rmode
,
3354 int scale
, float_status
*s
)
3358 float128_unpack_canonical(&p
, a
, s
);
3359 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3362 static uint64_t float128_to_uint64_scalbn(float128 a
, FloatRoundMode rmode
,
3363 int scale
, float_status
*s
)
3367 float128_unpack_canonical(&p
, a
, s
);
3368 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3371 uint8_t float16_to_uint8(float16 a
, float_status
*s
)
3373 return float16_to_uint8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3376 uint16_t float16_to_uint16(float16 a
, float_status
*s
)
3378 return float16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3381 uint32_t float16_to_uint32(float16 a
, float_status
*s
)
3383 return float16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3386 uint64_t float16_to_uint64(float16 a
, float_status
*s
)
3388 return float16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3391 uint16_t float32_to_uint16(float32 a
, float_status
*s
)
3393 return float32_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3396 uint32_t float32_to_uint32(float32 a
, float_status
*s
)
3398 return float32_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3401 uint64_t float32_to_uint64(float32 a
, float_status
*s
)
3403 return float32_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3406 uint16_t float64_to_uint16(float64 a
, float_status
*s
)
3408 return float64_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3411 uint32_t float64_to_uint32(float64 a
, float_status
*s
)
3413 return float64_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3416 uint64_t float64_to_uint64(float64 a
, float_status
*s
)
3418 return float64_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3421 uint32_t float128_to_uint32(float128 a
, float_status
*s
)
3423 return float128_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3426 uint64_t float128_to_uint64(float128 a
, float_status
*s
)
3428 return float128_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3431 uint16_t float16_to_uint16_round_to_zero(float16 a
, float_status
*s
)
3433 return float16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3436 uint32_t float16_to_uint32_round_to_zero(float16 a
, float_status
*s
)
3438 return float16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3441 uint64_t float16_to_uint64_round_to_zero(float16 a
, float_status
*s
)
3443 return float16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3446 uint16_t float32_to_uint16_round_to_zero(float32 a
, float_status
*s
)
3448 return float32_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3451 uint32_t float32_to_uint32_round_to_zero(float32 a
, float_status
*s
)
3453 return float32_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3456 uint64_t float32_to_uint64_round_to_zero(float32 a
, float_status
*s
)
3458 return float32_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3461 uint16_t float64_to_uint16_round_to_zero(float64 a
, float_status
*s
)
3463 return float64_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3466 uint32_t float64_to_uint32_round_to_zero(float64 a
, float_status
*s
)
3468 return float64_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3471 uint64_t float64_to_uint64_round_to_zero(float64 a
, float_status
*s
)
3473 return float64_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3476 uint32_t float128_to_uint32_round_to_zero(float128 a
, float_status
*s
)
3478 return float128_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3481 uint64_t float128_to_uint64_round_to_zero(float128 a
, float_status
*s
)
3483 return float128_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3486 uint16_t bfloat16_to_uint16(bfloat16 a
, float_status
*s
)
3488 return bfloat16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3491 uint32_t bfloat16_to_uint32(bfloat16 a
, float_status
*s
)
3493 return bfloat16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3496 uint64_t bfloat16_to_uint64(bfloat16 a
, float_status
*s
)
3498 return bfloat16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3501 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a
, float_status
*s
)
3503 return bfloat16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3506 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a
, float_status
*s
)
3508 return bfloat16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3511 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a
, float_status
*s
)
3513 return bfloat16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3517 * Signed integer to floating-point conversions
3520 float16
int64_to_float16_scalbn(int64_t a
, int scale
, float_status
*status
)
3524 parts_sint_to_float(&p
, a
, scale
, status
);
3525 return float16_round_pack_canonical(&p
, status
);
3528 float16
int32_to_float16_scalbn(int32_t a
, int scale
, float_status
*status
)
3530 return int64_to_float16_scalbn(a
, scale
, status
);
3533 float16
int16_to_float16_scalbn(int16_t a
, int scale
, float_status
*status
)
3535 return int64_to_float16_scalbn(a
, scale
, status
);
3538 float16
int64_to_float16(int64_t a
, float_status
*status
)
3540 return int64_to_float16_scalbn(a
, 0, status
);
3543 float16
int32_to_float16(int32_t a
, float_status
*status
)
3545 return int64_to_float16_scalbn(a
, 0, status
);
3548 float16
int16_to_float16(int16_t a
, float_status
*status
)
3550 return int64_to_float16_scalbn(a
, 0, status
);
3553 float16
int8_to_float16(int8_t a
, float_status
*status
)
3555 return int64_to_float16_scalbn(a
, 0, status
);
3558 float32
int64_to_float32_scalbn(int64_t a
, int scale
, float_status
*status
)
3562 parts64_sint_to_float(&p
, a
, scale
, status
);
3563 return float32_round_pack_canonical(&p
, status
);
3566 float32
int32_to_float32_scalbn(int32_t a
, int scale
, float_status
*status
)
3568 return int64_to_float32_scalbn(a
, scale
, status
);
3571 float32
int16_to_float32_scalbn(int16_t a
, int scale
, float_status
*status
)
3573 return int64_to_float32_scalbn(a
, scale
, status
);
3576 float32
int64_to_float32(int64_t a
, float_status
*status
)
3578 return int64_to_float32_scalbn(a
, 0, status
);
3581 float32
int32_to_float32(int32_t a
, float_status
*status
)
3583 return int64_to_float32_scalbn(a
, 0, status
);
3586 float32
int16_to_float32(int16_t a
, float_status
*status
)
3588 return int64_to_float32_scalbn(a
, 0, status
);
3591 float64
int64_to_float64_scalbn(int64_t a
, int scale
, float_status
*status
)
3595 parts_sint_to_float(&p
, a
, scale
, status
);
3596 return float64_round_pack_canonical(&p
, status
);
3599 float64
int32_to_float64_scalbn(int32_t a
, int scale
, float_status
*status
)
3601 return int64_to_float64_scalbn(a
, scale
, status
);
3604 float64
int16_to_float64_scalbn(int16_t a
, int scale
, float_status
*status
)
3606 return int64_to_float64_scalbn(a
, scale
, status
);
3609 float64
int64_to_float64(int64_t a
, float_status
*status
)
3611 return int64_to_float64_scalbn(a
, 0, status
);
3614 float64
int32_to_float64(int32_t a
, float_status
*status
)
3616 return int64_to_float64_scalbn(a
, 0, status
);
3619 float64
int16_to_float64(int16_t a
, float_status
*status
)
3621 return int64_to_float64_scalbn(a
, 0, status
);
3624 bfloat16
int64_to_bfloat16_scalbn(int64_t a
, int scale
, float_status
*status
)
3628 parts_sint_to_float(&p
, a
, scale
, status
);
3629 return bfloat16_round_pack_canonical(&p
, status
);
3632 bfloat16
int32_to_bfloat16_scalbn(int32_t a
, int scale
, float_status
*status
)
3634 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3637 bfloat16
int16_to_bfloat16_scalbn(int16_t a
, int scale
, float_status
*status
)
3639 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3642 bfloat16
int64_to_bfloat16(int64_t a
, float_status
*status
)
3644 return int64_to_bfloat16_scalbn(a
, 0, status
);
3647 bfloat16
int32_to_bfloat16(int32_t a
, float_status
*status
)
3649 return int64_to_bfloat16_scalbn(a
, 0, status
);
3652 bfloat16
int16_to_bfloat16(int16_t a
, float_status
*status
)
3654 return int64_to_bfloat16_scalbn(a
, 0, status
);
3657 float128
int64_to_float128(int64_t a
, float_status
*status
)
3661 parts_sint_to_float(&p
, a
, 0, status
);
3662 return float128_round_pack_canonical(&p
, status
);
3665 float128
int32_to_float128(int32_t a
, float_status
*status
)
3667 return int64_to_float128(a
, status
);
3670 floatx80
int64_to_floatx80(int64_t a
, float_status
*status
)
3674 parts_sint_to_float(&p
, a
, 0, status
);
3675 return floatx80_round_pack_canonical(&p
, status
);
3678 floatx80
int32_to_floatx80(int32_t a
, float_status
*status
)
3680 return int64_to_floatx80(a
, status
);
3684 * Unsigned Integer to floating-point conversions
3687 float16
uint64_to_float16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3691 parts_uint_to_float(&p
, a
, scale
, status
);
3692 return float16_round_pack_canonical(&p
, status
);
3695 float16
uint32_to_float16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3697 return uint64_to_float16_scalbn(a
, scale
, status
);
3700 float16
uint16_to_float16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3702 return uint64_to_float16_scalbn(a
, scale
, status
);
3705 float16
uint64_to_float16(uint64_t a
, float_status
*status
)
3707 return uint64_to_float16_scalbn(a
, 0, status
);
3710 float16
uint32_to_float16(uint32_t a
, float_status
*status
)
3712 return uint64_to_float16_scalbn(a
, 0, status
);
3715 float16
uint16_to_float16(uint16_t a
, float_status
*status
)
3717 return uint64_to_float16_scalbn(a
, 0, status
);
3720 float16
uint8_to_float16(uint8_t a
, float_status
*status
)
3722 return uint64_to_float16_scalbn(a
, 0, status
);
3725 float32
uint64_to_float32_scalbn(uint64_t a
, int scale
, float_status
*status
)
3729 parts_uint_to_float(&p
, a
, scale
, status
);
3730 return float32_round_pack_canonical(&p
, status
);
3733 float32
uint32_to_float32_scalbn(uint32_t a
, int scale
, float_status
*status
)
3735 return uint64_to_float32_scalbn(a
, scale
, status
);
3738 float32
uint16_to_float32_scalbn(uint16_t a
, int scale
, float_status
*status
)
3740 return uint64_to_float32_scalbn(a
, scale
, status
);
3743 float32
uint64_to_float32(uint64_t a
, float_status
*status
)
3745 return uint64_to_float32_scalbn(a
, 0, status
);
3748 float32
uint32_to_float32(uint32_t a
, float_status
*status
)
3750 return uint64_to_float32_scalbn(a
, 0, status
);
3753 float32
uint16_to_float32(uint16_t a
, float_status
*status
)
3755 return uint64_to_float32_scalbn(a
, 0, status
);
3758 float64
uint64_to_float64_scalbn(uint64_t a
, int scale
, float_status
*status
)
3762 parts_uint_to_float(&p
, a
, scale
, status
);
3763 return float64_round_pack_canonical(&p
, status
);
3766 float64
uint32_to_float64_scalbn(uint32_t a
, int scale
, float_status
*status
)
3768 return uint64_to_float64_scalbn(a
, scale
, status
);
3771 float64
uint16_to_float64_scalbn(uint16_t a
, int scale
, float_status
*status
)
3773 return uint64_to_float64_scalbn(a
, scale
, status
);
3776 float64
uint64_to_float64(uint64_t a
, float_status
*status
)
3778 return uint64_to_float64_scalbn(a
, 0, status
);
3781 float64
uint32_to_float64(uint32_t a
, float_status
*status
)
3783 return uint64_to_float64_scalbn(a
, 0, status
);
3786 float64
uint16_to_float64(uint16_t a
, float_status
*status
)
3788 return uint64_to_float64_scalbn(a
, 0, status
);
3791 bfloat16
uint64_to_bfloat16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3795 parts_uint_to_float(&p
, a
, scale
, status
);
3796 return bfloat16_round_pack_canonical(&p
, status
);
3799 bfloat16
uint32_to_bfloat16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3801 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3804 bfloat16
uint16_to_bfloat16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3806 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3809 bfloat16
uint64_to_bfloat16(uint64_t a
, float_status
*status
)
3811 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3814 bfloat16
uint32_to_bfloat16(uint32_t a
, float_status
*status
)
3816 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3819 bfloat16
uint16_to_bfloat16(uint16_t a
, float_status
*status
)
3821 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3824 float128
uint64_to_float128(uint64_t a
, float_status
*status
)
3828 parts_uint_to_float(&p
, a
, 0, status
);
3829 return float128_round_pack_canonical(&p
, status
);
3833 * Minimum and maximum
3836 static float16
float16_minmax(float16 a
, float16 b
, float_status
*s
, int flags
)
3838 FloatParts64 pa
, pb
, *pr
;
3840 float16_unpack_canonical(&pa
, a
, s
);
3841 float16_unpack_canonical(&pb
, b
, s
);
3842 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3844 return float16_round_pack_canonical(pr
, s
);
3847 static bfloat16
bfloat16_minmax(bfloat16 a
, bfloat16 b
,
3848 float_status
*s
, int flags
)
3850 FloatParts64 pa
, pb
, *pr
;
3852 bfloat16_unpack_canonical(&pa
, a
, s
);
3853 bfloat16_unpack_canonical(&pb
, b
, s
);
3854 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3856 return bfloat16_round_pack_canonical(pr
, s
);
3859 static float32
float32_minmax(float32 a
, float32 b
, float_status
*s
, int flags
)
3861 FloatParts64 pa
, pb
, *pr
;
3863 float32_unpack_canonical(&pa
, a
, s
);
3864 float32_unpack_canonical(&pb
, b
, s
);
3865 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3867 return float32_round_pack_canonical(pr
, s
);
3870 static float64
float64_minmax(float64 a
, float64 b
, float_status
*s
, int flags
)
3872 FloatParts64 pa
, pb
, *pr
;
3874 float64_unpack_canonical(&pa
, a
, s
);
3875 float64_unpack_canonical(&pb
, b
, s
);
3876 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3878 return float64_round_pack_canonical(pr
, s
);
3881 static float128
float128_minmax(float128 a
, float128 b
,
3882 float_status
*s
, int flags
)
3884 FloatParts128 pa
, pb
, *pr
;
3886 float128_unpack_canonical(&pa
, a
, s
);
3887 float128_unpack_canonical(&pb
, b
, s
);
3888 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3890 return float128_round_pack_canonical(pr
, s
);
/* Stamp out the six public min/max entry points for a given float type. */
#define MINMAX_1(type, name, flags) \
    type type##_##name(type a, type b, float_status *s) \
    { return type##_minmax(a, b, s, flags); }

#define MINMAX_2(type) \
    MINMAX_1(type, max, 0)                                                \
    MINMAX_1(type, maxnum, minmax_isnum)                                  \
    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
    MINMAX_1(type, min, minmax_ismin)                                     \
    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3915 * Floating point compare
3918 static FloatRelation QEMU_FLATTEN
3919 float16_do_compare(float16 a
, float16 b
, float_status
*s
, bool is_quiet
)
3921 FloatParts64 pa
, pb
;
3923 float16_unpack_canonical(&pa
, a
, s
);
3924 float16_unpack_canonical(&pb
, b
, s
);
3925 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3928 FloatRelation
float16_compare(float16 a
, float16 b
, float_status
*s
)
3930 return float16_do_compare(a
, b
, s
, false);
3933 FloatRelation
float16_compare_quiet(float16 a
, float16 b
, float_status
*s
)
3935 return float16_do_compare(a
, b
, s
, true);
3938 static FloatRelation QEMU_SOFTFLOAT_ATTR
3939 float32_do_compare(float32 a
, float32 b
, float_status
*s
, bool is_quiet
)
3941 FloatParts64 pa
, pb
;
3943 float32_unpack_canonical(&pa
, a
, s
);
3944 float32_unpack_canonical(&pb
, b
, s
);
3945 return parts_compare(&pa
, &pb
, s
, is_quiet
);
/*
 * NOTE(review): line-mangled extraction of float32_hs_compare() (hardfloat
 * fast path using the host's isgreater/isless macros).  Gaps in the embedded
 * numbering (3950, 3952-3955, 3957-3959, 3964, 3966, 3969-3974) indicate
 * dropped lines -- presumably the ua.s/ub.s operand assignments, braces and
 * the soft-fallback goto/label.  Reconcile with upstream before editing.
 */
3948 static FloatRelation QEMU_FLATTEN
3949 float32_hs_compare(float32 xa
, float32 xb
, float_status
*s
, bool is_quiet
)
3951 union_float32 ua
, ub
;
/* Take the soft path unconditionally when hardfloat is compiled out. */
3956 if (QEMU_NO_HARDFLOAT
) {
/* Flush denormal inputs if the status requires it, then use host compares. */
3960 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
3961 if (isgreaterequal(ua
.h
, ub
.h
)) {
3962 if (isgreater(ua
.h
, ub
.h
)) {
3963 return float_relation_greater
;
3965 return float_relation_equal
;
3967 if (likely(isless(ua
.h
, ub
.h
))) {
3968 return float_relation_less
;
3971 * The only condition remaining is unordered.
3972 * Fall through to set flags.
3975 return float32_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
3978 FloatRelation
float32_compare(float32 a
, float32 b
, float_status
*s
)
3980 return float32_hs_compare(a
, b
, s
, false);
3983 FloatRelation
float32_compare_quiet(float32 a
, float32 b
, float_status
*s
)
3985 return float32_hs_compare(a
, b
, s
, true);
3988 static FloatRelation QEMU_SOFTFLOAT_ATTR
3989 float64_do_compare(float64 a
, float64 b
, float_status
*s
, bool is_quiet
)
3991 FloatParts64 pa
, pb
;
3993 float64_unpack_canonical(&pa
, a
, s
);
3994 float64_unpack_canonical(&pb
, b
, s
);
3995 return parts_compare(&pa
, &pb
, s
, is_quiet
);
/*
 * NOTE(review): line-mangled extraction of float64_hs_compare() (hardfloat
 * fast path).  Numbering gaps (4000, 4002-4005, 4007-4009, 4014, 4016,
 * 4019-4024) indicate dropped lines, mirroring the float32 variant above.
 * Reconcile with upstream before editing.
 */
3998 static FloatRelation QEMU_FLATTEN
3999 float64_hs_compare(float64 xa
, float64 xb
, float_status
*s
, bool is_quiet
)
4001 union_float64 ua
, ub
;
/* Take the soft path unconditionally when hardfloat is compiled out. */
4006 if (QEMU_NO_HARDFLOAT
) {
/* Flush denormal inputs if required, then use host compare macros. */
4010 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
4011 if (isgreaterequal(ua
.h
, ub
.h
)) {
4012 if (isgreater(ua
.h
, ub
.h
)) {
4013 return float_relation_greater
;
4015 return float_relation_equal
;
4017 if (likely(isless(ua
.h
, ub
.h
))) {
4018 return float_relation_less
;
4021 * The only condition remaining is unordered.
4022 * Fall through to set flags.
4025 return float64_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
4028 FloatRelation
float64_compare(float64 a
, float64 b
, float_status
*s
)
4030 return float64_hs_compare(a
, b
, s
, false);
4033 FloatRelation
float64_compare_quiet(float64 a
, float64 b
, float_status
*s
)
4035 return float64_hs_compare(a
, b
, s
, true);
4038 static FloatRelation QEMU_FLATTEN
4039 bfloat16_do_compare(bfloat16 a
, bfloat16 b
, float_status
*s
, bool is_quiet
)
4041 FloatParts64 pa
, pb
;
4043 bfloat16_unpack_canonical(&pa
, a
, s
);
4044 bfloat16_unpack_canonical(&pb
, b
, s
);
4045 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4048 FloatRelation
bfloat16_compare(bfloat16 a
, bfloat16 b
, float_status
*s
)
4050 return bfloat16_do_compare(a
, b
, s
, false);
4053 FloatRelation
bfloat16_compare_quiet(bfloat16 a
, bfloat16 b
, float_status
*s
)
4055 return bfloat16_do_compare(a
, b
, s
, true);
4058 static FloatRelation QEMU_FLATTEN
4059 float128_do_compare(float128 a
, float128 b
, float_status
*s
, bool is_quiet
)
4061 FloatParts128 pa
, pb
;
4063 float128_unpack_canonical(&pa
, a
, s
);
4064 float128_unpack_canonical(&pb
, b
, s
);
4065 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4068 FloatRelation
float128_compare(float128 a
, float128 b
, float_status
*s
)
4070 return float128_do_compare(a
, b
, s
, false);
4073 FloatRelation
float128_compare_quiet(float128 a
, float128 b
, float_status
*s
)
4075 return float128_do_compare(a
, b
, s
, true);
4078 static FloatRelation QEMU_FLATTEN
4079 floatx80_do_compare(floatx80 a
, floatx80 b
, float_status
*s
, bool is_quiet
)
4081 FloatParts128 pa
, pb
;
4083 if (!floatx80_unpack_canonical(&pa
, a
, s
) ||
4084 !floatx80_unpack_canonical(&pb
, b
, s
)) {
4085 return float_relation_unordered
;
4087 return parts_compare(&pa
, &pb
, s
, is_quiet
);
4090 FloatRelation
floatx80_compare(floatx80 a
, floatx80 b
, float_status
*s
)
4092 return floatx80_do_compare(a
, b
, s
, false);
4095 FloatRelation
floatx80_compare_quiet(floatx80 a
, floatx80 b
, float_status
*s
)
4097 return floatx80_do_compare(a
, b
, s
, true);
4104 float16
float16_scalbn(float16 a
, int n
, float_status
*status
)
4108 float16_unpack_canonical(&p
, a
, status
);
4109 parts_scalbn(&p
, n
, status
);
4110 return float16_round_pack_canonical(&p
, status
);
4113 float32
float32_scalbn(float32 a
, int n
, float_status
*status
)
4117 float32_unpack_canonical(&p
, a
, status
);
4118 parts_scalbn(&p
, n
, status
);
4119 return float32_round_pack_canonical(&p
, status
);
4122 float64
float64_scalbn(float64 a
, int n
, float_status
*status
)
4126 float64_unpack_canonical(&p
, a
, status
);
4127 parts_scalbn(&p
, n
, status
);
4128 return float64_round_pack_canonical(&p
, status
);
4131 bfloat16
bfloat16_scalbn(bfloat16 a
, int n
, float_status
*status
)
4135 bfloat16_unpack_canonical(&p
, a
, status
);
4136 parts_scalbn(&p
, n
, status
);
4137 return bfloat16_round_pack_canonical(&p
, status
);
4140 float128
float128_scalbn(float128 a
, int n
, float_status
*status
)
4144 float128_unpack_canonical(&p
, a
, status
);
4145 parts_scalbn(&p
, n
, status
);
4146 return float128_round_pack_canonical(&p
, status
);
4149 floatx80
floatx80_scalbn(floatx80 a
, int n
, float_status
*status
)
4153 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
4154 return floatx80_default_nan(status
);
4156 parts_scalbn(&p
, n
, status
);
4157 return floatx80_round_pack_canonical(&p
, status
);
4164 float16 QEMU_FLATTEN
float16_sqrt(float16 a
, float_status
*status
)
4168 float16_unpack_canonical(&p
, a
, status
);
4169 parts_sqrt(&p
, status
, &float16_params
);
4170 return float16_round_pack_canonical(&p
, status
);
4173 static float32 QEMU_SOFTFLOAT_ATTR
4174 soft_f32_sqrt(float32 a
, float_status
*status
)
4178 float32_unpack_canonical(&p
, a
, status
);
4179 parts_sqrt(&p
, status
, &float32_params
);
4180 return float32_round_pack_canonical(&p
, status
);
4183 static float64 QEMU_SOFTFLOAT_ATTR
4184 soft_f64_sqrt(float64 a
, float_status
*status
)
4188 float64_unpack_canonical(&p
, a
, status
);
4189 parts_sqrt(&p
, status
, &float64_params
);
4190 return float64_round_pack_canonical(&p
, status
);
/*
 * NOTE(review): line-mangled extraction of float32_sqrt() (hardfloat fast
 * path).  Numbering gaps (4194, 4196-4197, 4199-4201, 4206-4208, 4211-4216)
 * indicate dropped lines -- presumably the ua.s assignment, the host sqrt()
 * call and the soft-fallback goto/label.  Reconcile with upstream.
 */
4193 float32 QEMU_FLATTEN
float32_sqrt(float32 xa
, float_status
*s
)
4195 union_float32 ua
, ur
;
/* Fall back to softfloat when the host FPU cannot be used directly. */
4198 if (unlikely(!can_use_fpu(s
))) {
4202 float32_input_flush1(&ua
.s
, s
);
/* Host sqrt is only safe for non-negative normal/zero inputs. */
4203 if (QEMU_HARDFLOAT_1F32_USE_FP
) {
4204 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
4205 fpclassify(ua
.h
) == FP_ZERO
) ||
4209 } else if (unlikely(!float32_is_zero_or_normal(ua
.s
) ||
4210 float32_is_neg(ua
.s
))) {
4217 return soft_f32_sqrt(ua
.s
, s
);
/*
 * NOTE(review): line-mangled extraction of float64_sqrt() (hardfloat fast
 * path), with the same dropped-line pattern as the float32 variant above.
 * Reconcile with upstream before editing.
 */
4220 float64 QEMU_FLATTEN
float64_sqrt(float64 xa
, float_status
*s
)
4222 union_float64 ua
, ur
;
/* Fall back to softfloat when the host FPU cannot be used directly. */
4225 if (unlikely(!can_use_fpu(s
))) {
4229 float64_input_flush1(&ua
.s
, s
);
/* Host sqrt is only safe for non-negative normal/zero inputs. */
4230 if (QEMU_HARDFLOAT_1F64_USE_FP
) {
4231 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
4232 fpclassify(ua
.h
) == FP_ZERO
) ||
4236 } else if (unlikely(!float64_is_zero_or_normal(ua
.s
) ||
4237 float64_is_neg(ua
.s
))) {
4244 return soft_f64_sqrt(ua
.s
, s
);
4247 bfloat16 QEMU_FLATTEN
bfloat16_sqrt(bfloat16 a
, float_status
*status
)
4251 bfloat16_unpack_canonical(&p
, a
, status
);
4252 parts_sqrt(&p
, status
, &bfloat16_params
);
4253 return bfloat16_round_pack_canonical(&p
, status
);
4256 float128 QEMU_FLATTEN
float128_sqrt(float128 a
, float_status
*status
)
4260 float128_unpack_canonical(&p
, a
, status
);
4261 parts_sqrt(&p
, status
, &float128_params
);
4262 return float128_round_pack_canonical(&p
, status
);
4265 floatx80
floatx80_sqrt(floatx80 a
, float_status
*s
)
4269 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
4270 return floatx80_default_nan(s
);
4272 parts_sqrt(&p
, s
, &floatx80_params
[s
->floatx80_rounding_precision
]);
4273 return floatx80_round_pack_canonical(&p
, s
);
4279 float32
float32_log2(float32 a
, float_status
*status
)
4283 float32_unpack_canonical(&p
, a
, status
);
4284 parts_log2(&p
, status
, &float32_params
);
4285 return float32_round_pack_canonical(&p
, status
);
4288 float64
float64_log2(float64 a
, float_status
*status
)
4292 float64_unpack_canonical(&p
, a
, status
);
4293 parts_log2(&p
, status
, &float64_params
);
4294 return float64_round_pack_canonical(&p
, status
);
4297 /*----------------------------------------------------------------------------
4298 | The pattern for a default generated NaN.
4299 *----------------------------------------------------------------------------*/
4301 float16
float16_default_nan(float_status
*status
)
4305 parts_default_nan(&p
, status
);
4306 p
.frac
>>= float16_params
.frac_shift
;
4307 return float16_pack_raw(&p
);
4310 float32
float32_default_nan(float_status
*status
)
4314 parts_default_nan(&p
, status
);
4315 p
.frac
>>= float32_params
.frac_shift
;
4316 return float32_pack_raw(&p
);
4319 float64
float64_default_nan(float_status
*status
)
4323 parts_default_nan(&p
, status
);
4324 p
.frac
>>= float64_params
.frac_shift
;
4325 return float64_pack_raw(&p
);
4328 float128
float128_default_nan(float_status
*status
)
4332 parts_default_nan(&p
, status
);
4333 frac_shr(&p
, float128_params
.frac_shift
);
4334 return float128_pack_raw(&p
);
4337 bfloat16
bfloat16_default_nan(float_status
*status
)
4341 parts_default_nan(&p
, status
);
4342 p
.frac
>>= bfloat16_params
.frac_shift
;
4343 return bfloat16_pack_raw(&p
);
4346 /*----------------------------------------------------------------------------
4347 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4348 *----------------------------------------------------------------------------*/
4350 float16
float16_silence_nan(float16 a
, float_status
*status
)
4354 float16_unpack_raw(&p
, a
);
4355 p
.frac
<<= float16_params
.frac_shift
;
4356 parts_silence_nan(&p
, status
);
4357 p
.frac
>>= float16_params
.frac_shift
;
4358 return float16_pack_raw(&p
);
4361 float32
float32_silence_nan(float32 a
, float_status
*status
)
4365 float32_unpack_raw(&p
, a
);
4366 p
.frac
<<= float32_params
.frac_shift
;
4367 parts_silence_nan(&p
, status
);
4368 p
.frac
>>= float32_params
.frac_shift
;
4369 return float32_pack_raw(&p
);
4372 float64
float64_silence_nan(float64 a
, float_status
*status
)
4376 float64_unpack_raw(&p
, a
);
4377 p
.frac
<<= float64_params
.frac_shift
;
4378 parts_silence_nan(&p
, status
);
4379 p
.frac
>>= float64_params
.frac_shift
;
4380 return float64_pack_raw(&p
);
4383 bfloat16
bfloat16_silence_nan(bfloat16 a
, float_status
*status
)
4387 bfloat16_unpack_raw(&p
, a
);
4388 p
.frac
<<= bfloat16_params
.frac_shift
;
4389 parts_silence_nan(&p
, status
);
4390 p
.frac
>>= bfloat16_params
.frac_shift
;
4391 return bfloat16_pack_raw(&p
);
4394 float128
float128_silence_nan(float128 a
, float_status
*status
)
4398 float128_unpack_raw(&p
, a
);
4399 frac_shl(&p
, float128_params
.frac_shift
);
4400 parts_silence_nan(&p
, status
);
4401 frac_shr(&p
, float128_params
.frac_shift
);
4402 return float128_pack_raw(&p
);
4405 /*----------------------------------------------------------------------------
4406 | If `a' is denormal and we are in flush-to-zero mode then set the
4407 | input-denormal exception and return zero. Otherwise just return the value.
4408 *----------------------------------------------------------------------------*/
4410 static bool parts_squash_denormal(FloatParts64 p
, float_status
*status
)
4412 if (p
.exp
== 0 && p
.frac
!= 0) {
4413 float_raise(float_flag_input_denormal
, status
);
4420 float16
float16_squash_input_denormal(float16 a
, float_status
*status
)
4422 if (status
->flush_inputs_to_zero
) {
4425 float16_unpack_raw(&p
, a
);
4426 if (parts_squash_denormal(p
, status
)) {
4427 return float16_set_sign(float16_zero
, p
.sign
);
4433 float32
float32_squash_input_denormal(float32 a
, float_status
*status
)
4435 if (status
->flush_inputs_to_zero
) {
4438 float32_unpack_raw(&p
, a
);
4439 if (parts_squash_denormal(p
, status
)) {
4440 return float32_set_sign(float32_zero
, p
.sign
);
4446 float64
float64_squash_input_denormal(float64 a
, float_status
*status
)
4448 if (status
->flush_inputs_to_zero
) {
4451 float64_unpack_raw(&p
, a
);
4452 if (parts_squash_denormal(p
, status
)) {
4453 return float64_set_sign(float64_zero
, p
.sign
);
4459 bfloat16
bfloat16_squash_input_denormal(bfloat16 a
, float_status
*status
)
4461 if (status
->flush_inputs_to_zero
) {
4464 bfloat16_unpack_raw(&p
, a
);
4465 if (parts_squash_denormal(p
, status
)) {
4466 return bfloat16_set_sign(bfloat16_zero
, p
.sign
);
4472 /*----------------------------------------------------------------------------
4473 | Normalizes the subnormal extended double-precision floating-point value
4474 | represented by the denormalized significand `aSig'. The normalized exponent
4475 | and significand are stored at the locations pointed to by `zExpPtr' and
4476 | `zSigPtr', respectively.
4477 *----------------------------------------------------------------------------*/
/*
 * Normalize the subnormal extended-precision significand @aSig, storing
 * the normalized significand and exponent through @zSigPtr / @zExpPtr
 * (see the section comment above).
 * NOTE(review): the second parameter and the shift-count declaration were
 * dropped by the extraction; reconstructed from the body's *zSigPtr use.
 */
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int shiftCount;

    shiftCount = clz64(aSig);
    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4489 /*----------------------------------------------------------------------------
4490 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4491 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4492 | and returns the proper extended double-precision floating-point value
4493 | corresponding to the abstract input. Ordinarily, the abstract value is
4494 | rounded and packed into the extended double-precision format, with the
4495 | inexact exception raised if the abstract input cannot be represented
4496 | exactly. However, if the abstract value is too large, the overflow and
4497 | inexact exceptions are raised and an infinity or maximal finite value is
4498 | returned. If the abstract value is too small, the input value is rounded to
4499 | a subnormal number, and the underflow and inexact exceptions are raised if
4500 | the abstract input cannot be represented exactly as a subnormal extended
4501 | double-precision floating-point number.
4502 | If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4503 | the result is rounded to the same number of bits as single or double
4504 | precision, respectively. Otherwise, the result is rounded to the full
4505 | precision of the extended double-precision format.
4506 | The input significand must be normalized or smaller. If the input
4507 | significand is not normalized, `zExp' must be 0; in that case, the result
4508 | returned is a subnormal number, and it must not require rounding. The
4509 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4510 | Floating-Point Arithmetic.
4511 *----------------------------------------------------------------------------*/
/*
 * NOTE(review): the text below is a line-mangled extraction of
 * roundAndPackFloatx80() (documented by the large comment above).  The
 * embedded original numbering skips repeatedly (4516, 4520, 4525, 4529,
 * 4533-4534, 4536, 4541, 4543-4544, 4547, 4550-4553, 4558-4561, 4565,
 * 4567, 4570, 4574-4575, 4577, 4583, 4586-4588, 4590, 4593, 4595, 4599,
 * 4603, 4608, 4610-4611, 4614, 4617-4620, 4625-4629, 4634, 4636,
 * 4640-4641, 4643-4644, 4647, 4650-4651, 4653, 4658, 4660-4661, 4664,
 * 4667-4672, 4674-4675, 4677, 4679-4681, 4683-4687, 4689-4690, 4692-4696,
 * 4698), so braces, `break;`s, labels and several statements are missing.
 * Do not compile as-is -- reconcile with the upstream file first.
 */
4513 floatx80
roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
, bool zSign
,
4514 int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
,
4515 float_status
*status
)
4517 FloatRoundMode roundingMode
;
4518 bool roundNearestEven
, increment
, isTiny
;
4519 int64_t roundIncrement
, roundMask
, roundBits
;
/* Select the round increment and sticky mask for the target precision. */
4521 roundingMode
= status
->float_rounding_mode
;
4522 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4523 switch (roundingPrecision
) {
4524 case floatx80_precision_x
:
4526 case floatx80_precision_d
:
4527 roundIncrement
= UINT64_C(0x0000000000000400);
4528 roundMask
= UINT64_C(0x00000000000007FF);
4530 case floatx80_precision_s
:
4531 roundIncrement
= UINT64_C(0x0000008000000000);
4532 roundMask
= UINT64_C(0x000000FFFFFFFFFF);
4535 g_assert_not_reached();
/* Reduced-precision path: fold zSig1 into the sticky bit of zSig0. */
4537 zSig0
|= ( zSig1
!= 0 );
4538 switch (roundingMode
) {
4539 case float_round_nearest_even
:
4540 case float_round_ties_away
:
4542 case float_round_to_zero
:
4545 case float_round_up
:
4546 roundIncrement
= zSign
? 0 : roundMask
;
4548 case float_round_down
:
4549 roundIncrement
= zSign
? roundMask
: 0;
/* 0x7FFE is the maximum finite exponent (see the header comment). */
4554 roundBits
= zSig0
& roundMask
;
4555 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4556 if ( ( 0x7FFE < zExp
)
4557 || ( ( zExp
== 0x7FFE ) && ( zSig0
+ roundIncrement
< zSig0
) )
/* Underflow handling for the reduced-precision path. */
4562 if (status
->flush_to_zero
) {
4563 float_raise(float_flag_output_denormal
, status
);
4564 return packFloatx80(zSign
, 0, 0);
4566 isTiny
= status
->tininess_before_rounding
4568 || (zSig0
<= zSig0
+ roundIncrement
);
4569 shift64RightJamming( zSig0
, 1 - zExp
, &zSig0
);
4571 roundBits
= zSig0
& roundMask
;
4572 if (isTiny
&& roundBits
) {
4573 float_raise(float_flag_underflow
, status
);
4576 float_raise(float_flag_inexact
, status
);
4578 zSig0
+= roundIncrement
;
4579 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
/* Round-to-even: clear the LSB kept when exactly halfway. */
4580 roundIncrement
= roundMask
+ 1;
4581 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4582 roundMask
|= roundIncrement
;
4584 zSig0
&= ~ roundMask
;
4585 return packFloatx80( zSign
, zExp
, zSig0
);
/* Normal-range rounding for the reduced-precision path. */
4589 float_raise(float_flag_inexact
, status
);
4591 zSig0
+= roundIncrement
;
4592 if ( zSig0
< roundIncrement
) {
4594 zSig0
= UINT64_C(0x8000000000000000);
4596 roundIncrement
= roundMask
+ 1;
4597 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4598 roundMask
|= roundIncrement
;
4600 zSig0
&= ~ roundMask
;
4601 if ( zSig0
== 0 ) zExp
= 0;
4602 return packFloatx80( zSign
, zExp
, zSig0
);
/* Full 80-bit precision path: zSig1 supplies the round/sticky bits. */
4604 switch (roundingMode
) {
4605 case float_round_nearest_even
:
4606 case float_round_ties_away
:
4607 increment
= ((int64_t)zSig1
< 0);
4609 case float_round_to_zero
:
4612 case float_round_up
:
4613 increment
= !zSign
&& zSig1
;
4615 case float_round_down
:
4616 increment
= zSign
&& zSig1
;
4621 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4622 if ( ( 0x7FFE < zExp
)
4623 || ( ( zExp
== 0x7FFE )
4624 && ( zSig0
== UINT64_C(0xFFFFFFFFFFFFFFFF) )
/* Overflow: deliver maximal finite or infinity per rounding mode. */
4630 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4631 if ( ( roundingMode
== float_round_to_zero
)
4632 || ( zSign
&& ( roundingMode
== float_round_up
) )
4633 || ( ! zSign
&& ( roundingMode
== float_round_down
) )
4635 return packFloatx80( zSign
, 0x7FFE, ~ roundMask
);
4637 return packFloatx80(zSign
,
4638 floatx80_infinity_high
,
4639 floatx80_infinity_low
);
/* Underflow handling for the full-precision path. */
4642 isTiny
= status
->tininess_before_rounding
4645 || (zSig0
< UINT64_C(0xFFFFFFFFFFFFFFFF));
4646 shift64ExtraRightJamming( zSig0
, zSig1
, 1 - zExp
, &zSig0
, &zSig1
);
4648 if (isTiny
&& zSig1
) {
4649 float_raise(float_flag_underflow
, status
);
4652 float_raise(float_flag_inexact
, status
);
4654 switch (roundingMode
) {
4655 case float_round_nearest_even
:
4656 case float_round_ties_away
:
4657 increment
= ((int64_t)zSig1
< 0);
4659 case float_round_to_zero
:
4662 case float_round_up
:
4663 increment
= !zSign
&& zSig1
;
4665 case float_round_down
:
4666 increment
= zSign
&& zSig1
;
/* Round-to-even tie handling on the subnormal result. */
4673 if (!(zSig1
<< 1) && roundNearestEven
) {
4676 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
4678 return packFloatx80( zSign
, zExp
, zSig0
);
4682 float_raise(float_flag_inexact
, status
);
/* Significand carry-out: renormalize to 1.0 * 2**(zExp+1). */
4688 zSig0
= UINT64_C(0x8000000000000000);
4691 if (!(zSig1
<< 1) && roundNearestEven
) {
4697 if ( zSig0
== 0 ) zExp
= 0;
4699 return packFloatx80( zSign
, zExp
, zSig0
);
4703 /*----------------------------------------------------------------------------
4704 | Takes an abstract floating-point value having sign `zSign', exponent
4705 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4706 | and returns the proper extended double-precision floating-point value
4707 | corresponding to the abstract input. This routine is just like
4708 | `roundAndPackFloatx80' except that the input significand does not have to be
4709 | normalized.
4710 *----------------------------------------------------------------------------*/
/*
 * NOTE(review): line-mangled extraction of normalizeRoundAndPackFloatx80().
 * Numbering gaps 4716-4723 and 4726 indicate dropped lines -- presumably
 * the shiftCount declaration, the zSig0==0 high-word handling, and the
 * exponent adjustment after the shift.  Reconcile with upstream.
 */
4712 floatx80
normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
,
4713 bool zSign
, int32_t zExp
,
4714 uint64_t zSig0
, uint64_t zSig1
,
4715 float_status
*status
)
/* Left-justify the 128-bit significand, then round and pack. */
4724 shiftCount
= clz64(zSig0
);
4725 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
4727 return roundAndPackFloatx80(roundingPrecision
, zSign
, zExp
,
4728 zSig0
, zSig1
, status
);
4732 /*----------------------------------------------------------------------------
4733 | Returns the binary exponential of the single-precision floating-point value
4734 | `a'. The operation is performed according to the IEC/IEEE Standard for
4735 | Binary Floating-Point Arithmetic.
4737 | Uses the following identities:
4739 | 1. -------------------------------------------------------------------------
4743 | 2. -------------------------------------------------------------------------
4746 | e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
4748 *----------------------------------------------------------------------------*/
4750 static const float64 float32_exp2_coefficients
[15] =
4752 const_float64( 0x3ff0000000000000ll
), /* 1 */
4753 const_float64( 0x3fe0000000000000ll
), /* 2 */
4754 const_float64( 0x3fc5555555555555ll
), /* 3 */
4755 const_float64( 0x3fa5555555555555ll
), /* 4 */
4756 const_float64( 0x3f81111111111111ll
), /* 5 */
4757 const_float64( 0x3f56c16c16c16c17ll
), /* 6 */
4758 const_float64( 0x3f2a01a01a01a01all
), /* 7 */
4759 const_float64( 0x3efa01a01a01a01all
), /* 8 */
4760 const_float64( 0x3ec71de3a556c734ll
), /* 9 */
4761 const_float64( 0x3e927e4fb7789f5cll
), /* 10 */
4762 const_float64( 0x3e5ae64567f544e4ll
), /* 11 */
4763 const_float64( 0x3e21eed8eff8d898ll
), /* 12 */
4764 const_float64( 0x3de6124613a86d09ll
), /* 13 */
4765 const_float64( 0x3da93974a8c07c9dll
), /* 14 */
4766 const_float64( 0x3d6ae7f3e733b81fll
), /* 15 */
/*
 * NOTE(review): line-mangled extraction of float32_exp2().  Numbering gaps
 * (4770, 4772-4773, 4776, 4784-4787, 4789-4790, 4792, 4795-4796,
 * 4802-4803) indicate dropped lines -- presumably the switch header on
 * xp.cls, the exact return for a zero input, and the loop-index
 * declaration.  Reconcile with upstream before editing.
 */
4769 float32
float32_exp2(float32 a
, float_status
*status
)
4771 FloatParts64 xp
, xnp
, tp
, rp
;
/* Dispatch on the operand class; only normals reach the series below. */
4774 float32_unpack_canonical(&xp
, a
, status
);
4775 if (unlikely(xp
.cls
!= float_class_normal
)) {
4777 case float_class_snan
:
4778 case float_class_qnan
:
4779 parts_return_nan(&xp
, status
);
4780 return float32_round_pack_canonical(&xp
, status
);
4781 case float_class_inf
:
4782 return xp
.sign
? float32_zero
: a
;
4783 case float_class_zero
:
4788 g_assert_not_reached();
/* exp2(x) = e**(x * ln 2); the series result is always inexact. */
4791 float_raise(float_flag_inexact
, status
);
4793 float64_unpack_canonical(&xnp
, float64_ln2
, status
);
4794 xp
= *parts_mul(&xp
, &tp
, status
);
/* Horner-style accumulation of the 15-term polynomial. */
4797 float64_unpack_canonical(&rp
, float64_one
, status
);
4798 for (i
= 0 ; i
< 15 ; i
++) {
4799 float64_unpack_canonical(&tp
, float32_exp2_coefficients
[i
], status
);
4800 rp
= *parts_muladd(&tp
, &xp
, &rp
, 0, status
);
4801 xnp
= *parts_mul(&xnp
, &xp
, status
);
4804 return float32_round_pack_canonical(&rp
, status
);
4807 /*----------------------------------------------------------------------------
4808 | Rounds the extended double-precision floating-point value `a'
4809 | to the precision provided by floatx80_rounding_precision and returns the
4810 | result as an extended double-precision floating-point value.
4811 | The operation is performed according to the IEC/IEEE Standard for Binary
4812 | Floating-Point Arithmetic.
4813 *----------------------------------------------------------------------------*/
4815 floatx80
floatx80_round(floatx80 a
, float_status
*status
)
4819 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
4820 return floatx80_default_nan(status
);
4822 return floatx80_round_pack_canonical(&p
, status
);
4825 static void __attribute__((constructor
)) softfloat_init(void)
4827 union_float64 ua
, ub
, uc
, ur
;
4829 if (QEMU_NO_HARDFLOAT
) {
4833 * Test that the host's FMA is not obviously broken. For example,
4834 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
4835 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
4837 ua
.s
= 0x0020000000000001ULL
;
4838 ub
.s
= 0x3ca0000000000000ULL
;
4839 uc
.s
= 0x0020000000000000ULL
;
4840 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
4841 if (ur
.s
!= 0x0020000000000001ULL
) {
4842 force_soft_fma
= true;