4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
44 ===============================================================================
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
85 #include "qemu/osdep.h"
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
90 /* We only need stdlib for abort() */
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 float_raise(float_flag_input_denormal, s); \
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck
, float32
)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck
, float64
)
141 #undef GEN_INPUT_FLUSH__NOCHECK
143 #define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
146 if (likely(!s->flush_inputs_to_zero)) { \
149 soft_t ## _input_flush__nocheck(a, s); \
152 GEN_INPUT_FLUSH1(float32_input_flush1
, float32
)
153 GEN_INPUT_FLUSH1(float64_input_flush1
, float64
)
154 #undef GEN_INPUT_FLUSH1
156 #define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
159 if (likely(!s->flush_inputs_to_zero)) { \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
166 GEN_INPUT_FLUSH2(float32_input_flush2
, float32
)
167 GEN_INPUT_FLUSH2(float64_input_flush2
, float64
)
168 #undef GEN_INPUT_FLUSH2
170 #define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
173 if (likely(!s->flush_inputs_to_zero)) { \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
181 GEN_INPUT_FLUSH3(float32_input_flush3
, float32
)
182 GEN_INPUT_FLUSH3(float64_input_flush3
, float64
)
183 #undef GEN_INPUT_FLUSH3
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the
 * generated hardfloat functions.  Each combination of number of inputs
 * and float size gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif
/*
 * Some targets clear the FP flags before most FP operations.  This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
235 static inline bool can_use_fpu(const float_status
*s
)
237 if (QEMU_NO_HARDFLOAT
) {
240 return likely(s
->float_exception_flags
& float_flag_inexact
&&
241 s
->float_rounding_mode
== float_round_nearest_even
);
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
266 typedef bool (*f32_check_fn
)(union_float32 a
, union_float32 b
);
267 typedef bool (*f64_check_fn
)(union_float64 a
, union_float64 b
);
269 typedef float32 (*soft_f32_op2_fn
)(float32 a
, float32 b
, float_status
*s
);
270 typedef float64 (*soft_f64_op2_fn
)(float64 a
, float64 b
, float_status
*s
);
271 typedef float (*hard_f32_op2_fn
)(float a
, float b
);
272 typedef double (*hard_f64_op2_fn
)(double a
, double b
);
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a
, union_float32 b
)
277 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
282 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
283 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
285 return float32_is_zero_or_normal(a
.s
) &&
286 float32_is_zero_or_normal(b
.s
);
289 static inline bool f64_is_zon2(union_float64 a
, union_float64 b
)
291 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
292 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
293 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
295 return float64_is_zero_or_normal(a
.s
) &&
296 float64_is_zero_or_normal(b
.s
);
299 /* 3-input is-zero-or-normal */
301 bool f32_is_zon3(union_float32 a
, union_float32 b
, union_float32 c
)
303 if (QEMU_HARDFLOAT_3F32_USE_FP
) {
304 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
305 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
306 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
308 return float32_is_zero_or_normal(a
.s
) &&
309 float32_is_zero_or_normal(b
.s
) &&
310 float32_is_zero_or_normal(c
.s
);
314 bool f64_is_zon3(union_float64 a
, union_float64 b
, union_float64 c
)
316 if (QEMU_HARDFLOAT_3F64_USE_FP
) {
317 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
318 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
319 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
321 return float64_is_zero_or_normal(a
.s
) &&
322 float64_is_zero_or_normal(b
.s
) &&
323 float64_is_zero_or_normal(c
.s
);
326 static inline bool f32_is_inf(union_float32 a
)
328 if (QEMU_HARDFLOAT_USE_ISINF
) {
331 return float32_is_infinity(a
.s
);
334 static inline bool f64_is_inf(union_float64 a
)
336 if (QEMU_HARDFLOAT_USE_ISINF
) {
339 return float64_is_infinity(a
.s
);
342 static inline float32
343 float32_gen2(float32 xa
, float32 xb
, float_status
*s
,
344 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
,
345 f32_check_fn pre
, f32_check_fn post
)
347 union_float32 ua
, ub
, ur
;
352 if (unlikely(!can_use_fpu(s
))) {
356 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
357 if (unlikely(!pre(ua
, ub
))) {
361 ur
.h
= hard(ua
.h
, ub
.h
);
362 if (unlikely(f32_is_inf(ur
))) {
363 float_raise(float_flag_overflow
, s
);
364 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
) && post(ua
, ub
)) {
370 return soft(ua
.s
, ub
.s
, s
);
373 static inline float64
374 float64_gen2(float64 xa
, float64 xb
, float_status
*s
,
375 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
,
376 f64_check_fn pre
, f64_check_fn post
)
378 union_float64 ua
, ub
, ur
;
383 if (unlikely(!can_use_fpu(s
))) {
387 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
388 if (unlikely(!pre(ua
, ub
))) {
392 ur
.h
= hard(ua
.h
, ub
.h
);
393 if (unlikely(f64_is_inf(ur
))) {
394 float_raise(float_flag_overflow
, s
);
395 } else if (unlikely(fabs(ur
.h
) <= DBL_MIN
) && post(ua
, ub
)) {
401 return soft(ua
.s
, ub
.s
, s
);
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

#define float_cmask(bit)  (1u << (bit))

/* Bit masks over FloatClass, for testing several classes at once. */
enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
431 /* Flags for parts_minmax. */
433 /* Set for minimum; clear for maximum. */
435 /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
437 /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
441 /* Simple helpers for checking if, or what kind of, NaN we have */
442 static inline __attribute__((unused
)) bool is_nan(FloatClass c
)
444 return unlikely(c
>= float_class_qnan
);
447 static inline __attribute__((unused
)) bool is_snan(FloatClass c
)
449 return c
== float_class_snan
;
452 static inline __attribute__((unused
)) bool is_qnan(FloatClass c
)
454 return c
== float_class_qnan
;
458 * Structure holding all of the decomposed parts of a float.
459 * The exponent is unbiased and the fraction is normalized.
461 * The fraction words are stored in big-endian word ordering,
462 * so that truncation from a larger format to a smaller format
463 * can be done simply by ignoring subsequent elements.
471 /* Routines that know the structure may reference the singular name. */
474 * Routines expanded with multiple structures reference "hi" and "lo"
475 * depending on the operation. In FloatParts64, "hi" and "lo" are
476 * both the same word and aliased here.
496 uint64_t frac_hm
; /* high-middle */
497 uint64_t frac_lm
; /* low-middle */
/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT 63
#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
505 /* Structure holding all of the relevant parameters for a format.
506 * exp_size: the size of the exponent field
507 * exp_bias: the offset applied to the exponent field
508 * exp_max: the maximum normalised exponent
509 * frac_size: the size of the fraction field
510 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
511 * The following are computed based the size of fraction
512 * round_mask: bits below lsb which must be rounded
513 * The following optional modifiers are available:
514 * arm_althp: handle ARM Alternative Half Precision
/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                                \
    .exp_size       = E,                                \
    .exp_bias       = ((1 << E) - 1) >> 1,              \
    .exp_max        = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size      = F,                                \
    .frac_shift     = (-F - 1) & 63,                    \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1
538 static const FloatFmt float16_params
= {
542 static const FloatFmt float16_params_ahp
= {
547 static const FloatFmt bfloat16_params
= {
551 static const FloatFmt float32_params
= {
555 static const FloatFmt float64_params
= {
559 static const FloatFmt float128_params
= {
560 FLOAT_PARAMS(15, 112)
563 #define FLOATX80_PARAMS(R) \
565 .frac_size = R == 64 ? 63 : R, \
567 .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
569 static const FloatFmt floatx80_params
[3] = {
570 [floatx80_precision_s
] = { FLOATX80_PARAMS(23) },
571 [floatx80_precision_d
] = { FLOATX80_PARAMS(52) },
572 [floatx80_precision_x
] = { FLOATX80_PARAMS(64) },
575 /* Unpack a float to parts, but do not canonicalize. */
576 static void unpack_raw64(FloatParts64
*r
, const FloatFmt
*fmt
, uint64_t raw
)
578 const int f_size
= fmt
->frac_size
;
579 const int e_size
= fmt
->exp_size
;
581 *r
= (FloatParts64
) {
582 .cls
= float_class_unclassified
,
583 .sign
= extract64(raw
, f_size
+ e_size
, 1),
584 .exp
= extract64(raw
, f_size
, e_size
),
585 .frac
= extract64(raw
, 0, f_size
)
589 static inline void float16_unpack_raw(FloatParts64
*p
, float16 f
)
591 unpack_raw64(p
, &float16_params
, f
);
594 static inline void bfloat16_unpack_raw(FloatParts64
*p
, bfloat16 f
)
596 unpack_raw64(p
, &bfloat16_params
, f
);
599 static inline void float32_unpack_raw(FloatParts64
*p
, float32 f
)
601 unpack_raw64(p
, &float32_params
, f
);
604 static inline void float64_unpack_raw(FloatParts64
*p
, float64 f
)
606 unpack_raw64(p
, &float64_params
, f
);
609 static void floatx80_unpack_raw(FloatParts128
*p
, floatx80 f
)
611 *p
= (FloatParts128
) {
612 .cls
= float_class_unclassified
,
613 .sign
= extract32(f
.high
, 15, 1),
614 .exp
= extract32(f
.high
, 0, 15),
619 static void float128_unpack_raw(FloatParts128
*p
, float128 f
)
621 const int f_size
= float128_params
.frac_size
- 64;
622 const int e_size
= float128_params
.exp_size
;
624 *p
= (FloatParts128
) {
625 .cls
= float_class_unclassified
,
626 .sign
= extract64(f
.high
, f_size
+ e_size
, 1),
627 .exp
= extract64(f
.high
, f_size
, e_size
),
628 .frac_hi
= extract64(f
.high
, 0, f_size
),
633 /* Pack a float from parts, but do not canonicalize. */
634 static uint64_t pack_raw64(const FloatParts64
*p
, const FloatFmt
*fmt
)
636 const int f_size
= fmt
->frac_size
;
637 const int e_size
= fmt
->exp_size
;
640 ret
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
641 ret
= deposit64(ret
, f_size
, e_size
, p
->exp
);
642 ret
= deposit64(ret
, 0, f_size
, p
->frac
);
646 static inline float16
float16_pack_raw(const FloatParts64
*p
)
648 return make_float16(pack_raw64(p
, &float16_params
));
651 static inline bfloat16
bfloat16_pack_raw(const FloatParts64
*p
)
653 return pack_raw64(p
, &bfloat16_params
);
656 static inline float32
float32_pack_raw(const FloatParts64
*p
)
658 return make_float32(pack_raw64(p
, &float32_params
));
661 static inline float64
float64_pack_raw(const FloatParts64
*p
)
663 return make_float64(pack_raw64(p
, &float64_params
));
666 static float128
float128_pack_raw(const FloatParts128
*p
)
668 const int f_size
= float128_params
.frac_size
- 64;
669 const int e_size
= float128_params
.exp_size
;
672 hi
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
673 hi
= deposit64(hi
, f_size
, e_size
, p
->exp
);
674 hi
= deposit64(hi
, 0, f_size
, p
->frac_hi
);
675 return make_float128(hi
, p
->frac_lo
);
678 /*----------------------------------------------------------------------------
679 | Functions and definitions to determine: (1) whether tininess for underflow
680 | is detected before or after rounding by default, (2) what (if anything)
681 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
682 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
683 | are propagated from function inputs to output. These details are target-
685 *----------------------------------------------------------------------------*/
686 #include "softfloat-specialize.c.inc"
688 #define PARTS_GENERIC_64_128(NAME, P) \
689 QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
691 #define PARTS_GENERIC_64_128_256(NAME, P) \
692 QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
693 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
695 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
696 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
698 static void parts64_return_nan(FloatParts64
*a
, float_status
*s
);
699 static void parts128_return_nan(FloatParts128
*a
, float_status
*s
);
701 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
703 static FloatParts64
*parts64_pick_nan(FloatParts64
*a
, FloatParts64
*b
,
705 static FloatParts128
*parts128_pick_nan(FloatParts128
*a
, FloatParts128
*b
,
708 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
710 static FloatParts64
*parts64_pick_nan_muladd(FloatParts64
*a
, FloatParts64
*b
,
711 FloatParts64
*c
, float_status
*s
,
712 int ab_mask
, int abc_mask
);
713 static FloatParts128
*parts128_pick_nan_muladd(FloatParts128
*a
,
717 int ab_mask
, int abc_mask
);
719 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
720 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
722 static void parts64_canonicalize(FloatParts64
*p
, float_status
*status
,
723 const FloatFmt
*fmt
);
724 static void parts128_canonicalize(FloatParts128
*p
, float_status
*status
,
725 const FloatFmt
*fmt
);
727 #define parts_canonicalize(A, S, F) \
728 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
730 static void parts64_uncanon_normal(FloatParts64
*p
, float_status
*status
,
731 const FloatFmt
*fmt
);
732 static void parts128_uncanon_normal(FloatParts128
*p
, float_status
*status
,
733 const FloatFmt
*fmt
);
735 #define parts_uncanon_normal(A, S, F) \
736 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
738 static void parts64_uncanon(FloatParts64
*p
, float_status
*status
,
739 const FloatFmt
*fmt
);
740 static void parts128_uncanon(FloatParts128
*p
, float_status
*status
,
741 const FloatFmt
*fmt
);
743 #define parts_uncanon(A, S, F) \
744 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
746 static void parts64_add_normal(FloatParts64
*a
, FloatParts64
*b
);
747 static void parts128_add_normal(FloatParts128
*a
, FloatParts128
*b
);
748 static void parts256_add_normal(FloatParts256
*a
, FloatParts256
*b
);
750 #define parts_add_normal(A, B) \
751 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
753 static bool parts64_sub_normal(FloatParts64
*a
, FloatParts64
*b
);
754 static bool parts128_sub_normal(FloatParts128
*a
, FloatParts128
*b
);
755 static bool parts256_sub_normal(FloatParts256
*a
, FloatParts256
*b
);
757 #define parts_sub_normal(A, B) \
758 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
760 static FloatParts64
*parts64_addsub(FloatParts64
*a
, FloatParts64
*b
,
761 float_status
*s
, bool subtract
);
762 static FloatParts128
*parts128_addsub(FloatParts128
*a
, FloatParts128
*b
,
763 float_status
*s
, bool subtract
);
765 #define parts_addsub(A, B, S, Z) \
766 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
768 static FloatParts64
*parts64_mul(FloatParts64
*a
, FloatParts64
*b
,
770 static FloatParts128
*parts128_mul(FloatParts128
*a
, FloatParts128
*b
,
773 #define parts_mul(A, B, S) \
774 PARTS_GENERIC_64_128(mul, A)(A, B, S)
776 static FloatParts64
*parts64_muladd(FloatParts64
*a
, FloatParts64
*b
,
777 FloatParts64
*c
, int flags
,
779 static FloatParts128
*parts128_muladd(FloatParts128
*a
, FloatParts128
*b
,
780 FloatParts128
*c
, int flags
,
783 #define parts_muladd(A, B, C, Z, S) \
784 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
786 static FloatParts64
*parts64_div(FloatParts64
*a
, FloatParts64
*b
,
788 static FloatParts128
*parts128_div(FloatParts128
*a
, FloatParts128
*b
,
791 #define parts_div(A, B, S) \
792 PARTS_GENERIC_64_128(div, A)(A, B, S)
794 static FloatParts64
*parts64_modrem(FloatParts64
*a
, FloatParts64
*b
,
795 uint64_t *mod_quot
, float_status
*s
);
796 static FloatParts128
*parts128_modrem(FloatParts128
*a
, FloatParts128
*b
,
797 uint64_t *mod_quot
, float_status
*s
);
799 #define parts_modrem(A, B, Q, S) \
800 PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
802 static void parts64_sqrt(FloatParts64
*a
, float_status
*s
, const FloatFmt
*f
);
803 static void parts128_sqrt(FloatParts128
*a
, float_status
*s
, const FloatFmt
*f
);
805 #define parts_sqrt(A, S, F) \
806 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
808 static bool parts64_round_to_int_normal(FloatParts64
*a
, FloatRoundMode rm
,
809 int scale
, int frac_size
);
810 static bool parts128_round_to_int_normal(FloatParts128
*a
, FloatRoundMode r
,
811 int scale
, int frac_size
);
813 #define parts_round_to_int_normal(A, R, C, F) \
814 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
816 static void parts64_round_to_int(FloatParts64
*a
, FloatRoundMode rm
,
817 int scale
, float_status
*s
,
818 const FloatFmt
*fmt
);
819 static void parts128_round_to_int(FloatParts128
*a
, FloatRoundMode r
,
820 int scale
, float_status
*s
,
821 const FloatFmt
*fmt
);
823 #define parts_round_to_int(A, R, C, S, F) \
824 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
826 static int64_t parts64_float_to_sint(FloatParts64
*p
, FloatRoundMode rmode
,
827 int scale
, int64_t min
, int64_t max
,
829 static int64_t parts128_float_to_sint(FloatParts128
*p
, FloatRoundMode rmode
,
830 int scale
, int64_t min
, int64_t max
,
833 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
834 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
836 static uint64_t parts64_float_to_uint(FloatParts64
*p
, FloatRoundMode rmode
,
837 int scale
, uint64_t max
,
839 static uint64_t parts128_float_to_uint(FloatParts128
*p
, FloatRoundMode rmode
,
840 int scale
, uint64_t max
,
843 #define parts_float_to_uint(P, R, Z, M, S) \
844 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
846 static void parts64_sint_to_float(FloatParts64
*p
, int64_t a
,
847 int scale
, float_status
*s
);
848 static void parts128_sint_to_float(FloatParts128
*p
, int64_t a
,
849 int scale
, float_status
*s
);
851 #define parts_sint_to_float(P, I, Z, S) \
852 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
854 static void parts64_uint_to_float(FloatParts64
*p
, uint64_t a
,
855 int scale
, float_status
*s
);
856 static void parts128_uint_to_float(FloatParts128
*p
, uint64_t a
,
857 int scale
, float_status
*s
);
859 #define parts_uint_to_float(P, I, Z, S) \
860 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
862 static FloatParts64
*parts64_minmax(FloatParts64
*a
, FloatParts64
*b
,
863 float_status
*s
, int flags
);
864 static FloatParts128
*parts128_minmax(FloatParts128
*a
, FloatParts128
*b
,
865 float_status
*s
, int flags
);
867 #define parts_minmax(A, B, S, F) \
868 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
870 static int parts64_compare(FloatParts64
*a
, FloatParts64
*b
,
871 float_status
*s
, bool q
);
872 static int parts128_compare(FloatParts128
*a
, FloatParts128
*b
,
873 float_status
*s
, bool q
);
875 #define parts_compare(A, B, S, Q) \
876 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
878 static void parts64_scalbn(FloatParts64
*a
, int n
, float_status
*s
);
879 static void parts128_scalbn(FloatParts128
*a
, int n
, float_status
*s
);
881 #define parts_scalbn(A, N, S) \
882 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
884 static void parts64_log2(FloatParts64
*a
, float_status
*s
, const FloatFmt
*f
);
885 static void parts128_log2(FloatParts128
*a
, float_status
*s
, const FloatFmt
*f
);
887 #define parts_log2(A, S, F) \
888 PARTS_GENERIC_64_128(log2, A)(A, S, F)
/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 * As with PARTS_GENERIC above, dispatch on the static type of the
 * first argument to pick the 64/128/256-bit fraction helper.
 */

#define FRAC_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
                 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
901 static bool frac64_add(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
903 return uadd64_overflow(a
->frac
, b
->frac
, &r
->frac
);
906 static bool frac128_add(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
909 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
910 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
914 static bool frac256_add(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
917 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
918 r
->frac_lm
= uadd64_carry(a
->frac_lm
, b
->frac_lm
, &c
);
919 r
->frac_hm
= uadd64_carry(a
->frac_hm
, b
->frac_hm
, &c
);
920 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
924 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
926 static bool frac64_addi(FloatParts64
*r
, FloatParts64
*a
, uint64_t c
)
928 return uadd64_overflow(a
->frac
, c
, &r
->frac
);
931 static bool frac128_addi(FloatParts128
*r
, FloatParts128
*a
, uint64_t c
)
933 c
= uadd64_overflow(a
->frac_lo
, c
, &r
->frac_lo
);
934 return uadd64_overflow(a
->frac_hi
, c
, &r
->frac_hi
);
937 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
939 static void frac64_allones(FloatParts64
*a
)
944 static void frac128_allones(FloatParts128
*a
)
946 a
->frac_hi
= a
->frac_lo
= -1;
949 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
951 static int frac64_cmp(FloatParts64
*a
, FloatParts64
*b
)
953 return a
->frac
== b
->frac
? 0 : a
->frac
< b
->frac
? -1 : 1;
956 static int frac128_cmp(FloatParts128
*a
, FloatParts128
*b
)
958 uint64_t ta
= a
->frac_hi
, tb
= b
->frac_hi
;
960 ta
= a
->frac_lo
, tb
= b
->frac_lo
;
965 return ta
< tb
? -1 : 1;
968 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
970 static void frac64_clear(FloatParts64
*a
)
975 static void frac128_clear(FloatParts128
*a
)
977 a
->frac_hi
= a
->frac_lo
= 0;
980 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
982 static bool frac64_div(FloatParts64
*a
, FloatParts64
*b
)
984 uint64_t n1
, n0
, r
, q
;
988 * We want a 2*N / N-bit division to produce exactly an N-bit
989 * result, so that we do not lose any precision and so that we
990 * do not have to renormalize afterward. If A.frac < B.frac,
991 * then division would produce an (N-1)-bit result; shift A left
992 * by one to produce the an N-bit result, and return true to
993 * decrement the exponent to match.
995 * The udiv_qrnnd algorithm that we're using requires normalization,
996 * i.e. the msb of the denominator must be set, which is already true.
998 ret
= a
->frac
< b
->frac
;
1006 q
= udiv_qrnnd(&r
, n0
, n1
, b
->frac
);
1008 /* Set lsb if there is a remainder, to set inexact. */
1009 a
->frac
= q
| (r
!= 0);
1014 static bool frac128_div(FloatParts128
*a
, FloatParts128
*b
)
1016 uint64_t q0
, q1
, a0
, a1
, b0
, b1
;
1017 uint64_t r0
, r1
, r2
, r3
, t0
, t1
, t2
, t3
;
1020 a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1021 b0
= b
->frac_hi
, b1
= b
->frac_lo
;
1023 ret
= lt128(a0
, a1
, b0
, b1
);
1025 a1
= shr_double(a0
, a1
, 1);
1029 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1030 q0
= estimateDiv128To64(a0
, a1
, b0
);
1033 * Estimate is high because B1 was not included (unless B1 == 0).
1034 * Reduce quotient and increase remainder until remainder is non-negative.
1035 * This loop will execute 0 to 2 times.
1037 mul128By64To192(b0
, b1
, q0
, &t0
, &t1
, &t2
);
1038 sub192(a0
, a1
, 0, t0
, t1
, t2
, &r0
, &r1
, &r2
);
1041 add192(r0
, r1
, r2
, 0, b0
, b1
, &r0
, &r1
, &r2
);
1044 /* Repeat using the remainder, producing a second word of quotient. */
1045 q1
= estimateDiv128To64(r1
, r2
, b0
);
1046 mul128By64To192(b0
, b1
, q1
, &t1
, &t2
, &t3
);
1047 sub192(r1
, r2
, 0, t1
, t2
, t3
, &r1
, &r2
, &r3
);
1050 add192(r1
, r2
, r3
, 0, b0
, b1
, &r1
, &r2
, &r3
);
1053 /* Any remainder indicates inexact; set sticky bit. */
1054 q1
|= (r2
| r3
) != 0;
1061 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1063 static bool frac64_eqz(FloatParts64
*a
)
1065 return a
->frac
== 0;
1068 static bool frac128_eqz(FloatParts128
*a
)
1070 return (a
->frac_hi
| a
->frac_lo
) == 0;
1073 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1075 static void frac64_mulw(FloatParts128
*r
, FloatParts64
*a
, FloatParts64
*b
)
1077 mulu64(&r
->frac_lo
, &r
->frac_hi
, a
->frac
, b
->frac
);
1080 static void frac128_mulw(FloatParts256
*r
, FloatParts128
*a
, FloatParts128
*b
)
1082 mul128To256(a
->frac_hi
, a
->frac_lo
, b
->frac_hi
, b
->frac_lo
,
1083 &r
->frac_hi
, &r
->frac_hm
, &r
->frac_lm
, &r
->frac_lo
);
1086 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1088 static void frac64_neg(FloatParts64
*a
)
1093 static void frac128_neg(FloatParts128
*a
)
1096 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1097 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1100 static void frac256_neg(FloatParts256
*a
)
1103 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1104 a
->frac_lm
= usub64_borrow(0, a
->frac_lm
, &c
);
1105 a
->frac_hm
= usub64_borrow(0, a
->frac_hm
, &c
);
1106 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1109 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1111 static int frac64_normalize(FloatParts64
*a
)
1114 int shift
= clz64(a
->frac
);
1121 static int frac128_normalize(FloatParts128
*a
)
1124 int shl
= clz64(a
->frac_hi
);
1125 a
->frac_hi
= shl_double(a
->frac_hi
, a
->frac_lo
, shl
);
1128 } else if (a
->frac_lo
) {
1129 int shl
= clz64(a
->frac_lo
);
1130 a
->frac_hi
= a
->frac_lo
<< shl
;
1137 static int frac256_normalize(FloatParts256
*a
)
1139 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1140 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1152 a0
= a1
, a1
= a2
, a2
= a3
, a3
= 0;
1155 a0
= a2
, a1
= a3
, a2
= 0, a3
= 0;
1158 a0
= a3
, a1
= 0, a2
= 0, a3
= 0;
1161 a0
= 0, a1
= 0, a2
= 0, a3
= 0;
1171 a0
= shl_double(a0
, a1
, shl
);
1172 a1
= shl_double(a1
, a2
, shl
);
1173 a2
= shl_double(a2
, a3
, shl
);
1184 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1186 static void frac64_modrem(FloatParts64
*a
, FloatParts64
*b
, uint64_t *mod_quot
)
1188 uint64_t a0
, a1
, b0
, t0
, t1
, q
, quot
;
1189 int exp_diff
= a
->exp
- b
->exp
;
1195 if (exp_diff
< -1) {
1201 if (exp_diff
== -1) {
1207 quot
= q
= b0
<= a0
;
1213 while (exp_diff
> 0) {
1214 q
= estimateDiv128To64(a0
, a1
, b0
);
1215 q
= q
> 2 ? q
- 2 : 0;
1216 mul64To128(b0
, q
, &t0
, &t1
);
1217 sub128(a0
, a1
, t0
, t1
, &a0
, &a1
);
1218 shortShift128Left(a0
, a1
, 62, &a0
, &a1
);
1220 quot
= (quot
<< 62) + q
;
1225 q
= estimateDiv128To64(a0
, a1
, b0
);
1226 q
= q
> 2 ? (q
- 2) >> (64 - exp_diff
) : 0;
1227 mul64To128(b0
, q
<< (64 - exp_diff
), &t0
, &t1
);
1228 sub128(a0
, a1
, t0
, t1
, &a0
, &a1
);
1229 shortShift128Left(0, b0
, 64 - exp_diff
, &t0
, &t1
);
1230 while (le128(t0
, t1
, a0
, a1
)) {
1232 sub128(a0
, a1
, t0
, t1
, &a0
, &a1
);
1234 quot
= (exp_diff
< 64 ? quot
<< exp_diff
: 0) + q
;
1243 sub128(t0
, t1
, a0
, a1
, &t0
, &t1
);
1244 if (lt128(t0
, t1
, a0
, a1
) ||
1245 (eq128(t0
, t1
, a0
, a1
) && (q
& 1))) {
1254 shortShift128Left(a0
, a1
, shift
, &a0
, &a1
);
1255 } else if (likely(a1
)) {
1261 a
->cls
= float_class_zero
;
1265 a
->exp
= b
->exp
+ exp_diff
- shift
;
1266 a
->frac
= a0
| (a1
!= 0);
1269 static void frac128_modrem(FloatParts128
*a
, FloatParts128
*b
,
1272 uint64_t a0
, a1
, a2
, b0
, b1
, t0
, t1
, t2
, q
, quot
;
1273 int exp_diff
= a
->exp
- b
->exp
;
1280 if (exp_diff
< -1) {
1286 if (exp_diff
== -1) {
1287 shift128Right(a0
, a1
, 1, &a0
, &a1
);
1294 quot
= q
= le128(b0
, b1
, a0
, a1
);
1296 sub128(a0
, a1
, b0
, b1
, &a0
, &a1
);
1300 while (exp_diff
> 0) {
1301 q
= estimateDiv128To64(a0
, a1
, b0
);
1302 q
= q
> 4 ? q
- 4 : 0;
1303 mul128By64To192(b0
, b1
, q
, &t0
, &t1
, &t2
);
1304 sub192(a0
, a1
, a2
, t0
, t1
, t2
, &a0
, &a1
, &a2
);
1305 shortShift192Left(a0
, a1
, a2
, 61, &a0
, &a1
, &a2
);
1307 quot
= (quot
<< 61) + q
;
1312 q
= estimateDiv128To64(a0
, a1
, b0
);
1313 q
= q
> 4 ? (q
- 4) >> (64 - exp_diff
) : 0;
1314 mul128By64To192(b0
, b1
, q
<< (64 - exp_diff
), &t0
, &t1
, &t2
);
1315 sub192(a0
, a1
, a2
, t0
, t1
, t2
, &a0
, &a1
, &a2
);
1316 shortShift192Left(0, b0
, b1
, 64 - exp_diff
, &t0
, &t1
, &t2
);
1317 while (le192(t0
, t1
, t2
, a0
, a1
, a2
)) {
1319 sub192(a0
, a1
, a2
, t0
, t1
, t2
, &a0
, &a1
, &a2
);
1321 quot
= (exp_diff
< 64 ? quot
<< exp_diff
: 0) + q
;
1331 sub192(t0
, t1
, t2
, a0
, a1
, a2
, &t0
, &t1
, &t2
);
1332 if (lt192(t0
, t1
, t2
, a0
, a1
, a2
) ||
1333 (eq192(t0
, t1
, t2
, a0
, a1
, a2
) && (q
& 1))) {
1343 shortShift192Left(a0
, a1
, a2
, shift
, &a0
, &a1
, &a2
);
1344 } else if (likely(a1
)) {
1346 shortShift128Left(a1
, a2
, shift
, &a0
, &a1
);
1349 } else if (likely(a2
)) {
1355 a
->cls
= float_class_zero
;
1359 a
->exp
= b
->exp
+ exp_diff
- shift
;
1361 a
->frac_lo
= a1
| (a2
!= 0);
1364 #define frac_modrem(A, B, Q) FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1366 static void frac64_shl(FloatParts64
*a
, int c
)
1371 static void frac128_shl(FloatParts128
*a
, int c
)
1373 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1381 a0
= shl_double(a0
, a1
, c
);
1389 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1391 static void frac64_shr(FloatParts64
*a
, int c
)
1396 static void frac128_shr(FloatParts128
*a
, int c
)
1398 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1406 a1
= shr_double(a0
, a1
, c
);
1414 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1416 static void frac64_shrjam(FloatParts64
*a
, int c
)
1418 uint64_t a0
= a
->frac
;
1420 if (likely(c
!= 0)) {
1421 if (likely(c
< 64)) {
1422 a0
= (a0
>> c
) | (shr_double(a0
, 0, c
) != 0);
1430 static void frac128_shrjam(FloatParts128
*a
, int c
)
1432 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1433 uint64_t sticky
= 0;
1435 if (unlikely(c
== 0)) {
1437 } else if (likely(c
< 64)) {
1439 } else if (likely(c
< 128)) {
1453 sticky
|= shr_double(a1
, 0, c
);
1454 a1
= shr_double(a0
, a1
, c
);
1458 a
->frac_lo
= a1
| (sticky
!= 0);
1462 static void frac256_shrjam(FloatParts256
*a
, int c
)
1464 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1465 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1466 uint64_t sticky
= 0;
1468 if (unlikely(c
== 0)) {
1470 } else if (likely(c
< 64)) {
1472 } else if (likely(c
< 256)) {
1473 if (unlikely(c
& 128)) {
1475 a3
= a1
, a2
= a0
, a1
= 0, a0
= 0;
1477 if (unlikely(c
& 64)) {
1479 a3
= a2
, a2
= a1
, a1
= a0
, a0
= 0;
1486 sticky
= a0
| a1
| a2
| a3
;
1487 a0
= a1
= a2
= a3
= 0;
1491 sticky
|= shr_double(a3
, 0, c
);
1492 a3
= shr_double(a2
, a3
, c
);
1493 a2
= shr_double(a1
, a2
, c
);
1494 a1
= shr_double(a0
, a1
, c
);
1498 a
->frac_lo
= a3
| (sticky
!= 0);
1504 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1506 static bool frac64_sub(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
1508 return usub64_overflow(a
->frac
, b
->frac
, &r
->frac
);
1511 static bool frac128_sub(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
1514 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1515 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1519 static bool frac256_sub(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
1522 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1523 r
->frac_lm
= usub64_borrow(a
->frac_lm
, b
->frac_lm
, &c
);
1524 r
->frac_hm
= usub64_borrow(a
->frac_hm
, b
->frac_hm
, &c
);
1525 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1529 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1531 static void frac64_truncjam(FloatParts64
*r
, FloatParts128
*a
)
1533 r
->frac
= a
->frac_hi
| (a
->frac_lo
!= 0);
1536 static void frac128_truncjam(FloatParts128
*r
, FloatParts256
*a
)
1538 r
->frac_hi
= a
->frac_hi
;
1539 r
->frac_lo
= a
->frac_hm
| ((a
->frac_lm
| a
->frac_lo
) != 0);
1542 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1544 static void frac64_widen(FloatParts128
*r
, FloatParts64
*a
)
1546 r
->frac_hi
= a
->frac
;
1550 static void frac128_widen(FloatParts256
*r
, FloatParts128
*a
)
1552 r
->frac_hi
= a
->frac_hi
;
1553 r
->frac_hm
= a
->frac_lo
;
1558 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1561 * Reciprocal sqrt table. 1 bit of exponent, 6-bits of mantessa.
1562 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1563 * and thus MIT licenced.
1565 static const uint16_t rsqrt_tab
[128] = {
1566 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1567 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1568 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1569 0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1570 0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1571 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1572 0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1573 0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1574 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1575 0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1576 0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1577 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1578 0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1579 0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1580 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1581 0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1584 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1585 #define FloatPartsN glue(FloatParts,N)
1586 #define FloatPartsW glue(FloatParts,W)
1591 #include "softfloat-parts-addsub.c.inc"
1592 #include "softfloat-parts.c.inc"
1599 #include "softfloat-parts-addsub.c.inc"
1600 #include "softfloat-parts.c.inc"
1606 #include "softfloat-parts-addsub.c.inc"
1615 * Pack/unpack routines with a specific FloatFmt.
1618 static void float16a_unpack_canonical(FloatParts64
*p
, float16 f
,
1619 float_status
*s
, const FloatFmt
*params
)
1621 float16_unpack_raw(p
, f
);
1622 parts_canonicalize(p
, s
, params
);
1625 static void float16_unpack_canonical(FloatParts64
*p
, float16 f
,
1628 float16a_unpack_canonical(p
, f
, s
, &float16_params
);
1631 static void bfloat16_unpack_canonical(FloatParts64
*p
, bfloat16 f
,
1634 bfloat16_unpack_raw(p
, f
);
1635 parts_canonicalize(p
, s
, &bfloat16_params
);
1638 static float16
float16a_round_pack_canonical(FloatParts64
*p
,
1640 const FloatFmt
*params
)
1642 parts_uncanon(p
, s
, params
);
1643 return float16_pack_raw(p
);
1646 static float16
float16_round_pack_canonical(FloatParts64
*p
,
1649 return float16a_round_pack_canonical(p
, s
, &float16_params
);
1652 static bfloat16
bfloat16_round_pack_canonical(FloatParts64
*p
,
1655 parts_uncanon(p
, s
, &bfloat16_params
);
1656 return bfloat16_pack_raw(p
);
1659 static void float32_unpack_canonical(FloatParts64
*p
, float32 f
,
1662 float32_unpack_raw(p
, f
);
1663 parts_canonicalize(p
, s
, &float32_params
);
1666 static float32
float32_round_pack_canonical(FloatParts64
*p
,
1669 parts_uncanon(p
, s
, &float32_params
);
1670 return float32_pack_raw(p
);
1673 static void float64_unpack_canonical(FloatParts64
*p
, float64 f
,
1676 float64_unpack_raw(p
, f
);
1677 parts_canonicalize(p
, s
, &float64_params
);
1680 static float64
float64_round_pack_canonical(FloatParts64
*p
,
1683 parts_uncanon(p
, s
, &float64_params
);
1684 return float64_pack_raw(p
);
1687 static void float128_unpack_canonical(FloatParts128
*p
, float128 f
,
1690 float128_unpack_raw(p
, f
);
1691 parts_canonicalize(p
, s
, &float128_params
);
1694 static float128
float128_round_pack_canonical(FloatParts128
*p
,
1697 parts_uncanon(p
, s
, &float128_params
);
1698 return float128_pack_raw(p
);
1701 /* Returns false if the encoding is invalid. */
1702 static bool floatx80_unpack_canonical(FloatParts128
*p
, floatx80 f
,
1705 /* Ensure rounding precision is set before beginning. */
1706 switch (s
->floatx80_rounding_precision
) {
1707 case floatx80_precision_x
:
1708 case floatx80_precision_d
:
1709 case floatx80_precision_s
:
1712 g_assert_not_reached();
1715 if (unlikely(floatx80_invalid_encoding(f
))) {
1716 float_raise(float_flag_invalid
, s
);
1720 floatx80_unpack_raw(p
, f
);
1722 if (likely(p
->exp
!= floatx80_params
[floatx80_precision_x
].exp_max
)) {
1723 parts_canonicalize(p
, s
, &floatx80_params
[floatx80_precision_x
]);
1725 /* The explicit integer bit is ignored, after invalid checks. */
1726 p
->frac_hi
&= MAKE_64BIT_MASK(0, 63);
1727 p
->cls
= (p
->frac_hi
== 0 ? float_class_inf
1728 : parts_is_snan_frac(p
->frac_hi
, s
)
1729 ? float_class_snan
: float_class_qnan
);
1734 static floatx80
floatx80_round_pack_canonical(FloatParts128
*p
,
1737 const FloatFmt
*fmt
= &floatx80_params
[s
->floatx80_rounding_precision
];
1742 case float_class_normal
:
1743 if (s
->floatx80_rounding_precision
== floatx80_precision_x
) {
1744 parts_uncanon_normal(p
, s
, fmt
);
1752 frac_truncjam(&p64
, p
);
1753 parts_uncanon_normal(&p64
, s
, fmt
);
1757 if (exp
!= fmt
->exp_max
) {
1760 /* rounded to inf -- fall through to set frac correctly */
1762 case float_class_inf
:
1763 /* x86 and m68k differ in the setting of the integer bit. */
1764 frac
= floatx80_infinity_low
;
1768 case float_class_zero
:
1773 case float_class_snan
:
1774 case float_class_qnan
:
1775 /* NaNs have the integer bit set. */
1776 frac
= p
->frac_hi
| (1ull << 63);
1781 g_assert_not_reached();
1784 return packFloatx80(p
->sign
, exp
, frac
);
1788 * Addition and subtraction
1791 static float16 QEMU_FLATTEN
1792 float16_addsub(float16 a
, float16 b
, float_status
*status
, bool subtract
)
1794 FloatParts64 pa
, pb
, *pr
;
1796 float16_unpack_canonical(&pa
, a
, status
);
1797 float16_unpack_canonical(&pb
, b
, status
);
1798 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1800 return float16_round_pack_canonical(pr
, status
);
1803 float16
float16_add(float16 a
, float16 b
, float_status
*status
)
1805 return float16_addsub(a
, b
, status
, false);
1808 float16
float16_sub(float16 a
, float16 b
, float_status
*status
)
1810 return float16_addsub(a
, b
, status
, true);
1813 static float32 QEMU_SOFTFLOAT_ATTR
1814 soft_f32_addsub(float32 a
, float32 b
, float_status
*status
, bool subtract
)
1816 FloatParts64 pa
, pb
, *pr
;
1818 float32_unpack_canonical(&pa
, a
, status
);
1819 float32_unpack_canonical(&pb
, b
, status
);
1820 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1822 return float32_round_pack_canonical(pr
, status
);
1825 static float32
soft_f32_add(float32 a
, float32 b
, float_status
*status
)
1827 return soft_f32_addsub(a
, b
, status
, false);
1830 static float32
soft_f32_sub(float32 a
, float32 b
, float_status
*status
)
1832 return soft_f32_addsub(a
, b
, status
, true);
1835 static float64 QEMU_SOFTFLOAT_ATTR
1836 soft_f64_addsub(float64 a
, float64 b
, float_status
*status
, bool subtract
)
1838 FloatParts64 pa
, pb
, *pr
;
1840 float64_unpack_canonical(&pa
, a
, status
);
1841 float64_unpack_canonical(&pb
, b
, status
);
1842 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1844 return float64_round_pack_canonical(pr
, status
);
1847 static float64
soft_f64_add(float64 a
, float64 b
, float_status
*status
)
1849 return soft_f64_addsub(a
, b
, status
, false);
1852 static float64
soft_f64_sub(float64 a
, float64 b
, float_status
*status
)
1854 return soft_f64_addsub(a
, b
, status
, true);
1857 static float hard_f32_add(float a
, float b
)
1862 static float hard_f32_sub(float a
, float b
)
1867 static double hard_f64_add(double a
, double b
)
1872 static double hard_f64_sub(double a
, double b
)
1877 static bool f32_addsubmul_post(union_float32 a
, union_float32 b
)
1879 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
1880 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1882 return !(float32_is_zero(a
.s
) && float32_is_zero(b
.s
));
1885 static bool f64_addsubmul_post(union_float64 a
, union_float64 b
)
1887 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
1888 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1890 return !(float64_is_zero(a
.s
) && float64_is_zero(b
.s
));
1894 static float32
float32_addsub(float32 a
, float32 b
, float_status
*s
,
1895 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
)
1897 return float32_gen2(a
, b
, s
, hard
, soft
,
1898 f32_is_zon2
, f32_addsubmul_post
);
1901 static float64
float64_addsub(float64 a
, float64 b
, float_status
*s
,
1902 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
)
1904 return float64_gen2(a
, b
, s
, hard
, soft
,
1905 f64_is_zon2
, f64_addsubmul_post
);
1908 float32 QEMU_FLATTEN
1909 float32_add(float32 a
, float32 b
, float_status
*s
)
1911 return float32_addsub(a
, b
, s
, hard_f32_add
, soft_f32_add
);
1914 float32 QEMU_FLATTEN
1915 float32_sub(float32 a
, float32 b
, float_status
*s
)
1917 return float32_addsub(a
, b
, s
, hard_f32_sub
, soft_f32_sub
);
1920 float64 QEMU_FLATTEN
1921 float64_add(float64 a
, float64 b
, float_status
*s
)
1923 return float64_addsub(a
, b
, s
, hard_f64_add
, soft_f64_add
);
1926 float64 QEMU_FLATTEN
1927 float64_sub(float64 a
, float64 b
, float_status
*s
)
1929 return float64_addsub(a
, b
, s
, hard_f64_sub
, soft_f64_sub
);
1932 static bfloat16 QEMU_FLATTEN
1933 bfloat16_addsub(bfloat16 a
, bfloat16 b
, float_status
*status
, bool subtract
)
1935 FloatParts64 pa
, pb
, *pr
;
1937 bfloat16_unpack_canonical(&pa
, a
, status
);
1938 bfloat16_unpack_canonical(&pb
, b
, status
);
1939 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1941 return bfloat16_round_pack_canonical(pr
, status
);
1944 bfloat16
bfloat16_add(bfloat16 a
, bfloat16 b
, float_status
*status
)
1946 return bfloat16_addsub(a
, b
, status
, false);
1949 bfloat16
bfloat16_sub(bfloat16 a
, bfloat16 b
, float_status
*status
)
1951 return bfloat16_addsub(a
, b
, status
, true);
1954 static float128 QEMU_FLATTEN
1955 float128_addsub(float128 a
, float128 b
, float_status
*status
, bool subtract
)
1957 FloatParts128 pa
, pb
, *pr
;
1959 float128_unpack_canonical(&pa
, a
, status
);
1960 float128_unpack_canonical(&pb
, b
, status
);
1961 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1963 return float128_round_pack_canonical(pr
, status
);
1966 float128
float128_add(float128 a
, float128 b
, float_status
*status
)
1968 return float128_addsub(a
, b
, status
, false);
1971 float128
float128_sub(float128 a
, float128 b
, float_status
*status
)
1973 return float128_addsub(a
, b
, status
, true);
1976 static floatx80 QEMU_FLATTEN
1977 floatx80_addsub(floatx80 a
, floatx80 b
, float_status
*status
, bool subtract
)
1979 FloatParts128 pa
, pb
, *pr
;
1981 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
1982 !floatx80_unpack_canonical(&pb
, b
, status
)) {
1983 return floatx80_default_nan(status
);
1986 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1987 return floatx80_round_pack_canonical(pr
, status
);
1990 floatx80
floatx80_add(floatx80 a
, floatx80 b
, float_status
*status
)
1992 return floatx80_addsub(a
, b
, status
, false);
1995 floatx80
floatx80_sub(floatx80 a
, floatx80 b
, float_status
*status
)
1997 return floatx80_addsub(a
, b
, status
, true);
2004 float16 QEMU_FLATTEN
float16_mul(float16 a
, float16 b
, float_status
*status
)
2006 FloatParts64 pa
, pb
, *pr
;
2008 float16_unpack_canonical(&pa
, a
, status
);
2009 float16_unpack_canonical(&pb
, b
, status
);
2010 pr
= parts_mul(&pa
, &pb
, status
);
2012 return float16_round_pack_canonical(pr
, status
);
2015 static float32 QEMU_SOFTFLOAT_ATTR
2016 soft_f32_mul(float32 a
, float32 b
, float_status
*status
)
2018 FloatParts64 pa
, pb
, *pr
;
2020 float32_unpack_canonical(&pa
, a
, status
);
2021 float32_unpack_canonical(&pb
, b
, status
);
2022 pr
= parts_mul(&pa
, &pb
, status
);
2024 return float32_round_pack_canonical(pr
, status
);
2027 static float64 QEMU_SOFTFLOAT_ATTR
2028 soft_f64_mul(float64 a
, float64 b
, float_status
*status
)
2030 FloatParts64 pa
, pb
, *pr
;
2032 float64_unpack_canonical(&pa
, a
, status
);
2033 float64_unpack_canonical(&pb
, b
, status
);
2034 pr
= parts_mul(&pa
, &pb
, status
);
2036 return float64_round_pack_canonical(pr
, status
);
2039 static float hard_f32_mul(float a
, float b
)
2044 static double hard_f64_mul(double a
, double b
)
2049 float32 QEMU_FLATTEN
2050 float32_mul(float32 a
, float32 b
, float_status
*s
)
2052 return float32_gen2(a
, b
, s
, hard_f32_mul
, soft_f32_mul
,
2053 f32_is_zon2
, f32_addsubmul_post
);
2056 float64 QEMU_FLATTEN
2057 float64_mul(float64 a
, float64 b
, float_status
*s
)
2059 return float64_gen2(a
, b
, s
, hard_f64_mul
, soft_f64_mul
,
2060 f64_is_zon2
, f64_addsubmul_post
);
2063 bfloat16 QEMU_FLATTEN
2064 bfloat16_mul(bfloat16 a
, bfloat16 b
, float_status
*status
)
2066 FloatParts64 pa
, pb
, *pr
;
2068 bfloat16_unpack_canonical(&pa
, a
, status
);
2069 bfloat16_unpack_canonical(&pb
, b
, status
);
2070 pr
= parts_mul(&pa
, &pb
, status
);
2072 return bfloat16_round_pack_canonical(pr
, status
);
2075 float128 QEMU_FLATTEN
2076 float128_mul(float128 a
, float128 b
, float_status
*status
)
2078 FloatParts128 pa
, pb
, *pr
;
2080 float128_unpack_canonical(&pa
, a
, status
);
2081 float128_unpack_canonical(&pb
, b
, status
);
2082 pr
= parts_mul(&pa
, &pb
, status
);
2084 return float128_round_pack_canonical(pr
, status
);
2087 floatx80 QEMU_FLATTEN
2088 floatx80_mul(floatx80 a
, floatx80 b
, float_status
*status
)
2090 FloatParts128 pa
, pb
, *pr
;
2092 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2093 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2094 return floatx80_default_nan(status
);
2097 pr
= parts_mul(&pa
, &pb
, status
);
2098 return floatx80_round_pack_canonical(pr
, status
);
2102 * Fused multiply-add
2105 float16 QEMU_FLATTEN
float16_muladd(float16 a
, float16 b
, float16 c
,
2106 int flags
, float_status
*status
)
2108 FloatParts64 pa
, pb
, pc
, *pr
;
2110 float16_unpack_canonical(&pa
, a
, status
);
2111 float16_unpack_canonical(&pb
, b
, status
);
2112 float16_unpack_canonical(&pc
, c
, status
);
2113 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2115 return float16_round_pack_canonical(pr
, status
);
2118 static float32 QEMU_SOFTFLOAT_ATTR
2119 soft_f32_muladd(float32 a
, float32 b
, float32 c
, int flags
,
2120 float_status
*status
)
2122 FloatParts64 pa
, pb
, pc
, *pr
;
2124 float32_unpack_canonical(&pa
, a
, status
);
2125 float32_unpack_canonical(&pb
, b
, status
);
2126 float32_unpack_canonical(&pc
, c
, status
);
2127 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2129 return float32_round_pack_canonical(pr
, status
);
2132 static float64 QEMU_SOFTFLOAT_ATTR
2133 soft_f64_muladd(float64 a
, float64 b
, float64 c
, int flags
,
2134 float_status
*status
)
2136 FloatParts64 pa
, pb
, pc
, *pr
;
2138 float64_unpack_canonical(&pa
, a
, status
);
2139 float64_unpack_canonical(&pb
, b
, status
);
2140 float64_unpack_canonical(&pc
, c
, status
);
2141 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2143 return float64_round_pack_canonical(pr
, status
);
2146 static bool force_soft_fma
;
2148 float32 QEMU_FLATTEN
2149 float32_muladd(float32 xa
, float32 xb
, float32 xc
, int flags
, float_status
*s
)
2151 union_float32 ua
, ub
, uc
, ur
;
2157 if (unlikely(!can_use_fpu(s
))) {
2160 if (unlikely(flags
& float_muladd_halve_result
)) {
2164 float32_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2165 if (unlikely(!f32_is_zon3(ua
, ub
, uc
))) {
2169 if (unlikely(force_soft_fma
)) {
2174 * When (a || b) == 0, there's no need to check for under/over flow,
2175 * since we know the addend is (normal || 0) and the product is 0.
2177 if (float32_is_zero(ua
.s
) || float32_is_zero(ub
.s
)) {
2181 prod_sign
= float32_is_neg(ua
.s
) ^ float32_is_neg(ub
.s
);
2182 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2183 up
.s
= float32_set_sign(float32_zero
, prod_sign
);
2185 if (flags
& float_muladd_negate_c
) {
2190 union_float32 ua_orig
= ua
;
2191 union_float32 uc_orig
= uc
;
2193 if (flags
& float_muladd_negate_product
) {
2196 if (flags
& float_muladd_negate_c
) {
2200 ur
.h
= fmaf(ua
.h
, ub
.h
, uc
.h
);
2202 if (unlikely(f32_is_inf(ur
))) {
2203 float_raise(float_flag_overflow
, s
);
2204 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
)) {
2210 if (flags
& float_muladd_negate_result
) {
2211 return float32_chs(ur
.s
);
2216 return soft_f32_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2219 float64 QEMU_FLATTEN
2220 float64_muladd(float64 xa
, float64 xb
, float64 xc
, int flags
, float_status
*s
)
2222 union_float64 ua
, ub
, uc
, ur
;
2228 if (unlikely(!can_use_fpu(s
))) {
2231 if (unlikely(flags
& float_muladd_halve_result
)) {
2235 float64_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2236 if (unlikely(!f64_is_zon3(ua
, ub
, uc
))) {
2240 if (unlikely(force_soft_fma
)) {
2245 * When (a || b) == 0, there's no need to check for under/over flow,
2246 * since we know the addend is (normal || 0) and the product is 0.
2248 if (float64_is_zero(ua
.s
) || float64_is_zero(ub
.s
)) {
2252 prod_sign
= float64_is_neg(ua
.s
) ^ float64_is_neg(ub
.s
);
2253 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2254 up
.s
= float64_set_sign(float64_zero
, prod_sign
);
2256 if (flags
& float_muladd_negate_c
) {
2261 union_float64 ua_orig
= ua
;
2262 union_float64 uc_orig
= uc
;
2264 if (flags
& float_muladd_negate_product
) {
2267 if (flags
& float_muladd_negate_c
) {
2271 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
2273 if (unlikely(f64_is_inf(ur
))) {
2274 float_raise(float_flag_overflow
, s
);
2275 } else if (unlikely(fabs(ur
.h
) <= FLT_MIN
)) {
2281 if (flags
& float_muladd_negate_result
) {
2282 return float64_chs(ur
.s
);
2287 return soft_f64_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2290 bfloat16 QEMU_FLATTEN
bfloat16_muladd(bfloat16 a
, bfloat16 b
, bfloat16 c
,
2291 int flags
, float_status
*status
)
2293 FloatParts64 pa
, pb
, pc
, *pr
;
2295 bfloat16_unpack_canonical(&pa
, a
, status
);
2296 bfloat16_unpack_canonical(&pb
, b
, status
);
2297 bfloat16_unpack_canonical(&pc
, c
, status
);
2298 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2300 return bfloat16_round_pack_canonical(pr
, status
);
2303 float128 QEMU_FLATTEN
float128_muladd(float128 a
, float128 b
, float128 c
,
2304 int flags
, float_status
*status
)
2306 FloatParts128 pa
, pb
, pc
, *pr
;
2308 float128_unpack_canonical(&pa
, a
, status
);
2309 float128_unpack_canonical(&pb
, b
, status
);
2310 float128_unpack_canonical(&pc
, c
, status
);
2311 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2313 return float128_round_pack_canonical(pr
, status
);
2320 float16
float16_div(float16 a
, float16 b
, float_status
*status
)
2322 FloatParts64 pa
, pb
, *pr
;
2324 float16_unpack_canonical(&pa
, a
, status
);
2325 float16_unpack_canonical(&pb
, b
, status
);
2326 pr
= parts_div(&pa
, &pb
, status
);
2328 return float16_round_pack_canonical(pr
, status
);
2331 static float32 QEMU_SOFTFLOAT_ATTR
2332 soft_f32_div(float32 a
, float32 b
, float_status
*status
)
2334 FloatParts64 pa
, pb
, *pr
;
2336 float32_unpack_canonical(&pa
, a
, status
);
2337 float32_unpack_canonical(&pb
, b
, status
);
2338 pr
= parts_div(&pa
, &pb
, status
);
2340 return float32_round_pack_canonical(pr
, status
);
2343 static float64 QEMU_SOFTFLOAT_ATTR
2344 soft_f64_div(float64 a
, float64 b
, float_status
*status
)
2346 FloatParts64 pa
, pb
, *pr
;
2348 float64_unpack_canonical(&pa
, a
, status
);
2349 float64_unpack_canonical(&pb
, b
, status
);
2350 pr
= parts_div(&pa
, &pb
, status
);
2352 return float64_round_pack_canonical(pr
, status
);
2355 static float hard_f32_div(float a
, float b
)
2360 static double hard_f64_div(double a
, double b
)
2365 static bool f32_div_pre(union_float32 a
, union_float32 b
)
2367 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2368 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2369 fpclassify(b
.h
) == FP_NORMAL
;
2371 return float32_is_zero_or_normal(a
.s
) && float32_is_normal(b
.s
);
2374 static bool f64_div_pre(union_float64 a
, union_float64 b
)
2376 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2377 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2378 fpclassify(b
.h
) == FP_NORMAL
;
2380 return float64_is_zero_or_normal(a
.s
) && float64_is_normal(b
.s
);
2383 static bool f32_div_post(union_float32 a
, union_float32 b
)
2385 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2386 return fpclassify(a
.h
) != FP_ZERO
;
2388 return !float32_is_zero(a
.s
);
2391 static bool f64_div_post(union_float64 a
, union_float64 b
)
2393 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2394 return fpclassify(a
.h
) != FP_ZERO
;
2396 return !float64_is_zero(a
.s
);
2399 float32 QEMU_FLATTEN
2400 float32_div(float32 a
, float32 b
, float_status
*s
)
2402 return float32_gen2(a
, b
, s
, hard_f32_div
, soft_f32_div
,
2403 f32_div_pre
, f32_div_post
);
2406 float64 QEMU_FLATTEN
2407 float64_div(float64 a
, float64 b
, float_status
*s
)
2409 return float64_gen2(a
, b
, s
, hard_f64_div
, soft_f64_div
,
2410 f64_div_pre
, f64_div_post
);
2413 bfloat16 QEMU_FLATTEN
2414 bfloat16_div(bfloat16 a
, bfloat16 b
, float_status
*status
)
2416 FloatParts64 pa
, pb
, *pr
;
2418 bfloat16_unpack_canonical(&pa
, a
, status
);
2419 bfloat16_unpack_canonical(&pb
, b
, status
);
2420 pr
= parts_div(&pa
, &pb
, status
);
2422 return bfloat16_round_pack_canonical(pr
, status
);
2425 float128 QEMU_FLATTEN
2426 float128_div(float128 a
, float128 b
, float_status
*status
)
2428 FloatParts128 pa
, pb
, *pr
;
2430 float128_unpack_canonical(&pa
, a
, status
);
2431 float128_unpack_canonical(&pb
, b
, status
);
2432 pr
= parts_div(&pa
, &pb
, status
);
2434 return float128_round_pack_canonical(pr
, status
);
2437 floatx80
floatx80_div(floatx80 a
, floatx80 b
, float_status
*status
)
2439 FloatParts128 pa
, pb
, *pr
;
2441 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2442 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2443 return floatx80_default_nan(status
);
2446 pr
= parts_div(&pa
, &pb
, status
);
2447 return floatx80_round_pack_canonical(pr
, status
);
2454 float32
float32_rem(float32 a
, float32 b
, float_status
*status
)
2456 FloatParts64 pa
, pb
, *pr
;
2458 float32_unpack_canonical(&pa
, a
, status
);
2459 float32_unpack_canonical(&pb
, b
, status
);
2460 pr
= parts_modrem(&pa
, &pb
, NULL
, status
);
2462 return float32_round_pack_canonical(pr
, status
);
2465 float64
float64_rem(float64 a
, float64 b
, float_status
*status
)
2467 FloatParts64 pa
, pb
, *pr
;
2469 float64_unpack_canonical(&pa
, a
, status
);
2470 float64_unpack_canonical(&pb
, b
, status
);
2471 pr
= parts_modrem(&pa
, &pb
, NULL
, status
);
2473 return float64_round_pack_canonical(pr
, status
);
2476 float128
float128_rem(float128 a
, float128 b
, float_status
*status
)
2478 FloatParts128 pa
, pb
, *pr
;
2480 float128_unpack_canonical(&pa
, a
, status
);
2481 float128_unpack_canonical(&pb
, b
, status
);
2482 pr
= parts_modrem(&pa
, &pb
, NULL
, status
);
2484 return float128_round_pack_canonical(pr
, status
);
2488 * Returns the remainder of the extended double-precision floating-point value
2489 * `a' with respect to the corresponding value `b'.
2490 * If 'mod' is false, the operation is performed according to the IEC/IEEE
2491 * Standard for Binary Floating-Point Arithmetic. If 'mod' is true, return
2492 * the remainder based on truncating the quotient toward zero instead and
2493 * *quotient is set to the low 64 bits of the absolute value of the integer
2496 floatx80
floatx80_modrem(floatx80 a
, floatx80 b
, bool mod
,
2497 uint64_t *quotient
, float_status
*status
)
2499 FloatParts128 pa
, pb
, *pr
;
2502 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2503 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2504 return floatx80_default_nan(status
);
2506 pr
= parts_modrem(&pa
, &pb
, mod
? quotient
: NULL
, status
);
2508 return floatx80_round_pack_canonical(pr
, status
);
2511 floatx80
floatx80_rem(floatx80 a
, floatx80 b
, float_status
*status
)
2514 return floatx80_modrem(a
, b
, false, "ient
, status
);
2517 floatx80
floatx80_mod(floatx80 a
, floatx80 b
, float_status
*status
)
2520 return floatx80_modrem(a
, b
, true, "ient
, status
);
2524 * Float to Float conversions
2526 * Returns the result of converting one float format to another. The
2527 * conversion is performed according to the IEC/IEEE Standard for
2528 * Binary Floating-Point Arithmetic.
2530 * Usually this only needs to take care of raising invalid exceptions
2531 * and handling the conversion on NaNs.
2534 static void parts_float_to_ahp(FloatParts64
*a
, float_status
*s
)
2537 case float_class_qnan
:
2538 case float_class_snan
:
2540 * There is no NaN in the destination format. Raise Invalid
2541 * and return a zero with the sign of the input NaN.
2543 float_raise(float_flag_invalid
, s
);
2544 a
->cls
= float_class_zero
;
2547 case float_class_inf
:
2549 * There is no Inf in the destination format. Raise Invalid
2550 * and return the maximum normal with the correct sign.
2552 float_raise(float_flag_invalid
, s
);
2553 a
->cls
= float_class_normal
;
2554 a
->exp
= float16_params_ahp
.exp_max
;
2555 a
->frac
= MAKE_64BIT_MASK(float16_params_ahp
.frac_shift
,
2556 float16_params_ahp
.frac_size
+ 1);
2559 case float_class_normal
:
2560 case float_class_zero
:
2564 g_assert_not_reached();
2568 static void parts64_float_to_float(FloatParts64
*a
, float_status
*s
)
2570 if (is_nan(a
->cls
)) {
2571 parts_return_nan(a
, s
);
2575 static void parts128_float_to_float(FloatParts128
*a
, float_status
*s
)
2577 if (is_nan(a
->cls
)) {
2578 parts_return_nan(a
, s
);
2582 #define parts_float_to_float(P, S) \
2583 PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2585 static void parts_float_to_float_narrow(FloatParts64
*a
, FloatParts128
*b
,
2592 if (a
->cls
== float_class_normal
) {
2593 frac_truncjam(a
, b
);
2594 } else if (is_nan(a
->cls
)) {
2595 /* Discard the low bits of the NaN. */
2596 a
->frac
= b
->frac_hi
;
2597 parts_return_nan(a
, s
);
2601 static void parts_float_to_float_widen(FloatParts128
*a
, FloatParts64
*b
,
2609 if (is_nan(a
->cls
)) {
2610 parts_return_nan(a
, s
);
2614 float32
float16_to_float32(float16 a
, bool ieee
, float_status
*s
)
2616 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2619 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2620 parts_float_to_float(&p
, s
);
2621 return float32_round_pack_canonical(&p
, s
);
2624 float64
float16_to_float64(float16 a
, bool ieee
, float_status
*s
)
2626 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2629 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2630 parts_float_to_float(&p
, s
);
2631 return float64_round_pack_canonical(&p
, s
);
2634 float16
float32_to_float16(float32 a
, bool ieee
, float_status
*s
)
2637 const FloatFmt
*fmt
;
2639 float32_unpack_canonical(&p
, a
, s
);
2641 parts_float_to_float(&p
, s
);
2642 fmt
= &float16_params
;
2644 parts_float_to_ahp(&p
, s
);
2645 fmt
= &float16_params_ahp
;
2647 return float16a_round_pack_canonical(&p
, s
, fmt
);
2650 static float64 QEMU_SOFTFLOAT_ATTR
2651 soft_float32_to_float64(float32 a
, float_status
*s
)
2655 float32_unpack_canonical(&p
, a
, s
);
2656 parts_float_to_float(&p
, s
);
2657 return float64_round_pack_canonical(&p
, s
);
2660 float64
float32_to_float64(float32 a
, float_status
*s
)
2662 if (likely(float32_is_normal(a
))) {
2663 /* Widening conversion can never produce inexact results. */
2669 } else if (float32_is_zero(a
)) {
2670 return float64_set_sign(float64_zero
, float32_is_neg(a
));
2672 return soft_float32_to_float64(a
, s
);
2676 float16
float64_to_float16(float64 a
, bool ieee
, float_status
*s
)
2679 const FloatFmt
*fmt
;
2681 float64_unpack_canonical(&p
, a
, s
);
2683 parts_float_to_float(&p
, s
);
2684 fmt
= &float16_params
;
2686 parts_float_to_ahp(&p
, s
);
2687 fmt
= &float16_params_ahp
;
2689 return float16a_round_pack_canonical(&p
, s
, fmt
);
2692 float32
float64_to_float32(float64 a
, float_status
*s
)
2696 float64_unpack_canonical(&p
, a
, s
);
2697 parts_float_to_float(&p
, s
);
2698 return float32_round_pack_canonical(&p
, s
);
2701 float32
bfloat16_to_float32(bfloat16 a
, float_status
*s
)
2705 bfloat16_unpack_canonical(&p
, a
, s
);
2706 parts_float_to_float(&p
, s
);
2707 return float32_round_pack_canonical(&p
, s
);
2710 float64
bfloat16_to_float64(bfloat16 a
, float_status
*s
)
2714 bfloat16_unpack_canonical(&p
, a
, s
);
2715 parts_float_to_float(&p
, s
);
2716 return float64_round_pack_canonical(&p
, s
);
2719 bfloat16
float32_to_bfloat16(float32 a
, float_status
*s
)
2723 float32_unpack_canonical(&p
, a
, s
);
2724 parts_float_to_float(&p
, s
);
2725 return bfloat16_round_pack_canonical(&p
, s
);
2728 bfloat16
float64_to_bfloat16(float64 a
, float_status
*s
)
2732 float64_unpack_canonical(&p
, a
, s
);
2733 parts_float_to_float(&p
, s
);
2734 return bfloat16_round_pack_canonical(&p
, s
);
2737 float32
float128_to_float32(float128 a
, float_status
*s
)
2742 float128_unpack_canonical(&p128
, a
, s
);
2743 parts_float_to_float_narrow(&p64
, &p128
, s
);
2744 return float32_round_pack_canonical(&p64
, s
);
2747 float64
float128_to_float64(float128 a
, float_status
*s
)
2752 float128_unpack_canonical(&p128
, a
, s
);
2753 parts_float_to_float_narrow(&p64
, &p128
, s
);
2754 return float64_round_pack_canonical(&p64
, s
);
2757 float128
float32_to_float128(float32 a
, float_status
*s
)
2762 float32_unpack_canonical(&p64
, a
, s
);
2763 parts_float_to_float_widen(&p128
, &p64
, s
);
2764 return float128_round_pack_canonical(&p128
, s
);
2767 float128
float64_to_float128(float64 a
, float_status
*s
)
2772 float64_unpack_canonical(&p64
, a
, s
);
2773 parts_float_to_float_widen(&p128
, &p64
, s
);
2774 return float128_round_pack_canonical(&p128
, s
);
2777 float32
floatx80_to_float32(floatx80 a
, float_status
*s
)
2782 if (floatx80_unpack_canonical(&p128
, a
, s
)) {
2783 parts_float_to_float_narrow(&p64
, &p128
, s
);
2785 parts_default_nan(&p64
, s
);
2787 return float32_round_pack_canonical(&p64
, s
);
2790 float64
floatx80_to_float64(floatx80 a
, float_status
*s
)
2795 if (floatx80_unpack_canonical(&p128
, a
, s
)) {
2796 parts_float_to_float_narrow(&p64
, &p128
, s
);
2798 parts_default_nan(&p64
, s
);
2800 return float64_round_pack_canonical(&p64
, s
);
2803 float128
floatx80_to_float128(floatx80 a
, float_status
*s
)
2807 if (floatx80_unpack_canonical(&p
, a
, s
)) {
2808 parts_float_to_float(&p
, s
);
2810 parts_default_nan(&p
, s
);
2812 return float128_round_pack_canonical(&p
, s
);
2815 floatx80
float32_to_floatx80(float32 a
, float_status
*s
)
2820 float32_unpack_canonical(&p64
, a
, s
);
2821 parts_float_to_float_widen(&p128
, &p64
, s
);
2822 return floatx80_round_pack_canonical(&p128
, s
);
2825 floatx80
float64_to_floatx80(float64 a
, float_status
*s
)
2830 float64_unpack_canonical(&p64
, a
, s
);
2831 parts_float_to_float_widen(&p128
, &p64
, s
);
2832 return floatx80_round_pack_canonical(&p128
, s
);
2835 floatx80
float128_to_floatx80(float128 a
, float_status
*s
)
2839 float128_unpack_canonical(&p
, a
, s
);
2840 parts_float_to_float(&p
, s
);
2841 return floatx80_round_pack_canonical(&p
, s
);
2845 * Round to integral value
2848 float16
float16_round_to_int(float16 a
, float_status
*s
)
2852 float16_unpack_canonical(&p
, a
, s
);
2853 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float16_params
);
2854 return float16_round_pack_canonical(&p
, s
);
2857 float32
float32_round_to_int(float32 a
, float_status
*s
)
2861 float32_unpack_canonical(&p
, a
, s
);
2862 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float32_params
);
2863 return float32_round_pack_canonical(&p
, s
);
2866 float64
float64_round_to_int(float64 a
, float_status
*s
)
2870 float64_unpack_canonical(&p
, a
, s
);
2871 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float64_params
);
2872 return float64_round_pack_canonical(&p
, s
);
2875 bfloat16
bfloat16_round_to_int(bfloat16 a
, float_status
*s
)
2879 bfloat16_unpack_canonical(&p
, a
, s
);
2880 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &bfloat16_params
);
2881 return bfloat16_round_pack_canonical(&p
, s
);
2884 float128
float128_round_to_int(float128 a
, float_status
*s
)
2888 float128_unpack_canonical(&p
, a
, s
);
2889 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float128_params
);
2890 return float128_round_pack_canonical(&p
, s
);
2893 floatx80
floatx80_round_to_int(floatx80 a
, float_status
*status
)
2897 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
2898 return floatx80_default_nan(status
);
2901 parts_round_to_int(&p
, status
->float_rounding_mode
, 0, status
,
2902 &floatx80_params
[status
->floatx80_rounding_precision
]);
2903 return floatx80_round_pack_canonical(&p
, status
);
2907 * Floating-point to signed integer conversions
2910 int8_t float16_to_int8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2915 float16_unpack_canonical(&p
, a
, s
);
2916 return parts_float_to_sint(&p
, rmode
, scale
, INT8_MIN
, INT8_MAX
, s
);
2919 int16_t float16_to_int16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2924 float16_unpack_canonical(&p
, a
, s
);
2925 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2928 int32_t float16_to_int32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2933 float16_unpack_canonical(&p
, a
, s
);
2934 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2937 int64_t float16_to_int64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2942 float16_unpack_canonical(&p
, a
, s
);
2943 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2946 int16_t float32_to_int16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2951 float32_unpack_canonical(&p
, a
, s
);
2952 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2955 int32_t float32_to_int32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2960 float32_unpack_canonical(&p
, a
, s
);
2961 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2964 int64_t float32_to_int64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2969 float32_unpack_canonical(&p
, a
, s
);
2970 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2973 int16_t float64_to_int16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2978 float64_unpack_canonical(&p
, a
, s
);
2979 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2982 int32_t float64_to_int32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2987 float64_unpack_canonical(&p
, a
, s
);
2988 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2991 int64_t float64_to_int64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2996 float64_unpack_canonical(&p
, a
, s
);
2997 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3000 int16_t bfloat16_to_int16_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
3005 bfloat16_unpack_canonical(&p
, a
, s
);
3006 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
3009 int32_t bfloat16_to_int32_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
3014 bfloat16_unpack_canonical(&p
, a
, s
);
3015 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3018 int64_t bfloat16_to_int64_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
3023 bfloat16_unpack_canonical(&p
, a
, s
);
3024 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3027 static int32_t float128_to_int32_scalbn(float128 a
, FloatRoundMode rmode
,
3028 int scale
, float_status
*s
)
3032 float128_unpack_canonical(&p
, a
, s
);
3033 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3036 static int64_t float128_to_int64_scalbn(float128 a
, FloatRoundMode rmode
,
3037 int scale
, float_status
*s
)
3041 float128_unpack_canonical(&p
, a
, s
);
3042 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3045 static int32_t floatx80_to_int32_scalbn(floatx80 a
, FloatRoundMode rmode
,
3046 int scale
, float_status
*s
)
3050 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
3051 parts_default_nan(&p
, s
);
3053 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
3056 static int64_t floatx80_to_int64_scalbn(floatx80 a
, FloatRoundMode rmode
,
3057 int scale
, float_status
*s
)
3061 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
3062 parts_default_nan(&p
, s
);
3064 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
3067 int8_t float16_to_int8(float16 a
, float_status
*s
)
3069 return float16_to_int8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3072 int16_t float16_to_int16(float16 a
, float_status
*s
)
3074 return float16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3077 int32_t float16_to_int32(float16 a
, float_status
*s
)
3079 return float16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3082 int64_t float16_to_int64(float16 a
, float_status
*s
)
3084 return float16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3087 int16_t float32_to_int16(float32 a
, float_status
*s
)
3089 return float32_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3092 int32_t float32_to_int32(float32 a
, float_status
*s
)
3094 return float32_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3097 int64_t float32_to_int64(float32 a
, float_status
*s
)
3099 return float32_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3102 int16_t float64_to_int16(float64 a
, float_status
*s
)
3104 return float64_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3107 int32_t float64_to_int32(float64 a
, float_status
*s
)
3109 return float64_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3112 int64_t float64_to_int64(float64 a
, float_status
*s
)
3114 return float64_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3117 int32_t float128_to_int32(float128 a
, float_status
*s
)
3119 return float128_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3122 int64_t float128_to_int64(float128 a
, float_status
*s
)
3124 return float128_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3127 int32_t floatx80_to_int32(floatx80 a
, float_status
*s
)
3129 return floatx80_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3132 int64_t floatx80_to_int64(floatx80 a
, float_status
*s
)
3134 return floatx80_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3137 int16_t float16_to_int16_round_to_zero(float16 a
, float_status
*s
)
3139 return float16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3142 int32_t float16_to_int32_round_to_zero(float16 a
, float_status
*s
)
3144 return float16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3147 int64_t float16_to_int64_round_to_zero(float16 a
, float_status
*s
)
3149 return float16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3152 int16_t float32_to_int16_round_to_zero(float32 a
, float_status
*s
)
3154 return float32_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3157 int32_t float32_to_int32_round_to_zero(float32 a
, float_status
*s
)
3159 return float32_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3162 int64_t float32_to_int64_round_to_zero(float32 a
, float_status
*s
)
3164 return float32_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3167 int16_t float64_to_int16_round_to_zero(float64 a
, float_status
*s
)
3169 return float64_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3172 int32_t float64_to_int32_round_to_zero(float64 a
, float_status
*s
)
3174 return float64_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3177 int64_t float64_to_int64_round_to_zero(float64 a
, float_status
*s
)
3179 return float64_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3182 int32_t float128_to_int32_round_to_zero(float128 a
, float_status
*s
)
3184 return float128_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3187 int64_t float128_to_int64_round_to_zero(float128 a
, float_status
*s
)
3189 return float128_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3192 int32_t floatx80_to_int32_round_to_zero(floatx80 a
, float_status
*s
)
3194 return floatx80_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3197 int64_t floatx80_to_int64_round_to_zero(floatx80 a
, float_status
*s
)
3199 return floatx80_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3202 int16_t bfloat16_to_int16(bfloat16 a
, float_status
*s
)
3204 return bfloat16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3207 int32_t bfloat16_to_int32(bfloat16 a
, float_status
*s
)
3209 return bfloat16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3212 int64_t bfloat16_to_int64(bfloat16 a
, float_status
*s
)
3214 return bfloat16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3217 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a
, float_status
*s
)
3219 return bfloat16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3222 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a
, float_status
*s
)
3224 return bfloat16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3227 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a
, float_status
*s
)
3229 return bfloat16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3233 * Floating-point to unsigned integer conversions
3236 uint8_t float16_to_uint8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3241 float16_unpack_canonical(&p
, a
, s
);
3242 return parts_float_to_uint(&p
, rmode
, scale
, UINT8_MAX
, s
);
3245 uint16_t float16_to_uint16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3250 float16_unpack_canonical(&p
, a
, s
);
3251 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3254 uint32_t float16_to_uint32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3259 float16_unpack_canonical(&p
, a
, s
);
3260 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3263 uint64_t float16_to_uint64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3268 float16_unpack_canonical(&p
, a
, s
);
3269 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3272 uint16_t float32_to_uint16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3277 float32_unpack_canonical(&p
, a
, s
);
3278 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3281 uint32_t float32_to_uint32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3286 float32_unpack_canonical(&p
, a
, s
);
3287 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3290 uint64_t float32_to_uint64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3295 float32_unpack_canonical(&p
, a
, s
);
3296 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3299 uint16_t float64_to_uint16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3304 float64_unpack_canonical(&p
, a
, s
);
3305 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3308 uint32_t float64_to_uint32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3313 float64_unpack_canonical(&p
, a
, s
);
3314 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3317 uint64_t float64_to_uint64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3322 float64_unpack_canonical(&p
, a
, s
);
3323 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3326 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3327 int scale
, float_status
*s
)
3331 bfloat16_unpack_canonical(&p
, a
, s
);
3332 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3335 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3336 int scale
, float_status
*s
)
3340 bfloat16_unpack_canonical(&p
, a
, s
);
3341 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3344 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3345 int scale
, float_status
*s
)
3349 bfloat16_unpack_canonical(&p
, a
, s
);
3350 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3353 static uint32_t float128_to_uint32_scalbn(float128 a
, FloatRoundMode rmode
,
3354 int scale
, float_status
*s
)
3358 float128_unpack_canonical(&p
, a
, s
);
3359 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3362 static uint64_t float128_to_uint64_scalbn(float128 a
, FloatRoundMode rmode
,
3363 int scale
, float_status
*s
)
3367 float128_unpack_canonical(&p
, a
, s
);
3368 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3371 uint8_t float16_to_uint8(float16 a
, float_status
*s
)
3373 return float16_to_uint8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3376 uint16_t float16_to_uint16(float16 a
, float_status
*s
)
3378 return float16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3381 uint32_t float16_to_uint32(float16 a
, float_status
*s
)
3383 return float16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3386 uint64_t float16_to_uint64(float16 a
, float_status
*s
)
3388 return float16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3391 uint16_t float32_to_uint16(float32 a
, float_status
*s
)
3393 return float32_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3396 uint32_t float32_to_uint32(float32 a
, float_status
*s
)
3398 return float32_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3401 uint64_t float32_to_uint64(float32 a
, float_status
*s
)
3403 return float32_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3406 uint16_t float64_to_uint16(float64 a
, float_status
*s
)
3408 return float64_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3411 uint32_t float64_to_uint32(float64 a
, float_status
*s
)
3413 return float64_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3416 uint64_t float64_to_uint64(float64 a
, float_status
*s
)
3418 return float64_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3421 uint32_t float128_to_uint32(float128 a
, float_status
*s
)
3423 return float128_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3426 uint64_t float128_to_uint64(float128 a
, float_status
*s
)
3428 return float128_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3431 uint16_t float16_to_uint16_round_to_zero(float16 a
, float_status
*s
)
3433 return float16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3436 uint32_t float16_to_uint32_round_to_zero(float16 a
, float_status
*s
)
3438 return float16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3441 uint64_t float16_to_uint64_round_to_zero(float16 a
, float_status
*s
)
3443 return float16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3446 uint16_t float32_to_uint16_round_to_zero(float32 a
, float_status
*s
)
3448 return float32_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3451 uint32_t float32_to_uint32_round_to_zero(float32 a
, float_status
*s
)
3453 return float32_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3456 uint64_t float32_to_uint64_round_to_zero(float32 a
, float_status
*s
)
3458 return float32_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3461 uint16_t float64_to_uint16_round_to_zero(float64 a
, float_status
*s
)
3463 return float64_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3466 uint32_t float64_to_uint32_round_to_zero(float64 a
, float_status
*s
)
3468 return float64_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3471 uint64_t float64_to_uint64_round_to_zero(float64 a
, float_status
*s
)
3473 return float64_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3476 uint32_t float128_to_uint32_round_to_zero(float128 a
, float_status
*s
)
3478 return float128_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3481 uint64_t float128_to_uint64_round_to_zero(float128 a
, float_status
*s
)
3483 return float128_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3486 uint16_t bfloat16_to_uint16(bfloat16 a
, float_status
*s
)
3488 return bfloat16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3491 uint32_t bfloat16_to_uint32(bfloat16 a
, float_status
*s
)
3493 return bfloat16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3496 uint64_t bfloat16_to_uint64(bfloat16 a
, float_status
*s
)
3498 return bfloat16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3501 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a
, float_status
*s
)
3503 return bfloat16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3506 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a
, float_status
*s
)
3508 return bfloat16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3511 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a
, float_status
*s
)
3513 return bfloat16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3517 * Signed integer to floating-point conversions
3520 float16
int64_to_float16_scalbn(int64_t a
, int scale
, float_status
*status
)
3524 parts_sint_to_float(&p
, a
, scale
, status
);
3525 return float16_round_pack_canonical(&p
, status
);
3528 float16
int32_to_float16_scalbn(int32_t a
, int scale
, float_status
*status
)
3530 return int64_to_float16_scalbn(a
, scale
, status
);
3533 float16
int16_to_float16_scalbn(int16_t a
, int scale
, float_status
*status
)
3535 return int64_to_float16_scalbn(a
, scale
, status
);
3538 float16
int64_to_float16(int64_t a
, float_status
*status
)
3540 return int64_to_float16_scalbn(a
, 0, status
);
3543 float16
int32_to_float16(int32_t a
, float_status
*status
)
3545 return int64_to_float16_scalbn(a
, 0, status
);
3548 float16
int16_to_float16(int16_t a
, float_status
*status
)
3550 return int64_to_float16_scalbn(a
, 0, status
);
3553 float16
int8_to_float16(int8_t a
, float_status
*status
)
3555 return int64_to_float16_scalbn(a
, 0, status
);
3558 float32
int64_to_float32_scalbn(int64_t a
, int scale
, float_status
*status
)
3562 /* Without scaling, there are no overflow concerns. */
3563 if (likely(scale
== 0) && can_use_fpu(status
)) {
3569 parts64_sint_to_float(&p
, a
, scale
, status
);
3570 return float32_round_pack_canonical(&p
, status
);
3573 float32
int32_to_float32_scalbn(int32_t a
, int scale
, float_status
*status
)
3575 return int64_to_float32_scalbn(a
, scale
, status
);
3578 float32
int16_to_float32_scalbn(int16_t a
, int scale
, float_status
*status
)
3580 return int64_to_float32_scalbn(a
, scale
, status
);
3583 float32
int64_to_float32(int64_t a
, float_status
*status
)
3585 return int64_to_float32_scalbn(a
, 0, status
);
3588 float32
int32_to_float32(int32_t a
, float_status
*status
)
3590 return int64_to_float32_scalbn(a
, 0, status
);
3593 float32
int16_to_float32(int16_t a
, float_status
*status
)
3595 return int64_to_float32_scalbn(a
, 0, status
);
3598 float64
int64_to_float64_scalbn(int64_t a
, int scale
, float_status
*status
)
3602 /* Without scaling, there are no overflow concerns. */
3603 if (likely(scale
== 0) && can_use_fpu(status
)) {
3609 parts_sint_to_float(&p
, a
, scale
, status
);
3610 return float64_round_pack_canonical(&p
, status
);
3613 float64
int32_to_float64_scalbn(int32_t a
, int scale
, float_status
*status
)
3615 return int64_to_float64_scalbn(a
, scale
, status
);
3618 float64
int16_to_float64_scalbn(int16_t a
, int scale
, float_status
*status
)
3620 return int64_to_float64_scalbn(a
, scale
, status
);
3623 float64
int64_to_float64(int64_t a
, float_status
*status
)
3625 return int64_to_float64_scalbn(a
, 0, status
);
3628 float64
int32_to_float64(int32_t a
, float_status
*status
)
3630 return int64_to_float64_scalbn(a
, 0, status
);
3633 float64
int16_to_float64(int16_t a
, float_status
*status
)
3635 return int64_to_float64_scalbn(a
, 0, status
);
3638 bfloat16
int64_to_bfloat16_scalbn(int64_t a
, int scale
, float_status
*status
)
3642 parts_sint_to_float(&p
, a
, scale
, status
);
3643 return bfloat16_round_pack_canonical(&p
, status
);
3646 bfloat16
int32_to_bfloat16_scalbn(int32_t a
, int scale
, float_status
*status
)
3648 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3651 bfloat16
int16_to_bfloat16_scalbn(int16_t a
, int scale
, float_status
*status
)
3653 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3656 bfloat16
int64_to_bfloat16(int64_t a
, float_status
*status
)
3658 return int64_to_bfloat16_scalbn(a
, 0, status
);
3661 bfloat16
int32_to_bfloat16(int32_t a
, float_status
*status
)
3663 return int64_to_bfloat16_scalbn(a
, 0, status
);
3666 bfloat16
int16_to_bfloat16(int16_t a
, float_status
*status
)
3668 return int64_to_bfloat16_scalbn(a
, 0, status
);
3671 float128
int64_to_float128(int64_t a
, float_status
*status
)
3675 parts_sint_to_float(&p
, a
, 0, status
);
3676 return float128_round_pack_canonical(&p
, status
);
3679 float128
int32_to_float128(int32_t a
, float_status
*status
)
3681 return int64_to_float128(a
, status
);
3684 floatx80
int64_to_floatx80(int64_t a
, float_status
*status
)
3688 parts_sint_to_float(&p
, a
, 0, status
);
3689 return floatx80_round_pack_canonical(&p
, status
);
3692 floatx80
int32_to_floatx80(int32_t a
, float_status
*status
)
3694 return int64_to_floatx80(a
, status
);
3698 * Unsigned Integer to floating-point conversions
3701 float16
uint64_to_float16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3705 parts_uint_to_float(&p
, a
, scale
, status
);
3706 return float16_round_pack_canonical(&p
, status
);
3709 float16
uint32_to_float16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3711 return uint64_to_float16_scalbn(a
, scale
, status
);
3714 float16
uint16_to_float16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3716 return uint64_to_float16_scalbn(a
, scale
, status
);
3719 float16
uint64_to_float16(uint64_t a
, float_status
*status
)
3721 return uint64_to_float16_scalbn(a
, 0, status
);
3724 float16
uint32_to_float16(uint32_t a
, float_status
*status
)
3726 return uint64_to_float16_scalbn(a
, 0, status
);
3729 float16
uint16_to_float16(uint16_t a
, float_status
*status
)
3731 return uint64_to_float16_scalbn(a
, 0, status
);
3734 float16
uint8_to_float16(uint8_t a
, float_status
*status
)
3736 return uint64_to_float16_scalbn(a
, 0, status
);
3739 float32
uint64_to_float32_scalbn(uint64_t a
, int scale
, float_status
*status
)
3743 /* Without scaling, there are no overflow concerns. */
3744 if (likely(scale
== 0) && can_use_fpu(status
)) {
3750 parts_uint_to_float(&p
, a
, scale
, status
);
3751 return float32_round_pack_canonical(&p
, status
);
3754 float32
uint32_to_float32_scalbn(uint32_t a
, int scale
, float_status
*status
)
3756 return uint64_to_float32_scalbn(a
, scale
, status
);
3759 float32
uint16_to_float32_scalbn(uint16_t a
, int scale
, float_status
*status
)
3761 return uint64_to_float32_scalbn(a
, scale
, status
);
3764 float32
uint64_to_float32(uint64_t a
, float_status
*status
)
3766 return uint64_to_float32_scalbn(a
, 0, status
);
3769 float32
uint32_to_float32(uint32_t a
, float_status
*status
)
3771 return uint64_to_float32_scalbn(a
, 0, status
);
3774 float32
uint16_to_float32(uint16_t a
, float_status
*status
)
3776 return uint64_to_float32_scalbn(a
, 0, status
);
3779 float64
uint64_to_float64_scalbn(uint64_t a
, int scale
, float_status
*status
)
3783 /* Without scaling, there are no overflow concerns. */
3784 if (likely(scale
== 0) && can_use_fpu(status
)) {
3790 parts_uint_to_float(&p
, a
, scale
, status
);
3791 return float64_round_pack_canonical(&p
, status
);
/* Narrower unsigned -> float64 conversions: widen and delegate. */
float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

/* Plain (scale == 0) unsigned -> float64 conversions. */
float64 uint64_to_float64(uint64_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint32_to_float64(uint32_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint16_to_float64(uint16_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}
/* Convert unsigned 64-bit @a * 2**@scale to bfloat16, rounding per @status.
 * No hardfloat fast path: the host has no native bfloat16 arithmetic. */
bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return bfloat16_round_pack_canonical(&p, status);
}
/* Narrower unsigned -> bfloat16 conversions: widen and delegate. */
bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

/* Plain (scale == 0) unsigned -> bfloat16 conversions. */
bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}
/* Convert unsigned 64-bit @a to float128.  Every uint64_t is exactly
 * representable in the 113-bit significand, so no rounding occurs. */
float128 uint64_to_float128(uint64_t a, float_status *status)
{
    FloatParts128 p;

    parts_uint_to_float(&p, a, 0, status);
    return float128_round_pack_canonical(&p, status);
}
3861 * Minimum and maximum
/*
 * Per-format min/max workers.  Each unpacks both operands to canonical
 * parts, delegates the (min/max/num/mag) selection to parts_minmax()
 * according to @flags, and repacks the selected operand.  parts_minmax
 * returns a pointer to one of its inputs (or a NaN result written in place).
 */
static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, s);
    float16_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float16_round_pack_canonical(pr, s);
}

static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
                                float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return bfloat16_round_pack_canonical(pr, s);
}

static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, s);
    float32_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float32_round_pack_canonical(pr, s);
}

static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, s);
    float64_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float64_round_pack_canonical(pr, s);
}

static float128 float128_minmax(float128 a, float128 b,
                                float_status *s, int flags)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, s);
    float128_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float128_round_pack_canonical(pr, s);
}
/* Stamp out the public min/max entry points for one format: each expands
 * to a thin wrapper around the static <type>_minmax worker above. */
#define MINMAX_1(type, name, flags) \
    type type##_##name(type a, type b, float_status *s) \
    { return type##_minmax(a, b, s, flags); }

/* The six IEEE/legacy variants: min/max (propagate NaN), minnum/maxnum
 * (prefer the number over a quiet NaN), and the magnitude forms. */
#define MINMAX_2(type) \
    MINMAX_1(type, max, 0)                                                \
    MINMAX_1(type, maxnum, minmax_isnum)                                  \
    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
    MINMAX_1(type, min, minmax_ismin)                                     \
    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3943 * Floating point compare
/* Compare two float16 values; @is_quiet selects the quiet predicate
 * (signal invalid only on signaling NaNs, not on any NaN). */
static FloatRelation QEMU_FLATTEN
float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float16_unpack_canonical(&pa, a, s);
    float16_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

/* Signaling compare: any NaN operand raises invalid. */
FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, false);
}

/* Quiet compare: only signaling NaNs raise invalid. */
FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, true);
}
/* Softfloat slow path for float32 comparison. */
static FloatRelation QEMU_SOFTFLOAT_ATTR
float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float32_unpack_canonical(&pa, a, s);
    float32_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

/*
 * Hardfloat fast path: use the host FPU's quiet relational macros for the
 * ordered cases; the unordered case (and the no-hardfloat build) falls
 * through to the softfloat implementation so the status flags are set
 * correctly.  NOTE(review): the ua.s/ub.s assignments and `soft:` label
 * were reconstructed — confirm against upstream.
 */
static FloatRelation QEMU_FLATTEN
float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float32_do_compare(ua.s, ub.s, s, is_quiet);
}

FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, false);
}

FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, true);
}
/* Softfloat slow path for float64 comparison. */
static FloatRelation QEMU_SOFTFLOAT_ATTR
float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float64_unpack_canonical(&pa, a, s);
    float64_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

/*
 * Hardfloat fast path for float64, mirroring float32_hs_compare: ordered
 * results come from the host's quiet relational macros; unordered falls
 * through to softfloat for correct flag handling.  NOTE(review): the
 * ua.s/ub.s assignments and `soft:` label were reconstructed.
 */
static FloatRelation QEMU_FLATTEN
float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float64_do_compare(ua.s, ub.s, s, is_quiet);
}

FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, false);
}

FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, true);
}
/* Compare two bfloat16 values; no hardfloat fast path exists for this
 * format, so everything goes through the parts_compare worker. */
static FloatRelation QEMU_FLATTEN
bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, false);
}

FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, true);
}
/* Compare two float128 values via the 128-bit parts representation. */
static FloatRelation QEMU_FLATTEN
float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
{
    FloatParts128 pa, pb;

    float128_unpack_canonical(&pa, a, s);
    float128_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation float128_compare(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, false);
}

FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, true);
}
/* Compare two floatx80 values.  floatx80_unpack_canonical returns false
 * for invalid encodings (e.g. unnormals); such operands compare as
 * unordered, matching the invalid-operand handling elsewhere. */
static FloatRelation QEMU_FLATTEN
floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
{
    FloatParts128 pa, pb;

    if (!floatx80_unpack_canonical(&pa, a, s) ||
        !floatx80_unpack_canonical(&pb, b, s)) {
        return float_relation_unordered;
    }
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
{
    return floatx80_do_compare(a, b, s, false);
}

FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
{
    return floatx80_do_compare(a, b, s, true);
}
/*
 * Per-format scalbn: multiply @a by 2**@n with a single rounding,
 * via unpack -> parts_scalbn -> round/repack.
 */
float16 float16_scalbn(float16 a, int n, float_status *status)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float16_round_pack_canonical(&p, status);
}

float32 float32_scalbn(float32 a, int n, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float32_round_pack_canonical(&p, status);
}

float64 float64_scalbn(float64 a, int n, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float64_round_pack_canonical(&p, status);
}

bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return bfloat16_round_pack_canonical(&p, status);
}

float128 float128_scalbn(float128 a, int n, float_status *status)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float128_round_pack_canonical(&p, status);
}

floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
{
    FloatParts128 p;

    /* Invalid floatx80 encodings yield the default NaN. */
    if (!floatx80_unpack_canonical(&p, a, status)) {
        return floatx80_default_nan(status);
    }
    parts_scalbn(&p, n, status);
    return floatx80_round_pack_canonical(&p, status);
}
/* Square root of a float16, fully in softfloat. */
float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float16_params);
    return float16_round_pack_canonical(&p, status);
}

/* Softfloat slow path for float32 sqrt (hardfloat wrapper below). */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float32_params);
    return float32_round_pack_canonical(&p, status);
}

/* Softfloat slow path for float64 sqrt (hardfloat wrapper below). */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float64_params);
    return float64_round_pack_canonical(&p, status);
}
/*
 * float32 sqrt with hardfloat fast path: use the host sqrtf() when the
 * (flushed) input is a non-negative zero or normal number; anything else
 * (NaN, inf, negative, denormal) goes to the softfloat slow path so flags
 * are raised correctly.  NOTE(review): ua.s assignment, sqrtf() call and
 * `soft:` label were reconstructed — confirm against upstream.
 */
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        /* Classify via the host FPU representation. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}
/*
 * float64 sqrt with hardfloat fast path, mirroring float32_sqrt.
 * NOTE(review): ua.s assignment, sqrt() call and `soft:` label were
 * reconstructed — confirm against upstream.
 */
float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        /* Classify via the host FPU representation. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}
/* Square root for bfloat16, float128 and floatx80 — all pure softfloat. */
bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, status);
}

float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float128_params);
    return float128_round_pack_canonical(&p, status);
}

floatx80 floatx80_sqrt(floatx80 a, float_status *s)
{
    FloatParts128 p;

    /* Invalid floatx80 encodings yield the default NaN. */
    if (!floatx80_unpack_canonical(&p, a, s)) {
        return floatx80_default_nan(s);
    }
    /* Round to the currently selected x87 rounding precision. */
    parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
    return floatx80_round_pack_canonical(&p, s);
}
/* Base-2 logarithm, computed entirely in softfloat via parts_log2. */
float32 float32_log2(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_log2(&p, status, &float32_params);
    return float32_round_pack_canonical(&p, status);
}

float64 float64_log2(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_log2(&p, status, &float64_params);
    return float64_round_pack_canonical(&p, status);
}
4325 /*----------------------------------------------------------------------------
4326 | The pattern for a default generated NaN.
4327 *----------------------------------------------------------------------------*/
/*
 * Build the per-format default (quiet) NaN.  parts_default_nan fills a
 * canonical-parts NaN; the fraction is then shifted down from the
 * canonical position to the format's raw bit layout before packing.
 */
float16 float16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

float128 float128_default_nan(float_status *status)
{
    FloatParts128 p;

    parts_default_nan(&p, status);
    /* 128-bit fraction lives in two words; use the multiword shift. */
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}

bfloat16 bfloat16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}
4374 /*----------------------------------------------------------------------------
4375 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4376 *----------------------------------------------------------------------------*/
/*
 * Turn a signaling NaN into a quiet NaN.  The raw fraction is shifted up
 * to the canonical position so parts_silence_nan can manipulate the
 * quiet bit uniformly, then shifted back down and repacked.
 */
float16 float16_silence_nan(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_raw(&p, a);
    p.frac <<= float16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_silence_nan(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_raw(&p, a);
    p.frac <<= float32_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_silence_nan(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_raw(&p, a);
    p.frac <<= float64_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_raw(&p, a);
    p.frac <<= bfloat16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}

float128 float128_silence_nan(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_raw(&p, a);
    /* 128-bit fraction lives in two words; use the multiword shifts. */
    frac_shl(&p, float128_params.frac_shift);
    parts_silence_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}
4433 /*----------------------------------------------------------------------------
4434 | If `a' is denormal and we are in flush-to-zero mode then set the
4435 | input-denormal exception and return zero. Otherwise just return the value.
4436 *----------------------------------------------------------------------------*/
/*
 * Helper for the *_squash_input_denormal functions: on a raw-unpacked
 * value, exp == 0 with a nonzero fraction identifies a denormal.  Raise
 * input_denormal and report true so the caller substitutes signed zero.
 * NOTE(review): the return statements were reconstructed from the visible
 * condition — confirm against upstream.
 */
static bool parts_squash_denormal(FloatParts64 p, float_status *status)
{
    if (p.exp == 0 && p.frac != 0) {
        float_raise(float_flag_input_denormal, status);
        return true;
    }

    return false;
}

/* If flush_inputs_to_zero is set and @a is denormal, raise input_denormal
 * and return zero with @a's sign; otherwise return @a unchanged. */
float16 float16_squash_input_denormal(float16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float16_set_sign(float16_zero, p.sign);
        }
    }
    return a;
}

float32 float32_squash_input_denormal(float32 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float32_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float32_set_sign(float32_zero, p.sign);
        }
    }
    return a;
}

float64 float64_squash_input_denormal(float64 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float64_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float64_set_sign(float64_zero, p.sign);
        }
    }
    return a;
}

bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        bfloat16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return bfloat16_set_sign(bfloat16_zero, p.sign);
        }
    }
    return a;
}
4500 /*----------------------------------------------------------------------------
4501 | Normalizes the subnormal extended double-precision floating-point value
4502 | represented by the denormalized significand `aSig'. The normalized exponent
4503 | and significand are stored at the locations pointed to by `zExpPtr' and
4504 | `zSigPtr', respectively.
4505 *----------------------------------------------------------------------------*/
/* Normalize a subnormal floatx80 significand: shift out leading zeros and
 * compute the corresponding (negative or zero) unbiased-adjusted exponent.
 * aSig must be nonzero (clz64 of 0 would shift by 64). */
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t shiftCount;

    shiftCount = clz64(aSig);
    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4517 /*----------------------------------------------------------------------------
4518 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4519 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4520 | and returns the proper extended double-precision floating-point value
4521 | corresponding to the abstract input. Ordinarily, the abstract value is
4522 | rounded and packed into the extended double-precision format, with the
4523 | inexact exception raised if the abstract input cannot be represented
4524 | exactly. However, if the abstract value is too large, the overflow and
4525 | inexact exceptions are raised and an infinity or maximal finite value is
4526 | returned. If the abstract value is too small, the input value is rounded to
4527 | a subnormal number, and the underflow and inexact exceptions are raised if
4528 | the abstract input cannot be represented exactly as a subnormal extended
4529 | double-precision floating-point number.
4530 | If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4531 | the result is rounded to the same number of bits as single or double
4532 | precision, respectively. Otherwise, the result is rounded to the full
4533 | precision of the extended double-precision format.
4534 | The input significand must be normalized or smaller. If the input
4535 | significand is not normalized, `zExp' must be 0; in that case, the result
4536 | returned is a subnormal number, and it must not require rounding. The
4537 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4538 | Floating-Point Arithmetic.
4539 *----------------------------------------------------------------------------*/
/*
 * Round and pack sign/exponent/128-bit significand into a floatx80,
 * honoring the rounding precision (32/64/80-bit) and rounding mode in
 * @status, with overflow/underflow/inexact flag handling as documented
 * in the banner comment above.  The first half handles the reduced
 * (single/double) precisions by rounding within zSig0; the precision80
 * half rounds the full 64-bit significand using zSig1 as the round bits.
 * NOTE(review): control-flow lines lost in extraction (gotos, breaks,
 * default cases, zExp adjustments) were reconstructed from SoftFloat-2a —
 * confirm against upstream before relying on exact flag behavior.
 */
floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    FloatRoundMode roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Select the rounding increment/mask for the reduced precisions. */
    switch (roundingPrecision) {
    case floatx80_precision_x:
        goto precision80;
    case floatx80_precision_d:
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
        break;
    case floatx80_precision_s:
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
        break;
    default:
        g_assert_not_reached();
    }
    /* Fold the low word into the sticky bit. */
    zSig0 |= ( zSig1 != 0 );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || (zSig0 <= zSig0 + roundIncrement);
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            /* Rounding may carry into the integer bit, leaving normal. */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    if ( zSig0 < roundIncrement ) {
        /* Carry out of the significand: renormalize. */
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || (    ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Directed rounding toward zero: largest finite magnitude. */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            /* Re-derive the increment after the denormalizing shift. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                /* Ties-to-even: clear the LSB on an exact tie. */
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        if ( zSig0 == 0 ) {
            /* Carry out of the significand: renormalize. */
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            /* Ties-to-even: clear the LSB on an exact tie. */
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );
}
4731 /*----------------------------------------------------------------------------
4732 | Takes an abstract floating-point value having sign `zSign', exponent
4733 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4734 | and returns the proper extended double-precision floating-point value
4735 | corresponding to the abstract input. This routine is just like
4736 | `roundAndPackFloatx80' except that the input significand does not have to be
4738 *----------------------------------------------------------------------------*/
/*
 * Like roundAndPackFloatx80, but first normalizes the 128-bit significand
 * (which need not be normalized and may have a zero high word), adjusting
 * zExp by the amount shifted.  NOTE(review): the zSig0==0 word-swap and
 * `zExp -= shiftCount` lines were reconstructed — confirm against upstream.
 */
floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
                                       bool zSign, int32_t zExp,
                                       uint64_t zSig0, uint64_t zSig1,
                                       float_status *status)
{
    int8_t shiftCount;

    if ( zSig0 == 0 ) {
        /* High word empty: promote the low word and drop 64 from exp. */
        zSig0 = zSig1;
        zSig1 = 0;
        zExp -= 64;
    }
    shiftCount = clz64(zSig0);
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    zExp -= shiftCount;
    return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
                                zSig0, zSig1, status);
}
4760 /*----------------------------------------------------------------------------
4761 | Returns the binary exponential of the single-precision floating-point value
4762 | `a'. The operation is performed according to the IEC/IEEE Standard for
4763 | Binary Floating-Point Arithmetic.
4765 | Uses the following identities:
4767 | 1. -------------------------------------------------------------------------
4771 | 2. -------------------------------------------------------------------------
4774 | e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
4776 *----------------------------------------------------------------------------*/
/* Taylor-series coefficients for exp(x): entry i holds 1/(i+1)! as a
 * float64 bit pattern, used by float32_exp2 via e**(x*ln2). */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /*  1 */
    const_float64( 0x3fe0000000000000ll ), /*  2 */
    const_float64( 0x3fc5555555555555ll ), /*  3 */
    const_float64( 0x3fa5555555555555ll ), /*  4 */
    const_float64( 0x3f81111111111111ll ), /*  5 */
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};
4797 float32
float32_exp2(float32 a
, float_status
*status
)
4799 FloatParts64 xp
, xnp
, tp
, rp
;
4802 float32_unpack_canonical(&xp
, a
, status
);
4803 if (unlikely(xp
.cls
!= float_class_normal
)) {
4805 case float_class_snan
:
4806 case float_class_qnan
:
4807 parts_return_nan(&xp
, status
);
4808 return float32_round_pack_canonical(&xp
, status
);
4809 case float_class_inf
:
4810 return xp
.sign
? float32_zero
: a
;
4811 case float_class_zero
:
4816 g_assert_not_reached();
4819 float_raise(float_flag_inexact
, status
);
4821 float64_unpack_canonical(&tp
, float64_ln2
, status
);
4822 xp
= *parts_mul(&xp
, &tp
, status
);
4825 float64_unpack_canonical(&rp
, float64_one
, status
);
4826 for (i
= 0 ; i
< 15 ; i
++) {
4827 float64_unpack_canonical(&tp
, float32_exp2_coefficients
[i
], status
);
4828 rp
= *parts_muladd(&tp
, &xp
, &rp
, 0, status
);
4829 xnp
= *parts_mul(&xnp
, &xp
, status
);
4832 return float32_round_pack_canonical(&rp
, status
);
4835 /*----------------------------------------------------------------------------
4836 | Rounds the extended double-precision floating-point value `a'
4837 | to the precision provided by floatx80_rounding_precision and returns the
4838 | result as an extended double-precision floating-point value.
4839 | The operation is performed according to the IEC/IEEE Standard for Binary
4840 | Floating-Point Arithmetic.
4841 *----------------------------------------------------------------------------*/
/* Round @a to the precision selected by floatx80_rounding_precision by a
 * canonical unpack/repack round trip; invalid encodings become the
 * default NaN. */
floatx80 floatx80_round(floatx80 a, float_status *status)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, status)) {
        return floatx80_default_nan(status);
    }
    return floatx80_round_pack_canonical(&p, status);
}
4853 static void __attribute__((constructor
)) softfloat_init(void)
4855 union_float64 ua
, ub
, uc
, ur
;
4857 if (QEMU_NO_HARDFLOAT
) {
4861 * Test that the host's FMA is not obviously broken. For example,
4862 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
4863 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
4865 ua
.s
= 0x0020000000000001ULL
;
4866 ub
.s
= 0x3ca0000000000000ULL
;
4867 uc
.s
= 0x0020000000000000ULL
;
4868 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
4869 if (ur
.s
!= 0x0020000000000001ULL
) {
4870 force_soft_fma
= true;