4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
44 ===============================================================================
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */
/* softfloat (and in particular the code in softfloat-specialize.h) is
 * target-dependent and needs the TARGET_* macros.
 */
85 #include "qemu/osdep.h"
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
90 /* We only need stdlib for abort() */
/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations.  (Can be specialized to target if
| desired.)
*----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
/*
 * Hardfloat
 *
 * Fast emulation of guest FP instructions is challenging for two reasons.
 * First, FP instruction semantics are similar but not identical, particularly
 * when handling NaNs. Second, emulating at reasonable speed the guest FP
 * exception flags is not trivial: reading the host's flags register with a
 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 * and trapping on every FP exception is not fast nor pleasant to work with.
 *
 * We address these challenges by leveraging the host FPU for a subset of the
 * operations. To do this we expand on the idea presented in this paper:
 *
 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 *
 * The idea is thus to leverage the host FPU to (1) compute FP operations
 * and (2) identify whether FP exceptions occurred while avoiding
 * expensive exception flag register accesses.
 *
 * An important optimization shown in the paper is that given that exception
 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 * This is particularly useful for the inexact flag, which is very frequently
 * raised in floating-point workloads.
 *
 * We optimize the code further by deferring to soft-fp whenever FP exception
 * detection might get hairy. Two examples: (1) when at least one operand is
 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 * and the result is < the minimum normal.
 */
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 float_raise(float_flag_input_denormal, s); \
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck
, float32
)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck
, float64
)
141 #undef GEN_INPUT_FLUSH__NOCHECK
143 #define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
146 if (likely(!s->flush_inputs_to_zero)) { \
149 soft_t ## _input_flush__nocheck(a, s); \
152 GEN_INPUT_FLUSH1(float32_input_flush1
, float32
)
153 GEN_INPUT_FLUSH1(float64_input_flush1
, float64
)
154 #undef GEN_INPUT_FLUSH1
156 #define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
159 if (likely(!s->flush_inputs_to_zero)) { \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
166 GEN_INPUT_FLUSH2(float32_input_flush2
, float32
)
167 GEN_INPUT_FLUSH2(float64_input_flush2
, float64
)
168 #undef GEN_INPUT_FLUSH2
170 #define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
173 if (likely(!s->flush_inputs_to_zero)) { \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
181 GEN_INPUT_FLUSH3(float32_input_flush3
, float32
)
182 GEN_INPUT_FLUSH3(float64_input_flush3
, float64
)
183 #undef GEN_INPUT_FLUSH3
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF 1
#else
# define QEMU_HARDFLOAT_USE_ISINF 0
#endif
/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
235 static inline bool can_use_fpu(const float_status
*s
)
237 if (QEMU_NO_HARDFLOAT
) {
240 return likely(s
->float_exception_flags
& float_flag_inexact
&&
241 s
->float_rounding_mode
== float_round_nearest_even
);
/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */
266 typedef bool (*f32_check_fn
)(union_float32 a
, union_float32 b
);
267 typedef bool (*f64_check_fn
)(union_float64 a
, union_float64 b
);
269 typedef float32 (*soft_f32_op2_fn
)(float32 a
, float32 b
, float_status
*s
);
270 typedef float64 (*soft_f64_op2_fn
)(float64 a
, float64 b
, float_status
*s
);
271 typedef float (*hard_f32_op2_fn
)(float a
, float b
);
272 typedef double (*hard_f64_op2_fn
)(double a
, double b
);
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a
, union_float32 b
)
277 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
282 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
283 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
285 return float32_is_zero_or_normal(a
.s
) &&
286 float32_is_zero_or_normal(b
.s
);
289 static inline bool f64_is_zon2(union_float64 a
, union_float64 b
)
291 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
292 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
293 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
295 return float64_is_zero_or_normal(a
.s
) &&
296 float64_is_zero_or_normal(b
.s
);
299 /* 3-input is-zero-or-normal */
301 bool f32_is_zon3(union_float32 a
, union_float32 b
, union_float32 c
)
303 if (QEMU_HARDFLOAT_3F32_USE_FP
) {
304 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
305 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
306 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
308 return float32_is_zero_or_normal(a
.s
) &&
309 float32_is_zero_or_normal(b
.s
) &&
310 float32_is_zero_or_normal(c
.s
);
314 bool f64_is_zon3(union_float64 a
, union_float64 b
, union_float64 c
)
316 if (QEMU_HARDFLOAT_3F64_USE_FP
) {
317 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
318 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
319 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
321 return float64_is_zero_or_normal(a
.s
) &&
322 float64_is_zero_or_normal(b
.s
) &&
323 float64_is_zero_or_normal(c
.s
);
326 static inline bool f32_is_inf(union_float32 a
)
328 if (QEMU_HARDFLOAT_USE_ISINF
) {
331 return float32_is_infinity(a
.s
);
334 static inline bool f64_is_inf(union_float64 a
)
336 if (QEMU_HARDFLOAT_USE_ISINF
) {
339 return float64_is_infinity(a
.s
);
342 static inline float32
343 float32_gen2(float32 xa
, float32 xb
, float_status
*s
,
344 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
,
345 f32_check_fn pre
, f32_check_fn post
)
347 union_float32 ua
, ub
, ur
;
352 if (unlikely(!can_use_fpu(s
))) {
356 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
357 if (unlikely(!pre(ua
, ub
))) {
361 ur
.h
= hard(ua
.h
, ub
.h
);
362 if (unlikely(f32_is_inf(ur
))) {
363 float_raise(float_flag_overflow
, s
);
364 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
) && post(ua
, ub
)) {
370 return soft(ua
.s
, ub
.s
, s
);
373 static inline float64
374 float64_gen2(float64 xa
, float64 xb
, float_status
*s
,
375 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
,
376 f64_check_fn pre
, f64_check_fn post
)
378 union_float64 ua
, ub
, ur
;
383 if (unlikely(!can_use_fpu(s
))) {
387 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
388 if (unlikely(!pre(ua
, ub
))) {
392 ur
.h
= hard(ua
.h
, ub
.h
);
393 if (unlikely(f64_is_inf(ur
))) {
394 float_raise(float_flag_overflow
, s
);
395 } else if (unlikely(fabs(ur
.h
) <= DBL_MIN
) && post(ua
, ub
)) {
401 return soft(ua
.s
, ub
.s
, s
);
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
408 static inline uint32_t extractFloat32Frac(float32 a
)
410 return float32_val(a
) & 0x007FFFFF;
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
417 static inline int extractFloat32Exp(float32 a
)
419 return (float32_val(a
) >> 23) & 0xFF;
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
426 static inline bool extractFloat32Sign(float32 a
)
428 return float32_val(a
) >> 31;
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
435 static inline uint64_t extractFloat64Frac(float64 a
)
437 return float64_val(a
) & UINT64_C(0x000FFFFFFFFFFFFF);
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
444 static inline int extractFloat64Exp(float64 a
)
446 return (float64_val(a
) >> 52) & 0x7FF;
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
453 static inline bool extractFloat64Sign(float64 a
)
455 return float64_val(a
) >> 63;
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
472 #define float_cmask(bit) (1u << (bit))
475 float_cmask_zero
= float_cmask(float_class_zero
),
476 float_cmask_normal
= float_cmask(float_class_normal
),
477 float_cmask_inf
= float_cmask(float_class_inf
),
478 float_cmask_qnan
= float_cmask(float_class_qnan
),
479 float_cmask_snan
= float_cmask(float_class_snan
),
481 float_cmask_infzero
= float_cmask_zero
| float_cmask_inf
,
482 float_cmask_anynan
= float_cmask_qnan
| float_cmask_snan
,
/* Flags for parts_minmax. */
/* NOTE(review): enum member names/values reconstructed — verify upstream. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
    minmax_ismag = 4,
};
495 /* Simple helpers for checking if, or what kind of, NaN we have */
496 static inline __attribute__((unused
)) bool is_nan(FloatClass c
)
498 return unlikely(c
>= float_class_qnan
);
501 static inline __attribute__((unused
)) bool is_snan(FloatClass c
)
503 return c
== float_class_snan
;
506 static inline __attribute__((unused
)) bool is_qnan(FloatClass c
)
508 return c
== float_class_qnan
;
512 * Structure holding all of the decomposed parts of a float.
513 * The exponent is unbiased and the fraction is normalized.
515 * The fraction words are stored in big-endian word ordering,
516 * so that truncation from a larger format to a smaller format
517 * can be done simply by ignoring subsequent elements.
525 /* Routines that know the structure may reference the singular name. */
528 * Routines expanded with multiple structures reference "hi" and "lo"
529 * depending on the operation. In FloatParts64, "hi" and "lo" are
530 * both the same word and aliased here.
550 uint64_t frac_hm
; /* high-middle */
551 uint64_t frac_lm
; /* low-middle */
555 /* These apply to the most significant word of each FloatPartsN. */
556 #define DECOMPOSED_BINARY_POINT 63
557 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based the size of fraction
 *   round_mask: bits below lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 *
 * NOTE(review): struct body reconstructed from the field list above and the
 * FLOAT_PARAMS initializers below — verify against upstream.
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    uint64_t round_mask;
} FloatFmt;
/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                                \
    .exp_size       = E,                                \
    .exp_bias       = ((1 << E) - 1) >> 1,              \
    .exp_max        = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size      = F,                                \
    .frac_shift     = (-F - 1) & 63,                    \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1
592 static const FloatFmt float16_params
= {
596 static const FloatFmt float16_params_ahp
= {
601 static const FloatFmt bfloat16_params
= {
605 static const FloatFmt float32_params
= {
609 static const FloatFmt float64_params
= {
613 static const FloatFmt float128_params
= {
614 FLOAT_PARAMS(15, 112)
617 #define FLOATX80_PARAMS(R) \
619 .frac_size = R == 64 ? 63 : R, \
621 .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
623 static const FloatFmt floatx80_params
[3] = {
624 [floatx80_precision_s
] = { FLOATX80_PARAMS(23) },
625 [floatx80_precision_d
] = { FLOATX80_PARAMS(52) },
626 [floatx80_precision_x
] = { FLOATX80_PARAMS(64) },
629 /* Unpack a float to parts, but do not canonicalize. */
630 static void unpack_raw64(FloatParts64
*r
, const FloatFmt
*fmt
, uint64_t raw
)
632 const int f_size
= fmt
->frac_size
;
633 const int e_size
= fmt
->exp_size
;
635 *r
= (FloatParts64
) {
636 .cls
= float_class_unclassified
,
637 .sign
= extract64(raw
, f_size
+ e_size
, 1),
638 .exp
= extract64(raw
, f_size
, e_size
),
639 .frac
= extract64(raw
, 0, f_size
)
643 static inline void float16_unpack_raw(FloatParts64
*p
, float16 f
)
645 unpack_raw64(p
, &float16_params
, f
);
648 static inline void bfloat16_unpack_raw(FloatParts64
*p
, bfloat16 f
)
650 unpack_raw64(p
, &bfloat16_params
, f
);
653 static inline void float32_unpack_raw(FloatParts64
*p
, float32 f
)
655 unpack_raw64(p
, &float32_params
, f
);
658 static inline void float64_unpack_raw(FloatParts64
*p
, float64 f
)
660 unpack_raw64(p
, &float64_params
, f
);
663 static void floatx80_unpack_raw(FloatParts128
*p
, floatx80 f
)
665 *p
= (FloatParts128
) {
666 .cls
= float_class_unclassified
,
667 .sign
= extract32(f
.high
, 15, 1),
668 .exp
= extract32(f
.high
, 0, 15),
673 static void float128_unpack_raw(FloatParts128
*p
, float128 f
)
675 const int f_size
= float128_params
.frac_size
- 64;
676 const int e_size
= float128_params
.exp_size
;
678 *p
= (FloatParts128
) {
679 .cls
= float_class_unclassified
,
680 .sign
= extract64(f
.high
, f_size
+ e_size
, 1),
681 .exp
= extract64(f
.high
, f_size
, e_size
),
682 .frac_hi
= extract64(f
.high
, 0, f_size
),
687 /* Pack a float from parts, but do not canonicalize. */
688 static uint64_t pack_raw64(const FloatParts64
*p
, const FloatFmt
*fmt
)
690 const int f_size
= fmt
->frac_size
;
691 const int e_size
= fmt
->exp_size
;
694 ret
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
695 ret
= deposit64(ret
, f_size
, e_size
, p
->exp
);
696 ret
= deposit64(ret
, 0, f_size
, p
->frac
);
700 static inline float16
float16_pack_raw(const FloatParts64
*p
)
702 return make_float16(pack_raw64(p
, &float16_params
));
705 static inline bfloat16
bfloat16_pack_raw(const FloatParts64
*p
)
707 return pack_raw64(p
, &bfloat16_params
);
710 static inline float32
float32_pack_raw(const FloatParts64
*p
)
712 return make_float32(pack_raw64(p
, &float32_params
));
715 static inline float64
float64_pack_raw(const FloatParts64
*p
)
717 return make_float64(pack_raw64(p
, &float64_params
));
720 static float128
float128_pack_raw(const FloatParts128
*p
)
722 const int f_size
= float128_params
.frac_size
- 64;
723 const int e_size
= float128_params
.exp_size
;
726 hi
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
727 hi
= deposit64(hi
, f_size
, e_size
, p
->exp
);
728 hi
= deposit64(hi
, 0, f_size
, p
->frac_hi
);
729 return make_float128(hi
, p
->frac_lo
);
732 /*----------------------------------------------------------------------------
733 | Functions and definitions to determine: (1) whether tininess for underflow
734 | is detected before or after rounding by default, (2) what (if anything)
735 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
736 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
737 | are propagated from function inputs to output. These details are target-
739 *----------------------------------------------------------------------------*/
740 #include "softfloat-specialize.c.inc"
742 #define PARTS_GENERIC_64_128(NAME, P) \
743 QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
745 #define PARTS_GENERIC_64_128_256(NAME, P) \
746 QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
747 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
749 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
750 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
752 static void parts64_return_nan(FloatParts64
*a
, float_status
*s
);
753 static void parts128_return_nan(FloatParts128
*a
, float_status
*s
);
755 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
757 static FloatParts64
*parts64_pick_nan(FloatParts64
*a
, FloatParts64
*b
,
759 static FloatParts128
*parts128_pick_nan(FloatParts128
*a
, FloatParts128
*b
,
762 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
764 static FloatParts64
*parts64_pick_nan_muladd(FloatParts64
*a
, FloatParts64
*b
,
765 FloatParts64
*c
, float_status
*s
,
766 int ab_mask
, int abc_mask
);
767 static FloatParts128
*parts128_pick_nan_muladd(FloatParts128
*a
,
771 int ab_mask
, int abc_mask
);
773 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
774 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
776 static void parts64_canonicalize(FloatParts64
*p
, float_status
*status
,
777 const FloatFmt
*fmt
);
778 static void parts128_canonicalize(FloatParts128
*p
, float_status
*status
,
779 const FloatFmt
*fmt
);
781 #define parts_canonicalize(A, S, F) \
782 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
784 static void parts64_uncanon_normal(FloatParts64
*p
, float_status
*status
,
785 const FloatFmt
*fmt
);
786 static void parts128_uncanon_normal(FloatParts128
*p
, float_status
*status
,
787 const FloatFmt
*fmt
);
789 #define parts_uncanon_normal(A, S, F) \
790 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
792 static void parts64_uncanon(FloatParts64
*p
, float_status
*status
,
793 const FloatFmt
*fmt
);
794 static void parts128_uncanon(FloatParts128
*p
, float_status
*status
,
795 const FloatFmt
*fmt
);
797 #define parts_uncanon(A, S, F) \
798 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
800 static void parts64_add_normal(FloatParts64
*a
, FloatParts64
*b
);
801 static void parts128_add_normal(FloatParts128
*a
, FloatParts128
*b
);
802 static void parts256_add_normal(FloatParts256
*a
, FloatParts256
*b
);
804 #define parts_add_normal(A, B) \
805 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
807 static bool parts64_sub_normal(FloatParts64
*a
, FloatParts64
*b
);
808 static bool parts128_sub_normal(FloatParts128
*a
, FloatParts128
*b
);
809 static bool parts256_sub_normal(FloatParts256
*a
, FloatParts256
*b
);
811 #define parts_sub_normal(A, B) \
812 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
814 static FloatParts64
*parts64_addsub(FloatParts64
*a
, FloatParts64
*b
,
815 float_status
*s
, bool subtract
);
816 static FloatParts128
*parts128_addsub(FloatParts128
*a
, FloatParts128
*b
,
817 float_status
*s
, bool subtract
);
819 #define parts_addsub(A, B, S, Z) \
820 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
822 static FloatParts64
*parts64_mul(FloatParts64
*a
, FloatParts64
*b
,
824 static FloatParts128
*parts128_mul(FloatParts128
*a
, FloatParts128
*b
,
827 #define parts_mul(A, B, S) \
828 PARTS_GENERIC_64_128(mul, A)(A, B, S)
830 static FloatParts64
*parts64_muladd(FloatParts64
*a
, FloatParts64
*b
,
831 FloatParts64
*c
, int flags
,
833 static FloatParts128
*parts128_muladd(FloatParts128
*a
, FloatParts128
*b
,
834 FloatParts128
*c
, int flags
,
837 #define parts_muladd(A, B, C, Z, S) \
838 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
840 static FloatParts64
*parts64_div(FloatParts64
*a
, FloatParts64
*b
,
842 static FloatParts128
*parts128_div(FloatParts128
*a
, FloatParts128
*b
,
845 #define parts_div(A, B, S) \
846 PARTS_GENERIC_64_128(div, A)(A, B, S)
848 static void parts64_sqrt(FloatParts64
*a
, float_status
*s
, const FloatFmt
*f
);
849 static void parts128_sqrt(FloatParts128
*a
, float_status
*s
, const FloatFmt
*f
);
851 #define parts_sqrt(A, S, F) \
852 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
854 static bool parts64_round_to_int_normal(FloatParts64
*a
, FloatRoundMode rm
,
855 int scale
, int frac_size
);
856 static bool parts128_round_to_int_normal(FloatParts128
*a
, FloatRoundMode r
,
857 int scale
, int frac_size
);
859 #define parts_round_to_int_normal(A, R, C, F) \
860 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
862 static void parts64_round_to_int(FloatParts64
*a
, FloatRoundMode rm
,
863 int scale
, float_status
*s
,
864 const FloatFmt
*fmt
);
865 static void parts128_round_to_int(FloatParts128
*a
, FloatRoundMode r
,
866 int scale
, float_status
*s
,
867 const FloatFmt
*fmt
);
869 #define parts_round_to_int(A, R, C, S, F) \
870 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
872 static int64_t parts64_float_to_sint(FloatParts64
*p
, FloatRoundMode rmode
,
873 int scale
, int64_t min
, int64_t max
,
875 static int64_t parts128_float_to_sint(FloatParts128
*p
, FloatRoundMode rmode
,
876 int scale
, int64_t min
, int64_t max
,
879 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
880 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
882 static uint64_t parts64_float_to_uint(FloatParts64
*p
, FloatRoundMode rmode
,
883 int scale
, uint64_t max
,
885 static uint64_t parts128_float_to_uint(FloatParts128
*p
, FloatRoundMode rmode
,
886 int scale
, uint64_t max
,
889 #define parts_float_to_uint(P, R, Z, M, S) \
890 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
892 static void parts64_sint_to_float(FloatParts64
*p
, int64_t a
,
893 int scale
, float_status
*s
);
894 static void parts128_sint_to_float(FloatParts128
*p
, int64_t a
,
895 int scale
, float_status
*s
);
897 #define parts_sint_to_float(P, I, Z, S) \
898 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
900 static void parts64_uint_to_float(FloatParts64
*p
, uint64_t a
,
901 int scale
, float_status
*s
);
902 static void parts128_uint_to_float(FloatParts128
*p
, uint64_t a
,
903 int scale
, float_status
*s
);
905 #define parts_uint_to_float(P, I, Z, S) \
906 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
908 static FloatParts64
*parts64_minmax(FloatParts64
*a
, FloatParts64
*b
,
909 float_status
*s
, int flags
);
910 static FloatParts128
*parts128_minmax(FloatParts128
*a
, FloatParts128
*b
,
911 float_status
*s
, int flags
);
913 #define parts_minmax(A, B, S, F) \
914 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
916 static int parts64_compare(FloatParts64
*a
, FloatParts64
*b
,
917 float_status
*s
, bool q
);
918 static int parts128_compare(FloatParts128
*a
, FloatParts128
*b
,
919 float_status
*s
, bool q
);
921 #define parts_compare(A, B, S, Q) \
922 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
924 static void parts64_scalbn(FloatParts64
*a
, int n
, float_status
*s
);
925 static void parts128_scalbn(FloatParts128
*a
, int n
, float_status
*s
);
927 #define parts_scalbn(A, N, S) \
928 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
931 * Helper functions for softfloat-parts.c.inc, per-size operations.
934 #define FRAC_GENERIC_64_128(NAME, P) \
935 QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
937 #define FRAC_GENERIC_64_128_256(NAME, P) \
938 QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
939 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
941 static bool frac64_add(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
943 return uadd64_overflow(a
->frac
, b
->frac
, &r
->frac
);
946 static bool frac128_add(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
949 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
950 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
954 static bool frac256_add(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
957 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
958 r
->frac_lm
= uadd64_carry(a
->frac_lm
, b
->frac_lm
, &c
);
959 r
->frac_hm
= uadd64_carry(a
->frac_hm
, b
->frac_hm
, &c
);
960 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
964 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
966 static bool frac64_addi(FloatParts64
*r
, FloatParts64
*a
, uint64_t c
)
968 return uadd64_overflow(a
->frac
, c
, &r
->frac
);
971 static bool frac128_addi(FloatParts128
*r
, FloatParts128
*a
, uint64_t c
)
973 c
= uadd64_overflow(a
->frac_lo
, c
, &r
->frac_lo
);
974 return uadd64_overflow(a
->frac_hi
, c
, &r
->frac_hi
);
977 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
979 static void frac64_allones(FloatParts64
*a
)
984 static void frac128_allones(FloatParts128
*a
)
986 a
->frac_hi
= a
->frac_lo
= -1;
989 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
991 static int frac64_cmp(FloatParts64
*a
, FloatParts64
*b
)
993 return a
->frac
== b
->frac
? 0 : a
->frac
< b
->frac
? -1 : 1;
996 static int frac128_cmp(FloatParts128
*a
, FloatParts128
*b
)
998 uint64_t ta
= a
->frac_hi
, tb
= b
->frac_hi
;
1000 ta
= a
->frac_lo
, tb
= b
->frac_lo
;
1005 return ta
< tb
? -1 : 1;
1008 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
1010 static void frac64_clear(FloatParts64
*a
)
1015 static void frac128_clear(FloatParts128
*a
)
1017 a
->frac_hi
= a
->frac_lo
= 0;
1020 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
1022 static bool frac64_div(FloatParts64
*a
, FloatParts64
*b
)
1024 uint64_t n1
, n0
, r
, q
;
1028 * We want a 2*N / N-bit division to produce exactly an N-bit
1029 * result, so that we do not lose any precision and so that we
1030 * do not have to renormalize afterward. If A.frac < B.frac,
1031 * then division would produce an (N-1)-bit result; shift A left
1032 * by one to produce the an N-bit result, and return true to
1033 * decrement the exponent to match.
1035 * The udiv_qrnnd algorithm that we're using requires normalization,
1036 * i.e. the msb of the denominator must be set, which is already true.
1038 ret
= a
->frac
< b
->frac
;
1046 q
= udiv_qrnnd(&r
, n0
, n1
, b
->frac
);
1048 /* Set lsb if there is a remainder, to set inexact. */
1049 a
->frac
= q
| (r
!= 0);
1054 static bool frac128_div(FloatParts128
*a
, FloatParts128
*b
)
1056 uint64_t q0
, q1
, a0
, a1
, b0
, b1
;
1057 uint64_t r0
, r1
, r2
, r3
, t0
, t1
, t2
, t3
;
1060 a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1061 b0
= b
->frac_hi
, b1
= b
->frac_lo
;
1063 ret
= lt128(a0
, a1
, b0
, b1
);
1065 a1
= shr_double(a0
, a1
, 1);
1069 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1070 q0
= estimateDiv128To64(a0
, a1
, b0
);
1073 * Estimate is high because B1 was not included (unless B1 == 0).
1074 * Reduce quotient and increase remainder until remainder is non-negative.
1075 * This loop will execute 0 to 2 times.
1077 mul128By64To192(b0
, b1
, q0
, &t0
, &t1
, &t2
);
1078 sub192(a0
, a1
, 0, t0
, t1
, t2
, &r0
, &r1
, &r2
);
1081 add192(r0
, r1
, r2
, 0, b0
, b1
, &r0
, &r1
, &r2
);
1084 /* Repeat using the remainder, producing a second word of quotient. */
1085 q1
= estimateDiv128To64(r1
, r2
, b0
);
1086 mul128By64To192(b0
, b1
, q1
, &t1
, &t2
, &t3
);
1087 sub192(r1
, r2
, 0, t1
, t2
, t3
, &r1
, &r2
, &r3
);
1090 add192(r1
, r2
, r3
, 0, b0
, b1
, &r1
, &r2
, &r3
);
1093 /* Any remainder indicates inexact; set sticky bit. */
1094 q1
|= (r2
| r3
) != 0;
1101 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1103 static bool frac64_eqz(FloatParts64
*a
)
1105 return a
->frac
== 0;
1108 static bool frac128_eqz(FloatParts128
*a
)
1110 return (a
->frac_hi
| a
->frac_lo
) == 0;
1113 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1115 static void frac64_mulw(FloatParts128
*r
, FloatParts64
*a
, FloatParts64
*b
)
1117 mulu64(&r
->frac_lo
, &r
->frac_hi
, a
->frac
, b
->frac
);
1120 static void frac128_mulw(FloatParts256
*r
, FloatParts128
*a
, FloatParts128
*b
)
1122 mul128To256(a
->frac_hi
, a
->frac_lo
, b
->frac_hi
, b
->frac_lo
,
1123 &r
->frac_hi
, &r
->frac_hm
, &r
->frac_lm
, &r
->frac_lo
);
1126 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1128 static void frac64_neg(FloatParts64
*a
)
1133 static void frac128_neg(FloatParts128
*a
)
1136 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1137 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1140 static void frac256_neg(FloatParts256
*a
)
1143 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1144 a
->frac_lm
= usub64_borrow(0, a
->frac_lm
, &c
);
1145 a
->frac_hm
= usub64_borrow(0, a
->frac_hm
, &c
);
1146 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1149 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1151 static int frac64_normalize(FloatParts64
*a
)
1154 int shift
= clz64(a
->frac
);
1161 static int frac128_normalize(FloatParts128
*a
)
1164 int shl
= clz64(a
->frac_hi
);
1165 a
->frac_hi
= shl_double(a
->frac_hi
, a
->frac_lo
, shl
);
1168 } else if (a
->frac_lo
) {
1169 int shl
= clz64(a
->frac_lo
);
1170 a
->frac_hi
= a
->frac_lo
<< shl
;
1177 static int frac256_normalize(FloatParts256
*a
)
1179 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1180 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1192 a0
= a1
, a1
= a2
, a2
= a3
, a3
= 0;
1195 a0
= a2
, a1
= a3
, a2
= 0, a3
= 0;
1198 a0
= a3
, a1
= 0, a2
= 0, a3
= 0;
1201 a0
= 0, a1
= 0, a2
= 0, a3
= 0;
1211 a0
= shl_double(a0
, a1
, shl
);
1212 a1
= shl_double(a1
, a2
, shl
);
1213 a2
= shl_double(a2
, a3
, shl
);
1224 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1226 static void frac64_shl(FloatParts64
*a
, int c
)
1231 static void frac128_shl(FloatParts128
*a
, int c
)
1233 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1241 a0
= shl_double(a0
, a1
, c
);
1249 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1251 static void frac64_shr(FloatParts64
*a
, int c
)
1256 static void frac128_shr(FloatParts128
*a
, int c
)
1258 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1266 a1
= shr_double(a0
, a1
, c
);
1274 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1276 static void frac64_shrjam(FloatParts64
*a
, int c
)
1278 uint64_t a0
= a
->frac
;
1280 if (likely(c
!= 0)) {
1281 if (likely(c
< 64)) {
1282 a0
= (a0
>> c
) | (shr_double(a0
, 0, c
) != 0);
1290 static void frac128_shrjam(FloatParts128
*a
, int c
)
1292 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1293 uint64_t sticky
= 0;
1295 if (unlikely(c
== 0)) {
1297 } else if (likely(c
< 64)) {
1299 } else if (likely(c
< 128)) {
1313 sticky
|= shr_double(a1
, 0, c
);
1314 a1
= shr_double(a0
, a1
, c
);
1318 a
->frac_lo
= a1
| (sticky
!= 0);
1322 static void frac256_shrjam(FloatParts256
*a
, int c
)
1324 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1325 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1326 uint64_t sticky
= 0;
1328 if (unlikely(c
== 0)) {
1330 } else if (likely(c
< 64)) {
1332 } else if (likely(c
< 256)) {
1333 if (unlikely(c
& 128)) {
1335 a3
= a1
, a2
= a0
, a1
= 0, a0
= 0;
1337 if (unlikely(c
& 64)) {
1339 a3
= a2
, a2
= a1
, a1
= a0
, a0
= 0;
1346 sticky
= a0
| a1
| a2
| a3
;
1347 a0
= a1
= a2
= a3
= 0;
1351 sticky
|= shr_double(a3
, 0, c
);
1352 a3
= shr_double(a2
, a3
, c
);
1353 a2
= shr_double(a1
, a2
, c
);
1354 a1
= shr_double(a0
, a1
, c
);
1358 a
->frac_lo
= a3
| (sticky
!= 0);
1364 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1366 static bool frac64_sub(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
1368 return usub64_overflow(a
->frac
, b
->frac
, &r
->frac
);
1371 static bool frac128_sub(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
1374 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1375 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1379 static bool frac256_sub(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
1382 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1383 r
->frac_lm
= usub64_borrow(a
->frac_lm
, b
->frac_lm
, &c
);
1384 r
->frac_hm
= usub64_borrow(a
->frac_hm
, b
->frac_hm
, &c
);
1385 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1389 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1391 static void frac64_truncjam(FloatParts64
*r
, FloatParts128
*a
)
1393 r
->frac
= a
->frac_hi
| (a
->frac_lo
!= 0);
1396 static void frac128_truncjam(FloatParts128
*r
, FloatParts256
*a
)
1398 r
->frac_hi
= a
->frac_hi
;
1399 r
->frac_lo
= a
->frac_hm
| ((a
->frac_lm
| a
->frac_lo
) != 0);
1402 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1404 static void frac64_widen(FloatParts128
*r
, FloatParts64
*a
)
1406 r
->frac_hi
= a
->frac
;
1410 static void frac128_widen(FloatParts256
*r
, FloatParts128
*a
)
1412 r
->frac_hi
= a
->frac_hi
;
1413 r
->frac_hm
= a
->frac_lo
;
1418 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1421 * Reciprocal sqrt table. 1 bit of exponent, 6-bits of mantessa.
1422 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1423 * and thus MIT licenced.
/*
 * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantessa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licenced.
 */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};
1444 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1445 #define FloatPartsN glue(FloatParts,N)
1446 #define FloatPartsW glue(FloatParts,W)
1451 #include "softfloat-parts-addsub.c.inc"
1452 #include "softfloat-parts.c.inc"
1459 #include "softfloat-parts-addsub.c.inc"
1460 #include "softfloat-parts.c.inc"
1466 #include "softfloat-parts-addsub.c.inc"
1475 * Pack/unpack routines with a specific FloatFmt.
1478 static void float16a_unpack_canonical(FloatParts64
*p
, float16 f
,
1479 float_status
*s
, const FloatFmt
*params
)
1481 float16_unpack_raw(p
, f
);
1482 parts_canonicalize(p
, s
, params
);
1485 static void float16_unpack_canonical(FloatParts64
*p
, float16 f
,
1488 float16a_unpack_canonical(p
, f
, s
, &float16_params
);
1491 static void bfloat16_unpack_canonical(FloatParts64
*p
, bfloat16 f
,
1494 bfloat16_unpack_raw(p
, f
);
1495 parts_canonicalize(p
, s
, &bfloat16_params
);
1498 static float16
float16a_round_pack_canonical(FloatParts64
*p
,
1500 const FloatFmt
*params
)
1502 parts_uncanon(p
, s
, params
);
1503 return float16_pack_raw(p
);
1506 static float16
float16_round_pack_canonical(FloatParts64
*p
,
1509 return float16a_round_pack_canonical(p
, s
, &float16_params
);
1512 static bfloat16
bfloat16_round_pack_canonical(FloatParts64
*p
,
1515 parts_uncanon(p
, s
, &bfloat16_params
);
1516 return bfloat16_pack_raw(p
);
1519 static void float32_unpack_canonical(FloatParts64
*p
, float32 f
,
1522 float32_unpack_raw(p
, f
);
1523 parts_canonicalize(p
, s
, &float32_params
);
1526 static float32
float32_round_pack_canonical(FloatParts64
*p
,
1529 parts_uncanon(p
, s
, &float32_params
);
1530 return float32_pack_raw(p
);
1533 static void float64_unpack_canonical(FloatParts64
*p
, float64 f
,
1536 float64_unpack_raw(p
, f
);
1537 parts_canonicalize(p
, s
, &float64_params
);
1540 static float64
float64_round_pack_canonical(FloatParts64
*p
,
1543 parts_uncanon(p
, s
, &float64_params
);
1544 return float64_pack_raw(p
);
1547 static void float128_unpack_canonical(FloatParts128
*p
, float128 f
,
1550 float128_unpack_raw(p
, f
);
1551 parts_canonicalize(p
, s
, &float128_params
);
1554 static float128
float128_round_pack_canonical(FloatParts128
*p
,
1557 parts_uncanon(p
, s
, &float128_params
);
1558 return float128_pack_raw(p
);
1561 /* Returns false if the encoding is invalid. */
1562 static bool floatx80_unpack_canonical(FloatParts128
*p
, floatx80 f
,
1565 /* Ensure rounding precision is set before beginning. */
1566 switch (s
->floatx80_rounding_precision
) {
1567 case floatx80_precision_x
:
1568 case floatx80_precision_d
:
1569 case floatx80_precision_s
:
1572 g_assert_not_reached();
1575 if (unlikely(floatx80_invalid_encoding(f
))) {
1576 float_raise(float_flag_invalid
, s
);
1580 floatx80_unpack_raw(p
, f
);
1582 if (likely(p
->exp
!= floatx80_params
[floatx80_precision_x
].exp_max
)) {
1583 parts_canonicalize(p
, s
, &floatx80_params
[floatx80_precision_x
]);
1585 /* The explicit integer bit is ignored, after invalid checks. */
1586 p
->frac_hi
&= MAKE_64BIT_MASK(0, 63);
1587 p
->cls
= (p
->frac_hi
== 0 ? float_class_inf
1588 : parts_is_snan_frac(p
->frac_hi
, s
)
1589 ? float_class_snan
: float_class_qnan
);
1594 static floatx80
floatx80_round_pack_canonical(FloatParts128
*p
,
1597 const FloatFmt
*fmt
= &floatx80_params
[s
->floatx80_rounding_precision
];
1602 case float_class_normal
:
1603 if (s
->floatx80_rounding_precision
== floatx80_precision_x
) {
1604 parts_uncanon_normal(p
, s
, fmt
);
1612 frac_truncjam(&p64
, p
);
1613 parts_uncanon_normal(&p64
, s
, fmt
);
1617 if (exp
!= fmt
->exp_max
) {
1620 /* rounded to inf -- fall through to set frac correctly */
1622 case float_class_inf
:
1623 /* x86 and m68k differ in the setting of the integer bit. */
1624 frac
= floatx80_infinity_low
;
1628 case float_class_zero
:
1633 case float_class_snan
:
1634 case float_class_qnan
:
1635 /* NaNs have the integer bit set. */
1636 frac
= p
->frac_hi
| (1ull << 63);
1641 g_assert_not_reached();
1644 return packFloatx80(p
->sign
, exp
, frac
);
1648 * Addition and subtraction
1651 static float16 QEMU_FLATTEN
1652 float16_addsub(float16 a
, float16 b
, float_status
*status
, bool subtract
)
1654 FloatParts64 pa
, pb
, *pr
;
1656 float16_unpack_canonical(&pa
, a
, status
);
1657 float16_unpack_canonical(&pb
, b
, status
);
1658 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1660 return float16_round_pack_canonical(pr
, status
);
1663 float16
float16_add(float16 a
, float16 b
, float_status
*status
)
1665 return float16_addsub(a
, b
, status
, false);
1668 float16
float16_sub(float16 a
, float16 b
, float_status
*status
)
1670 return float16_addsub(a
, b
, status
, true);
1673 static float32 QEMU_SOFTFLOAT_ATTR
1674 soft_f32_addsub(float32 a
, float32 b
, float_status
*status
, bool subtract
)
1676 FloatParts64 pa
, pb
, *pr
;
1678 float32_unpack_canonical(&pa
, a
, status
);
1679 float32_unpack_canonical(&pb
, b
, status
);
1680 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1682 return float32_round_pack_canonical(pr
, status
);
1685 static float32
soft_f32_add(float32 a
, float32 b
, float_status
*status
)
1687 return soft_f32_addsub(a
, b
, status
, false);
1690 static float32
soft_f32_sub(float32 a
, float32 b
, float_status
*status
)
1692 return soft_f32_addsub(a
, b
, status
, true);
1695 static float64 QEMU_SOFTFLOAT_ATTR
1696 soft_f64_addsub(float64 a
, float64 b
, float_status
*status
, bool subtract
)
1698 FloatParts64 pa
, pb
, *pr
;
1700 float64_unpack_canonical(&pa
, a
, status
);
1701 float64_unpack_canonical(&pb
, b
, status
);
1702 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1704 return float64_round_pack_canonical(pr
, status
);
1707 static float64
soft_f64_add(float64 a
, float64 b
, float_status
*status
)
1709 return soft_f64_addsub(a
, b
, status
, false);
1712 static float64
soft_f64_sub(float64 a
, float64 b
, float_status
*status
)
1714 return soft_f64_addsub(a
, b
, status
, true);
1717 static float hard_f32_add(float a
, float b
)
1722 static float hard_f32_sub(float a
, float b
)
1727 static double hard_f64_add(double a
, double b
)
1732 static double hard_f64_sub(double a
, double b
)
1737 static bool f32_addsubmul_post(union_float32 a
, union_float32 b
)
1739 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
1740 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1742 return !(float32_is_zero(a
.s
) && float32_is_zero(b
.s
));
1745 static bool f64_addsubmul_post(union_float64 a
, union_float64 b
)
1747 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
1748 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1750 return !(float64_is_zero(a
.s
) && float64_is_zero(b
.s
));
1754 static float32
float32_addsub(float32 a
, float32 b
, float_status
*s
,
1755 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
)
1757 return float32_gen2(a
, b
, s
, hard
, soft
,
1758 f32_is_zon2
, f32_addsubmul_post
);
1761 static float64
float64_addsub(float64 a
, float64 b
, float_status
*s
,
1762 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
)
1764 return float64_gen2(a
, b
, s
, hard
, soft
,
1765 f64_is_zon2
, f64_addsubmul_post
);
1768 float32 QEMU_FLATTEN
1769 float32_add(float32 a
, float32 b
, float_status
*s
)
1771 return float32_addsub(a
, b
, s
, hard_f32_add
, soft_f32_add
);
1774 float32 QEMU_FLATTEN
1775 float32_sub(float32 a
, float32 b
, float_status
*s
)
1777 return float32_addsub(a
, b
, s
, hard_f32_sub
, soft_f32_sub
);
1780 float64 QEMU_FLATTEN
1781 float64_add(float64 a
, float64 b
, float_status
*s
)
1783 return float64_addsub(a
, b
, s
, hard_f64_add
, soft_f64_add
);
1786 float64 QEMU_FLATTEN
1787 float64_sub(float64 a
, float64 b
, float_status
*s
)
1789 return float64_addsub(a
, b
, s
, hard_f64_sub
, soft_f64_sub
);
1792 static bfloat16 QEMU_FLATTEN
1793 bfloat16_addsub(bfloat16 a
, bfloat16 b
, float_status
*status
, bool subtract
)
1795 FloatParts64 pa
, pb
, *pr
;
1797 bfloat16_unpack_canonical(&pa
, a
, status
);
1798 bfloat16_unpack_canonical(&pb
, b
, status
);
1799 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1801 return bfloat16_round_pack_canonical(pr
, status
);
1804 bfloat16
bfloat16_add(bfloat16 a
, bfloat16 b
, float_status
*status
)
1806 return bfloat16_addsub(a
, b
, status
, false);
1809 bfloat16
bfloat16_sub(bfloat16 a
, bfloat16 b
, float_status
*status
)
1811 return bfloat16_addsub(a
, b
, status
, true);
1814 static float128 QEMU_FLATTEN
1815 float128_addsub(float128 a
, float128 b
, float_status
*status
, bool subtract
)
1817 FloatParts128 pa
, pb
, *pr
;
1819 float128_unpack_canonical(&pa
, a
, status
);
1820 float128_unpack_canonical(&pb
, b
, status
);
1821 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1823 return float128_round_pack_canonical(pr
, status
);
1826 float128
float128_add(float128 a
, float128 b
, float_status
*status
)
1828 return float128_addsub(a
, b
, status
, false);
1831 float128
float128_sub(float128 a
, float128 b
, float_status
*status
)
1833 return float128_addsub(a
, b
, status
, true);
1836 static floatx80 QEMU_FLATTEN
1837 floatx80_addsub(floatx80 a
, floatx80 b
, float_status
*status
, bool subtract
)
1839 FloatParts128 pa
, pb
, *pr
;
1841 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
1842 !floatx80_unpack_canonical(&pb
, b
, status
)) {
1843 return floatx80_default_nan(status
);
1846 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1847 return floatx80_round_pack_canonical(pr
, status
);
1850 floatx80
floatx80_add(floatx80 a
, floatx80 b
, float_status
*status
)
1852 return floatx80_addsub(a
, b
, status
, false);
1855 floatx80
floatx80_sub(floatx80 a
, floatx80 b
, float_status
*status
)
1857 return floatx80_addsub(a
, b
, status
, true);
1864 float16 QEMU_FLATTEN
float16_mul(float16 a
, float16 b
, float_status
*status
)
1866 FloatParts64 pa
, pb
, *pr
;
1868 float16_unpack_canonical(&pa
, a
, status
);
1869 float16_unpack_canonical(&pb
, b
, status
);
1870 pr
= parts_mul(&pa
, &pb
, status
);
1872 return float16_round_pack_canonical(pr
, status
);
1875 static float32 QEMU_SOFTFLOAT_ATTR
1876 soft_f32_mul(float32 a
, float32 b
, float_status
*status
)
1878 FloatParts64 pa
, pb
, *pr
;
1880 float32_unpack_canonical(&pa
, a
, status
);
1881 float32_unpack_canonical(&pb
, b
, status
);
1882 pr
= parts_mul(&pa
, &pb
, status
);
1884 return float32_round_pack_canonical(pr
, status
);
1887 static float64 QEMU_SOFTFLOAT_ATTR
1888 soft_f64_mul(float64 a
, float64 b
, float_status
*status
)
1890 FloatParts64 pa
, pb
, *pr
;
1892 float64_unpack_canonical(&pa
, a
, status
);
1893 float64_unpack_canonical(&pb
, b
, status
);
1894 pr
= parts_mul(&pa
, &pb
, status
);
1896 return float64_round_pack_canonical(pr
, status
);
1899 static float hard_f32_mul(float a
, float b
)
1904 static double hard_f64_mul(double a
, double b
)
1909 float32 QEMU_FLATTEN
1910 float32_mul(float32 a
, float32 b
, float_status
*s
)
1912 return float32_gen2(a
, b
, s
, hard_f32_mul
, soft_f32_mul
,
1913 f32_is_zon2
, f32_addsubmul_post
);
1916 float64 QEMU_FLATTEN
1917 float64_mul(float64 a
, float64 b
, float_status
*s
)
1919 return float64_gen2(a
, b
, s
, hard_f64_mul
, soft_f64_mul
,
1920 f64_is_zon2
, f64_addsubmul_post
);
1923 bfloat16 QEMU_FLATTEN
1924 bfloat16_mul(bfloat16 a
, bfloat16 b
, float_status
*status
)
1926 FloatParts64 pa
, pb
, *pr
;
1928 bfloat16_unpack_canonical(&pa
, a
, status
);
1929 bfloat16_unpack_canonical(&pb
, b
, status
);
1930 pr
= parts_mul(&pa
, &pb
, status
);
1932 return bfloat16_round_pack_canonical(pr
, status
);
1935 float128 QEMU_FLATTEN
1936 float128_mul(float128 a
, float128 b
, float_status
*status
)
1938 FloatParts128 pa
, pb
, *pr
;
1940 float128_unpack_canonical(&pa
, a
, status
);
1941 float128_unpack_canonical(&pb
, b
, status
);
1942 pr
= parts_mul(&pa
, &pb
, status
);
1944 return float128_round_pack_canonical(pr
, status
);
1948 * Fused multiply-add
1951 float16 QEMU_FLATTEN
float16_muladd(float16 a
, float16 b
, float16 c
,
1952 int flags
, float_status
*status
)
1954 FloatParts64 pa
, pb
, pc
, *pr
;
1956 float16_unpack_canonical(&pa
, a
, status
);
1957 float16_unpack_canonical(&pb
, b
, status
);
1958 float16_unpack_canonical(&pc
, c
, status
);
1959 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1961 return float16_round_pack_canonical(pr
, status
);
1964 static float32 QEMU_SOFTFLOAT_ATTR
1965 soft_f32_muladd(float32 a
, float32 b
, float32 c
, int flags
,
1966 float_status
*status
)
1968 FloatParts64 pa
, pb
, pc
, *pr
;
1970 float32_unpack_canonical(&pa
, a
, status
);
1971 float32_unpack_canonical(&pb
, b
, status
);
1972 float32_unpack_canonical(&pc
, c
, status
);
1973 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1975 return float32_round_pack_canonical(pr
, status
);
1978 static float64 QEMU_SOFTFLOAT_ATTR
1979 soft_f64_muladd(float64 a
, float64 b
, float64 c
, int flags
,
1980 float_status
*status
)
1982 FloatParts64 pa
, pb
, pc
, *pr
;
1984 float64_unpack_canonical(&pa
, a
, status
);
1985 float64_unpack_canonical(&pb
, b
, status
);
1986 float64_unpack_canonical(&pc
, c
, status
);
1987 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1989 return float64_round_pack_canonical(pr
, status
);
1992 static bool force_soft_fma
;
1994 float32 QEMU_FLATTEN
1995 float32_muladd(float32 xa
, float32 xb
, float32 xc
, int flags
, float_status
*s
)
1997 union_float32 ua
, ub
, uc
, ur
;
2003 if (unlikely(!can_use_fpu(s
))) {
2006 if (unlikely(flags
& float_muladd_halve_result
)) {
2010 float32_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2011 if (unlikely(!f32_is_zon3(ua
, ub
, uc
))) {
2015 if (unlikely(force_soft_fma
)) {
2020 * When (a || b) == 0, there's no need to check for under/over flow,
2021 * since we know the addend is (normal || 0) and the product is 0.
2023 if (float32_is_zero(ua
.s
) || float32_is_zero(ub
.s
)) {
2027 prod_sign
= float32_is_neg(ua
.s
) ^ float32_is_neg(ub
.s
);
2028 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2029 up
.s
= float32_set_sign(float32_zero
, prod_sign
);
2031 if (flags
& float_muladd_negate_c
) {
2036 union_float32 ua_orig
= ua
;
2037 union_float32 uc_orig
= uc
;
2039 if (flags
& float_muladd_negate_product
) {
2042 if (flags
& float_muladd_negate_c
) {
2046 ur
.h
= fmaf(ua
.h
, ub
.h
, uc
.h
);
2048 if (unlikely(f32_is_inf(ur
))) {
2049 float_raise(float_flag_overflow
, s
);
2050 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
)) {
2056 if (flags
& float_muladd_negate_result
) {
2057 return float32_chs(ur
.s
);
2062 return soft_f32_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2065 float64 QEMU_FLATTEN
2066 float64_muladd(float64 xa
, float64 xb
, float64 xc
, int flags
, float_status
*s
)
2068 union_float64 ua
, ub
, uc
, ur
;
2074 if (unlikely(!can_use_fpu(s
))) {
2077 if (unlikely(flags
& float_muladd_halve_result
)) {
2081 float64_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2082 if (unlikely(!f64_is_zon3(ua
, ub
, uc
))) {
2086 if (unlikely(force_soft_fma
)) {
2091 * When (a || b) == 0, there's no need to check for under/over flow,
2092 * since we know the addend is (normal || 0) and the product is 0.
2094 if (float64_is_zero(ua
.s
) || float64_is_zero(ub
.s
)) {
2098 prod_sign
= float64_is_neg(ua
.s
) ^ float64_is_neg(ub
.s
);
2099 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2100 up
.s
= float64_set_sign(float64_zero
, prod_sign
);
2102 if (flags
& float_muladd_negate_c
) {
2107 union_float64 ua_orig
= ua
;
2108 union_float64 uc_orig
= uc
;
2110 if (flags
& float_muladd_negate_product
) {
2113 if (flags
& float_muladd_negate_c
) {
2117 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
2119 if (unlikely(f64_is_inf(ur
))) {
2120 float_raise(float_flag_overflow
, s
);
2121 } else if (unlikely(fabs(ur
.h
) <= FLT_MIN
)) {
2127 if (flags
& float_muladd_negate_result
) {
2128 return float64_chs(ur
.s
);
2133 return soft_f64_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2136 bfloat16 QEMU_FLATTEN
bfloat16_muladd(bfloat16 a
, bfloat16 b
, bfloat16 c
,
2137 int flags
, float_status
*status
)
2139 FloatParts64 pa
, pb
, pc
, *pr
;
2141 bfloat16_unpack_canonical(&pa
, a
, status
);
2142 bfloat16_unpack_canonical(&pb
, b
, status
);
2143 bfloat16_unpack_canonical(&pc
, c
, status
);
2144 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2146 return bfloat16_round_pack_canonical(pr
, status
);
2149 float128 QEMU_FLATTEN
float128_muladd(float128 a
, float128 b
, float128 c
,
2150 int flags
, float_status
*status
)
2152 FloatParts128 pa
, pb
, pc
, *pr
;
2154 float128_unpack_canonical(&pa
, a
, status
);
2155 float128_unpack_canonical(&pb
, b
, status
);
2156 float128_unpack_canonical(&pc
, c
, status
);
2157 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2159 return float128_round_pack_canonical(pr
, status
);
2166 float16
float16_div(float16 a
, float16 b
, float_status
*status
)
2168 FloatParts64 pa
, pb
, *pr
;
2170 float16_unpack_canonical(&pa
, a
, status
);
2171 float16_unpack_canonical(&pb
, b
, status
);
2172 pr
= parts_div(&pa
, &pb
, status
);
2174 return float16_round_pack_canonical(pr
, status
);
2177 static float32 QEMU_SOFTFLOAT_ATTR
2178 soft_f32_div(float32 a
, float32 b
, float_status
*status
)
2180 FloatParts64 pa
, pb
, *pr
;
2182 float32_unpack_canonical(&pa
, a
, status
);
2183 float32_unpack_canonical(&pb
, b
, status
);
2184 pr
= parts_div(&pa
, &pb
, status
);
2186 return float32_round_pack_canonical(pr
, status
);
2189 static float64 QEMU_SOFTFLOAT_ATTR
2190 soft_f64_div(float64 a
, float64 b
, float_status
*status
)
2192 FloatParts64 pa
, pb
, *pr
;
2194 float64_unpack_canonical(&pa
, a
, status
);
2195 float64_unpack_canonical(&pb
, b
, status
);
2196 pr
= parts_div(&pa
, &pb
, status
);
2198 return float64_round_pack_canonical(pr
, status
);
2201 static float hard_f32_div(float a
, float b
)
2206 static double hard_f64_div(double a
, double b
)
2211 static bool f32_div_pre(union_float32 a
, union_float32 b
)
2213 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2214 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2215 fpclassify(b
.h
) == FP_NORMAL
;
2217 return float32_is_zero_or_normal(a
.s
) && float32_is_normal(b
.s
);
2220 static bool f64_div_pre(union_float64 a
, union_float64 b
)
2222 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2223 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2224 fpclassify(b
.h
) == FP_NORMAL
;
2226 return float64_is_zero_or_normal(a
.s
) && float64_is_normal(b
.s
);
2229 static bool f32_div_post(union_float32 a
, union_float32 b
)
2231 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2232 return fpclassify(a
.h
) != FP_ZERO
;
2234 return !float32_is_zero(a
.s
);
2237 static bool f64_div_post(union_float64 a
, union_float64 b
)
2239 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2240 return fpclassify(a
.h
) != FP_ZERO
;
2242 return !float64_is_zero(a
.s
);
2245 float32 QEMU_FLATTEN
2246 float32_div(float32 a
, float32 b
, float_status
*s
)
2248 return float32_gen2(a
, b
, s
, hard_f32_div
, soft_f32_div
,
2249 f32_div_pre
, f32_div_post
);
2252 float64 QEMU_FLATTEN
2253 float64_div(float64 a
, float64 b
, float_status
*s
)
2255 return float64_gen2(a
, b
, s
, hard_f64_div
, soft_f64_div
,
2256 f64_div_pre
, f64_div_post
);
2259 bfloat16 QEMU_FLATTEN
2260 bfloat16_div(bfloat16 a
, bfloat16 b
, float_status
*status
)
2262 FloatParts64 pa
, pb
, *pr
;
2264 bfloat16_unpack_canonical(&pa
, a
, status
);
2265 bfloat16_unpack_canonical(&pb
, b
, status
);
2266 pr
= parts_div(&pa
, &pb
, status
);
2268 return bfloat16_round_pack_canonical(pr
, status
);
2271 float128 QEMU_FLATTEN
2272 float128_div(float128 a
, float128 b
, float_status
*status
)
2274 FloatParts128 pa
, pb
, *pr
;
2276 float128_unpack_canonical(&pa
, a
, status
);
2277 float128_unpack_canonical(&pb
, b
, status
);
2278 pr
= parts_div(&pa
, &pb
, status
);
2280 return float128_round_pack_canonical(pr
, status
);
2284 * Float to Float conversions
2286 * Returns the result of converting one float format to another. The
2287 * conversion is performed according to the IEC/IEEE Standard for
2288 * Binary Floating-Point Arithmetic.
2290 * Usually this only needs to take care of raising invalid exceptions
2291 * and handling the conversion on NaNs.
2294 static void parts_float_to_ahp(FloatParts64
*a
, float_status
*s
)
2297 case float_class_qnan
:
2298 case float_class_snan
:
2300 * There is no NaN in the destination format. Raise Invalid
2301 * and return a zero with the sign of the input NaN.
2303 float_raise(float_flag_invalid
, s
);
2304 a
->cls
= float_class_zero
;
2307 case float_class_inf
:
2309 * There is no Inf in the destination format. Raise Invalid
2310 * and return the maximum normal with the correct sign.
2312 float_raise(float_flag_invalid
, s
);
2313 a
->cls
= float_class_normal
;
2314 a
->exp
= float16_params_ahp
.exp_max
;
2315 a
->frac
= MAKE_64BIT_MASK(float16_params_ahp
.frac_shift
,
2316 float16_params_ahp
.frac_size
+ 1);
2319 case float_class_normal
:
2320 case float_class_zero
:
2324 g_assert_not_reached();
2328 static void parts64_float_to_float(FloatParts64
*a
, float_status
*s
)
2330 if (is_nan(a
->cls
)) {
2331 parts_return_nan(a
, s
);
2335 static void parts128_float_to_float(FloatParts128
*a
, float_status
*s
)
2337 if (is_nan(a
->cls
)) {
2338 parts_return_nan(a
, s
);
2342 #define parts_float_to_float(P, S) \
2343 PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2345 static void parts_float_to_float_narrow(FloatParts64
*a
, FloatParts128
*b
,
2352 if (a
->cls
== float_class_normal
) {
2353 frac_truncjam(a
, b
);
2354 } else if (is_nan(a
->cls
)) {
2355 /* Discard the low bits of the NaN. */
2356 a
->frac
= b
->frac_hi
;
2357 parts_return_nan(a
, s
);
2361 static void parts_float_to_float_widen(FloatParts128
*a
, FloatParts64
*b
,
2369 if (is_nan(a
->cls
)) {
2370 parts_return_nan(a
, s
);
2374 float32
float16_to_float32(float16 a
, bool ieee
, float_status
*s
)
2376 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2379 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2380 parts_float_to_float(&p
, s
);
2381 return float32_round_pack_canonical(&p
, s
);
2384 float64
float16_to_float64(float16 a
, bool ieee
, float_status
*s
)
2386 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2389 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2390 parts_float_to_float(&p
, s
);
2391 return float64_round_pack_canonical(&p
, s
);
2394 float16
float32_to_float16(float32 a
, bool ieee
, float_status
*s
)
2397 const FloatFmt
*fmt
;
2399 float32_unpack_canonical(&p
, a
, s
);
2401 parts_float_to_float(&p
, s
);
2402 fmt
= &float16_params
;
2404 parts_float_to_ahp(&p
, s
);
2405 fmt
= &float16_params_ahp
;
2407 return float16a_round_pack_canonical(&p
, s
, fmt
);
2410 static float64 QEMU_SOFTFLOAT_ATTR
2411 soft_float32_to_float64(float32 a
, float_status
*s
)
2415 float32_unpack_canonical(&p
, a
, s
);
2416 parts_float_to_float(&p
, s
);
2417 return float64_round_pack_canonical(&p
, s
);
2420 float64
float32_to_float64(float32 a
, float_status
*s
)
2422 if (likely(float32_is_normal(a
))) {
2423 /* Widening conversion can never produce inexact results. */
2429 } else if (float32_is_zero(a
)) {
2430 return float64_set_sign(float64_zero
, float32_is_neg(a
));
2432 return soft_float32_to_float64(a
, s
);
2436 float16
float64_to_float16(float64 a
, bool ieee
, float_status
*s
)
2439 const FloatFmt
*fmt
;
2441 float64_unpack_canonical(&p
, a
, s
);
2443 parts_float_to_float(&p
, s
);
2444 fmt
= &float16_params
;
2446 parts_float_to_ahp(&p
, s
);
2447 fmt
= &float16_params_ahp
;
2449 return float16a_round_pack_canonical(&p
, s
, fmt
);
2452 float32
float64_to_float32(float64 a
, float_status
*s
)
2456 float64_unpack_canonical(&p
, a
, s
);
2457 parts_float_to_float(&p
, s
);
2458 return float32_round_pack_canonical(&p
, s
);
2461 float32
bfloat16_to_float32(bfloat16 a
, float_status
*s
)
2465 bfloat16_unpack_canonical(&p
, a
, s
);
2466 parts_float_to_float(&p
, s
);
2467 return float32_round_pack_canonical(&p
, s
);
2470 float64
bfloat16_to_float64(bfloat16 a
, float_status
*s
)
2474 bfloat16_unpack_canonical(&p
, a
, s
);
2475 parts_float_to_float(&p
, s
);
2476 return float64_round_pack_canonical(&p
, s
);
2479 bfloat16
float32_to_bfloat16(float32 a
, float_status
*s
)
2483 float32_unpack_canonical(&p
, a
, s
);
2484 parts_float_to_float(&p
, s
);
2485 return bfloat16_round_pack_canonical(&p
, s
);
2488 bfloat16
float64_to_bfloat16(float64 a
, float_status
*s
)
2492 float64_unpack_canonical(&p
, a
, s
);
2493 parts_float_to_float(&p
, s
);
2494 return bfloat16_round_pack_canonical(&p
, s
);
2497 float32
float128_to_float32(float128 a
, float_status
*s
)
2502 float128_unpack_canonical(&p128
, a
, s
);
2503 parts_float_to_float_narrow(&p64
, &p128
, s
);
2504 return float32_round_pack_canonical(&p64
, s
);
2507 float64
float128_to_float64(float128 a
, float_status
*s
)
2512 float128_unpack_canonical(&p128
, a
, s
);
2513 parts_float_to_float_narrow(&p64
, &p128
, s
);
2514 return float64_round_pack_canonical(&p64
, s
);
2517 float128
float32_to_float128(float32 a
, float_status
*s
)
2522 float32_unpack_canonical(&p64
, a
, s
);
2523 parts_float_to_float_widen(&p128
, &p64
, s
);
2524 return float128_round_pack_canonical(&p128
, s
);
2527 float128
float64_to_float128(float64 a
, float_status
*s
)
2532 float64_unpack_canonical(&p64
, a
, s
);
2533 parts_float_to_float_widen(&p128
, &p64
, s
);
2534 return float128_round_pack_canonical(&p128
, s
);
2538 * Round to integral value
2541 float16
float16_round_to_int(float16 a
, float_status
*s
)
2545 float16_unpack_canonical(&p
, a
, s
);
2546 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float16_params
);
2547 return float16_round_pack_canonical(&p
, s
);
2550 float32
float32_round_to_int(float32 a
, float_status
*s
)
2554 float32_unpack_canonical(&p
, a
, s
);
2555 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float32_params
);
2556 return float32_round_pack_canonical(&p
, s
);
2559 float64
float64_round_to_int(float64 a
, float_status
*s
)
2563 float64_unpack_canonical(&p
, a
, s
);
2564 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float64_params
);
2565 return float64_round_pack_canonical(&p
, s
);
2568 bfloat16
bfloat16_round_to_int(bfloat16 a
, float_status
*s
)
2572 bfloat16_unpack_canonical(&p
, a
, s
);
2573 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &bfloat16_params
);
2574 return bfloat16_round_pack_canonical(&p
, s
);
2577 float128
float128_round_to_int(float128 a
, float_status
*s
)
2581 float128_unpack_canonical(&p
, a
, s
);
2582 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float128_params
);
2583 return float128_round_pack_canonical(&p
, s
);
2587 * Floating-point to signed integer conversions
2590 int8_t float16_to_int8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2595 float16_unpack_canonical(&p
, a
, s
);
2596 return parts_float_to_sint(&p
, rmode
, scale
, INT8_MIN
, INT8_MAX
, s
);
2599 int16_t float16_to_int16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2604 float16_unpack_canonical(&p
, a
, s
);
2605 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2608 int32_t float16_to_int32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2613 float16_unpack_canonical(&p
, a
, s
);
2614 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2617 int64_t float16_to_int64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2622 float16_unpack_canonical(&p
, a
, s
);
2623 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2626 int16_t float32_to_int16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2631 float32_unpack_canonical(&p
, a
, s
);
2632 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2635 int32_t float32_to_int32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2640 float32_unpack_canonical(&p
, a
, s
);
2641 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2644 int64_t float32_to_int64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2649 float32_unpack_canonical(&p
, a
, s
);
2650 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2653 int16_t float64_to_int16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2658 float64_unpack_canonical(&p
, a
, s
);
2659 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2662 int32_t float64_to_int32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2667 float64_unpack_canonical(&p
, a
, s
);
2668 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2671 int64_t float64_to_int64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2676 float64_unpack_canonical(&p
, a
, s
);
2677 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2680 int16_t bfloat16_to_int16_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2685 bfloat16_unpack_canonical(&p
, a
, s
);
2686 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2689 int32_t bfloat16_to_int32_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2694 bfloat16_unpack_canonical(&p
, a
, s
);
2695 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2698 int64_t bfloat16_to_int64_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2703 bfloat16_unpack_canonical(&p
, a
, s
);
2704 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2707 static int32_t float128_to_int32_scalbn(float128 a
, FloatRoundMode rmode
,
2708 int scale
, float_status
*s
)
2712 float128_unpack_canonical(&p
, a
, s
);
2713 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2716 static int64_t float128_to_int64_scalbn(float128 a
, FloatRoundMode rmode
,
2717 int scale
, float_status
*s
)
2721 float128_unpack_canonical(&p
, a
, s
);
2722 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2725 int8_t float16_to_int8(float16 a
, float_status
*s
)
2727 return float16_to_int8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2730 int16_t float16_to_int16(float16 a
, float_status
*s
)
2732 return float16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2735 int32_t float16_to_int32(float16 a
, float_status
*s
)
2737 return float16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2740 int64_t float16_to_int64(float16 a
, float_status
*s
)
2742 return float16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2745 int16_t float32_to_int16(float32 a
, float_status
*s
)
2747 return float32_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2750 int32_t float32_to_int32(float32 a
, float_status
*s
)
2752 return float32_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2755 int64_t float32_to_int64(float32 a
, float_status
*s
)
2757 return float32_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2760 int16_t float64_to_int16(float64 a
, float_status
*s
)
2762 return float64_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2765 int32_t float64_to_int32(float64 a
, float_status
*s
)
2767 return float64_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2770 int64_t float64_to_int64(float64 a
, float_status
*s
)
2772 return float64_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2775 int32_t float128_to_int32(float128 a
, float_status
*s
)
2777 return float128_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2780 int64_t float128_to_int64(float128 a
, float_status
*s
)
2782 return float128_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2785 int16_t float16_to_int16_round_to_zero(float16 a
, float_status
*s
)
2787 return float16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2790 int32_t float16_to_int32_round_to_zero(float16 a
, float_status
*s
)
2792 return float16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2795 int64_t float16_to_int64_round_to_zero(float16 a
, float_status
*s
)
2797 return float16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2800 int16_t float32_to_int16_round_to_zero(float32 a
, float_status
*s
)
2802 return float32_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2805 int32_t float32_to_int32_round_to_zero(float32 a
, float_status
*s
)
2807 return float32_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2810 int64_t float32_to_int64_round_to_zero(float32 a
, float_status
*s
)
2812 return float32_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2815 int16_t float64_to_int16_round_to_zero(float64 a
, float_status
*s
)
2817 return float64_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2820 int32_t float64_to_int32_round_to_zero(float64 a
, float_status
*s
)
2822 return float64_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2825 int64_t float64_to_int64_round_to_zero(float64 a
, float_status
*s
)
2827 return float64_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2830 int32_t float128_to_int32_round_to_zero(float128 a
, float_status
*s
)
2832 return float128_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2835 int64_t float128_to_int64_round_to_zero(float128 a
, float_status
*s
)
2837 return float128_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2840 int16_t bfloat16_to_int16(bfloat16 a
, float_status
*s
)
2842 return bfloat16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2845 int32_t bfloat16_to_int32(bfloat16 a
, float_status
*s
)
2847 return bfloat16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2850 int64_t bfloat16_to_int64(bfloat16 a
, float_status
*s
)
2852 return bfloat16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2855 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a
, float_status
*s
)
2857 return bfloat16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2860 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a
, float_status
*s
)
2862 return bfloat16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2865 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a
, float_status
*s
)
2867 return bfloat16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2871 * Floating-point to unsigned integer conversions
2874 uint8_t float16_to_uint8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2879 float16_unpack_canonical(&p
, a
, s
);
2880 return parts_float_to_uint(&p
, rmode
, scale
, UINT8_MAX
, s
);
2883 uint16_t float16_to_uint16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2888 float16_unpack_canonical(&p
, a
, s
);
2889 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2892 uint32_t float16_to_uint32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2897 float16_unpack_canonical(&p
, a
, s
);
2898 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2901 uint64_t float16_to_uint64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2906 float16_unpack_canonical(&p
, a
, s
);
2907 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2910 uint16_t float32_to_uint16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2915 float32_unpack_canonical(&p
, a
, s
);
2916 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2919 uint32_t float32_to_uint32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2924 float32_unpack_canonical(&p
, a
, s
);
2925 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2928 uint64_t float32_to_uint64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2933 float32_unpack_canonical(&p
, a
, s
);
2934 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2937 uint16_t float64_to_uint16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2942 float64_unpack_canonical(&p
, a
, s
);
2943 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2946 uint32_t float64_to_uint32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2951 float64_unpack_canonical(&p
, a
, s
);
2952 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2955 uint64_t float64_to_uint64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2960 float64_unpack_canonical(&p
, a
, s
);
2961 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2964 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2965 int scale
, float_status
*s
)
2969 bfloat16_unpack_canonical(&p
, a
, s
);
2970 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2973 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2974 int scale
, float_status
*s
)
2978 bfloat16_unpack_canonical(&p
, a
, s
);
2979 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2982 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2983 int scale
, float_status
*s
)
2987 bfloat16_unpack_canonical(&p
, a
, s
);
2988 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2991 static uint32_t float128_to_uint32_scalbn(float128 a
, FloatRoundMode rmode
,
2992 int scale
, float_status
*s
)
2996 float128_unpack_canonical(&p
, a
, s
);
2997 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3000 static uint64_t float128_to_uint64_scalbn(float128 a
, FloatRoundMode rmode
,
3001 int scale
, float_status
*s
)
3005 float128_unpack_canonical(&p
, a
, s
);
3006 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3009 uint8_t float16_to_uint8(float16 a
, float_status
*s
)
3011 return float16_to_uint8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3014 uint16_t float16_to_uint16(float16 a
, float_status
*s
)
3016 return float16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3019 uint32_t float16_to_uint32(float16 a
, float_status
*s
)
3021 return float16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3024 uint64_t float16_to_uint64(float16 a
, float_status
*s
)
3026 return float16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3029 uint16_t float32_to_uint16(float32 a
, float_status
*s
)
3031 return float32_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3034 uint32_t float32_to_uint32(float32 a
, float_status
*s
)
3036 return float32_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3039 uint64_t float32_to_uint64(float32 a
, float_status
*s
)
3041 return float32_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3044 uint16_t float64_to_uint16(float64 a
, float_status
*s
)
3046 return float64_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3049 uint32_t float64_to_uint32(float64 a
, float_status
*s
)
3051 return float64_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3054 uint64_t float64_to_uint64(float64 a
, float_status
*s
)
3056 return float64_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3059 uint32_t float128_to_uint32(float128 a
, float_status
*s
)
3061 return float128_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3064 uint64_t float128_to_uint64(float128 a
, float_status
*s
)
3066 return float128_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3069 uint16_t float16_to_uint16_round_to_zero(float16 a
, float_status
*s
)
3071 return float16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3074 uint32_t float16_to_uint32_round_to_zero(float16 a
, float_status
*s
)
3076 return float16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3079 uint64_t float16_to_uint64_round_to_zero(float16 a
, float_status
*s
)
3081 return float16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3084 uint16_t float32_to_uint16_round_to_zero(float32 a
, float_status
*s
)
3086 return float32_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3089 uint32_t float32_to_uint32_round_to_zero(float32 a
, float_status
*s
)
3091 return float32_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3094 uint64_t float32_to_uint64_round_to_zero(float32 a
, float_status
*s
)
3096 return float32_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3099 uint16_t float64_to_uint16_round_to_zero(float64 a
, float_status
*s
)
3101 return float64_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3104 uint32_t float64_to_uint32_round_to_zero(float64 a
, float_status
*s
)
3106 return float64_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3109 uint64_t float64_to_uint64_round_to_zero(float64 a
, float_status
*s
)
3111 return float64_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3114 uint32_t float128_to_uint32_round_to_zero(float128 a
, float_status
*s
)
3116 return float128_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3119 uint64_t float128_to_uint64_round_to_zero(float128 a
, float_status
*s
)
3121 return float128_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3124 uint16_t bfloat16_to_uint16(bfloat16 a
, float_status
*s
)
3126 return bfloat16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3129 uint32_t bfloat16_to_uint32(bfloat16 a
, float_status
*s
)
3131 return bfloat16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3134 uint64_t bfloat16_to_uint64(bfloat16 a
, float_status
*s
)
3136 return bfloat16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3139 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a
, float_status
*s
)
3141 return bfloat16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3144 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a
, float_status
*s
)
3146 return bfloat16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3149 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a
, float_status
*s
)
3151 return bfloat16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3155 * Signed integer to floating-point conversions
3158 float16
int64_to_float16_scalbn(int64_t a
, int scale
, float_status
*status
)
3162 parts_sint_to_float(&p
, a
, scale
, status
);
3163 return float16_round_pack_canonical(&p
, status
);
3166 float16
int32_to_float16_scalbn(int32_t a
, int scale
, float_status
*status
)
3168 return int64_to_float16_scalbn(a
, scale
, status
);
3171 float16
int16_to_float16_scalbn(int16_t a
, int scale
, float_status
*status
)
3173 return int64_to_float16_scalbn(a
, scale
, status
);
3176 float16
int64_to_float16(int64_t a
, float_status
*status
)
3178 return int64_to_float16_scalbn(a
, 0, status
);
3181 float16
int32_to_float16(int32_t a
, float_status
*status
)
3183 return int64_to_float16_scalbn(a
, 0, status
);
3186 float16
int16_to_float16(int16_t a
, float_status
*status
)
3188 return int64_to_float16_scalbn(a
, 0, status
);
3191 float16
int8_to_float16(int8_t a
, float_status
*status
)
3193 return int64_to_float16_scalbn(a
, 0, status
);
3196 float32
int64_to_float32_scalbn(int64_t a
, int scale
, float_status
*status
)
3200 parts64_sint_to_float(&p
, a
, scale
, status
);
3201 return float32_round_pack_canonical(&p
, status
);
3204 float32
int32_to_float32_scalbn(int32_t a
, int scale
, float_status
*status
)
3206 return int64_to_float32_scalbn(a
, scale
, status
);
3209 float32
int16_to_float32_scalbn(int16_t a
, int scale
, float_status
*status
)
3211 return int64_to_float32_scalbn(a
, scale
, status
);
3214 float32
int64_to_float32(int64_t a
, float_status
*status
)
3216 return int64_to_float32_scalbn(a
, 0, status
);
3219 float32
int32_to_float32(int32_t a
, float_status
*status
)
3221 return int64_to_float32_scalbn(a
, 0, status
);
3224 float32
int16_to_float32(int16_t a
, float_status
*status
)
3226 return int64_to_float32_scalbn(a
, 0, status
);
3229 float64
int64_to_float64_scalbn(int64_t a
, int scale
, float_status
*status
)
3233 parts_sint_to_float(&p
, a
, scale
, status
);
3234 return float64_round_pack_canonical(&p
, status
);
3237 float64
int32_to_float64_scalbn(int32_t a
, int scale
, float_status
*status
)
3239 return int64_to_float64_scalbn(a
, scale
, status
);
3242 float64
int16_to_float64_scalbn(int16_t a
, int scale
, float_status
*status
)
3244 return int64_to_float64_scalbn(a
, scale
, status
);
3247 float64
int64_to_float64(int64_t a
, float_status
*status
)
3249 return int64_to_float64_scalbn(a
, 0, status
);
3252 float64
int32_to_float64(int32_t a
, float_status
*status
)
3254 return int64_to_float64_scalbn(a
, 0, status
);
3257 float64
int16_to_float64(int16_t a
, float_status
*status
)
3259 return int64_to_float64_scalbn(a
, 0, status
);
3262 bfloat16
int64_to_bfloat16_scalbn(int64_t a
, int scale
, float_status
*status
)
3266 parts_sint_to_float(&p
, a
, scale
, status
);
3267 return bfloat16_round_pack_canonical(&p
, status
);
3270 bfloat16
int32_to_bfloat16_scalbn(int32_t a
, int scale
, float_status
*status
)
3272 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3275 bfloat16
int16_to_bfloat16_scalbn(int16_t a
, int scale
, float_status
*status
)
3277 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3280 bfloat16
int64_to_bfloat16(int64_t a
, float_status
*status
)
3282 return int64_to_bfloat16_scalbn(a
, 0, status
);
3285 bfloat16
int32_to_bfloat16(int32_t a
, float_status
*status
)
3287 return int64_to_bfloat16_scalbn(a
, 0, status
);
3290 bfloat16
int16_to_bfloat16(int16_t a
, float_status
*status
)
3292 return int64_to_bfloat16_scalbn(a
, 0, status
);
3295 float128
int64_to_float128(int64_t a
, float_status
*status
)
3299 parts_sint_to_float(&p
, a
, 0, status
);
3300 return float128_round_pack_canonical(&p
, status
);
3303 float128
int32_to_float128(int32_t a
, float_status
*status
)
3305 return int64_to_float128(a
, status
);
3309 * Unsigned Integer to floating-point conversions
3312 float16
uint64_to_float16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3316 parts_uint_to_float(&p
, a
, scale
, status
);
3317 return float16_round_pack_canonical(&p
, status
);
3320 float16
uint32_to_float16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3322 return uint64_to_float16_scalbn(a
, scale
, status
);
3325 float16
uint16_to_float16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3327 return uint64_to_float16_scalbn(a
, scale
, status
);
3330 float16
uint64_to_float16(uint64_t a
, float_status
*status
)
3332 return uint64_to_float16_scalbn(a
, 0, status
);
3335 float16
uint32_to_float16(uint32_t a
, float_status
*status
)
3337 return uint64_to_float16_scalbn(a
, 0, status
);
3340 float16
uint16_to_float16(uint16_t a
, float_status
*status
)
3342 return uint64_to_float16_scalbn(a
, 0, status
);
3345 float16
uint8_to_float16(uint8_t a
, float_status
*status
)
3347 return uint64_to_float16_scalbn(a
, 0, status
);
3350 float32
uint64_to_float32_scalbn(uint64_t a
, int scale
, float_status
*status
)
3354 parts_uint_to_float(&p
, a
, scale
, status
);
3355 return float32_round_pack_canonical(&p
, status
);
3358 float32
uint32_to_float32_scalbn(uint32_t a
, int scale
, float_status
*status
)
3360 return uint64_to_float32_scalbn(a
, scale
, status
);
3363 float32
uint16_to_float32_scalbn(uint16_t a
, int scale
, float_status
*status
)
3365 return uint64_to_float32_scalbn(a
, scale
, status
);
3368 float32
uint64_to_float32(uint64_t a
, float_status
*status
)
3370 return uint64_to_float32_scalbn(a
, 0, status
);
3373 float32
uint32_to_float32(uint32_t a
, float_status
*status
)
3375 return uint64_to_float32_scalbn(a
, 0, status
);
3378 float32
uint16_to_float32(uint16_t a
, float_status
*status
)
3380 return uint64_to_float32_scalbn(a
, 0, status
);
3383 float64
uint64_to_float64_scalbn(uint64_t a
, int scale
, float_status
*status
)
3387 parts_uint_to_float(&p
, a
, scale
, status
);
3388 return float64_round_pack_canonical(&p
, status
);
3391 float64
uint32_to_float64_scalbn(uint32_t a
, int scale
, float_status
*status
)
3393 return uint64_to_float64_scalbn(a
, scale
, status
);
3396 float64
uint16_to_float64_scalbn(uint16_t a
, int scale
, float_status
*status
)
3398 return uint64_to_float64_scalbn(a
, scale
, status
);
3401 float64
uint64_to_float64(uint64_t a
, float_status
*status
)
3403 return uint64_to_float64_scalbn(a
, 0, status
);
3406 float64
uint32_to_float64(uint32_t a
, float_status
*status
)
3408 return uint64_to_float64_scalbn(a
, 0, status
);
3411 float64
uint16_to_float64(uint16_t a
, float_status
*status
)
3413 return uint64_to_float64_scalbn(a
, 0, status
);
3416 bfloat16
uint64_to_bfloat16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3420 parts_uint_to_float(&p
, a
, scale
, status
);
3421 return bfloat16_round_pack_canonical(&p
, status
);
3424 bfloat16
uint32_to_bfloat16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3426 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3429 bfloat16
uint16_to_bfloat16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3431 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3434 bfloat16
uint64_to_bfloat16(uint64_t a
, float_status
*status
)
3436 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3439 bfloat16
uint32_to_bfloat16(uint32_t a
, float_status
*status
)
3441 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3444 bfloat16
uint16_to_bfloat16(uint16_t a
, float_status
*status
)
3446 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3449 float128
uint64_to_float128(uint64_t a
, float_status
*status
)
3453 parts_uint_to_float(&p
, a
, 0, status
);
3454 return float128_round_pack_canonical(&p
, status
);
3458 * Minimum and maximum
3461 static float16
float16_minmax(float16 a
, float16 b
, float_status
*s
, int flags
)
3463 FloatParts64 pa
, pb
, *pr
;
3465 float16_unpack_canonical(&pa
, a
, s
);
3466 float16_unpack_canonical(&pb
, b
, s
);
3467 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3469 return float16_round_pack_canonical(pr
, s
);
3472 static bfloat16
bfloat16_minmax(bfloat16 a
, bfloat16 b
,
3473 float_status
*s
, int flags
)
3475 FloatParts64 pa
, pb
, *pr
;
3477 bfloat16_unpack_canonical(&pa
, a
, s
);
3478 bfloat16_unpack_canonical(&pb
, b
, s
);
3479 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3481 return bfloat16_round_pack_canonical(pr
, s
);
3484 static float32
float32_minmax(float32 a
, float32 b
, float_status
*s
, int flags
)
3486 FloatParts64 pa
, pb
, *pr
;
3488 float32_unpack_canonical(&pa
, a
, s
);
3489 float32_unpack_canonical(&pb
, b
, s
);
3490 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3492 return float32_round_pack_canonical(pr
, s
);
3495 static float64
float64_minmax(float64 a
, float64 b
, float_status
*s
, int flags
)
3497 FloatParts64 pa
, pb
, *pr
;
3499 float64_unpack_canonical(&pa
, a
, s
);
3500 float64_unpack_canonical(&pb
, b
, s
);
3501 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3503 return float64_round_pack_canonical(pr
, s
);
3506 static float128
float128_minmax(float128 a
, float128 b
,
3507 float_status
*s
, int flags
)
3509 FloatParts128 pa
, pb
, *pr
;
3511 float128_unpack_canonical(&pa
, a
, s
);
3512 float128_unpack_canonical(&pb
, b
, s
);
3513 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3515 return float128_round_pack_canonical(pr
, s
);
/*
 * MINMAX_1 expands to one public min/max entry point for @type;
 * MINMAX_2 instantiates the six standard variants for a type.
 */
#define MINMAX_1(type, name, flags) \
    type type##_##name(type a, type b, float_status *s) \
    { return type##_minmax(a, b, s, flags); }

#define MINMAX_2(type)                                                  \
    MINMAX_1(type, max, 0)                                              \
    MINMAX_1(type, maxnum, minmax_isnum)                                \
    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)              \
    MINMAX_1(type, min, minmax_ismin)                                   \
    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                 \
    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3540 * Floating point compare
3543 static FloatRelation QEMU_FLATTEN
3544 float16_do_compare(float16 a
, float16 b
, float_status
*s
, bool is_quiet
)
3546 FloatParts64 pa
, pb
;
3548 float16_unpack_canonical(&pa
, a
, s
);
3549 float16_unpack_canonical(&pb
, b
, s
);
3550 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3553 FloatRelation
float16_compare(float16 a
, float16 b
, float_status
*s
)
3555 return float16_do_compare(a
, b
, s
, false);
3558 FloatRelation
float16_compare_quiet(float16 a
, float16 b
, float_status
*s
)
3560 return float16_do_compare(a
, b
, s
, true);
3563 static FloatRelation QEMU_SOFTFLOAT_ATTR
3564 float32_do_compare(float32 a
, float32 b
, float_status
*s
, bool is_quiet
)
3566 FloatParts64 pa
, pb
;
3568 float32_unpack_canonical(&pa
, a
, s
);
3569 float32_unpack_canonical(&pb
, b
, s
);
3570 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3573 static FloatRelation QEMU_FLATTEN
3574 float32_hs_compare(float32 xa
, float32 xb
, float_status
*s
, bool is_quiet
)
3576 union_float32 ua
, ub
;
3581 if (QEMU_NO_HARDFLOAT
) {
3585 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
3586 if (isgreaterequal(ua
.h
, ub
.h
)) {
3587 if (isgreater(ua
.h
, ub
.h
)) {
3588 return float_relation_greater
;
3590 return float_relation_equal
;
3592 if (likely(isless(ua
.h
, ub
.h
))) {
3593 return float_relation_less
;
3596 * The only condition remaining is unordered.
3597 * Fall through to set flags.
3600 return float32_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
3603 FloatRelation
float32_compare(float32 a
, float32 b
, float_status
*s
)
3605 return float32_hs_compare(a
, b
, s
, false);
3608 FloatRelation
float32_compare_quiet(float32 a
, float32 b
, float_status
*s
)
3610 return float32_hs_compare(a
, b
, s
, true);
3613 static FloatRelation QEMU_SOFTFLOAT_ATTR
3614 float64_do_compare(float64 a
, float64 b
, float_status
*s
, bool is_quiet
)
3616 FloatParts64 pa
, pb
;
3618 float64_unpack_canonical(&pa
, a
, s
);
3619 float64_unpack_canonical(&pb
, b
, s
);
3620 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3623 static FloatRelation QEMU_FLATTEN
3624 float64_hs_compare(float64 xa
, float64 xb
, float_status
*s
, bool is_quiet
)
3626 union_float64 ua
, ub
;
3631 if (QEMU_NO_HARDFLOAT
) {
3635 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
3636 if (isgreaterequal(ua
.h
, ub
.h
)) {
3637 if (isgreater(ua
.h
, ub
.h
)) {
3638 return float_relation_greater
;
3640 return float_relation_equal
;
3642 if (likely(isless(ua
.h
, ub
.h
))) {
3643 return float_relation_less
;
3646 * The only condition remaining is unordered.
3647 * Fall through to set flags.
3650 return float64_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
3653 FloatRelation
float64_compare(float64 a
, float64 b
, float_status
*s
)
3655 return float64_hs_compare(a
, b
, s
, false);
3658 FloatRelation
float64_compare_quiet(float64 a
, float64 b
, float_status
*s
)
3660 return float64_hs_compare(a
, b
, s
, true);
3663 static FloatRelation QEMU_FLATTEN
3664 bfloat16_do_compare(bfloat16 a
, bfloat16 b
, float_status
*s
, bool is_quiet
)
3666 FloatParts64 pa
, pb
;
3668 bfloat16_unpack_canonical(&pa
, a
, s
);
3669 bfloat16_unpack_canonical(&pb
, b
, s
);
3670 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3673 FloatRelation
bfloat16_compare(bfloat16 a
, bfloat16 b
, float_status
*s
)
3675 return bfloat16_do_compare(a
, b
, s
, false);
3678 FloatRelation
bfloat16_compare_quiet(bfloat16 a
, bfloat16 b
, float_status
*s
)
3680 return bfloat16_do_compare(a
, b
, s
, true);
3683 static FloatRelation QEMU_FLATTEN
3684 float128_do_compare(float128 a
, float128 b
, float_status
*s
, bool is_quiet
)
3686 FloatParts128 pa
, pb
;
3688 float128_unpack_canonical(&pa
, a
, s
);
3689 float128_unpack_canonical(&pb
, b
, s
);
3690 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3693 FloatRelation
float128_compare(float128 a
, float128 b
, float_status
*s
)
3695 return float128_do_compare(a
, b
, s
, false);
3698 FloatRelation
float128_compare_quiet(float128 a
, float128 b
, float_status
*s
)
3700 return float128_do_compare(a
, b
, s
, true);
3707 float16
float16_scalbn(float16 a
, int n
, float_status
*status
)
3711 float16_unpack_canonical(&p
, a
, status
);
3712 parts_scalbn(&p
, n
, status
);
3713 return float16_round_pack_canonical(&p
, status
);
3716 float32
float32_scalbn(float32 a
, int n
, float_status
*status
)
3720 float32_unpack_canonical(&p
, a
, status
);
3721 parts_scalbn(&p
, n
, status
);
3722 return float32_round_pack_canonical(&p
, status
);
3725 float64
float64_scalbn(float64 a
, int n
, float_status
*status
)
3729 float64_unpack_canonical(&p
, a
, status
);
3730 parts_scalbn(&p
, n
, status
);
3731 return float64_round_pack_canonical(&p
, status
);
3734 bfloat16
bfloat16_scalbn(bfloat16 a
, int n
, float_status
*status
)
3738 bfloat16_unpack_canonical(&p
, a
, status
);
3739 parts_scalbn(&p
, n
, status
);
3740 return bfloat16_round_pack_canonical(&p
, status
);
3743 float128
float128_scalbn(float128 a
, int n
, float_status
*status
)
3747 float128_unpack_canonical(&p
, a
, status
);
3748 parts_scalbn(&p
, n
, status
);
3749 return float128_round_pack_canonical(&p
, status
);
3756 float16 QEMU_FLATTEN
float16_sqrt(float16 a
, float_status
*status
)
3760 float16_unpack_canonical(&p
, a
, status
);
3761 parts_sqrt(&p
, status
, &float16_params
);
3762 return float16_round_pack_canonical(&p
, status
);
3765 static float32 QEMU_SOFTFLOAT_ATTR
3766 soft_f32_sqrt(float32 a
, float_status
*status
)
3770 float32_unpack_canonical(&p
, a
, status
);
3771 parts_sqrt(&p
, status
, &float32_params
);
3772 return float32_round_pack_canonical(&p
, status
);
3775 static float64 QEMU_SOFTFLOAT_ATTR
3776 soft_f64_sqrt(float64 a
, float_status
*status
)
3780 float64_unpack_canonical(&p
, a
, status
);
3781 parts_sqrt(&p
, status
, &float64_params
);
3782 return float64_round_pack_canonical(&p
, status
);
/*
 * Hard-float fast path for float32 sqrt: use the host sqrtf() when the
 * float_status configuration and the operand allow it, otherwise fall
 * back to the soft-float implementation.
 * NOTE(review): several lines of this block were lost in extraction and
 * have been restored from the canonical structure — verify against the
 * upstream file.
 */
float32 QEMU_FLATTEN
float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        /* Status flags or rounding mode rule out the host FPU. */
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        /* Host-FP classification: only normal/zero, non-negative inputs. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        /* Soft classification path: same constraints, on the raw bits. */
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}
/*
 * Hard-float fast path for float64 sqrt: use the host sqrt() when the
 * float_status configuration and the operand allow it, otherwise fall
 * back to the soft-float implementation.
 * NOTE(review): several lines of this block were lost in extraction and
 * have been restored from the canonical structure — verify against the
 * upstream file.
 */
float64 QEMU_FLATTEN
float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        /* Status flags or rounding mode rule out the host FPU. */
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        /* Host-FP classification: only normal/zero, non-negative inputs. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        /* Soft classification path: same constraints, on the raw bits. */
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}
3839 bfloat16 QEMU_FLATTEN
bfloat16_sqrt(bfloat16 a
, float_status
*status
)
3843 bfloat16_unpack_canonical(&p
, a
, status
);
3844 parts_sqrt(&p
, status
, &bfloat16_params
);
3845 return bfloat16_round_pack_canonical(&p
, status
);
3848 float128 QEMU_FLATTEN
float128_sqrt(float128 a
, float_status
*status
)
3852 float128_unpack_canonical(&p
, a
, status
);
3853 parts_sqrt(&p
, status
, &float128_params
);
3854 return float128_round_pack_canonical(&p
, status
);
/*----------------------------------------------------------------------------
| The pattern for a default generated NaN.
*----------------------------------------------------------------------------*/
3861 float16
float16_default_nan(float_status
*status
)
3865 parts_default_nan(&p
, status
);
3866 p
.frac
>>= float16_params
.frac_shift
;
3867 return float16_pack_raw(&p
);
3870 float32
float32_default_nan(float_status
*status
)
3874 parts_default_nan(&p
, status
);
3875 p
.frac
>>= float32_params
.frac_shift
;
3876 return float32_pack_raw(&p
);
3879 float64
float64_default_nan(float_status
*status
)
3883 parts_default_nan(&p
, status
);
3884 p
.frac
>>= float64_params
.frac_shift
;
3885 return float64_pack_raw(&p
);
3888 float128
float128_default_nan(float_status
*status
)
3892 parts_default_nan(&p
, status
);
3893 frac_shr(&p
, float128_params
.frac_shift
);
3894 return float128_pack_raw(&p
);
3897 bfloat16
bfloat16_default_nan(float_status
*status
)
3901 parts_default_nan(&p
, status
);
3902 p
.frac
>>= bfloat16_params
.frac_shift
;
3903 return bfloat16_pack_raw(&p
);
/*----------------------------------------------------------------------------
| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
*----------------------------------------------------------------------------*/
3910 float16
float16_silence_nan(float16 a
, float_status
*status
)
3914 float16_unpack_raw(&p
, a
);
3915 p
.frac
<<= float16_params
.frac_shift
;
3916 parts_silence_nan(&p
, status
);
3917 p
.frac
>>= float16_params
.frac_shift
;
3918 return float16_pack_raw(&p
);
3921 float32
float32_silence_nan(float32 a
, float_status
*status
)
3925 float32_unpack_raw(&p
, a
);
3926 p
.frac
<<= float32_params
.frac_shift
;
3927 parts_silence_nan(&p
, status
);
3928 p
.frac
>>= float32_params
.frac_shift
;
3929 return float32_pack_raw(&p
);
3932 float64
float64_silence_nan(float64 a
, float_status
*status
)
3936 float64_unpack_raw(&p
, a
);
3937 p
.frac
<<= float64_params
.frac_shift
;
3938 parts_silence_nan(&p
, status
);
3939 p
.frac
>>= float64_params
.frac_shift
;
3940 return float64_pack_raw(&p
);
3943 bfloat16
bfloat16_silence_nan(bfloat16 a
, float_status
*status
)
3947 bfloat16_unpack_raw(&p
, a
);
3948 p
.frac
<<= bfloat16_params
.frac_shift
;
3949 parts_silence_nan(&p
, status
);
3950 p
.frac
>>= bfloat16_params
.frac_shift
;
3951 return bfloat16_pack_raw(&p
);
3954 float128
float128_silence_nan(float128 a
, float_status
*status
)
3958 float128_unpack_raw(&p
, a
);
3959 frac_shl(&p
, float128_params
.frac_shift
);
3960 parts_silence_nan(&p
, status
);
3961 frac_shr(&p
, float128_params
.frac_shift
);
3962 return float128_pack_raw(&p
);
/*----------------------------------------------------------------------------
| If `a' is denormal and we are in flush-to-zero mode then set the
| input-denormal exception and return zero. Otherwise just return the value.
*----------------------------------------------------------------------------*/
3970 static bool parts_squash_denormal(FloatParts64 p
, float_status
*status
)
3972 if (p
.exp
== 0 && p
.frac
!= 0) {
3973 float_raise(float_flag_input_denormal
, status
);
3980 float16
float16_squash_input_denormal(float16 a
, float_status
*status
)
3982 if (status
->flush_inputs_to_zero
) {
3985 float16_unpack_raw(&p
, a
);
3986 if (parts_squash_denormal(p
, status
)) {
3987 return float16_set_sign(float16_zero
, p
.sign
);
3993 float32
float32_squash_input_denormal(float32 a
, float_status
*status
)
3995 if (status
->flush_inputs_to_zero
) {
3998 float32_unpack_raw(&p
, a
);
3999 if (parts_squash_denormal(p
, status
)) {
4000 return float32_set_sign(float32_zero
, p
.sign
);
4006 float64
float64_squash_input_denormal(float64 a
, float_status
*status
)
4008 if (status
->flush_inputs_to_zero
) {
4011 float64_unpack_raw(&p
, a
);
4012 if (parts_squash_denormal(p
, status
)) {
4013 return float64_set_sign(float64_zero
, p
.sign
);
4019 bfloat16
bfloat16_squash_input_denormal(bfloat16 a
, float_status
*status
)
4021 if (status
->flush_inputs_to_zero
) {
4024 bfloat16_unpack_raw(&p
, a
);
4025 if (parts_squash_denormal(p
, status
)) {
4026 return bfloat16_set_sign(bfloat16_zero
, p
.sign
);
4032 /*----------------------------------------------------------------------------
4033 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
4034 | and 7, and returns the properly rounded 32-bit integer corresponding to the
4035 | input. If `zSign' is 1, the input is negated before being converted to an
4036 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
4037 | is simply rounded to an integer, with the inexact exception raised if the
4038 | input cannot be represented exactly as an integer. However, if the fixed-
4039 | point input is too large, the invalid exception is raised and the largest
4040 | positive or negative integer is returned.
4041 *----------------------------------------------------------------------------*/
static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    int32_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* The increment is applied below the 7 fraction (guard) bits;
     * 0x40 is exactly one-half ulp. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Increment only when the result LSB would otherwise be even. */
        roundIncrement = absZ & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
    }
    roundBits = absZ & 0x7F;
    absZ = ( absZ + roundIncrement )>>7;
    /* Exact tie: clear the LSB to round to even. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        absZ &= ~1;
    }
    z = absZ;
    if ( zSign ) z = - z;
    /* Out of int32 range: raise invalid, return the saturated value. */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        float_raise(float_flag_invalid, status);
        return zSign ? INT32_MIN : INT32_MAX;
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    return z;
}
4091 /*----------------------------------------------------------------------------
4092 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4093 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4094 | and returns the properly rounded 64-bit integer corresponding to the input.
4095 | If `zSign' is 1, the input is negated before being converted to an integer.
4096 | Ordinarily, the fixed-point input is simply rounded to an integer, with
4097 | the inexact exception raised if the input cannot be represented exactly as
4098 | an integer. However, if the fixed-point input is too large, the invalid
4099 | exception is raised and the largest positive or negative integer is
4101 *----------------------------------------------------------------------------*/
static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;
    int64_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* absZ1 holds the fraction below the binary point; decide whether
     * to round the integer part (absZ0) up by one. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Top bit of the fraction set means >= one-half. */
        increment = ((int64_t) absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if ( increment ) {
        ++absZ0;
        /* Wrap to zero means the magnitude exceeded 2**64. */
        if ( absZ0 == 0 ) goto overflow;
        /* Exact tie: clear the LSB to round to even. */
        if (!(absZ1 << 1) && roundNearestEven) {
            absZ0 &= ~1;
        }
    }
    z = absZ0;
    if ( zSign ) z = - z;
    /* Sign mismatch after negation means the value left int64 range. */
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
 overflow:
        float_raise(float_flag_invalid, status);
        return zSign ? INT64_MIN : INT64_MAX;
    }
    if (absZ1) {
        float_raise(float_flag_inexact, status);
    }
    return z;
}
4153 /*----------------------------------------------------------------------------
4154 | Normalizes the subnormal single-precision floating-point value represented
4155 | by the denormalized significand `aSig'. The normalized exponent and
4156 | significand are stored at the locations pointed to by `zExpPtr' and
4157 | `zSigPtr', respectively.
4158 *----------------------------------------------------------------------------*/
4161 normalizeFloat32Subnormal(uint32_t aSig
, int *zExpPtr
, uint32_t *zSigPtr
)
4165 shiftCount
= clz32(aSig
) - 8;
4166 *zSigPtr
= aSig
<<shiftCount
;
4167 *zExpPtr
= 1 - shiftCount
;
4171 /*----------------------------------------------------------------------------
4172 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4173 | and significand `zSig', and returns the proper single-precision floating-
4174 | point value corresponding to the abstract input. Ordinarily, the abstract
4175 | value is simply rounded and packed into the single-precision format, with
4176 | the inexact exception raised if the abstract input cannot be represented
4177 | exactly. However, if the abstract value is too large, the overflow and
4178 | inexact exceptions are raised and an infinity or maximal finite value is
4179 | returned. If the abstract value is too small, the input value is rounded to
4180 | a subnormal number, and the underflow and inexact exceptions are raised if
4181 | the abstract input cannot be represented exactly as a subnormal single-
4182 | precision floating-point number.
4183 | The input significand `zSig' has its binary point between bits 30
4184 | and 29, which is 7 bits to the left of the usual location. This shifted
4185 | significand must be normalized or smaller. If `zSig' is not normalized,
4186 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4187 | and it must not require rounding. In the usual case that `zSig' is
4188 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4189 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4190 | Binary Floating-Point Arithmetic.
4191 *----------------------------------------------------------------------------*/
static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Increment below the 7 guard bits; 0x40 is a half-ulp. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        roundIncrement = zSig & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
        break;
    }
    roundBits = zSig & 0x7F;
    /* One unsigned compare catches both zExp >= 0xFD and zExp < 0. */
    if ( 0xFD <= (uint16_t) zExp ) {
        if (    ( 0xFD < zExp )
             || (    ( zExp == 0xFD )
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* -!overflow_to_inf gives an all-ones significand, i.e. the
             * largest finite value, when not overflowing to infinity. */
            return packFloat32(zSign, 0xFF, -!overflow_to_inf);
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat32(zSign, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < 0x80000000);
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x7F;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = zSig & 0x80 ? 0 : 0x7f;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>7;
    /* Exact tie: clear the LSB to round to even. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );
}
4270 /*----------------------------------------------------------------------------
4271 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4272 | and significand `zSig', and returns the proper single-precision floating-
4273 | point value corresponding to the abstract input. This routine is just like
4274 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4275 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4276 | floating-point exponent.
4277 *----------------------------------------------------------------------------*/
4280 normalizeRoundAndPackFloat32(bool zSign
, int zExp
, uint32_t zSig
,
4281 float_status
*status
)
4285 shiftCount
= clz32(zSig
) - 1;
4286 return roundAndPackFloat32(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
,
4291 /*----------------------------------------------------------------------------
4292 | Normalizes the subnormal double-precision floating-point value represented
4293 | by the denormalized significand `aSig'. The normalized exponent and
4294 | significand are stored at the locations pointed to by `zExpPtr' and
4295 | `zSigPtr', respectively.
4296 *----------------------------------------------------------------------------*/
4299 normalizeFloat64Subnormal(uint64_t aSig
, int *zExpPtr
, uint64_t *zSigPtr
)
4303 shiftCount
= clz64(aSig
) - 11;
4304 *zSigPtr
= aSig
<<shiftCount
;
4305 *zExpPtr
= 1 - shiftCount
;
4309 /*----------------------------------------------------------------------------
4310 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4311 | double-precision floating-point value, returning the result. After being
4312 | shifted into the proper positions, the three fields are simply added
4313 | together to form the result. This means that any integer portion of `zSig'
4314 | will be added into the exponent. Since a properly normalized significand
4315 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4316 | than the desired result exponent whenever `zSig' is a complete, normalized
4318 *----------------------------------------------------------------------------*/
4320 static inline float64
packFloat64(bool zSign
, int zExp
, uint64_t zSig
)
4323 return make_float64(
4324 ( ( (uint64_t) zSign
)<<63 ) + ( ( (uint64_t) zExp
)<<52 ) + zSig
);
4328 /*----------------------------------------------------------------------------
4329 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4330 | and significand `zSig', and returns the proper double-precision floating-
4331 | point value corresponding to the abstract input. Ordinarily, the abstract
4332 | value is simply rounded and packed into the double-precision format, with
4333 | the inexact exception raised if the abstract input cannot be represented
4334 | exactly. However, if the abstract value is too large, the overflow and
4335 | inexact exceptions are raised and an infinity or maximal finite value is
4336 | returned. If the abstract value is too small, the input value is rounded to
4337 | a subnormal number, and the underflow and inexact exceptions are raised if
4338 | the abstract input cannot be represented exactly as a subnormal double-
4339 | precision floating-point number.
4340 | The input significand `zSig' has its binary point between bits 62
4341 | and 61, which is 10 bits to the left of the usual location. This shifted
4342 | significand must be normalized or smaller. If `zSig' is not normalized,
4343 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4344 | and it must not require rounding. In the usual case that `zSig' is
4345 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4346 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4347 | Binary Floating-Point Arithmetic.
4348 *----------------------------------------------------------------------------*/
static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Increment below the 10 guard bits; 0x200 is a half-ulp. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x200;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x3ff;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x3ff : 0;
        break;
    case float_round_to_odd:
        roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
        break;
    default:
        abort();
    }
    roundBits = zSig & 0x3FF;
    /* One unsigned compare catches both zExp >= 0x7FD and zExp < 0. */
    if ( 0x7FD <= (uint16_t) zExp ) {
        if (    ( 0x7FD < zExp )
             || (    ( zExp == 0x7FD )
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* -(!overflow_to_inf) gives an all-ones significand, i.e. the
             * largest finite value, when not overflowing to infinity. */
            return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat64(zSign, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x3FF;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>10;
    /* Exact tie: clear the LSB to round to even. */
    if (!(roundBits ^ 0x200) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );
}
4426 /*----------------------------------------------------------------------------
4427 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4428 | and significand `zSig', and returns the proper double-precision floating-
4429 | point value corresponding to the abstract input. This routine is just like
4430 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4431 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4432 | floating-point exponent.
4433 *----------------------------------------------------------------------------*/
4436 normalizeRoundAndPackFloat64(bool zSign
, int zExp
, uint64_t zSig
,
4437 float_status
*status
)
4441 shiftCount
= clz64(zSig
) - 1;
4442 return roundAndPackFloat64(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
,
4447 /*----------------------------------------------------------------------------
4448 | Normalizes the subnormal extended double-precision floating-point value
4449 | represented by the denormalized significand `aSig'. The normalized exponent
4450 | and significand are stored at the locations pointed to by `zExpPtr' and
4451 | `zSigPtr', respectively.
4452 *----------------------------------------------------------------------------*/
4454 void normalizeFloatx80Subnormal(uint64_t aSig
, int32_t *zExpPtr
,
4459 shiftCount
= clz64(aSig
);
4460 *zSigPtr
= aSig
<<shiftCount
;
4461 *zExpPtr
= 1 - shiftCount
;
4464 /*----------------------------------------------------------------------------
4465 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4466 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4467 | and returns the proper extended double-precision floating-point value
4468 | corresponding to the abstract input. Ordinarily, the abstract value is
4469 | rounded and packed into the extended double-precision format, with the
4470 | inexact exception raised if the abstract input cannot be represented
4471 | exactly. However, if the abstract value is too large, the overflow and
4472 | inexact exceptions are raised and an infinity or maximal finite value is
4473 | returned. If the abstract value is too small, the input value is rounded to
4474 | a subnormal number, and the underflow and inexact exceptions are raised if
4475 | the abstract input cannot be represented exactly as a subnormal extended
4476 | double-precision floating-point number.
4477 | If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4478 | the result is rounded to the same number of bits as single or double
4479 | precision, respectively. Otherwise, the result is rounded to the full
4480 | precision of the extended double-precision format.
4481 | The input significand must be normalized or smaller. If the input
4482 | significand is not normalized, `zExp' must be 0; in that case, the result
4483 | returned is a subnormal number, and it must not require rounding. The
4484 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4485 | Floating-Point Arithmetic.
4486 *----------------------------------------------------------------------------*/
/*
 * NOTE(review): many interior lines of this block were lost in
 * extraction and have been restored from the canonical structure —
 * verify against the upstream file before relying on exact behavior.
 */
floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    FloatRoundMode roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Select the rounding position: full 64-bit precision, or truncated
     * to double/single width within the 64-bit significand. */
    switch (roundingPrecision) {
    case floatx80_precision_x:
        goto precision80;
    case floatx80_precision_d:
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
        break;
    case floatx80_precision_s:
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
        break;
    default:
        g_assert_not_reached();
    }
    /* Fold the low word into the sticky bit of the high word. */
    zSig0 |= ( zSig1 != 0 );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || (zSig0 <= zSig0 + roundIncrement);
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            /* A carry into the integer bit promotes out of subnormal. */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    if ( zSig0 < roundIncrement ) {
        /* Significand carried out completely: renormalize. */
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    /* Ties-to-even: widen the mask so the tie bit is cleared too. */
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    /* Full-precision path: round based on the 64-bit low word zSig1. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || (    ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Directed modes that saturate return the largest finite
             * value instead of infinity. */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            /* Recompute the increment: zSig1 changed in the shift. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        if ( zSig0 == 0 ) {
            /* Carry out of the significand: renormalize. */
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            /* Exact tie: clear the LSB to round to even. */
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );
}
4678 /*----------------------------------------------------------------------------
4679 | Takes an abstract floating-point value having sign `zSign', exponent
4680 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4681 | and returns the proper extended double-precision floating-point value
4682 | corresponding to the abstract input. This routine is just like
4683 | `roundAndPackFloatx80' except that the input significand does not have to be
4685 *----------------------------------------------------------------------------*/
4687 floatx80
normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
,
4688 bool zSign
, int32_t zExp
,
4689 uint64_t zSig0
, uint64_t zSig1
,
4690 float_status
*status
)
4699 shiftCount
= clz64(zSig0
);
4700 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
4702 return roundAndPackFloatx80(roundingPrecision
, zSign
, zExp
,
4703 zSig0
, zSig1
, status
);
4707 /*----------------------------------------------------------------------------
4708 | Returns the least-significant 64 fraction bits of the quadruple-precision
4709 | floating-point value `a'.
4710 *----------------------------------------------------------------------------*/
4712 static inline uint64_t extractFloat128Frac1( float128 a
)
4719 /*----------------------------------------------------------------------------
4720 | Returns the most-significant 48 fraction bits of the quadruple-precision
4721 | floating-point value `a'.
4722 *----------------------------------------------------------------------------*/
4724 static inline uint64_t extractFloat128Frac0( float128 a
)
4727 return a
.high
& UINT64_C(0x0000FFFFFFFFFFFF);
4731 /*----------------------------------------------------------------------------
4732 | Returns the exponent bits of the quadruple-precision floating-point value
4734 *----------------------------------------------------------------------------*/
4736 static inline int32_t extractFloat128Exp( float128 a
)
4739 return ( a
.high
>>48 ) & 0x7FFF;
4743 /*----------------------------------------------------------------------------
4744 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4745 *----------------------------------------------------------------------------*/
4747 static inline bool extractFloat128Sign(float128 a
)
4749 return a
.high
>> 63;
4752 /*----------------------------------------------------------------------------
4753 | Normalizes the subnormal quadruple-precision floating-point value
4754 | represented by the denormalized significand formed by the concatenation of
4755 | `aSig0' and `aSig1'. The normalized exponent is stored at the location
4756 | pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4757 | significand are stored at the location pointed to by `zSig0Ptr', and the
4758 | least significant 64 bits of the normalized significand are stored at the
4759 | location pointed to by `zSig1Ptr'.
4760 *----------------------------------------------------------------------------*/
4763 normalizeFloat128Subnormal(
4774 shiftCount
= clz64(aSig1
) - 15;
4775 if ( shiftCount
< 0 ) {
4776 *zSig0Ptr
= aSig1
>>( - shiftCount
);
4777 *zSig1Ptr
= aSig1
<<( shiftCount
& 63 );
4780 *zSig0Ptr
= aSig1
<<shiftCount
;
4783 *zExpPtr
= - shiftCount
- 63;
4786 shiftCount
= clz64(aSig0
) - 15;
4787 shortShift128Left( aSig0
, aSig1
, shiftCount
, zSig0Ptr
, zSig1Ptr
);
4788 *zExpPtr
= 1 - shiftCount
;
4793 /*----------------------------------------------------------------------------
4794 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4795 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4796 | floating-point value, returning the result. After being shifted into the
4797 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4798 | added together to form the most significant 32 bits of the result. This
4799 | means that any integer portion of `zSig0' will be added into the exponent.
4800 | Since a properly normalized significand will have an integer portion equal
4801 | to 1, the `zExp' input should be 1 less than the desired result exponent
4802 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4804 *----------------------------------------------------------------------------*/
4806 static inline float128
4807 packFloat128(bool zSign
, int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
)
4812 z
.high
= ((uint64_t)zSign
<< 63) + ((uint64_t)zExp
<< 48) + zSig0
;
4816 /*----------------------------------------------------------------------------
4817 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4818 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4819 | and `zSig2', and returns the proper quadruple-precision floating-point value
4820 | corresponding to the abstract input. Ordinarily, the abstract value is
4821 | simply rounded and packed into the quadruple-precision format, with the
4822 | inexact exception raised if the abstract input cannot be represented
4823 | exactly. However, if the abstract value is too large, the overflow and
4824 | inexact exceptions are raised and an infinity or maximal finite value is
4825 | returned. If the abstract value is too small, the input value is rounded to
4826 | a subnormal number, and the underflow and inexact exceptions are raised if
4827 | the abstract input cannot be represented exactly as a subnormal quadruple-
4828 | precision floating-point number.
4829 | The input significand must be normalized or smaller. If the input
4830 | significand is not normalized, `zExp' must be 0; in that case, the result
4831 | returned is a subnormal number, and it must not require rounding. In the
4832 | usual case that the input significand is normalized, `zExp' must be 1 less
4833 | than the ``true'' floating-point exponent. The handling of underflow and
4834 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4835 *----------------------------------------------------------------------------*/
4837 static float128
roundAndPackFloat128(bool zSign
, int32_t zExp
,
4838 uint64_t zSig0
, uint64_t zSig1
,
4839 uint64_t zSig2
, float_status
*status
)
4841 int8_t roundingMode
;
4842 bool roundNearestEven
, increment
, isTiny
;
4844 roundingMode
= status
->float_rounding_mode
;
4845 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4846 switch (roundingMode
) {
4847 case float_round_nearest_even
:
4848 case float_round_ties_away
:
4849 increment
= ((int64_t)zSig2
< 0);
4851 case float_round_to_zero
:
4854 case float_round_up
:
4855 increment
= !zSign
&& zSig2
;
4857 case float_round_down
:
4858 increment
= zSign
&& zSig2
;
4860 case float_round_to_odd
:
4861 increment
= !(zSig1
& 0x1) && zSig2
;
4866 if ( 0x7FFD <= (uint32_t) zExp
) {
4867 if ( ( 0x7FFD < zExp
)
4868 || ( ( zExp
== 0x7FFD )
4870 UINT64_C(0x0001FFFFFFFFFFFF),
4871 UINT64_C(0xFFFFFFFFFFFFFFFF),
4878 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4879 if ( ( roundingMode
== float_round_to_zero
)
4880 || ( zSign
&& ( roundingMode
== float_round_up
) )
4881 || ( ! zSign
&& ( roundingMode
== float_round_down
) )
4882 || (roundingMode
== float_round_to_odd
)
4888 UINT64_C(0x0000FFFFFFFFFFFF),
4889 UINT64_C(0xFFFFFFFFFFFFFFFF)
4892 return packFloat128( zSign
, 0x7FFF, 0, 0 );
4895 if (status
->flush_to_zero
) {
4896 float_raise(float_flag_output_denormal
, status
);
4897 return packFloat128(zSign
, 0, 0, 0);
4899 isTiny
= status
->tininess_before_rounding
4902 || lt128(zSig0
, zSig1
,
4903 UINT64_C(0x0001FFFFFFFFFFFF),
4904 UINT64_C(0xFFFFFFFFFFFFFFFF));
4905 shift128ExtraRightJamming(
4906 zSig0
, zSig1
, zSig2
, - zExp
, &zSig0
, &zSig1
, &zSig2
);
4908 if (isTiny
&& zSig2
) {
4909 float_raise(float_flag_underflow
, status
);
4911 switch (roundingMode
) {
4912 case float_round_nearest_even
:
4913 case float_round_ties_away
:
4914 increment
= ((int64_t)zSig2
< 0);
4916 case float_round_to_zero
:
4919 case float_round_up
:
4920 increment
= !zSign
&& zSig2
;
4922 case float_round_down
:
4923 increment
= zSign
&& zSig2
;
4925 case float_round_to_odd
:
4926 increment
= !(zSig1
& 0x1) && zSig2
;
4934 float_raise(float_flag_inexact
, status
);
4937 add128( zSig0
, zSig1
, 0, 1, &zSig0
, &zSig1
);
4938 if ((zSig2
+ zSig2
== 0) && roundNearestEven
) {
4943 if ( ( zSig0
| zSig1
) == 0 ) zExp
= 0;
4945 return packFloat128( zSign
, zExp
, zSig0
, zSig1
);
4949 /*----------------------------------------------------------------------------
4950 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4951 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4952 | returns the proper quadruple-precision floating-point value corresponding
4953 | to the abstract input. This routine is just like `roundAndPackFloat128'
4954 | except that the input significand has fewer bits and does not have to be
4955 | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4957 *----------------------------------------------------------------------------*/
4959 static float128
normalizeRoundAndPackFloat128(bool zSign
, int32_t zExp
,
4960 uint64_t zSig0
, uint64_t zSig1
,
4961 float_status
*status
)
4971 shiftCount
= clz64(zSig0
) - 15;
4972 if ( 0 <= shiftCount
) {
4974 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
4977 shift128ExtraRightJamming(
4978 zSig0
, zSig1
, 0, - shiftCount
, &zSig0
, &zSig1
, &zSig2
);
4981 return roundAndPackFloat128(zSign
, zExp
, zSig0
, zSig1
, zSig2
, status
);
4986 /*----------------------------------------------------------------------------
4987 | Returns the result of converting the 32-bit two's complement integer `a'
4988 | to the extended double-precision floating-point format. The conversion
4989 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4991 *----------------------------------------------------------------------------*/
4993 floatx80
int32_to_floatx80(int32_t a
, float_status
*status
)
5000 if ( a
== 0 ) return packFloatx80( 0, 0, 0 );
5002 absA
= zSign
? - a
: a
;
5003 shiftCount
= clz32(absA
) + 32;
5005 return packFloatx80( zSign
, 0x403E - shiftCount
, zSig
<<shiftCount
);
5009 /*----------------------------------------------------------------------------
5010 | Returns the result of converting the 64-bit two's complement integer `a'
5011 | to the extended double-precision floating-point format. The conversion
5012 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5014 *----------------------------------------------------------------------------*/
5016 floatx80
int64_to_floatx80(int64_t a
, float_status
*status
)
5022 if ( a
== 0 ) return packFloatx80( 0, 0, 0 );
5024 absA
= zSign
? - a
: a
;
5025 shiftCount
= clz64(absA
);
5026 return packFloatx80( zSign
, 0x403E - shiftCount
, absA
<<shiftCount
);
5030 /*----------------------------------------------------------------------------
5031 | Returns the result of converting the single-precision floating-point value
5032 | `a' to the extended double-precision floating-point format. The conversion
5033 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5035 *----------------------------------------------------------------------------*/
5037 floatx80
float32_to_floatx80(float32 a
, float_status
*status
)
5043 a
= float32_squash_input_denormal(a
, status
);
5044 aSig
= extractFloat32Frac( a
);
5045 aExp
= extractFloat32Exp( a
);
5046 aSign
= extractFloat32Sign( a
);
5047 if ( aExp
== 0xFF ) {
5049 floatx80 res
= commonNaNToFloatx80(float32ToCommonNaN(a
, status
),
5051 return floatx80_silence_nan(res
, status
);
5053 return packFloatx80(aSign
,
5054 floatx80_infinity_high
,
5055 floatx80_infinity_low
);
5058 if ( aSig
== 0 ) return packFloatx80( aSign
, 0, 0 );
5059 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5062 return packFloatx80( aSign
, aExp
+ 0x3F80, ( (uint64_t) aSig
)<<40 );
5066 /*----------------------------------------------------------------------------
5067 | Returns the remainder of the single-precision floating-point value `a'
5068 | with respect to the corresponding value `b'. The operation is performed
5069 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5070 *----------------------------------------------------------------------------*/
5072 float32
float32_rem(float32 a
, float32 b
, float_status
*status
)
5075 int aExp
, bExp
, expDiff
;
5076 uint32_t aSig
, bSig
;
5078 uint64_t aSig64
, bSig64
, q64
;
5079 uint32_t alternateASig
;
5081 a
= float32_squash_input_denormal(a
, status
);
5082 b
= float32_squash_input_denormal(b
, status
);
5084 aSig
= extractFloat32Frac( a
);
5085 aExp
= extractFloat32Exp( a
);
5086 aSign
= extractFloat32Sign( a
);
5087 bSig
= extractFloat32Frac( b
);
5088 bExp
= extractFloat32Exp( b
);
5089 if ( aExp
== 0xFF ) {
5090 if ( aSig
|| ( ( bExp
== 0xFF ) && bSig
) ) {
5091 return propagateFloat32NaN(a
, b
, status
);
5093 float_raise(float_flag_invalid
, status
);
5094 return float32_default_nan(status
);
5096 if ( bExp
== 0xFF ) {
5098 return propagateFloat32NaN(a
, b
, status
);
5104 float_raise(float_flag_invalid
, status
);
5105 return float32_default_nan(status
);
5107 normalizeFloat32Subnormal( bSig
, &bExp
, &bSig
);
5110 if ( aSig
== 0 ) return a
;
5111 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5113 expDiff
= aExp
- bExp
;
5116 if ( expDiff
< 32 ) {
5119 if ( expDiff
< 0 ) {
5120 if ( expDiff
< -1 ) return a
;
5123 q
= ( bSig
<= aSig
);
5124 if ( q
) aSig
-= bSig
;
5125 if ( 0 < expDiff
) {
5126 q
= ( ( (uint64_t) aSig
)<<32 ) / bSig
;
5129 aSig
= ( ( aSig
>>1 )<<( expDiff
- 1 ) ) - bSig
* q
;
5137 if ( bSig
<= aSig
) aSig
-= bSig
;
5138 aSig64
= ( (uint64_t) aSig
)<<40;
5139 bSig64
= ( (uint64_t) bSig
)<<40;
5141 while ( 0 < expDiff
) {
5142 q64
= estimateDiv128To64( aSig64
, 0, bSig64
);
5143 q64
= ( 2 < q64
) ? q64
- 2 : 0;
5144 aSig64
= - ( ( bSig
* q64
)<<38 );
5148 q64
= estimateDiv128To64( aSig64
, 0, bSig64
);
5149 q64
= ( 2 < q64
) ? q64
- 2 : 0;
5150 q
= q64
>>( 64 - expDiff
);
5152 aSig
= ( ( aSig64
>>33 )<<( expDiff
- 1 ) ) - bSig
* q
;
5155 alternateASig
= aSig
;
5158 } while ( 0 <= (int32_t) aSig
);
5159 sigMean
= aSig
+ alternateASig
;
5160 if ( ( sigMean
< 0 ) || ( ( sigMean
== 0 ) && ( q
& 1 ) ) ) {
5161 aSig
= alternateASig
;
5163 zSign
= ( (int32_t) aSig
< 0 );
5164 if ( zSign
) aSig
= - aSig
;
5165 return normalizeRoundAndPackFloat32(aSign
^ zSign
, bExp
, aSig
, status
);
5170 /*----------------------------------------------------------------------------
5171 | Returns the binary exponential of the single-precision floating-point value
5172 | `a'. The operation is performed according to the IEC/IEEE Standard for
5173 | Binary Floating-Point Arithmetic.
5175 | Uses the following identities:
5177 | 1. -------------------------------------------------------------------------
5181 | 2. -------------------------------------------------------------------------
5184 | e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5186 *----------------------------------------------------------------------------*/
5188 static const float64 float32_exp2_coefficients
[15] =
5190 const_float64( 0x3ff0000000000000ll
), /* 1 */
5191 const_float64( 0x3fe0000000000000ll
), /* 2 */
5192 const_float64( 0x3fc5555555555555ll
), /* 3 */
5193 const_float64( 0x3fa5555555555555ll
), /* 4 */
5194 const_float64( 0x3f81111111111111ll
), /* 5 */
5195 const_float64( 0x3f56c16c16c16c17ll
), /* 6 */
5196 const_float64( 0x3f2a01a01a01a01all
), /* 7 */
5197 const_float64( 0x3efa01a01a01a01all
), /* 8 */
5198 const_float64( 0x3ec71de3a556c734ll
), /* 9 */
5199 const_float64( 0x3e927e4fb7789f5cll
), /* 10 */
5200 const_float64( 0x3e5ae64567f544e4ll
), /* 11 */
5201 const_float64( 0x3e21eed8eff8d898ll
), /* 12 */
5202 const_float64( 0x3de6124613a86d09ll
), /* 13 */
5203 const_float64( 0x3da93974a8c07c9dll
), /* 14 */
5204 const_float64( 0x3d6ae7f3e733b81fll
), /* 15 */
5207 float32
float32_exp2(float32 a
, float_status
*status
)
5214 a
= float32_squash_input_denormal(a
, status
);
5216 aSig
= extractFloat32Frac( a
);
5217 aExp
= extractFloat32Exp( a
);
5218 aSign
= extractFloat32Sign( a
);
5220 if ( aExp
== 0xFF) {
5222 return propagateFloat32NaN(a
, float32_zero
, status
);
5224 return (aSign
) ? float32_zero
: a
;
5227 if (aSig
== 0) return float32_one
;
5230 float_raise(float_flag_inexact
, status
);
5232 /* ******************************* */
5233 /* using float64 for approximation */
5234 /* ******************************* */
5235 x
= float32_to_float64(a
, status
);
5236 x
= float64_mul(x
, float64_ln2
, status
);
5240 for (i
= 0 ; i
< 15 ; i
++) {
5243 f
= float64_mul(xn
, float32_exp2_coefficients
[i
], status
);
5244 r
= float64_add(r
, f
, status
);
5246 xn
= float64_mul(xn
, x
, status
);
5249 return float64_to_float32(r
, status
);
5252 /*----------------------------------------------------------------------------
5253 | Returns the binary log of the single-precision floating-point value `a'.
5254 | The operation is performed according to the IEC/IEEE Standard for Binary
5255 | Floating-Point Arithmetic.
5256 *----------------------------------------------------------------------------*/
5257 float32
float32_log2(float32 a
, float_status
*status
)
5261 uint32_t aSig
, zSig
, i
;
5263 a
= float32_squash_input_denormal(a
, status
);
5264 aSig
= extractFloat32Frac( a
);
5265 aExp
= extractFloat32Exp( a
);
5266 aSign
= extractFloat32Sign( a
);
5269 if ( aSig
== 0 ) return packFloat32( 1, 0xFF, 0 );
5270 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5273 float_raise(float_flag_invalid
, status
);
5274 return float32_default_nan(status
);
5276 if ( aExp
== 0xFF ) {
5278 return propagateFloat32NaN(a
, float32_zero
, status
);
5288 for (i
= 1 << 22; i
> 0; i
>>= 1) {
5289 aSig
= ( (uint64_t)aSig
* aSig
) >> 23;
5290 if ( aSig
& 0x01000000 ) {
5299 return normalizeRoundAndPackFloat32(zSign
, 0x85, zSig
, status
);
5302 /*----------------------------------------------------------------------------
5303 | Returns the result of converting the double-precision floating-point value
5304 | `a' to the extended double-precision floating-point format. The conversion
5305 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5307 *----------------------------------------------------------------------------*/
5309 floatx80
float64_to_floatx80(float64 a
, float_status
*status
)
5315 a
= float64_squash_input_denormal(a
, status
);
5316 aSig
= extractFloat64Frac( a
);
5317 aExp
= extractFloat64Exp( a
);
5318 aSign
= extractFloat64Sign( a
);
5319 if ( aExp
== 0x7FF ) {
5321 floatx80 res
= commonNaNToFloatx80(float64ToCommonNaN(a
, status
),
5323 return floatx80_silence_nan(res
, status
);
5325 return packFloatx80(aSign
,
5326 floatx80_infinity_high
,
5327 floatx80_infinity_low
);
5330 if ( aSig
== 0 ) return packFloatx80( aSign
, 0, 0 );
5331 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5335 aSign
, aExp
+ 0x3C00, (aSig
| UINT64_C(0x0010000000000000)) << 11);
5339 /*----------------------------------------------------------------------------
5340 | Returns the remainder of the double-precision floating-point value `a'
5341 | with respect to the corresponding value `b'. The operation is performed
5342 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5343 *----------------------------------------------------------------------------*/
5345 float64
float64_rem(float64 a
, float64 b
, float_status
*status
)
5348 int aExp
, bExp
, expDiff
;
5349 uint64_t aSig
, bSig
;
5350 uint64_t q
, alternateASig
;
5353 a
= float64_squash_input_denormal(a
, status
);
5354 b
= float64_squash_input_denormal(b
, status
);
5355 aSig
= extractFloat64Frac( a
);
5356 aExp
= extractFloat64Exp( a
);
5357 aSign
= extractFloat64Sign( a
);
5358 bSig
= extractFloat64Frac( b
);
5359 bExp
= extractFloat64Exp( b
);
5360 if ( aExp
== 0x7FF ) {
5361 if ( aSig
|| ( ( bExp
== 0x7FF ) && bSig
) ) {
5362 return propagateFloat64NaN(a
, b
, status
);
5364 float_raise(float_flag_invalid
, status
);
5365 return float64_default_nan(status
);
5367 if ( bExp
== 0x7FF ) {
5369 return propagateFloat64NaN(a
, b
, status
);
5375 float_raise(float_flag_invalid
, status
);
5376 return float64_default_nan(status
);
5378 normalizeFloat64Subnormal( bSig
, &bExp
, &bSig
);
5381 if ( aSig
== 0 ) return a
;
5382 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5384 expDiff
= aExp
- bExp
;
5385 aSig
= (aSig
| UINT64_C(0x0010000000000000)) << 11;
5386 bSig
= (bSig
| UINT64_C(0x0010000000000000)) << 11;
5387 if ( expDiff
< 0 ) {
5388 if ( expDiff
< -1 ) return a
;
5391 q
= ( bSig
<= aSig
);
5392 if ( q
) aSig
-= bSig
;
5394 while ( 0 < expDiff
) {
5395 q
= estimateDiv128To64( aSig
, 0, bSig
);
5396 q
= ( 2 < q
) ? q
- 2 : 0;
5397 aSig
= - ( ( bSig
>>2 ) * q
);
5401 if ( 0 < expDiff
) {
5402 q
= estimateDiv128To64( aSig
, 0, bSig
);
5403 q
= ( 2 < q
) ? q
- 2 : 0;
5406 aSig
= ( ( aSig
>>1 )<<( expDiff
- 1 ) ) - bSig
* q
;
5413 alternateASig
= aSig
;
5416 } while ( 0 <= (int64_t) aSig
);
5417 sigMean
= aSig
+ alternateASig
;
5418 if ( ( sigMean
< 0 ) || ( ( sigMean
== 0 ) && ( q
& 1 ) ) ) {
5419 aSig
= alternateASig
;
5421 zSign
= ( (int64_t) aSig
< 0 );
5422 if ( zSign
) aSig
= - aSig
;
5423 return normalizeRoundAndPackFloat64(aSign
^ zSign
, bExp
, aSig
, status
);
5427 /*----------------------------------------------------------------------------
5428 | Returns the binary log of the double-precision floating-point value `a'.
5429 | The operation is performed according to the IEC/IEEE Standard for Binary
5430 | Floating-Point Arithmetic.
5431 *----------------------------------------------------------------------------*/
5432 float64
float64_log2(float64 a
, float_status
*status
)
5436 uint64_t aSig
, aSig0
, aSig1
, zSig
, i
;
5437 a
= float64_squash_input_denormal(a
, status
);
5439 aSig
= extractFloat64Frac( a
);
5440 aExp
= extractFloat64Exp( a
);
5441 aSign
= extractFloat64Sign( a
);
5444 if ( aSig
== 0 ) return packFloat64( 1, 0x7FF, 0 );
5445 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5448 float_raise(float_flag_invalid
, status
);
5449 return float64_default_nan(status
);
5451 if ( aExp
== 0x7FF ) {
5453 return propagateFloat64NaN(a
, float64_zero
, status
);
5459 aSig
|= UINT64_C(0x0010000000000000);
5461 zSig
= (uint64_t)aExp
<< 52;
5462 for (i
= 1LL << 51; i
> 0; i
>>= 1) {
5463 mul64To128( aSig
, aSig
, &aSig0
, &aSig1
);
5464 aSig
= ( aSig0
<< 12 ) | ( aSig1
>> 52 );
5465 if ( aSig
& UINT64_C(0x0020000000000000) ) {
5473 return normalizeRoundAndPackFloat64(zSign
, 0x408, zSig
, status
);
5476 /*----------------------------------------------------------------------------
5477 | Returns the result of converting the extended double-precision floating-
5478 | point value `a' to the 32-bit two's complement integer format. The
5479 | conversion is performed according to the IEC/IEEE Standard for Binary
5480 | Floating-Point Arithmetic---which means in particular that the conversion
5481 | is rounded according to the current rounding mode. If `a' is a NaN, the
5482 | largest positive integer is returned. Otherwise, if the conversion
5483 | overflows, the largest integer with the same sign as `a' is returned.
5484 *----------------------------------------------------------------------------*/
5486 int32_t floatx80_to_int32(floatx80 a
, float_status
*status
)
5489 int32_t aExp
, shiftCount
;
5492 if (floatx80_invalid_encoding(a
)) {
5493 float_raise(float_flag_invalid
, status
);
5496 aSig
= extractFloatx80Frac( a
);
5497 aExp
= extractFloatx80Exp( a
);
5498 aSign
= extractFloatx80Sign( a
);
5499 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( aSig
<<1 ) ) aSign
= 0;
5500 shiftCount
= 0x4037 - aExp
;
5501 if ( shiftCount
<= 0 ) shiftCount
= 1;
5502 shift64RightJamming( aSig
, shiftCount
, &aSig
);
5503 return roundAndPackInt32(aSign
, aSig
, status
);
5507 /*----------------------------------------------------------------------------
5508 | Returns the result of converting the extended double-precision floating-
5509 | point value `a' to the 32-bit two's complement integer format. The
5510 | conversion is performed according to the IEC/IEEE Standard for Binary
5511 | Floating-Point Arithmetic, except that the conversion is always rounded
5512 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5513 | Otherwise, if the conversion overflows, the largest integer with the same
5514 | sign as `a' is returned.
5515 *----------------------------------------------------------------------------*/
5517 int32_t floatx80_to_int32_round_to_zero(floatx80 a
, float_status
*status
)
5520 int32_t aExp
, shiftCount
;
5521 uint64_t aSig
, savedASig
;
5524 if (floatx80_invalid_encoding(a
)) {
5525 float_raise(float_flag_invalid
, status
);
5528 aSig
= extractFloatx80Frac( a
);
5529 aExp
= extractFloatx80Exp( a
);
5530 aSign
= extractFloatx80Sign( a
);
5531 if ( 0x401E < aExp
) {
5532 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( aSig
<<1 ) ) aSign
= 0;
5535 else if ( aExp
< 0x3FFF ) {
5537 float_raise(float_flag_inexact
, status
);
5541 shiftCount
= 0x403E - aExp
;
5543 aSig
>>= shiftCount
;
5545 if ( aSign
) z
= - z
;
5546 if ( ( z
< 0 ) ^ aSign
) {
5548 float_raise(float_flag_invalid
, status
);
5549 return aSign
? (int32_t) 0x80000000 : 0x7FFFFFFF;
5551 if ( ( aSig
<<shiftCount
) != savedASig
) {
5552 float_raise(float_flag_inexact
, status
);
5558 /*----------------------------------------------------------------------------
5559 | Returns the result of converting the extended double-precision floating-
5560 | point value `a' to the 64-bit two's complement integer format. The
5561 | conversion is performed according to the IEC/IEEE Standard for Binary
5562 | Floating-Point Arithmetic---which means in particular that the conversion
5563 | is rounded according to the current rounding mode. If `a' is a NaN,
5564 | the largest positive integer is returned. Otherwise, if the conversion
5565 | overflows, the largest integer with the same sign as `a' is returned.
5566 *----------------------------------------------------------------------------*/
5568 int64_t floatx80_to_int64(floatx80 a
, float_status
*status
)
5571 int32_t aExp
, shiftCount
;
5572 uint64_t aSig
, aSigExtra
;
5574 if (floatx80_invalid_encoding(a
)) {
5575 float_raise(float_flag_invalid
, status
);
5578 aSig
= extractFloatx80Frac( a
);
5579 aExp
= extractFloatx80Exp( a
);
5580 aSign
= extractFloatx80Sign( a
);
5581 shiftCount
= 0x403E - aExp
;
5582 if ( shiftCount
<= 0 ) {
5584 float_raise(float_flag_invalid
, status
);
5585 if (!aSign
|| floatx80_is_any_nan(a
)) {
5593 shift64ExtraRightJamming( aSig
, 0, shiftCount
, &aSig
, &aSigExtra
);
5595 return roundAndPackInt64(aSign
, aSig
, aSigExtra
, status
);
5599 /*----------------------------------------------------------------------------
5600 | Returns the result of converting the extended double-precision floating-
5601 | point value `a' to the 64-bit two's complement integer format. The
5602 | conversion is performed according to the IEC/IEEE Standard for Binary
5603 | Floating-Point Arithmetic, except that the conversion is always rounded
5604 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5605 | Otherwise, if the conversion overflows, the largest integer with the same
5606 | sign as `a' is returned.
5607 *----------------------------------------------------------------------------*/
5609 int64_t floatx80_to_int64_round_to_zero(floatx80 a
, float_status
*status
)
5612 int32_t aExp
, shiftCount
;
5616 if (floatx80_invalid_encoding(a
)) {
5617 float_raise(float_flag_invalid
, status
);
5620 aSig
= extractFloatx80Frac( a
);
5621 aExp
= extractFloatx80Exp( a
);
5622 aSign
= extractFloatx80Sign( a
);
5623 shiftCount
= aExp
- 0x403E;
5624 if ( 0 <= shiftCount
) {
5625 aSig
&= UINT64_C(0x7FFFFFFFFFFFFFFF);
5626 if ( ( a
.high
!= 0xC03E ) || aSig
) {
5627 float_raise(float_flag_invalid
, status
);
5628 if ( ! aSign
|| ( ( aExp
== 0x7FFF ) && aSig
) ) {
5634 else if ( aExp
< 0x3FFF ) {
5636 float_raise(float_flag_inexact
, status
);
5640 z
= aSig
>>( - shiftCount
);
5641 if ( (uint64_t) ( aSig
<<( shiftCount
& 63 ) ) ) {
5642 float_raise(float_flag_inexact
, status
);
5644 if ( aSign
) z
= - z
;
5649 /*----------------------------------------------------------------------------
5650 | Returns the result of converting the extended double-precision floating-
5651 | point value `a' to the single-precision floating-point format. The
5652 | conversion is performed according to the IEC/IEEE Standard for Binary
5653 | Floating-Point Arithmetic.
5654 *----------------------------------------------------------------------------*/
5656 float32
floatx80_to_float32(floatx80 a
, float_status
*status
)
5662 if (floatx80_invalid_encoding(a
)) {
5663 float_raise(float_flag_invalid
, status
);
5664 return float32_default_nan(status
);
5666 aSig
= extractFloatx80Frac( a
);
5667 aExp
= extractFloatx80Exp( a
);
5668 aSign
= extractFloatx80Sign( a
);
5669 if ( aExp
== 0x7FFF ) {
5670 if ( (uint64_t) ( aSig
<<1 ) ) {
5671 float32 res
= commonNaNToFloat32(floatx80ToCommonNaN(a
, status
),
5673 return float32_silence_nan(res
, status
);
5675 return packFloat32( aSign
, 0xFF, 0 );
5677 shift64RightJamming( aSig
, 33, &aSig
);
5678 if ( aExp
|| aSig
) aExp
-= 0x3F81;
5679 return roundAndPackFloat32(aSign
, aExp
, aSig
, status
);
5683 /*----------------------------------------------------------------------------
5684 | Returns the result of converting the extended double-precision floating-
5685 | point value `a' to the double-precision floating-point format. The
5686 | conversion is performed according to the IEC/IEEE Standard for Binary
5687 | Floating-Point Arithmetic.
5688 *----------------------------------------------------------------------------*/
5690 float64
floatx80_to_float64(floatx80 a
, float_status
*status
)
5694 uint64_t aSig
, zSig
;
5696 if (floatx80_invalid_encoding(a
)) {
5697 float_raise(float_flag_invalid
, status
);
5698 return float64_default_nan(status
);
5700 aSig
= extractFloatx80Frac( a
);
5701 aExp
= extractFloatx80Exp( a
);
5702 aSign
= extractFloatx80Sign( a
);
5703 if ( aExp
== 0x7FFF ) {
5704 if ( (uint64_t) ( aSig
<<1 ) ) {
5705 float64 res
= commonNaNToFloat64(floatx80ToCommonNaN(a
, status
),
5707 return float64_silence_nan(res
, status
);
5709 return packFloat64( aSign
, 0x7FF, 0 );
5711 shift64RightJamming( aSig
, 1, &zSig
);
5712 if ( aExp
|| aSig
) aExp
-= 0x3C01;
5713 return roundAndPackFloat64(aSign
, aExp
, zSig
, status
);
5717 /*----------------------------------------------------------------------------
5718 | Returns the result of converting the extended double-precision floating-
5719 | point value `a' to the quadruple-precision floating-point format. The
5720 | conversion is performed according to the IEC/IEEE Standard for Binary
5721 | Floating-Point Arithmetic.
5722 *----------------------------------------------------------------------------*/
5724 float128
floatx80_to_float128(floatx80 a
, float_status
*status
)
5728 uint64_t aSig
, zSig0
, zSig1
;
5730 if (floatx80_invalid_encoding(a
)) {
5731 float_raise(float_flag_invalid
, status
);
5732 return float128_default_nan(status
);
5734 aSig
= extractFloatx80Frac( a
);
5735 aExp
= extractFloatx80Exp( a
);
5736 aSign
= extractFloatx80Sign( a
);
5737 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( aSig
<<1 ) ) {
5738 float128 res
= commonNaNToFloat128(floatx80ToCommonNaN(a
, status
),
5740 return float128_silence_nan(res
, status
);
5742 shift128Right( aSig
<<1, 0, 16, &zSig0
, &zSig1
);
5743 return packFloat128( aSign
, aExp
, zSig0
, zSig1
);
5747 /*----------------------------------------------------------------------------
5748 | Rounds the extended double-precision floating-point value `a'
5749 | to the precision provided by floatx80_rounding_precision and returns the
5750 | result as an extended double-precision floating-point value.
5751 | The operation is performed according to the IEC/IEEE Standard for Binary
5752 | Floating-Point Arithmetic.
5753 *----------------------------------------------------------------------------*/
5755 floatx80
floatx80_round(floatx80 a
, float_status
*status
)
5757 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
5758 extractFloatx80Sign(a
),
5759 extractFloatx80Exp(a
),
5760 extractFloatx80Frac(a
), 0, status
);
5763 /*----------------------------------------------------------------------------
5764 | Rounds the extended double-precision floating-point value `a' to an integer,
5765 | and returns the result as an extended quadruple-precision floating-point
5766 | value. The operation is performed according to the IEC/IEEE Standard for
5767 | Binary Floating-Point Arithmetic.
5768 *----------------------------------------------------------------------------*/
5770 floatx80
floatx80_round_to_int(floatx80 a
, float_status
*status
)
5774 uint64_t lastBitMask
, roundBitsMask
;
5777 if (floatx80_invalid_encoding(a
)) {
5778 float_raise(float_flag_invalid
, status
);
5779 return floatx80_default_nan(status
);
5781 aExp
= extractFloatx80Exp( a
);
5782 if ( 0x403E <= aExp
) {
5783 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a
)<<1 ) ) {
5784 return propagateFloatx80NaN(a
, a
, status
);
5788 if ( aExp
< 0x3FFF ) {
5790 && ( (uint64_t) ( extractFloatx80Frac( a
) ) == 0 ) ) {
5793 float_raise(float_flag_inexact
, status
);
5794 aSign
= extractFloatx80Sign( a
);
5795 switch (status
->float_rounding_mode
) {
5796 case float_round_nearest_even
:
5797 if ( ( aExp
== 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a
)<<1 )
5800 packFloatx80( aSign
, 0x3FFF, UINT64_C(0x8000000000000000));
5803 case float_round_ties_away
:
5804 if (aExp
== 0x3FFE) {
5805 return packFloatx80(aSign
, 0x3FFF, UINT64_C(0x8000000000000000));
5808 case float_round_down
:
5811 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5812 : packFloatx80( 0, 0, 0 );
5813 case float_round_up
:
5815 aSign
? packFloatx80( 1, 0, 0 )
5816 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5818 case float_round_to_zero
:
5821 g_assert_not_reached();
5823 return packFloatx80( aSign
, 0, 0 );
5826 lastBitMask
<<= 0x403E - aExp
;
5827 roundBitsMask
= lastBitMask
- 1;
5829 switch (status
->float_rounding_mode
) {
5830 case float_round_nearest_even
:
5831 z
.low
+= lastBitMask
>>1;
5832 if ((z
.low
& roundBitsMask
) == 0) {
5833 z
.low
&= ~lastBitMask
;
5836 case float_round_ties_away
:
5837 z
.low
+= lastBitMask
>> 1;
5839 case float_round_to_zero
:
5841 case float_round_up
:
5842 if (!extractFloatx80Sign(z
)) {
5843 z
.low
+= roundBitsMask
;
5846 case float_round_down
:
5847 if (extractFloatx80Sign(z
)) {
5848 z
.low
+= roundBitsMask
;
5854 z
.low
&= ~ roundBitsMask
;
5857 z
.low
= UINT64_C(0x8000000000000000);
5859 if (z
.low
!= a
.low
) {
5860 float_raise(float_flag_inexact
, status
);
5866 /*----------------------------------------------------------------------------
5867 | Returns the result of multiplying the extended double-precision floating-
5868 | point values `a' and `b'. The operation is performed according to the
5869 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5870 *----------------------------------------------------------------------------*/
5872 floatx80
floatx80_mul(floatx80 a
, floatx80 b
, float_status
*status
)
5874 bool aSign
, bSign
, zSign
;
5875 int32_t aExp
, bExp
, zExp
;
5876 uint64_t aSig
, bSig
, zSig0
, zSig1
;
5878 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
5879 float_raise(float_flag_invalid
, status
);
5880 return floatx80_default_nan(status
);
5882 aSig
= extractFloatx80Frac( a
);
5883 aExp
= extractFloatx80Exp( a
);
5884 aSign
= extractFloatx80Sign( a
);
5885 bSig
= extractFloatx80Frac( b
);
5886 bExp
= extractFloatx80Exp( b
);
5887 bSign
= extractFloatx80Sign( b
);
5888 zSign
= aSign
^ bSign
;
5889 if ( aExp
== 0x7FFF ) {
5890 if ( (uint64_t) ( aSig
<<1 )
5891 || ( ( bExp
== 0x7FFF ) && (uint64_t) ( bSig
<<1 ) ) ) {
5892 return propagateFloatx80NaN(a
, b
, status
);
5894 if ( ( bExp
| bSig
) == 0 ) goto invalid
;
5895 return packFloatx80(zSign
, floatx80_infinity_high
,
5896 floatx80_infinity_low
);
5898 if ( bExp
== 0x7FFF ) {
5899 if ((uint64_t)(bSig
<< 1)) {
5900 return propagateFloatx80NaN(a
, b
, status
);
5902 if ( ( aExp
| aSig
) == 0 ) {
5904 float_raise(float_flag_invalid
, status
);
5905 return floatx80_default_nan(status
);
5907 return packFloatx80(zSign
, floatx80_infinity_high
,
5908 floatx80_infinity_low
);
5911 if ( aSig
== 0 ) return packFloatx80( zSign
, 0, 0 );
5912 normalizeFloatx80Subnormal( aSig
, &aExp
, &aSig
);
5915 if ( bSig
== 0 ) return packFloatx80( zSign
, 0, 0 );
5916 normalizeFloatx80Subnormal( bSig
, &bExp
, &bSig
);
5918 zExp
= aExp
+ bExp
- 0x3FFE;
5919 mul64To128( aSig
, bSig
, &zSig0
, &zSig1
);
5920 if ( 0 < (int64_t) zSig0
) {
5921 shortShift128Left( zSig0
, zSig1
, 1, &zSig0
, &zSig1
);
5924 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
5925 zSign
, zExp
, zSig0
, zSig1
, status
);
5928 /*----------------------------------------------------------------------------
5929 | Returns the result of dividing the extended double-precision floating-point
5930 | value `a' by the corresponding value `b'. The operation is performed
5931 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5932 *----------------------------------------------------------------------------*/
5934 floatx80
floatx80_div(floatx80 a
, floatx80 b
, float_status
*status
)
5936 bool aSign
, bSign
, zSign
;
5937 int32_t aExp
, bExp
, zExp
;
5938 uint64_t aSig
, bSig
, zSig0
, zSig1
;
5939 uint64_t rem0
, rem1
, rem2
, term0
, term1
, term2
;
5941 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
5942 float_raise(float_flag_invalid
, status
);
5943 return floatx80_default_nan(status
);
5945 aSig
= extractFloatx80Frac( a
);
5946 aExp
= extractFloatx80Exp( a
);
5947 aSign
= extractFloatx80Sign( a
);
5948 bSig
= extractFloatx80Frac( b
);
5949 bExp
= extractFloatx80Exp( b
);
5950 bSign
= extractFloatx80Sign( b
);
5951 zSign
= aSign
^ bSign
;
5952 if ( aExp
== 0x7FFF ) {
5953 if ((uint64_t)(aSig
<< 1)) {
5954 return propagateFloatx80NaN(a
, b
, status
);
5956 if ( bExp
== 0x7FFF ) {
5957 if ((uint64_t)(bSig
<< 1)) {
5958 return propagateFloatx80NaN(a
, b
, status
);
5962 return packFloatx80(zSign
, floatx80_infinity_high
,
5963 floatx80_infinity_low
);
5965 if ( bExp
== 0x7FFF ) {
5966 if ((uint64_t)(bSig
<< 1)) {
5967 return propagateFloatx80NaN(a
, b
, status
);
5969 return packFloatx80( zSign
, 0, 0 );
5973 if ( ( aExp
| aSig
) == 0 ) {
5975 float_raise(float_flag_invalid
, status
);
5976 return floatx80_default_nan(status
);
5978 float_raise(float_flag_divbyzero
, status
);
5979 return packFloatx80(zSign
, floatx80_infinity_high
,
5980 floatx80_infinity_low
);
5982 normalizeFloatx80Subnormal( bSig
, &bExp
, &bSig
);
5985 if ( aSig
== 0 ) return packFloatx80( zSign
, 0, 0 );
5986 normalizeFloatx80Subnormal( aSig
, &aExp
, &aSig
);
5988 zExp
= aExp
- bExp
+ 0x3FFE;
5990 if ( bSig
<= aSig
) {
5991 shift128Right( aSig
, 0, 1, &aSig
, &rem1
);
5994 zSig0
= estimateDiv128To64( aSig
, rem1
, bSig
);
5995 mul64To128( bSig
, zSig0
, &term0
, &term1
);
5996 sub128( aSig
, rem1
, term0
, term1
, &rem0
, &rem1
);
5997 while ( (int64_t) rem0
< 0 ) {
5999 add128( rem0
, rem1
, 0, bSig
, &rem0
, &rem1
);
6001 zSig1
= estimateDiv128To64( rem1
, 0, bSig
);
6002 if ( (uint64_t) ( zSig1
<<1 ) <= 8 ) {
6003 mul64To128( bSig
, zSig1
, &term1
, &term2
);
6004 sub128( rem1
, 0, term1
, term2
, &rem1
, &rem2
);
6005 while ( (int64_t) rem1
< 0 ) {
6007 add128( rem1
, rem2
, 0, bSig
, &rem1
, &rem2
);
6009 zSig1
|= ( ( rem1
| rem2
) != 0 );
6011 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
6012 zSign
, zExp
, zSig0
, zSig1
, status
);
6015 /*----------------------------------------------------------------------------
6016 | Returns the remainder of the extended double-precision floating-point value
6017 | `a' with respect to the corresponding value `b'. The operation is performed
6018 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6019 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
6020 | the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6021 | the absolute value of the integer quotient.
6022 *----------------------------------------------------------------------------*/
6024 floatx80
floatx80_modrem(floatx80 a
, floatx80 b
, bool mod
, uint64_t *quotient
,
6025 float_status
*status
)
6028 int32_t aExp
, bExp
, expDiff
, aExpOrig
;
6029 uint64_t aSig0
, aSig1
, bSig
;
6030 uint64_t q
, term0
, term1
, alternateASig0
, alternateASig1
;
6033 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
6034 float_raise(float_flag_invalid
, status
);
6035 return floatx80_default_nan(status
);
6037 aSig0
= extractFloatx80Frac( a
);
6038 aExpOrig
= aExp
= extractFloatx80Exp( a
);
6039 aSign
= extractFloatx80Sign( a
);
6040 bSig
= extractFloatx80Frac( b
);
6041 bExp
= extractFloatx80Exp( b
);
6042 if ( aExp
== 0x7FFF ) {
6043 if ( (uint64_t) ( aSig0
<<1 )
6044 || ( ( bExp
== 0x7FFF ) && (uint64_t) ( bSig
<<1 ) ) ) {
6045 return propagateFloatx80NaN(a
, b
, status
);
6049 if ( bExp
== 0x7FFF ) {
6050 if ((uint64_t)(bSig
<< 1)) {
6051 return propagateFloatx80NaN(a
, b
, status
);
6053 if (aExp
== 0 && aSig0
>> 63) {
6055 * Pseudo-denormal argument must be returned in normalized
6058 return packFloatx80(aSign
, 1, aSig0
);
6065 float_raise(float_flag_invalid
, status
);
6066 return floatx80_default_nan(status
);
6068 normalizeFloatx80Subnormal( bSig
, &bExp
, &bSig
);
6071 if ( aSig0
== 0 ) return a
;
6072 normalizeFloatx80Subnormal( aSig0
, &aExp
, &aSig0
);
6075 expDiff
= aExp
- bExp
;
6077 if ( expDiff
< 0 ) {
6078 if ( mod
|| expDiff
< -1 ) {
6079 if (aExp
== 1 && aExpOrig
== 0) {
6081 * Pseudo-denormal argument must be returned in
6084 return packFloatx80(aSign
, aExp
, aSig0
);
6088 shift128Right( aSig0
, 0, 1, &aSig0
, &aSig1
);
6091 *quotient
= q
= ( bSig
<= aSig0
);
6092 if ( q
) aSig0
-= bSig
;
6094 while ( 0 < expDiff
) {
6095 q
= estimateDiv128To64( aSig0
, aSig1
, bSig
);
6096 q
= ( 2 < q
) ? q
- 2 : 0;
6097 mul64To128( bSig
, q
, &term0
, &term1
);
6098 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6099 shortShift128Left( aSig0
, aSig1
, 62, &aSig0
, &aSig1
);
6105 if ( 0 < expDiff
) {
6106 q
= estimateDiv128To64( aSig0
, aSig1
, bSig
);
6107 q
= ( 2 < q
) ? q
- 2 : 0;
6109 mul64To128( bSig
, q
<<( 64 - expDiff
), &term0
, &term1
);
6110 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6111 shortShift128Left( 0, bSig
, 64 - expDiff
, &term0
, &term1
);
6112 while ( le128( term0
, term1
, aSig0
, aSig1
) ) {
6114 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6117 *quotient
<<= expDiff
;
6128 sub128( term0
, term1
, aSig0
, aSig1
, &alternateASig0
, &alternateASig1
);
6129 if ( lt128( alternateASig0
, alternateASig1
, aSig0
, aSig1
)
6130 || ( eq128( alternateASig0
, alternateASig1
, aSig0
, aSig1
)
6133 aSig0
= alternateASig0
;
6134 aSig1
= alternateASig1
;
6140 normalizeRoundAndPackFloatx80(
6141 floatx80_precision_x
, zSign
, bExp
+ expDiff
, aSig0
, aSig1
, status
);
6145 /*----------------------------------------------------------------------------
6146 | Returns the remainder of the extended double-precision floating-point value
6147 | `a' with respect to the corresponding value `b'. The operation is performed
6148 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6149 *----------------------------------------------------------------------------*/
6151 floatx80
floatx80_rem(floatx80 a
, floatx80 b
, float_status
*status
)
6154 return floatx80_modrem(a
, b
, false, "ient
, status
);
6157 /*----------------------------------------------------------------------------
6158 | Returns the remainder of the extended double-precision floating-point value
6159 | `a' with respect to the corresponding value `b', with the quotient truncated
6161 *----------------------------------------------------------------------------*/
6163 floatx80
floatx80_mod(floatx80 a
, floatx80 b
, float_status
*status
)
6166 return floatx80_modrem(a
, b
, true, "ient
, status
);
6169 /*----------------------------------------------------------------------------
6170 | Returns the square root of the extended double-precision floating-point
6171 | value `a'. The operation is performed according to the IEC/IEEE Standard
6172 | for Binary Floating-Point Arithmetic.
6173 *----------------------------------------------------------------------------*/
6175 floatx80
floatx80_sqrt(floatx80 a
, float_status
*status
)
6179 uint64_t aSig0
, aSig1
, zSig0
, zSig1
, doubleZSig0
;
6180 uint64_t rem0
, rem1
, rem2
, rem3
, term0
, term1
, term2
, term3
;
6182 if (floatx80_invalid_encoding(a
)) {
6183 float_raise(float_flag_invalid
, status
);
6184 return floatx80_default_nan(status
);
6186 aSig0
= extractFloatx80Frac( a
);
6187 aExp
= extractFloatx80Exp( a
);
6188 aSign
= extractFloatx80Sign( a
);
6189 if ( aExp
== 0x7FFF ) {
6190 if ((uint64_t)(aSig0
<< 1)) {
6191 return propagateFloatx80NaN(a
, a
, status
);
6193 if ( ! aSign
) return a
;
6197 if ( ( aExp
| aSig0
) == 0 ) return a
;
6199 float_raise(float_flag_invalid
, status
);
6200 return floatx80_default_nan(status
);
6203 if ( aSig0
== 0 ) return packFloatx80( 0, 0, 0 );
6204 normalizeFloatx80Subnormal( aSig0
, &aExp
, &aSig0
);
6206 zExp
= ( ( aExp
- 0x3FFF )>>1 ) + 0x3FFF;
6207 zSig0
= estimateSqrt32( aExp
, aSig0
>>32 );
6208 shift128Right( aSig0
, 0, 2 + ( aExp
& 1 ), &aSig0
, &aSig1
);
6209 zSig0
= estimateDiv128To64( aSig0
, aSig1
, zSig0
<<32 ) + ( zSig0
<<30 );
6210 doubleZSig0
= zSig0
<<1;
6211 mul64To128( zSig0
, zSig0
, &term0
, &term1
);
6212 sub128( aSig0
, aSig1
, term0
, term1
, &rem0
, &rem1
);
6213 while ( (int64_t) rem0
< 0 ) {
6216 add128( rem0
, rem1
, zSig0
>>63, doubleZSig0
| 1, &rem0
, &rem1
);
6218 zSig1
= estimateDiv128To64( rem1
, 0, doubleZSig0
);
6219 if ( ( zSig1
& UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
6220 if ( zSig1
== 0 ) zSig1
= 1;
6221 mul64To128( doubleZSig0
, zSig1
, &term1
, &term2
);
6222 sub128( rem1
, 0, term1
, term2
, &rem1
, &rem2
);
6223 mul64To128( zSig1
, zSig1
, &term2
, &term3
);
6224 sub192( rem1
, rem2
, 0, 0, term2
, term3
, &rem1
, &rem2
, &rem3
);
6225 while ( (int64_t) rem1
< 0 ) {
6227 shortShift128Left( 0, zSig1
, 1, &term2
, &term3
);
6229 term2
|= doubleZSig0
;
6230 add192( rem1
, rem2
, rem3
, 0, term2
, term3
, &rem1
, &rem2
, &rem3
);
6232 zSig1
|= ( ( rem1
| rem2
| rem3
) != 0 );
6234 shortShift128Left( 0, zSig1
, 1, &zSig0
, &zSig1
);
6235 zSig0
|= doubleZSig0
;
6236 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
6237 0, zExp
, zSig0
, zSig1
, status
);
6240 /*----------------------------------------------------------------------------
6241 | Returns the result of converting the quadruple-precision floating-point
6242 | value `a' to the extended double-precision floating-point format. The
6243 | conversion is performed according to the IEC/IEEE Standard for Binary
6244 | Floating-Point Arithmetic.
6245 *----------------------------------------------------------------------------*/
6247 floatx80
float128_to_floatx80(float128 a
, float_status
*status
)
6251 uint64_t aSig0
, aSig1
;
6253 aSig1
= extractFloat128Frac1( a
);
6254 aSig0
= extractFloat128Frac0( a
);
6255 aExp
= extractFloat128Exp( a
);
6256 aSign
= extractFloat128Sign( a
);
6257 if ( aExp
== 0x7FFF ) {
6258 if ( aSig0
| aSig1
) {
6259 floatx80 res
= commonNaNToFloatx80(float128ToCommonNaN(a
, status
),
6261 return floatx80_silence_nan(res
, status
);
6263 return packFloatx80(aSign
, floatx80_infinity_high
,
6264 floatx80_infinity_low
);
6267 if ( ( aSig0
| aSig1
) == 0 ) return packFloatx80( aSign
, 0, 0 );
6268 normalizeFloat128Subnormal( aSig0
, aSig1
, &aExp
, &aSig0
, &aSig1
);
6271 aSig0
|= UINT64_C(0x0001000000000000);
6273 shortShift128Left( aSig0
, aSig1
, 15, &aSig0
, &aSig1
);
6274 return roundAndPackFloatx80(80, aSign
, aExp
, aSig0
, aSig1
, status
);
6278 /*----------------------------------------------------------------------------
6279 | Returns the remainder of the quadruple-precision floating-point value `a'
6280 | with respect to the corresponding value `b'. The operation is performed
6281 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6282 *----------------------------------------------------------------------------*/
6284 float128
float128_rem(float128 a
, float128 b
, float_status
*status
)
6287 int32_t aExp
, bExp
, expDiff
;
6288 uint64_t aSig0
, aSig1
, bSig0
, bSig1
, q
, term0
, term1
, term2
;
6289 uint64_t allZero
, alternateASig0
, alternateASig1
, sigMean1
;
6292 aSig1
= extractFloat128Frac1( a
);
6293 aSig0
= extractFloat128Frac0( a
);
6294 aExp
= extractFloat128Exp( a
);
6295 aSign
= extractFloat128Sign( a
);
6296 bSig1
= extractFloat128Frac1( b
);
6297 bSig0
= extractFloat128Frac0( b
);
6298 bExp
= extractFloat128Exp( b
);
6299 if ( aExp
== 0x7FFF ) {
6300 if ( ( aSig0
| aSig1
)
6301 || ( ( bExp
== 0x7FFF ) && ( bSig0
| bSig1
) ) ) {
6302 return propagateFloat128NaN(a
, b
, status
);
6306 if ( bExp
== 0x7FFF ) {
6307 if (bSig0
| bSig1
) {
6308 return propagateFloat128NaN(a
, b
, status
);
6313 if ( ( bSig0
| bSig1
) == 0 ) {
6315 float_raise(float_flag_invalid
, status
);
6316 return float128_default_nan(status
);
6318 normalizeFloat128Subnormal( bSig0
, bSig1
, &bExp
, &bSig0
, &bSig1
);
6321 if ( ( aSig0
| aSig1
) == 0 ) return a
;
6322 normalizeFloat128Subnormal( aSig0
, aSig1
, &aExp
, &aSig0
, &aSig1
);
6324 expDiff
= aExp
- bExp
;
6325 if ( expDiff
< -1 ) return a
;
6327 aSig0
| UINT64_C(0x0001000000000000),
6329 15 - ( expDiff
< 0 ),
6334 bSig0
| UINT64_C(0x0001000000000000), bSig1
, 15, &bSig0
, &bSig1
);
6335 q
= le128( bSig0
, bSig1
, aSig0
, aSig1
);
6336 if ( q
) sub128( aSig0
, aSig1
, bSig0
, bSig1
, &aSig0
, &aSig1
);
6338 while ( 0 < expDiff
) {
6339 q
= estimateDiv128To64( aSig0
, aSig1
, bSig0
);
6340 q
= ( 4 < q
) ? q
- 4 : 0;
6341 mul128By64To192( bSig0
, bSig1
, q
, &term0
, &term1
, &term2
);
6342 shortShift192Left( term0
, term1
, term2
, 61, &term1
, &term2
, &allZero
);
6343 shortShift128Left( aSig0
, aSig1
, 61, &aSig0
, &allZero
);
6344 sub128( aSig0
, 0, term1
, term2
, &aSig0
, &aSig1
);
6347 if ( -64 < expDiff
) {
6348 q
= estimateDiv128To64( aSig0
, aSig1
, bSig0
);
6349 q
= ( 4 < q
) ? q
- 4 : 0;
6351 shift128Right( bSig0
, bSig1
, 12, &bSig0
, &bSig1
);
6353 if ( expDiff
< 0 ) {
6354 shift128Right( aSig0
, aSig1
, - expDiff
, &aSig0
, &aSig1
);
6357 shortShift128Left( aSig0
, aSig1
, expDiff
, &aSig0
, &aSig1
);
6359 mul128By64To192( bSig0
, bSig1
, q
, &term0
, &term1
, &term2
);
6360 sub128( aSig0
, aSig1
, term1
, term2
, &aSig0
, &aSig1
);
6363 shift128Right( aSig0
, aSig1
, 12, &aSig0
, &aSig1
);
6364 shift128Right( bSig0
, bSig1
, 12, &bSig0
, &bSig1
);
6367 alternateASig0
= aSig0
;
6368 alternateASig1
= aSig1
;
6370 sub128( aSig0
, aSig1
, bSig0
, bSig1
, &aSig0
, &aSig1
);
6371 } while ( 0 <= (int64_t) aSig0
);
6373 aSig0
, aSig1
, alternateASig0
, alternateASig1
, (uint64_t *)&sigMean0
, &sigMean1
);
6374 if ( ( sigMean0
< 0 )
6375 || ( ( ( sigMean0
| sigMean1
) == 0 ) && ( q
& 1 ) ) ) {
6376 aSig0
= alternateASig0
;
6377 aSig1
= alternateASig1
;
6379 zSign
= ( (int64_t) aSig0
< 0 );
6380 if ( zSign
) sub128( 0, 0, aSig0
, aSig1
, &aSig0
, &aSig1
);
6381 return normalizeRoundAndPackFloat128(aSign
^ zSign
, bExp
- 4, aSig0
, aSig1
,
6385 static inline FloatRelation
6386 floatx80_compare_internal(floatx80 a
, floatx80 b
, bool is_quiet
,
6387 float_status
*status
)
6391 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
6392 float_raise(float_flag_invalid
, status
);
6393 return float_relation_unordered
;
6395 if (( ( extractFloatx80Exp( a
) == 0x7fff ) &&
6396 ( extractFloatx80Frac( a
)<<1 ) ) ||
6397 ( ( extractFloatx80Exp( b
) == 0x7fff ) &&
6398 ( extractFloatx80Frac( b
)<<1 ) )) {
6400 floatx80_is_signaling_nan(a
, status
) ||
6401 floatx80_is_signaling_nan(b
, status
)) {
6402 float_raise(float_flag_invalid
, status
);
6404 return float_relation_unordered
;
6406 aSign
= extractFloatx80Sign( a
);
6407 bSign
= extractFloatx80Sign( b
);
6408 if ( aSign
!= bSign
) {
6410 if ( ( ( (uint16_t) ( ( a
.high
| b
.high
) << 1 ) ) == 0) &&
6411 ( ( a
.low
| b
.low
) == 0 ) ) {
6413 return float_relation_equal
;
6415 return 1 - (2 * aSign
);
6418 /* Normalize pseudo-denormals before comparison. */
6419 if ((a
.high
& 0x7fff) == 0 && a
.low
& UINT64_C(0x8000000000000000)) {
6422 if ((b
.high
& 0x7fff) == 0 && b
.low
& UINT64_C(0x8000000000000000)) {
6425 if (a
.low
== b
.low
&& a
.high
== b
.high
) {
6426 return float_relation_equal
;
6428 return 1 - 2 * (aSign
^ ( lt128( a
.high
, a
.low
, b
.high
, b
.low
) ));
6433 FloatRelation
floatx80_compare(floatx80 a
, floatx80 b
, float_status
*status
)
6435 return floatx80_compare_internal(a
, b
, 0, status
);
6438 FloatRelation
floatx80_compare_quiet(floatx80 a
, floatx80 b
,
6439 float_status
*status
)
6441 return floatx80_compare_internal(a
, b
, 1, status
);
6444 floatx80
floatx80_scalbn(floatx80 a
, int n
, float_status
*status
)
6450 if (floatx80_invalid_encoding(a
)) {
6451 float_raise(float_flag_invalid
, status
);
6452 return floatx80_default_nan(status
);
6454 aSig
= extractFloatx80Frac( a
);
6455 aExp
= extractFloatx80Exp( a
);
6456 aSign
= extractFloatx80Sign( a
);
6458 if ( aExp
== 0x7FFF ) {
6460 return propagateFloatx80NaN(a
, a
, status
);
6474 } else if (n
< -0x10000) {
6479 return normalizeRoundAndPackFloatx80(status
->floatx80_rounding_precision
,
6480 aSign
, aExp
, aSig
, 0, status
);
6483 static void __attribute__((constructor
)) softfloat_init(void)
6485 union_float64 ua
, ub
, uc
, ur
;
6487 if (QEMU_NO_HARDFLOAT
) {
6491 * Test that the host's FMA is not obviously broken. For example,
6492 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6493 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6495 ua
.s
= 0x0020000000000001ULL
;
6496 ub
.s
= 0x3ca0000000000000ULL
;
6497 uc
.s
= 0x0020000000000000ULL
;
6498 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
6499 if (ur
.s
!= 0x0020000000000001ULL
) {
6500 force_soft_fma
= true;