fpu/softfloat.c

   1 /*
   2  * QEMU float support
   3  *
   4  * The code in this source file is derived from release 2a of the SoftFloat
   5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6  * some later contributions) are provided under that license, as detailed below.
   7  * It has subsequently been modified by contributors to the QEMU Project,
   8  * so some portions are provided under:
   9  *  the SoftFloat-2a license
  10  *  the BSD license
  11  *  GPL-v2-or-later
  12  *
  13  * Any future contributions to this file after December 1st 2014 will be
  14  * taken to be licensed under the Softfloat-2a license unless specifically
  15  * indicated otherwise.
  16  */
  17
  18 /*
  19 ===============================================================================
  20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
  21 Arithmetic Package, Release 2a.
  22
  23 Written by John R. Hauser.  This work was made possible in part by the
  24 International Computer Science Institute, located at Suite 600, 1947 Center
  25 Street, Berkeley, California 94704.  Funding was partially provided by the
  26 National Science Foundation under grant MIP-9311980.  The original version
  27 of this code was written as part of a project to build a fixed-point vector
  28 processor in collaboration with the University of California at Berkeley,
  29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  31 arithmetic/SoftFloat.html'.
  32
  33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  38
  39 Derivative works are acceptable, even for commercial purposes, so long as
  40 (1) they include prominent notice that the work is derivative, and (2) they
  41 include prominent notice akin to these four paragraphs for those parts of
  42 this code that are retained.
  43
  44 ===============================================================================
  45 */
  46
  47 /* BSD licensing:
  48  * Copyright (c) 2006, Fabrice Bellard
  49  * All rights reserved.
  50  *
  51  * Redistribution and use in source and binary forms, with or without
  52  * modification, are permitted provided that the following conditions are met:
  53  *
  54  * 1. Redistributions of source code must retain the above copyright notice,
  55  * this list of conditions and the following disclaimer.
  56  *
  57  * 2. Redistributions in binary form must reproduce the above copyright notice,
  58  * this list of conditions and the following disclaimer in the documentation
  59  * and/or other materials provided with the distribution.
  60  *
  61  * 3. Neither the name of the copyright holder nor the names of its contributors
  62  * may be used to endorse or promote products derived from this software without
  63  * specific prior written permission.
  64  *
  65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  75  * THE POSSIBILITY OF SUCH DAMAGE.
  76  */
  77
  78 /* Portions of this work are licensed under the terms of the GNU GPL,
  79  * version 2 or later. See the COPYING file in the top-level directory.
  80  */
  81
  82 /* softfloat (and in particular the code in softfloat-specialize.h) is
  83  * target-dependent and needs the TARGET_* macros.
  84  */
  85 #include "qemu/osdep.h"
  86 #include <math.h>
  87 #include "qemu/bitops.h"
  88 #include "fpu/softfloat.h"
  89
  90 /* We only need stdlib for abort() */
  91
  92 /*----------------------------------------------------------------------------
  93 | Primitive arithmetic functions, including multi-word arithmetic, and
  94 | division and square root approximations.  (Can be specialized to target if
  95 | desired.)
  96 *----------------------------------------------------------------------------*/
  97 #include "fpu/softfloat-macros.h"
  98
  99 /*
 100  * Hardfloat
 101  *
 102  * Fast emulation of guest FP instructions is challenging for two reasons.
 103  * First, FP instruction semantics are similar but not identical, particularly
 104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
 105  * exception flags is not trivial: reading the host's flags register with a
 106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 107  * and trapping on every FP exception is not fast nor pleasant to work with.
 108  *
 109  * We address these challenges by leveraging the host FPU for a subset of the
 110  * operations. To do this we expand on the idea presented in this paper:
 111  *
 112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 114  *
 115  * The idea is thus to leverage the host FPU to (1) compute FP operations
 116  * and (2) identify whether FP exceptions occurred while avoiding
 117  * expensive exception flag register accesses.
 118  *
 119  * An important optimization shown in the paper is that given that exception
 120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 121  * This is particularly useful for the inexact flag, which is very frequently
 122  * raised in floating-point workloads.
 123  *
 124  * We optimize the code further by deferring to soft-fp whenever FP exception
 125  * detection might get hairy. Two examples: (1) when at least one operand is
 126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 127  * and the result is < the minimum normal.
 128  */
 129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
 130     static inline void name(soft_t *a, float_status *s)                 \
 131     {                                                                   \
 132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
 133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
 134                                      soft_t ## _is_neg(*a));            \
 135             float_raise(float_flag_input_denormal, s);                  \
 136         }                                                               \
 137     }
 138
 139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
 140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
 141 #undef GEN_INPUT_FLUSH__NOCHECK
 142
 143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
 144     static inline void name(soft_t *a, float_status *s) \
 145     {                                                   \
 146         if (likely(!s->flush_inputs_to_zero)) {         \
 147             return;                                     \
 148         }                                               \
 149         soft_t ## _input_flush__nocheck(a, s);          \
 150     }
 151
 152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
 153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
 154 #undef GEN_INPUT_FLUSH1
 155
 156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
 157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
 158     {                                                                   \
 159         if (likely(!s->flush_inputs_to_zero)) {                         \
 160             return;                                                     \
 161         }                                                               \
 162         soft_t ## _input_flush__nocheck(a, s);                          \
 163         soft_t ## _input_flush__nocheck(b, s);                          \
 164     }
 165
 166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
 167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
 168 #undef GEN_INPUT_FLUSH2
 169
 170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
 171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
 172     {                                                                   \
 173         if (likely(!s->flush_inputs_to_zero)) {                         \
 174             return;                                                     \
 175         }                                                               \
 176         soft_t ## _input_flush__nocheck(a, s);                          \
 177         soft_t ## _input_flush__nocheck(b, s);                          \
 178         soft_t ## _input_flush__nocheck(c, s);                          \
 179     }
 180
 181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
 182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
 183 #undef GEN_INPUT_FLUSH3
 184
 185 /*
 186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
 187  * hardfloat functions. Each combination of number of inputs and float size
 188  * gets its own value.
 189  */
 190 #if defined(__x86_64__)
 191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
 193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
 195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
 197 #else
 198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
 200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
 202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
 204 #endif
 205
 206 /*
 207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 208  * float{32,64}_is_infinity when !USE_FP.
 209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 211  */
 212 #if defined(__x86_64__) || defined(__aarch64__)
 213 # define QEMU_HARDFLOAT_USE_ISINF   1
 214 #else
 215 # define QEMU_HARDFLOAT_USE_ISINF   0
 216 #endif
 217
 218 /*
 219  * Some targets clear the FP flags before most FP operations. This prevents
 220  * the use of hardfloat, since hardfloat relies on the inexact flag being
 221  * already set.
 222  */
 223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
 224 # if defined(__FAST_MATH__)
 225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
 226     IEEE implementation
 227 # endif
 228 # define QEMU_NO_HARDFLOAT 1
 229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
 230 #else
 231 # define QEMU_NO_HARDFLOAT 0
 232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
 233 #endif
 234
 235 static inline bool can_use_fpu(const float_status *s)
 236 {
 237     if (QEMU_NO_HARDFLOAT) {
 238         return false;
 239     }
 240     return likely(s->float_exception_flags & float_flag_inexact &&
 241                   s->float_rounding_mode == float_round_nearest_even);
 242 }
 243
 244 /*
 245  * Hardfloat generation functions. Each operation can have two flavors:
 246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 247  * most condition checks, or native ones (e.g. fpclassify).
 248  *
 249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
 250  * compiler to propagate constants and inline everything into the callers.
 251  *
 252  * We only generate functions for operations with two inputs, since only
 253  * these are common enough to justify consolidating them into common code.
 254  */
 255
 256 typedef union {
 257     float32 s;
 258     float h;
 259 } union_float32;
 260
 261 typedef union {
 262     float64 s;
 263     double h;
 264 } union_float64;
 265
 266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
 267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
 268
 269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
 270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
 271 typedef float   (*hard_f32_op2_fn)(float a, float b);
 272 typedef double  (*hard_f64_op2_fn)(double a, double b);
 273
 274 /* 2-input is-zero-or-normal */
 275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
 276 {
 277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
 278         /*
 279          * Not using a temp variable for consecutive fpclassify calls ends up
 280          * generating faster code.
 281          */
 282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 284     }
 285     return float32_is_zero_or_normal(a.s) &&
 286            float32_is_zero_or_normal(b.s);
 287 }
 288
 289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
 290 {
 291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
 292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 294     }
 295     return float64_is_zero_or_normal(a.s) &&
 296            float64_is_zero_or_normal(b.s);
 297 }
 298
 299 /* 3-input is-zero-or-normal */
 300 static inline
 301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
 302 {
 303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
 304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 307     }
 308     return float32_is_zero_or_normal(a.s) &&
 309            float32_is_zero_or_normal(b.s) &&
 310            float32_is_zero_or_normal(c.s);
 311 }
 312
 313 static inline
 314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
 315 {
 316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
 317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 320     }
 321     return float64_is_zero_or_normal(a.s) &&
 322            float64_is_zero_or_normal(b.s) &&
 323            float64_is_zero_or_normal(c.s);
 324 }
 325
 326 static inline bool f32_is_inf(union_float32 a)
 327 {
 328     if (QEMU_HARDFLOAT_USE_ISINF) {
 329         return isinf(a.h);
 330     }
 331     return float32_is_infinity(a.s);
 332 }
 333
 334 static inline bool f64_is_inf(union_float64 a)
 335 {
 336     if (QEMU_HARDFLOAT_USE_ISINF) {
 337         return isinf(a.h);
 338     }
 339     return float64_is_infinity(a.s);
 340 }
 341
 342 static inline float32
 343 float32_gen2(float32 xa, float32 xb, float_status *s,
 344              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
 345              f32_check_fn pre, f32_check_fn post)
 346 {
 347     union_float32 ua, ub, ur;
 348
 349     ua.s = xa;
 350     ub.s = xb;
 351
 352     if (unlikely(!can_use_fpu(s))) {
 353         goto soft;
 354     }
 355
 356     float32_input_flush2(&ua.s, &ub.s, s);
 357     if (unlikely(!pre(ua, ub))) {
 358         goto soft;
 359     }
 360
 361     ur.h = hard(ua.h, ub.h);
 362     if (unlikely(f32_is_inf(ur))) {
 363         float_raise(float_flag_overflow, s);
 364     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
 365         goto soft;
 366     }
 367     return ur.s;
 368
 369  soft:
 370     return soft(ua.s, ub.s, s);
 371 }
 372
 373 static inline float64
 374 float64_gen2(float64 xa, float64 xb, float_status *s,
 375              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
 376              f64_check_fn pre, f64_check_fn post)
 377 {
 378     union_float64 ua, ub, ur;
 379
 380     ua.s = xa;
 381     ub.s = xb;
 382
 383     if (unlikely(!can_use_fpu(s))) {
 384         goto soft;
 385     }
 386
 387     float64_input_flush2(&ua.s, &ub.s, s);
 388     if (unlikely(!pre(ua, ub))) {
 389         goto soft;
 390     }
 391
 392     ur.h = hard(ua.h, ub.h);
 393     if (unlikely(f64_is_inf(ur))) {
 394         float_raise(float_flag_overflow, s);
 395     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
 396         goto soft;
 397     }
 398     return ur.s;
 399
 400  soft:
 401     return soft(ua.s, ub.s, s);
 402 }
 403
 404 /*----------------------------------------------------------------------------
 405 | Returns the fraction bits of the single-precision floating-point value `a'.
 406 *----------------------------------------------------------------------------*/
 407
 408 static inline uint32_t extractFloat32Frac(float32 a)
 409 {
 410     return float32_val(a) & 0x007FFFFF;
 411 }
 412
 413 /*----------------------------------------------------------------------------
 414 | Returns the exponent bits of the single-precision floating-point value `a'.
 415 *----------------------------------------------------------------------------*/
 416
 417 static inline int extractFloat32Exp(float32 a)
 418 {
 419     return (float32_val(a) >> 23) & 0xFF;
 420 }
 421
 422 /*----------------------------------------------------------------------------
 423 | Returns the sign bit of the single-precision floating-point value `a'.
 424 *----------------------------------------------------------------------------*/
 425
 426 static inline bool extractFloat32Sign(float32 a)
 427 {
 428     return float32_val(a) >> 31;
 429 }
 430
 431 /*----------------------------------------------------------------------------
 432 | Returns the fraction bits of the double-precision floating-point value `a'.
 433 *----------------------------------------------------------------------------*/
 434
 435 static inline uint64_t extractFloat64Frac(float64 a)
 436 {
 437     return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
 438 }
 439
 440 /*----------------------------------------------------------------------------
 441 | Returns the exponent bits of the double-precision floating-point value `a'.
 442 *----------------------------------------------------------------------------*/
 443
 444 static inline int extractFloat64Exp(float64 a)
 445 {
 446     return (float64_val(a) >> 52) & 0x7FF;
 447 }
 448
 449 /*----------------------------------------------------------------------------
 450 | Returns the sign bit of the double-precision floating-point value `a'.
 451 *----------------------------------------------------------------------------*/
 452
 453 static inline bool extractFloat64Sign(float64 a)
 454 {
 455     return float64_val(a) >> 63;
 456 }
 457
 458 /*
 459  * Classify a floating point number. Everything above float_class_qnan
 460  * is a NaN so cls >= float_class_qnan is any NaN.
 461  */
 462
 463 typedef enum __attribute__ ((__packed__)) {
 464     float_class_unclassified,
 465     float_class_zero,
 466     float_class_normal,
 467     float_class_inf,
 468     float_class_qnan,  /* all NaNs from here */
 469     float_class_snan,
 470 } FloatClass;
 471
 472 #define float_cmask(bit)  (1u << (bit))
 473
 474 enum {
 475     float_cmask_zero    = float_cmask(float_class_zero),
 476     float_cmask_normal  = float_cmask(float_class_normal),
 477     float_cmask_inf     = float_cmask(float_class_inf),
 478     float_cmask_qnan    = float_cmask(float_class_qnan),
 479     float_cmask_snan    = float_cmask(float_class_snan),
 480
 481     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
 482     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
 483 };
 484
 485 /* Flags for parts_minmax. */
 486 enum {
 487     /* Set for minimum; clear for maximum. */
 488     minmax_ismin = 1,
 489     /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
 490     minmax_isnum = 2,
 491     /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
 492     minmax_ismag = 4,
 493 };
 494
 495 /* Simple helpers for checking if, or what kind of, NaN we have */
 496 static inline __attribute__((unused)) bool is_nan(FloatClass c)
 497 {
 498     return unlikely(c >= float_class_qnan);
 499 }
 500
 501 static inline __attribute__((unused)) bool is_snan(FloatClass c)
 502 {
 503     return c == float_class_snan;
 504 }
 505
 506 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
 507 {
 508     return c == float_class_qnan;
 509 }
 510
 511 /*
 512  * Structure holding all of the decomposed parts of a float.
 513  * The exponent is unbiased and the fraction is normalized.
 514  *
 515  * The fraction words are stored in big-endian word ordering,
 516  * so that truncation from a larger format to a smaller format
 517  * can be done simply by ignoring subsequent elements.
 518  */
 519
 520 typedef struct {
 521     FloatClass cls;
 522     bool sign;
 523     int32_t exp;
 524     union {
 525         /* Routines that know the structure may reference the singular name. */
 526         uint64_t frac;
 527         /*
 528          * Routines expanded with multiple structures reference "hi" and "lo"
 529          * depending on the operation.  In FloatParts64, "hi" and "lo" are
 530          * both the same word and aliased here.
 531          */
 532         uint64_t frac_hi;
 533         uint64_t frac_lo;
 534     };
 535 } FloatParts64;
 536
 537 typedef struct {
 538     FloatClass cls;
 539     bool sign;
 540     int32_t exp;
 541     uint64_t frac_hi;
 542     uint64_t frac_lo;
 543 } FloatParts128;
 544
 545 typedef struct {
 546     FloatClass cls;
 547     bool sign;
 548     int32_t exp;
 549     uint64_t frac_hi;
 550     uint64_t frac_hm;  /* high-middle */
 551     uint64_t frac_lm;  /* low-middle */
 552     uint64_t frac_lo;
 553 } FloatParts256;
 554
 555 /* These apply to the most significant word of each FloatPartsN. */
 556 #define DECOMPOSED_BINARY_POINT    63
 557 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
 558
 559 /* Structure holding all of the relevant parameters for a format.
 560  *   exp_size: the size of the exponent field
 561  *   exp_bias: the offset applied to the exponent field
 562  *   exp_max: the maximum normalised exponent
 563  *   frac_size: the size of the fraction field
 564  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 565  * The following are computed based the size of fraction
 566  *   round_mask: bits below lsb which must be rounded
 567  * The following optional modifiers are available:
 568  *   arm_althp: handle ARM Alternative Half Precision
 569  */
 570 typedef struct {
 571     int exp_size;
 572     int exp_bias;
 573     int exp_max;
 574     int frac_size;
 575     int frac_shift;
 576     bool arm_althp;
 577     uint64_t round_mask;
 578 } FloatFmt;
 579
 580 /* Expand fields based on the size of exponent and fraction */
 581 #define FLOAT_PARAMS_(E)                                \
 582     .exp_size       = E,                                \
 583     .exp_bias       = ((1 << E) - 1) >> 1,              \
 584     .exp_max        = (1 << E) - 1
 585
 586 #define FLOAT_PARAMS(E, F)                              \
 587     FLOAT_PARAMS_(E),                                   \
 588     .frac_size      = F,                                \
 589     .frac_shift     = (-F - 1) & 63,                    \
 590     .round_mask     = (1ull << ((-F - 1) & 63)) - 1
 591
 592 static const FloatFmt float16_params = {
 593     FLOAT_PARAMS(5, 10)
 594 };
 595
 596 static const FloatFmt float16_params_ahp = {
 597     FLOAT_PARAMS(5, 10),
 598     .arm_althp = true
 599 };
 600
 601 static const FloatFmt bfloat16_params = {
 602     FLOAT_PARAMS(8, 7)
 603 };
 604
 605 static const FloatFmt float32_params = {
 606     FLOAT_PARAMS(8, 23)
 607 };
 608
 609 static const FloatFmt float64_params = {
 610     FLOAT_PARAMS(11, 52)
 611 };
 612
 613 static const FloatFmt float128_params = {
 614     FLOAT_PARAMS(15, 112)
 615 };
 616
 617 #define FLOATX80_PARAMS(R)              \
 618     FLOAT_PARAMS_(15),                  \
 619     .frac_size = R == 64 ? 63 : R,      \
 620     .frac_shift = 0,                    \
 621     .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
 622
 623 static const FloatFmt floatx80_params[3] = {
 624     [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
 625     [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
 626     [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
 627 };
 628
 629 /* Unpack a float to parts, but do not canonicalize.  */
 630 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
 631 {
 632     const int f_size = fmt->frac_size;
 633     const int e_size = fmt->exp_size;
 634
 635     *r = (FloatParts64) {
 636         .cls = float_class_unclassified,
 637         .sign = extract64(raw, f_size + e_size, 1),
 638         .exp = extract64(raw, f_size, e_size),
 639         .frac = extract64(raw, 0, f_size)
 640     };
 641 }
 642
 643 static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
 644 {
 645     unpack_raw64(p, &float16_params, f);
 646 }
 647
 648 static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
 649 {
 650     unpack_raw64(p, &bfloat16_params, f);
 651 }
 652
 653 static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
 654 {
 655     unpack_raw64(p, &float32_params, f);
 656 }
 657
 658 static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
 659 {
 660     unpack_raw64(p, &float64_params, f);
 661 }
 662
 663 static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
 664 {
 665     *p = (FloatParts128) {
 666         .cls = float_class_unclassified,
 667         .sign = extract32(f.high, 15, 1),
 668         .exp = extract32(f.high, 0, 15),
 669         .frac_hi = f.low
 670     };
 671 }
 672
 673 static void float128_unpack_raw(FloatParts128 *p, float128 f)
 674 {
 675     const int f_size = float128_params.frac_size - 64;
 676     const int e_size = float128_params.exp_size;
 677
 678     *p = (FloatParts128) {
 679         .cls = float_class_unclassified,
 680         .sign = extract64(f.high, f_size + e_size, 1),
 681         .exp = extract64(f.high, f_size, e_size),
 682         .frac_hi = extract64(f.high, 0, f_size),
 683         .frac_lo = f.low,
 684     };
 685 }
 686
 687 /* Pack a float from parts, but do not canonicalize.  */
 688 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
 689 {
 690     const int f_size = fmt->frac_size;
 691     const int e_size = fmt->exp_size;
 692     uint64_t ret;
 693
 694     ret = (uint64_t)p->sign << (f_size + e_size);
 695     ret = deposit64(ret, f_size, e_size, p->exp);
 696     ret = deposit64(ret, 0, f_size, p->frac);
 697     return ret;
 698 }
 699
 700 static inline float16 float16_pack_raw(const FloatParts64 *p)
 701 {
 702     return make_float16(pack_raw64(p, &float16_params));
 703 }
 704
 705 static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
 706 {
 707     return pack_raw64(p, &bfloat16_params);
 708 }
 709
 710 static inline float32 float32_pack_raw(const FloatParts64 *p)
 711 {
 712     return make_float32(pack_raw64(p, &float32_params));
 713 }
 714
 715 static inline float64 float64_pack_raw(const FloatParts64 *p)
 716 {
 717     return make_float64(pack_raw64(p, &float64_params));
 718 }
 719
 720 static float128 float128_pack_raw(const FloatParts128 *p)
 721 {
 722     const int f_size = float128_params.frac_size - 64;
 723     const int e_size = float128_params.exp_size;
 724     uint64_t hi;
 725
 726     hi = (uint64_t)p->sign << (f_size + e_size);
 727     hi = deposit64(hi, f_size, e_size, p->exp);
 728     hi = deposit64(hi, 0, f_size, p->frac_hi);
 729     return make_float128(hi, p->frac_lo);
 730 }
 731
 732 /*----------------------------------------------------------------------------
 733 | Functions and definitions to determine:  (1) whether tininess for underflow
 734 | is detected before or after rounding by default, (2) what (if anything)
 735 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
 736 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
 737 | are propagated from function inputs to output.  These details are target-
 738 | specific.
 739 *----------------------------------------------------------------------------*/
 740 #include "softfloat-specialize.c.inc"
 741
 742 #define PARTS_GENERIC_64_128(NAME, P) \
 743     QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
 744
 745 #define PARTS_GENERIC_64_128_256(NAME, P) \
 746     QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
 747                  (FloatParts128 *, parts128_##NAME), parts64_##NAME)
 748
 749 #define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
 750 #define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
 751
 752 static void parts64_return_nan(FloatParts64 *a, float_status *s);
 753 static void parts128_return_nan(FloatParts128 *a, float_status *s);
 754
 755 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
 756
 757 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
 758                                       float_status *s);
 759 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
 760                                         float_status *s);
 761
 762 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
 763
 764 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
 765                                              FloatParts64 *c, float_status *s,
 766                                              int ab_mask, int abc_mask);
 767 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
 768                                                FloatParts128 *b,
 769                                                FloatParts128 *c,
 770                                                float_status *s,
 771                                                int ab_mask, int abc_mask);
 772
 773 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
 774     PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
 775
 776 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
 777                                  const FloatFmt *fmt);
 778 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
 779                                   const FloatFmt *fmt);
 780
 781 #define parts_canonicalize(A, S, F) \
 782     PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
 783
 784 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
 785                                    const FloatFmt *fmt);
 786 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
 787                                     const FloatFmt *fmt);
 788
 789 #define parts_uncanon_normal(A, S, F) \
 790     PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
 791
 792 static void parts64_uncanon(FloatParts64 *p, float_status *status,
 793                             const FloatFmt *fmt);
 794 static void parts128_uncanon(FloatParts128 *p, float_status *status,
 795                              const FloatFmt *fmt);
 796
 797 #define parts_uncanon(A, S, F) \
 798     PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
 799
 800 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
 801 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
 802 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
 803
 804 #define parts_add_normal(A, B) \
 805     PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
 806
 807 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
 808 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
 809 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
 810
 811 #define parts_sub_normal(A, B) \
 812     PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
 813
 814 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
 815                                     float_status *s, bool subtract);
 816 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
 817                                       float_status *s, bool subtract);
 818
 819 #define parts_addsub(A, B, S, Z) \
 820     PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
 821
 822 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
 823                                  float_status *s);
 824 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
 825                                    float_status *s);
 826
 827 #define parts_mul(A, B, S) \
 828     PARTS_GENERIC_64_128(mul, A)(A, B, S)
 829
 830 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
 831                                     FloatParts64 *c, int flags,
 832                                     float_status *s);
 833 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
 834                                       FloatParts128 *c, int flags,
 835                                       float_status *s);
 836
 837 #define parts_muladd(A, B, C, Z, S) \
 838     PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
 839
 840 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
 841                                  float_status *s);
 842 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
 843                                    float_status *s);
 844
 845 #define parts_div(A, B, S) \
 846     PARTS_GENERIC_64_128(div, A)(A, B, S)
 847
 848 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
 849 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
 850
 851 #define parts_sqrt(A, S, F) \
 852     PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
 853
 854 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
 855                                         int scale, int frac_size);
 856 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
 857                                          int scale, int frac_size);
 858
 859 #define parts_round_to_int_normal(A, R, C, F) \
 860     PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
 861
 862 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
 863                                  int scale, float_status *s,
 864                                  const FloatFmt *fmt);
 865 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
 866                                   int scale, float_status *s,
 867                                   const FloatFmt *fmt);
 868
 869 #define parts_round_to_int(A, R, C, S, F) \
 870     PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
 871
 872 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
 873                                      int scale, int64_t min, int64_t max,
 874                                      float_status *s);
 875 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
 876                                      int scale, int64_t min, int64_t max,
 877                                      float_status *s);
 878
 879 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
 880     PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
 881
 882 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
 883                                       int scale, uint64_t max,
 884                                       float_status *s);
 885 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
 886                                        int scale, uint64_t max,
 887                                        float_status *s);
 888
 889 #define parts_float_to_uint(P, R, Z, M, S) \
 890     PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
 891
 892 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
 893                                   int scale, float_status *s);
 894 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
 895                                    int scale, float_status *s);
 896
 897 #define parts_sint_to_float(P, I, Z, S) \
 898     PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
 899
 900 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
 901                                   int scale, float_status *s);
 902 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
 903                                    int scale, float_status *s);
 904
 905 #define parts_uint_to_float(P, I, Z, S) \
 906     PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
 907
 908 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
 909                                     float_status *s, int flags);
 910 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
 911                                       float_status *s, int flags);
 912
 913 #define parts_minmax(A, B, S, F) \
 914     PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
 915
 916 static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
 917                            float_status *s, bool q);
 918 static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
 919                             float_status *s, bool q);
 920
 921 #define parts_compare(A, B, S, Q) \
 922     PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
 923
 924 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
 925 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
 926
 927 #define parts_scalbn(A, N, S) \
 928     PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
 929
 930 /*
 931  * Helper functions for softfloat-parts.c.inc, per-size operations.
 932  */
 933
 934 #define FRAC_GENERIC_64_128(NAME, P) \
 935     QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
 936
 937 #define FRAC_GENERIC_64_128_256(NAME, P) \
 938     QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
 939                  (FloatParts128 *, frac128_##NAME), frac64_##NAME)
 940
 941 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
 942 {
 943     return uadd64_overflow(a->frac, b->frac, &r->frac);
 944 }
 945
 946 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
 947 {
 948     bool c = 0;
 949     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 950     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 951     return c;
 952 }
 953
 954 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
 955 {
 956     bool c = 0;
 957     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 958     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
 959     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
 960     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 961     return c;
 962 }
 963
 964 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
 965
 966 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
 967 {
 968     return uadd64_overflow(a->frac, c, &r->frac);
 969 }
 970
 971 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
 972 {
 973     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
 974     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
 975 }
 976
 977 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
 978
 979 static void frac64_allones(FloatParts64 *a)
 980 {
 981     a->frac = -1;
 982 }
 983
 984 static void frac128_allones(FloatParts128 *a)
 985 {
 986     a->frac_hi = a->frac_lo = -1;
 987 }
 988
 989 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
 990
 991 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
 992 {
 993     return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
 994 }
 995
 996 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
 997 {
 998     uint64_t ta = a->frac_hi, tb = b->frac_hi;
 999     if (ta == tb) {
1000         ta = a->frac_lo, tb = b->frac_lo;
1001         if (ta == tb) {
1002             return 0;
1003         }
1004     }
1005     return ta < tb ? -1 : 1;
1006 }
1007
1008 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
1009
1010 static void frac64_clear(FloatParts64 *a)
1011 {
1012     a->frac = 0;
1013 }
1014
1015 static void frac128_clear(FloatParts128 *a)
1016 {
1017     a->frac_hi = a->frac_lo = 0;
1018 }
1019
1020 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
1021
1022 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
1023 {
1024     uint64_t n1, n0, r, q;
1025     bool ret;
1026
1027     /*
1028      * We want a 2*N / N-bit division to produce exactly an N-bit
1029      * result, so that we do not lose any precision and so that we
1030      * do not have to renormalize afterward.  If A.frac < B.frac,
1031      * then division would produce an (N-1)-bit result; shift A left
1032      * by one to produce the an N-bit result, and return true to
1033      * decrement the exponent to match.
1034      *
1035      * The udiv_qrnnd algorithm that we're using requires normalization,
1036      * i.e. the msb of the denominator must be set, which is already true.
1037      */
1038     ret = a->frac < b->frac;
1039     if (ret) {
1040         n0 = a->frac;
1041         n1 = 0;
1042     } else {
1043         n0 = a->frac >> 1;
1044         n1 = a->frac << 63;
1045     }
1046     q = udiv_qrnnd(&r, n0, n1, b->frac);
1047
1048     /* Set lsb if there is a remainder, to set inexact. */
1049     a->frac = q | (r != 0);
1050
1051     return ret;
1052 }
1053
1054 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1055 {
1056     uint64_t q0, q1, a0, a1, b0, b1;
1057     uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1058     bool ret = false;
1059
1060     a0 = a->frac_hi, a1 = a->frac_lo;
1061     b0 = b->frac_hi, b1 = b->frac_lo;
1062
1063     ret = lt128(a0, a1, b0, b1);
1064     if (!ret) {
1065         a1 = shr_double(a0, a1, 1);
1066         a0 = a0 >> 1;
1067     }
1068
1069     /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1070     q0 = estimateDiv128To64(a0, a1, b0);
1071
1072     /*
1073      * Estimate is high because B1 was not included (unless B1 == 0).
1074      * Reduce quotient and increase remainder until remainder is non-negative.
1075      * This loop will execute 0 to 2 times.
1076      */
1077     mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1078     sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1079     while (r0 != 0) {
1080         q0--;
1081         add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1082     }
1083
1084     /* Repeat using the remainder, producing a second word of quotient. */
1085     q1 = estimateDiv128To64(r1, r2, b0);
1086     mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1087     sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1088     while (r1 != 0) {
1089         q1--;
1090         add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1091     }
1092
1093     /* Any remainder indicates inexact; set sticky bit. */
1094     q1 |= (r2 | r3) != 0;
1095
1096     a->frac_hi = q0;
1097     a->frac_lo = q1;
1098     return ret;
1099 }
1100
1101 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1102
1103 static bool frac64_eqz(FloatParts64 *a)
1104 {
1105     return a->frac == 0;
1106 }
1107
1108 static bool frac128_eqz(FloatParts128 *a)
1109 {
1110     return (a->frac_hi | a->frac_lo) == 0;
1111 }
1112
1113 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1114
1115 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1116 {
1117     mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1118 }
1119
1120 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1121 {
1122     mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1123                 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1124 }
1125
1126 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1127
1128 static void frac64_neg(FloatParts64 *a)
1129 {
1130     a->frac = -a->frac;
1131 }
1132
1133 static void frac128_neg(FloatParts128 *a)
1134 {
1135     bool c = 0;
1136     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1137     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1138 }
1139
1140 static void frac256_neg(FloatParts256 *a)
1141 {
1142     bool c = 0;
1143     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1144     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1145     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1146     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1147 }
1148
1149 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1150
1151 static int frac64_normalize(FloatParts64 *a)
1152 {
1153     if (a->frac) {
1154         int shift = clz64(a->frac);
1155         a->frac <<= shift;
1156         return shift;
1157     }
1158     return 64;
1159 }
1160
1161 static int frac128_normalize(FloatParts128 *a)
1162 {
1163     if (a->frac_hi) {
1164         int shl = clz64(a->frac_hi);
1165         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1166         a->frac_lo <<= shl;
1167         return shl;
1168     } else if (a->frac_lo) {
1169         int shl = clz64(a->frac_lo);
1170         a->frac_hi = a->frac_lo << shl;
1171         a->frac_lo = 0;
1172         return shl + 64;
1173     }
1174     return 128;
1175 }
1176
1177 static int frac256_normalize(FloatParts256 *a)
1178 {
1179     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1180     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1181     int ret, shl;
1182
1183     if (likely(a0)) {
1184         shl = clz64(a0);
1185         if (shl == 0) {
1186             return 0;
1187         }
1188         ret = shl;
1189     } else {
1190         if (a1) {
1191             ret = 64;
1192             a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1193         } else if (a2) {
1194             ret = 128;
1195             a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1196         } else if (a3) {
1197             ret = 192;
1198             a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1199         } else {
1200             ret = 256;
1201             a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1202             goto done;
1203         }
1204         shl = clz64(a0);
1205         if (shl == 0) {
1206             goto done;
1207         }
1208         ret += shl;
1209     }
1210
1211     a0 = shl_double(a0, a1, shl);
1212     a1 = shl_double(a1, a2, shl);
1213     a2 = shl_double(a2, a3, shl);
1214     a3 <<= shl;
1215
1216  done:
1217     a->frac_hi = a0;
1218     a->frac_hm = a1;
1219     a->frac_lm = a2;
1220     a->frac_lo = a3;
1221     return ret;
1222 }
1223
1224 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1225
1226 static void frac64_shl(FloatParts64 *a, int c)
1227 {
1228     a->frac <<= c;
1229 }
1230
1231 static void frac128_shl(FloatParts128 *a, int c)
1232 {
1233     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1234
1235     if (c & 64) {
1236         a0 = a1, a1 = 0;
1237     }
1238
1239     c &= 63;
1240     if (c) {
1241         a0 = shl_double(a0, a1, c);
1242         a1 = a1 << c;
1243     }
1244
1245     a->frac_hi = a0;
1246     a->frac_lo = a1;
1247 }
1248
1249 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1250
1251 static void frac64_shr(FloatParts64 *a, int c)
1252 {
1253     a->frac >>= c;
1254 }
1255
1256 static void frac128_shr(FloatParts128 *a, int c)
1257 {
1258     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1259
1260     if (c & 64) {
1261         a1 = a0, a0 = 0;
1262     }
1263
1264     c &= 63;
1265     if (c) {
1266         a1 = shr_double(a0, a1, c);
1267         a0 = a0 >> c;
1268     }
1269
1270     a->frac_hi = a0;
1271     a->frac_lo = a1;
1272 }
1273
1274 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1275
1276 static void frac64_shrjam(FloatParts64 *a, int c)
1277 {
1278     uint64_t a0 = a->frac;
1279
1280     if (likely(c != 0)) {
1281         if (likely(c < 64)) {
1282             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1283         } else {
1284             a0 = a0 != 0;
1285         }
1286         a->frac = a0;
1287     }
1288 }
1289
1290 static void frac128_shrjam(FloatParts128 *a, int c)
1291 {
1292     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1293     uint64_t sticky = 0;
1294
1295     if (unlikely(c == 0)) {
1296         return;
1297     } else if (likely(c < 64)) {
1298         /* nothing */
1299     } else if (likely(c < 128)) {
1300         sticky = a1;
1301         a1 = a0;
1302         a0 = 0;
1303         c &= 63;
1304         if (c == 0) {
1305             goto done;
1306         }
1307     } else {
1308         sticky = a0 | a1;
1309         a0 = a1 = 0;
1310         goto done;
1311     }
1312
1313     sticky |= shr_double(a1, 0, c);
1314     a1 = shr_double(a0, a1, c);
1315     a0 = a0 >> c;
1316
1317  done:
1318     a->frac_lo = a1 | (sticky != 0);
1319     a->frac_hi = a0;
1320 }
1321
1322 static void frac256_shrjam(FloatParts256 *a, int c)
1323 {
1324     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1325     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1326     uint64_t sticky = 0;
1327
1328     if (unlikely(c == 0)) {
1329         return;
1330     } else if (likely(c < 64)) {
1331         /* nothing */
1332     } else if (likely(c < 256)) {
1333         if (unlikely(c & 128)) {
1334             sticky |= a2 | a3;
1335             a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1336         }
1337         if (unlikely(c & 64)) {
1338             sticky |= a3;
1339             a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1340         }
1341         c &= 63;
1342         if (c == 0) {
1343             goto done;
1344         }
1345     } else {
1346         sticky = a0 | a1 | a2 | a3;
1347         a0 = a1 = a2 = a3 = 0;
1348         goto done;
1349     }
1350
1351     sticky |= shr_double(a3, 0, c);
1352     a3 = shr_double(a2, a3, c);
1353     a2 = shr_double(a1, a2, c);
1354     a1 = shr_double(a0, a1, c);
1355     a0 = a0 >> c;
1356
1357  done:
1358     a->frac_lo = a3 | (sticky != 0);
1359     a->frac_lm = a2;
1360     a->frac_hm = a1;
1361     a->frac_hi = a0;
1362 }
1363
1364 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1365
1366 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1367 {
1368     return usub64_overflow(a->frac, b->frac, &r->frac);
1369 }
1370
1371 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1372 {
1373     bool c = 0;
1374     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1375     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1376     return c;
1377 }
1378
1379 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1380 {
1381     bool c = 0;
1382     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1383     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1384     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1385     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1386     return c;
1387 }
1388
1389 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1390
1391 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1392 {
1393     r->frac = a->frac_hi | (a->frac_lo != 0);
1394 }
1395
1396 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1397 {
1398     r->frac_hi = a->frac_hi;
1399     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1400 }
1401
1402 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1403
1404 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1405 {
1406     r->frac_hi = a->frac;
1407     r->frac_lo = 0;
1408 }
1409
1410 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1411 {
1412     r->frac_hi = a->frac_hi;
1413     r->frac_hm = a->frac_lo;
1414     r->frac_lm = 0;
1415     r->frac_lo = 0;
1416 }
1417
1418 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1419
1420 /*
1421  * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantessa.
1422  * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1423  * and thus MIT licenced.
1424  */
1425 static const uint16_t rsqrt_tab[128] = {
1426     0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1427     0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1428     0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1429     0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1430     0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1431     0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1432     0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1433     0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1434     0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1435     0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1436     0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1437     0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1438     0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1439     0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1440     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1441     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1442 };
1443
1444 #define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1445 #define FloatPartsN    glue(FloatParts,N)
1446 #define FloatPartsW    glue(FloatParts,W)
1447
1448 #define N 64
1449 #define W 128
1450
1451 #include "softfloat-parts-addsub.c.inc"
1452 #include "softfloat-parts.c.inc"
1453
1454 #undef  N
1455 #undef  W
1456 #define N 128
1457 #define W 256
1458
1459 #include "softfloat-parts-addsub.c.inc"
1460 #include "softfloat-parts.c.inc"
1461
1462 #undef  N
1463 #undef  W
1464 #define N            256
1465
1466 #include "softfloat-parts-addsub.c.inc"
1467
1468 #undef  N
1469 #undef  W
1470 #undef  partsN
1471 #undef  FloatPartsN
1472 #undef  FloatPartsW
1473
1474 /*
1475  * Pack/unpack routines with a specific FloatFmt.
1476  */
1477
1478 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1479                                       float_status *s, const FloatFmt *params)
1480 {
1481     float16_unpack_raw(p, f);
1482     parts_canonicalize(p, s, params);
1483 }
1484
1485 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1486                                      float_status *s)
1487 {
1488     float16a_unpack_canonical(p, f, s, &float16_params);
1489 }
1490
1491 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1492                                       float_status *s)
1493 {
1494     bfloat16_unpack_raw(p, f);
1495     parts_canonicalize(p, s, &bfloat16_params);
1496 }
1497
1498 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1499                                              float_status *s,
1500                                              const FloatFmt *params)
1501 {
1502     parts_uncanon(p, s, params);
1503     return float16_pack_raw(p);
1504 }
1505
1506 static float16 float16_round_pack_canonical(FloatParts64 *p,
1507                                             float_status *s)
1508 {
1509     return float16a_round_pack_canonical(p, s, &float16_params);
1510 }
1511
1512 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1513                                               float_status *s)
1514 {
1515     parts_uncanon(p, s, &bfloat16_params);
1516     return bfloat16_pack_raw(p);
1517 }
1518
1519 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1520                                      float_status *s)
1521 {
1522     float32_unpack_raw(p, f);
1523     parts_canonicalize(p, s, &float32_params);
1524 }
1525
1526 static float32 float32_round_pack_canonical(FloatParts64 *p,
1527                                             float_status *s)
1528 {
1529     parts_uncanon(p, s, &float32_params);
1530     return float32_pack_raw(p);
1531 }
1532
1533 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1534                                      float_status *s)
1535 {
1536     float64_unpack_raw(p, f);
1537     parts_canonicalize(p, s, &float64_params);
1538 }
1539
1540 static float64 float64_round_pack_canonical(FloatParts64 *p,
1541                                             float_status *s)
1542 {
1543     parts_uncanon(p, s, &float64_params);
1544     return float64_pack_raw(p);
1545 }
1546
1547 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1548                                       float_status *s)
1549 {
1550     float128_unpack_raw(p, f);
1551     parts_canonicalize(p, s, &float128_params);
1552 }
1553
1554 static float128 float128_round_pack_canonical(FloatParts128 *p,
1555                                               float_status *s)
1556 {
1557     parts_uncanon(p, s, &float128_params);
1558     return float128_pack_raw(p);
1559 }
1560
1561 /* Returns false if the encoding is invalid. */
1562 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1563                                       float_status *s)
1564 {
1565     /* Ensure rounding precision is set before beginning. */
1566     switch (s->floatx80_rounding_precision) {
1567     case floatx80_precision_x:
1568     case floatx80_precision_d:
1569     case floatx80_precision_s:
1570         break;
1571     default:
1572         g_assert_not_reached();
1573     }
1574
1575     if (unlikely(floatx80_invalid_encoding(f))) {
1576         float_raise(float_flag_invalid, s);
1577         return false;
1578     }
1579
1580     floatx80_unpack_raw(p, f);
1581
1582     if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1583         parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1584     } else {
1585         /* The explicit integer bit is ignored, after invalid checks. */
1586         p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1587         p->cls = (p->frac_hi == 0 ? float_class_inf
1588                   : parts_is_snan_frac(p->frac_hi, s)
1589                   ? float_class_snan : float_class_qnan);
1590     }
1591     return true;
1592 }
1593
1594 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1595                                               float_status *s)
1596 {
1597     const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1598     uint64_t frac;
1599     int exp;
1600
1601     switch (p->cls) {
1602     case float_class_normal:
1603         if (s->floatx80_rounding_precision == floatx80_precision_x) {
1604             parts_uncanon_normal(p, s, fmt);
1605             frac = p->frac_hi;
1606             exp = p->exp;
1607         } else {
1608             FloatParts64 p64;
1609
1610             p64.sign = p->sign;
1611             p64.exp = p->exp;
1612             frac_truncjam(&p64, p);
1613             parts_uncanon_normal(&p64, s, fmt);
1614             frac = p64.frac;
1615             exp = p64.exp;
1616         }
1617         if (exp != fmt->exp_max) {
1618             break;
1619         }
1620         /* rounded to inf -- fall through to set frac correctly */
1621
1622     case float_class_inf:
1623         /* x86 and m68k differ in the setting of the integer bit. */
1624         frac = floatx80_infinity_low;
1625         exp = fmt->exp_max;
1626         break;
1627
1628     case float_class_zero:
1629         frac = 0;
1630         exp = 0;
1631         break;
1632
1633     case float_class_snan:
1634     case float_class_qnan:
1635         /* NaNs have the integer bit set. */
1636         frac = p->frac_hi | (1ull << 63);
1637         exp = fmt->exp_max;
1638         break;
1639
1640     default:
1641         g_assert_not_reached();
1642     }
1643
1644     return packFloatx80(p->sign, exp, frac);
1645 }
1646
1647 /*
1648  * Addition and subtraction
1649  */
1650
1651 static float16 QEMU_FLATTEN
1652 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1653 {
1654     FloatParts64 pa, pb, *pr;
1655
1656     float16_unpack_canonical(&pa, a, status);
1657     float16_unpack_canonical(&pb, b, status);
1658     pr = parts_addsub(&pa, &pb, status, subtract);
1659
1660     return float16_round_pack_canonical(pr, status);
1661 }
1662
1663 float16 float16_add(float16 a, float16 b, float_status *status)
1664 {
1665     return float16_addsub(a, b, status, false);
1666 }
1667
1668 float16 float16_sub(float16 a, float16 b, float_status *status)
1669 {
1670     return float16_addsub(a, b, status, true);
1671 }
1672
1673 static float32 QEMU_SOFTFLOAT_ATTR
1674 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1675 {
1676     FloatParts64 pa, pb, *pr;
1677
1678     float32_unpack_canonical(&pa, a, status);
1679     float32_unpack_canonical(&pb, b, status);
1680     pr = parts_addsub(&pa, &pb, status, subtract);
1681
1682     return float32_round_pack_canonical(pr, status);
1683 }
1684
1685 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1686 {
1687     return soft_f32_addsub(a, b, status, false);
1688 }
1689
1690 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1691 {
1692     return soft_f32_addsub(a, b, status, true);
1693 }
1694
1695 static float64 QEMU_SOFTFLOAT_ATTR
1696 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1697 {
1698     FloatParts64 pa, pb, *pr;
1699
1700     float64_unpack_canonical(&pa, a, status);
1701     float64_unpack_canonical(&pb, b, status);
1702     pr = parts_addsub(&pa, &pb, status, subtract);
1703
1704     return float64_round_pack_canonical(pr, status);
1705 }
1706
1707 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1708 {
1709     return soft_f64_addsub(a, b, status, false);
1710 }
1711
1712 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1713 {
1714     return soft_f64_addsub(a, b, status, true);
1715 }
1716
1717 static float hard_f32_add(float a, float b)
1718 {
1719     return a + b;
1720 }
1721
1722 static float hard_f32_sub(float a, float b)
1723 {
1724     return a - b;
1725 }
1726
1727 static double hard_f64_add(double a, double b)
1728 {
1729     return a + b;
1730 }
1731
1732 static double hard_f64_sub(double a, double b)
1733 {
1734     return a - b;
1735 }
1736
1737 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1738 {
1739     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1740         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1741     }
1742     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1743 }
1744
1745 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1746 {
1747     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1748         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1749     } else {
1750         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1751     }
1752 }
1753
1754 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1755                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1756 {
1757     return float32_gen2(a, b, s, hard, soft,
1758                         f32_is_zon2, f32_addsubmul_post);
1759 }
1760
1761 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1762                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1763 {
1764     return float64_gen2(a, b, s, hard, soft,
1765                         f64_is_zon2, f64_addsubmul_post);
1766 }
1767
1768 float32 QEMU_FLATTEN
1769 float32_add(float32 a, float32 b, float_status *s)
1770 {
1771     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1772 }
1773
1774 float32 QEMU_FLATTEN
1775 float32_sub(float32 a, float32 b, float_status *s)
1776 {
1777     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1778 }
1779
1780 float64 QEMU_FLATTEN
1781 float64_add(float64 a, float64 b, float_status *s)
1782 {
1783     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1784 }
1785
1786 float64 QEMU_FLATTEN
1787 float64_sub(float64 a, float64 b, float_status *s)
1788 {
1789     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1790 }
1791
1792 static bfloat16 QEMU_FLATTEN
1793 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1794 {
1795     FloatParts64 pa, pb, *pr;
1796
1797     bfloat16_unpack_canonical(&pa, a, status);
1798     bfloat16_unpack_canonical(&pb, b, status);
1799     pr = parts_addsub(&pa, &pb, status, subtract);
1800
1801     return bfloat16_round_pack_canonical(pr, status);
1802 }
1803
1804 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1805 {
1806     return bfloat16_addsub(a, b, status, false);
1807 }
1808
1809 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1810 {
1811     return bfloat16_addsub(a, b, status, true);
1812 }
1813
1814 static float128 QEMU_FLATTEN
1815 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1816 {
1817     FloatParts128 pa, pb, *pr;
1818
1819     float128_unpack_canonical(&pa, a, status);
1820     float128_unpack_canonical(&pb, b, status);
1821     pr = parts_addsub(&pa, &pb, status, subtract);
1822
1823     return float128_round_pack_canonical(pr, status);
1824 }
1825
1826 float128 float128_add(float128 a, float128 b, float_status *status)
1827 {
1828     return float128_addsub(a, b, status, false);
1829 }
1830
1831 float128 float128_sub(float128 a, float128 b, float_status *status)
1832 {
1833     return float128_addsub(a, b, status, true);
1834 }
1835
1836 static floatx80 QEMU_FLATTEN
1837 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
1838 {
1839     FloatParts128 pa, pb, *pr;
1840
1841     if (!floatx80_unpack_canonical(&pa, a, status) ||
1842         !floatx80_unpack_canonical(&pb, b, status)) {
1843         return floatx80_default_nan(status);
1844     }
1845
1846     pr = parts_addsub(&pa, &pb, status, subtract);
1847     return floatx80_round_pack_canonical(pr, status);
1848 }
1849
1850 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
1851 {
1852     return floatx80_addsub(a, b, status, false);
1853 }
1854
1855 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
1856 {
1857     return floatx80_addsub(a, b, status, true);
1858 }
1859
1860 /*
1861  * Multiplication
1862  */
1863
1864 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
1865 {
1866     FloatParts64 pa, pb, *pr;
1867
1868     float16_unpack_canonical(&pa, a, status);
1869     float16_unpack_canonical(&pb, b, status);
1870     pr = parts_mul(&pa, &pb, status);
1871
1872     return float16_round_pack_canonical(pr, status);
1873 }
1874
1875 static float32 QEMU_SOFTFLOAT_ATTR
1876 soft_f32_mul(float32 a, float32 b, float_status *status)
1877 {
1878     FloatParts64 pa, pb, *pr;
1879
1880     float32_unpack_canonical(&pa, a, status);
1881     float32_unpack_canonical(&pb, b, status);
1882     pr = parts_mul(&pa, &pb, status);
1883
1884     return float32_round_pack_canonical(pr, status);
1885 }
1886
1887 static float64 QEMU_SOFTFLOAT_ATTR
1888 soft_f64_mul(float64 a, float64 b, float_status *status)
1889 {
1890     FloatParts64 pa, pb, *pr;
1891
1892     float64_unpack_canonical(&pa, a, status);
1893     float64_unpack_canonical(&pb, b, status);
1894     pr = parts_mul(&pa, &pb, status);
1895
1896     return float64_round_pack_canonical(pr, status);
1897 }
1898
1899 static float hard_f32_mul(float a, float b)
1900 {
1901     return a * b;
1902 }
1903
1904 static double hard_f64_mul(double a, double b)
1905 {
1906     return a * b;
1907 }
1908
1909 float32 QEMU_FLATTEN
1910 float32_mul(float32 a, float32 b, float_status *s)
1911 {
1912     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
1913                         f32_is_zon2, f32_addsubmul_post);
1914 }
1915
1916 float64 QEMU_FLATTEN
1917 float64_mul(float64 a, float64 b, float_status *s)
1918 {
1919     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
1920                         f64_is_zon2, f64_addsubmul_post);
1921 }
1922
1923 bfloat16 QEMU_FLATTEN
1924 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1925 {
1926     FloatParts64 pa, pb, *pr;
1927
1928     bfloat16_unpack_canonical(&pa, a, status);
1929     bfloat16_unpack_canonical(&pb, b, status);
1930     pr = parts_mul(&pa, &pb, status);
1931
1932     return bfloat16_round_pack_canonical(pr, status);
1933 }
1934
1935 float128 QEMU_FLATTEN
1936 float128_mul(float128 a, float128 b, float_status *status)
1937 {
1938     FloatParts128 pa, pb, *pr;
1939
1940     float128_unpack_canonical(&pa, a, status);
1941     float128_unpack_canonical(&pb, b, status);
1942     pr = parts_mul(&pa, &pb, status);
1943
1944     return float128_round_pack_canonical(pr, status);
1945 }
1946
1947 floatx80 QEMU_FLATTEN
1948 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
1949 {
1950     FloatParts128 pa, pb, *pr;
1951
1952     if (!floatx80_unpack_canonical(&pa, a, status) ||
1953         !floatx80_unpack_canonical(&pb, b, status)) {
1954         return floatx80_default_nan(status);
1955     }
1956
1957     pr = parts_mul(&pa, &pb, status);
1958     return floatx80_round_pack_canonical(pr, status);
1959 }
1960
1961 /*
1962  * Fused multiply-add
1963  */
1964
1965 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1966                                     int flags, float_status *status)
1967 {
1968     FloatParts64 pa, pb, pc, *pr;
1969
1970     float16_unpack_canonical(&pa, a, status);
1971     float16_unpack_canonical(&pb, b, status);
1972     float16_unpack_canonical(&pc, c, status);
1973     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1974
1975     return float16_round_pack_canonical(pr, status);
1976 }
1977
1978 static float32 QEMU_SOFTFLOAT_ATTR
1979 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1980                 float_status *status)
1981 {
1982     FloatParts64 pa, pb, pc, *pr;
1983
1984     float32_unpack_canonical(&pa, a, status);
1985     float32_unpack_canonical(&pb, b, status);
1986     float32_unpack_canonical(&pc, c, status);
1987     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1988
1989     return float32_round_pack_canonical(pr, status);
1990 }
1991
1992 static float64 QEMU_SOFTFLOAT_ATTR
1993 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1994                 float_status *status)
1995 {
1996     FloatParts64 pa, pb, pc, *pr;
1997
1998     float64_unpack_canonical(&pa, a, status);
1999     float64_unpack_canonical(&pb, b, status);
2000     float64_unpack_canonical(&pc, c, status);
2001     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2002
2003     return float64_round_pack_canonical(pr, status);
2004 }
2005
2006 static bool force_soft_fma;
2007
2008 float32 QEMU_FLATTEN
2009 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2010 {
2011     union_float32 ua, ub, uc, ur;
2012
2013     ua.s = xa;
2014     ub.s = xb;
2015     uc.s = xc;
2016
2017     if (unlikely(!can_use_fpu(s))) {
2018         goto soft;
2019     }
2020     if (unlikely(flags & float_muladd_halve_result)) {
2021         goto soft;
2022     }
2023
2024     float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2025     if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2026         goto soft;
2027     }
2028
2029     if (unlikely(force_soft_fma)) {
2030         goto soft;
2031     }
2032
2033     /*
2034      * When (a || b) == 0, there's no need to check for under/over flow,
2035      * since we know the addend is (normal || 0) and the product is 0.
2036      */
2037     if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2038         union_float32 up;
2039         bool prod_sign;
2040
2041         prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2042         prod_sign ^= !!(flags & float_muladd_negate_product);
2043         up.s = float32_set_sign(float32_zero, prod_sign);
2044
2045         if (flags & float_muladd_negate_c) {
2046             uc.h = -uc.h;
2047         }
2048         ur.h = up.h + uc.h;
2049     } else {
2050         union_float32 ua_orig = ua;
2051         union_float32 uc_orig = uc;
2052
2053         if (flags & float_muladd_negate_product) {
2054             ua.h = -ua.h;
2055         }
2056         if (flags & float_muladd_negate_c) {
2057             uc.h = -uc.h;
2058         }
2059
2060         ur.h = fmaf(ua.h, ub.h, uc.h);
2061
2062         if (unlikely(f32_is_inf(ur))) {
2063             float_raise(float_flag_overflow, s);
2064         } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2065             ua = ua_orig;
2066             uc = uc_orig;
2067             goto soft;
2068         }
2069     }
2070     if (flags & float_muladd_negate_result) {
2071         return float32_chs(ur.s);
2072     }
2073     return ur.s;
2074
2075  soft:
2076     return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2077 }
2078
2079 float64 QEMU_FLATTEN
2080 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2081 {
2082     union_float64 ua, ub, uc, ur;
2083
2084     ua.s = xa;
2085     ub.s = xb;
2086     uc.s = xc;
2087
2088     if (unlikely(!can_use_fpu(s))) {
2089         goto soft;
2090     }
2091     if (unlikely(flags & float_muladd_halve_result)) {
2092         goto soft;
2093     }
2094
2095     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2096     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2097         goto soft;
2098     }
2099
2100     if (unlikely(force_soft_fma)) {
2101         goto soft;
2102     }
2103
2104     /*
2105      * When (a || b) == 0, there's no need to check for under/over flow,
2106      * since we know the addend is (normal || 0) and the product is 0.
2107      */
2108     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2109         union_float64 up;
2110         bool prod_sign;
2111
2112         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2113         prod_sign ^= !!(flags & float_muladd_negate_product);
2114         up.s = float64_set_sign(float64_zero, prod_sign);
2115
2116         if (flags & float_muladd_negate_c) {
2117             uc.h = -uc.h;
2118         }
2119         ur.h = up.h + uc.h;
2120     } else {
2121         union_float64 ua_orig = ua;
2122         union_float64 uc_orig = uc;
2123
2124         if (flags & float_muladd_negate_product) {
2125             ua.h = -ua.h;
2126         }
2127         if (flags & float_muladd_negate_c) {
2128             uc.h = -uc.h;
2129         }
2130
2131         ur.h = fma(ua.h, ub.h, uc.h);
2132
2133         if (unlikely(f64_is_inf(ur))) {
2134             float_raise(float_flag_overflow, s);
2135         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2136             ua = ua_orig;
2137             uc = uc_orig;
2138             goto soft;
2139         }
2140     }
2141     if (flags & float_muladd_negate_result) {
2142         return float64_chs(ur.s);
2143     }
2144     return ur.s;
2145
2146  soft:
2147     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2148 }
2149
2150 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2151                                       int flags, float_status *status)
2152 {
2153     FloatParts64 pa, pb, pc, *pr;
2154
2155     bfloat16_unpack_canonical(&pa, a, status);
2156     bfloat16_unpack_canonical(&pb, b, status);
2157     bfloat16_unpack_canonical(&pc, c, status);
2158     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2159
2160     return bfloat16_round_pack_canonical(pr, status);
2161 }
2162
2163 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2164                                       int flags, float_status *status)
2165 {
2166     FloatParts128 pa, pb, pc, *pr;
2167
2168     float128_unpack_canonical(&pa, a, status);
2169     float128_unpack_canonical(&pb, b, status);
2170     float128_unpack_canonical(&pc, c, status);
2171     pr = parts_muladd(&pa, &pb, &pc, flags, status);
2172
2173     return float128_round_pack_canonical(pr, status);
2174 }
2175
2176 /*
2177  * Division
2178  */
2179
2180 float16 float16_div(float16 a, float16 b, float_status *status)
2181 {
2182     FloatParts64 pa, pb, *pr;
2183
2184     float16_unpack_canonical(&pa, a, status);
2185     float16_unpack_canonical(&pb, b, status);
2186     pr = parts_div(&pa, &pb, status);
2187
2188     return float16_round_pack_canonical(pr, status);
2189 }
2190
2191 static float32 QEMU_SOFTFLOAT_ATTR
2192 soft_f32_div(float32 a, float32 b, float_status *status)
2193 {
2194     FloatParts64 pa, pb, *pr;
2195
2196     float32_unpack_canonical(&pa, a, status);
2197     float32_unpack_canonical(&pb, b, status);
2198     pr = parts_div(&pa, &pb, status);
2199
2200     return float32_round_pack_canonical(pr, status);
2201 }
2202
2203 static float64 QEMU_SOFTFLOAT_ATTR
2204 soft_f64_div(float64 a, float64 b, float_status *status)
2205 {
2206     FloatParts64 pa, pb, *pr;
2207
2208     float64_unpack_canonical(&pa, a, status);
2209     float64_unpack_canonical(&pb, b, status);
2210     pr = parts_div(&pa, &pb, status);
2211
2212     return float64_round_pack_canonical(pr, status);
2213 }
2214
2215 static float hard_f32_div(float a, float b)
2216 {
2217     return a / b;
2218 }
2219
2220 static double hard_f64_div(double a, double b)
2221 {
2222     return a / b;
2223 }
2224
2225 static bool f32_div_pre(union_float32 a, union_float32 b)
2226 {
2227     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2228         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2229                fpclassify(b.h) == FP_NORMAL;
2230     }
2231     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2232 }
2233
2234 static bool f64_div_pre(union_float64 a, union_float64 b)
2235 {
2236     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2237         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2238                fpclassify(b.h) == FP_NORMAL;
2239     }
2240     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2241 }
2242
2243 static bool f32_div_post(union_float32 a, union_float32 b)
2244 {
2245     if (QEMU_HARDFLOAT_2F32_USE_FP) {
2246         return fpclassify(a.h) != FP_ZERO;
2247     }
2248     return !float32_is_zero(a.s);
2249 }
2250
2251 static bool f64_div_post(union_float64 a, union_float64 b)
2252 {
2253     if (QEMU_HARDFLOAT_2F64_USE_FP) {
2254         return fpclassify(a.h) != FP_ZERO;
2255     }
2256     return !float64_is_zero(a.s);
2257 }
2258
2259 float32 QEMU_FLATTEN
2260 float32_div(float32 a, float32 b, float_status *s)
2261 {
2262     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2263                         f32_div_pre, f32_div_post);
2264 }
2265
2266 float64 QEMU_FLATTEN
2267 float64_div(float64 a, float64 b, float_status *s)
2268 {
2269     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2270                         f64_div_pre, f64_div_post);
2271 }
2272
2273 bfloat16 QEMU_FLATTEN
2274 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2275 {
2276     FloatParts64 pa, pb, *pr;
2277
2278     bfloat16_unpack_canonical(&pa, a, status);
2279     bfloat16_unpack_canonical(&pb, b, status);
2280     pr = parts_div(&pa, &pb, status);
2281
2282     return bfloat16_round_pack_canonical(pr, status);
2283 }
2284
2285 float128 QEMU_FLATTEN
2286 float128_div(float128 a, float128 b, float_status *status)
2287 {
2288     FloatParts128 pa, pb, *pr;
2289
2290     float128_unpack_canonical(&pa, a, status);
2291     float128_unpack_canonical(&pb, b, status);
2292     pr = parts_div(&pa, &pb, status);
2293
2294     return float128_round_pack_canonical(pr, status);
2295 }
2296
2297 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2298 {
2299     FloatParts128 pa, pb, *pr;
2300
2301     if (!floatx80_unpack_canonical(&pa, a, status) ||
2302         !floatx80_unpack_canonical(&pb, b, status)) {
2303         return floatx80_default_nan(status);
2304     }
2305
2306     pr = parts_div(&pa, &pb, status);
2307     return floatx80_round_pack_canonical(pr, status);
2308 }
2309
2310 /*
2311  * Float to Float conversions
2312  *
2313  * Returns the result of converting one float format to another. The
2314  * conversion is performed according to the IEC/IEEE Standard for
2315  * Binary Floating-Point Arithmetic.
2316  *
2317  * Usually this only needs to take care of raising invalid exceptions
2318  * and handling the conversion on NaNs.
2319  */
2320
2321 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2322 {
2323     switch (a->cls) {
2324     case float_class_qnan:
2325     case float_class_snan:
2326         /*
2327          * There is no NaN in the destination format.  Raise Invalid
2328          * and return a zero with the sign of the input NaN.
2329          */
2330         float_raise(float_flag_invalid, s);
2331         a->cls = float_class_zero;
2332         break;
2333
2334     case float_class_inf:
2335         /*
2336          * There is no Inf in the destination format.  Raise Invalid
2337          * and return the maximum normal with the correct sign.
2338          */
2339         float_raise(float_flag_invalid, s);
2340         a->cls = float_class_normal;
2341         a->exp = float16_params_ahp.exp_max;
2342         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2343                                   float16_params_ahp.frac_size + 1);
2344         break;
2345
2346     case float_class_normal:
2347     case float_class_zero:
2348         break;
2349
2350     default:
2351         g_assert_not_reached();
2352     }
2353 }
2354
2355 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2356 {
2357     if (is_nan(a->cls)) {
2358         parts_return_nan(a, s);
2359     }
2360 }
2361
2362 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2363 {
2364     if (is_nan(a->cls)) {
2365         parts_return_nan(a, s);
2366     }
2367 }
2368
2369 #define parts_float_to_float(P, S) \
2370     PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2371
2372 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2373                                         float_status *s)
2374 {
2375     a->cls = b->cls;
2376     a->sign = b->sign;
2377     a->exp = b->exp;
2378
2379     if (a->cls == float_class_normal) {
2380         frac_truncjam(a, b);
2381     } else if (is_nan(a->cls)) {
2382         /* Discard the low bits of the NaN. */
2383         a->frac = b->frac_hi;
2384         parts_return_nan(a, s);
2385     }
2386 }
2387
2388 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2389                                        float_status *s)
2390 {
2391     a->cls = b->cls;
2392     a->sign = b->sign;
2393     a->exp = b->exp;
2394     frac_widen(a, b);
2395
2396     if (is_nan(a->cls)) {
2397         parts_return_nan(a, s);
2398     }
2399 }
2400
2401 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2402 {
2403     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2404     FloatParts64 p;
2405
2406     float16a_unpack_canonical(&p, a, s, fmt16);
2407     parts_float_to_float(&p, s);
2408     return float32_round_pack_canonical(&p, s);
2409 }
2410
2411 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2412 {
2413     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2414     FloatParts64 p;
2415
2416     float16a_unpack_canonical(&p, a, s, fmt16);
2417     parts_float_to_float(&p, s);
2418     return float64_round_pack_canonical(&p, s);
2419 }
2420
2421 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2422 {
2423     FloatParts64 p;
2424     const FloatFmt *fmt;
2425
2426     float32_unpack_canonical(&p, a, s);
2427     if (ieee) {
2428         parts_float_to_float(&p, s);
2429         fmt = &float16_params;
2430     } else {
2431         parts_float_to_ahp(&p, s);
2432         fmt = &float16_params_ahp;
2433     }
2434     return float16a_round_pack_canonical(&p, s, fmt);
2435 }
2436
2437 static float64 QEMU_SOFTFLOAT_ATTR
2438 soft_float32_to_float64(float32 a, float_status *s)
2439 {
2440     FloatParts64 p;
2441
2442     float32_unpack_canonical(&p, a, s);
2443     parts_float_to_float(&p, s);
2444     return float64_round_pack_canonical(&p, s);
2445 }
2446
2447 float64 float32_to_float64(float32 a, float_status *s)
2448 {
2449     if (likely(float32_is_normal(a))) {
2450         /* Widening conversion can never produce inexact results.  */
2451         union_float32 uf;
2452         union_float64 ud;
2453         uf.s = a;
2454         ud.h = uf.h;
2455         return ud.s;
2456     } else if (float32_is_zero(a)) {
2457         return float64_set_sign(float64_zero, float32_is_neg(a));
2458     } else {
2459         return soft_float32_to_float64(a, s);
2460     }
2461 }
2462
2463 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2464 {
2465     FloatParts64 p;
2466     const FloatFmt *fmt;
2467
2468     float64_unpack_canonical(&p, a, s);
2469     if (ieee) {
2470         parts_float_to_float(&p, s);
2471         fmt = &float16_params;
2472     } else {
2473         parts_float_to_ahp(&p, s);
2474         fmt = &float16_params_ahp;
2475     }
2476     return float16a_round_pack_canonical(&p, s, fmt);
2477 }
2478
2479 float32 float64_to_float32(float64 a, float_status *s)
2480 {
2481     FloatParts64 p;
2482
2483     float64_unpack_canonical(&p, a, s);
2484     parts_float_to_float(&p, s);
2485     return float32_round_pack_canonical(&p, s);
2486 }
2487
2488 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2489 {
2490     FloatParts64 p;
2491
2492     bfloat16_unpack_canonical(&p, a, s);
2493     parts_float_to_float(&p, s);
2494     return float32_round_pack_canonical(&p, s);
2495 }
2496
2497 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2498 {
2499     FloatParts64 p;
2500
2501     bfloat16_unpack_canonical(&p, a, s);
2502     parts_float_to_float(&p, s);
2503     return float64_round_pack_canonical(&p, s);
2504 }
2505
2506 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2507 {
2508     FloatParts64 p;
2509
2510     float32_unpack_canonical(&p, a, s);
2511     parts_float_to_float(&p, s);
2512     return bfloat16_round_pack_canonical(&p, s);
2513 }
2514
2515 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2516 {
2517     FloatParts64 p;
2518
2519     float64_unpack_canonical(&p, a, s);
2520     parts_float_to_float(&p, s);
2521     return bfloat16_round_pack_canonical(&p, s);
2522 }
2523
2524 float32 float128_to_float32(float128 a, float_status *s)
2525 {
2526     FloatParts64 p64;
2527     FloatParts128 p128;
2528
2529     float128_unpack_canonical(&p128, a, s);
2530     parts_float_to_float_narrow(&p64, &p128, s);
2531     return float32_round_pack_canonical(&p64, s);
2532 }
2533
2534 float64 float128_to_float64(float128 a, float_status *s)
2535 {
2536     FloatParts64 p64;
2537     FloatParts128 p128;
2538
2539     float128_unpack_canonical(&p128, a, s);
2540     parts_float_to_float_narrow(&p64, &p128, s);
2541     return float64_round_pack_canonical(&p64, s);
2542 }
2543
2544 float128 float32_to_float128(float32 a, float_status *s)
2545 {
2546     FloatParts64 p64;
2547     FloatParts128 p128;
2548
2549     float32_unpack_canonical(&p64, a, s);
2550     parts_float_to_float_widen(&p128, &p64, s);
2551     return float128_round_pack_canonical(&p128, s);
2552 }
2553
2554 float128 float64_to_float128(float64 a, float_status *s)
2555 {
2556     FloatParts64 p64;
2557     FloatParts128 p128;
2558
2559     float64_unpack_canonical(&p64, a, s);
2560     parts_float_to_float_widen(&p128, &p64, s);
2561     return float128_round_pack_canonical(&p128, s);
2562 }
2563
2564 /*
2565  * Round to integral value
2566  */
2567
2568 float16 float16_round_to_int(float16 a, float_status *s)
2569 {
2570     FloatParts64 p;
2571
2572     float16_unpack_canonical(&p, a, s);
2573     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2574     return float16_round_pack_canonical(&p, s);
2575 }
2576
2577 float32 float32_round_to_int(float32 a, float_status *s)
2578 {
2579     FloatParts64 p;
2580
2581     float32_unpack_canonical(&p, a, s);
2582     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2583     return float32_round_pack_canonical(&p, s);
2584 }
2585
2586 float64 float64_round_to_int(float64 a, float_status *s)
2587 {
2588     FloatParts64 p;
2589
2590     float64_unpack_canonical(&p, a, s);
2591     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2592     return float64_round_pack_canonical(&p, s);
2593 }
2594
2595 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2596 {
2597     FloatParts64 p;
2598
2599     bfloat16_unpack_canonical(&p, a, s);
2600     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2601     return bfloat16_round_pack_canonical(&p, s);
2602 }
2603
2604 float128 float128_round_to_int(float128 a, float_status *s)
2605 {
2606     FloatParts128 p;
2607
2608     float128_unpack_canonical(&p, a, s);
2609     parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
2610     return float128_round_pack_canonical(&p, s);
2611 }
2612
2613 /*
2614  * Floating-point to signed integer conversions
2615  */
2616
2617 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2618                               float_status *s)
2619 {
2620     FloatParts64 p;
2621
2622     float16_unpack_canonical(&p, a, s);
2623     return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
2624 }
2625
2626 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2627                                 float_status *s)
2628 {
2629     FloatParts64 p;
2630
2631     float16_unpack_canonical(&p, a, s);
2632     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2633 }
2634
2635 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2636                                 float_status *s)
2637 {
2638     FloatParts64 p;
2639
2640     float16_unpack_canonical(&p, a, s);
2641     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2642 }
2643
2644 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2645                                 float_status *s)
2646 {
2647     FloatParts64 p;
2648
2649     float16_unpack_canonical(&p, a, s);
2650     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2651 }
2652
2653 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2654                                 float_status *s)
2655 {
2656     FloatParts64 p;
2657
2658     float32_unpack_canonical(&p, a, s);
2659     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2660 }
2661
2662 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2663                                 float_status *s)
2664 {
2665     FloatParts64 p;
2666
2667     float32_unpack_canonical(&p, a, s);
2668     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2669 }
2670
2671 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2672                                 float_status *s)
2673 {
2674     FloatParts64 p;
2675
2676     float32_unpack_canonical(&p, a, s);
2677     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2678 }
2679
2680 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2681                                 float_status *s)
2682 {
2683     FloatParts64 p;
2684
2685     float64_unpack_canonical(&p, a, s);
2686     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2687 }
2688
2689 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2690                                 float_status *s)
2691 {
2692     FloatParts64 p;
2693
2694     float64_unpack_canonical(&p, a, s);
2695     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2696 }
2697
2698 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2699                                 float_status *s)
2700 {
2701     FloatParts64 p;
2702
2703     float64_unpack_canonical(&p, a, s);
2704     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2705 }
2706
2707 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2708                                  float_status *s)
2709 {
2710     FloatParts64 p;
2711
2712     bfloat16_unpack_canonical(&p, a, s);
2713     return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2714 }
2715
2716 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2717                                  float_status *s)
2718 {
2719     FloatParts64 p;
2720
2721     bfloat16_unpack_canonical(&p, a, s);
2722     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2723 }
2724
2725 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2726                                  float_status *s)
2727 {
2728     FloatParts64 p;
2729
2730     bfloat16_unpack_canonical(&p, a, s);
2731     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2732 }
2733
2734 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
2735                                         int scale, float_status *s)
2736 {
2737     FloatParts128 p;
2738
2739     float128_unpack_canonical(&p, a, s);
2740     return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2741 }
2742
2743 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
2744                                         int scale, float_status *s)
2745 {
2746     FloatParts128 p;
2747
2748     float128_unpack_canonical(&p, a, s);
2749     return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2750 }
2751
2752 int8_t float16_to_int8(float16 a, float_status *s)
2753 {
2754     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2755 }
2756
2757 int16_t float16_to_int16(float16 a, float_status *s)
2758 {
2759     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2760 }
2761
2762 int32_t float16_to_int32(float16 a, float_status *s)
2763 {
2764     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2765 }
2766
2767 int64_t float16_to_int64(float16 a, float_status *s)
2768 {
2769     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2770 }
2771
2772 int16_t float32_to_int16(float32 a, float_status *s)
2773 {
2774     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2775 }
2776
2777 int32_t float32_to_int32(float32 a, float_status *s)
2778 {
2779     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2780 }
2781
2782 int64_t float32_to_int64(float32 a, float_status *s)
2783 {
2784     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2785 }
2786
2787 int16_t float64_to_int16(float64 a, float_status *s)
2788 {
2789     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2790 }
2791
2792 int32_t float64_to_int32(float64 a, float_status *s)
2793 {
2794     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2795 }
2796
2797 int64_t float64_to_int64(float64 a, float_status *s)
2798 {
2799     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2800 }
2801
2802 int32_t float128_to_int32(float128 a, float_status *s)
2803 {
2804     return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2805 }
2806
2807 int64_t float128_to_int64(float128 a, float_status *s)
2808 {
2809     return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2810 }
2811
2812 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2813 {
2814     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2815 }
2816
2817 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2818 {
2819     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2820 }
2821
2822 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2823 {
2824     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2825 }
2826
2827 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2828 {
2829     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2830 }
2831
2832 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2833 {
2834     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2835 }
2836
2837 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2838 {
2839     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2840 }
2841
2842 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2843 {
2844     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2845 }
2846
2847 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2848 {
2849     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2850 }
2851
2852 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2853 {
2854     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2855 }
2856
2857 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
2858 {
2859     return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
2860 }
2861
2862 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
2863 {
2864     return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
2865 }
2866
2867 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2868 {
2869     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2870 }
2871
2872 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2873 {
2874     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2875 }
2876
2877 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2878 {
2879     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2880 }
2881
2882 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2883 {
2884     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2885 }
2886
2887 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2888 {
2889     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2890 }
2891
2892 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2893 {
2894     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2895 }
2896
2897 /*
2898  * Floating-point to unsigned integer conversions
2899  */
2900
2901 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2902                                 float_status *s)
2903 {
2904     FloatParts64 p;
2905
2906     float16_unpack_canonical(&p, a, s);
2907     return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
2908 }
2909
2910 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2911                                   float_status *s)
2912 {
2913     FloatParts64 p;
2914
2915     float16_unpack_canonical(&p, a, s);
2916     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2917 }
2918
2919 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2920                                   float_status *s)
2921 {
2922     FloatParts64 p;
2923
2924     float16_unpack_canonical(&p, a, s);
2925     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2926 }
2927
2928 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2929                                   float_status *s)
2930 {
2931     FloatParts64 p;
2932
2933     float16_unpack_canonical(&p, a, s);
2934     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2935 }
2936
2937 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2938                                   float_status *s)
2939 {
2940     FloatParts64 p;
2941
2942     float32_unpack_canonical(&p, a, s);
2943     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2944 }
2945
2946 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2947                                   float_status *s)
2948 {
2949     FloatParts64 p;
2950
2951     float32_unpack_canonical(&p, a, s);
2952     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2953 }
2954
2955 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2956                                   float_status *s)
2957 {
2958     FloatParts64 p;
2959
2960     float32_unpack_canonical(&p, a, s);
2961     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2962 }
2963
2964 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2965                                   float_status *s)
2966 {
2967     FloatParts64 p;
2968
2969     float64_unpack_canonical(&p, a, s);
2970     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2971 }
2972
2973 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2974                                   float_status *s)
2975 {
2976     FloatParts64 p;
2977
2978     float64_unpack_canonical(&p, a, s);
2979     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2980 }
2981
2982 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2983                                   float_status *s)
2984 {
2985     FloatParts64 p;
2986
2987     float64_unpack_canonical(&p, a, s);
2988     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2989 }
2990
2991 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2992                                    int scale, float_status *s)
2993 {
2994     FloatParts64 p;
2995
2996     bfloat16_unpack_canonical(&p, a, s);
2997     return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2998 }
2999
3000 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3001                                    int scale, float_status *s)
3002 {
3003     FloatParts64 p;
3004
3005     bfloat16_unpack_canonical(&p, a, s);
3006     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3007 }
3008
3009 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3010                                    int scale, float_status *s)
3011 {
3012     FloatParts64 p;
3013
3014     bfloat16_unpack_canonical(&p, a, s);
3015     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3016 }
3017
3018 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3019                                           int scale, float_status *s)
3020 {
3021     FloatParts128 p;
3022
3023     float128_unpack_canonical(&p, a, s);
3024     return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3025 }
3026
3027 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3028                                           int scale, float_status *s)
3029 {
3030     FloatParts128 p;
3031
3032     float128_unpack_canonical(&p, a, s);
3033     return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3034 }
3035
3036 uint8_t float16_to_uint8(float16 a, float_status *s)
3037 {
3038     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3039 }
3040
3041 uint16_t float16_to_uint16(float16 a, float_status *s)
3042 {
3043     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3044 }
3045
3046 uint32_t float16_to_uint32(float16 a, float_status *s)
3047 {
3048     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3049 }
3050
3051 uint64_t float16_to_uint64(float16 a, float_status *s)
3052 {
3053     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3054 }
3055
3056 uint16_t float32_to_uint16(float32 a, float_status *s)
3057 {
3058     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3059 }
3060
3061 uint32_t float32_to_uint32(float32 a, float_status *s)
3062 {
3063     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3064 }
3065
3066 uint64_t float32_to_uint64(float32 a, float_status *s)
3067 {
3068     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3069 }
3070
3071 uint16_t float64_to_uint16(float64 a, float_status *s)
3072 {
3073     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3074 }
3075
3076 uint32_t float64_to_uint32(float64 a, float_status *s)
3077 {
3078     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3079 }
3080
3081 uint64_t float64_to_uint64(float64 a, float_status *s)
3082 {
3083     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3084 }
3085
3086 uint32_t float128_to_uint32(float128 a, float_status *s)
3087 {
3088     return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3089 }
3090
3091 uint64_t float128_to_uint64(float128 a, float_status *s)
3092 {
3093     return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3094 }
3095
3096 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3097 {
3098     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3099 }
3100
3101 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3102 {
3103     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3104 }
3105
3106 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3107 {
3108     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3109 }
3110
3111 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3112 {
3113     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3114 }
3115
3116 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3117 {
3118     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3119 }
3120
3121 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3122 {
3123     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3124 }
3125
3126 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3127 {
3128     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3129 }
3130
3131 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3132 {
3133     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3134 }
3135
3136 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3137 {
3138     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3139 }
3140
3141 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3142 {
3143     return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3144 }
3145
3146 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3147 {
3148     return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3149 }
3150
3151 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3152 {
3153     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3154 }
3155
3156 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3157 {
3158     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3159 }
3160
3161 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3162 {
3163     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3164 }
3165
3166 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3167 {
3168     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3169 }
3170
3171 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3172 {
3173     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3174 }
3175
3176 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3177 {
3178     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3179 }
3180
3181 /*
3182  * Signed integer to floating-point conversions
3183  */
3184
3185 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3186 {
3187     FloatParts64 p;
3188
3189     parts_sint_to_float(&p, a, scale, status);
3190     return float16_round_pack_canonical(&p, status);
3191 }
3192
3193 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3194 {
3195     return int64_to_float16_scalbn(a, scale, status);
3196 }
3197
3198 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3199 {
3200     return int64_to_float16_scalbn(a, scale, status);
3201 }
3202
3203 float16 int64_to_float16(int64_t a, float_status *status)
3204 {
3205     return int64_to_float16_scalbn(a, 0, status);
3206 }
3207
3208 float16 int32_to_float16(int32_t a, float_status *status)
3209 {
3210     return int64_to_float16_scalbn(a, 0, status);
3211 }
3212
3213 float16 int16_to_float16(int16_t a, float_status *status)
3214 {
3215     return int64_to_float16_scalbn(a, 0, status);
3216 }
3217
3218 float16 int8_to_float16(int8_t a, float_status *status)
3219 {
3220     return int64_to_float16_scalbn(a, 0, status);
3221 }
3222
3223 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3224 {
3225     FloatParts64 p;
3226
3227     parts64_sint_to_float(&p, a, scale, status);
3228     return float32_round_pack_canonical(&p, status);
3229 }
3230
3231 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3232 {
3233     return int64_to_float32_scalbn(a, scale, status);
3234 }
3235
3236 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3237 {
3238     return int64_to_float32_scalbn(a, scale, status);
3239 }
3240
3241 float32 int64_to_float32(int64_t a, float_status *status)
3242 {
3243     return int64_to_float32_scalbn(a, 0, status);
3244 }
3245
3246 float32 int32_to_float32(int32_t a, float_status *status)
3247 {
3248     return int64_to_float32_scalbn(a, 0, status);
3249 }
3250
3251 float32 int16_to_float32(int16_t a, float_status *status)
3252 {
3253     return int64_to_float32_scalbn(a, 0, status);
3254 }
3255
3256 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3257 {
3258     FloatParts64 p;
3259
3260     parts_sint_to_float(&p, a, scale, status);
3261     return float64_round_pack_canonical(&p, status);
3262 }
3263
3264 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3265 {
3266     return int64_to_float64_scalbn(a, scale, status);
3267 }
3268
3269 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3270 {
3271     return int64_to_float64_scalbn(a, scale, status);
3272 }
3273
3274 float64 int64_to_float64(int64_t a, float_status *status)
3275 {
3276     return int64_to_float64_scalbn(a, 0, status);
3277 }
3278
3279 float64 int32_to_float64(int32_t a, float_status *status)
3280 {
3281     return int64_to_float64_scalbn(a, 0, status);
3282 }
3283
3284 float64 int16_to_float64(int16_t a, float_status *status)
3285 {
3286     return int64_to_float64_scalbn(a, 0, status);
3287 }
3288
3289 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3290 {
3291     FloatParts64 p;
3292
3293     parts_sint_to_float(&p, a, scale, status);
3294     return bfloat16_round_pack_canonical(&p, status);
3295 }
3296
3297 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3298 {
3299     return int64_to_bfloat16_scalbn(a, scale, status);
3300 }
3301
3302 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3303 {
3304     return int64_to_bfloat16_scalbn(a, scale, status);
3305 }
3306
3307 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3308 {
3309     return int64_to_bfloat16_scalbn(a, 0, status);
3310 }
3311
3312 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3313 {
3314     return int64_to_bfloat16_scalbn(a, 0, status);
3315 }
3316
3317 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3318 {
3319     return int64_to_bfloat16_scalbn(a, 0, status);
3320 }
3321
3322 float128 int64_to_float128(int64_t a, float_status *status)
3323 {
3324     FloatParts128 p;
3325
3326     parts_sint_to_float(&p, a, 0, status);
3327     return float128_round_pack_canonical(&p, status);
3328 }
3329
3330 float128 int32_to_float128(int32_t a, float_status *status)
3331 {
3332     return int64_to_float128(a, status);
3333 }
3334
3335 /*
3336  * Unsigned Integer to floating-point conversions
3337  */
3338
3339 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3340 {
3341     FloatParts64 p;
3342
3343     parts_uint_to_float(&p, a, scale, status);
3344     return float16_round_pack_canonical(&p, status);
3345 }
3346
3347 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3348 {
3349     return uint64_to_float16_scalbn(a, scale, status);
3350 }
3351
3352 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3353 {
3354     return uint64_to_float16_scalbn(a, scale, status);
3355 }
3356
3357 float16 uint64_to_float16(uint64_t a, float_status *status)
3358 {
3359     return uint64_to_float16_scalbn(a, 0, status);
3360 }
3361
3362 float16 uint32_to_float16(uint32_t a, float_status *status)
3363 {
3364     return uint64_to_float16_scalbn(a, 0, status);
3365 }
3366
3367 float16 uint16_to_float16(uint16_t a, float_status *status)
3368 {
3369     return uint64_to_float16_scalbn(a, 0, status);
3370 }
3371
3372 float16 uint8_to_float16(uint8_t a, float_status *status)
3373 {
3374     return uint64_to_float16_scalbn(a, 0, status);
3375 }
3376
3377 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3378 {
3379     FloatParts64 p;
3380
3381     parts_uint_to_float(&p, a, scale, status);
3382     return float32_round_pack_canonical(&p, status);
3383 }
3384
3385 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3386 {
3387     return uint64_to_float32_scalbn(a, scale, status);
3388 }
3389
3390 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3391 {
3392     return uint64_to_float32_scalbn(a, scale, status);
3393 }
3394
3395 float32 uint64_to_float32(uint64_t a, float_status *status)
3396 {
3397     return uint64_to_float32_scalbn(a, 0, status);
3398 }
3399
3400 float32 uint32_to_float32(uint32_t a, float_status *status)
3401 {
3402     return uint64_to_float32_scalbn(a, 0, status);
3403 }
3404
3405 float32 uint16_to_float32(uint16_t a, float_status *status)
3406 {
3407     return uint64_to_float32_scalbn(a, 0, status);
3408 }
3409
3410 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3411 {
3412     FloatParts64 p;
3413
3414     parts_uint_to_float(&p, a, scale, status);
3415     return float64_round_pack_canonical(&p, status);
3416 }
3417
3418 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3419 {
3420     return uint64_to_float64_scalbn(a, scale, status);
3421 }
3422
3423 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3424 {
3425     return uint64_to_float64_scalbn(a, scale, status);
3426 }
3427
3428 float64 uint64_to_float64(uint64_t a, float_status *status)
3429 {
3430     return uint64_to_float64_scalbn(a, 0, status);
3431 }
3432
3433 float64 uint32_to_float64(uint32_t a, float_status *status)
3434 {
3435     return uint64_to_float64_scalbn(a, 0, status);
3436 }
3437
3438 float64 uint16_to_float64(uint16_t a, float_status *status)
3439 {
3440     return uint64_to_float64_scalbn(a, 0, status);
3441 }
3442
3443 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3444 {
3445     FloatParts64 p;
3446
3447     parts_uint_to_float(&p, a, scale, status);
3448     return bfloat16_round_pack_canonical(&p, status);
3449 }
3450
3451 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3452 {
3453     return uint64_to_bfloat16_scalbn(a, scale, status);
3454 }
3455
3456 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3457 {
3458     return uint64_to_bfloat16_scalbn(a, scale, status);
3459 }
3460
3461 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3462 {
3463     return uint64_to_bfloat16_scalbn(a, 0, status);
3464 }
3465
3466 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3467 {
3468     return uint64_to_bfloat16_scalbn(a, 0, status);
3469 }
3470
3471 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3472 {
3473     return uint64_to_bfloat16_scalbn(a, 0, status);
3474 }
3475
3476 float128 uint64_to_float128(uint64_t a, float_status *status)
3477 {
3478     FloatParts128 p;
3479
3480     parts_uint_to_float(&p, a, 0, status);
3481     return float128_round_pack_canonical(&p, status);
3482 }
3483
3484 /*
3485  * Minimum and maximum
3486  */
3487
3488 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
3489 {
3490     FloatParts64 pa, pb, *pr;
3491
3492     float16_unpack_canonical(&pa, a, s);
3493     float16_unpack_canonical(&pb, b, s);
3494     pr = parts_minmax(&pa, &pb, s, flags);
3495
3496     return float16_round_pack_canonical(pr, s);
3497 }
3498
3499 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
3500                                 float_status *s, int flags)
3501 {
3502     FloatParts64 pa, pb, *pr;
3503
3504     bfloat16_unpack_canonical(&pa, a, s);
3505     bfloat16_unpack_canonical(&pb, b, s);
3506     pr = parts_minmax(&pa, &pb, s, flags);
3507
3508     return bfloat16_round_pack_canonical(pr, s);
3509 }
3510
3511 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
3512 {
3513     FloatParts64 pa, pb, *pr;
3514
3515     float32_unpack_canonical(&pa, a, s);
3516     float32_unpack_canonical(&pb, b, s);
3517     pr = parts_minmax(&pa, &pb, s, flags);
3518
3519     return float32_round_pack_canonical(pr, s);
3520 }
3521
3522 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
3523 {
3524     FloatParts64 pa, pb, *pr;
3525
3526     float64_unpack_canonical(&pa, a, s);
3527     float64_unpack_canonical(&pb, b, s);
3528     pr = parts_minmax(&pa, &pb, s, flags);
3529
3530     return float64_round_pack_canonical(pr, s);
3531 }
3532
3533 static float128 float128_minmax(float128 a, float128 b,
3534                                 float_status *s, int flags)
3535 {
3536     FloatParts128 pa, pb, *pr;
3537
3538     float128_unpack_canonical(&pa, a, s);
3539     float128_unpack_canonical(&pb, b, s);
3540     pr = parts_minmax(&pa, &pb, s, flags);
3541
3542     return float128_round_pack_canonical(pr, s);
3543 }
3544
3545 #define MINMAX_1(type, name, flags) \
3546     type type##_##name(type a, type b, float_status *s) \
3547     { return type##_minmax(a, b, s, flags); }
3548
3549 #define MINMAX_2(type) \
3550     MINMAX_1(type, max, 0)                                      \
3551     MINMAX_1(type, maxnum, minmax_isnum)                        \
3552     MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)      \
3553     MINMAX_1(type, min, minmax_ismin)                           \
3554     MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)         \
3555     MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3556
3557 MINMAX_2(float16)
3558 MINMAX_2(bfloat16)
3559 MINMAX_2(float32)
3560 MINMAX_2(float64)
3561 MINMAX_2(float128)
3562
3563 #undef MINMAX_1
3564 #undef MINMAX_2
3565
3566 /*
3567  * Floating point compare
3568  */
3569
3570 static FloatRelation QEMU_FLATTEN
3571 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
3572 {
3573     FloatParts64 pa, pb;
3574
3575     float16_unpack_canonical(&pa, a, s);
3576     float16_unpack_canonical(&pb, b, s);
3577     return parts_compare(&pa, &pb, s, is_quiet);
3578 }
3579
3580 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
3581 {
3582     return float16_do_compare(a, b, s, false);
3583 }
3584
3585 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
3586 {
3587     return float16_do_compare(a, b, s, true);
3588 }
3589
3590 static FloatRelation QEMU_SOFTFLOAT_ATTR
3591 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
3592 {
3593     FloatParts64 pa, pb;
3594
3595     float32_unpack_canonical(&pa, a, s);
3596     float32_unpack_canonical(&pb, b, s);
3597     return parts_compare(&pa, &pb, s, is_quiet);
3598 }
3599
3600 static FloatRelation QEMU_FLATTEN
3601 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
3602 {
3603     union_float32 ua, ub;
3604
3605     ua.s = xa;
3606     ub.s = xb;
3607
3608     if (QEMU_NO_HARDFLOAT) {
3609         goto soft;
3610     }
3611
3612     float32_input_flush2(&ua.s, &ub.s, s);
3613     if (isgreaterequal(ua.h, ub.h)) {
3614         if (isgreater(ua.h, ub.h)) {
3615             return float_relation_greater;
3616         }
3617         return float_relation_equal;
3618     }
3619     if (likely(isless(ua.h, ub.h))) {
3620         return float_relation_less;
3621     }
3622     /*
3623      * The only condition remaining is unordered.
3624      * Fall through to set flags.
3625      */
3626  soft:
3627     return float32_do_compare(ua.s, ub.s, s, is_quiet);
3628 }
3629
3630 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
3631 {
3632     return float32_hs_compare(a, b, s, false);
3633 }
3634
3635 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
3636 {
3637     return float32_hs_compare(a, b, s, true);
3638 }
3639
3640 static FloatRelation QEMU_SOFTFLOAT_ATTR
3641 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
3642 {
3643     FloatParts64 pa, pb;
3644
3645     float64_unpack_canonical(&pa, a, s);
3646     float64_unpack_canonical(&pb, b, s);
3647     return parts_compare(&pa, &pb, s, is_quiet);
3648 }
3649
3650 static FloatRelation QEMU_FLATTEN
3651 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
3652 {
3653     union_float64 ua, ub;
3654
3655     ua.s = xa;
3656     ub.s = xb;
3657
3658     if (QEMU_NO_HARDFLOAT) {
3659         goto soft;
3660     }
3661
3662     float64_input_flush2(&ua.s, &ub.s, s);
3663     if (isgreaterequal(ua.h, ub.h)) {
3664         if (isgreater(ua.h, ub.h)) {
3665             return float_relation_greater;
3666         }
3667         return float_relation_equal;
3668     }
3669     if (likely(isless(ua.h, ub.h))) {
3670         return float_relation_less;
3671     }
3672     /*
3673      * The only condition remaining is unordered.
3674      * Fall through to set flags.
3675      */
3676  soft:
3677     return float64_do_compare(ua.s, ub.s, s, is_quiet);
3678 }
3679
3680 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
3681 {
3682     return float64_hs_compare(a, b, s, false);
3683 }
3684
3685 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
3686 {
3687     return float64_hs_compare(a, b, s, true);
3688 }
3689
3690 static FloatRelation QEMU_FLATTEN
3691 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
3692 {
3693     FloatParts64 pa, pb;
3694
3695     bfloat16_unpack_canonical(&pa, a, s);
3696     bfloat16_unpack_canonical(&pb, b, s);
3697     return parts_compare(&pa, &pb, s, is_quiet);
3698 }
3699
3700 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3701 {
3702     return bfloat16_do_compare(a, b, s, false);
3703 }
3704
3705 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3706 {
3707     return bfloat16_do_compare(a, b, s, true);
3708 }
3709
3710 static FloatRelation QEMU_FLATTEN
3711 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
3712 {
3713     FloatParts128 pa, pb;
3714
3715     float128_unpack_canonical(&pa, a, s);
3716     float128_unpack_canonical(&pb, b, s);
3717     return parts_compare(&pa, &pb, s, is_quiet);
3718 }
3719
3720 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
3721 {
3722     return float128_do_compare(a, b, s, false);
3723 }
3724
3725 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
3726 {
3727     return float128_do_compare(a, b, s, true);
3728 }
3729
3730 /*
3731  * Scale by 2**N
3732  */
3733
3734 float16 float16_scalbn(float16 a, int n, float_status *status)
3735 {
3736     FloatParts64 p;
3737
3738     float16_unpack_canonical(&p, a, status);
3739     parts_scalbn(&p, n, status);
3740     return float16_round_pack_canonical(&p, status);
3741 }
3742
3743 float32 float32_scalbn(float32 a, int n, float_status *status)
3744 {
3745     FloatParts64 p;
3746
3747     float32_unpack_canonical(&p, a, status);
3748     parts_scalbn(&p, n, status);
3749     return float32_round_pack_canonical(&p, status);
3750 }
3751
3752 float64 float64_scalbn(float64 a, int n, float_status *status)
3753 {
3754     FloatParts64 p;
3755
3756     float64_unpack_canonical(&p, a, status);
3757     parts_scalbn(&p, n, status);
3758     return float64_round_pack_canonical(&p, status);
3759 }
3760
3761 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3762 {
3763     FloatParts64 p;
3764
3765     bfloat16_unpack_canonical(&p, a, status);
3766     parts_scalbn(&p, n, status);
3767     return bfloat16_round_pack_canonical(&p, status);
3768 }
3769
3770 float128 float128_scalbn(float128 a, int n, float_status *status)
3771 {
3772     FloatParts128 p;
3773
3774     float128_unpack_canonical(&p, a, status);
3775     parts_scalbn(&p, n, status);
3776     return float128_round_pack_canonical(&p, status);
3777 }
3778
3779 /*
3780  * Square Root
3781  */
3782
3783 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
3784 {
3785     FloatParts64 p;
3786
3787     float16_unpack_canonical(&p, a, status);
3788     parts_sqrt(&p, status, &float16_params);
3789     return float16_round_pack_canonical(&p, status);
3790 }
3791
3792 static float32 QEMU_SOFTFLOAT_ATTR
3793 soft_f32_sqrt(float32 a, float_status *status)
3794 {
3795     FloatParts64 p;
3796
3797     float32_unpack_canonical(&p, a, status);
3798     parts_sqrt(&p, status, &float32_params);
3799     return float32_round_pack_canonical(&p, status);
3800 }
3801
3802 static float64 QEMU_SOFTFLOAT_ATTR
3803 soft_f64_sqrt(float64 a, float_status *status)
3804 {
3805     FloatParts64 p;
3806
3807     float64_unpack_canonical(&p, a, status);
3808     parts_sqrt(&p, status, &float64_params);
3809     return float64_round_pack_canonical(&p, status);
3810 }
3811
3812 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3813 {
3814     union_float32 ua, ur;
3815
3816     ua.s = xa;
3817     if (unlikely(!can_use_fpu(s))) {
3818         goto soft;
3819     }
3820
3821     float32_input_flush1(&ua.s, s);
3822     if (QEMU_HARDFLOAT_1F32_USE_FP) {
3823         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3824                        fpclassify(ua.h) == FP_ZERO) ||
3825                      signbit(ua.h))) {
3826             goto soft;
3827         }
3828     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3829                         float32_is_neg(ua.s))) {
3830         goto soft;
3831     }
3832     ur.h = sqrtf(ua.h);
3833     return ur.s;
3834
3835  soft:
3836     return soft_f32_sqrt(ua.s, s);
3837 }
3838
3839 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3840 {
3841     union_float64 ua, ur;
3842
3843     ua.s = xa;
3844     if (unlikely(!can_use_fpu(s))) {
3845         goto soft;
3846     }
3847
3848     float64_input_flush1(&ua.s, s);
3849     if (QEMU_HARDFLOAT_1F64_USE_FP) {
3850         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3851                        fpclassify(ua.h) == FP_ZERO) ||
3852                      signbit(ua.h))) {
3853             goto soft;
3854         }
3855     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3856                         float64_is_neg(ua.s))) {
3857         goto soft;
3858     }
3859     ur.h = sqrt(ua.h);
3860     return ur.s;
3861
3862  soft:
3863     return soft_f64_sqrt(ua.s, s);
3864 }
3865
3866 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3867 {
3868     FloatParts64 p;
3869
3870     bfloat16_unpack_canonical(&p, a, status);
3871     parts_sqrt(&p, status, &bfloat16_params);
3872     return bfloat16_round_pack_canonical(&p, status);
3873 }
3874
3875 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
3876 {
3877     FloatParts128 p;
3878
3879     float128_unpack_canonical(&p, a, status);
3880     parts_sqrt(&p, status, &float128_params);
3881     return float128_round_pack_canonical(&p, status);
3882 }
3883
3884 /*----------------------------------------------------------------------------
3885 | The pattern for a default generated NaN.
3886 *----------------------------------------------------------------------------*/
3887
3888 float16 float16_default_nan(float_status *status)
3889 {
3890     FloatParts64 p;
3891
3892     parts_default_nan(&p, status);
3893     p.frac >>= float16_params.frac_shift;
3894     return float16_pack_raw(&p);
3895 }
3896
3897 float32 float32_default_nan(float_status *status)
3898 {
3899     FloatParts64 p;
3900
3901     parts_default_nan(&p, status);
3902     p.frac >>= float32_params.frac_shift;
3903     return float32_pack_raw(&p);
3904 }
3905
3906 float64 float64_default_nan(float_status *status)
3907 {
3908     FloatParts64 p;
3909
3910     parts_default_nan(&p, status);
3911     p.frac >>= float64_params.frac_shift;
3912     return float64_pack_raw(&p);
3913 }
3914
3915 float128 float128_default_nan(float_status *status)
3916 {
3917     FloatParts128 p;
3918
3919     parts_default_nan(&p, status);
3920     frac_shr(&p, float128_params.frac_shift);
3921     return float128_pack_raw(&p);
3922 }
3923
3924 bfloat16 bfloat16_default_nan(float_status *status)
3925 {
3926     FloatParts64 p;
3927
3928     parts_default_nan(&p, status);
3929     p.frac >>= bfloat16_params.frac_shift;
3930     return bfloat16_pack_raw(&p);
3931 }
3932
3933 /*----------------------------------------------------------------------------
3934 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3935 *----------------------------------------------------------------------------*/
3936
3937 float16 float16_silence_nan(float16 a, float_status *status)
3938 {
3939     FloatParts64 p;
3940
3941     float16_unpack_raw(&p, a);
3942     p.frac <<= float16_params.frac_shift;
3943     parts_silence_nan(&p, status);
3944     p.frac >>= float16_params.frac_shift;
3945     return float16_pack_raw(&p);
3946 }
3947
3948 float32 float32_silence_nan(float32 a, float_status *status)
3949 {
3950     FloatParts64 p;
3951
3952     float32_unpack_raw(&p, a);
3953     p.frac <<= float32_params.frac_shift;
3954     parts_silence_nan(&p, status);
3955     p.frac >>= float32_params.frac_shift;
3956     return float32_pack_raw(&p);
3957 }
3958
3959 float64 float64_silence_nan(float64 a, float_status *status)
3960 {
3961     FloatParts64 p;
3962
3963     float64_unpack_raw(&p, a);
3964     p.frac <<= float64_params.frac_shift;
3965     parts_silence_nan(&p, status);
3966     p.frac >>= float64_params.frac_shift;
3967     return float64_pack_raw(&p);
3968 }
3969
3970 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3971 {
3972     FloatParts64 p;
3973
3974     bfloat16_unpack_raw(&p, a);
3975     p.frac <<= bfloat16_params.frac_shift;
3976     parts_silence_nan(&p, status);
3977     p.frac >>= bfloat16_params.frac_shift;
3978     return bfloat16_pack_raw(&p);
3979 }
3980
3981 float128 float128_silence_nan(float128 a, float_status *status)
3982 {
3983     FloatParts128 p;
3984
3985     float128_unpack_raw(&p, a);
3986     frac_shl(&p, float128_params.frac_shift);
3987     parts_silence_nan(&p, status);
3988     frac_shr(&p, float128_params.frac_shift);
3989     return float128_pack_raw(&p);
3990 }
3991
3992 /*----------------------------------------------------------------------------
3993 | If `a' is denormal and we are in flush-to-zero mode then set the
3994 | input-denormal exception and return zero. Otherwise just return the value.
3995 *----------------------------------------------------------------------------*/
3996
3997 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
3998 {
3999     if (p.exp == 0 && p.frac != 0) {
4000         float_raise(float_flag_input_denormal, status);
4001         return true;
4002     }
4003
4004     return false;
4005 }
4006
4007 float16 float16_squash_input_denormal(float16 a, float_status *status)
4008 {
4009     if (status->flush_inputs_to_zero) {
4010         FloatParts64 p;
4011
4012         float16_unpack_raw(&p, a);
4013         if (parts_squash_denormal(p, status)) {
4014             return float16_set_sign(float16_zero, p.sign);
4015         }
4016     }
4017     return a;
4018 }
4019
4020 float32 float32_squash_input_denormal(float32 a, float_status *status)
4021 {
4022     if (status->flush_inputs_to_zero) {
4023         FloatParts64 p;
4024
4025         float32_unpack_raw(&p, a);
4026         if (parts_squash_denormal(p, status)) {
4027             return float32_set_sign(float32_zero, p.sign);
4028         }
4029     }
4030     return a;
4031 }
4032
4033 float64 float64_squash_input_denormal(float64 a, float_status *status)
4034 {
4035     if (status->flush_inputs_to_zero) {
4036         FloatParts64 p;
4037
4038         float64_unpack_raw(&p, a);
4039         if (parts_squash_denormal(p, status)) {
4040             return float64_set_sign(float64_zero, p.sign);
4041         }
4042     }
4043     return a;
4044 }
4045
4046 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4047 {
4048     if (status->flush_inputs_to_zero) {
4049         FloatParts64 p;
4050
4051         bfloat16_unpack_raw(&p, a);
4052         if (parts_squash_denormal(p, status)) {
4053             return bfloat16_set_sign(bfloat16_zero, p.sign);
4054         }
4055     }
4056     return a;
4057 }
4058
4059 /*----------------------------------------------------------------------------
4060 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
4061 | and 7, and returns the properly rounded 32-bit integer corresponding to the
4062 | input.  If `zSign' is 1, the input is negated before being converted to an
4063 | integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
4064 | is simply rounded to an integer, with the inexact exception raised if the
4065 | input cannot be represented exactly as an integer.  However, if the fixed-
4066 | point input is too large, the invalid exception is raised and the largest
4067 | positive or negative integer is returned.
4068 *----------------------------------------------------------------------------*/
4069
4070 static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
4071                                  float_status *status)
4072 {
4073     int8_t roundingMode;
4074     bool roundNearestEven;
4075     int8_t roundIncrement, roundBits;
4076     int32_t z;
4077
4078     roundingMode = status->float_rounding_mode;
4079     roundNearestEven = ( roundingMode == float_round_nearest_even );
4080     switch (roundingMode) {
4081     case float_round_nearest_even:
4082     case float_round_ties_away:
4083         roundIncrement = 0x40;
4084         break;
4085     case float_round_to_zero:
4086         roundIncrement = 0;
4087         break;
4088     case float_round_up:
4089         roundIncrement = zSign ? 0 : 0x7f;
4090         break;
4091     case float_round_down:
4092         roundIncrement = zSign ? 0x7f : 0;
4093         break;
4094     case float_round_to_odd:
4095         roundIncrement = absZ & 0x80 ? 0 : 0x7f;
4096         break;
4097     default:
4098         abort();
4099     }
4100     roundBits = absZ & 0x7F;
4101     absZ = ( absZ + roundIncrement )>>7;
4102     if (!(roundBits ^ 0x40) && roundNearestEven) {
4103         absZ &= ~1;
4104     }
4105     z = absZ;
4106     if ( zSign ) z = - z;
4107     if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
4108         float_raise(float_flag_invalid, status);
4109         return zSign ? INT32_MIN : INT32_MAX;
4110     }
4111     if (roundBits) {
4112         float_raise(float_flag_inexact, status);
4113     }
4114     return z;
4115
4116 }
4117
4118 /*----------------------------------------------------------------------------
4119 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4120 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4121 | and returns the properly rounded 64-bit integer corresponding to the input.
4122 | If `zSign' is 1, the input is negated before being converted to an integer.
4123 | Ordinarily, the fixed-point input is simply rounded to an integer, with
4124 | the inexact exception raised if the input cannot be represented exactly as
4125 | an integer.  However, if the fixed-point input is too large, the invalid
4126 | exception is raised and the largest positive or negative integer is
4127 | returned.
4128 *----------------------------------------------------------------------------*/
4129
4130 static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
4131                                float_status *status)
4132 {
4133     int8_t roundingMode;
4134     bool roundNearestEven, increment;
4135     int64_t z;
4136
4137     roundingMode = status->float_rounding_mode;
4138     roundNearestEven = ( roundingMode == float_round_nearest_even );
4139     switch (roundingMode) {
4140     case float_round_nearest_even:
4141     case float_round_ties_away:
4142         increment = ((int64_t) absZ1 < 0);
4143         break;
4144     case float_round_to_zero:
4145         increment = 0;
4146         break;
4147     case float_round_up:
4148         increment = !zSign && absZ1;
4149         break;
4150     case float_round_down:
4151         increment = zSign && absZ1;
4152         break;
4153     case float_round_to_odd:
4154         increment = !(absZ0 & 1) && absZ1;
4155         break;
4156     default:
4157         abort();
4158     }
4159     if ( increment ) {
4160         ++absZ0;
4161         if ( absZ0 == 0 ) goto overflow;
4162         if (!(absZ1 << 1) && roundNearestEven) {
4163             absZ0 &= ~1;
4164         }
4165     }
4166     z = absZ0;
4167     if ( zSign ) z = - z;
4168     if ( z && ( ( z < 0 ) ^ zSign ) ) {
4169  overflow:
4170         float_raise(float_flag_invalid, status);
4171         return zSign ? INT64_MIN : INT64_MAX;
4172     }
4173     if (absZ1) {
4174         float_raise(float_flag_inexact, status);
4175     }
4176     return z;
4177
4178 }
4179
4180 /*----------------------------------------------------------------------------
4181 | Normalizes the subnormal single-precision floating-point value represented
4182 | by the denormalized significand `aSig'.  The normalized exponent and
4183 | significand are stored at the locations pointed to by `zExpPtr' and
4184 | `zSigPtr', respectively.
4185 *----------------------------------------------------------------------------*/
4186
4187 static void
4188  normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
4189 {
4190     int8_t shiftCount;
4191
4192     shiftCount = clz32(aSig) - 8;
4193     *zSigPtr = aSig<<shiftCount;
4194     *zExpPtr = 1 - shiftCount;
4195
4196 }
4197
4198 /*----------------------------------------------------------------------------
4199 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4200 | and significand `zSig', and returns the proper single-precision floating-
4201 | point value corresponding to the abstract input.  Ordinarily, the abstract
4202 | value is simply rounded and packed into the single-precision format, with
4203 | the inexact exception raised if the abstract input cannot be represented
4204 | exactly.  However, if the abstract value is too large, the overflow and
4205 | inexact exceptions are raised and an infinity or maximal finite value is
4206 | returned.  If the abstract value is too small, the input value is rounded to
4207 | a subnormal number, and the underflow and inexact exceptions are raised if
4208 | the abstract input cannot be represented exactly as a subnormal single-
4209 | precision floating-point number.
4210 |     The input significand `zSig' has its binary point between bits 30
4211 | and 29, which is 7 bits to the left of the usual location.  This shifted
4212 | significand must be normalized or smaller.  If `zSig' is not normalized,
4213 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4214 | and it must not require rounding.  In the usual case that `zSig' is
4215 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4216 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4217 | Binary Floating-Point Arithmetic.
4218 *----------------------------------------------------------------------------*/
4219
4220 static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4221                                    float_status *status)
4222 {
4223     int8_t roundingMode;
4224     bool roundNearestEven;
4225     int8_t roundIncrement, roundBits;
4226     bool isTiny;
4227
4228     roundingMode = status->float_rounding_mode;
4229     roundNearestEven = ( roundingMode == float_round_nearest_even );
4230     switch (roundingMode) {
4231     case float_round_nearest_even:
4232     case float_round_ties_away:
4233         roundIncrement = 0x40;
4234         break;
4235     case float_round_to_zero:
4236         roundIncrement = 0;
4237         break;
4238     case float_round_up:
4239         roundIncrement = zSign ? 0 : 0x7f;
4240         break;
4241     case float_round_down:
4242         roundIncrement = zSign ? 0x7f : 0;
4243         break;
4244     case float_round_to_odd:
4245         roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4246         break;
4247     default:
4248         abort();
4249         break;
4250     }
4251     roundBits = zSig & 0x7F;
4252     if ( 0xFD <= (uint16_t) zExp ) {
4253         if (    ( 0xFD < zExp )
4254              || (    ( zExp == 0xFD )
4255                   && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
4256            ) {
4257             bool overflow_to_inf = roundingMode != float_round_to_odd &&
4258                                    roundIncrement != 0;
4259             float_raise(float_flag_overflow | float_flag_inexact, status);
4260             return packFloat32(zSign, 0xFF, -!overflow_to_inf);
4261         }
4262         if ( zExp < 0 ) {
4263             if (status->flush_to_zero) {
4264                 float_raise(float_flag_output_denormal, status);
4265                 return packFloat32(zSign, 0, 0);
4266             }
4267             isTiny = status->tininess_before_rounding
4268                   || (zExp < -1)
4269                   || (zSig + roundIncrement < 0x80000000);
4270             shift32RightJamming( zSig, - zExp, &zSig );
4271             zExp = 0;
4272             roundBits = zSig & 0x7F;
4273             if (isTiny && roundBits) {
4274                 float_raise(float_flag_underflow, status);
4275             }
4276             if (roundingMode == float_round_to_odd) {
4277                 /*
4278                  * For round-to-odd case, the roundIncrement depends on
4279                  * zSig which just changed.
4280                  */
4281                 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4282             }
4283         }
4284     }
4285     if (roundBits) {
4286         float_raise(float_flag_inexact, status);
4287     }
4288     zSig = ( zSig + roundIncrement )>>7;
4289     if (!(roundBits ^ 0x40) && roundNearestEven) {
4290         zSig &= ~1;
4291     }
4292     if ( zSig == 0 ) zExp = 0;
4293     return packFloat32( zSign, zExp, zSig );
4294
4295 }
4296
4297 /*----------------------------------------------------------------------------
4298 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4299 | and significand `zSig', and returns the proper single-precision floating-
4300 | point value corresponding to the abstract input.  This routine is just like
4301 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4302 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4303 | floating-point exponent.
4304 *----------------------------------------------------------------------------*/
4305
4306 static float32
4307  normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4308                               float_status *status)
4309 {
4310     int8_t shiftCount;
4311
4312     shiftCount = clz32(zSig) - 1;
4313     return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4314                                status);
4315
4316 }
4317
4318 /*----------------------------------------------------------------------------
4319 | Normalizes the subnormal double-precision floating-point value represented
4320 | by the denormalized significand `aSig'.  The normalized exponent and
4321 | significand are stored at the locations pointed to by `zExpPtr' and
4322 | `zSigPtr', respectively.
4323 *----------------------------------------------------------------------------*/
4324
4325 static void
4326  normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
4327 {
4328     int8_t shiftCount;
4329
4330     shiftCount = clz64(aSig) - 11;
4331     *zSigPtr = aSig<<shiftCount;
4332     *zExpPtr = 1 - shiftCount;
4333
4334 }
4335
4336 /*----------------------------------------------------------------------------
4337 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4338 | double-precision floating-point value, returning the result.  After being
4339 | shifted into the proper positions, the three fields are simply added
4340 | together to form the result.  This means that any integer portion of `zSig'
4341 | will be added into the exponent.  Since a properly normalized significand
4342 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4343 | than the desired result exponent whenever `zSig' is a complete, normalized
4344 | significand.
4345 *----------------------------------------------------------------------------*/
4346
4347 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4348 {
4349
4350     return make_float64(
4351         ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4352
4353 }
4354
4355 /*----------------------------------------------------------------------------
4356 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4357 | and significand `zSig', and returns the proper double-precision floating-
4358 | point value corresponding to the abstract input.  Ordinarily, the abstract
4359 | value is simply rounded and packed into the double-precision format, with
4360 | the inexact exception raised if the abstract input cannot be represented
4361 | exactly.  However, if the abstract value is too large, the overflow and
4362 | inexact exceptions are raised and an infinity or maximal finite value is
4363 | returned.  If the abstract value is too small, the input value is rounded to
4364 | a subnormal number, and the underflow and inexact exceptions are raised if
4365 | the abstract input cannot be represented exactly as a subnormal double-
4366 | precision floating-point number.
4367 |     The input significand `zSig' has its binary point between bits 62
4368 | and 61, which is 10 bits to the left of the usual location.  This shifted
4369 | significand must be normalized or smaller.  If `zSig' is not normalized,
4370 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4371 | and it must not require rounding.  In the usual case that `zSig' is
4372 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4373 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4374 | Binary Floating-Point Arithmetic.
4375 *----------------------------------------------------------------------------*/
4376
4377 static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4378                                    float_status *status)
4379 {
4380     int8_t roundingMode;
4381     bool roundNearestEven;
4382     int roundIncrement, roundBits;
4383     bool isTiny;
4384
4385     roundingMode = status->float_rounding_mode;
4386     roundNearestEven = ( roundingMode == float_round_nearest_even );
4387     switch (roundingMode) {
4388     case float_round_nearest_even:
4389     case float_round_ties_away:
4390         roundIncrement = 0x200;
4391         break;
4392     case float_round_to_zero:
4393         roundIncrement = 0;
4394         break;
4395     case float_round_up:
4396         roundIncrement = zSign ? 0 : 0x3ff;
4397         break;
4398     case float_round_down:
4399         roundIncrement = zSign ? 0x3ff : 0;
4400         break;
4401     case float_round_to_odd:
4402         roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4403         break;
4404     default:
4405         abort();
4406     }
4407     roundBits = zSig & 0x3FF;
4408     if ( 0x7FD <= (uint16_t) zExp ) {
4409         if (    ( 0x7FD < zExp )
4410              || (    ( zExp == 0x7FD )
4411                   && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
4412            ) {
4413             bool overflow_to_inf = roundingMode != float_round_to_odd &&
4414                                    roundIncrement != 0;
4415             float_raise(float_flag_overflow | float_flag_inexact, status);
4416             return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
4417         }
4418         if ( zExp < 0 ) {
4419             if (status->flush_to_zero) {
4420                 float_raise(float_flag_output_denormal, status);
4421                 return packFloat64(zSign, 0, 0);
4422             }
4423             isTiny = status->tininess_before_rounding
4424                   || (zExp < -1)
4425                   || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
4426             shift64RightJamming( zSig, - zExp, &zSig );
4427             zExp = 0;
4428             roundBits = zSig & 0x3FF;
4429             if (isTiny && roundBits) {
4430                 float_raise(float_flag_underflow, status);
4431             }
4432             if (roundingMode == float_round_to_odd) {
4433                 /*
4434                  * For round-to-odd case, the roundIncrement depends on
4435                  * zSig which just changed.
4436                  */
4437                 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4438             }
4439         }
4440     }
4441     if (roundBits) {
4442         float_raise(float_flag_inexact, status);
4443     }
4444     zSig = ( zSig + roundIncrement )>>10;
4445     if (!(roundBits ^ 0x200) && roundNearestEven) {
4446         zSig &= ~1;
4447     }
4448     if ( zSig == 0 ) zExp = 0;
4449     return packFloat64( zSign, zExp, zSig );
4450
4451 }
4452
4453 /*----------------------------------------------------------------------------
4454 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4455 | and significand `zSig', and returns the proper double-precision floating-
4456 | point value corresponding to the abstract input.  This routine is just like
4457 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4458 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4459 | floating-point exponent.
4460 *----------------------------------------------------------------------------*/
4461
4462 static float64
4463  normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4464                               float_status *status)
4465 {
4466     int8_t shiftCount;
4467
4468     shiftCount = clz64(zSig) - 1;
4469     return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4470                                status);
4471
4472 }
4473
4474 /*----------------------------------------------------------------------------
4475 | Normalizes the subnormal extended double-precision floating-point value
4476 | represented by the denormalized significand `aSig'.  The normalized exponent
4477 | and significand are stored at the locations pointed to by `zExpPtr' and
4478 | `zSigPtr', respectively.
4479 *----------------------------------------------------------------------------*/
4480
4481 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4482                                 uint64_t *zSigPtr)
4483 {
4484     int8_t shiftCount;
4485
4486     shiftCount = clz64(aSig);
4487     *zSigPtr = aSig<<shiftCount;
4488     *zExpPtr = 1 - shiftCount;
4489 }
4490
4491 /*----------------------------------------------------------------------------
4492 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4493 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4494 | and returns the proper extended double-precision floating-point value
4495 | corresponding to the abstract input.  Ordinarily, the abstract value is
4496 | rounded and packed into the extended double-precision format, with the
4497 | inexact exception raised if the abstract input cannot be represented
4498 | exactly.  However, if the abstract value is too large, the overflow and
4499 | inexact exceptions are raised and an infinity or maximal finite value is
4500 | returned.  If the abstract value is too small, the input value is rounded to
4501 | a subnormal number, and the underflow and inexact exceptions are raised if
4502 | the abstract input cannot be represented exactly as a subnormal extended
4503 | double-precision floating-point number.
4504 |     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4505 | the result is rounded to the same number of bits as single or double
4506 | precision, respectively.  Otherwise, the result is rounded to the full
4507 | precision of the extended double-precision format.
4508 |     The input significand must be normalized or smaller.  If the input
4509 | significand is not normalized, `zExp' must be 0; in that case, the result
4510 | returned is a subnormal number, and it must not require rounding.  The
4511 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4512 | Floating-Point Arithmetic.
4513 *----------------------------------------------------------------------------*/
4514
4515 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4516                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4517                               float_status *status)
4518 {
4519     FloatRoundMode roundingMode;
4520     bool roundNearestEven, increment, isTiny;
4521     int64_t roundIncrement, roundMask, roundBits;
4522
4523     roundingMode = status->float_rounding_mode;
4524     roundNearestEven = ( roundingMode == float_round_nearest_even );
4525     switch (roundingPrecision) {
4526     case floatx80_precision_x:
4527         goto precision80;
4528     case floatx80_precision_d:
4529         roundIncrement = UINT64_C(0x0000000000000400);
4530         roundMask = UINT64_C(0x00000000000007FF);
4531         break;
4532     case floatx80_precision_s:
4533         roundIncrement = UINT64_C(0x0000008000000000);
4534         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4535         break;
4536     default:
4537         g_assert_not_reached();
4538     }
4539     zSig0 |= ( zSig1 != 0 );
4540     switch (roundingMode) {
4541     case float_round_nearest_even:
4542     case float_round_ties_away:
4543         break;
4544     case float_round_to_zero:
4545         roundIncrement = 0;
4546         break;
4547     case float_round_up:
4548         roundIncrement = zSign ? 0 : roundMask;
4549         break;
4550     case float_round_down:
4551         roundIncrement = zSign ? roundMask : 0;
4552         break;
4553     default:
4554         abort();
4555     }
4556     roundBits = zSig0 & roundMask;
4557     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4558         if (    ( 0x7FFE < zExp )
4559              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4560            ) {
4561             goto overflow;
4562         }
4563         if ( zExp <= 0 ) {
4564             if (status->flush_to_zero) {
4565                 float_raise(float_flag_output_denormal, status);
4566                 return packFloatx80(zSign, 0, 0);
4567             }
4568             isTiny = status->tininess_before_rounding
4569                   || (zExp < 0 )
4570                   || (zSig0 <= zSig0 + roundIncrement);
4571             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4572             zExp = 0;
4573             roundBits = zSig0 & roundMask;
4574             if (isTiny && roundBits) {
4575                 float_raise(float_flag_underflow, status);
4576             }
4577             if (roundBits) {
4578                 float_raise(float_flag_inexact, status);
4579             }
4580             zSig0 += roundIncrement;
4581             if ( (int64_t) zSig0 < 0 ) zExp = 1;
4582             roundIncrement = roundMask + 1;
4583             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4584                 roundMask |= roundIncrement;
4585             }
4586             zSig0 &= ~ roundMask;
4587             return packFloatx80( zSign, zExp, zSig0 );
4588         }
4589     }
4590     if (roundBits) {
4591         float_raise(float_flag_inexact, status);
4592     }
4593     zSig0 += roundIncrement;
4594     if ( zSig0 < roundIncrement ) {
4595         ++zExp;
4596         zSig0 = UINT64_C(0x8000000000000000);
4597     }
4598     roundIncrement = roundMask + 1;
4599     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4600         roundMask |= roundIncrement;
4601     }
4602     zSig0 &= ~ roundMask;
4603     if ( zSig0 == 0 ) zExp = 0;
4604     return packFloatx80( zSign, zExp, zSig0 );
4605  precision80:
4606     switch (roundingMode) {
4607     case float_round_nearest_even:
4608     case float_round_ties_away:
4609         increment = ((int64_t)zSig1 < 0);
4610         break;
4611     case float_round_to_zero:
4612         increment = 0;
4613         break;
4614     case float_round_up:
4615         increment = !zSign && zSig1;
4616         break;
4617     case float_round_down:
4618         increment = zSign && zSig1;
4619         break;
4620     default:
4621         abort();
4622     }
4623     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4624         if (    ( 0x7FFE < zExp )
4625              || (    ( zExp == 0x7FFE )
4626                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4627                   && increment
4628                 )
4629            ) {
4630             roundMask = 0;
4631  overflow:
4632             float_raise(float_flag_overflow | float_flag_inexact, status);
4633             if (    ( roundingMode == float_round_to_zero )
4634                  || ( zSign && ( roundingMode == float_round_up ) )
4635                  || ( ! zSign && ( roundingMode == float_round_down ) )
4636                ) {
4637                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4638             }
4639             return packFloatx80(zSign,
4640                                 floatx80_infinity_high,
4641                                 floatx80_infinity_low);
4642         }
4643         if ( zExp <= 0 ) {
4644             isTiny = status->tininess_before_rounding
4645                   || (zExp < 0)
4646                   || !increment
4647                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4648             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4649             zExp = 0;
4650             if (isTiny && zSig1) {
4651                 float_raise(float_flag_underflow, status);
4652             }
4653             if (zSig1) {
4654                 float_raise(float_flag_inexact, status);
4655             }
4656             switch (roundingMode) {
4657             case float_round_nearest_even:
4658             case float_round_ties_away:
4659                 increment = ((int64_t)zSig1 < 0);
4660                 break;
4661             case float_round_to_zero:
4662                 increment = 0;
4663                 break;
4664             case float_round_up:
4665                 increment = !zSign && zSig1;
4666                 break;
4667             case float_round_down:
4668                 increment = zSign && zSig1;
4669                 break;
4670             default:
4671                 abort();
4672             }
4673             if ( increment ) {
4674                 ++zSig0;
4675                 if (!(zSig1 << 1) && roundNearestEven) {
4676                     zSig0 &= ~1;
4677                 }
4678                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4679             }
4680             return packFloatx80( zSign, zExp, zSig0 );
4681         }
4682     }
4683     if (zSig1) {
4684         float_raise(float_flag_inexact, status);
4685     }
4686     if ( increment ) {
4687         ++zSig0;
4688         if ( zSig0 == 0 ) {
4689             ++zExp;
4690             zSig0 = UINT64_C(0x8000000000000000);
4691         }
4692         else {
4693             if (!(zSig1 << 1) && roundNearestEven) {
4694                 zSig0 &= ~1;
4695             }
4696         }
4697     }
4698     else {
4699         if ( zSig0 == 0 ) zExp = 0;
4700     }
4701     return packFloatx80( zSign, zExp, zSig0 );
4702
4703 }
4704
4705 /*----------------------------------------------------------------------------
4706 | Takes an abstract floating-point value having sign `zSign', exponent
4707 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4708 | and returns the proper extended double-precision floating-point value
4709 | corresponding to the abstract input.  This routine is just like
4710 | `roundAndPackFloatx80' except that the input significand does not have to be
4711 | normalized.
4712 *----------------------------------------------------------------------------*/
4713
4714 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
4715                                        bool zSign, int32_t zExp,
4716                                        uint64_t zSig0, uint64_t zSig1,
4717                                        float_status *status)
4718 {
4719     int8_t shiftCount;
4720
4721     if ( zSig0 == 0 ) {
4722         zSig0 = zSig1;
4723         zSig1 = 0;
4724         zExp -= 64;
4725     }
4726     shiftCount = clz64(zSig0);
4727     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4728     zExp -= shiftCount;
4729     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4730                                 zSig0, zSig1, status);
4731
4732 }
4733
4734 /*----------------------------------------------------------------------------
4735 | Returns the least-significant 64 fraction bits of the quadruple-precision
4736 | floating-point value `a'.
4737 *----------------------------------------------------------------------------*/
4738
4739 static inline uint64_t extractFloat128Frac1( float128 a )
4740 {
4741
4742     return a.low;
4743
4744 }
4745
4746 /*----------------------------------------------------------------------------
4747 | Returns the most-significant 48 fraction bits of the quadruple-precision
4748 | floating-point value `a'.
4749 *----------------------------------------------------------------------------*/
4750
4751 static inline uint64_t extractFloat128Frac0( float128 a )
4752 {
4753
4754     return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4755
4756 }
4757
4758 /*----------------------------------------------------------------------------
4759 | Returns the exponent bits of the quadruple-precision floating-point value
4760 | `a'.
4761 *----------------------------------------------------------------------------*/
4762
4763 static inline int32_t extractFloat128Exp( float128 a )
4764 {
4765
4766     return ( a.high>>48 ) & 0x7FFF;
4767
4768 }
4769
4770 /*----------------------------------------------------------------------------
4771 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4772 *----------------------------------------------------------------------------*/
4773
4774 static inline bool extractFloat128Sign(float128 a)
4775 {
4776     return a.high >> 63;
4777 }
4778
4779 /*----------------------------------------------------------------------------
4780 | Normalizes the subnormal quadruple-precision floating-point value
4781 | represented by the denormalized significand formed by the concatenation of
4782 | `aSig0' and `aSig1'.  The normalized exponent is stored at the location
4783 | pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
4784 | significand are stored at the location pointed to by `zSig0Ptr', and the
4785 | least significant 64 bits of the normalized significand are stored at the
4786 | location pointed to by `zSig1Ptr'.
4787 *----------------------------------------------------------------------------*/
4788
4789 static void
4790  normalizeFloat128Subnormal(
4791      uint64_t aSig0,
4792      uint64_t aSig1,
4793      int32_t *zExpPtr,
4794      uint64_t *zSig0Ptr,
4795      uint64_t *zSig1Ptr
4796  )
4797 {
4798     int8_t shiftCount;
4799
4800     if ( aSig0 == 0 ) {
4801         shiftCount = clz64(aSig1) - 15;
4802         if ( shiftCount < 0 ) {
4803             *zSig0Ptr = aSig1>>( - shiftCount );
4804             *zSig1Ptr = aSig1<<( shiftCount & 63 );
4805         }
4806         else {
4807             *zSig0Ptr = aSig1<<shiftCount;
4808             *zSig1Ptr = 0;
4809         }
4810         *zExpPtr = - shiftCount - 63;
4811     }
4812     else {
4813         shiftCount = clz64(aSig0) - 15;
4814         shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
4815         *zExpPtr = 1 - shiftCount;
4816     }
4817
4818 }
4819
4820 /*----------------------------------------------------------------------------
4821 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4822 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4823 | floating-point value, returning the result.  After being shifted into the
4824 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4825 | added together to form the most significant 32 bits of the result.  This
4826 | means that any integer portion of `zSig0' will be added into the exponent.
4827 | Since a properly normalized significand will have an integer portion equal
4828 | to 1, the `zExp' input should be 1 less than the desired result exponent
4829 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4830 | significand.
4831 *----------------------------------------------------------------------------*/
4832
4833 static inline float128
4834 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4835 {
4836     float128 z;
4837
4838     z.low = zSig1;
4839     z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4840     return z;
4841 }
4842
4843 /*----------------------------------------------------------------------------
4844 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4845 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4846 | and `zSig2', and returns the proper quadruple-precision floating-point value
4847 | corresponding to the abstract input.  Ordinarily, the abstract value is
4848 | simply rounded and packed into the quadruple-precision format, with the
4849 | inexact exception raised if the abstract input cannot be represented
4850 | exactly.  However, if the abstract value is too large, the overflow and
4851 | inexact exceptions are raised and an infinity or maximal finite value is
4852 | returned.  If the abstract value is too small, the input value is rounded to
4853 | a subnormal number, and the underflow and inexact exceptions are raised if
4854 | the abstract input cannot be represented exactly as a subnormal quadruple-
4855 | precision floating-point number.
4856 |     The input significand must be normalized or smaller.  If the input
4857 | significand is not normalized, `zExp' must be 0; in that case, the result
4858 | returned is a subnormal number, and it must not require rounding.  In the
4859 | usual case that the input significand is normalized, `zExp' must be 1 less
4860 | than the ``true'' floating-point exponent.  The handling of underflow and
4861 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4862 *----------------------------------------------------------------------------*/
4863
4864 static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
4865                                      uint64_t zSig0, uint64_t zSig1,
4866                                      uint64_t zSig2, float_status *status)
4867 {
4868     int8_t roundingMode;
4869     bool roundNearestEven, increment, isTiny;
4870
4871     roundingMode = status->float_rounding_mode;
4872     roundNearestEven = ( roundingMode == float_round_nearest_even );
4873     switch (roundingMode) {
4874     case float_round_nearest_even:
4875     case float_round_ties_away:
4876         increment = ((int64_t)zSig2 < 0);
4877         break;
4878     case float_round_to_zero:
4879         increment = 0;
4880         break;
4881     case float_round_up:
4882         increment = !zSign && zSig2;
4883         break;
4884     case float_round_down:
4885         increment = zSign && zSig2;
4886         break;
4887     case float_round_to_odd:
4888         increment = !(zSig1 & 0x1) && zSig2;
4889         break;
4890     default:
4891         abort();
4892     }
4893     if ( 0x7FFD <= (uint32_t) zExp ) {
4894         if (    ( 0x7FFD < zExp )
4895              || (    ( zExp == 0x7FFD )
4896                   && eq128(
4897                          UINT64_C(0x0001FFFFFFFFFFFF),
4898                          UINT64_C(0xFFFFFFFFFFFFFFFF),
4899                          zSig0,
4900                          zSig1
4901                      )
4902                   && increment
4903                 )
4904            ) {
4905             float_raise(float_flag_overflow | float_flag_inexact, status);
4906             if (    ( roundingMode == float_round_to_zero )
4907                  || ( zSign && ( roundingMode == float_round_up ) )
4908                  || ( ! zSign && ( roundingMode == float_round_down ) )
4909                  || (roundingMode == float_round_to_odd)
4910                ) {
4911                 return
4912                     packFloat128(
4913                         zSign,
4914                         0x7FFE,
4915                         UINT64_C(0x0000FFFFFFFFFFFF),
4916                         UINT64_C(0xFFFFFFFFFFFFFFFF)
4917                     );
4918             }
4919             return packFloat128( zSign, 0x7FFF, 0, 0 );
4920         }
4921         if ( zExp < 0 ) {
4922             if (status->flush_to_zero) {
4923                 float_raise(float_flag_output_denormal, status);
4924                 return packFloat128(zSign, 0, 0, 0);
4925             }
4926             isTiny = status->tininess_before_rounding
4927                   || (zExp < -1)
4928                   || !increment
4929                   || lt128(zSig0, zSig1,
4930                            UINT64_C(0x0001FFFFFFFFFFFF),
4931                            UINT64_C(0xFFFFFFFFFFFFFFFF));
4932             shift128ExtraRightJamming(
4933                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4934             zExp = 0;
4935             if (isTiny && zSig2) {
4936                 float_raise(float_flag_underflow, status);
4937             }
4938             switch (roundingMode) {
4939             case float_round_nearest_even:
4940             case float_round_ties_away:
4941                 increment = ((int64_t)zSig2 < 0);
4942                 break;
4943             case float_round_to_zero:
4944                 increment = 0;
4945                 break;
4946             case float_round_up:
4947                 increment = !zSign && zSig2;
4948                 break;
4949             case float_round_down:
4950                 increment = zSign && zSig2;
4951                 break;
4952             case float_round_to_odd:
4953                 increment = !(zSig1 & 0x1) && zSig2;
4954                 break;
4955             default:
4956                 abort();
4957             }
4958         }
4959     }
4960     if (zSig2) {
4961         float_raise(float_flag_inexact, status);
4962     }
4963     if ( increment ) {
4964         add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
4965         if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4966             zSig1 &= ~1;
4967         }
4968     }
4969     else {
4970         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4971     }
4972     return packFloat128( zSign, zExp, zSig0, zSig1 );
4973
4974 }
4975
4976 /*----------------------------------------------------------------------------
4977 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4978 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4979 | returns the proper quadruple-precision floating-point value corresponding
4980 | to the abstract input.  This routine is just like `roundAndPackFloat128'
4981 | except that the input significand has fewer bits and does not have to be
4982 | normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
4983 | point exponent.
4984 *----------------------------------------------------------------------------*/
4985
4986 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
4987                                               uint64_t zSig0, uint64_t zSig1,
4988                                               float_status *status)
4989 {
4990     int8_t shiftCount;
4991     uint64_t zSig2;
4992
4993     if ( zSig0 == 0 ) {
4994         zSig0 = zSig1;
4995         zSig1 = 0;
4996         zExp -= 64;
4997     }
4998     shiftCount = clz64(zSig0) - 15;
4999     if ( 0 <= shiftCount ) {
5000         zSig2 = 0;
5001         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5002     }
5003     else {
5004         shift128ExtraRightJamming(
5005             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
5006     }
5007     zExp -= shiftCount;
5008     return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
5009
5010 }
5011
5012
5013 /*----------------------------------------------------------------------------
5014 | Returns the result of converting the 32-bit two's complement integer `a'
5015 | to the extended double-precision floating-point format.  The conversion
5016 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5017 | Arithmetic.
5018 *----------------------------------------------------------------------------*/
5019
5020 floatx80 int32_to_floatx80(int32_t a, float_status *status)
5021 {
5022     bool zSign;
5023     uint32_t absA;
5024     int8_t shiftCount;
5025     uint64_t zSig;
5026
5027     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5028     zSign = ( a < 0 );
5029     absA = zSign ? - a : a;
5030     shiftCount = clz32(absA) + 32;
5031     zSig = absA;
5032     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
5033
5034 }
5035
5036 /*----------------------------------------------------------------------------
5037 | Returns the result of converting the 64-bit two's complement integer `a'
5038 | to the extended double-precision floating-point format.  The conversion
5039 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5040 | Arithmetic.
5041 *----------------------------------------------------------------------------*/
5042
5043 floatx80 int64_to_floatx80(int64_t a, float_status *status)
5044 {
5045     bool zSign;
5046     uint64_t absA;
5047     int8_t shiftCount;
5048
5049     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5050     zSign = ( a < 0 );
5051     absA = zSign ? - a : a;
5052     shiftCount = clz64(absA);
5053     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
5054
5055 }
5056
5057 /*----------------------------------------------------------------------------
5058 | Returns the result of converting the single-precision floating-point value
5059 | `a' to the extended double-precision floating-point format.  The conversion
5060 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5061 | Arithmetic.
5062 *----------------------------------------------------------------------------*/
5063
5064 floatx80 float32_to_floatx80(float32 a, float_status *status)
5065 {
5066     bool aSign;
5067     int aExp;
5068     uint32_t aSig;
5069
5070     a = float32_squash_input_denormal(a, status);
5071     aSig = extractFloat32Frac( a );
5072     aExp = extractFloat32Exp( a );
5073     aSign = extractFloat32Sign( a );
5074     if ( aExp == 0xFF ) {
5075         if (aSig) {
5076             floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
5077                                                status);
5078             return floatx80_silence_nan(res, status);
5079         }
5080         return packFloatx80(aSign,
5081                             floatx80_infinity_high,
5082                             floatx80_infinity_low);
5083     }
5084     if ( aExp == 0 ) {
5085         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5086         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5087     }
5088     aSig |= 0x00800000;
5089     return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
5090
5091 }
5092
5093 /*----------------------------------------------------------------------------
5094 | Returns the remainder of the single-precision floating-point value `a'
5095 | with respect to the corresponding value `b'.  The operation is performed
5096 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5097 *----------------------------------------------------------------------------*/
5098
5099 float32 float32_rem(float32 a, float32 b, float_status *status)
5100 {
5101     bool aSign, zSign;
5102     int aExp, bExp, expDiff;
5103     uint32_t aSig, bSig;
5104     uint32_t q;
5105     uint64_t aSig64, bSig64, q64;
5106     uint32_t alternateASig;
5107     int32_t sigMean;
5108     a = float32_squash_input_denormal(a, status);
5109     b = float32_squash_input_denormal(b, status);
5110
5111     aSig = extractFloat32Frac( a );
5112     aExp = extractFloat32Exp( a );
5113     aSign = extractFloat32Sign( a );
5114     bSig = extractFloat32Frac( b );
5115     bExp = extractFloat32Exp( b );
5116     if ( aExp == 0xFF ) {
5117         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
5118             return propagateFloat32NaN(a, b, status);
5119         }
5120         float_raise(float_flag_invalid, status);
5121         return float32_default_nan(status);
5122     }
5123     if ( bExp == 0xFF ) {
5124         if (bSig) {
5125             return propagateFloat32NaN(a, b, status);
5126         }
5127         return a;
5128     }
5129     if ( bExp == 0 ) {
5130         if ( bSig == 0 ) {
5131             float_raise(float_flag_invalid, status);
5132             return float32_default_nan(status);
5133         }
5134         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
5135     }
5136     if ( aExp == 0 ) {
5137         if ( aSig == 0 ) return a;
5138         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5139     }
5140     expDiff = aExp - bExp;
5141     aSig |= 0x00800000;
5142     bSig |= 0x00800000;
5143     if ( expDiff < 32 ) {
5144         aSig <<= 8;
5145         bSig <<= 8;
5146         if ( expDiff < 0 ) {
5147             if ( expDiff < -1 ) return a;
5148             aSig >>= 1;
5149         }
5150         q = ( bSig <= aSig );
5151         if ( q ) aSig -= bSig;
5152         if ( 0 < expDiff ) {
5153             q = ( ( (uint64_t) aSig )<<32 ) / bSig;
5154             q >>= 32 - expDiff;
5155             bSig >>= 2;
5156             aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5157         }
5158         else {
5159             aSig >>= 2;
5160             bSig >>= 2;
5161         }
5162     }
5163     else {
5164         if ( bSig <= aSig ) aSig -= bSig;
5165         aSig64 = ( (uint64_t) aSig )<<40;
5166         bSig64 = ( (uint64_t) bSig )<<40;
5167         expDiff -= 64;
5168         while ( 0 < expDiff ) {
5169             q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5170             q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5171             aSig64 = - ( ( bSig * q64 )<<38 );
5172             expDiff -= 62;
5173         }
5174         expDiff += 64;
5175         q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5176         q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5177         q = q64>>( 64 - expDiff );
5178         bSig <<= 6;
5179         aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5180     }
5181     do {
5182         alternateASig = aSig;
5183         ++q;
5184         aSig -= bSig;
5185     } while ( 0 <= (int32_t) aSig );
5186     sigMean = aSig + alternateASig;
5187     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5188         aSig = alternateASig;
5189     }
5190     zSign = ( (int32_t) aSig < 0 );
5191     if ( zSign ) aSig = - aSig;
5192     return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
5193 }
5194
5195
5196
5197 /*----------------------------------------------------------------------------
5198 | Returns the binary exponential of the single-precision floating-point value
5199 | `a'. The operation is performed according to the IEC/IEEE Standard for
5200 | Binary Floating-Point Arithmetic.
5201 |
5202 | Uses the following identities:
5203 |
5204 | 1. -------------------------------------------------------------------------
5205 |      x    x*ln(2)
5206 |     2  = e
5207 |
5208 | 2. -------------------------------------------------------------------------
5209 |                      2     3     4     5           n
5210 |      x        x     x     x     x     x           x
5211 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5212 |               1!    2!    3!    4!    5!          n!
5213 *----------------------------------------------------------------------------*/
5214
5215 static const float64 float32_exp2_coefficients[15] =
5216 {
5217     const_float64( 0x3ff0000000000000ll ), /*  1 */
5218     const_float64( 0x3fe0000000000000ll ), /*  2 */
5219     const_float64( 0x3fc5555555555555ll ), /*  3 */
5220     const_float64( 0x3fa5555555555555ll ), /*  4 */
5221     const_float64( 0x3f81111111111111ll ), /*  5 */
5222     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
5223     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
5224     const_float64( 0x3efa01a01a01a01all ), /*  8 */
5225     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
5226     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5227     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5228     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5229     const_float64( 0x3de6124613a86d09ll ), /* 13 */
5230     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5231     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5232 };
5233
5234 float32 float32_exp2(float32 a, float_status *status)
5235 {
5236     bool aSign;
5237     int aExp;
5238     uint32_t aSig;
5239     float64 r, x, xn;
5240     int i;
5241     a = float32_squash_input_denormal(a, status);
5242
5243     aSig = extractFloat32Frac( a );
5244     aExp = extractFloat32Exp( a );
5245     aSign = extractFloat32Sign( a );
5246
5247     if ( aExp == 0xFF) {
5248         if (aSig) {
5249             return propagateFloat32NaN(a, float32_zero, status);
5250         }
5251         return (aSign) ? float32_zero : a;
5252     }
5253     if (aExp == 0) {
5254         if (aSig == 0) return float32_one;
5255     }
5256
5257     float_raise(float_flag_inexact, status);
5258
5259     /* ******************************* */
5260     /* using float64 for approximation */
5261     /* ******************************* */
5262     x = float32_to_float64(a, status);
5263     x = float64_mul(x, float64_ln2, status);
5264
5265     xn = x;
5266     r = float64_one;
5267     for (i = 0 ; i < 15 ; i++) {
5268         float64 f;
5269
5270         f = float64_mul(xn, float32_exp2_coefficients[i], status);
5271         r = float64_add(r, f, status);
5272
5273         xn = float64_mul(xn, x, status);
5274     }
5275
5276     return float64_to_float32(r, status);
5277 }
5278
5279 /*----------------------------------------------------------------------------
5280 | Returns the binary log of the single-precision floating-point value `a'.
5281 | The operation is performed according to the IEC/IEEE Standard for Binary
5282 | Floating-Point Arithmetic.
5283 *----------------------------------------------------------------------------*/
5284 float32 float32_log2(float32 a, float_status *status)
5285 {
5286     bool aSign, zSign;
5287     int aExp;
5288     uint32_t aSig, zSig, i;
5289
5290     a = float32_squash_input_denormal(a, status);
5291     aSig = extractFloat32Frac( a );
5292     aExp = extractFloat32Exp( a );
5293     aSign = extractFloat32Sign( a );
5294
5295     if ( aExp == 0 ) {
5296         if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5297         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5298     }
5299     if ( aSign ) {
5300         float_raise(float_flag_invalid, status);
5301         return float32_default_nan(status);
5302     }
5303     if ( aExp == 0xFF ) {
5304         if (aSig) {
5305             return propagateFloat32NaN(a, float32_zero, status);
5306         }
5307         return a;
5308     }
5309
5310     aExp -= 0x7F;
5311     aSig |= 0x00800000;
5312     zSign = aExp < 0;
5313     zSig = aExp << 23;
5314
5315     for (i = 1 << 22; i > 0; i >>= 1) {
5316         aSig = ( (uint64_t)aSig * aSig ) >> 23;
5317         if ( aSig & 0x01000000 ) {
5318             aSig >>= 1;
5319             zSig |= i;
5320         }
5321     }
5322
5323     if ( zSign )
5324         zSig = -zSig;
5325
5326     return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
5327 }
5328
5329 /*----------------------------------------------------------------------------
5330 | Returns the result of converting the double-precision floating-point value
5331 | `a' to the extended double-precision floating-point format.  The conversion
5332 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5333 | Arithmetic.
5334 *----------------------------------------------------------------------------*/
5335
5336 floatx80 float64_to_floatx80(float64 a, float_status *status)
5337 {
5338     bool aSign;
5339     int aExp;
5340     uint64_t aSig;
5341
5342     a = float64_squash_input_denormal(a, status);
5343     aSig = extractFloat64Frac( a );
5344     aExp = extractFloat64Exp( a );
5345     aSign = extractFloat64Sign( a );
5346     if ( aExp == 0x7FF ) {
5347         if (aSig) {
5348             floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5349                                                status);
5350             return floatx80_silence_nan(res, status);
5351         }
5352         return packFloatx80(aSign,
5353                             floatx80_infinity_high,
5354                             floatx80_infinity_low);
5355     }
5356     if ( aExp == 0 ) {
5357         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5358         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5359     }
5360     return
5361         packFloatx80(
5362             aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
5363
5364 }
5365
5366 /*----------------------------------------------------------------------------
5367 | Returns the remainder of the double-precision floating-point value `a'
5368 | with respect to the corresponding value `b'.  The operation is performed
5369 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5370 *----------------------------------------------------------------------------*/
5371
5372 float64 float64_rem(float64 a, float64 b, float_status *status)
5373 {
5374     bool aSign, zSign;
5375     int aExp, bExp, expDiff;
5376     uint64_t aSig, bSig;
5377     uint64_t q, alternateASig;
5378     int64_t sigMean;
5379
5380     a = float64_squash_input_denormal(a, status);
5381     b = float64_squash_input_denormal(b, status);
5382     aSig = extractFloat64Frac( a );
5383     aExp = extractFloat64Exp( a );
5384     aSign = extractFloat64Sign( a );
5385     bSig = extractFloat64Frac( b );
5386     bExp = extractFloat64Exp( b );
5387     if ( aExp == 0x7FF ) {
5388         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
5389             return propagateFloat64NaN(a, b, status);
5390         }
5391         float_raise(float_flag_invalid, status);
5392         return float64_default_nan(status);
5393     }
5394     if ( bExp == 0x7FF ) {
5395         if (bSig) {
5396             return propagateFloat64NaN(a, b, status);
5397         }
5398         return a;
5399     }
5400     if ( bExp == 0 ) {
5401         if ( bSig == 0 ) {
5402             float_raise(float_flag_invalid, status);
5403             return float64_default_nan(status);
5404         }
5405         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5406     }
5407     if ( aExp == 0 ) {
5408         if ( aSig == 0 ) return a;
5409         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5410     }
5411     expDiff = aExp - bExp;
5412     aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5413     bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
5414     if ( expDiff < 0 ) {
5415         if ( expDiff < -1 ) return a;
5416         aSig >>= 1;
5417     }
5418     q = ( bSig <= aSig );
5419     if ( q ) aSig -= bSig;
5420     expDiff -= 64;
5421     while ( 0 < expDiff ) {
5422         q = estimateDiv128To64( aSig, 0, bSig );
5423         q = ( 2 < q ) ? q - 2 : 0;
5424         aSig = - ( ( bSig>>2 ) * q );
5425         expDiff -= 62;
5426     }
5427     expDiff += 64;
5428     if ( 0 < expDiff ) {
5429         q = estimateDiv128To64( aSig, 0, bSig );
5430         q = ( 2 < q ) ? q - 2 : 0;
5431         q >>= 64 - expDiff;
5432         bSig >>= 2;
5433         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5434     }
5435     else {
5436         aSig >>= 2;
5437         bSig >>= 2;
5438     }
5439     do {
5440         alternateASig = aSig;
5441         ++q;
5442         aSig -= bSig;
5443     } while ( 0 <= (int64_t) aSig );
5444     sigMean = aSig + alternateASig;
5445     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5446         aSig = alternateASig;
5447     }
5448     zSign = ( (int64_t) aSig < 0 );
5449     if ( zSign ) aSig = - aSig;
5450     return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
5451
5452 }
5453
5454 /*----------------------------------------------------------------------------
5455 | Returns the binary log of the double-precision floating-point value `a'.
5456 | The operation is performed according to the IEC/IEEE Standard for Binary
5457 | Floating-Point Arithmetic.
5458 *----------------------------------------------------------------------------*/
5459 float64 float64_log2(float64 a, float_status *status)
5460 {
5461     bool aSign, zSign;
5462     int aExp;
5463     uint64_t aSig, aSig0, aSig1, zSig, i;
5464     a = float64_squash_input_denormal(a, status);
5465
5466     aSig = extractFloat64Frac( a );
5467     aExp = extractFloat64Exp( a );
5468     aSign = extractFloat64Sign( a );
5469
5470     if ( aExp == 0 ) {
5471         if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5472         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5473     }
5474     if ( aSign ) {
5475         float_raise(float_flag_invalid, status);
5476         return float64_default_nan(status);
5477     }
5478     if ( aExp == 0x7FF ) {
5479         if (aSig) {
5480             return propagateFloat64NaN(a, float64_zero, status);
5481         }
5482         return a;
5483     }
5484
5485     aExp -= 0x3FF;
5486     aSig |= UINT64_C(0x0010000000000000);
5487     zSign = aExp < 0;
5488     zSig = (uint64_t)aExp << 52;
5489     for (i = 1LL << 51; i > 0; i >>= 1) {
5490         mul64To128( aSig, aSig, &aSig0, &aSig1 );
5491         aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
5492         if ( aSig & UINT64_C(0x0020000000000000) ) {
5493             aSig >>= 1;
5494             zSig |= i;
5495         }
5496     }
5497
5498     if ( zSign )
5499         zSig = -zSig;
5500     return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
5501 }
5502
5503 /*----------------------------------------------------------------------------
5504 | Returns the result of converting the extended double-precision floating-
5505 | point value `a' to the 32-bit two's complement integer format.  The
5506 | conversion is performed according to the IEC/IEEE Standard for Binary
5507 | Floating-Point Arithmetic---which means in particular that the conversion
5508 | is rounded according to the current rounding mode.  If `a' is a NaN, the
5509 | largest positive integer is returned.  Otherwise, if the conversion
5510 | overflows, the largest integer with the same sign as `a' is returned.
5511 *----------------------------------------------------------------------------*/
5512
5513 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5514 {
5515     bool aSign;
5516     int32_t aExp, shiftCount;
5517     uint64_t aSig;
5518
5519     if (floatx80_invalid_encoding(a)) {
5520         float_raise(float_flag_invalid, status);
5521         return 1 << 31;
5522     }
5523     aSig = extractFloatx80Frac( a );
5524     aExp = extractFloatx80Exp( a );
5525     aSign = extractFloatx80Sign( a );
5526     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5527     shiftCount = 0x4037 - aExp;
5528     if ( shiftCount <= 0 ) shiftCount = 1;
5529     shift64RightJamming( aSig, shiftCount, &aSig );
5530     return roundAndPackInt32(aSign, aSig, status);
5531
5532 }
5533
5534 /*----------------------------------------------------------------------------
5535 | Returns the result of converting the extended double-precision floating-
5536 | point value `a' to the 32-bit two's complement integer format.  The
5537 | conversion is performed according to the IEC/IEEE Standard for Binary
5538 | Floating-Point Arithmetic, except that the conversion is always rounded
5539 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5540 | Otherwise, if the conversion overflows, the largest integer with the same
5541 | sign as `a' is returned.
5542 *----------------------------------------------------------------------------*/
5543
5544 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5545 {
5546     bool aSign;
5547     int32_t aExp, shiftCount;
5548     uint64_t aSig, savedASig;
5549     int32_t z;
5550
5551     if (floatx80_invalid_encoding(a)) {
5552         float_raise(float_flag_invalid, status);
5553         return 1 << 31;
5554     }
5555     aSig = extractFloatx80Frac( a );
5556     aExp = extractFloatx80Exp( a );
5557     aSign = extractFloatx80Sign( a );
5558     if ( 0x401E < aExp ) {
5559         if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5560         goto invalid;
5561     }
5562     else if ( aExp < 0x3FFF ) {
5563         if (aExp || aSig) {
5564             float_raise(float_flag_inexact, status);
5565         }
5566         return 0;
5567     }
5568     shiftCount = 0x403E - aExp;
5569     savedASig = aSig;
5570     aSig >>= shiftCount;
5571     z = aSig;
5572     if ( aSign ) z = - z;
5573     if ( ( z < 0 ) ^ aSign ) {
5574  invalid:
5575         float_raise(float_flag_invalid, status);
5576         return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5577     }
5578     if ( ( aSig<<shiftCount ) != savedASig ) {
5579         float_raise(float_flag_inexact, status);
5580     }
5581     return z;
5582
5583 }
5584
5585 /*----------------------------------------------------------------------------
5586 | Returns the result of converting the extended double-precision floating-
5587 | point value `a' to the 64-bit two's complement integer format.  The
5588 | conversion is performed according to the IEC/IEEE Standard for Binary
5589 | Floating-Point Arithmetic---which means in particular that the conversion
5590 | is rounded according to the current rounding mode.  If `a' is a NaN,
5591 | the largest positive integer is returned.  Otherwise, if the conversion
5592 | overflows, the largest integer with the same sign as `a' is returned.
5593 *----------------------------------------------------------------------------*/
5594
5595 int64_t floatx80_to_int64(floatx80 a, float_status *status)
5596 {
5597     bool aSign;
5598     int32_t aExp, shiftCount;
5599     uint64_t aSig, aSigExtra;
5600
5601     if (floatx80_invalid_encoding(a)) {
5602         float_raise(float_flag_invalid, status);
5603         return 1ULL << 63;
5604     }
5605     aSig = extractFloatx80Frac( a );
5606     aExp = extractFloatx80Exp( a );
5607     aSign = extractFloatx80Sign( a );
5608     shiftCount = 0x403E - aExp;
5609     if ( shiftCount <= 0 ) {
5610         if ( shiftCount ) {
5611             float_raise(float_flag_invalid, status);
5612             if (!aSign || floatx80_is_any_nan(a)) {
5613                 return INT64_MAX;
5614             }
5615             return INT64_MIN;
5616         }
5617         aSigExtra = 0;
5618     }
5619     else {
5620         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5621     }
5622     return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5623
5624 }
5625
5626 /*----------------------------------------------------------------------------
5627 | Returns the result of converting the extended double-precision floating-
5628 | point value `a' to the 64-bit two's complement integer format.  The
5629 | conversion is performed according to the IEC/IEEE Standard for Binary
5630 | Floating-Point Arithmetic, except that the conversion is always rounded
5631 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5632 | Otherwise, if the conversion overflows, the largest integer with the same
5633 | sign as `a' is returned.
5634 *----------------------------------------------------------------------------*/
5635
5636 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
5637 {
5638     bool aSign;
5639     int32_t aExp, shiftCount;
5640     uint64_t aSig;
5641     int64_t z;
5642
5643     if (floatx80_invalid_encoding(a)) {
5644         float_raise(float_flag_invalid, status);
5645         return 1ULL << 63;
5646     }
5647     aSig = extractFloatx80Frac( a );
5648     aExp = extractFloatx80Exp( a );
5649     aSign = extractFloatx80Sign( a );
5650     shiftCount = aExp - 0x403E;
5651     if ( 0 <= shiftCount ) {
5652         aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
5653         if ( ( a.high != 0xC03E ) || aSig ) {
5654             float_raise(float_flag_invalid, status);
5655             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5656                 return INT64_MAX;
5657             }
5658         }
5659         return INT64_MIN;
5660     }
5661     else if ( aExp < 0x3FFF ) {
5662         if (aExp | aSig) {
5663             float_raise(float_flag_inexact, status);
5664         }
5665         return 0;
5666     }
5667     z = aSig>>( - shiftCount );
5668     if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
5669         float_raise(float_flag_inexact, status);
5670     }
5671     if ( aSign ) z = - z;
5672     return z;
5673
5674 }
5675
5676 /*----------------------------------------------------------------------------
5677 | Returns the result of converting the extended double-precision floating-
5678 | point value `a' to the single-precision floating-point format.  The
5679 | conversion is performed according to the IEC/IEEE Standard for Binary
5680 | Floating-Point Arithmetic.
5681 *----------------------------------------------------------------------------*/
5682
5683 float32 floatx80_to_float32(floatx80 a, float_status *status)
5684 {
5685     bool aSign;
5686     int32_t aExp;
5687     uint64_t aSig;
5688
5689     if (floatx80_invalid_encoding(a)) {
5690         float_raise(float_flag_invalid, status);
5691         return float32_default_nan(status);
5692     }
5693     aSig = extractFloatx80Frac( a );
5694     aExp = extractFloatx80Exp( a );
5695     aSign = extractFloatx80Sign( a );
5696     if ( aExp == 0x7FFF ) {
5697         if ( (uint64_t) ( aSig<<1 ) ) {
5698             float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5699                                              status);
5700             return float32_silence_nan(res, status);
5701         }
5702         return packFloat32( aSign, 0xFF, 0 );
5703     }
5704     shift64RightJamming( aSig, 33, &aSig );
5705     if ( aExp || aSig ) aExp -= 0x3F81;
5706     return roundAndPackFloat32(aSign, aExp, aSig, status);
5707
5708 }
5709
5710 /*----------------------------------------------------------------------------
5711 | Returns the result of converting the extended double-precision floating-
5712 | point value `a' to the double-precision floating-point format.  The
5713 | conversion is performed according to the IEC/IEEE Standard for Binary
5714 | Floating-Point Arithmetic.
5715 *----------------------------------------------------------------------------*/
5716
5717 float64 floatx80_to_float64(floatx80 a, float_status *status)
5718 {
5719     bool aSign;
5720     int32_t aExp;
5721     uint64_t aSig, zSig;
5722
5723     if (floatx80_invalid_encoding(a)) {
5724         float_raise(float_flag_invalid, status);
5725         return float64_default_nan(status);
5726     }
5727     aSig = extractFloatx80Frac( a );
5728     aExp = extractFloatx80Exp( a );
5729     aSign = extractFloatx80Sign( a );
5730     if ( aExp == 0x7FFF ) {
5731         if ( (uint64_t) ( aSig<<1 ) ) {
5732             float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5733                                              status);
5734             return float64_silence_nan(res, status);
5735         }
5736         return packFloat64( aSign, 0x7FF, 0 );
5737     }
5738     shift64RightJamming( aSig, 1, &zSig );
5739     if ( aExp || aSig ) aExp -= 0x3C01;
5740     return roundAndPackFloat64(aSign, aExp, zSig, status);
5741
5742 }
5743
5744 /*----------------------------------------------------------------------------
5745 | Returns the result of converting the extended double-precision floating-
5746 | point value `a' to the quadruple-precision floating-point format.  The
5747 | conversion is performed according to the IEC/IEEE Standard for Binary
5748 | Floating-Point Arithmetic.
5749 *----------------------------------------------------------------------------*/
5750
5751 float128 floatx80_to_float128(floatx80 a, float_status *status)
5752 {
5753     bool aSign;
5754     int aExp;
5755     uint64_t aSig, zSig0, zSig1;
5756
5757     if (floatx80_invalid_encoding(a)) {
5758         float_raise(float_flag_invalid, status);
5759         return float128_default_nan(status);
5760     }
5761     aSig = extractFloatx80Frac( a );
5762     aExp = extractFloatx80Exp( a );
5763     aSign = extractFloatx80Sign( a );
5764     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5765         float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5766                                            status);
5767         return float128_silence_nan(res, status);
5768     }
5769     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5770     return packFloat128( aSign, aExp, zSig0, zSig1 );
5771
5772 }
5773
5774 /*----------------------------------------------------------------------------
5775 | Rounds the extended double-precision floating-point value `a'
5776 | to the precision provided by floatx80_rounding_precision and returns the
5777 | result as an extended double-precision floating-point value.
5778 | The operation is performed according to the IEC/IEEE Standard for Binary
5779 | Floating-Point Arithmetic.
5780 *----------------------------------------------------------------------------*/
5781
5782 floatx80 floatx80_round(floatx80 a, float_status *status)
5783 {
5784     return roundAndPackFloatx80(status->floatx80_rounding_precision,
5785                                 extractFloatx80Sign(a),
5786                                 extractFloatx80Exp(a),
5787                                 extractFloatx80Frac(a), 0, status);
5788 }
5789
5790 /*----------------------------------------------------------------------------
5791 | Rounds the extended double-precision floating-point value `a' to an integer,
5792 | and returns the result as an extended quadruple-precision floating-point
5793 | value.  The operation is performed according to the IEC/IEEE Standard for
5794 | Binary Floating-Point Arithmetic.
5795 *----------------------------------------------------------------------------*/
5796
5797 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5798 {
5799     bool aSign;
5800     int32_t aExp;
5801     uint64_t lastBitMask, roundBitsMask;
5802     floatx80 z;
5803
5804     if (floatx80_invalid_encoding(a)) {
5805         float_raise(float_flag_invalid, status);
5806         return floatx80_default_nan(status);
5807     }
5808     aExp = extractFloatx80Exp( a );
5809     if ( 0x403E <= aExp ) {
5810         if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5811             return propagateFloatx80NaN(a, a, status);
5812         }
5813         return a;
5814     }
5815     if ( aExp < 0x3FFF ) {
5816         if (    ( aExp == 0 )
5817              && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
5818             return a;
5819         }
5820         float_raise(float_flag_inexact, status);
5821         aSign = extractFloatx80Sign( a );
5822         switch (status->float_rounding_mode) {
5823          case float_round_nearest_even:
5824             if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5825                ) {
5826                 return
5827                     packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5828             }
5829             break;
5830         case float_round_ties_away:
5831             if (aExp == 0x3FFE) {
5832                 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5833             }
5834             break;
5835          case float_round_down:
5836             return
5837                   aSign ?
5838                       packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5839                 : packFloatx80( 0, 0, 0 );
5840          case float_round_up:
5841             return
5842                   aSign ? packFloatx80( 1, 0, 0 )
5843                 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5844
5845         case float_round_to_zero:
5846             break;
5847         default:
5848             g_assert_not_reached();
5849         }
5850         return packFloatx80( aSign, 0, 0 );
5851     }
5852     lastBitMask = 1;
5853     lastBitMask <<= 0x403E - aExp;
5854     roundBitsMask = lastBitMask - 1;
5855     z = a;
5856     switch (status->float_rounding_mode) {
5857     case float_round_nearest_even:
5858         z.low += lastBitMask>>1;
5859         if ((z.low & roundBitsMask) == 0) {
5860             z.low &= ~lastBitMask;
5861         }
5862         break;
5863     case float_round_ties_away:
5864         z.low += lastBitMask >> 1;
5865         break;
5866     case float_round_to_zero:
5867         break;
5868     case float_round_up:
5869         if (!extractFloatx80Sign(z)) {
5870             z.low += roundBitsMask;
5871         }
5872         break;
5873     case float_round_down:
5874         if (extractFloatx80Sign(z)) {
5875             z.low += roundBitsMask;
5876         }
5877         break;
5878     default:
5879         abort();
5880     }
5881     z.low &= ~ roundBitsMask;
5882     if ( z.low == 0 ) {
5883         ++z.high;
5884         z.low = UINT64_C(0x8000000000000000);
5885     }
5886     if (z.low != a.low) {
5887         float_raise(float_flag_inexact, status);
5888     }
5889     return z;
5890
5891 }
5892
5893 /*----------------------------------------------------------------------------
5894 | Returns the remainder of the extended double-precision floating-point value
5895 | `a' with respect to the corresponding value `b'.  The operation is performed
5896 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
5897 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
5898 | the quotient toward zero instead.  '*quotient' is set to the low 64 bits of
5899 | the absolute value of the integer quotient.
5900 *----------------------------------------------------------------------------*/
5901
5902 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
5903                          float_status *status)
5904 {
5905     bool aSign, zSign;
5906     int32_t aExp, bExp, expDiff, aExpOrig;
5907     uint64_t aSig0, aSig1, bSig;
5908     uint64_t q, term0, term1, alternateASig0, alternateASig1;
5909
5910     *quotient = 0;
5911     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5912         float_raise(float_flag_invalid, status);
5913         return floatx80_default_nan(status);
5914     }
5915     aSig0 = extractFloatx80Frac( a );
5916     aExpOrig = aExp = extractFloatx80Exp( a );
5917     aSign = extractFloatx80Sign( a );
5918     bSig = extractFloatx80Frac( b );
5919     bExp = extractFloatx80Exp( b );
5920     if ( aExp == 0x7FFF ) {
5921         if (    (uint64_t) ( aSig0<<1 )
5922              || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
5923             return propagateFloatx80NaN(a, b, status);
5924         }
5925         goto invalid;
5926     }
5927     if ( bExp == 0x7FFF ) {
5928         if ((uint64_t)(bSig << 1)) {
5929             return propagateFloatx80NaN(a, b, status);
5930         }
5931         if (aExp == 0 && aSig0 >> 63) {
5932             /*
5933              * Pseudo-denormal argument must be returned in normalized
5934              * form.
5935              */
5936             return packFloatx80(aSign, 1, aSig0);
5937         }
5938         return a;
5939     }
5940     if ( bExp == 0 ) {
5941         if ( bSig == 0 ) {
5942  invalid:
5943             float_raise(float_flag_invalid, status);
5944             return floatx80_default_nan(status);
5945         }
5946         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5947     }
5948     if ( aExp == 0 ) {
5949         if ( aSig0 == 0 ) return a;
5950         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5951     }
5952     zSign = aSign;
5953     expDiff = aExp - bExp;
5954     aSig1 = 0;
5955     if ( expDiff < 0 ) {
5956         if ( mod || expDiff < -1 ) {
5957             if (aExp == 1 && aExpOrig == 0) {
5958                 /*
5959                  * Pseudo-denormal argument must be returned in
5960                  * normalized form.
5961                  */
5962                 return packFloatx80(aSign, aExp, aSig0);
5963             }
5964             return a;
5965         }
5966         shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
5967         expDiff = 0;
5968     }
5969     *quotient = q = ( bSig <= aSig0 );
5970     if ( q ) aSig0 -= bSig;
5971     expDiff -= 64;
5972     while ( 0 < expDiff ) {
5973         q = estimateDiv128To64( aSig0, aSig1, bSig );
5974         q = ( 2 < q ) ? q - 2 : 0;
5975         mul64To128( bSig, q, &term0, &term1 );
5976         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5977         shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
5978         expDiff -= 62;
5979         *quotient <<= 62;
5980         *quotient += q;
5981     }
5982     expDiff += 64;
5983     if ( 0 < expDiff ) {
5984         q = estimateDiv128To64( aSig0, aSig1, bSig );
5985         q = ( 2 < q ) ? q - 2 : 0;
5986         q >>= 64 - expDiff;
5987         mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
5988         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5989         shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
5990         while ( le128( term0, term1, aSig0, aSig1 ) ) {
5991             ++q;
5992             sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5993         }
5994         if (expDiff < 64) {
5995             *quotient <<= expDiff;
5996         } else {
5997             *quotient = 0;
5998         }
5999         *quotient += q;
6000     }
6001     else {
6002         term1 = 0;
6003         term0 = bSig;
6004     }
6005     if (!mod) {
6006         sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6007         if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6008                 || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6009                         && ( q & 1 ) )
6010             ) {
6011             aSig0 = alternateASig0;
6012             aSig1 = alternateASig1;
6013             zSign = ! zSign;
6014             ++*quotient;
6015         }
6016     }
6017     return
6018         normalizeRoundAndPackFloatx80(
6019             floatx80_precision_x, zSign, bExp + expDiff, aSig0, aSig1, status);
6020
6021 }
6022
6023 /*----------------------------------------------------------------------------
6024 | Returns the remainder of the extended double-precision floating-point value
6025 | `a' with respect to the corresponding value `b'.  The operation is performed
6026 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6027 *----------------------------------------------------------------------------*/
6028
6029 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6030 {
6031     uint64_t quotient;
6032     return floatx80_modrem(a, b, false, &quotient, status);
6033 }
6034
6035 /*----------------------------------------------------------------------------
6036 | Returns the remainder of the extended double-precision floating-point value
6037 | `a' with respect to the corresponding value `b', with the quotient truncated
6038 | toward zero.
6039 *----------------------------------------------------------------------------*/
6040
6041 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6042 {
6043     uint64_t quotient;
6044     return floatx80_modrem(a, b, true, &quotient, status);
6045 }
6046
6047 /*----------------------------------------------------------------------------
6048 | Returns the square root of the extended double-precision floating-point
6049 | value `a'.  The operation is performed according to the IEC/IEEE Standard
6050 | for Binary Floating-Point Arithmetic.
6051 *----------------------------------------------------------------------------*/
6052
6053 floatx80 floatx80_sqrt(floatx80 a, float_status *status)
6054 {
6055     bool aSign;
6056     int32_t aExp, zExp;
6057     uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6058     uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6059
6060     if (floatx80_invalid_encoding(a)) {
6061         float_raise(float_flag_invalid, status);
6062         return floatx80_default_nan(status);
6063     }
6064     aSig0 = extractFloatx80Frac( a );
6065     aExp = extractFloatx80Exp( a );
6066     aSign = extractFloatx80Sign( a );
6067     if ( aExp == 0x7FFF ) {
6068         if ((uint64_t)(aSig0 << 1)) {
6069             return propagateFloatx80NaN(a, a, status);
6070         }
6071         if ( ! aSign ) return a;
6072         goto invalid;
6073     }
6074     if ( aSign ) {
6075         if ( ( aExp | aSig0 ) == 0 ) return a;
6076  invalid:
6077         float_raise(float_flag_invalid, status);
6078         return floatx80_default_nan(status);
6079     }
6080     if ( aExp == 0 ) {
6081         if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6082         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6083     }
6084     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6085     zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6086     shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6087     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6088     doubleZSig0 = zSig0<<1;
6089     mul64To128( zSig0, zSig0, &term0, &term1 );
6090     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
6091     while ( (int64_t) rem0 < 0 ) {
6092         --zSig0;
6093         doubleZSig0 -= 2;
6094         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6095     }
6096     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6097     if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
6098         if ( zSig1 == 0 ) zSig1 = 1;
6099         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6100         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6101         mul64To128( zSig1, zSig1, &term2, &term3 );
6102         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
6103         while ( (int64_t) rem1 < 0 ) {
6104             --zSig1;
6105             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6106             term3 |= 1;
6107             term2 |= doubleZSig0;
6108             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6109         }
6110         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6111     }
6112     shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6113     zSig0 |= doubleZSig0;
6114     return roundAndPackFloatx80(status->floatx80_rounding_precision,
6115                                 0, zExp, zSig0, zSig1, status);
6116 }
6117
6118 /*----------------------------------------------------------------------------
6119 | Returns the result of converting the quadruple-precision floating-point
6120 | value `a' to the extended double-precision floating-point format.  The
6121 | conversion is performed according to the IEC/IEEE Standard for Binary
6122 | Floating-Point Arithmetic.
6123 *----------------------------------------------------------------------------*/
6124
6125 floatx80 float128_to_floatx80(float128 a, float_status *status)
6126 {
6127     bool aSign;
6128     int32_t aExp;
6129     uint64_t aSig0, aSig1;
6130
6131     aSig1 = extractFloat128Frac1( a );
6132     aSig0 = extractFloat128Frac0( a );
6133     aExp = extractFloat128Exp( a );
6134     aSign = extractFloat128Sign( a );
6135     if ( aExp == 0x7FFF ) {
6136         if ( aSig0 | aSig1 ) {
6137             floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6138                                                status);
6139             return floatx80_silence_nan(res, status);
6140         }
6141         return packFloatx80(aSign, floatx80_infinity_high,
6142                                    floatx80_infinity_low);
6143     }
6144     if ( aExp == 0 ) {
6145         if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6146         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6147     }
6148     else {
6149         aSig0 |= UINT64_C(0x0001000000000000);
6150     }
6151     shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
6152     return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
6153
6154 }
6155
6156 /*----------------------------------------------------------------------------
6157 | Returns the remainder of the quadruple-precision floating-point value `a'
6158 | with respect to the corresponding value `b'.  The operation is performed
6159 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6160 *----------------------------------------------------------------------------*/
6161
6162 float128 float128_rem(float128 a, float128 b, float_status *status)
6163 {
6164     bool aSign, zSign;
6165     int32_t aExp, bExp, expDiff;
6166     uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6167     uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6168     int64_t sigMean0;
6169
6170     aSig1 = extractFloat128Frac1( a );
6171     aSig0 = extractFloat128Frac0( a );
6172     aExp = extractFloat128Exp( a );
6173     aSign = extractFloat128Sign( a );
6174     bSig1 = extractFloat128Frac1( b );
6175     bSig0 = extractFloat128Frac0( b );
6176     bExp = extractFloat128Exp( b );
6177     if ( aExp == 0x7FFF ) {
6178         if (    ( aSig0 | aSig1 )
6179              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6180             return propagateFloat128NaN(a, b, status);
6181         }
6182         goto invalid;
6183     }
6184     if ( bExp == 0x7FFF ) {
6185         if (bSig0 | bSig1) {
6186             return propagateFloat128NaN(a, b, status);
6187         }
6188         return a;
6189     }
6190     if ( bExp == 0 ) {
6191         if ( ( bSig0 | bSig1 ) == 0 ) {
6192  invalid:
6193             float_raise(float_flag_invalid, status);
6194             return float128_default_nan(status);
6195         }
6196         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6197     }
6198     if ( aExp == 0 ) {
6199         if ( ( aSig0 | aSig1 ) == 0 ) return a;
6200         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6201     }
6202     expDiff = aExp - bExp;
6203     if ( expDiff < -1 ) return a;
6204     shortShift128Left(
6205         aSig0 | UINT64_C(0x0001000000000000),
6206         aSig1,
6207         15 - ( expDiff < 0 ),
6208         &aSig0,
6209         &aSig1
6210     );
6211     shortShift128Left(
6212         bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
6213     q = le128( bSig0, bSig1, aSig0, aSig1 );
6214     if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6215     expDiff -= 64;
6216     while ( 0 < expDiff ) {
6217         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6218         q = ( 4 < q ) ? q - 4 : 0;
6219         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6220         shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6221         shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6222         sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6223         expDiff -= 61;
6224     }
6225     if ( -64 < expDiff ) {
6226         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6227         q = ( 4 < q ) ? q - 4 : 0;
6228         q >>= - expDiff;
6229         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6230         expDiff += 52;
6231         if ( expDiff < 0 ) {
6232             shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6233         }
6234         else {
6235             shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6236         }
6237         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6238         sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6239     }
6240     else {
6241         shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6242         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6243     }
6244     do {
6245         alternateASig0 = aSig0;
6246         alternateASig1 = aSig1;
6247         ++q;
6248         sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6249     } while ( 0 <= (int64_t) aSig0 );
6250     add128(
6251         aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
6252     if (    ( sigMean0 < 0 )
6253          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6254         aSig0 = alternateASig0;
6255         aSig1 = alternateASig1;
6256     }
6257     zSign = ( (int64_t) aSig0 < 0 );
6258     if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
6259     return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6260                                          status);
6261 }
6262
6263 static inline FloatRelation
6264 floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
6265                           float_status *status)
6266 {
6267     bool aSign, bSign;
6268
6269     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6270         float_raise(float_flag_invalid, status);
6271         return float_relation_unordered;
6272     }
6273     if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6274           ( extractFloatx80Frac( a )<<1 ) ) ||
6275         ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6276           ( extractFloatx80Frac( b )<<1 ) )) {
6277         if (!is_quiet ||
6278             floatx80_is_signaling_nan(a, status) ||
6279             floatx80_is_signaling_nan(b, status)) {
6280             float_raise(float_flag_invalid, status);
6281         }
6282         return float_relation_unordered;
6283     }
6284     aSign = extractFloatx80Sign( a );
6285     bSign = extractFloatx80Sign( b );
6286     if ( aSign != bSign ) {
6287
6288         if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6289              ( ( a.low | b.low ) == 0 ) ) {
6290             /* zero case */
6291             return float_relation_equal;
6292         } else {
6293             return 1 - (2 * aSign);
6294         }
6295     } else {
6296         /* Normalize pseudo-denormals before comparison.  */
6297         if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
6298             ++a.high;
6299         }
6300         if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
6301             ++b.high;
6302         }
6303         if (a.low == b.low && a.high == b.high) {
6304             return float_relation_equal;
6305         } else {
6306             return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6307         }
6308     }
6309 }
6310
6311 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
6312 {
6313     return floatx80_compare_internal(a, b, 0, status);
6314 }
6315
6316 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
6317                                      float_status *status)
6318 {
6319     return floatx80_compare_internal(a, b, 1, status);
6320 }
6321
6322 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
6323 {
6324     bool aSign;
6325     int32_t aExp;
6326     uint64_t aSig;
6327
6328     if (floatx80_invalid_encoding(a)) {
6329         float_raise(float_flag_invalid, status);
6330         return floatx80_default_nan(status);
6331     }
6332     aSig = extractFloatx80Frac( a );
6333     aExp = extractFloatx80Exp( a );
6334     aSign = extractFloatx80Sign( a );
6335
6336     if ( aExp == 0x7FFF ) {
6337         if ( aSig<<1 ) {
6338             return propagateFloatx80NaN(a, a, status);
6339         }
6340         return a;
6341     }
6342
6343     if (aExp == 0) {
6344         if (aSig == 0) {
6345             return a;
6346         }
6347         aExp++;
6348     }
6349
6350     if (n > 0x10000) {
6351         n = 0x10000;
6352     } else if (n < -0x10000) {
6353         n = -0x10000;
6354     }
6355
6356     aExp += n;
6357     return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
6358                                          aSign, aExp, aSig, 0, status);
6359 }
6360
6361 static void __attribute__((constructor)) softfloat_init(void)
6362 {
6363     union_float64 ua, ub, uc, ur;
6364
6365     if (QEMU_NO_HARDFLOAT) {
6366         return;
6367     }
6368     /*
6369      * Test that the host's FMA is not obviously broken. For example,
6370      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6371      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6372      */
6373     ua.s = 0x0020000000000001ULL;
6374     ub.s = 0x3ca0000000000000ULL;
6375     uc.s = 0x0020000000000000ULL;
6376     ur.h = fma(ua.h, ub.h, uc.h);
6377     if (ur.s != 0x0020000000000001ULL) {
6378         force_soft_fma = true;
6379     }
6380 }