4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
44 ===============================================================================
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
85 #include "qemu/osdep.h"
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
90 /* We only need stdlib for abort() */
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
/*
 * Input-flush helper generators.  GEN_INPUT_FLUSH__NOCHECK(name, soft_t)
 * defines a helper that replaces a denormal *a with a like-signed zero and
 * raises float_flag_input_denormal.  The FLUSH1/2/3 variants flush one,
 * two or three inputs, and do nothing unless s->flush_inputs_to_zero.
 * NOTE(review): this view of the file is mangled — the macro-body brace
 * lines are missing; verify against upstream before building.
 */
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 float_raise(float_flag_input_denormal, s); \
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck
, float32
)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck
, float64
)
141 #undef GEN_INPUT_FLUSH__NOCHECK
/* One-input flush, gated on s->flush_inputs_to_zero. */
143 #define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
146 if (likely(!s->flush_inputs_to_zero)) { \
149 soft_t ## _input_flush__nocheck(a, s); \
152 GEN_INPUT_FLUSH1(float32_input_flush1
, float32
)
153 GEN_INPUT_FLUSH1(float64_input_flush1
, float64
)
154 #undef GEN_INPUT_FLUSH1
/* Two-input flush, gated on s->flush_inputs_to_zero. */
156 #define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
159 if (likely(!s->flush_inputs_to_zero)) { \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
166 GEN_INPUT_FLUSH2(float32_input_flush2
, float32
)
167 GEN_INPUT_FLUSH2(float64_input_flush2
, float64
)
168 #undef GEN_INPUT_FLUSH2
/* Three-input flush, gated on s->flush_inputs_to_zero. */
170 #define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
173 if (likely(!s->flush_inputs_to_zero)) { \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
181 GEN_INPUT_FLUSH3(float32_input_flush3
, float32
)
182 GEN_INPUT_FLUSH3(float64_input_flush3
, float64
)
183 #undef GEN_INPUT_FLUSH3
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF 1
215 # define QEMU_HARDFLOAT_USE_ISINF 0
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 # warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
/*
 * can_use_fpu: true when the hardfloat fast path is usable — the inexact
 * flag must already be set and rounding must be to nearest-even.
 * NOTE(review): brace lines are missing from this mangled view.
 */
235 static inline bool can_use_fpu(const float_status
*s
)
237 if (QEMU_NO_HARDFLOAT
) {
240 return likely(s
->float_exception_flags
& float_flag_inexact
&&
241 s
->float_rounding_mode
== float_round_nearest_even
);
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
/*
 * Function-pointer types for the two-input hardfloat dispatchers:
 * *_check_fn are the pre/post operand checks, soft_* the softfloat
 * fallbacks, hard_* the native host-FPU implementations.
 */
266 typedef bool (*f32_check_fn
)(union_float32 a
, union_float32 b
);
267 typedef bool (*f64_check_fn
)(union_float64 a
, union_float64 b
);
269 typedef float32 (*soft_f32_op2_fn
)(float32 a
, float32 b
, float_status
*s
);
270 typedef float64 (*soft_f64_op2_fn
)(float64 a
, float64 b
, float_status
*s
);
271 typedef float (*hard_f32_op2_fn
)(float a
, float b
);
272 typedef double (*hard_f64_op2_fn
)(double a
, double b
);
274 /* 2-input is-zero-or-normal */
/* Both operands are zero or normal; uses fpclassify() when USE_FP. */
275 static inline bool f32_is_zon2(union_float32 a
, union_float32 b
)
277 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
282 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
283 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
285 return float32_is_zero_or_normal(a
.s
) &&
286 float32_is_zero_or_normal(b
.s
);
/* double-precision variant of the 2-input check */
289 static inline bool f64_is_zon2(union_float64 a
, union_float64 b
)
291 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
292 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
293 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
295 return float64_is_zero_or_normal(a
.s
) &&
296 float64_is_zero_or_normal(b
.s
);
299 /* 3-input is-zero-or-normal */
301 bool f32_is_zon3(union_float32 a
, union_float32 b
, union_float32 c
)
303 if (QEMU_HARDFLOAT_3F32_USE_FP
) {
304 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
305 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
306 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
308 return float32_is_zero_or_normal(a
.s
) &&
309 float32_is_zero_or_normal(b
.s
) &&
310 float32_is_zero_or_normal(c
.s
);
/* double-precision variant of the 3-input check */
314 bool f64_is_zon3(union_float64 a
, union_float64 b
, union_float64 c
)
316 if (QEMU_HARDFLOAT_3F64_USE_FP
) {
317 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
318 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
319 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
321 return float64_is_zero_or_normal(a
.s
) &&
322 float64_is_zero_or_normal(b
.s
) &&
323 float64_is_zero_or_normal(c
.s
);
/*
 * Infinity checks.  NOTE(review): the QEMU_HARDFLOAT_USE_ISINF fast-path
 * body (presumably `return isinf(a.h);`) is missing from this mangled
 * view — confirm against upstream.
 */
326 static inline bool f32_is_inf(union_float32 a
)
328 if (QEMU_HARDFLOAT_USE_ISINF
) {
331 return float32_is_infinity(a
.s
);
/* double-precision variant */
334 static inline bool f64_is_inf(union_float64 a
)
336 if (QEMU_HARDFLOAT_USE_ISINF
) {
339 return float64_is_infinity(a
.s
);
/*
 * float32_gen2: generic 2-operand single-precision dispatcher.  Runs the
 * native `hard' op when the status allows it and `pre' accepts the
 * operands; raises overflow if the hard result is infinite; otherwise
 * falls back to the softfloat `soft' implementation.
 * NOTE(review): several lines (braces, union initialisation from xa/xb,
 * and the hardfloat return) are missing from this mangled view.
 */
342 static inline float32
343 float32_gen2(float32 xa
, float32 xb
, float_status
*s
,
344 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
,
345 f32_check_fn pre
, f32_check_fn post
)
347 union_float32 ua
, ub
, ur
;
352 if (unlikely(!can_use_fpu(s
))) {
356 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
357 if (unlikely(!pre(ua
, ub
))) {
361 ur
.h
= hard(ua
.h
, ub
.h
);
362 if (unlikely(f32_is_inf(ur
))) {
363 float_raise(float_flag_overflow
, s
);
364 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
) && post(ua
, ub
)) {
370 return soft(ua
.s
, ub
.s
, s
);
/*
 * float64_gen2: double-precision twin of float32_gen2 (same dispatch
 * strategy; uses fabs()/DBL_MIN instead of fabsf()/FLT_MIN).
 * NOTE(review): same missing lines as float32_gen2 in this view.
 */
373 static inline float64
374 float64_gen2(float64 xa
, float64 xb
, float_status
*s
,
375 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
,
376 f64_check_fn pre
, f64_check_fn post
)
378 union_float64 ua
, ub
, ur
;
383 if (unlikely(!can_use_fpu(s
))) {
387 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
388 if (unlikely(!pre(ua
, ub
))) {
392 ur
.h
= hard(ua
.h
, ub
.h
);
393 if (unlikely(f64_is_inf(ur
))) {
394 float_raise(float_flag_overflow
, s
);
395 } else if (unlikely(fabs(ur
.h
) <= DBL_MIN
) && post(ua
, ub
)) {
401 return soft(ua
.s
, ub
.s
, s
);
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
408 static inline uint32_t extractFloat32Frac(float32 a
)
410 return float32_val(a
) & 0x007FFFFF;
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
417 static inline int extractFloat32Exp(float32 a
)
419 return (float32_val(a
) >> 23) & 0xFF;
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
426 static inline bool extractFloat32Sign(float32 a
)
428 return float32_val(a
) >> 31;
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
435 static inline uint64_t extractFloat64Frac(float64 a
)
437 return float64_val(a
) & UINT64_C(0x000FFFFFFFFFFFFF);
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
444 static inline int extractFloat64Exp(float64 a
)
446 return (float64_val(a
) >> 52) & 0x7FF;
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
453 static inline bool extractFloat64Sign(float64 a
)
455 return float64_val(a
) >> 63;
459 * Classify a floating point number. Everything above float_class_qnan
460 * is a NaN so cls >= float_class_qnan is any NaN.
463 typedef enum __attribute__ ((__packed__
)) {
464 float_class_unclassified
,
468 float_class_qnan
, /* all NaNs from here */
472 #define float_cmask(bit) (1u << (bit))
475 float_cmask_zero
= float_cmask(float_class_zero
),
476 float_cmask_normal
= float_cmask(float_class_normal
),
477 float_cmask_inf
= float_cmask(float_class_inf
),
478 float_cmask_qnan
= float_cmask(float_class_qnan
),
479 float_cmask_snan
= float_cmask(float_class_snan
),
481 float_cmask_infzero
= float_cmask_zero
| float_cmask_inf
,
482 float_cmask_anynan
= float_cmask_qnan
| float_cmask_snan
,
485 /* Flags for parts_minmax. */
487 /* Set for minimum; clear for maximum. */
489 /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
491 /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
495 /* Simple helpers for checking if, or what kind of, NaN we have */
496 static inline __attribute__((unused
)) bool is_nan(FloatClass c
)
498 return unlikely(c
>= float_class_qnan
);
501 static inline __attribute__((unused
)) bool is_snan(FloatClass c
)
503 return c
== float_class_snan
;
506 static inline __attribute__((unused
)) bool is_qnan(FloatClass c
)
508 return c
== float_class_qnan
;
512 * Structure holding all of the decomposed parts of a float.
513 * The exponent is unbiased and the fraction is normalized.
515 * The fraction words are stored in big-endian word ordering,
516 * so that truncation from a larger format to a smaller format
517 * can be done simply by ignoring subsequent elements.
525 /* Routines that know the structure may reference the singular name. */
528 * Routines expanded with multiple structures reference "hi" and "lo"
529 * depending on the operation. In FloatParts64, "hi" and "lo" are
530 * both the same word and aliased here.
550 uint64_t frac_hm
; /* high-middle */
551 uint64_t frac_lm
; /* low-middle */
555 /* These apply to the most significant word of each FloatPartsN. */
556 #define DECOMPOSED_BINARY_POINT 63
557 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
559 /* Structure holding all of the relevant parameters for a format.
560 * exp_size: the size of the exponent field
561 * exp_bias: the offset applied to the exponent field
562 * exp_max: the maximum normalised exponent
563 * frac_size: the size of the fraction field
564 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
565 * The following are computed based the size of fraction
566 * round_mask: bits below lsb which must be rounded
567 * The following optional modifiers are available:
568 * arm_althp: handle ARM Alternative Half Precision
580 /* Expand fields based on the size of exponent and fraction */
581 #define FLOAT_PARAMS_(E) \
583 .exp_bias = ((1 << E) - 1) >> 1, \
584 .exp_max = (1 << E) - 1
586 #define FLOAT_PARAMS(E, F) \
589 .frac_shift = (-F - 1) & 63, \
590 .round_mask = (1ull << ((-F - 1) & 63)) - 1
592 static const FloatFmt float16_params
= {
596 static const FloatFmt float16_params_ahp
= {
601 static const FloatFmt bfloat16_params
= {
605 static const FloatFmt float32_params
= {
609 static const FloatFmt float64_params
= {
613 static const FloatFmt float128_params
= {
614 FLOAT_PARAMS(15, 112)
617 #define FLOATX80_PARAMS(R) \
619 .frac_size = R == 64 ? 63 : R, \
621 .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
623 static const FloatFmt floatx80_params
[3] = {
624 [floatx80_precision_s
] = { FLOATX80_PARAMS(23) },
625 [floatx80_precision_d
] = { FLOATX80_PARAMS(52) },
626 [floatx80_precision_x
] = { FLOATX80_PARAMS(64) },
629 /* Unpack a float to parts, but do not canonicalize. */
630 static void unpack_raw64(FloatParts64
*r
, const FloatFmt
*fmt
, uint64_t raw
)
632 const int f_size
= fmt
->frac_size
;
633 const int e_size
= fmt
->exp_size
;
635 *r
= (FloatParts64
) {
636 .cls
= float_class_unclassified
,
637 .sign
= extract64(raw
, f_size
+ e_size
, 1),
638 .exp
= extract64(raw
, f_size
, e_size
),
639 .frac
= extract64(raw
, 0, f_size
)
643 static inline void float16_unpack_raw(FloatParts64
*p
, float16 f
)
645 unpack_raw64(p
, &float16_params
, f
);
648 static inline void bfloat16_unpack_raw(FloatParts64
*p
, bfloat16 f
)
650 unpack_raw64(p
, &bfloat16_params
, f
);
653 static inline void float32_unpack_raw(FloatParts64
*p
, float32 f
)
655 unpack_raw64(p
, &float32_params
, f
);
658 static inline void float64_unpack_raw(FloatParts64
*p
, float64 f
)
660 unpack_raw64(p
, &float64_params
, f
);
/*
 * Unpack an 80-bit extended float without canonicalizing: sign and 15-bit
 * exponent come from f.high.  NOTE(review): the fraction initializer
 * (presumably .frac_hi = f.low) and closing braces are missing from this
 * mangled view — confirm against upstream.
 */
663 static void floatx80_unpack_raw(FloatParts128
*p
, floatx80 f
)
665 *p
= (FloatParts128
) {
666 .cls
= float_class_unclassified
,
667 .sign
= extract32(f
.high
, 15, 1),
668 .exp
= extract32(f
.high
, 0, 15),
/*
 * Unpack a float128 without canonicalizing: the high doubleword holds
 * sign, exponent, and the top fraction bits.  NOTE(review): the low
 * fraction initializer (presumably .frac_lo = f.low) and closing braces
 * are missing from this mangled view — confirm against upstream.
 */
673 static void float128_unpack_raw(FloatParts128
*p
, float128 f
)
675 const int f_size
= float128_params
.frac_size
- 64;
676 const int e_size
= float128_params
.exp_size
;
678 *p
= (FloatParts128
) {
679 .cls
= float_class_unclassified
,
680 .sign
= extract64(f
.high
, f_size
+ e_size
, 1),
681 .exp
= extract64(f
.high
, f_size
, e_size
),
682 .frac_hi
= extract64(f
.high
, 0, f_size
),
687 /* Pack a float from parts, but do not canonicalize. */
688 static uint64_t pack_raw64(const FloatParts64
*p
, const FloatFmt
*fmt
)
690 const int f_size
= fmt
->frac_size
;
691 const int e_size
= fmt
->exp_size
;
694 ret
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
695 ret
= deposit64(ret
, f_size
, e_size
, p
->exp
);
696 ret
= deposit64(ret
, 0, f_size
, p
->frac
);
700 static inline float16
float16_pack_raw(const FloatParts64
*p
)
702 return make_float16(pack_raw64(p
, &float16_params
));
705 static inline bfloat16
bfloat16_pack_raw(const FloatParts64
*p
)
707 return pack_raw64(p
, &bfloat16_params
);
710 static inline float32
float32_pack_raw(const FloatParts64
*p
)
712 return make_float32(pack_raw64(p
, &float32_params
));
715 static inline float64
float64_pack_raw(const FloatParts64
*p
)
717 return make_float64(pack_raw64(p
, &float64_params
));
720 static float128
float128_pack_raw(const FloatParts128
*p
)
722 const int f_size
= float128_params
.frac_size
- 64;
723 const int e_size
= float128_params
.exp_size
;
726 hi
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
727 hi
= deposit64(hi
, f_size
, e_size
, p
->exp
);
728 hi
= deposit64(hi
, 0, f_size
, p
->frac_hi
);
729 return make_float128(hi
, p
->frac_lo
);
732 /*----------------------------------------------------------------------------
733 | Functions and definitions to determine: (1) whether tininess for underflow
734 | is detected before or after rounding by default, (2) what (if anything)
735 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
736 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
737 | are propagated from function inputs to output. These details are target-
739 *----------------------------------------------------------------------------*/
740 #include "softfloat-specialize.c.inc"
742 #define PARTS_GENERIC_64_128(NAME, P) \
743 QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
745 #define PARTS_GENERIC_64_128_256(NAME, P) \
746 QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
747 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
749 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
750 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
752 static void parts64_return_nan(FloatParts64
*a
, float_status
*s
);
753 static void parts128_return_nan(FloatParts128
*a
, float_status
*s
);
755 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
757 static FloatParts64
*parts64_pick_nan(FloatParts64
*a
, FloatParts64
*b
,
759 static FloatParts128
*parts128_pick_nan(FloatParts128
*a
, FloatParts128
*b
,
762 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
764 static FloatParts64
*parts64_pick_nan_muladd(FloatParts64
*a
, FloatParts64
*b
,
765 FloatParts64
*c
, float_status
*s
,
766 int ab_mask
, int abc_mask
);
767 static FloatParts128
*parts128_pick_nan_muladd(FloatParts128
*a
,
771 int ab_mask
, int abc_mask
);
773 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
774 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
776 static void parts64_canonicalize(FloatParts64
*p
, float_status
*status
,
777 const FloatFmt
*fmt
);
778 static void parts128_canonicalize(FloatParts128
*p
, float_status
*status
,
779 const FloatFmt
*fmt
);
781 #define parts_canonicalize(A, S, F) \
782 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
784 static void parts64_uncanon_normal(FloatParts64
*p
, float_status
*status
,
785 const FloatFmt
*fmt
);
786 static void parts128_uncanon_normal(FloatParts128
*p
, float_status
*status
,
787 const FloatFmt
*fmt
);
789 #define parts_uncanon_normal(A, S, F) \
790 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
792 static void parts64_uncanon(FloatParts64
*p
, float_status
*status
,
793 const FloatFmt
*fmt
);
794 static void parts128_uncanon(FloatParts128
*p
, float_status
*status
,
795 const FloatFmt
*fmt
);
797 #define parts_uncanon(A, S, F) \
798 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
800 static void parts64_add_normal(FloatParts64
*a
, FloatParts64
*b
);
801 static void parts128_add_normal(FloatParts128
*a
, FloatParts128
*b
);
802 static void parts256_add_normal(FloatParts256
*a
, FloatParts256
*b
);
804 #define parts_add_normal(A, B) \
805 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
807 static bool parts64_sub_normal(FloatParts64
*a
, FloatParts64
*b
);
808 static bool parts128_sub_normal(FloatParts128
*a
, FloatParts128
*b
);
809 static bool parts256_sub_normal(FloatParts256
*a
, FloatParts256
*b
);
811 #define parts_sub_normal(A, B) \
812 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
814 static FloatParts64
*parts64_addsub(FloatParts64
*a
, FloatParts64
*b
,
815 float_status
*s
, bool subtract
);
816 static FloatParts128
*parts128_addsub(FloatParts128
*a
, FloatParts128
*b
,
817 float_status
*s
, bool subtract
);
819 #define parts_addsub(A, B, S, Z) \
820 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
822 static FloatParts64
*parts64_mul(FloatParts64
*a
, FloatParts64
*b
,
824 static FloatParts128
*parts128_mul(FloatParts128
*a
, FloatParts128
*b
,
827 #define parts_mul(A, B, S) \
828 PARTS_GENERIC_64_128(mul, A)(A, B, S)
830 static FloatParts64
*parts64_muladd(FloatParts64
*a
, FloatParts64
*b
,
831 FloatParts64
*c
, int flags
,
833 static FloatParts128
*parts128_muladd(FloatParts128
*a
, FloatParts128
*b
,
834 FloatParts128
*c
, int flags
,
837 #define parts_muladd(A, B, C, Z, S) \
838 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
840 static FloatParts64
*parts64_div(FloatParts64
*a
, FloatParts64
*b
,
842 static FloatParts128
*parts128_div(FloatParts128
*a
, FloatParts128
*b
,
845 #define parts_div(A, B, S) \
846 PARTS_GENERIC_64_128(div, A)(A, B, S)
848 static void parts64_sqrt(FloatParts64
*a
, float_status
*s
, const FloatFmt
*f
);
849 static void parts128_sqrt(FloatParts128
*a
, float_status
*s
, const FloatFmt
*f
);
851 #define parts_sqrt(A, S, F) \
852 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
854 static bool parts64_round_to_int_normal(FloatParts64
*a
, FloatRoundMode rm
,
855 int scale
, int frac_size
);
856 static bool parts128_round_to_int_normal(FloatParts128
*a
, FloatRoundMode r
,
857 int scale
, int frac_size
);
859 #define parts_round_to_int_normal(A, R, C, F) \
860 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
862 static void parts64_round_to_int(FloatParts64
*a
, FloatRoundMode rm
,
863 int scale
, float_status
*s
,
864 const FloatFmt
*fmt
);
865 static void parts128_round_to_int(FloatParts128
*a
, FloatRoundMode r
,
866 int scale
, float_status
*s
,
867 const FloatFmt
*fmt
);
869 #define parts_round_to_int(A, R, C, S, F) \
870 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
872 static int64_t parts64_float_to_sint(FloatParts64
*p
, FloatRoundMode rmode
,
873 int scale
, int64_t min
, int64_t max
,
875 static int64_t parts128_float_to_sint(FloatParts128
*p
, FloatRoundMode rmode
,
876 int scale
, int64_t min
, int64_t max
,
879 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
880 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
882 static uint64_t parts64_float_to_uint(FloatParts64
*p
, FloatRoundMode rmode
,
883 int scale
, uint64_t max
,
885 static uint64_t parts128_float_to_uint(FloatParts128
*p
, FloatRoundMode rmode
,
886 int scale
, uint64_t max
,
889 #define parts_float_to_uint(P, R, Z, M, S) \
890 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
892 static void parts64_sint_to_float(FloatParts64
*p
, int64_t a
,
893 int scale
, float_status
*s
);
894 static void parts128_sint_to_float(FloatParts128
*p
, int64_t a
,
895 int scale
, float_status
*s
);
897 #define parts_sint_to_float(P, I, Z, S) \
898 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
900 static void parts64_uint_to_float(FloatParts64
*p
, uint64_t a
,
901 int scale
, float_status
*s
);
902 static void parts128_uint_to_float(FloatParts128
*p
, uint64_t a
,
903 int scale
, float_status
*s
);
905 #define parts_uint_to_float(P, I, Z, S) \
906 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
908 static FloatParts64
*parts64_minmax(FloatParts64
*a
, FloatParts64
*b
,
909 float_status
*s
, int flags
);
910 static FloatParts128
*parts128_minmax(FloatParts128
*a
, FloatParts128
*b
,
911 float_status
*s
, int flags
);
913 #define parts_minmax(A, B, S, F) \
914 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
916 static int parts64_compare(FloatParts64
*a
, FloatParts64
*b
,
917 float_status
*s
, bool q
);
918 static int parts128_compare(FloatParts128
*a
, FloatParts128
*b
,
919 float_status
*s
, bool q
);
921 #define parts_compare(A, B, S, Q) \
922 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
924 static void parts64_scalbn(FloatParts64
*a
, int n
, float_status
*s
);
925 static void parts128_scalbn(FloatParts128
*a
, int n
, float_status
*s
);
927 #define parts_scalbn(A, N, S) \
928 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
931 * Helper functions for softfloat-parts.c.inc, per-size operations.
934 #define FRAC_GENERIC_64_128(NAME, P) \
935 QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
937 #define FRAC_GENERIC_64_128_256(NAME, P) \
938 QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
939 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
941 static bool frac64_add(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
943 return uadd64_overflow(a
->frac
, b
->frac
, &r
->frac
);
/*
 * Add the 128-bit fractions, chaining the carry through `c'.
 * NOTE(review): the declaration of the carry variable and the final
 * `return c;' are missing from this mangled view — confirm upstream.
 */
946 static bool frac128_add(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
949 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
950 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
/*
 * Add the 256-bit fractions word by word, low to high, chaining the
 * carry through `c'.  NOTE(review): carry declaration and final return
 * are missing from this mangled view — confirm upstream.
 */
954 static bool frac256_add(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
957 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
958 r
->frac_lm
= uadd64_carry(a
->frac_lm
, b
->frac_lm
, &c
);
959 r
->frac_hm
= uadd64_carry(a
->frac_hm
, b
->frac_hm
, &c
);
960 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
964 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
966 static bool frac64_addi(FloatParts64
*r
, FloatParts64
*a
, uint64_t c
)
968 return uadd64_overflow(a
->frac
, c
, &r
->frac
);
971 static bool frac128_addi(FloatParts128
*r
, FloatParts128
*a
, uint64_t c
)
973 c
= uadd64_overflow(a
->frac_lo
, c
, &r
->frac_lo
);
974 return uadd64_overflow(a
->frac_hi
, c
, &r
->frac_hi
);
977 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
/* Set all fraction bits.  NOTE(review): the body is missing from this
 * mangled view (presumably a->frac = -1; — confirm upstream). */
979 static void frac64_allones(FloatParts64
*a
)
984 static void frac128_allones(FloatParts128
*a
)
986 a
->frac_hi
= a
->frac_lo
= -1;
989 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
991 static int frac64_cmp(FloatParts64
*a
, FloatParts64
*b
)
993 return a
->frac
== b
->frac
? 0 : a
->frac
< b
->frac
? -1 : 1;
/*
 * Three-way comparison of 128-bit fractions: compare high words first,
 * then low words.  NOTE(review): the equality tests and early return 0
 * are missing from this mangled view — confirm against upstream.
 */
996 static int frac128_cmp(FloatParts128
*a
, FloatParts128
*b
)
998 uint64_t ta
= a
->frac_hi
, tb
= b
->frac_hi
;
1000 ta
= a
->frac_lo
, tb
= b
->frac_lo
;
1005 return ta
< tb
? -1 : 1;
1008 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
/* Clear the fraction.  NOTE(review): the body is missing from this
 * mangled view (presumably a->frac = 0; — confirm upstream). */
1010 static void frac64_clear(FloatParts64
*a
)
1015 static void frac128_clear(FloatParts128
*a
)
1017 a
->frac_hi
= a
->frac_lo
= 0;
1020 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
1022 static bool frac64_div(FloatParts64
*a
, FloatParts64
*b
)
1024 uint64_t n1
, n0
, r
, q
;
1028 * We want a 2*N / N-bit division to produce exactly an N-bit
1029 * result, so that we do not lose any precision and so that we
1030 * do not have to renormalize afterward. If A.frac < B.frac,
1031 * then division would produce an (N-1)-bit result; shift A left
1032 * by one to produce the an N-bit result, and return true to
1033 * decrement the exponent to match.
1035 * The udiv_qrnnd algorithm that we're using requires normalization,
1036 * i.e. the msb of the denominator must be set, which is already true.
1038 ret
= a
->frac
< b
->frac
;
1046 q
= udiv_qrnnd(&r
, n0
, n1
, b
->frac
);
1048 /* Set lsb if there is a remainder, to set inexact. */
1049 a
->frac
= q
| (r
!= 0);
1054 static bool frac128_div(FloatParts128
*a
, FloatParts128
*b
)
1056 uint64_t q0
, q1
, a0
, a1
, b0
, b1
;
1057 uint64_t r0
, r1
, r2
, r3
, t0
, t1
, t2
, t3
;
1060 a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1061 b0
= b
->frac_hi
, b1
= b
->frac_lo
;
1063 ret
= lt128(a0
, a1
, b0
, b1
);
1065 a1
= shr_double(a0
, a1
, 1);
1069 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1070 q0
= estimateDiv128To64(a0
, a1
, b0
);
1073 * Estimate is high because B1 was not included (unless B1 == 0).
1074 * Reduce quotient and increase remainder until remainder is non-negative.
1075 * This loop will execute 0 to 2 times.
1077 mul128By64To192(b0
, b1
, q0
, &t0
, &t1
, &t2
);
1078 sub192(a0
, a1
, 0, t0
, t1
, t2
, &r0
, &r1
, &r2
);
1081 add192(r0
, r1
, r2
, 0, b0
, b1
, &r0
, &r1
, &r2
);
1084 /* Repeat using the remainder, producing a second word of quotient. */
1085 q1
= estimateDiv128To64(r1
, r2
, b0
);
1086 mul128By64To192(b0
, b1
, q1
, &t1
, &t2
, &t3
);
1087 sub192(r1
, r2
, 0, t1
, t2
, t3
, &r1
, &r2
, &r3
);
1090 add192(r1
, r2
, r3
, 0, b0
, b1
, &r1
, &r2
, &r3
);
1093 /* Any remainder indicates inexact; set sticky bit. */
1094 q1
|= (r2
| r3
) != 0;
1101 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1103 static bool frac64_eqz(FloatParts64
*a
)
1105 return a
->frac
== 0;
1108 static bool frac128_eqz(FloatParts128
*a
)
1110 return (a
->frac_hi
| a
->frac_lo
) == 0;
1113 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1115 static void frac64_mulw(FloatParts128
*r
, FloatParts64
*a
, FloatParts64
*b
)
1117 mulu64(&r
->frac_lo
, &r
->frac_hi
, a
->frac
, b
->frac
);
1120 static void frac128_mulw(FloatParts256
*r
, FloatParts128
*a
, FloatParts128
*b
)
1122 mul128To256(a
->frac_hi
, a
->frac_lo
, b
->frac_hi
, b
->frac_lo
,
1123 &r
->frac_hi
, &r
->frac_hm
, &r
->frac_lm
, &r
->frac_lo
);
1126 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1128 static void frac64_neg(FloatParts64
*a
)
1133 static void frac128_neg(FloatParts128
*a
)
1136 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1137 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1140 static void frac256_neg(FloatParts256
*a
)
1143 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1144 a
->frac_lm
= usub64_borrow(0, a
->frac_lm
, &c
);
1145 a
->frac_hm
= usub64_borrow(0, a
->frac_hm
, &c
);
1146 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1149 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1151 static int frac64_normalize(FloatParts64
*a
)
1154 int shift
= clz64(a
->frac
);
1161 static int frac128_normalize(FloatParts128
*a
)
1164 int shl
= clz64(a
->frac_hi
);
1165 a
->frac_hi
= shl_double(a
->frac_hi
, a
->frac_lo
, shl
);
1168 } else if (a
->frac_lo
) {
1169 int shl
= clz64(a
->frac_lo
);
1170 a
->frac_hi
= a
->frac_lo
<< shl
;
1177 static int frac256_normalize(FloatParts256
*a
)
1179 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1180 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1192 a0
= a1
, a1
= a2
, a2
= a3
, a3
= 0;
1195 a0
= a2
, a1
= a3
, a2
= 0, a3
= 0;
1198 a0
= a3
, a1
= 0, a2
= 0, a3
= 0;
1201 a0
= 0, a1
= 0, a2
= 0, a3
= 0;
1211 a0
= shl_double(a0
, a1
, shl
);
1212 a1
= shl_double(a1
, a2
, shl
);
1213 a2
= shl_double(a2
, a3
, shl
);
1224 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1226 static void frac64_shl(FloatParts64
*a
, int c
)
1231 static void frac128_shl(FloatParts128
*a
, int c
)
1233 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1241 a0
= shl_double(a0
, a1
, c
);
1249 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1251 static void frac64_shr(FloatParts64
*a
, int c
)
1256 static void frac128_shr(FloatParts128
*a
, int c
)
1258 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1266 a1
= shr_double(a0
, a1
, c
);
1274 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1276 static void frac64_shrjam(FloatParts64
*a
, int c
)
1278 uint64_t a0
= a
->frac
;
1280 if (likely(c
!= 0)) {
1281 if (likely(c
< 64)) {
1282 a0
= (a0
>> c
) | (shr_double(a0
, 0, c
) != 0);
1290 static void frac128_shrjam(FloatParts128
*a
, int c
)
1292 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1293 uint64_t sticky
= 0;
1295 if (unlikely(c
== 0)) {
1297 } else if (likely(c
< 64)) {
1299 } else if (likely(c
< 128)) {
1313 sticky
|= shr_double(a1
, 0, c
);
1314 a1
= shr_double(a0
, a1
, c
);
1318 a
->frac_lo
= a1
| (sticky
!= 0);
1322 static void frac256_shrjam(FloatParts256
*a
, int c
)
1324 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1325 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1326 uint64_t sticky
= 0;
1328 if (unlikely(c
== 0)) {
1330 } else if (likely(c
< 64)) {
1332 } else if (likely(c
< 256)) {
1333 if (unlikely(c
& 128)) {
1335 a3
= a1
, a2
= a0
, a1
= 0, a0
= 0;
1337 if (unlikely(c
& 64)) {
1339 a3
= a2
, a2
= a1
, a1
= a0
, a0
= 0;
1346 sticky
= a0
| a1
| a2
| a3
;
1347 a0
= a1
= a2
= a3
= 0;
1351 sticky
|= shr_double(a3
, 0, c
);
1352 a3
= shr_double(a2
, a3
, c
);
1353 a2
= shr_double(a1
, a2
, c
);
1354 a1
= shr_double(a0
, a1
, c
);
1358 a
->frac_lo
= a3
| (sticky
!= 0);
1364 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1366 static bool frac64_sub(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
1368 return usub64_overflow(a
->frac
, b
->frac
, &r
->frac
);
1371 static bool frac128_sub(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
1374 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1375 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1379 static bool frac256_sub(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
1382 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1383 r
->frac_lm
= usub64_borrow(a
->frac_lm
, b
->frac_lm
, &c
);
1384 r
->frac_hm
= usub64_borrow(a
->frac_hm
, b
->frac_hm
, &c
);
1385 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1389 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1391 static void frac64_truncjam(FloatParts64
*r
, FloatParts128
*a
)
1393 r
->frac
= a
->frac_hi
| (a
->frac_lo
!= 0);
1396 static void frac128_truncjam(FloatParts128
*r
, FloatParts256
*a
)
1398 r
->frac_hi
= a
->frac_hi
;
1399 r
->frac_lo
= a
->frac_hm
| ((a
->frac_lm
| a
->frac_lo
) != 0);
1402 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1404 static void frac64_widen(FloatParts128
*r
, FloatParts64
*a
)
1406 r
->frac_hi
= a
->frac
;
1410 static void frac128_widen(FloatParts256
*r
, FloatParts128
*a
)
1412 r
->frac_hi
= a
->frac_hi
;
1413 r
->frac_hm
= a
->frac_lo
;
1418 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
 * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
1422 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1423 * and thus MIT licenced.
/* Table data unchanged; restored the split declaration and closing brace. */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};
1444 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1445 #define FloatPartsN glue(FloatParts,N)
1446 #define FloatPartsW glue(FloatParts,W)
1451 #include "softfloat-parts-addsub.c.inc"
1452 #include "softfloat-parts.c.inc"
1459 #include "softfloat-parts-addsub.c.inc"
1460 #include "softfloat-parts.c.inc"
1466 #include "softfloat-parts-addsub.c.inc"
1475 * Pack/unpack routines with a specific FloatFmt.
1478 static void float16a_unpack_canonical(FloatParts64
*p
, float16 f
,
1479 float_status
*s
, const FloatFmt
*params
)
1481 float16_unpack_raw(p
, f
);
1482 parts_canonicalize(p
, s
, params
);
1485 static void float16_unpack_canonical(FloatParts64
*p
, float16 f
,
1488 float16a_unpack_canonical(p
, f
, s
, &float16_params
);
1491 static void bfloat16_unpack_canonical(FloatParts64
*p
, bfloat16 f
,
1494 bfloat16_unpack_raw(p
, f
);
1495 parts_canonicalize(p
, s
, &bfloat16_params
);
1498 static float16
float16a_round_pack_canonical(FloatParts64
*p
,
1500 const FloatFmt
*params
)
1502 parts_uncanon(p
, s
, params
);
1503 return float16_pack_raw(p
);
1506 static float16
float16_round_pack_canonical(FloatParts64
*p
,
1509 return float16a_round_pack_canonical(p
, s
, &float16_params
);
1512 static bfloat16
bfloat16_round_pack_canonical(FloatParts64
*p
,
1515 parts_uncanon(p
, s
, &bfloat16_params
);
1516 return bfloat16_pack_raw(p
);
1519 static void float32_unpack_canonical(FloatParts64
*p
, float32 f
,
1522 float32_unpack_raw(p
, f
);
1523 parts_canonicalize(p
, s
, &float32_params
);
1526 static float32
float32_round_pack_canonical(FloatParts64
*p
,
1529 parts_uncanon(p
, s
, &float32_params
);
1530 return float32_pack_raw(p
);
1533 static void float64_unpack_canonical(FloatParts64
*p
, float64 f
,
1536 float64_unpack_raw(p
, f
);
1537 parts_canonicalize(p
, s
, &float64_params
);
1540 static float64
float64_round_pack_canonical(FloatParts64
*p
,
1543 parts_uncanon(p
, s
, &float64_params
);
1544 return float64_pack_raw(p
);
1547 static void float128_unpack_canonical(FloatParts128
*p
, float128 f
,
1550 float128_unpack_raw(p
, f
);
1551 parts_canonicalize(p
, s
, &float128_params
);
1554 static float128
float128_round_pack_canonical(FloatParts128
*p
,
1557 parts_uncanon(p
, s
, &float128_params
);
1558 return float128_pack_raw(p
);
1561 /* Returns false if the encoding is invalid. */
1562 static bool floatx80_unpack_canonical(FloatParts128
*p
, floatx80 f
,
1565 /* Ensure rounding precision is set before beginning. */
1566 switch (s
->floatx80_rounding_precision
) {
1567 case floatx80_precision_x
:
1568 case floatx80_precision_d
:
1569 case floatx80_precision_s
:
1572 g_assert_not_reached();
1575 if (unlikely(floatx80_invalid_encoding(f
))) {
1576 float_raise(float_flag_invalid
, s
);
1580 floatx80_unpack_raw(p
, f
);
1582 if (likely(p
->exp
!= floatx80_params
[floatx80_precision_x
].exp_max
)) {
1583 parts_canonicalize(p
, s
, &floatx80_params
[floatx80_precision_x
]);
1585 /* The explicit integer bit is ignored, after invalid checks. */
1586 p
->frac_hi
&= MAKE_64BIT_MASK(0, 63);
1587 p
->cls
= (p
->frac_hi
== 0 ? float_class_inf
1588 : parts_is_snan_frac(p
->frac_hi
, s
)
1589 ? float_class_snan
: float_class_qnan
);
1594 static floatx80
floatx80_round_pack_canonical(FloatParts128
*p
,
1597 const FloatFmt
*fmt
= &floatx80_params
[s
->floatx80_rounding_precision
];
1602 case float_class_normal
:
1603 if (s
->floatx80_rounding_precision
== floatx80_precision_x
) {
1604 parts_uncanon_normal(p
, s
, fmt
);
1612 frac_truncjam(&p64
, p
);
1613 parts_uncanon_normal(&p64
, s
, fmt
);
1617 if (exp
!= fmt
->exp_max
) {
1620 /* rounded to inf -- fall through to set frac correctly */
1622 case float_class_inf
:
1623 /* x86 and m68k differ in the setting of the integer bit. */
1624 frac
= floatx80_infinity_low
;
1628 case float_class_zero
:
1633 case float_class_snan
:
1634 case float_class_qnan
:
1635 /* NaNs have the integer bit set. */
1636 frac
= p
->frac_hi
| (1ull << 63);
1641 g_assert_not_reached();
1644 return packFloatx80(p
->sign
, exp
, frac
);
1648 * Addition and subtraction
1651 static float16 QEMU_FLATTEN
1652 float16_addsub(float16 a
, float16 b
, float_status
*status
, bool subtract
)
1654 FloatParts64 pa
, pb
, *pr
;
1656 float16_unpack_canonical(&pa
, a
, status
);
1657 float16_unpack_canonical(&pb
, b
, status
);
1658 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1660 return float16_round_pack_canonical(pr
, status
);
1663 float16
float16_add(float16 a
, float16 b
, float_status
*status
)
1665 return float16_addsub(a
, b
, status
, false);
1668 float16
float16_sub(float16 a
, float16 b
, float_status
*status
)
1670 return float16_addsub(a
, b
, status
, true);
1673 static float32 QEMU_SOFTFLOAT_ATTR
1674 soft_f32_addsub(float32 a
, float32 b
, float_status
*status
, bool subtract
)
1676 FloatParts64 pa
, pb
, *pr
;
1678 float32_unpack_canonical(&pa
, a
, status
);
1679 float32_unpack_canonical(&pb
, b
, status
);
1680 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1682 return float32_round_pack_canonical(pr
, status
);
1685 static float32
soft_f32_add(float32 a
, float32 b
, float_status
*status
)
1687 return soft_f32_addsub(a
, b
, status
, false);
1690 static float32
soft_f32_sub(float32 a
, float32 b
, float_status
*status
)
1692 return soft_f32_addsub(a
, b
, status
, true);
1695 static float64 QEMU_SOFTFLOAT_ATTR
1696 soft_f64_addsub(float64 a
, float64 b
, float_status
*status
, bool subtract
)
1698 FloatParts64 pa
, pb
, *pr
;
1700 float64_unpack_canonical(&pa
, a
, status
);
1701 float64_unpack_canonical(&pb
, b
, status
);
1702 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1704 return float64_round_pack_canonical(pr
, status
);
1707 static float64
soft_f64_add(float64 a
, float64 b
, float_status
*status
)
1709 return soft_f64_addsub(a
, b
, status
, false);
1712 static float64
soft_f64_sub(float64 a
, float64 b
, float_status
*status
)
1714 return soft_f64_addsub(a
, b
, status
, true);
/* Host-FPU fast path for float32 addition. */
static float hard_f32_add(float a, float b)
{
    return a + b;
}
/* Host-FPU fast path for float32 subtraction. */
static float hard_f32_sub(float a, float b)
{
    return a - b;
}
/* Host-FPU fast path for float64 addition. */
static double hard_f64_add(double a, double b)
{
    return a + b;
}
/* Host-FPU fast path for float64 subtraction. */
static double hard_f64_sub(double a, double b)
{
    return a - b;
}
1737 static bool f32_addsubmul_post(union_float32 a
, union_float32 b
)
1739 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
1740 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1742 return !(float32_is_zero(a
.s
) && float32_is_zero(b
.s
));
1745 static bool f64_addsubmul_post(union_float64 a
, union_float64 b
)
1747 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
1748 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1750 return !(float64_is_zero(a
.s
) && float64_is_zero(b
.s
));
1754 static float32
float32_addsub(float32 a
, float32 b
, float_status
*s
,
1755 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
)
1757 return float32_gen2(a
, b
, s
, hard
, soft
,
1758 f32_is_zon2
, f32_addsubmul_post
);
1761 static float64
float64_addsub(float64 a
, float64 b
, float_status
*s
,
1762 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
)
1764 return float64_gen2(a
, b
, s
, hard
, soft
,
1765 f64_is_zon2
, f64_addsubmul_post
);
1768 float32 QEMU_FLATTEN
1769 float32_add(float32 a
, float32 b
, float_status
*s
)
1771 return float32_addsub(a
, b
, s
, hard_f32_add
, soft_f32_add
);
1774 float32 QEMU_FLATTEN
1775 float32_sub(float32 a
, float32 b
, float_status
*s
)
1777 return float32_addsub(a
, b
, s
, hard_f32_sub
, soft_f32_sub
);
1780 float64 QEMU_FLATTEN
1781 float64_add(float64 a
, float64 b
, float_status
*s
)
1783 return float64_addsub(a
, b
, s
, hard_f64_add
, soft_f64_add
);
1786 float64 QEMU_FLATTEN
1787 float64_sub(float64 a
, float64 b
, float_status
*s
)
1789 return float64_addsub(a
, b
, s
, hard_f64_sub
, soft_f64_sub
);
1792 static bfloat16 QEMU_FLATTEN
1793 bfloat16_addsub(bfloat16 a
, bfloat16 b
, float_status
*status
, bool subtract
)
1795 FloatParts64 pa
, pb
, *pr
;
1797 bfloat16_unpack_canonical(&pa
, a
, status
);
1798 bfloat16_unpack_canonical(&pb
, b
, status
);
1799 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1801 return bfloat16_round_pack_canonical(pr
, status
);
1804 bfloat16
bfloat16_add(bfloat16 a
, bfloat16 b
, float_status
*status
)
1806 return bfloat16_addsub(a
, b
, status
, false);
1809 bfloat16
bfloat16_sub(bfloat16 a
, bfloat16 b
, float_status
*status
)
1811 return bfloat16_addsub(a
, b
, status
, true);
1814 static float128 QEMU_FLATTEN
1815 float128_addsub(float128 a
, float128 b
, float_status
*status
, bool subtract
)
1817 FloatParts128 pa
, pb
, *pr
;
1819 float128_unpack_canonical(&pa
, a
, status
);
1820 float128_unpack_canonical(&pb
, b
, status
);
1821 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1823 return float128_round_pack_canonical(pr
, status
);
1826 float128
float128_add(float128 a
, float128 b
, float_status
*status
)
1828 return float128_addsub(a
, b
, status
, false);
1831 float128
float128_sub(float128 a
, float128 b
, float_status
*status
)
1833 return float128_addsub(a
, b
, status
, true);
1836 static floatx80 QEMU_FLATTEN
1837 floatx80_addsub(floatx80 a
, floatx80 b
, float_status
*status
, bool subtract
)
1839 FloatParts128 pa
, pb
, *pr
;
1841 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
1842 !floatx80_unpack_canonical(&pb
, b
, status
)) {
1843 return floatx80_default_nan(status
);
1846 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1847 return floatx80_round_pack_canonical(pr
, status
);
1850 floatx80
floatx80_add(floatx80 a
, floatx80 b
, float_status
*status
)
1852 return floatx80_addsub(a
, b
, status
, false);
1855 floatx80
floatx80_sub(floatx80 a
, floatx80 b
, float_status
*status
)
1857 return floatx80_addsub(a
, b
, status
, true);
1864 float16 QEMU_FLATTEN
float16_mul(float16 a
, float16 b
, float_status
*status
)
1866 FloatParts64 pa
, pb
, *pr
;
1868 float16_unpack_canonical(&pa
, a
, status
);
1869 float16_unpack_canonical(&pb
, b
, status
);
1870 pr
= parts_mul(&pa
, &pb
, status
);
1872 return float16_round_pack_canonical(pr
, status
);
1875 static float32 QEMU_SOFTFLOAT_ATTR
1876 soft_f32_mul(float32 a
, float32 b
, float_status
*status
)
1878 FloatParts64 pa
, pb
, *pr
;
1880 float32_unpack_canonical(&pa
, a
, status
);
1881 float32_unpack_canonical(&pb
, b
, status
);
1882 pr
= parts_mul(&pa
, &pb
, status
);
1884 return float32_round_pack_canonical(pr
, status
);
1887 static float64 QEMU_SOFTFLOAT_ATTR
1888 soft_f64_mul(float64 a
, float64 b
, float_status
*status
)
1890 FloatParts64 pa
, pb
, *pr
;
1892 float64_unpack_canonical(&pa
, a
, status
);
1893 float64_unpack_canonical(&pb
, b
, status
);
1894 pr
= parts_mul(&pa
, &pb
, status
);
1896 return float64_round_pack_canonical(pr
, status
);
/* Host-FPU fast path for float32 multiplication. */
static float hard_f32_mul(float a, float b)
{
    return a * b;
}
/* Host-FPU fast path for float64 multiplication. */
static double hard_f64_mul(double a, double b)
{
    return a * b;
}
1909 float32 QEMU_FLATTEN
1910 float32_mul(float32 a
, float32 b
, float_status
*s
)
1912 return float32_gen2(a
, b
, s
, hard_f32_mul
, soft_f32_mul
,
1913 f32_is_zon2
, f32_addsubmul_post
);
1916 float64 QEMU_FLATTEN
1917 float64_mul(float64 a
, float64 b
, float_status
*s
)
1919 return float64_gen2(a
, b
, s
, hard_f64_mul
, soft_f64_mul
,
1920 f64_is_zon2
, f64_addsubmul_post
);
1923 bfloat16 QEMU_FLATTEN
1924 bfloat16_mul(bfloat16 a
, bfloat16 b
, float_status
*status
)
1926 FloatParts64 pa
, pb
, *pr
;
1928 bfloat16_unpack_canonical(&pa
, a
, status
);
1929 bfloat16_unpack_canonical(&pb
, b
, status
);
1930 pr
= parts_mul(&pa
, &pb
, status
);
1932 return bfloat16_round_pack_canonical(pr
, status
);
1935 float128 QEMU_FLATTEN
1936 float128_mul(float128 a
, float128 b
, float_status
*status
)
1938 FloatParts128 pa
, pb
, *pr
;
1940 float128_unpack_canonical(&pa
, a
, status
);
1941 float128_unpack_canonical(&pb
, b
, status
);
1942 pr
= parts_mul(&pa
, &pb
, status
);
1944 return float128_round_pack_canonical(pr
, status
);
1947 floatx80 QEMU_FLATTEN
1948 floatx80_mul(floatx80 a
, floatx80 b
, float_status
*status
)
1950 FloatParts128 pa
, pb
, *pr
;
1952 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
1953 !floatx80_unpack_canonical(&pb
, b
, status
)) {
1954 return floatx80_default_nan(status
);
1957 pr
= parts_mul(&pa
, &pb
, status
);
1958 return floatx80_round_pack_canonical(pr
, status
);
1962 * Fused multiply-add
1965 float16 QEMU_FLATTEN
float16_muladd(float16 a
, float16 b
, float16 c
,
1966 int flags
, float_status
*status
)
1968 FloatParts64 pa
, pb
, pc
, *pr
;
1970 float16_unpack_canonical(&pa
, a
, status
);
1971 float16_unpack_canonical(&pb
, b
, status
);
1972 float16_unpack_canonical(&pc
, c
, status
);
1973 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1975 return float16_round_pack_canonical(pr
, status
);
1978 static float32 QEMU_SOFTFLOAT_ATTR
1979 soft_f32_muladd(float32 a
, float32 b
, float32 c
, int flags
,
1980 float_status
*status
)
1982 FloatParts64 pa
, pb
, pc
, *pr
;
1984 float32_unpack_canonical(&pa
, a
, status
);
1985 float32_unpack_canonical(&pb
, b
, status
);
1986 float32_unpack_canonical(&pc
, c
, status
);
1987 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1989 return float32_round_pack_canonical(pr
, status
);
1992 static float64 QEMU_SOFTFLOAT_ATTR
1993 soft_f64_muladd(float64 a
, float64 b
, float64 c
, int flags
,
1994 float_status
*status
)
1996 FloatParts64 pa
, pb
, pc
, *pr
;
1998 float64_unpack_canonical(&pa
, a
, status
);
1999 float64_unpack_canonical(&pb
, b
, status
);
2000 float64_unpack_canonical(&pc
, c
, status
);
2001 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2003 return float64_round_pack_canonical(pr
, status
);
/* When set, the hardfloat muladd fast path is bypassed (testing knob). */
static bool force_soft_fma;
2008 float32 QEMU_FLATTEN
2009 float32_muladd(float32 xa
, float32 xb
, float32 xc
, int flags
, float_status
*s
)
2011 union_float32 ua
, ub
, uc
, ur
;
2017 if (unlikely(!can_use_fpu(s
))) {
2020 if (unlikely(flags
& float_muladd_halve_result
)) {
2024 float32_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2025 if (unlikely(!f32_is_zon3(ua
, ub
, uc
))) {
2029 if (unlikely(force_soft_fma
)) {
2034 * When (a || b) == 0, there's no need to check for under/over flow,
2035 * since we know the addend is (normal || 0) and the product is 0.
2037 if (float32_is_zero(ua
.s
) || float32_is_zero(ub
.s
)) {
2041 prod_sign
= float32_is_neg(ua
.s
) ^ float32_is_neg(ub
.s
);
2042 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2043 up
.s
= float32_set_sign(float32_zero
, prod_sign
);
2045 if (flags
& float_muladd_negate_c
) {
2050 union_float32 ua_orig
= ua
;
2051 union_float32 uc_orig
= uc
;
2053 if (flags
& float_muladd_negate_product
) {
2056 if (flags
& float_muladd_negate_c
) {
2060 ur
.h
= fmaf(ua
.h
, ub
.h
, uc
.h
);
2062 if (unlikely(f32_is_inf(ur
))) {
2063 float_raise(float_flag_overflow
, s
);
2064 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
)) {
2070 if (flags
& float_muladd_negate_result
) {
2071 return float32_chs(ur
.s
);
2076 return soft_f32_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2079 float64 QEMU_FLATTEN
2080 float64_muladd(float64 xa
, float64 xb
, float64 xc
, int flags
, float_status
*s
)
2082 union_float64 ua
, ub
, uc
, ur
;
2088 if (unlikely(!can_use_fpu(s
))) {
2091 if (unlikely(flags
& float_muladd_halve_result
)) {
2095 float64_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2096 if (unlikely(!f64_is_zon3(ua
, ub
, uc
))) {
2100 if (unlikely(force_soft_fma
)) {
2105 * When (a || b) == 0, there's no need to check for under/over flow,
2106 * since we know the addend is (normal || 0) and the product is 0.
2108 if (float64_is_zero(ua
.s
) || float64_is_zero(ub
.s
)) {
2112 prod_sign
= float64_is_neg(ua
.s
) ^ float64_is_neg(ub
.s
);
2113 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2114 up
.s
= float64_set_sign(float64_zero
, prod_sign
);
2116 if (flags
& float_muladd_negate_c
) {
2121 union_float64 ua_orig
= ua
;
2122 union_float64 uc_orig
= uc
;
2124 if (flags
& float_muladd_negate_product
) {
2127 if (flags
& float_muladd_negate_c
) {
2131 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
2133 if (unlikely(f64_is_inf(ur
))) {
2134 float_raise(float_flag_overflow
, s
);
2135 } else if (unlikely(fabs(ur
.h
) <= FLT_MIN
)) {
2141 if (flags
& float_muladd_negate_result
) {
2142 return float64_chs(ur
.s
);
2147 return soft_f64_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2150 bfloat16 QEMU_FLATTEN
bfloat16_muladd(bfloat16 a
, bfloat16 b
, bfloat16 c
,
2151 int flags
, float_status
*status
)
2153 FloatParts64 pa
, pb
, pc
, *pr
;
2155 bfloat16_unpack_canonical(&pa
, a
, status
);
2156 bfloat16_unpack_canonical(&pb
, b
, status
);
2157 bfloat16_unpack_canonical(&pc
, c
, status
);
2158 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2160 return bfloat16_round_pack_canonical(pr
, status
);
2163 float128 QEMU_FLATTEN
float128_muladd(float128 a
, float128 b
, float128 c
,
2164 int flags
, float_status
*status
)
2166 FloatParts128 pa
, pb
, pc
, *pr
;
2168 float128_unpack_canonical(&pa
, a
, status
);
2169 float128_unpack_canonical(&pb
, b
, status
);
2170 float128_unpack_canonical(&pc
, c
, status
);
2171 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2173 return float128_round_pack_canonical(pr
, status
);
2180 float16
float16_div(float16 a
, float16 b
, float_status
*status
)
2182 FloatParts64 pa
, pb
, *pr
;
2184 float16_unpack_canonical(&pa
, a
, status
);
2185 float16_unpack_canonical(&pb
, b
, status
);
2186 pr
= parts_div(&pa
, &pb
, status
);
2188 return float16_round_pack_canonical(pr
, status
);
2191 static float32 QEMU_SOFTFLOAT_ATTR
2192 soft_f32_div(float32 a
, float32 b
, float_status
*status
)
2194 FloatParts64 pa
, pb
, *pr
;
2196 float32_unpack_canonical(&pa
, a
, status
);
2197 float32_unpack_canonical(&pb
, b
, status
);
2198 pr
= parts_div(&pa
, &pb
, status
);
2200 return float32_round_pack_canonical(pr
, status
);
2203 static float64 QEMU_SOFTFLOAT_ATTR
2204 soft_f64_div(float64 a
, float64 b
, float_status
*status
)
2206 FloatParts64 pa
, pb
, *pr
;
2208 float64_unpack_canonical(&pa
, a
, status
);
2209 float64_unpack_canonical(&pb
, b
, status
);
2210 pr
= parts_div(&pa
, &pb
, status
);
2212 return float64_round_pack_canonical(pr
, status
);
/* Host-FPU fast path for float32 division. */
static float hard_f32_div(float a, float b)
{
    return a / b;
}
/* Host-FPU fast path for float64 division. */
static double hard_f64_div(double a, double b)
{
    return a / b;
}
2225 static bool f32_div_pre(union_float32 a
, union_float32 b
)
2227 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2228 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2229 fpclassify(b
.h
) == FP_NORMAL
;
2231 return float32_is_zero_or_normal(a
.s
) && float32_is_normal(b
.s
);
2234 static bool f64_div_pre(union_float64 a
, union_float64 b
)
2236 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2237 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2238 fpclassify(b
.h
) == FP_NORMAL
;
2240 return float64_is_zero_or_normal(a
.s
) && float64_is_normal(b
.s
);
2243 static bool f32_div_post(union_float32 a
, union_float32 b
)
2245 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2246 return fpclassify(a
.h
) != FP_ZERO
;
2248 return !float32_is_zero(a
.s
);
2251 static bool f64_div_post(union_float64 a
, union_float64 b
)
2253 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2254 return fpclassify(a
.h
) != FP_ZERO
;
2256 return !float64_is_zero(a
.s
);
2259 float32 QEMU_FLATTEN
2260 float32_div(float32 a
, float32 b
, float_status
*s
)
2262 return float32_gen2(a
, b
, s
, hard_f32_div
, soft_f32_div
,
2263 f32_div_pre
, f32_div_post
);
2266 float64 QEMU_FLATTEN
2267 float64_div(float64 a
, float64 b
, float_status
*s
)
2269 return float64_gen2(a
, b
, s
, hard_f64_div
, soft_f64_div
,
2270 f64_div_pre
, f64_div_post
);
2273 bfloat16 QEMU_FLATTEN
2274 bfloat16_div(bfloat16 a
, bfloat16 b
, float_status
*status
)
2276 FloatParts64 pa
, pb
, *pr
;
2278 bfloat16_unpack_canonical(&pa
, a
, status
);
2279 bfloat16_unpack_canonical(&pb
, b
, status
);
2280 pr
= parts_div(&pa
, &pb
, status
);
2282 return bfloat16_round_pack_canonical(pr
, status
);
2285 float128 QEMU_FLATTEN
2286 float128_div(float128 a
, float128 b
, float_status
*status
)
2288 FloatParts128 pa
, pb
, *pr
;
2290 float128_unpack_canonical(&pa
, a
, status
);
2291 float128_unpack_canonical(&pb
, b
, status
);
2292 pr
= parts_div(&pa
, &pb
, status
);
2294 return float128_round_pack_canonical(pr
, status
);
2298 * Float to Float conversions
2300 * Returns the result of converting one float format to another. The
2301 * conversion is performed according to the IEC/IEEE Standard for
2302 * Binary Floating-Point Arithmetic.
2304 * Usually this only needs to take care of raising invalid exceptions
2305 * and handling the conversion on NaNs.
2308 static void parts_float_to_ahp(FloatParts64
*a
, float_status
*s
)
2311 case float_class_qnan
:
2312 case float_class_snan
:
2314 * There is no NaN in the destination format. Raise Invalid
2315 * and return a zero with the sign of the input NaN.
2317 float_raise(float_flag_invalid
, s
);
2318 a
->cls
= float_class_zero
;
2321 case float_class_inf
:
2323 * There is no Inf in the destination format. Raise Invalid
2324 * and return the maximum normal with the correct sign.
2326 float_raise(float_flag_invalid
, s
);
2327 a
->cls
= float_class_normal
;
2328 a
->exp
= float16_params_ahp
.exp_max
;
2329 a
->frac
= MAKE_64BIT_MASK(float16_params_ahp
.frac_shift
,
2330 float16_params_ahp
.frac_size
+ 1);
2333 case float_class_normal
:
2334 case float_class_zero
:
2338 g_assert_not_reached();
2342 static void parts64_float_to_float(FloatParts64
*a
, float_status
*s
)
2344 if (is_nan(a
->cls
)) {
2345 parts_return_nan(a
, s
);
2349 static void parts128_float_to_float(FloatParts128
*a
, float_status
*s
)
2351 if (is_nan(a
->cls
)) {
2352 parts_return_nan(a
, s
);
2356 #define parts_float_to_float(P, S) \
2357 PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2359 static void parts_float_to_float_narrow(FloatParts64
*a
, FloatParts128
*b
,
2366 if (a
->cls
== float_class_normal
) {
2367 frac_truncjam(a
, b
);
2368 } else if (is_nan(a
->cls
)) {
2369 /* Discard the low bits of the NaN. */
2370 a
->frac
= b
->frac_hi
;
2371 parts_return_nan(a
, s
);
2375 static void parts_float_to_float_widen(FloatParts128
*a
, FloatParts64
*b
,
2383 if (is_nan(a
->cls
)) {
2384 parts_return_nan(a
, s
);
2388 float32
float16_to_float32(float16 a
, bool ieee
, float_status
*s
)
2390 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2393 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2394 parts_float_to_float(&p
, s
);
2395 return float32_round_pack_canonical(&p
, s
);
2398 float64
float16_to_float64(float16 a
, bool ieee
, float_status
*s
)
2400 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2403 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2404 parts_float_to_float(&p
, s
);
2405 return float64_round_pack_canonical(&p
, s
);
2408 float16
float32_to_float16(float32 a
, bool ieee
, float_status
*s
)
2411 const FloatFmt
*fmt
;
2413 float32_unpack_canonical(&p
, a
, s
);
2415 parts_float_to_float(&p
, s
);
2416 fmt
= &float16_params
;
2418 parts_float_to_ahp(&p
, s
);
2419 fmt
= &float16_params_ahp
;
2421 return float16a_round_pack_canonical(&p
, s
, fmt
);
2424 static float64 QEMU_SOFTFLOAT_ATTR
2425 soft_float32_to_float64(float32 a
, float_status
*s
)
2429 float32_unpack_canonical(&p
, a
, s
);
2430 parts_float_to_float(&p
, s
);
2431 return float64_round_pack_canonical(&p
, s
);
2434 float64
float32_to_float64(float32 a
, float_status
*s
)
2436 if (likely(float32_is_normal(a
))) {
2437 /* Widening conversion can never produce inexact results. */
2443 } else if (float32_is_zero(a
)) {
2444 return float64_set_sign(float64_zero
, float32_is_neg(a
));
2446 return soft_float32_to_float64(a
, s
);
2450 float16
float64_to_float16(float64 a
, bool ieee
, float_status
*s
)
2453 const FloatFmt
*fmt
;
2455 float64_unpack_canonical(&p
, a
, s
);
2457 parts_float_to_float(&p
, s
);
2458 fmt
= &float16_params
;
2460 parts_float_to_ahp(&p
, s
);
2461 fmt
= &float16_params_ahp
;
2463 return float16a_round_pack_canonical(&p
, s
, fmt
);
2466 float32
float64_to_float32(float64 a
, float_status
*s
)
2470 float64_unpack_canonical(&p
, a
, s
);
2471 parts_float_to_float(&p
, s
);
2472 return float32_round_pack_canonical(&p
, s
);
2475 float32
bfloat16_to_float32(bfloat16 a
, float_status
*s
)
2479 bfloat16_unpack_canonical(&p
, a
, s
);
2480 parts_float_to_float(&p
, s
);
2481 return float32_round_pack_canonical(&p
, s
);
2484 float64
bfloat16_to_float64(bfloat16 a
, float_status
*s
)
2488 bfloat16_unpack_canonical(&p
, a
, s
);
2489 parts_float_to_float(&p
, s
);
2490 return float64_round_pack_canonical(&p
, s
);
2493 bfloat16
float32_to_bfloat16(float32 a
, float_status
*s
)
2497 float32_unpack_canonical(&p
, a
, s
);
2498 parts_float_to_float(&p
, s
);
2499 return bfloat16_round_pack_canonical(&p
, s
);
2502 bfloat16
float64_to_bfloat16(float64 a
, float_status
*s
)
2506 float64_unpack_canonical(&p
, a
, s
);
2507 parts_float_to_float(&p
, s
);
2508 return bfloat16_round_pack_canonical(&p
, s
);
2511 float32
float128_to_float32(float128 a
, float_status
*s
)
2516 float128_unpack_canonical(&p128
, a
, s
);
2517 parts_float_to_float_narrow(&p64
, &p128
, s
);
2518 return float32_round_pack_canonical(&p64
, s
);
2521 float64
float128_to_float64(float128 a
, float_status
*s
)
2526 float128_unpack_canonical(&p128
, a
, s
);
2527 parts_float_to_float_narrow(&p64
, &p128
, s
);
2528 return float64_round_pack_canonical(&p64
, s
);
2531 float128
float32_to_float128(float32 a
, float_status
*s
)
2536 float32_unpack_canonical(&p64
, a
, s
);
2537 parts_float_to_float_widen(&p128
, &p64
, s
);
2538 return float128_round_pack_canonical(&p128
, s
);
2541 float128
float64_to_float128(float64 a
, float_status
*s
)
2546 float64_unpack_canonical(&p64
, a
, s
);
2547 parts_float_to_float_widen(&p128
, &p64
, s
);
2548 return float128_round_pack_canonical(&p128
, s
);
2552 * Round to integral value
2555 float16
float16_round_to_int(float16 a
, float_status
*s
)
2559 float16_unpack_canonical(&p
, a
, s
);
2560 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float16_params
);
2561 return float16_round_pack_canonical(&p
, s
);
2564 float32
float32_round_to_int(float32 a
, float_status
*s
)
2568 float32_unpack_canonical(&p
, a
, s
);
2569 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float32_params
);
2570 return float32_round_pack_canonical(&p
, s
);
2573 float64
float64_round_to_int(float64 a
, float_status
*s
)
2577 float64_unpack_canonical(&p
, a
, s
);
2578 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float64_params
);
2579 return float64_round_pack_canonical(&p
, s
);
2582 bfloat16
bfloat16_round_to_int(bfloat16 a
, float_status
*s
)
2586 bfloat16_unpack_canonical(&p
, a
, s
);
2587 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &bfloat16_params
);
2588 return bfloat16_round_pack_canonical(&p
, s
);
2591 float128
float128_round_to_int(float128 a
, float_status
*s
)
2595 float128_unpack_canonical(&p
, a
, s
);
2596 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float128_params
);
2597 return float128_round_pack_canonical(&p
, s
);
2601 * Floating-point to signed integer conversions
2604 int8_t float16_to_int8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2609 float16_unpack_canonical(&p
, a
, s
);
2610 return parts_float_to_sint(&p
, rmode
, scale
, INT8_MIN
, INT8_MAX
, s
);
2613 int16_t float16_to_int16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2618 float16_unpack_canonical(&p
, a
, s
);
2619 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2622 int32_t float16_to_int32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2627 float16_unpack_canonical(&p
, a
, s
);
2628 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2631 int64_t float16_to_int64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2636 float16_unpack_canonical(&p
, a
, s
);
2637 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2640 int16_t float32_to_int16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2645 float32_unpack_canonical(&p
, a
, s
);
2646 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2649 int32_t float32_to_int32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2654 float32_unpack_canonical(&p
, a
, s
);
2655 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2658 int64_t float32_to_int64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2663 float32_unpack_canonical(&p
, a
, s
);
2664 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2667 int16_t float64_to_int16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2672 float64_unpack_canonical(&p
, a
, s
);
2673 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2676 int32_t float64_to_int32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2681 float64_unpack_canonical(&p
, a
, s
);
2682 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2685 int64_t float64_to_int64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2690 float64_unpack_canonical(&p
, a
, s
);
2691 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2694 int16_t bfloat16_to_int16_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2699 bfloat16_unpack_canonical(&p
, a
, s
);
2700 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2703 int32_t bfloat16_to_int32_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2708 bfloat16_unpack_canonical(&p
, a
, s
);
2709 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2712 int64_t bfloat16_to_int64_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2717 bfloat16_unpack_canonical(&p
, a
, s
);
2718 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2721 static int32_t float128_to_int32_scalbn(float128 a
, FloatRoundMode rmode
,
2722 int scale
, float_status
*s
)
2726 float128_unpack_canonical(&p
, a
, s
);
2727 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2730 static int64_t float128_to_int64_scalbn(float128 a
, FloatRoundMode rmode
,
2731 int scale
, float_status
*s
)
2735 float128_unpack_canonical(&p
, a
, s
);
2736 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2739 int8_t float16_to_int8(float16 a
, float_status
*s
)
2741 return float16_to_int8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2744 int16_t float16_to_int16(float16 a
, float_status
*s
)
2746 return float16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2749 int32_t float16_to_int32(float16 a
, float_status
*s
)
2751 return float16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2754 int64_t float16_to_int64(float16 a
, float_status
*s
)
2756 return float16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2759 int16_t float32_to_int16(float32 a
, float_status
*s
)
2761 return float32_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2764 int32_t float32_to_int32(float32 a
, float_status
*s
)
2766 return float32_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2769 int64_t float32_to_int64(float32 a
, float_status
*s
)
2771 return float32_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2774 int16_t float64_to_int16(float64 a
, float_status
*s
)
2776 return float64_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2779 int32_t float64_to_int32(float64 a
, float_status
*s
)
2781 return float64_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2784 int64_t float64_to_int64(float64 a
, float_status
*s
)
2786 return float64_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2789 int32_t float128_to_int32(float128 a
, float_status
*s
)
2791 return float128_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2794 int64_t float128_to_int64(float128 a
, float_status
*s
)
2796 return float128_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2799 int16_t float16_to_int16_round_to_zero(float16 a
, float_status
*s
)
2801 return float16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2804 int32_t float16_to_int32_round_to_zero(float16 a
, float_status
*s
)
2806 return float16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2809 int64_t float16_to_int64_round_to_zero(float16 a
, float_status
*s
)
2811 return float16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2814 int16_t float32_to_int16_round_to_zero(float32 a
, float_status
*s
)
2816 return float32_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2819 int32_t float32_to_int32_round_to_zero(float32 a
, float_status
*s
)
2821 return float32_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2824 int64_t float32_to_int64_round_to_zero(float32 a
, float_status
*s
)
2826 return float32_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2829 int16_t float64_to_int16_round_to_zero(float64 a
, float_status
*s
)
2831 return float64_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2834 int32_t float64_to_int32_round_to_zero(float64 a
, float_status
*s
)
2836 return float64_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2839 int64_t float64_to_int64_round_to_zero(float64 a
, float_status
*s
)
2841 return float64_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2844 int32_t float128_to_int32_round_to_zero(float128 a
, float_status
*s
)
2846 return float128_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2849 int64_t float128_to_int64_round_to_zero(float128 a
, float_status
*s
)
2851 return float128_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2854 int16_t bfloat16_to_int16(bfloat16 a
, float_status
*s
)
2856 return bfloat16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2859 int32_t bfloat16_to_int32(bfloat16 a
, float_status
*s
)
2861 return bfloat16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2864 int64_t bfloat16_to_int64(bfloat16 a
, float_status
*s
)
2866 return bfloat16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2869 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a
, float_status
*s
)
2871 return bfloat16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2874 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a
, float_status
*s
)
2876 return bfloat16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2879 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a
, float_status
*s
)
2881 return bfloat16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2885 * Floating-point to unsigned integer conversions
2888 uint8_t float16_to_uint8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2893 float16_unpack_canonical(&p
, a
, s
);
2894 return parts_float_to_uint(&p
, rmode
, scale
, UINT8_MAX
, s
);
2897 uint16_t float16_to_uint16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2902 float16_unpack_canonical(&p
, a
, s
);
2903 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2906 uint32_t float16_to_uint32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2911 float16_unpack_canonical(&p
, a
, s
);
2912 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2915 uint64_t float16_to_uint64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2920 float16_unpack_canonical(&p
, a
, s
);
2921 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2924 uint16_t float32_to_uint16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2929 float32_unpack_canonical(&p
, a
, s
);
2930 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2933 uint32_t float32_to_uint32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2938 float32_unpack_canonical(&p
, a
, s
);
2939 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2942 uint64_t float32_to_uint64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2947 float32_unpack_canonical(&p
, a
, s
);
2948 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2951 uint16_t float64_to_uint16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2956 float64_unpack_canonical(&p
, a
, s
);
2957 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2960 uint32_t float64_to_uint32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2965 float64_unpack_canonical(&p
, a
, s
);
2966 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2969 uint64_t float64_to_uint64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2974 float64_unpack_canonical(&p
, a
, s
);
2975 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2978 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2979 int scale
, float_status
*s
)
2983 bfloat16_unpack_canonical(&p
, a
, s
);
2984 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2987 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2988 int scale
, float_status
*s
)
2992 bfloat16_unpack_canonical(&p
, a
, s
);
2993 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2996 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2997 int scale
, float_status
*s
)
3001 bfloat16_unpack_canonical(&p
, a
, s
);
3002 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3005 static uint32_t float128_to_uint32_scalbn(float128 a
, FloatRoundMode rmode
,
3006 int scale
, float_status
*s
)
3010 float128_unpack_canonical(&p
, a
, s
);
3011 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3014 static uint64_t float128_to_uint64_scalbn(float128 a
, FloatRoundMode rmode
,
3015 int scale
, float_status
*s
)
3019 float128_unpack_canonical(&p
, a
, s
);
3020 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3023 uint8_t float16_to_uint8(float16 a
, float_status
*s
)
3025 return float16_to_uint8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3028 uint16_t float16_to_uint16(float16 a
, float_status
*s
)
3030 return float16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3033 uint32_t float16_to_uint32(float16 a
, float_status
*s
)
3035 return float16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3038 uint64_t float16_to_uint64(float16 a
, float_status
*s
)
3040 return float16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3043 uint16_t float32_to_uint16(float32 a
, float_status
*s
)
3045 return float32_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3048 uint32_t float32_to_uint32(float32 a
, float_status
*s
)
3050 return float32_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3053 uint64_t float32_to_uint64(float32 a
, float_status
*s
)
3055 return float32_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3058 uint16_t float64_to_uint16(float64 a
, float_status
*s
)
3060 return float64_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3063 uint32_t float64_to_uint32(float64 a
, float_status
*s
)
3065 return float64_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3068 uint64_t float64_to_uint64(float64 a
, float_status
*s
)
3070 return float64_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3073 uint32_t float128_to_uint32(float128 a
, float_status
*s
)
3075 return float128_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3078 uint64_t float128_to_uint64(float128 a
, float_status
*s
)
3080 return float128_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3083 uint16_t float16_to_uint16_round_to_zero(float16 a
, float_status
*s
)
3085 return float16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3088 uint32_t float16_to_uint32_round_to_zero(float16 a
, float_status
*s
)
3090 return float16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3093 uint64_t float16_to_uint64_round_to_zero(float16 a
, float_status
*s
)
3095 return float16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3098 uint16_t float32_to_uint16_round_to_zero(float32 a
, float_status
*s
)
3100 return float32_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3103 uint32_t float32_to_uint32_round_to_zero(float32 a
, float_status
*s
)
3105 return float32_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3108 uint64_t float32_to_uint64_round_to_zero(float32 a
, float_status
*s
)
3110 return float32_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3113 uint16_t float64_to_uint16_round_to_zero(float64 a
, float_status
*s
)
3115 return float64_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3118 uint32_t float64_to_uint32_round_to_zero(float64 a
, float_status
*s
)
3120 return float64_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3123 uint64_t float64_to_uint64_round_to_zero(float64 a
, float_status
*s
)
3125 return float64_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3128 uint32_t float128_to_uint32_round_to_zero(float128 a
, float_status
*s
)
3130 return float128_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3133 uint64_t float128_to_uint64_round_to_zero(float128 a
, float_status
*s
)
3135 return float128_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3138 uint16_t bfloat16_to_uint16(bfloat16 a
, float_status
*s
)
3140 return bfloat16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3143 uint32_t bfloat16_to_uint32(bfloat16 a
, float_status
*s
)
3145 return bfloat16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3148 uint64_t bfloat16_to_uint64(bfloat16 a
, float_status
*s
)
3150 return bfloat16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3153 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a
, float_status
*s
)
3155 return bfloat16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3158 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a
, float_status
*s
)
3160 return bfloat16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3163 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a
, float_status
*s
)
3165 return bfloat16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3169 * Signed integer to floating-point conversions
3172 float16
int64_to_float16_scalbn(int64_t a
, int scale
, float_status
*status
)
3176 parts_sint_to_float(&p
, a
, scale
, status
);
3177 return float16_round_pack_canonical(&p
, status
);
3180 float16
int32_to_float16_scalbn(int32_t a
, int scale
, float_status
*status
)
3182 return int64_to_float16_scalbn(a
, scale
, status
);
3185 float16
int16_to_float16_scalbn(int16_t a
, int scale
, float_status
*status
)
3187 return int64_to_float16_scalbn(a
, scale
, status
);
3190 float16
int64_to_float16(int64_t a
, float_status
*status
)
3192 return int64_to_float16_scalbn(a
, 0, status
);
3195 float16
int32_to_float16(int32_t a
, float_status
*status
)
3197 return int64_to_float16_scalbn(a
, 0, status
);
3200 float16
int16_to_float16(int16_t a
, float_status
*status
)
3202 return int64_to_float16_scalbn(a
, 0, status
);
3205 float16
int8_to_float16(int8_t a
, float_status
*status
)
3207 return int64_to_float16_scalbn(a
, 0, status
);
3210 float32
int64_to_float32_scalbn(int64_t a
, int scale
, float_status
*status
)
3214 parts64_sint_to_float(&p
, a
, scale
, status
);
3215 return float32_round_pack_canonical(&p
, status
);
3218 float32
int32_to_float32_scalbn(int32_t a
, int scale
, float_status
*status
)
3220 return int64_to_float32_scalbn(a
, scale
, status
);
3223 float32
int16_to_float32_scalbn(int16_t a
, int scale
, float_status
*status
)
3225 return int64_to_float32_scalbn(a
, scale
, status
);
3228 float32
int64_to_float32(int64_t a
, float_status
*status
)
3230 return int64_to_float32_scalbn(a
, 0, status
);
3233 float32
int32_to_float32(int32_t a
, float_status
*status
)
3235 return int64_to_float32_scalbn(a
, 0, status
);
3238 float32
int16_to_float32(int16_t a
, float_status
*status
)
3240 return int64_to_float32_scalbn(a
, 0, status
);
3243 float64
int64_to_float64_scalbn(int64_t a
, int scale
, float_status
*status
)
3247 parts_sint_to_float(&p
, a
, scale
, status
);
3248 return float64_round_pack_canonical(&p
, status
);
3251 float64
int32_to_float64_scalbn(int32_t a
, int scale
, float_status
*status
)
3253 return int64_to_float64_scalbn(a
, scale
, status
);
3256 float64
int16_to_float64_scalbn(int16_t a
, int scale
, float_status
*status
)
3258 return int64_to_float64_scalbn(a
, scale
, status
);
3261 float64
int64_to_float64(int64_t a
, float_status
*status
)
3263 return int64_to_float64_scalbn(a
, 0, status
);
3266 float64
int32_to_float64(int32_t a
, float_status
*status
)
3268 return int64_to_float64_scalbn(a
, 0, status
);
3271 float64
int16_to_float64(int16_t a
, float_status
*status
)
3273 return int64_to_float64_scalbn(a
, 0, status
);
3276 bfloat16
int64_to_bfloat16_scalbn(int64_t a
, int scale
, float_status
*status
)
3280 parts_sint_to_float(&p
, a
, scale
, status
);
3281 return bfloat16_round_pack_canonical(&p
, status
);
3284 bfloat16
int32_to_bfloat16_scalbn(int32_t a
, int scale
, float_status
*status
)
3286 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3289 bfloat16
int16_to_bfloat16_scalbn(int16_t a
, int scale
, float_status
*status
)
3291 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3294 bfloat16
int64_to_bfloat16(int64_t a
, float_status
*status
)
3296 return int64_to_bfloat16_scalbn(a
, 0, status
);
3299 bfloat16
int32_to_bfloat16(int32_t a
, float_status
*status
)
3301 return int64_to_bfloat16_scalbn(a
, 0, status
);
3304 bfloat16
int16_to_bfloat16(int16_t a
, float_status
*status
)
3306 return int64_to_bfloat16_scalbn(a
, 0, status
);
3309 float128
int64_to_float128(int64_t a
, float_status
*status
)
3313 parts_sint_to_float(&p
, a
, 0, status
);
3314 return float128_round_pack_canonical(&p
, status
);
3317 float128
int32_to_float128(int32_t a
, float_status
*status
)
3319 return int64_to_float128(a
, status
);
3323 * Unsigned Integer to floating-point conversions
3326 float16
uint64_to_float16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3330 parts_uint_to_float(&p
, a
, scale
, status
);
3331 return float16_round_pack_canonical(&p
, status
);
3334 float16
uint32_to_float16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3336 return uint64_to_float16_scalbn(a
, scale
, status
);
3339 float16
uint16_to_float16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3341 return uint64_to_float16_scalbn(a
, scale
, status
);
3344 float16
uint64_to_float16(uint64_t a
, float_status
*status
)
3346 return uint64_to_float16_scalbn(a
, 0, status
);
3349 float16
uint32_to_float16(uint32_t a
, float_status
*status
)
3351 return uint64_to_float16_scalbn(a
, 0, status
);
3354 float16
uint16_to_float16(uint16_t a
, float_status
*status
)
3356 return uint64_to_float16_scalbn(a
, 0, status
);
3359 float16
uint8_to_float16(uint8_t a
, float_status
*status
)
3361 return uint64_to_float16_scalbn(a
, 0, status
);
3364 float32
uint64_to_float32_scalbn(uint64_t a
, int scale
, float_status
*status
)
3368 parts_uint_to_float(&p
, a
, scale
, status
);
3369 return float32_round_pack_canonical(&p
, status
);
3372 float32
uint32_to_float32_scalbn(uint32_t a
, int scale
, float_status
*status
)
3374 return uint64_to_float32_scalbn(a
, scale
, status
);
3377 float32
uint16_to_float32_scalbn(uint16_t a
, int scale
, float_status
*status
)
3379 return uint64_to_float32_scalbn(a
, scale
, status
);
3382 float32
uint64_to_float32(uint64_t a
, float_status
*status
)
3384 return uint64_to_float32_scalbn(a
, 0, status
);
3387 float32
uint32_to_float32(uint32_t a
, float_status
*status
)
3389 return uint64_to_float32_scalbn(a
, 0, status
);
3392 float32
uint16_to_float32(uint16_t a
, float_status
*status
)
3394 return uint64_to_float32_scalbn(a
, 0, status
);
3397 float64
uint64_to_float64_scalbn(uint64_t a
, int scale
, float_status
*status
)
3401 parts_uint_to_float(&p
, a
, scale
, status
);
3402 return float64_round_pack_canonical(&p
, status
);
3405 float64
uint32_to_float64_scalbn(uint32_t a
, int scale
, float_status
*status
)
3407 return uint64_to_float64_scalbn(a
, scale
, status
);
3410 float64
uint16_to_float64_scalbn(uint16_t a
, int scale
, float_status
*status
)
3412 return uint64_to_float64_scalbn(a
, scale
, status
);
3415 float64
uint64_to_float64(uint64_t a
, float_status
*status
)
3417 return uint64_to_float64_scalbn(a
, 0, status
);
3420 float64
uint32_to_float64(uint32_t a
, float_status
*status
)
3422 return uint64_to_float64_scalbn(a
, 0, status
);
3425 float64
uint16_to_float64(uint16_t a
, float_status
*status
)
3427 return uint64_to_float64_scalbn(a
, 0, status
);
3430 bfloat16
uint64_to_bfloat16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3434 parts_uint_to_float(&p
, a
, scale
, status
);
3435 return bfloat16_round_pack_canonical(&p
, status
);
3438 bfloat16
uint32_to_bfloat16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3440 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3443 bfloat16
uint16_to_bfloat16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3445 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3448 bfloat16
uint64_to_bfloat16(uint64_t a
, float_status
*status
)
3450 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3453 bfloat16
uint32_to_bfloat16(uint32_t a
, float_status
*status
)
3455 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3458 bfloat16
uint16_to_bfloat16(uint16_t a
, float_status
*status
)
3460 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3463 float128
uint64_to_float128(uint64_t a
, float_status
*status
)
3467 parts_uint_to_float(&p
, a
, 0, status
);
3468 return float128_round_pack_canonical(&p
, status
);
3472 * Minimum and maximum
3475 static float16
float16_minmax(float16 a
, float16 b
, float_status
*s
, int flags
)
3477 FloatParts64 pa
, pb
, *pr
;
3479 float16_unpack_canonical(&pa
, a
, s
);
3480 float16_unpack_canonical(&pb
, b
, s
);
3481 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3483 return float16_round_pack_canonical(pr
, s
);
3486 static bfloat16
bfloat16_minmax(bfloat16 a
, bfloat16 b
,
3487 float_status
*s
, int flags
)
3489 FloatParts64 pa
, pb
, *pr
;
3491 bfloat16_unpack_canonical(&pa
, a
, s
);
3492 bfloat16_unpack_canonical(&pb
, b
, s
);
3493 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3495 return bfloat16_round_pack_canonical(pr
, s
);
3498 static float32
float32_minmax(float32 a
, float32 b
, float_status
*s
, int flags
)
3500 FloatParts64 pa
, pb
, *pr
;
3502 float32_unpack_canonical(&pa
, a
, s
);
3503 float32_unpack_canonical(&pb
, b
, s
);
3504 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3506 return float32_round_pack_canonical(pr
, s
);
3509 static float64
float64_minmax(float64 a
, float64 b
, float_status
*s
, int flags
)
3511 FloatParts64 pa
, pb
, *pr
;
3513 float64_unpack_canonical(&pa
, a
, s
);
3514 float64_unpack_canonical(&pb
, b
, s
);
3515 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3517 return float64_round_pack_canonical(pr
, s
);
3520 static float128
float128_minmax(float128 a
, float128 b
,
3521 float_status
*s
, int flags
)
3523 FloatParts128 pa
, pb
, *pr
;
3525 float128_unpack_canonical(&pa
, a
, s
);
3526 float128_unpack_canonical(&pb
, b
, s
);
3527 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3529 return float128_round_pack_canonical(pr
, s
);
3532 #define MINMAX_1(type, name, flags) \
3533 type type##_##name(type a, type b, float_status *s) \
3534 { return type##_minmax(a, b, s, flags); }
3536 #define MINMAX_2(type) \
3537 MINMAX_1(type, max, 0) \
3538 MINMAX_1(type, maxnum, minmax_isnum) \
3539 MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
3540 MINMAX_1(type, min, minmax_ismin) \
3541 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
3542 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3554 * Floating point compare
3557 static FloatRelation QEMU_FLATTEN
3558 float16_do_compare(float16 a
, float16 b
, float_status
*s
, bool is_quiet
)
3560 FloatParts64 pa
, pb
;
3562 float16_unpack_canonical(&pa
, a
, s
);
3563 float16_unpack_canonical(&pb
, b
, s
);
3564 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3567 FloatRelation
float16_compare(float16 a
, float16 b
, float_status
*s
)
3569 return float16_do_compare(a
, b
, s
, false);
3572 FloatRelation
float16_compare_quiet(float16 a
, float16 b
, float_status
*s
)
3574 return float16_do_compare(a
, b
, s
, true);
3577 static FloatRelation QEMU_SOFTFLOAT_ATTR
3578 float32_do_compare(float32 a
, float32 b
, float_status
*s
, bool is_quiet
)
3580 FloatParts64 pa
, pb
;
3582 float32_unpack_canonical(&pa
, a
, s
);
3583 float32_unpack_canonical(&pb
, b
, s
);
3584 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3587 static FloatRelation QEMU_FLATTEN
3588 float32_hs_compare(float32 xa
, float32 xb
, float_status
*s
, bool is_quiet
)
3590 union_float32 ua
, ub
;
3595 if (QEMU_NO_HARDFLOAT
) {
3599 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
3600 if (isgreaterequal(ua
.h
, ub
.h
)) {
3601 if (isgreater(ua
.h
, ub
.h
)) {
3602 return float_relation_greater
;
3604 return float_relation_equal
;
3606 if (likely(isless(ua
.h
, ub
.h
))) {
3607 return float_relation_less
;
3610 * The only condition remaining is unordered.
3611 * Fall through to set flags.
3614 return float32_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
3617 FloatRelation
float32_compare(float32 a
, float32 b
, float_status
*s
)
3619 return float32_hs_compare(a
, b
, s
, false);
3622 FloatRelation
float32_compare_quiet(float32 a
, float32 b
, float_status
*s
)
3624 return float32_hs_compare(a
, b
, s
, true);
3627 static FloatRelation QEMU_SOFTFLOAT_ATTR
3628 float64_do_compare(float64 a
, float64 b
, float_status
*s
, bool is_quiet
)
3630 FloatParts64 pa
, pb
;
3632 float64_unpack_canonical(&pa
, a
, s
);
3633 float64_unpack_canonical(&pb
, b
, s
);
3634 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3637 static FloatRelation QEMU_FLATTEN
3638 float64_hs_compare(float64 xa
, float64 xb
, float_status
*s
, bool is_quiet
)
3640 union_float64 ua
, ub
;
3645 if (QEMU_NO_HARDFLOAT
) {
3649 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
3650 if (isgreaterequal(ua
.h
, ub
.h
)) {
3651 if (isgreater(ua
.h
, ub
.h
)) {
3652 return float_relation_greater
;
3654 return float_relation_equal
;
3656 if (likely(isless(ua
.h
, ub
.h
))) {
3657 return float_relation_less
;
3660 * The only condition remaining is unordered.
3661 * Fall through to set flags.
3664 return float64_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
3667 FloatRelation
float64_compare(float64 a
, float64 b
, float_status
*s
)
3669 return float64_hs_compare(a
, b
, s
, false);
3672 FloatRelation
float64_compare_quiet(float64 a
, float64 b
, float_status
*s
)
3674 return float64_hs_compare(a
, b
, s
, true);
3677 static FloatRelation QEMU_FLATTEN
3678 bfloat16_do_compare(bfloat16 a
, bfloat16 b
, float_status
*s
, bool is_quiet
)
3680 FloatParts64 pa
, pb
;
3682 bfloat16_unpack_canonical(&pa
, a
, s
);
3683 bfloat16_unpack_canonical(&pb
, b
, s
);
3684 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3687 FloatRelation
bfloat16_compare(bfloat16 a
, bfloat16 b
, float_status
*s
)
3689 return bfloat16_do_compare(a
, b
, s
, false);
3692 FloatRelation
bfloat16_compare_quiet(bfloat16 a
, bfloat16 b
, float_status
*s
)
3694 return bfloat16_do_compare(a
, b
, s
, true);
3697 static FloatRelation QEMU_FLATTEN
3698 float128_do_compare(float128 a
, float128 b
, float_status
*s
, bool is_quiet
)
3700 FloatParts128 pa
, pb
;
3702 float128_unpack_canonical(&pa
, a
, s
);
3703 float128_unpack_canonical(&pb
, b
, s
);
3704 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3707 FloatRelation
float128_compare(float128 a
, float128 b
, float_status
*s
)
3709 return float128_do_compare(a
, b
, s
, false);
3712 FloatRelation
float128_compare_quiet(float128 a
, float128 b
, float_status
*s
)
3714 return float128_do_compare(a
, b
, s
, true);
3721 float16
float16_scalbn(float16 a
, int n
, float_status
*status
)
3725 float16_unpack_canonical(&p
, a
, status
);
3726 parts_scalbn(&p
, n
, status
);
3727 return float16_round_pack_canonical(&p
, status
);
3730 float32
float32_scalbn(float32 a
, int n
, float_status
*status
)
3734 float32_unpack_canonical(&p
, a
, status
);
3735 parts_scalbn(&p
, n
, status
);
3736 return float32_round_pack_canonical(&p
, status
);
3739 float64
float64_scalbn(float64 a
, int n
, float_status
*status
)
3743 float64_unpack_canonical(&p
, a
, status
);
3744 parts_scalbn(&p
, n
, status
);
3745 return float64_round_pack_canonical(&p
, status
);
3748 bfloat16
bfloat16_scalbn(bfloat16 a
, int n
, float_status
*status
)
3752 bfloat16_unpack_canonical(&p
, a
, status
);
3753 parts_scalbn(&p
, n
, status
);
3754 return bfloat16_round_pack_canonical(&p
, status
);
3757 float128
float128_scalbn(float128 a
, int n
, float_status
*status
)
3761 float128_unpack_canonical(&p
, a
, status
);
3762 parts_scalbn(&p
, n
, status
);
3763 return float128_round_pack_canonical(&p
, status
);
3770 float16 QEMU_FLATTEN
float16_sqrt(float16 a
, float_status
*status
)
3774 float16_unpack_canonical(&p
, a
, status
);
3775 parts_sqrt(&p
, status
, &float16_params
);
3776 return float16_round_pack_canonical(&p
, status
);
3779 static float32 QEMU_SOFTFLOAT_ATTR
3780 soft_f32_sqrt(float32 a
, float_status
*status
)
3784 float32_unpack_canonical(&p
, a
, status
);
3785 parts_sqrt(&p
, status
, &float32_params
);
3786 return float32_round_pack_canonical(&p
, status
);
3789 static float64 QEMU_SOFTFLOAT_ATTR
3790 soft_f64_sqrt(float64 a
, float_status
*status
)
3794 float64_unpack_canonical(&p
, a
, status
);
3795 parts_sqrt(&p
, status
, &float64_params
);
3796 return float64_round_pack_canonical(&p
, status
);
3799 float32 QEMU_FLATTEN
float32_sqrt(float32 xa
, float_status
*s
)
3801 union_float32 ua
, ur
;
3804 if (unlikely(!can_use_fpu(s
))) {
3808 float32_input_flush1(&ua
.s
, s
);
3809 if (QEMU_HARDFLOAT_1F32_USE_FP
) {
3810 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
3811 fpclassify(ua
.h
) == FP_ZERO
) ||
3815 } else if (unlikely(!float32_is_zero_or_normal(ua
.s
) ||
3816 float32_is_neg(ua
.s
))) {
3823 return soft_f32_sqrt(ua
.s
, s
);
3826 float64 QEMU_FLATTEN
float64_sqrt(float64 xa
, float_status
*s
)
3828 union_float64 ua
, ur
;
3831 if (unlikely(!can_use_fpu(s
))) {
3835 float64_input_flush1(&ua
.s
, s
);
3836 if (QEMU_HARDFLOAT_1F64_USE_FP
) {
3837 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
3838 fpclassify(ua
.h
) == FP_ZERO
) ||
3842 } else if (unlikely(!float64_is_zero_or_normal(ua
.s
) ||
3843 float64_is_neg(ua
.s
))) {
3850 return soft_f64_sqrt(ua
.s
, s
);
3853 bfloat16 QEMU_FLATTEN
bfloat16_sqrt(bfloat16 a
, float_status
*status
)
3857 bfloat16_unpack_canonical(&p
, a
, status
);
3858 parts_sqrt(&p
, status
, &bfloat16_params
);
3859 return bfloat16_round_pack_canonical(&p
, status
);
3862 float128 QEMU_FLATTEN
float128_sqrt(float128 a
, float_status
*status
)
3866 float128_unpack_canonical(&p
, a
, status
);
3867 parts_sqrt(&p
, status
, &float128_params
);
3868 return float128_round_pack_canonical(&p
, status
);
3871 /*----------------------------------------------------------------------------
3872 | The pattern for a default generated NaN.
3873 *----------------------------------------------------------------------------*/
3875 float16
float16_default_nan(float_status
*status
)
3879 parts_default_nan(&p
, status
);
3880 p
.frac
>>= float16_params
.frac_shift
;
3881 return float16_pack_raw(&p
);
3884 float32
float32_default_nan(float_status
*status
)
3888 parts_default_nan(&p
, status
);
3889 p
.frac
>>= float32_params
.frac_shift
;
3890 return float32_pack_raw(&p
);
3893 float64
float64_default_nan(float_status
*status
)
3897 parts_default_nan(&p
, status
);
3898 p
.frac
>>= float64_params
.frac_shift
;
3899 return float64_pack_raw(&p
);
3902 float128
float128_default_nan(float_status
*status
)
3906 parts_default_nan(&p
, status
);
3907 frac_shr(&p
, float128_params
.frac_shift
);
3908 return float128_pack_raw(&p
);
3911 bfloat16
bfloat16_default_nan(float_status
*status
)
3915 parts_default_nan(&p
, status
);
3916 p
.frac
>>= bfloat16_params
.frac_shift
;
3917 return bfloat16_pack_raw(&p
);
3920 /*----------------------------------------------------------------------------
3921 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3922 *----------------------------------------------------------------------------*/
3924 float16
float16_silence_nan(float16 a
, float_status
*status
)
3928 float16_unpack_raw(&p
, a
);
3929 p
.frac
<<= float16_params
.frac_shift
;
3930 parts_silence_nan(&p
, status
);
3931 p
.frac
>>= float16_params
.frac_shift
;
3932 return float16_pack_raw(&p
);
3935 float32
float32_silence_nan(float32 a
, float_status
*status
)
3939 float32_unpack_raw(&p
, a
);
3940 p
.frac
<<= float32_params
.frac_shift
;
3941 parts_silence_nan(&p
, status
);
3942 p
.frac
>>= float32_params
.frac_shift
;
3943 return float32_pack_raw(&p
);
3946 float64
float64_silence_nan(float64 a
, float_status
*status
)
3950 float64_unpack_raw(&p
, a
);
3951 p
.frac
<<= float64_params
.frac_shift
;
3952 parts_silence_nan(&p
, status
);
3953 p
.frac
>>= float64_params
.frac_shift
;
3954 return float64_pack_raw(&p
);
3957 bfloat16
bfloat16_silence_nan(bfloat16 a
, float_status
*status
)
3961 bfloat16_unpack_raw(&p
, a
);
3962 p
.frac
<<= bfloat16_params
.frac_shift
;
3963 parts_silence_nan(&p
, status
);
3964 p
.frac
>>= bfloat16_params
.frac_shift
;
3965 return bfloat16_pack_raw(&p
);
3968 float128
float128_silence_nan(float128 a
, float_status
*status
)
3972 float128_unpack_raw(&p
, a
);
3973 frac_shl(&p
, float128_params
.frac_shift
);
3974 parts_silence_nan(&p
, status
);
3975 frac_shr(&p
, float128_params
.frac_shift
);
3976 return float128_pack_raw(&p
);
3979 /*----------------------------------------------------------------------------
3980 | If `a' is denormal and we are in flush-to-zero mode then set the
3981 | input-denormal exception and return zero. Otherwise just return the value.
3982 *----------------------------------------------------------------------------*/
3984 static bool parts_squash_denormal(FloatParts64 p
, float_status
*status
)
3986 if (p
.exp
== 0 && p
.frac
!= 0) {
3987 float_raise(float_flag_input_denormal
, status
);
3994 float16
float16_squash_input_denormal(float16 a
, float_status
*status
)
3996 if (status
->flush_inputs_to_zero
) {
3999 float16_unpack_raw(&p
, a
);
4000 if (parts_squash_denormal(p
, status
)) {
4001 return float16_set_sign(float16_zero
, p
.sign
);
4007 float32
float32_squash_input_denormal(float32 a
, float_status
*status
)
4009 if (status
->flush_inputs_to_zero
) {
4012 float32_unpack_raw(&p
, a
);
4013 if (parts_squash_denormal(p
, status
)) {
4014 return float32_set_sign(float32_zero
, p
.sign
);
4020 float64
float64_squash_input_denormal(float64 a
, float_status
*status
)
4022 if (status
->flush_inputs_to_zero
) {
4025 float64_unpack_raw(&p
, a
);
4026 if (parts_squash_denormal(p
, status
)) {
4027 return float64_set_sign(float64_zero
, p
.sign
);
4033 bfloat16
bfloat16_squash_input_denormal(bfloat16 a
, float_status
*status
)
4035 if (status
->flush_inputs_to_zero
) {
4038 bfloat16_unpack_raw(&p
, a
);
4039 if (parts_squash_denormal(p
, status
)) {
4040 return bfloat16_set_sign(bfloat16_zero
, p
.sign
);
4046 /*----------------------------------------------------------------------------
4047 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
4048 | and 7, and returns the properly rounded 32-bit integer corresponding to the
4049 | input. If `zSign' is 1, the input is negated before being converted to an
4050 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
4051 | is simply rounded to an integer, with the inexact exception raised if the
4052 | input cannot be represented exactly as an integer. However, if the fixed-
4053 | point input is too large, the invalid exception is raised and the largest
4054 | positive or negative integer is returned.
4055 *----------------------------------------------------------------------------*/
4057 static int32_t roundAndPackInt32(bool zSign
, uint64_t absZ
,
4058 float_status
*status
)
4060 int8_t roundingMode
;
4061 bool roundNearestEven
;
4062 int8_t roundIncrement
, roundBits
;
4065 roundingMode
= status
->float_rounding_mode
;
4066 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4067 switch (roundingMode
) {
4068 case float_round_nearest_even
:
4069 case float_round_ties_away
:
4070 roundIncrement
= 0x40;
4072 case float_round_to_zero
:
4075 case float_round_up
:
4076 roundIncrement
= zSign
? 0 : 0x7f;
4078 case float_round_down
:
4079 roundIncrement
= zSign
? 0x7f : 0;
4081 case float_round_to_odd
:
4082 roundIncrement
= absZ
& 0x80 ? 0 : 0x7f;
4087 roundBits
= absZ
& 0x7F;
4088 absZ
= ( absZ
+ roundIncrement
)>>7;
4089 if (!(roundBits
^ 0x40) && roundNearestEven
) {
4093 if ( zSign
) z
= - z
;
4094 if ( ( absZ
>>32 ) || ( z
&& ( ( z
< 0 ) ^ zSign
) ) ) {
4095 float_raise(float_flag_invalid
, status
);
4096 return zSign
? INT32_MIN
: INT32_MAX
;
4099 float_raise(float_flag_inexact
, status
);
/*----------------------------------------------------------------------------
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
| `absZ1', with binary point between bits 63 and 64 (between the input words),
| and returns the properly rounded 64-bit integer corresponding to the input.
| If `zSign' is 1, the input is negated before being converted to an integer.
| Ordinarily, the fixed-point input is simply rounded to an integer, with
| the inexact exception raised if the input cannot be represented exactly as
| an integer.  However, if the fixed-point input is too large, the invalid
| exception is raised and the largest positive or negative integer is
| returned.
*----------------------------------------------------------------------------*/
4117 static int64_t roundAndPackInt64(bool zSign
, uint64_t absZ0
, uint64_t absZ1
,
4118 float_status
*status
)
4120 int8_t roundingMode
;
4121 bool roundNearestEven
, increment
;
4124 roundingMode
= status
->float_rounding_mode
;
4125 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4126 switch (roundingMode
) {
4127 case float_round_nearest_even
:
4128 case float_round_ties_away
:
4129 increment
= ((int64_t) absZ1
< 0);
4131 case float_round_to_zero
:
4134 case float_round_up
:
4135 increment
= !zSign
&& absZ1
;
4137 case float_round_down
:
4138 increment
= zSign
&& absZ1
;
4140 case float_round_to_odd
:
4141 increment
= !(absZ0
& 1) && absZ1
;
4148 if ( absZ0
== 0 ) goto overflow
;
4149 if (!(absZ1
<< 1) && roundNearestEven
) {
4154 if ( zSign
) z
= - z
;
4155 if ( z
&& ( ( z
< 0 ) ^ zSign
) ) {
4157 float_raise(float_flag_invalid
, status
);
4158 return zSign
? INT64_MIN
: INT64_MAX
;
4161 float_raise(float_flag_inexact
, status
);
4167 /*----------------------------------------------------------------------------
4168 | Normalizes the subnormal single-precision floating-point value represented
4169 | by the denormalized significand `aSig'. The normalized exponent and
4170 | significand are stored at the locations pointed to by `zExpPtr' and
4171 | `zSigPtr', respectively.
4172 *----------------------------------------------------------------------------*/
/*
 * Normalize the subnormal float32 significand `aSig'; the normalized
 * exponent and significand are written through `zExpPtr' and `zSigPtr'.
 */
static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    int8_t shiftCount;

    /* Shift the most-significant set bit of aSig up to bit 23. */
    shiftCount = clz32(aSig) - 8;
    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4185 /*----------------------------------------------------------------------------
4186 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4187 | and significand `zSig', and returns the proper single-precision floating-
4188 | point value corresponding to the abstract input. Ordinarily, the abstract
4189 | value is simply rounded and packed into the single-precision format, with
4190 | the inexact exception raised if the abstract input cannot be represented
4191 | exactly. However, if the abstract value is too large, the overflow and
4192 | inexact exceptions are raised and an infinity or maximal finite value is
4193 | returned. If the abstract value is too small, the input value is rounded to
4194 | a subnormal number, and the underflow and inexact exceptions are raised if
4195 | the abstract input cannot be represented exactly as a subnormal single-
4196 | precision floating-point number.
4197 | The input significand `zSig' has its binary point between bits 30
4198 | and 29, which is 7 bits to the left of the usual location. This shifted
4199 | significand must be normalized or smaller. If `zSig' is not normalized,
4200 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4201 | and it must not require rounding. In the usual case that `zSig' is
4202 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4203 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4204 | Binary Floating-Point Arithmetic.
4205 *----------------------------------------------------------------------------*/
4207 static float32
roundAndPackFloat32(bool zSign
, int zExp
, uint32_t zSig
,
4208 float_status
*status
)
4210 int8_t roundingMode
;
4211 bool roundNearestEven
;
4212 int8_t roundIncrement
, roundBits
;
4215 roundingMode
= status
->float_rounding_mode
;
4216 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4217 switch (roundingMode
) {
4218 case float_round_nearest_even
:
4219 case float_round_ties_away
:
4220 roundIncrement
= 0x40;
4222 case float_round_to_zero
:
4225 case float_round_up
:
4226 roundIncrement
= zSign
? 0 : 0x7f;
4228 case float_round_down
:
4229 roundIncrement
= zSign
? 0x7f : 0;
4231 case float_round_to_odd
:
4232 roundIncrement
= zSig
& 0x80 ? 0 : 0x7f;
4238 roundBits
= zSig
& 0x7F;
4239 if ( 0xFD <= (uint16_t) zExp
) {
4240 if ( ( 0xFD < zExp
)
4241 || ( ( zExp
== 0xFD )
4242 && ( (int32_t) ( zSig
+ roundIncrement
) < 0 ) )
4244 bool overflow_to_inf
= roundingMode
!= float_round_to_odd
&&
4245 roundIncrement
!= 0;
4246 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4247 return packFloat32(zSign
, 0xFF, -!overflow_to_inf
);
4250 if (status
->flush_to_zero
) {
4251 float_raise(float_flag_output_denormal
, status
);
4252 return packFloat32(zSign
, 0, 0);
4254 isTiny
= status
->tininess_before_rounding
4256 || (zSig
+ roundIncrement
< 0x80000000);
4257 shift32RightJamming( zSig
, - zExp
, &zSig
);
4259 roundBits
= zSig
& 0x7F;
4260 if (isTiny
&& roundBits
) {
4261 float_raise(float_flag_underflow
, status
);
4263 if (roundingMode
== float_round_to_odd
) {
4265 * For round-to-odd case, the roundIncrement depends on
4266 * zSig which just changed.
4268 roundIncrement
= zSig
& 0x80 ? 0 : 0x7f;
4273 float_raise(float_flag_inexact
, status
);
4275 zSig
= ( zSig
+ roundIncrement
)>>7;
4276 if (!(roundBits
^ 0x40) && roundNearestEven
) {
4279 if ( zSig
== 0 ) zExp
= 0;
4280 return packFloat32( zSign
, zExp
, zSig
);
4284 /*----------------------------------------------------------------------------
4285 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4286 | and significand `zSig', and returns the proper single-precision floating-
4287 | point value corresponding to the abstract input. This routine is just like
4288 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4289 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4290 | floating-point exponent.
4291 *----------------------------------------------------------------------------*/
4294 normalizeRoundAndPackFloat32(bool zSign
, int zExp
, uint32_t zSig
,
4295 float_status
*status
)
4299 shiftCount
= clz32(zSig
) - 1;
4300 return roundAndPackFloat32(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
,
4305 /*----------------------------------------------------------------------------
4306 | Normalizes the subnormal double-precision floating-point value represented
4307 | by the denormalized significand `aSig'. The normalized exponent and
4308 | significand are stored at the locations pointed to by `zExpPtr' and
4309 | `zSigPtr', respectively.
4310 *----------------------------------------------------------------------------*/
/*
 * Normalize the subnormal float64 significand `aSig'; the normalized
 * exponent and significand are written through `zExpPtr' and `zSigPtr'.
 */
static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    int8_t shiftCount;

    /* Shift the most-significant set bit of aSig up to bit 52. */
    shiftCount = clz64(aSig) - 11;
    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4323 /*----------------------------------------------------------------------------
4324 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4325 | double-precision floating-point value, returning the result. After being
4326 | shifted into the proper positions, the three fields are simply added
4327 | together to form the result. This means that any integer portion of `zSig'
4328 | will be added into the exponent. Since a properly normalized significand
4329 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4330 | than the desired result exponent whenever `zSig' is a complete, normalized
4332 *----------------------------------------------------------------------------*/
4334 static inline float64
packFloat64(bool zSign
, int zExp
, uint64_t zSig
)
4337 return make_float64(
4338 ( ( (uint64_t) zSign
)<<63 ) + ( ( (uint64_t) zExp
)<<52 ) + zSig
);
4342 /*----------------------------------------------------------------------------
4343 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4344 | and significand `zSig', and returns the proper double-precision floating-
4345 | point value corresponding to the abstract input. Ordinarily, the abstract
4346 | value is simply rounded and packed into the double-precision format, with
4347 | the inexact exception raised if the abstract input cannot be represented
4348 | exactly. However, if the abstract value is too large, the overflow and
4349 | inexact exceptions are raised and an infinity or maximal finite value is
4350 | returned. If the abstract value is too small, the input value is rounded to
4351 | a subnormal number, and the underflow and inexact exceptions are raised if
4352 | the abstract input cannot be represented exactly as a subnormal double-
4353 | precision floating-point number.
4354 | The input significand `zSig' has its binary point between bits 62
4355 | and 61, which is 10 bits to the left of the usual location. This shifted
4356 | significand must be normalized or smaller. If `zSig' is not normalized,
4357 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4358 | and it must not require rounding. In the usual case that `zSig' is
4359 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4360 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4361 | Binary Floating-Point Arithmetic.
4362 *----------------------------------------------------------------------------*/
4364 static float64
roundAndPackFloat64(bool zSign
, int zExp
, uint64_t zSig
,
4365 float_status
*status
)
4367 int8_t roundingMode
;
4368 bool roundNearestEven
;
4369 int roundIncrement
, roundBits
;
4372 roundingMode
= status
->float_rounding_mode
;
4373 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4374 switch (roundingMode
) {
4375 case float_round_nearest_even
:
4376 case float_round_ties_away
:
4377 roundIncrement
= 0x200;
4379 case float_round_to_zero
:
4382 case float_round_up
:
4383 roundIncrement
= zSign
? 0 : 0x3ff;
4385 case float_round_down
:
4386 roundIncrement
= zSign
? 0x3ff : 0;
4388 case float_round_to_odd
:
4389 roundIncrement
= (zSig
& 0x400) ? 0 : 0x3ff;
4394 roundBits
= zSig
& 0x3FF;
4395 if ( 0x7FD <= (uint16_t) zExp
) {
4396 if ( ( 0x7FD < zExp
)
4397 || ( ( zExp
== 0x7FD )
4398 && ( (int64_t) ( zSig
+ roundIncrement
) < 0 ) )
4400 bool overflow_to_inf
= roundingMode
!= float_round_to_odd
&&
4401 roundIncrement
!= 0;
4402 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4403 return packFloat64(zSign
, 0x7FF, -(!overflow_to_inf
));
4406 if (status
->flush_to_zero
) {
4407 float_raise(float_flag_output_denormal
, status
);
4408 return packFloat64(zSign
, 0, 0);
4410 isTiny
= status
->tininess_before_rounding
4412 || (zSig
+ roundIncrement
< UINT64_C(0x8000000000000000));
4413 shift64RightJamming( zSig
, - zExp
, &zSig
);
4415 roundBits
= zSig
& 0x3FF;
4416 if (isTiny
&& roundBits
) {
4417 float_raise(float_flag_underflow
, status
);
4419 if (roundingMode
== float_round_to_odd
) {
4421 * For round-to-odd case, the roundIncrement depends on
4422 * zSig which just changed.
4424 roundIncrement
= (zSig
& 0x400) ? 0 : 0x3ff;
4429 float_raise(float_flag_inexact
, status
);
4431 zSig
= ( zSig
+ roundIncrement
)>>10;
4432 if (!(roundBits
^ 0x200) && roundNearestEven
) {
4435 if ( zSig
== 0 ) zExp
= 0;
4436 return packFloat64( zSign
, zExp
, zSig
);
4440 /*----------------------------------------------------------------------------
4441 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4442 | and significand `zSig', and returns the proper double-precision floating-
4443 | point value corresponding to the abstract input. This routine is just like
4444 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4445 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4446 | floating-point exponent.
4447 *----------------------------------------------------------------------------*/
4450 normalizeRoundAndPackFloat64(bool zSign
, int zExp
, uint64_t zSig
,
4451 float_status
*status
)
4455 shiftCount
= clz64(zSig
) - 1;
4456 return roundAndPackFloat64(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
,
4461 /*----------------------------------------------------------------------------
4462 | Normalizes the subnormal extended double-precision floating-point value
4463 | represented by the denormalized significand `aSig'. The normalized exponent
4464 | and significand are stored at the locations pointed to by `zExpPtr' and
4465 | `zSigPtr', respectively.
4466 *----------------------------------------------------------------------------*/
/*
 * Normalize the subnormal floatx80 significand `aSig'; the normalized
 * exponent and significand are written through `zExpPtr' and `zSigPtr'.
 */
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t shiftCount;

    /* floatx80 has an explicit integer bit, so shift the MSB to bit 63. */
    shiftCount = clz64(aSig);
    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4478 /*----------------------------------------------------------------------------
4479 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4480 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4481 | and returns the proper extended double-precision floating-point value
4482 | corresponding to the abstract input. Ordinarily, the abstract value is
4483 | rounded and packed into the extended double-precision format, with the
4484 | inexact exception raised if the abstract input cannot be represented
4485 | exactly. However, if the abstract value is too large, the overflow and
4486 | inexact exceptions are raised and an infinity or maximal finite value is
4487 | returned. If the abstract value is too small, the input value is rounded to
4488 | a subnormal number, and the underflow and inexact exceptions are raised if
4489 | the abstract input cannot be represented exactly as a subnormal extended
4490 | double-precision floating-point number.
4491 | If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4492 | the result is rounded to the same number of bits as single or double
4493 | precision, respectively. Otherwise, the result is rounded to the full
4494 | precision of the extended double-precision format.
4495 | The input significand must be normalized or smaller. If the input
4496 | significand is not normalized, `zExp' must be 0; in that case, the result
4497 | returned is a subnormal number, and it must not require rounding. The
4498 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4499 | Floating-Point Arithmetic.
4500 *----------------------------------------------------------------------------*/
4502 floatx80
roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
, bool zSign
,
4503 int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
,
4504 float_status
*status
)
4506 FloatRoundMode roundingMode
;
4507 bool roundNearestEven
, increment
, isTiny
;
4508 int64_t roundIncrement
, roundMask
, roundBits
;
4510 roundingMode
= status
->float_rounding_mode
;
4511 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4512 switch (roundingPrecision
) {
4513 case floatx80_precision_x
:
4515 case floatx80_precision_d
:
4516 roundIncrement
= UINT64_C(0x0000000000000400);
4517 roundMask
= UINT64_C(0x00000000000007FF);
4519 case floatx80_precision_s
:
4520 roundIncrement
= UINT64_C(0x0000008000000000);
4521 roundMask
= UINT64_C(0x000000FFFFFFFFFF);
4524 g_assert_not_reached();
4526 zSig0
|= ( zSig1
!= 0 );
4527 switch (roundingMode
) {
4528 case float_round_nearest_even
:
4529 case float_round_ties_away
:
4531 case float_round_to_zero
:
4534 case float_round_up
:
4535 roundIncrement
= zSign
? 0 : roundMask
;
4537 case float_round_down
:
4538 roundIncrement
= zSign
? roundMask
: 0;
4543 roundBits
= zSig0
& roundMask
;
4544 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4545 if ( ( 0x7FFE < zExp
)
4546 || ( ( zExp
== 0x7FFE ) && ( zSig0
+ roundIncrement
< zSig0
) )
4551 if (status
->flush_to_zero
) {
4552 float_raise(float_flag_output_denormal
, status
);
4553 return packFloatx80(zSign
, 0, 0);
4555 isTiny
= status
->tininess_before_rounding
4557 || (zSig0
<= zSig0
+ roundIncrement
);
4558 shift64RightJamming( zSig0
, 1 - zExp
, &zSig0
);
4560 roundBits
= zSig0
& roundMask
;
4561 if (isTiny
&& roundBits
) {
4562 float_raise(float_flag_underflow
, status
);
4565 float_raise(float_flag_inexact
, status
);
4567 zSig0
+= roundIncrement
;
4568 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
4569 roundIncrement
= roundMask
+ 1;
4570 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4571 roundMask
|= roundIncrement
;
4573 zSig0
&= ~ roundMask
;
4574 return packFloatx80( zSign
, zExp
, zSig0
);
4578 float_raise(float_flag_inexact
, status
);
4580 zSig0
+= roundIncrement
;
4581 if ( zSig0
< roundIncrement
) {
4583 zSig0
= UINT64_C(0x8000000000000000);
4585 roundIncrement
= roundMask
+ 1;
4586 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4587 roundMask
|= roundIncrement
;
4589 zSig0
&= ~ roundMask
;
4590 if ( zSig0
== 0 ) zExp
= 0;
4591 return packFloatx80( zSign
, zExp
, zSig0
);
4593 switch (roundingMode
) {
4594 case float_round_nearest_even
:
4595 case float_round_ties_away
:
4596 increment
= ((int64_t)zSig1
< 0);
4598 case float_round_to_zero
:
4601 case float_round_up
:
4602 increment
= !zSign
&& zSig1
;
4604 case float_round_down
:
4605 increment
= zSign
&& zSig1
;
4610 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4611 if ( ( 0x7FFE < zExp
)
4612 || ( ( zExp
== 0x7FFE )
4613 && ( zSig0
== UINT64_C(0xFFFFFFFFFFFFFFFF) )
4619 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4620 if ( ( roundingMode
== float_round_to_zero
)
4621 || ( zSign
&& ( roundingMode
== float_round_up
) )
4622 || ( ! zSign
&& ( roundingMode
== float_round_down
) )
4624 return packFloatx80( zSign
, 0x7FFE, ~ roundMask
);
4626 return packFloatx80(zSign
,
4627 floatx80_infinity_high
,
4628 floatx80_infinity_low
);
4631 isTiny
= status
->tininess_before_rounding
4634 || (zSig0
< UINT64_C(0xFFFFFFFFFFFFFFFF));
4635 shift64ExtraRightJamming( zSig0
, zSig1
, 1 - zExp
, &zSig0
, &zSig1
);
4637 if (isTiny
&& zSig1
) {
4638 float_raise(float_flag_underflow
, status
);
4641 float_raise(float_flag_inexact
, status
);
4643 switch (roundingMode
) {
4644 case float_round_nearest_even
:
4645 case float_round_ties_away
:
4646 increment
= ((int64_t)zSig1
< 0);
4648 case float_round_to_zero
:
4651 case float_round_up
:
4652 increment
= !zSign
&& zSig1
;
4654 case float_round_down
:
4655 increment
= zSign
&& zSig1
;
4662 if (!(zSig1
<< 1) && roundNearestEven
) {
4665 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
4667 return packFloatx80( zSign
, zExp
, zSig0
);
4671 float_raise(float_flag_inexact
, status
);
4677 zSig0
= UINT64_C(0x8000000000000000);
4680 if (!(zSig1
<< 1) && roundNearestEven
) {
4686 if ( zSig0
== 0 ) zExp
= 0;
4688 return packFloatx80( zSign
, zExp
, zSig0
);
4692 /*----------------------------------------------------------------------------
4693 | Takes an abstract floating-point value having sign `zSign', exponent
4694 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4695 | and returns the proper extended double-precision floating-point value
4696 | corresponding to the abstract input. This routine is just like
4697 | `roundAndPackFloatx80' except that the input significand does not have to be
4699 *----------------------------------------------------------------------------*/
4701 floatx80
normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
,
4702 bool zSign
, int32_t zExp
,
4703 uint64_t zSig0
, uint64_t zSig1
,
4704 float_status
*status
)
4713 shiftCount
= clz64(zSig0
);
4714 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
4716 return roundAndPackFloatx80(roundingPrecision
, zSign
, zExp
,
4717 zSig0
, zSig1
, status
);
4721 /*----------------------------------------------------------------------------
4722 | Returns the least-significant 64 fraction bits of the quadruple-precision
4723 | floating-point value `a'.
4724 *----------------------------------------------------------------------------*/
4726 static inline uint64_t extractFloat128Frac1( float128 a
)
4733 /*----------------------------------------------------------------------------
4734 | Returns the most-significant 48 fraction bits of the quadruple-precision
4735 | floating-point value `a'.
4736 *----------------------------------------------------------------------------*/
4738 static inline uint64_t extractFloat128Frac0( float128 a
)
4741 return a
.high
& UINT64_C(0x0000FFFFFFFFFFFF);
/*----------------------------------------------------------------------------
| Returns the exponent bits of the quadruple-precision floating-point value
| `a'.
*----------------------------------------------------------------------------*/
4750 static inline int32_t extractFloat128Exp( float128 a
)
4753 return ( a
.high
>>48 ) & 0x7FFF;
4757 /*----------------------------------------------------------------------------
4758 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4759 *----------------------------------------------------------------------------*/
4761 static inline bool extractFloat128Sign(float128 a
)
4763 return a
.high
>> 63;
4766 /*----------------------------------------------------------------------------
4767 | Normalizes the subnormal quadruple-precision floating-point value
4768 | represented by the denormalized significand formed by the concatenation of
4769 | `aSig0' and `aSig1'. The normalized exponent is stored at the location
4770 | pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4771 | significand are stored at the location pointed to by `zSig0Ptr', and the
4772 | least significant 64 bits of the normalized significand are stored at the
4773 | location pointed to by `zSig1Ptr'.
4774 *----------------------------------------------------------------------------*/
/*
 * Normalize the subnormal float128 significand aSig0:aSig1.  The
 * normalized exponent is written through `zExpPtr'; the high 49 bits of
 * the normalized significand through `zSig0Ptr' and the low 64 bits
 * through `zSig1Ptr'.
 */
static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shiftCount;

    if (aSig0 == 0) {
        /* All significant bits are in the low word. */
        shiftCount = clz64(aSig1) - 15;
        if (shiftCount < 0) {
            *zSig0Ptr = aSig1 >> (-shiftCount);
            *zSig1Ptr = aSig1 << (shiftCount & 63);
        } else {
            *zSig0Ptr = aSig1 << shiftCount;
            *zSig1Ptr = 0;
        }
        *zExpPtr = -shiftCount - 63;
    } else {
        shiftCount = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shiftCount;
    }
}
4807 /*----------------------------------------------------------------------------
4808 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4809 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4810 | floating-point value, returning the result. After being shifted into the
4811 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4812 | added together to form the most significant 32 bits of the result. This
4813 | means that any integer portion of `zSig0' will be added into the exponent.
4814 | Since a properly normalized significand will have an integer portion equal
4815 | to 1, the `zExp' input should be 1 less than the desired result exponent
4816 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4818 *----------------------------------------------------------------------------*/
4820 static inline float128
4821 packFloat128(bool zSign
, int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
)
4826 z
.high
= ((uint64_t)zSign
<< 63) + ((uint64_t)zExp
<< 48) + zSig0
;
4830 /*----------------------------------------------------------------------------
4831 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4832 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4833 | and `zSig2', and returns the proper quadruple-precision floating-point value
4834 | corresponding to the abstract input. Ordinarily, the abstract value is
4835 | simply rounded and packed into the quadruple-precision format, with the
4836 | inexact exception raised if the abstract input cannot be represented
4837 | exactly. However, if the abstract value is too large, the overflow and
4838 | inexact exceptions are raised and an infinity or maximal finite value is
4839 | returned. If the abstract value is too small, the input value is rounded to
4840 | a subnormal number, and the underflow and inexact exceptions are raised if
4841 | the abstract input cannot be represented exactly as a subnormal quadruple-
4842 | precision floating-point number.
4843 | The input significand must be normalized or smaller. If the input
4844 | significand is not normalized, `zExp' must be 0; in that case, the result
4845 | returned is a subnormal number, and it must not require rounding. In the
4846 | usual case that the input significand is normalized, `zExp' must be 1 less
4847 | than the ``true'' floating-point exponent. The handling of underflow and
4848 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4849 *----------------------------------------------------------------------------*/
4851 static float128
roundAndPackFloat128(bool zSign
, int32_t zExp
,
4852 uint64_t zSig0
, uint64_t zSig1
,
4853 uint64_t zSig2
, float_status
*status
)
4855 int8_t roundingMode
;
4856 bool roundNearestEven
, increment
, isTiny
;
4858 roundingMode
= status
->float_rounding_mode
;
4859 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4860 switch (roundingMode
) {
4861 case float_round_nearest_even
:
4862 case float_round_ties_away
:
4863 increment
= ((int64_t)zSig2
< 0);
4865 case float_round_to_zero
:
4868 case float_round_up
:
4869 increment
= !zSign
&& zSig2
;
4871 case float_round_down
:
4872 increment
= zSign
&& zSig2
;
4874 case float_round_to_odd
:
4875 increment
= !(zSig1
& 0x1) && zSig2
;
4880 if ( 0x7FFD <= (uint32_t) zExp
) {
4881 if ( ( 0x7FFD < zExp
)
4882 || ( ( zExp
== 0x7FFD )
4884 UINT64_C(0x0001FFFFFFFFFFFF),
4885 UINT64_C(0xFFFFFFFFFFFFFFFF),
4892 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4893 if ( ( roundingMode
== float_round_to_zero
)
4894 || ( zSign
&& ( roundingMode
== float_round_up
) )
4895 || ( ! zSign
&& ( roundingMode
== float_round_down
) )
4896 || (roundingMode
== float_round_to_odd
)
4902 UINT64_C(0x0000FFFFFFFFFFFF),
4903 UINT64_C(0xFFFFFFFFFFFFFFFF)
4906 return packFloat128( zSign
, 0x7FFF, 0, 0 );
4909 if (status
->flush_to_zero
) {
4910 float_raise(float_flag_output_denormal
, status
);
4911 return packFloat128(zSign
, 0, 0, 0);
4913 isTiny
= status
->tininess_before_rounding
4916 || lt128(zSig0
, zSig1
,
4917 UINT64_C(0x0001FFFFFFFFFFFF),
4918 UINT64_C(0xFFFFFFFFFFFFFFFF));
4919 shift128ExtraRightJamming(
4920 zSig0
, zSig1
, zSig2
, - zExp
, &zSig0
, &zSig1
, &zSig2
);
4922 if (isTiny
&& zSig2
) {
4923 float_raise(float_flag_underflow
, status
);
4925 switch (roundingMode
) {
4926 case float_round_nearest_even
:
4927 case float_round_ties_away
:
4928 increment
= ((int64_t)zSig2
< 0);
4930 case float_round_to_zero
:
4933 case float_round_up
:
4934 increment
= !zSign
&& zSig2
;
4936 case float_round_down
:
4937 increment
= zSign
&& zSig2
;
4939 case float_round_to_odd
:
4940 increment
= !(zSig1
& 0x1) && zSig2
;
4948 float_raise(float_flag_inexact
, status
);
4951 add128( zSig0
, zSig1
, 0, 1, &zSig0
, &zSig1
);
4952 if ((zSig2
+ zSig2
== 0) && roundNearestEven
) {
4957 if ( ( zSig0
| zSig1
) == 0 ) zExp
= 0;
4959 return packFloat128( zSign
, zExp
, zSig0
, zSig1
);
4963 /*----------------------------------------------------------------------------
4964 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4965 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4966 | returns the proper quadruple-precision floating-point value corresponding
4967 | to the abstract input. This routine is just like `roundAndPackFloat128'
4968 | except that the input significand has fewer bits and does not have to be
4969 | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4971 *----------------------------------------------------------------------------*/
4973 static float128
normalizeRoundAndPackFloat128(bool zSign
, int32_t zExp
,
4974 uint64_t zSig0
, uint64_t zSig1
,
4975 float_status
*status
)
4985 shiftCount
= clz64(zSig0
) - 15;
4986 if ( 0 <= shiftCount
) {
4988 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
4991 shift128ExtraRightJamming(
4992 zSig0
, zSig1
, 0, - shiftCount
, &zSig0
, &zSig1
, &zSig2
);
4995 return roundAndPackFloat128(zSign
, zExp
, zSig0
, zSig1
, zSig2
, status
);
5000 /*----------------------------------------------------------------------------
5001 | Returns the result of converting the 32-bit two's complement integer `a'
5002 | to the extended double-precision floating-point format. The conversion
5003 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5005 *----------------------------------------------------------------------------*/
5007 floatx80
int32_to_floatx80(int32_t a
, float_status
*status
)
5014 if ( a
== 0 ) return packFloatx80( 0, 0, 0 );
5016 absA
= zSign
? - a
: a
;
5017 shiftCount
= clz32(absA
) + 32;
5019 return packFloatx80( zSign
, 0x403E - shiftCount
, zSig
<<shiftCount
);
5023 /*----------------------------------------------------------------------------
5024 | Returns the result of converting the 64-bit two's complement integer `a'
5025 | to the extended double-precision floating-point format. The conversion
5026 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5028 *----------------------------------------------------------------------------*/
5030 floatx80
int64_to_floatx80(int64_t a
, float_status
*status
)
5036 if ( a
== 0 ) return packFloatx80( 0, 0, 0 );
5038 absA
= zSign
? - a
: a
;
5039 shiftCount
= clz64(absA
);
5040 return packFloatx80( zSign
, 0x403E - shiftCount
, absA
<<shiftCount
);
5044 /*----------------------------------------------------------------------------
5045 | Returns the result of converting the single-precision floating-point value
5046 | `a' to the extended double-precision floating-point format. The conversion
5047 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5049 *----------------------------------------------------------------------------*/
5051 floatx80
float32_to_floatx80(float32 a
, float_status
*status
)
5057 a
= float32_squash_input_denormal(a
, status
);
5058 aSig
= extractFloat32Frac( a
);
5059 aExp
= extractFloat32Exp( a
);
5060 aSign
= extractFloat32Sign( a
);
5061 if ( aExp
== 0xFF ) {
5063 floatx80 res
= commonNaNToFloatx80(float32ToCommonNaN(a
, status
),
5065 return floatx80_silence_nan(res
, status
);
5067 return packFloatx80(aSign
,
5068 floatx80_infinity_high
,
5069 floatx80_infinity_low
);
5072 if ( aSig
== 0 ) return packFloatx80( aSign
, 0, 0 );
5073 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5076 return packFloatx80( aSign
, aExp
+ 0x3F80, ( (uint64_t) aSig
)<<40 );
5080 /*----------------------------------------------------------------------------
5081 | Returns the remainder of the single-precision floating-point value `a'
5082 | with respect to the corresponding value `b'. The operation is performed
5083 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5084 *----------------------------------------------------------------------------*/
5086 float32
float32_rem(float32 a
, float32 b
, float_status
*status
)
5089 int aExp
, bExp
, expDiff
;
5090 uint32_t aSig
, bSig
;
5092 uint64_t aSig64
, bSig64
, q64
;
5093 uint32_t alternateASig
;
5095 a
= float32_squash_input_denormal(a
, status
);
5096 b
= float32_squash_input_denormal(b
, status
);
5098 aSig
= extractFloat32Frac( a
);
5099 aExp
= extractFloat32Exp( a
);
5100 aSign
= extractFloat32Sign( a
);
5101 bSig
= extractFloat32Frac( b
);
5102 bExp
= extractFloat32Exp( b
);
5103 if ( aExp
== 0xFF ) {
5104 if ( aSig
|| ( ( bExp
== 0xFF ) && bSig
) ) {
5105 return propagateFloat32NaN(a
, b
, status
);
5107 float_raise(float_flag_invalid
, status
);
5108 return float32_default_nan(status
);
5110 if ( bExp
== 0xFF ) {
5112 return propagateFloat32NaN(a
, b
, status
);
5118 float_raise(float_flag_invalid
, status
);
5119 return float32_default_nan(status
);
5121 normalizeFloat32Subnormal( bSig
, &bExp
, &bSig
);
5124 if ( aSig
== 0 ) return a
;
5125 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5127 expDiff
= aExp
- bExp
;
5130 if ( expDiff
< 32 ) {
5133 if ( expDiff
< 0 ) {
5134 if ( expDiff
< -1 ) return a
;
5137 q
= ( bSig
<= aSig
);
5138 if ( q
) aSig
-= bSig
;
5139 if ( 0 < expDiff
) {
5140 q
= ( ( (uint64_t) aSig
)<<32 ) / bSig
;
5143 aSig
= ( ( aSig
>>1 )<<( expDiff
- 1 ) ) - bSig
* q
;
5151 if ( bSig
<= aSig
) aSig
-= bSig
;
5152 aSig64
= ( (uint64_t) aSig
)<<40;
5153 bSig64
= ( (uint64_t) bSig
)<<40;
5155 while ( 0 < expDiff
) {
5156 q64
= estimateDiv128To64( aSig64
, 0, bSig64
);
5157 q64
= ( 2 < q64
) ? q64
- 2 : 0;
5158 aSig64
= - ( ( bSig
* q64
)<<38 );
5162 q64
= estimateDiv128To64( aSig64
, 0, bSig64
);
5163 q64
= ( 2 < q64
) ? q64
- 2 : 0;
5164 q
= q64
>>( 64 - expDiff
);
5166 aSig
= ( ( aSig64
>>33 )<<( expDiff
- 1 ) ) - bSig
* q
;
5169 alternateASig
= aSig
;
5172 } while ( 0 <= (int32_t) aSig
);
5173 sigMean
= aSig
+ alternateASig
;
5174 if ( ( sigMean
< 0 ) || ( ( sigMean
== 0 ) && ( q
& 1 ) ) ) {
5175 aSig
= alternateASig
;
5177 zSign
= ( (int32_t) aSig
< 0 );
5178 if ( zSign
) aSig
= - aSig
;
5179 return normalizeRoundAndPackFloat32(aSign
^ zSign
, bExp
, aSig
, status
);
5184 /*----------------------------------------------------------------------------
5185 | Returns the binary exponential of the single-precision floating-point value
5186 | `a'. The operation is performed according to the IEC/IEEE Standard for
5187 | Binary Floating-Point Arithmetic.
5189 | Uses the following identities:
5191 | 1. -------------------------------------------------------------------------
5195 | 2. -------------------------------------------------------------------------
5198 | e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5200 *----------------------------------------------------------------------------*/
5202 static const float64 float32_exp2_coefficients
[15] =
5204 const_float64( 0x3ff0000000000000ll
), /* 1 */
5205 const_float64( 0x3fe0000000000000ll
), /* 2 */
5206 const_float64( 0x3fc5555555555555ll
), /* 3 */
5207 const_float64( 0x3fa5555555555555ll
), /* 4 */
5208 const_float64( 0x3f81111111111111ll
), /* 5 */
5209 const_float64( 0x3f56c16c16c16c17ll
), /* 6 */
5210 const_float64( 0x3f2a01a01a01a01all
), /* 7 */
5211 const_float64( 0x3efa01a01a01a01all
), /* 8 */
5212 const_float64( 0x3ec71de3a556c734ll
), /* 9 */
5213 const_float64( 0x3e927e4fb7789f5cll
), /* 10 */
5214 const_float64( 0x3e5ae64567f544e4ll
), /* 11 */
5215 const_float64( 0x3e21eed8eff8d898ll
), /* 12 */
5216 const_float64( 0x3de6124613a86d09ll
), /* 13 */
5217 const_float64( 0x3da93974a8c07c9dll
), /* 14 */
5218 const_float64( 0x3d6ae7f3e733b81fll
), /* 15 */
5221 float32
float32_exp2(float32 a
, float_status
*status
)
5228 a
= float32_squash_input_denormal(a
, status
);
5230 aSig
= extractFloat32Frac( a
);
5231 aExp
= extractFloat32Exp( a
);
5232 aSign
= extractFloat32Sign( a
);
5234 if ( aExp
== 0xFF) {
5236 return propagateFloat32NaN(a
, float32_zero
, status
);
5238 return (aSign
) ? float32_zero
: a
;
5241 if (aSig
== 0) return float32_one
;
5244 float_raise(float_flag_inexact
, status
);
5246 /* ******************************* */
5247 /* using float64 for approximation */
5248 /* ******************************* */
5249 x
= float32_to_float64(a
, status
);
5250 x
= float64_mul(x
, float64_ln2
, status
);
5254 for (i
= 0 ; i
< 15 ; i
++) {
5257 f
= float64_mul(xn
, float32_exp2_coefficients
[i
], status
);
5258 r
= float64_add(r
, f
, status
);
5260 xn
= float64_mul(xn
, x
, status
);
5263 return float64_to_float32(r
, status
);
5266 /*----------------------------------------------------------------------------
5267 | Returns the binary log of the single-precision floating-point value `a'.
5268 | The operation is performed according to the IEC/IEEE Standard for Binary
5269 | Floating-Point Arithmetic.
5270 *----------------------------------------------------------------------------*/
5271 float32
float32_log2(float32 a
, float_status
*status
)
5275 uint32_t aSig
, zSig
, i
;
5277 a
= float32_squash_input_denormal(a
, status
);
5278 aSig
= extractFloat32Frac( a
);
5279 aExp
= extractFloat32Exp( a
);
5280 aSign
= extractFloat32Sign( a
);
5283 if ( aSig
== 0 ) return packFloat32( 1, 0xFF, 0 );
5284 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5287 float_raise(float_flag_invalid
, status
);
5288 return float32_default_nan(status
);
5290 if ( aExp
== 0xFF ) {
5292 return propagateFloat32NaN(a
, float32_zero
, status
);
5302 for (i
= 1 << 22; i
> 0; i
>>= 1) {
5303 aSig
= ( (uint64_t)aSig
* aSig
) >> 23;
5304 if ( aSig
& 0x01000000 ) {
5313 return normalizeRoundAndPackFloat32(zSign
, 0x85, zSig
, status
);
5316 /*----------------------------------------------------------------------------
5317 | Returns the result of converting the double-precision floating-point value
5318 | `a' to the extended double-precision floating-point format. The conversion
5319 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5321 *----------------------------------------------------------------------------*/
5323 floatx80
float64_to_floatx80(float64 a
, float_status
*status
)
5329 a
= float64_squash_input_denormal(a
, status
);
5330 aSig
= extractFloat64Frac( a
);
5331 aExp
= extractFloat64Exp( a
);
5332 aSign
= extractFloat64Sign( a
);
5333 if ( aExp
== 0x7FF ) {
5335 floatx80 res
= commonNaNToFloatx80(float64ToCommonNaN(a
, status
),
5337 return floatx80_silence_nan(res
, status
);
5339 return packFloatx80(aSign
,
5340 floatx80_infinity_high
,
5341 floatx80_infinity_low
);
5344 if ( aSig
== 0 ) return packFloatx80( aSign
, 0, 0 );
5345 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5349 aSign
, aExp
+ 0x3C00, (aSig
| UINT64_C(0x0010000000000000)) << 11);
5353 /*----------------------------------------------------------------------------
5354 | Returns the remainder of the double-precision floating-point value `a'
5355 | with respect to the corresponding value `b'. The operation is performed
5356 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5357 *----------------------------------------------------------------------------*/
5359 float64
float64_rem(float64 a
, float64 b
, float_status
*status
)
5362 int aExp
, bExp
, expDiff
;
5363 uint64_t aSig
, bSig
;
5364 uint64_t q
, alternateASig
;
5367 a
= float64_squash_input_denormal(a
, status
);
5368 b
= float64_squash_input_denormal(b
, status
);
5369 aSig
= extractFloat64Frac( a
);
5370 aExp
= extractFloat64Exp( a
);
5371 aSign
= extractFloat64Sign( a
);
5372 bSig
= extractFloat64Frac( b
);
5373 bExp
= extractFloat64Exp( b
);
5374 if ( aExp
== 0x7FF ) {
5375 if ( aSig
|| ( ( bExp
== 0x7FF ) && bSig
) ) {
5376 return propagateFloat64NaN(a
, b
, status
);
5378 float_raise(float_flag_invalid
, status
);
5379 return float64_default_nan(status
);
5381 if ( bExp
== 0x7FF ) {
5383 return propagateFloat64NaN(a
, b
, status
);
5389 float_raise(float_flag_invalid
, status
);
5390 return float64_default_nan(status
);
5392 normalizeFloat64Subnormal( bSig
, &bExp
, &bSig
);
5395 if ( aSig
== 0 ) return a
;
5396 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5398 expDiff
= aExp
- bExp
;
5399 aSig
= (aSig
| UINT64_C(0x0010000000000000)) << 11;
5400 bSig
= (bSig
| UINT64_C(0x0010000000000000)) << 11;
5401 if ( expDiff
< 0 ) {
5402 if ( expDiff
< -1 ) return a
;
5405 q
= ( bSig
<= aSig
);
5406 if ( q
) aSig
-= bSig
;
5408 while ( 0 < expDiff
) {
5409 q
= estimateDiv128To64( aSig
, 0, bSig
);
5410 q
= ( 2 < q
) ? q
- 2 : 0;
5411 aSig
= - ( ( bSig
>>2 ) * q
);
5415 if ( 0 < expDiff
) {
5416 q
= estimateDiv128To64( aSig
, 0, bSig
);
5417 q
= ( 2 < q
) ? q
- 2 : 0;
5420 aSig
= ( ( aSig
>>1 )<<( expDiff
- 1 ) ) - bSig
* q
;
5427 alternateASig
= aSig
;
5430 } while ( 0 <= (int64_t) aSig
);
5431 sigMean
= aSig
+ alternateASig
;
5432 if ( ( sigMean
< 0 ) || ( ( sigMean
== 0 ) && ( q
& 1 ) ) ) {
5433 aSig
= alternateASig
;
5435 zSign
= ( (int64_t) aSig
< 0 );
5436 if ( zSign
) aSig
= - aSig
;
5437 return normalizeRoundAndPackFloat64(aSign
^ zSign
, bExp
, aSig
, status
);
5441 /*----------------------------------------------------------------------------
5442 | Returns the binary log of the double-precision floating-point value `a'.
5443 | The operation is performed according to the IEC/IEEE Standard for Binary
5444 | Floating-Point Arithmetic.
5445 *----------------------------------------------------------------------------*/
5446 float64
float64_log2(float64 a
, float_status
*status
)
5450 uint64_t aSig
, aSig0
, aSig1
, zSig
, i
;
5451 a
= float64_squash_input_denormal(a
, status
);
5453 aSig
= extractFloat64Frac( a
);
5454 aExp
= extractFloat64Exp( a
);
5455 aSign
= extractFloat64Sign( a
);
5458 if ( aSig
== 0 ) return packFloat64( 1, 0x7FF, 0 );
5459 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5462 float_raise(float_flag_invalid
, status
);
5463 return float64_default_nan(status
);
5465 if ( aExp
== 0x7FF ) {
5467 return propagateFloat64NaN(a
, float64_zero
, status
);
5473 aSig
|= UINT64_C(0x0010000000000000);
5475 zSig
= (uint64_t)aExp
<< 52;
5476 for (i
= 1LL << 51; i
> 0; i
>>= 1) {
5477 mul64To128( aSig
, aSig
, &aSig0
, &aSig1
);
5478 aSig
= ( aSig0
<< 12 ) | ( aSig1
>> 52 );
5479 if ( aSig
& UINT64_C(0x0020000000000000) ) {
5487 return normalizeRoundAndPackFloat64(zSign
, 0x408, zSig
, status
);
5490 /*----------------------------------------------------------------------------
5491 | Returns the result of converting the extended double-precision floating-
5492 | point value `a' to the 32-bit two's complement integer format. The
5493 | conversion is performed according to the IEC/IEEE Standard for Binary
5494 | Floating-Point Arithmetic---which means in particular that the conversion
5495 | is rounded according to the current rounding mode. If `a' is a NaN, the
5496 | largest positive integer is returned. Otherwise, if the conversion
5497 | overflows, the largest integer with the same sign as `a' is returned.
5498 *----------------------------------------------------------------------------*/
5500 int32_t floatx80_to_int32(floatx80 a
, float_status
*status
)
5503 int32_t aExp
, shiftCount
;
5506 if (floatx80_invalid_encoding(a
)) {
5507 float_raise(float_flag_invalid
, status
);
5510 aSig
= extractFloatx80Frac( a
);
5511 aExp
= extractFloatx80Exp( a
);
5512 aSign
= extractFloatx80Sign( a
);
5513 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( aSig
<<1 ) ) aSign
= 0;
5514 shiftCount
= 0x4037 - aExp
;
5515 if ( shiftCount
<= 0 ) shiftCount
= 1;
5516 shift64RightJamming( aSig
, shiftCount
, &aSig
);
5517 return roundAndPackInt32(aSign
, aSig
, status
);
5521 /*----------------------------------------------------------------------------
5522 | Returns the result of converting the extended double-precision floating-
5523 | point value `a' to the 32-bit two's complement integer format. The
5524 | conversion is performed according to the IEC/IEEE Standard for Binary
5525 | Floating-Point Arithmetic, except that the conversion is always rounded
5526 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5527 | Otherwise, if the conversion overflows, the largest integer with the same
5528 | sign as `a' is returned.
5529 *----------------------------------------------------------------------------*/
5531 int32_t floatx80_to_int32_round_to_zero(floatx80 a
, float_status
*status
)
5534 int32_t aExp
, shiftCount
;
5535 uint64_t aSig
, savedASig
;
5538 if (floatx80_invalid_encoding(a
)) {
5539 float_raise(float_flag_invalid
, status
);
5542 aSig
= extractFloatx80Frac( a
);
5543 aExp
= extractFloatx80Exp( a
);
5544 aSign
= extractFloatx80Sign( a
);
5545 if ( 0x401E < aExp
) {
5546 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( aSig
<<1 ) ) aSign
= 0;
5549 else if ( aExp
< 0x3FFF ) {
5551 float_raise(float_flag_inexact
, status
);
5555 shiftCount
= 0x403E - aExp
;
5557 aSig
>>= shiftCount
;
5559 if ( aSign
) z
= - z
;
5560 if ( ( z
< 0 ) ^ aSign
) {
5562 float_raise(float_flag_invalid
, status
);
5563 return aSign
? (int32_t) 0x80000000 : 0x7FFFFFFF;
5565 if ( ( aSig
<<shiftCount
) != savedASig
) {
5566 float_raise(float_flag_inexact
, status
);
5572 /*----------------------------------------------------------------------------
5573 | Returns the result of converting the extended double-precision floating-
5574 | point value `a' to the 64-bit two's complement integer format. The
5575 | conversion is performed according to the IEC/IEEE Standard for Binary
5576 | Floating-Point Arithmetic---which means in particular that the conversion
5577 | is rounded according to the current rounding mode. If `a' is a NaN,
5578 | the largest positive integer is returned. Otherwise, if the conversion
5579 | overflows, the largest integer with the same sign as `a' is returned.
5580 *----------------------------------------------------------------------------*/
5582 int64_t floatx80_to_int64(floatx80 a
, float_status
*status
)
5585 int32_t aExp
, shiftCount
;
5586 uint64_t aSig
, aSigExtra
;
5588 if (floatx80_invalid_encoding(a
)) {
5589 float_raise(float_flag_invalid
, status
);
5592 aSig
= extractFloatx80Frac( a
);
5593 aExp
= extractFloatx80Exp( a
);
5594 aSign
= extractFloatx80Sign( a
);
5595 shiftCount
= 0x403E - aExp
;
5596 if ( shiftCount
<= 0 ) {
5598 float_raise(float_flag_invalid
, status
);
5599 if (!aSign
|| floatx80_is_any_nan(a
)) {
5607 shift64ExtraRightJamming( aSig
, 0, shiftCount
, &aSig
, &aSigExtra
);
5609 return roundAndPackInt64(aSign
, aSig
, aSigExtra
, status
);
5613 /*----------------------------------------------------------------------------
5614 | Returns the result of converting the extended double-precision floating-
5615 | point value `a' to the 64-bit two's complement integer format. The
5616 | conversion is performed according to the IEC/IEEE Standard for Binary
5617 | Floating-Point Arithmetic, except that the conversion is always rounded
5618 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5619 | Otherwise, if the conversion overflows, the largest integer with the same
5620 | sign as `a' is returned.
5621 *----------------------------------------------------------------------------*/
5623 int64_t floatx80_to_int64_round_to_zero(floatx80 a
, float_status
*status
)
5626 int32_t aExp
, shiftCount
;
5630 if (floatx80_invalid_encoding(a
)) {
5631 float_raise(float_flag_invalid
, status
);
5634 aSig
= extractFloatx80Frac( a
);
5635 aExp
= extractFloatx80Exp( a
);
5636 aSign
= extractFloatx80Sign( a
);
5637 shiftCount
= aExp
- 0x403E;
5638 if ( 0 <= shiftCount
) {
5639 aSig
&= UINT64_C(0x7FFFFFFFFFFFFFFF);
5640 if ( ( a
.high
!= 0xC03E ) || aSig
) {
5641 float_raise(float_flag_invalid
, status
);
5642 if ( ! aSign
|| ( ( aExp
== 0x7FFF ) && aSig
) ) {
5648 else if ( aExp
< 0x3FFF ) {
5650 float_raise(float_flag_inexact
, status
);
5654 z
= aSig
>>( - shiftCount
);
5655 if ( (uint64_t) ( aSig
<<( shiftCount
& 63 ) ) ) {
5656 float_raise(float_flag_inexact
, status
);
5658 if ( aSign
) z
= - z
;
5663 /*----------------------------------------------------------------------------
5664 | Returns the result of converting the extended double-precision floating-
5665 | point value `a' to the single-precision floating-point format. The
5666 | conversion is performed according to the IEC/IEEE Standard for Binary
5667 | Floating-Point Arithmetic.
5668 *----------------------------------------------------------------------------*/
5670 float32
floatx80_to_float32(floatx80 a
, float_status
*status
)
5676 if (floatx80_invalid_encoding(a
)) {
5677 float_raise(float_flag_invalid
, status
);
5678 return float32_default_nan(status
);
5680 aSig
= extractFloatx80Frac( a
);
5681 aExp
= extractFloatx80Exp( a
);
5682 aSign
= extractFloatx80Sign( a
);
5683 if ( aExp
== 0x7FFF ) {
5684 if ( (uint64_t) ( aSig
<<1 ) ) {
5685 float32 res
= commonNaNToFloat32(floatx80ToCommonNaN(a
, status
),
5687 return float32_silence_nan(res
, status
);
5689 return packFloat32( aSign
, 0xFF, 0 );
5691 shift64RightJamming( aSig
, 33, &aSig
);
5692 if ( aExp
|| aSig
) aExp
-= 0x3F81;
5693 return roundAndPackFloat32(aSign
, aExp
, aSig
, status
);
5697 /*----------------------------------------------------------------------------
5698 | Returns the result of converting the extended double-precision floating-
5699 | point value `a' to the double-precision floating-point format. The
5700 | conversion is performed according to the IEC/IEEE Standard for Binary
5701 | Floating-Point Arithmetic.
5702 *----------------------------------------------------------------------------*/
5704 float64
floatx80_to_float64(floatx80 a
, float_status
*status
)
5708 uint64_t aSig
, zSig
;
5710 if (floatx80_invalid_encoding(a
)) {
5711 float_raise(float_flag_invalid
, status
);
5712 return float64_default_nan(status
);
5714 aSig
= extractFloatx80Frac( a
);
5715 aExp
= extractFloatx80Exp( a
);
5716 aSign
= extractFloatx80Sign( a
);
5717 if ( aExp
== 0x7FFF ) {
5718 if ( (uint64_t) ( aSig
<<1 ) ) {
5719 float64 res
= commonNaNToFloat64(floatx80ToCommonNaN(a
, status
),
5721 return float64_silence_nan(res
, status
);
5723 return packFloat64( aSign
, 0x7FF, 0 );
5725 shift64RightJamming( aSig
, 1, &zSig
);
5726 if ( aExp
|| aSig
) aExp
-= 0x3C01;
5727 return roundAndPackFloat64(aSign
, aExp
, zSig
, status
);
5731 /*----------------------------------------------------------------------------
5732 | Returns the result of converting the extended double-precision floating-
5733 | point value `a' to the quadruple-precision floating-point format. The
5734 | conversion is performed according to the IEC/IEEE Standard for Binary
5735 | Floating-Point Arithmetic.
5736 *----------------------------------------------------------------------------*/
5738 float128
floatx80_to_float128(floatx80 a
, float_status
*status
)
5742 uint64_t aSig
, zSig0
, zSig1
;
5744 if (floatx80_invalid_encoding(a
)) {
5745 float_raise(float_flag_invalid
, status
);
5746 return float128_default_nan(status
);
5748 aSig
= extractFloatx80Frac( a
);
5749 aExp
= extractFloatx80Exp( a
);
5750 aSign
= extractFloatx80Sign( a
);
5751 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( aSig
<<1 ) ) {
5752 float128 res
= commonNaNToFloat128(floatx80ToCommonNaN(a
, status
),
5754 return float128_silence_nan(res
, status
);
5756 shift128Right( aSig
<<1, 0, 16, &zSig0
, &zSig1
);
5757 return packFloat128( aSign
, aExp
, zSig0
, zSig1
);
5761 /*----------------------------------------------------------------------------
5762 | Rounds the extended double-precision floating-point value `a'
5763 | to the precision provided by floatx80_rounding_precision and returns the
5764 | result as an extended double-precision floating-point value.
5765 | The operation is performed according to the IEC/IEEE Standard for Binary
5766 | Floating-Point Arithmetic.
5767 *----------------------------------------------------------------------------*/
5769 floatx80
floatx80_round(floatx80 a
, float_status
*status
)
5771 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
5772 extractFloatx80Sign(a
),
5773 extractFloatx80Exp(a
),
5774 extractFloatx80Frac(a
), 0, status
);
5777 /*----------------------------------------------------------------------------
5778 | Rounds the extended double-precision floating-point value `a' to an integer,
5779 | and returns the result as an extended quadruple-precision floating-point
5780 | value. The operation is performed according to the IEC/IEEE Standard for
5781 | Binary Floating-Point Arithmetic.
5782 *----------------------------------------------------------------------------*/
5784 floatx80
floatx80_round_to_int(floatx80 a
, float_status
*status
)
5788 uint64_t lastBitMask
, roundBitsMask
;
5791 if (floatx80_invalid_encoding(a
)) {
5792 float_raise(float_flag_invalid
, status
);
5793 return floatx80_default_nan(status
);
5795 aExp
= extractFloatx80Exp( a
);
5796 if ( 0x403E <= aExp
) {
5797 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a
)<<1 ) ) {
5798 return propagateFloatx80NaN(a
, a
, status
);
5802 if ( aExp
< 0x3FFF ) {
5804 && ( (uint64_t) ( extractFloatx80Frac( a
) ) == 0 ) ) {
5807 float_raise(float_flag_inexact
, status
);
5808 aSign
= extractFloatx80Sign( a
);
5809 switch (status
->float_rounding_mode
) {
5810 case float_round_nearest_even
:
5811 if ( ( aExp
== 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a
)<<1 )
5814 packFloatx80( aSign
, 0x3FFF, UINT64_C(0x8000000000000000));
5817 case float_round_ties_away
:
5818 if (aExp
== 0x3FFE) {
5819 return packFloatx80(aSign
, 0x3FFF, UINT64_C(0x8000000000000000));
5822 case float_round_down
:
5825 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5826 : packFloatx80( 0, 0, 0 );
5827 case float_round_up
:
5829 aSign
? packFloatx80( 1, 0, 0 )
5830 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5832 case float_round_to_zero
:
5835 g_assert_not_reached();
5837 return packFloatx80( aSign
, 0, 0 );
5840 lastBitMask
<<= 0x403E - aExp
;
5841 roundBitsMask
= lastBitMask
- 1;
5843 switch (status
->float_rounding_mode
) {
5844 case float_round_nearest_even
:
5845 z
.low
+= lastBitMask
>>1;
5846 if ((z
.low
& roundBitsMask
) == 0) {
5847 z
.low
&= ~lastBitMask
;
5850 case float_round_ties_away
:
5851 z
.low
+= lastBitMask
>> 1;
5853 case float_round_to_zero
:
5855 case float_round_up
:
5856 if (!extractFloatx80Sign(z
)) {
5857 z
.low
+= roundBitsMask
;
5860 case float_round_down
:
5861 if (extractFloatx80Sign(z
)) {
5862 z
.low
+= roundBitsMask
;
5868 z
.low
&= ~ roundBitsMask
;
5871 z
.low
= UINT64_C(0x8000000000000000);
5873 if (z
.low
!= a
.low
) {
5874 float_raise(float_flag_inexact
, status
);
5880 /*----------------------------------------------------------------------------
5881 | Returns the result of dividing the extended double-precision floating-point
5882 | value `a' by the corresponding value `b'. The operation is performed
5883 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5884 *----------------------------------------------------------------------------*/
5886 floatx80
floatx80_div(floatx80 a
, floatx80 b
, float_status
*status
)
5888 bool aSign
, bSign
, zSign
;
5889 int32_t aExp
, bExp
, zExp
;
5890 uint64_t aSig
, bSig
, zSig0
, zSig1
;
5891 uint64_t rem0
, rem1
, rem2
, term0
, term1
, term2
;
5893 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
5894 float_raise(float_flag_invalid
, status
);
5895 return floatx80_default_nan(status
);
5897 aSig
= extractFloatx80Frac( a
);
5898 aExp
= extractFloatx80Exp( a
);
5899 aSign
= extractFloatx80Sign( a
);
5900 bSig
= extractFloatx80Frac( b
);
5901 bExp
= extractFloatx80Exp( b
);
5902 bSign
= extractFloatx80Sign( b
);
5903 zSign
= aSign
^ bSign
;
5904 if ( aExp
== 0x7FFF ) {
5905 if ((uint64_t)(aSig
<< 1)) {
5906 return propagateFloatx80NaN(a
, b
, status
);
5908 if ( bExp
== 0x7FFF ) {
5909 if ((uint64_t)(bSig
<< 1)) {
5910 return propagateFloatx80NaN(a
, b
, status
);
5914 return packFloatx80(zSign
, floatx80_infinity_high
,
5915 floatx80_infinity_low
);
5917 if ( bExp
== 0x7FFF ) {
5918 if ((uint64_t)(bSig
<< 1)) {
5919 return propagateFloatx80NaN(a
, b
, status
);
5921 return packFloatx80( zSign
, 0, 0 );
5925 if ( ( aExp
| aSig
) == 0 ) {
5927 float_raise(float_flag_invalid
, status
);
5928 return floatx80_default_nan(status
);
5930 float_raise(float_flag_divbyzero
, status
);
5931 return packFloatx80(zSign
, floatx80_infinity_high
,
5932 floatx80_infinity_low
);
5934 normalizeFloatx80Subnormal( bSig
, &bExp
, &bSig
);
5937 if ( aSig
== 0 ) return packFloatx80( zSign
, 0, 0 );
5938 normalizeFloatx80Subnormal( aSig
, &aExp
, &aSig
);
5940 zExp
= aExp
- bExp
+ 0x3FFE;
5942 if ( bSig
<= aSig
) {
5943 shift128Right( aSig
, 0, 1, &aSig
, &rem1
);
5946 zSig0
= estimateDiv128To64( aSig
, rem1
, bSig
);
5947 mul64To128( bSig
, zSig0
, &term0
, &term1
);
5948 sub128( aSig
, rem1
, term0
, term1
, &rem0
, &rem1
);
5949 while ( (int64_t) rem0
< 0 ) {
5951 add128( rem0
, rem1
, 0, bSig
, &rem0
, &rem1
);
5953 zSig1
= estimateDiv128To64( rem1
, 0, bSig
);
5954 if ( (uint64_t) ( zSig1
<<1 ) <= 8 ) {
5955 mul64To128( bSig
, zSig1
, &term1
, &term2
);
5956 sub128( rem1
, 0, term1
, term2
, &rem1
, &rem2
);
5957 while ( (int64_t) rem1
< 0 ) {
5959 add128( rem1
, rem2
, 0, bSig
, &rem1
, &rem2
);
5961 zSig1
|= ( ( rem1
| rem2
) != 0 );
5963 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
5964 zSign
, zExp
, zSig0
, zSig1
, status
);
5967 /*----------------------------------------------------------------------------
5968 | Returns the remainder of the extended double-precision floating-point value
5969 | `a' with respect to the corresponding value `b'. The operation is performed
5970 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
5971 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
5972 | the quotient toward zero instead. '*quotient' is set to the low 64 bits of
5973 | the absolute value of the integer quotient.
5974 *----------------------------------------------------------------------------*/
5976 floatx80
floatx80_modrem(floatx80 a
, floatx80 b
, bool mod
, uint64_t *quotient
,
5977 float_status
*status
)
5980 int32_t aExp
, bExp
, expDiff
, aExpOrig
;
5981 uint64_t aSig0
, aSig1
, bSig
;
5982 uint64_t q
, term0
, term1
, alternateASig0
, alternateASig1
;
5985 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
5986 float_raise(float_flag_invalid
, status
);
5987 return floatx80_default_nan(status
);
5989 aSig0
= extractFloatx80Frac( a
);
5990 aExpOrig
= aExp
= extractFloatx80Exp( a
);
5991 aSign
= extractFloatx80Sign( a
);
5992 bSig
= extractFloatx80Frac( b
);
5993 bExp
= extractFloatx80Exp( b
);
5994 if ( aExp
== 0x7FFF ) {
5995 if ( (uint64_t) ( aSig0
<<1 )
5996 || ( ( bExp
== 0x7FFF ) && (uint64_t) ( bSig
<<1 ) ) ) {
5997 return propagateFloatx80NaN(a
, b
, status
);
6001 if ( bExp
== 0x7FFF ) {
6002 if ((uint64_t)(bSig
<< 1)) {
6003 return propagateFloatx80NaN(a
, b
, status
);
6005 if (aExp
== 0 && aSig0
>> 63) {
6007 * Pseudo-denormal argument must be returned in normalized
6010 return packFloatx80(aSign
, 1, aSig0
);
6017 float_raise(float_flag_invalid
, status
);
6018 return floatx80_default_nan(status
);
6020 normalizeFloatx80Subnormal( bSig
, &bExp
, &bSig
);
6023 if ( aSig0
== 0 ) return a
;
6024 normalizeFloatx80Subnormal( aSig0
, &aExp
, &aSig0
);
6027 expDiff
= aExp
- bExp
;
6029 if ( expDiff
< 0 ) {
6030 if ( mod
|| expDiff
< -1 ) {
6031 if (aExp
== 1 && aExpOrig
== 0) {
6033 * Pseudo-denormal argument must be returned in
6036 return packFloatx80(aSign
, aExp
, aSig0
);
6040 shift128Right( aSig0
, 0, 1, &aSig0
, &aSig1
);
6043 *quotient
= q
= ( bSig
<= aSig0
);
6044 if ( q
) aSig0
-= bSig
;
6046 while ( 0 < expDiff
) {
6047 q
= estimateDiv128To64( aSig0
, aSig1
, bSig
);
6048 q
= ( 2 < q
) ? q
- 2 : 0;
6049 mul64To128( bSig
, q
, &term0
, &term1
);
6050 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6051 shortShift128Left( aSig0
, aSig1
, 62, &aSig0
, &aSig1
);
6057 if ( 0 < expDiff
) {
6058 q
= estimateDiv128To64( aSig0
, aSig1
, bSig
);
6059 q
= ( 2 < q
) ? q
- 2 : 0;
6061 mul64To128( bSig
, q
<<( 64 - expDiff
), &term0
, &term1
);
6062 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6063 shortShift128Left( 0, bSig
, 64 - expDiff
, &term0
, &term1
);
6064 while ( le128( term0
, term1
, aSig0
, aSig1
) ) {
6066 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6069 *quotient
<<= expDiff
;
6080 sub128( term0
, term1
, aSig0
, aSig1
, &alternateASig0
, &alternateASig1
);
6081 if ( lt128( alternateASig0
, alternateASig1
, aSig0
, aSig1
)
6082 || ( eq128( alternateASig0
, alternateASig1
, aSig0
, aSig1
)
6085 aSig0
= alternateASig0
;
6086 aSig1
= alternateASig1
;
6092 normalizeRoundAndPackFloatx80(
6093 floatx80_precision_x
, zSign
, bExp
+ expDiff
, aSig0
, aSig1
, status
);
6097 /*----------------------------------------------------------------------------
6098 | Returns the remainder of the extended double-precision floating-point value
6099 | `a' with respect to the corresponding value `b'. The operation is performed
6100 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6101 *----------------------------------------------------------------------------*/
6103 floatx80
floatx80_rem(floatx80 a
, floatx80 b
, float_status
*status
)
6106 return floatx80_modrem(a
, b
, false, "ient
, status
);
6109 /*----------------------------------------------------------------------------
6110 | Returns the remainder of the extended double-precision floating-point value
6111 | `a' with respect to the corresponding value `b', with the quotient truncated
6113 *----------------------------------------------------------------------------*/
6115 floatx80
floatx80_mod(floatx80 a
, floatx80 b
, float_status
*status
)
6118 return floatx80_modrem(a
, b
, true, "ient
, status
);
6121 /*----------------------------------------------------------------------------
6122 | Returns the square root of the extended double-precision floating-point
6123 | value `a'. The operation is performed according to the IEC/IEEE Standard
6124 | for Binary Floating-Point Arithmetic.
6125 *----------------------------------------------------------------------------*/
6127 floatx80
floatx80_sqrt(floatx80 a
, float_status
*status
)
6131 uint64_t aSig0
, aSig1
, zSig0
, zSig1
, doubleZSig0
;
6132 uint64_t rem0
, rem1
, rem2
, rem3
, term0
, term1
, term2
, term3
;
6134 if (floatx80_invalid_encoding(a
)) {
6135 float_raise(float_flag_invalid
, status
);
6136 return floatx80_default_nan(status
);
6138 aSig0
= extractFloatx80Frac( a
);
6139 aExp
= extractFloatx80Exp( a
);
6140 aSign
= extractFloatx80Sign( a
);
6141 if ( aExp
== 0x7FFF ) {
6142 if ((uint64_t)(aSig0
<< 1)) {
6143 return propagateFloatx80NaN(a
, a
, status
);
6145 if ( ! aSign
) return a
;
6149 if ( ( aExp
| aSig0
) == 0 ) return a
;
6151 float_raise(float_flag_invalid
, status
);
6152 return floatx80_default_nan(status
);
6155 if ( aSig0
== 0 ) return packFloatx80( 0, 0, 0 );
6156 normalizeFloatx80Subnormal( aSig0
, &aExp
, &aSig0
);
6158 zExp
= ( ( aExp
- 0x3FFF )>>1 ) + 0x3FFF;
6159 zSig0
= estimateSqrt32( aExp
, aSig0
>>32 );
6160 shift128Right( aSig0
, 0, 2 + ( aExp
& 1 ), &aSig0
, &aSig1
);
6161 zSig0
= estimateDiv128To64( aSig0
, aSig1
, zSig0
<<32 ) + ( zSig0
<<30 );
6162 doubleZSig0
= zSig0
<<1;
6163 mul64To128( zSig0
, zSig0
, &term0
, &term1
);
6164 sub128( aSig0
, aSig1
, term0
, term1
, &rem0
, &rem1
);
6165 while ( (int64_t) rem0
< 0 ) {
6168 add128( rem0
, rem1
, zSig0
>>63, doubleZSig0
| 1, &rem0
, &rem1
);
6170 zSig1
= estimateDiv128To64( rem1
, 0, doubleZSig0
);
6171 if ( ( zSig1
& UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
6172 if ( zSig1
== 0 ) zSig1
= 1;
6173 mul64To128( doubleZSig0
, zSig1
, &term1
, &term2
);
6174 sub128( rem1
, 0, term1
, term2
, &rem1
, &rem2
);
6175 mul64To128( zSig1
, zSig1
, &term2
, &term3
);
6176 sub192( rem1
, rem2
, 0, 0, term2
, term3
, &rem1
, &rem2
, &rem3
);
6177 while ( (int64_t) rem1
< 0 ) {
6179 shortShift128Left( 0, zSig1
, 1, &term2
, &term3
);
6181 term2
|= doubleZSig0
;
6182 add192( rem1
, rem2
, rem3
, 0, term2
, term3
, &rem1
, &rem2
, &rem3
);
6184 zSig1
|= ( ( rem1
| rem2
| rem3
) != 0 );
6186 shortShift128Left( 0, zSig1
, 1, &zSig0
, &zSig1
);
6187 zSig0
|= doubleZSig0
;
6188 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
6189 0, zExp
, zSig0
, zSig1
, status
);
6192 /*----------------------------------------------------------------------------
6193 | Returns the result of converting the quadruple-precision floating-point
6194 | value `a' to the extended double-precision floating-point format. The
6195 | conversion is performed according to the IEC/IEEE Standard for Binary
6196 | Floating-Point Arithmetic.
6197 *----------------------------------------------------------------------------*/
6199 floatx80
float128_to_floatx80(float128 a
, float_status
*status
)
6203 uint64_t aSig0
, aSig1
;
6205 aSig1
= extractFloat128Frac1( a
);
6206 aSig0
= extractFloat128Frac0( a
);
6207 aExp
= extractFloat128Exp( a
);
6208 aSign
= extractFloat128Sign( a
);
6209 if ( aExp
== 0x7FFF ) {
6210 if ( aSig0
| aSig1
) {
6211 floatx80 res
= commonNaNToFloatx80(float128ToCommonNaN(a
, status
),
6213 return floatx80_silence_nan(res
, status
);
6215 return packFloatx80(aSign
, floatx80_infinity_high
,
6216 floatx80_infinity_low
);
6219 if ( ( aSig0
| aSig1
) == 0 ) return packFloatx80( aSign
, 0, 0 );
6220 normalizeFloat128Subnormal( aSig0
, aSig1
, &aExp
, &aSig0
, &aSig1
);
6223 aSig0
|= UINT64_C(0x0001000000000000);
6225 shortShift128Left( aSig0
, aSig1
, 15, &aSig0
, &aSig1
);
6226 return roundAndPackFloatx80(80, aSign
, aExp
, aSig0
, aSig1
, status
);
6230 /*----------------------------------------------------------------------------
6231 | Returns the remainder of the quadruple-precision floating-point value `a'
6232 | with respect to the corresponding value `b'. The operation is performed
6233 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6234 *----------------------------------------------------------------------------*/
6236 float128
float128_rem(float128 a
, float128 b
, float_status
*status
)
6239 int32_t aExp
, bExp
, expDiff
;
6240 uint64_t aSig0
, aSig1
, bSig0
, bSig1
, q
, term0
, term1
, term2
;
6241 uint64_t allZero
, alternateASig0
, alternateASig1
, sigMean1
;
6244 aSig1
= extractFloat128Frac1( a
);
6245 aSig0
= extractFloat128Frac0( a
);
6246 aExp
= extractFloat128Exp( a
);
6247 aSign
= extractFloat128Sign( a
);
6248 bSig1
= extractFloat128Frac1( b
);
6249 bSig0
= extractFloat128Frac0( b
);
6250 bExp
= extractFloat128Exp( b
);
6251 if ( aExp
== 0x7FFF ) {
6252 if ( ( aSig0
| aSig1
)
6253 || ( ( bExp
== 0x7FFF ) && ( bSig0
| bSig1
) ) ) {
6254 return propagateFloat128NaN(a
, b
, status
);
6258 if ( bExp
== 0x7FFF ) {
6259 if (bSig0
| bSig1
) {
6260 return propagateFloat128NaN(a
, b
, status
);
6265 if ( ( bSig0
| bSig1
) == 0 ) {
6267 float_raise(float_flag_invalid
, status
);
6268 return float128_default_nan(status
);
6270 normalizeFloat128Subnormal( bSig0
, bSig1
, &bExp
, &bSig0
, &bSig1
);
6273 if ( ( aSig0
| aSig1
) == 0 ) return a
;
6274 normalizeFloat128Subnormal( aSig0
, aSig1
, &aExp
, &aSig0
, &aSig1
);
6276 expDiff
= aExp
- bExp
;
6277 if ( expDiff
< -1 ) return a
;
6279 aSig0
| UINT64_C(0x0001000000000000),
6281 15 - ( expDiff
< 0 ),
6286 bSig0
| UINT64_C(0x0001000000000000), bSig1
, 15, &bSig0
, &bSig1
);
6287 q
= le128( bSig0
, bSig1
, aSig0
, aSig1
);
6288 if ( q
) sub128( aSig0
, aSig1
, bSig0
, bSig1
, &aSig0
, &aSig1
);
6290 while ( 0 < expDiff
) {
6291 q
= estimateDiv128To64( aSig0
, aSig1
, bSig0
);
6292 q
= ( 4 < q
) ? q
- 4 : 0;
6293 mul128By64To192( bSig0
, bSig1
, q
, &term0
, &term1
, &term2
);
6294 shortShift192Left( term0
, term1
, term2
, 61, &term1
, &term2
, &allZero
);
6295 shortShift128Left( aSig0
, aSig1
, 61, &aSig0
, &allZero
);
6296 sub128( aSig0
, 0, term1
, term2
, &aSig0
, &aSig1
);
6299 if ( -64 < expDiff
) {
6300 q
= estimateDiv128To64( aSig0
, aSig1
, bSig0
);
6301 q
= ( 4 < q
) ? q
- 4 : 0;
6303 shift128Right( bSig0
, bSig1
, 12, &bSig0
, &bSig1
);
6305 if ( expDiff
< 0 ) {
6306 shift128Right( aSig0
, aSig1
, - expDiff
, &aSig0
, &aSig1
);
6309 shortShift128Left( aSig0
, aSig1
, expDiff
, &aSig0
, &aSig1
);
6311 mul128By64To192( bSig0
, bSig1
, q
, &term0
, &term1
, &term2
);
6312 sub128( aSig0
, aSig1
, term1
, term2
, &aSig0
, &aSig1
);
6315 shift128Right( aSig0
, aSig1
, 12, &aSig0
, &aSig1
);
6316 shift128Right( bSig0
, bSig1
, 12, &bSig0
, &bSig1
);
6319 alternateASig0
= aSig0
;
6320 alternateASig1
= aSig1
;
6322 sub128( aSig0
, aSig1
, bSig0
, bSig1
, &aSig0
, &aSig1
);
6323 } while ( 0 <= (int64_t) aSig0
);
6325 aSig0
, aSig1
, alternateASig0
, alternateASig1
, (uint64_t *)&sigMean0
, &sigMean1
);
6326 if ( ( sigMean0
< 0 )
6327 || ( ( ( sigMean0
| sigMean1
) == 0 ) && ( q
& 1 ) ) ) {
6328 aSig0
= alternateASig0
;
6329 aSig1
= alternateASig1
;
6331 zSign
= ( (int64_t) aSig0
< 0 );
6332 if ( zSign
) sub128( 0, 0, aSig0
, aSig1
, &aSig0
, &aSig1
);
6333 return normalizeRoundAndPackFloat128(aSign
^ zSign
, bExp
- 4, aSig0
, aSig1
,
6337 static inline FloatRelation
6338 floatx80_compare_internal(floatx80 a
, floatx80 b
, bool is_quiet
,
6339 float_status
*status
)
6343 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
6344 float_raise(float_flag_invalid
, status
);
6345 return float_relation_unordered
;
6347 if (( ( extractFloatx80Exp( a
) == 0x7fff ) &&
6348 ( extractFloatx80Frac( a
)<<1 ) ) ||
6349 ( ( extractFloatx80Exp( b
) == 0x7fff ) &&
6350 ( extractFloatx80Frac( b
)<<1 ) )) {
6352 floatx80_is_signaling_nan(a
, status
) ||
6353 floatx80_is_signaling_nan(b
, status
)) {
6354 float_raise(float_flag_invalid
, status
);
6356 return float_relation_unordered
;
6358 aSign
= extractFloatx80Sign( a
);
6359 bSign
= extractFloatx80Sign( b
);
6360 if ( aSign
!= bSign
) {
6362 if ( ( ( (uint16_t) ( ( a
.high
| b
.high
) << 1 ) ) == 0) &&
6363 ( ( a
.low
| b
.low
) == 0 ) ) {
6365 return float_relation_equal
;
6367 return 1 - (2 * aSign
);
6370 /* Normalize pseudo-denormals before comparison. */
6371 if ((a
.high
& 0x7fff) == 0 && a
.low
& UINT64_C(0x8000000000000000)) {
6374 if ((b
.high
& 0x7fff) == 0 && b
.low
& UINT64_C(0x8000000000000000)) {
6377 if (a
.low
== b
.low
&& a
.high
== b
.high
) {
6378 return float_relation_equal
;
6380 return 1 - 2 * (aSign
^ ( lt128( a
.high
, a
.low
, b
.high
, b
.low
) ));
6385 FloatRelation
floatx80_compare(floatx80 a
, floatx80 b
, float_status
*status
)
6387 return floatx80_compare_internal(a
, b
, 0, status
);
6390 FloatRelation
floatx80_compare_quiet(floatx80 a
, floatx80 b
,
6391 float_status
*status
)
6393 return floatx80_compare_internal(a
, b
, 1, status
);
6396 floatx80
floatx80_scalbn(floatx80 a
, int n
, float_status
*status
)
6402 if (floatx80_invalid_encoding(a
)) {
6403 float_raise(float_flag_invalid
, status
);
6404 return floatx80_default_nan(status
);
6406 aSig
= extractFloatx80Frac( a
);
6407 aExp
= extractFloatx80Exp( a
);
6408 aSign
= extractFloatx80Sign( a
);
6410 if ( aExp
== 0x7FFF ) {
6412 return propagateFloatx80NaN(a
, a
, status
);
6426 } else if (n
< -0x10000) {
6431 return normalizeRoundAndPackFloatx80(status
->floatx80_rounding_precision
,
6432 aSign
, aExp
, aSig
, 0, status
);
6435 static void __attribute__((constructor
)) softfloat_init(void)
6437 union_float64 ua
, ub
, uc
, ur
;
6439 if (QEMU_NO_HARDFLOAT
) {
6443 * Test that the host's FMA is not obviously broken. For example,
6444 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6445 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6447 ua
.s
= 0x0020000000000001ULL
;
6448 ub
.s
= 0x3ca0000000000000ULL
;
6449 uc
.s
= 0x0020000000000000ULL
;
6450 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
6451 if (ur
.s
!= 0x0020000000000001ULL
) {
6452 force_soft_fma
= true;