fpu/softfloat.c

   1 /*
   2  * QEMU float support
   3  *
   4  * The code in this source file is derived from release 2a of the SoftFloat
   5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6  * some later contributions) are provided under that license, as detailed below.
   7  * It has subsequently been modified by contributors to the QEMU Project,
   8  * so some portions are provided under:
   9  *  the SoftFloat-2a license
  10  *  the BSD license
  11  *  GPL-v2-or-later
  12  *
  13  * Any future contributions to this file after December 1st 2014 will be
  14  * taken to be licensed under the Softfloat-2a license unless specifically
  15  * indicated otherwise.
  16  */
  17
  18 /*
  19 ===============================================================================
  20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
  21 Arithmetic Package, Release 2a.
  22
  23 Written by John R. Hauser.  This work was made possible in part by the
  24 International Computer Science Institute, located at Suite 600, 1947 Center
  25 Street, Berkeley, California 94704.  Funding was partially provided by the
  26 National Science Foundation under grant MIP-9311980.  The original version
  27 of this code was written as part of a project to build a fixed-point vector
  28 processor in collaboration with the University of California at Berkeley,
  29 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  31 arithmetic/SoftFloat.html'.
  32
  33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  35 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  38
  39 Derivative works are acceptable, even for commercial purposes, so long as
  40 (1) they include prominent notice that the work is derivative, and (2) they
  41 include prominent notice akin to these four paragraphs for those parts of
  42 this code that are retained.
  43
  44 ===============================================================================
  45 */
  46
  47 /* BSD licensing:
  48  * Copyright (c) 2006, Fabrice Bellard
  49  * All rights reserved.
  50  *
  51  * Redistribution and use in source and binary forms, with or without
  52  * modification, are permitted provided that the following conditions are met:
  53  *
  54  * 1. Redistributions of source code must retain the above copyright notice,
  55  * this list of conditions and the following disclaimer.
  56  *
  57  * 2. Redistributions in binary form must reproduce the above copyright notice,
  58  * this list of conditions and the following disclaimer in the documentation
  59  * and/or other materials provided with the distribution.
  60  *
  61  * 3. Neither the name of the copyright holder nor the names of its contributors
  62  * may be used to endorse or promote products derived from this software without
  63  * specific prior written permission.
  64  *
  65  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  66  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  68  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  69  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  70  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  71  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  72  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  73  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  74  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  75  * THE POSSIBILITY OF SUCH DAMAGE.
  76  */
  77
  78 /* Portions of this work are licensed under the terms of the GNU GPL,
  79  * version 2 or later. See the COPYING file in the top-level directory.
  80  */
  81
  82 /* softfloat (and in particular the code in softfloat-specialize.h) is
  83  * target-dependent and needs the TARGET_* macros.
  84  */
  85 #include "qemu/osdep.h"
  86 #include <math.h>
  87 #include "qemu/bitops.h"
  88 #include "fpu/softfloat.h"
  89
  90 /* We only need stdlib for abort() */
  91
  92 /*----------------------------------------------------------------------------
  93 | Primitive arithmetic functions, including multi-word arithmetic, and
  94 | division and square root approximations.  (Can be specialized to target if
  95 | desired.)
  96 *----------------------------------------------------------------------------*/
  97 #include "fpu/softfloat-macros.h"
  98
  99 /*
 100  * Hardfloat
 101  *
 102  * Fast emulation of guest FP instructions is challenging for two reasons.
 103  * First, FP instruction semantics are similar but not identical, particularly
 104  * when handling NaNs. Second, emulating at reasonable speed the guest FP
 105  * exception flags is not trivial: reading the host's flags register with a
 106  * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 * and trapping on every FP exception is neither fast nor pleasant to work with.
 108  *
 109  * We address these challenges by leveraging the host FPU for a subset of the
 110  * operations. To do this we expand on the idea presented in this paper:
 111  *
 112  * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 113  * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 114  *
 115  * The idea is thus to leverage the host FPU to (1) compute FP operations
 116  * and (2) identify whether FP exceptions occurred while avoiding
 117  * expensive exception flag register accesses.
 118  *
 119  * An important optimization shown in the paper is that given that exception
 120  * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 121  * This is particularly useful for the inexact flag, which is very frequently
 122  * raised in floating-point workloads.
 123  *
 124  * We optimize the code further by deferring to soft-fp whenever FP exception
 125  * detection might get hairy. Two examples: (1) when at least one operand is
 126  * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 127  * and the result is < the minimum normal.
 128  */
 129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
 130     static inline void name(soft_t *a, float_status *s)                 \
 131     {                                                                   \
 132         if (unlikely(soft_t ## _is_denormal(*a))) {                     \
 133             *a = soft_t ## _set_sign(soft_t ## _zero,                   \
 134                                      soft_t ## _is_neg(*a));            \
 135             float_raise(float_flag_input_denormal, s);                  \
 136         }                                                               \
 137     }
 138
 139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
 140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
 141 #undef GEN_INPUT_FLUSH__NOCHECK
 142
 143 #define GEN_INPUT_FLUSH1(name, soft_t)                  \
 144     static inline void name(soft_t *a, float_status *s) \
 145     {                                                   \
 146         if (likely(!s->flush_inputs_to_zero)) {         \
 147             return;                                     \
 148         }                                               \
 149         soft_t ## _input_flush__nocheck(a, s);          \
 150     }
 151
 152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
 153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
 154 #undef GEN_INPUT_FLUSH1
 155
 156 #define GEN_INPUT_FLUSH2(name, soft_t)                                  \
 157     static inline void name(soft_t *a, soft_t *b, float_status *s)      \
 158     {                                                                   \
 159         if (likely(!s->flush_inputs_to_zero)) {                         \
 160             return;                                                     \
 161         }                                                               \
 162         soft_t ## _input_flush__nocheck(a, s);                          \
 163         soft_t ## _input_flush__nocheck(b, s);                          \
 164     }
 165
 166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
 167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
 168 #undef GEN_INPUT_FLUSH2
 169
 170 #define GEN_INPUT_FLUSH3(name, soft_t)                                  \
 171     static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
 172     {                                                                   \
 173         if (likely(!s->flush_inputs_to_zero)) {                         \
 174             return;                                                     \
 175         }                                                               \
 176         soft_t ## _input_flush__nocheck(a, s);                          \
 177         soft_t ## _input_flush__nocheck(b, s);                          \
 178         soft_t ## _input_flush__nocheck(c, s);                          \
 179     }
 180
 181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
 182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
 183 #undef GEN_INPUT_FLUSH3
 184
 185 /*
 186  * Choose whether to use fpclassify or float32/64_* primitives in the generated
 187  * hardfloat functions. Each combination of number of inputs and float size
 188  * gets its own value.
 189  */
 190 #if defined(__x86_64__)
 191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
 193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
 195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
 197 #else
 198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
 199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
 200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
 201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
 202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
 203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
 204 #endif
 205
 206 /*
 207  * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 208  * float{32,64}_is_infinity when !USE_FP.
 209  * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 210  * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 211  */
 212 #if defined(__x86_64__) || defined(__aarch64__)
 213 # define QEMU_HARDFLOAT_USE_ISINF   1
 214 #else
 215 # define QEMU_HARDFLOAT_USE_ISINF   0
 216 #endif
 217
 218 /*
 219  * Some targets clear the FP flags before most FP operations. This prevents
 220  * the use of hardfloat, since hardfloat relies on the inexact flag being
 221  * already set.
 222  */
 223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
 224 # if defined(__FAST_MATH__)
 225 #  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
 226     IEEE implementation
 227 # endif
 228 # define QEMU_NO_HARDFLOAT 1
 229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
 230 #else
 231 # define QEMU_NO_HARDFLOAT 0
 232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
 233 #endif
 234
 235 static inline bool can_use_fpu(const float_status *s)
 236 {
 237     if (QEMU_NO_HARDFLOAT) {
 238         return false;
 239     }
 240     return likely(s->float_exception_flags & float_flag_inexact &&
 241                   s->float_rounding_mode == float_round_nearest_even);
 242 }
 243
 244 /*
 245  * Hardfloat generation functions. Each operation can have two flavors:
 246  * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 247  * most condition checks, or native ones (e.g. fpclassify).
 248  *
 249  * The flavor is chosen by the callers. Instead of using macros, we rely on the
 250  * compiler to propagate constants and inline everything into the callers.
 251  *
 252  * We only generate functions for operations with two inputs, since only
 253  * these are common enough to justify consolidating them into common code.
 254  */
 255
 256 typedef union {
 257     float32 s;
 258     float h;
 259 } union_float32;
 260
 261 typedef union {
 262     float64 s;
 263     double h;
 264 } union_float64;
 265
 266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
 267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
 268
 269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
 270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
 271 typedef float   (*hard_f32_op2_fn)(float a, float b);
 272 typedef double  (*hard_f64_op2_fn)(double a, double b);
 273
 274 /* 2-input is-zero-or-normal */
 275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
 276 {
 277     if (QEMU_HARDFLOAT_2F32_USE_FP) {
 278         /*
 279          * Not using a temp variable for consecutive fpclassify calls ends up
 280          * generating faster code.
 281          */
 282         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 283                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 284     }
 285     return float32_is_zero_or_normal(a.s) &&
 286            float32_is_zero_or_normal(b.s);
 287 }
 288
 289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
 290 {
 291     if (QEMU_HARDFLOAT_2F64_USE_FP) {
 292         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 293                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 294     }
 295     return float64_is_zero_or_normal(a.s) &&
 296            float64_is_zero_or_normal(b.s);
 297 }
 298
 299 /* 3-input is-zero-or-normal */
 300 static inline
 301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
 302 {
 303     if (QEMU_HARDFLOAT_3F32_USE_FP) {
 304         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 305                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 306                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 307     }
 308     return float32_is_zero_or_normal(a.s) &&
 309            float32_is_zero_or_normal(b.s) &&
 310            float32_is_zero_or_normal(c.s);
 311 }
 312
 313 static inline
 314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
 315 {
 316     if (QEMU_HARDFLOAT_3F64_USE_FP) {
 317         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 318                (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 319                (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 320     }
 321     return float64_is_zero_or_normal(a.s) &&
 322            float64_is_zero_or_normal(b.s) &&
 323            float64_is_zero_or_normal(c.s);
 324 }
 325
 326 static inline bool f32_is_inf(union_float32 a)
 327 {
 328     if (QEMU_HARDFLOAT_USE_ISINF) {
 329         return isinf(a.h);
 330     }
 331     return float32_is_infinity(a.s);
 332 }
 333
 334 static inline bool f64_is_inf(union_float64 a)
 335 {
 336     if (QEMU_HARDFLOAT_USE_ISINF) {
 337         return isinf(a.h);
 338     }
 339     return float64_is_infinity(a.s);
 340 }
 341
 342 static inline float32
 343 float32_gen2(float32 xa, float32 xb, float_status *s,
 344              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
 345              f32_check_fn pre, f32_check_fn post)
 346 {
 347     union_float32 ua, ub, ur;
 348
 349     ua.s = xa;
 350     ub.s = xb;
 351
 352     if (unlikely(!can_use_fpu(s))) {
 353         goto soft;
 354     }
 355
 356     float32_input_flush2(&ua.s, &ub.s, s);
 357     if (unlikely(!pre(ua, ub))) {
 358         goto soft;
 359     }
 360
 361     ur.h = hard(ua.h, ub.h);
 362     if (unlikely(f32_is_inf(ur))) {
 363         float_raise(float_flag_overflow, s);
 364     } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
 365         goto soft;
 366     }
 367     return ur.s;
 368
 369  soft:
 370     return soft(ua.s, ub.s, s);
 371 }
 372
 373 static inline float64
 374 float64_gen2(float64 xa, float64 xb, float_status *s,
 375              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
 376              f64_check_fn pre, f64_check_fn post)
 377 {
 378     union_float64 ua, ub, ur;
 379
 380     ua.s = xa;
 381     ub.s = xb;
 382
 383     if (unlikely(!can_use_fpu(s))) {
 384         goto soft;
 385     }
 386
 387     float64_input_flush2(&ua.s, &ub.s, s);
 388     if (unlikely(!pre(ua, ub))) {
 389         goto soft;
 390     }
 391
 392     ur.h = hard(ua.h, ub.h);
 393     if (unlikely(f64_is_inf(ur))) {
 394         float_raise(float_flag_overflow, s);
 395     } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
 396         goto soft;
 397     }
 398     return ur.s;
 399
 400  soft:
 401     return soft(ua.s, ub.s, s);
 402 }
 403
 404 /*----------------------------------------------------------------------------
 405 | Returns the fraction bits of the single-precision floating-point value `a'.
 406 *----------------------------------------------------------------------------*/
 407
 408 static inline uint32_t extractFloat32Frac(float32 a)
 409 {
 410     return float32_val(a) & 0x007FFFFF;
 411 }
 412
 413 /*----------------------------------------------------------------------------
 414 | Returns the exponent bits of the single-precision floating-point value `a'.
 415 *----------------------------------------------------------------------------*/
 416
 417 static inline int extractFloat32Exp(float32 a)
 418 {
 419     return (float32_val(a) >> 23) & 0xFF;
 420 }
 421
 422 /*----------------------------------------------------------------------------
 423 | Returns the sign bit of the single-precision floating-point value `a'.
 424 *----------------------------------------------------------------------------*/
 425
 426 static inline bool extractFloat32Sign(float32 a)
 427 {
 428     return float32_val(a) >> 31;
 429 }
 430
 431 /*----------------------------------------------------------------------------
 432 | Returns the fraction bits of the double-precision floating-point value `a'.
 433 *----------------------------------------------------------------------------*/
 434
 435 static inline uint64_t extractFloat64Frac(float64 a)
 436 {
 437     return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
 438 }
 439
 440 /*----------------------------------------------------------------------------
 441 | Returns the exponent bits of the double-precision floating-point value `a'.
 442 *----------------------------------------------------------------------------*/
 443
 444 static inline int extractFloat64Exp(float64 a)
 445 {
 446     return (float64_val(a) >> 52) & 0x7FF;
 447 }
 448
 449 /*----------------------------------------------------------------------------
 450 | Returns the sign bit of the double-precision floating-point value `a'.
 451 *----------------------------------------------------------------------------*/
 452
 453 static inline bool extractFloat64Sign(float64 a)
 454 {
 455     return float64_val(a) >> 63;
 456 }
 457
 458 /*
 459  * Classify a floating point number. Everything above float_class_qnan
 460  * is a NaN so cls >= float_class_qnan is any NaN.
 461  */
 462
 463 typedef enum __attribute__ ((__packed__)) {
 464     float_class_unclassified,
 465     float_class_zero,
 466     float_class_normal,
 467     float_class_inf,
 468     float_class_qnan,  /* all NaNs from here */
 469     float_class_snan,
 470 } FloatClass;
 471
 472 #define float_cmask(bit)  (1u << (bit))
 473
 474 enum {
 475     float_cmask_zero    = float_cmask(float_class_zero),
 476     float_cmask_normal  = float_cmask(float_class_normal),
 477     float_cmask_inf     = float_cmask(float_class_inf),
 478     float_cmask_qnan    = float_cmask(float_class_qnan),
 479     float_cmask_snan    = float_cmask(float_class_snan),
 480
 481     float_cmask_infzero = float_cmask_zero | float_cmask_inf,
 482     float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
 483 };
 484
 485
 486 /* Simple helpers for checking if, or what kind of, NaN we have */
 487 static inline __attribute__((unused)) bool is_nan(FloatClass c)
 488 {
 489     return unlikely(c >= float_class_qnan);
 490 }
 491
 492 static inline __attribute__((unused)) bool is_snan(FloatClass c)
 493 {
 494     return c == float_class_snan;
 495 }
 496
 497 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
 498 {
 499     return c == float_class_qnan;
 500 }
 501
 502 /*
 503  * Structure holding all of the decomposed parts of a float.
 504  * The exponent is unbiased and the fraction is normalized.
 505  *
 506  * The fraction words are stored in big-endian word ordering,
 507  * so that truncation from a larger format to a smaller format
 508  * can be done simply by ignoring subsequent elements.
 509  */
 510
 511 typedef struct {
 512     FloatClass cls;
 513     bool sign;
 514     int32_t exp;
 515     union {
 516         /* Routines that know the structure may reference the singular name. */
 517         uint64_t frac;
 518         /*
 519          * Routines expanded with multiple structures reference "hi" and "lo"
 520          * depending on the operation.  In FloatParts64, "hi" and "lo" are
 521          * both the same word and aliased here.
 522          */
 523         uint64_t frac_hi;
 524         uint64_t frac_lo;
 525     };
 526 } FloatParts64;
 527
 528 typedef struct {
 529     FloatClass cls;
 530     bool sign;
 531     int32_t exp;
 532     uint64_t frac_hi;
 533     uint64_t frac_lo;
 534 } FloatParts128;
 535
 536 typedef struct {
 537     FloatClass cls;
 538     bool sign;
 539     int32_t exp;
 540     uint64_t frac_hi;
 541     uint64_t frac_hm;  /* high-middle */
 542     uint64_t frac_lm;  /* low-middle */
 543     uint64_t frac_lo;
 544 } FloatParts256;
 545
 546 /* These apply to the most significant word of each FloatPartsN. */
 547 #define DECOMPOSED_BINARY_POINT    63
 548 #define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
 549
 550 /* Structure holding all of the relevant parameters for a format.
 551  *   exp_size: the size of the exponent field
 552  *   exp_bias: the offset applied to the exponent field
 553  *   exp_max: the maximum normalised exponent
 554  *   frac_size: the size of the fraction field
 555  *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 556  * The following are computed based the size of fraction
 557  *   frac_lsb: least significant bit of fraction
 558  *   frac_lsbm1: the bit below the least significant bit (for rounding)
 559  *   round_mask/roundeven_mask: masks used for rounding
 560  * The following optional modifiers are available:
 561  *   arm_althp: handle ARM Alternative Half Precision
 562  */
 563 typedef struct {
 564     int exp_size;
 565     int exp_bias;
 566     int exp_max;
 567     int frac_size;
 568     int frac_shift;
 569     uint64_t frac_lsb;
 570     uint64_t frac_lsbm1;
 571     uint64_t round_mask;
 572     uint64_t roundeven_mask;
 573     bool arm_althp;
 574 } FloatFmt;
 575
 576 /* Expand fields based on the size of exponent and fraction */
 577 #define FLOAT_PARAMS(E, F)                                           \
 578     .exp_size       = E,                                             \
 579     .exp_bias       = ((1 << E) - 1) >> 1,                           \
 580     .exp_max        = (1 << E) - 1,                                  \
 581     .frac_size      = F,                                             \
 582     .frac_shift     = (-F - 1) & 63,                                 \
 583     .frac_lsb       = 1ull << ((-F - 1) & 63),                       \
 584     .frac_lsbm1     = 1ull << ((-F - 2) & 63),                       \
 585     .round_mask     = (1ull << ((-F - 1) & 63)) - 1,                 \
 586     .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1
 587
 588 static const FloatFmt float16_params = {
 589     FLOAT_PARAMS(5, 10)
 590 };
 591
 592 static const FloatFmt float16_params_ahp = {
 593     FLOAT_PARAMS(5, 10),
 594     .arm_althp = true
 595 };
 596
 597 static const FloatFmt bfloat16_params = {
 598     FLOAT_PARAMS(8, 7)
 599 };
 600
 601 static const FloatFmt float32_params = {
 602     FLOAT_PARAMS(8, 23)
 603 };
 604
 605 static const FloatFmt float64_params = {
 606     FLOAT_PARAMS(11, 52)
 607 };
 608
 609 static const FloatFmt float128_params = {
 610     FLOAT_PARAMS(15, 112)
 611 };
 612
 613 /* Unpack a float to parts, but do not canonicalize.  */
 614 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
 615 {
 616     const int f_size = fmt->frac_size;
 617     const int e_size = fmt->exp_size;
 618
 619     *r = (FloatParts64) {
 620         .cls = float_class_unclassified,
 621         .sign = extract64(raw, f_size + e_size, 1),
 622         .exp = extract64(raw, f_size, e_size),
 623         .frac = extract64(raw, 0, f_size)
 624     };
 625 }
 626
 627 static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
 628 {
 629     unpack_raw64(p, &float16_params, f);
 630 }
 631
 632 static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
 633 {
 634     unpack_raw64(p, &bfloat16_params, f);
 635 }
 636
 637 static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
 638 {
 639     unpack_raw64(p, &float32_params, f);
 640 }
 641
 642 static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
 643 {
 644     unpack_raw64(p, &float64_params, f);
 645 }
 646
 647 static void float128_unpack_raw(FloatParts128 *p, float128 f)
 648 {
 649     const int f_size = float128_params.frac_size - 64;
 650     const int e_size = float128_params.exp_size;
 651
 652     *p = (FloatParts128) {
 653         .cls = float_class_unclassified,
 654         .sign = extract64(f.high, f_size + e_size, 1),
 655         .exp = extract64(f.high, f_size, e_size),
 656         .frac_hi = extract64(f.high, 0, f_size),
 657         .frac_lo = f.low,
 658     };
 659 }
 660
 661 /* Pack a float from parts, but do not canonicalize.  */
 662 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
 663 {
 664     const int f_size = fmt->frac_size;
 665     const int e_size = fmt->exp_size;
 666     uint64_t ret;
 667
 668     ret = (uint64_t)p->sign << (f_size + e_size);
 669     ret = deposit64(ret, f_size, e_size, p->exp);
 670     ret = deposit64(ret, 0, f_size, p->frac);
 671     return ret;
 672 }
 673
 674 static inline float16 float16_pack_raw(const FloatParts64 *p)
 675 {
 676     return make_float16(pack_raw64(p, &float16_params));
 677 }
 678
 679 static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
 680 {
 681     return pack_raw64(p, &bfloat16_params);
 682 }
 683
 684 static inline float32 float32_pack_raw(const FloatParts64 *p)
 685 {
 686     return make_float32(pack_raw64(p, &float32_params));
 687 }
 688
 689 static inline float64 float64_pack_raw(const FloatParts64 *p)
 690 {
 691     return make_float64(pack_raw64(p, &float64_params));
 692 }
 693
 694 static float128 float128_pack_raw(const FloatParts128 *p)
 695 {
 696     const int f_size = float128_params.frac_size - 64;
 697     const int e_size = float128_params.exp_size;
 698     uint64_t hi;
 699
 700     hi = (uint64_t)p->sign << (f_size + e_size);
 701     hi = deposit64(hi, f_size, e_size, p->exp);
 702     hi = deposit64(hi, 0, f_size, p->frac_hi);
 703     return make_float128(hi, p->frac_lo);
 704 }
 705
 706 /*----------------------------------------------------------------------------
 707 | Functions and definitions to determine:  (1) whether tininess for underflow
 708 | is detected before or after rounding by default, (2) what (if anything)
 709 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
 710 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
 711 | are propagated from function inputs to output.  These details are target-
 712 | specific.
 713 *----------------------------------------------------------------------------*/
 714 #include "softfloat-specialize.c.inc"
 715
 716 #define PARTS_GENERIC_64_128(NAME, P) \
 717     QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
 718
 719 #define PARTS_GENERIC_64_128_256(NAME, P) \
 720     QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
 721                  (FloatParts128 *, parts128_##NAME), parts64_##NAME)
 722
 723 #define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
 724 #define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
 725
 726 static void parts64_return_nan(FloatParts64 *a, float_status *s);
 727 static void parts128_return_nan(FloatParts128 *a, float_status *s);
 728
 729 #define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
 730
 731 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
 732                                       float_status *s);
 733 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
 734                                         float_status *s);
 735
 736 #define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
 737
 738 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
 739                                              FloatParts64 *c, float_status *s,
 740                                              int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

/*
 * Size-generic front end: forwards to a parts<N>_pick_nan_muladd variant
 * chosen from A.
 * NOTE(review): PARTS_GENERIC_64_128 is defined outside this chunk;
 * presumably it selects on the pointer type of A in the same way as
 * FRAC_GENERIC_64_128 -- confirm.
 */
#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

/* Forward declarations; used below by the *_unpack_canonical helpers. */
static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

/* Size-generic front end for parts<N>_canonicalize. */
#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

/* Forward declarations; used below by the *_round_pack_canonical helpers. */
static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

/* Size-generic front end for parts<N>_uncanon. */
#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

/* add_normal exists for 64-, 128- and 256-bit parts. */
static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

/* Size-generic front end for parts<N>_add_normal (three widths). */
#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

/* sub_normal exists for 64-, 128- and 256-bit parts and returns a bool. */
static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

/* Size-generic front end for parts<N>_sub_normal (three widths). */
#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

/* Combined add/subtract; the bool argument selects subtraction. */
static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

/* Size-generic front end for parts<N>_addsub; Z is the subtract flag. */
#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

/* Multiplication; returns a pointer to the result parts. */
static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

/* Size-generic front end for parts<N>_mul. */
#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

/* Fused multiply-add; flags carries the float_muladd_* modifier bits. */
static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

/* Size-generic front end for parts<N>_muladd; Z is the flags word. */
#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

/* Division; returns a pointer to the result parts. */
static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

/* Size-generic front end for parts<N>_div. */
#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)
 813
/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 *
 * The FRAC_GENERIC_* macros expand to the name of the frac<N>_NAME helper
 * that matches the argument P: frac256_NAME / frac128_NAME are listed
 * against FloatParts256 * / FloatParts128 *, and frac64_NAME is the
 * fallback.  NOTE(review): the selection itself is done by QEMU_GENERIC,
 * which is defined elsewhere -- presumably a compile-time type match.
 */

/* Two-width selector: FloatParts128 * or the 64-bit default. */
#define FRAC_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)

/* Three-width selector: FloatParts256 *, FloatParts128 * or the default. */
#define FRAC_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
                 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
 824
 825 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
 826 {
 827     return uadd64_overflow(a->frac, b->frac, &r->frac);
 828 }
 829
 830 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
 831 {
 832     bool c = 0;
 833     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 834     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 835     return c;
 836 }
 837
 838 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
 839 {
 840     bool c = 0;
 841     r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 842     r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
 843     r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
 844     r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 845     return c;
 846 }
 847
 848 #define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
 849
 850 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
 851 {
 852     return uadd64_overflow(a->frac, c, &r->frac);
 853 }
 854
 855 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
 856 {
 857     c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
 858     return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
 859 }
 860
 861 #define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
 862
 863 static void frac64_allones(FloatParts64 *a)
 864 {
 865     a->frac = -1;
 866 }
 867
 868 static void frac128_allones(FloatParts128 *a)
 869 {
 870     a->frac_hi = a->frac_lo = -1;
 871 }
 872
 873 #define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
 874
 875 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
 876 {
 877     return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
 878 }
 879
 880 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
 881 {
 882     uint64_t ta = a->frac_hi, tb = b->frac_hi;
 883     if (ta == tb) {
 884         ta = a->frac_lo, tb = b->frac_lo;
 885         if (ta == tb) {
 886             return 0;
 887         }
 888     }
 889     return ta < tb ? -1 : 1;
 890 }
 891
 892 #define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
 893
 894 static void frac64_clear(FloatParts64 *a)
 895 {
 896     a->frac = 0;
 897 }
 898
 899 static void frac128_clear(FloatParts128 *a)
 900 {
 901     a->frac_hi = a->frac_lo = 0;
 902 }
 903
 904 #define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
 905
 906 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
 907 {
 908     uint64_t n1, n0, r, q;
 909     bool ret;
 910
 911     /*
 912      * We want a 2*N / N-bit division to produce exactly an N-bit
 913      * result, so that we do not lose any precision and so that we
 914      * do not have to renormalize afterward.  If A.frac < B.frac,
 915      * then division would produce an (N-1)-bit result; shift A left
 916      * by one to produce the an N-bit result, and return true to
 917      * decrement the exponent to match.
 918      *
 919      * The udiv_qrnnd algorithm that we're using requires normalization,
 920      * i.e. the msb of the denominator must be set, which is already true.
 921      */
 922     ret = a->frac < b->frac;
 923     if (ret) {
 924         n0 = a->frac;
 925         n1 = 0;
 926     } else {
 927         n0 = a->frac >> 1;
 928         n1 = a->frac << 63;
 929     }
 930     q = udiv_qrnnd(&r, n0, n1, b->frac);
 931
 932     /* Set lsb if there is a remainder, to set inexact. */
 933     a->frac = q | (r != 0);
 934
 935     return ret;
 936 }
 937
/*
 * Divide the 128-bit fraction of A by that of B, leaving a 128-bit
 * quotient in A (frac_hi:frac_lo) whose lsb is set when the division is
 * inexact.
 *
 * Returns the result of lt128(A, B) on the incoming fractions; the 64-bit
 * sibling uses the same return value to request an exponent decrement.
 */
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    /*
     * When A is not below B, halve the dividend first.  The bit shifted
     * out of a1 is dropped.  NOTE(review): presumably the low bits of a
     * canonical 128-bit fraction are always zero here -- confirm.
     */
    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    /* q0 is the high quotient word, q1 the low word with the sticky bit. */
    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}
 984
 985 #define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
 986
 987 static bool frac64_eqz(FloatParts64 *a)
 988 {
 989     return a->frac == 0;
 990 }
 991
 992 static bool frac128_eqz(FloatParts128 *a)
 993 {
 994     return (a->frac_hi | a->frac_lo) == 0;
 995 }
 996
 997 #define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
 998
 999 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1000 {
1001     mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1002 }
1003
1004 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1005 {
1006     mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1007                 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1008 }
1009
1010 #define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1011
1012 static void frac64_neg(FloatParts64 *a)
1013 {
1014     a->frac = -a->frac;
1015 }
1016
1017 static void frac128_neg(FloatParts128 *a)
1018 {
1019     bool c = 0;
1020     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1021     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1022 }
1023
1024 static void frac256_neg(FloatParts256 *a)
1025 {
1026     bool c = 0;
1027     a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1028     a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1029     a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1030     a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1031 }
1032
1033 #define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1034
1035 static int frac64_normalize(FloatParts64 *a)
1036 {
1037     if (a->frac) {
1038         int shift = clz64(a->frac);
1039         a->frac <<= shift;
1040         return shift;
1041     }
1042     return 64;
1043 }
1044
1045 static int frac128_normalize(FloatParts128 *a)
1046 {
1047     if (a->frac_hi) {
1048         int shl = clz64(a->frac_hi);
1049         a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1050         a->frac_lo <<= shl;
1051         return shl;
1052     } else if (a->frac_lo) {
1053         int shl = clz64(a->frac_lo);
1054         a->frac_hi = a->frac_lo << shl;
1055         a->frac_lo = 0;
1056         return shl + 64;
1057     }
1058     return 128;
1059 }
1060
/*
 * Shift the 256-bit fraction left until the msb of frac_hi is set.
 *
 * Returns the total shift applied (0..255), or 256 when all four limbs
 * are zero.  When frac_hi already has its msb set the function returns 0
 * without writing anything back.
 */
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            /* Already normalized; nothing changed, so skip the stores. */
            return 0;
        }
        ret = shl;
    } else {
        /*
         * The top limb is empty: move whole limbs up first, counting
         * 64 bits per limb, then handle the remaining sub-word shift.
         */
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            /* Entire fraction is zero. */
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            /* Whole-limb move was enough; store the moved limbs. */
            goto done;
        }
        ret += shl;
    }

    /* Sub-word shift; shl is in 1..63 on every path reaching here. */
    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}
1107
1108 #define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1109
1110 static void frac64_shl(FloatParts64 *a, int c)
1111 {
1112     a->frac <<= c;
1113 }
1114
1115 static void frac128_shl(FloatParts128 *a, int c)
1116 {
1117     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1118
1119     if (c & 64) {
1120         a0 = a1, a1 = 0;
1121     }
1122
1123     c &= 63;
1124     if (c) {
1125         a0 = shl_double(a0, a1, c);
1126         a1 = a1 << c;
1127     }
1128
1129     a->frac_hi = a0;
1130     a->frac_lo = a1;
1131 }
1132
1133 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1134
1135 static void frac64_shr(FloatParts64 *a, int c)
1136 {
1137     a->frac >>= c;
1138 }
1139
1140 static void frac128_shr(FloatParts128 *a, int c)
1141 {
1142     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1143
1144     if (c & 64) {
1145         a1 = a0, a0 = 0;
1146     }
1147
1148     c &= 63;
1149     if (c) {
1150         a1 = shr_double(a0, a1, c);
1151         a0 = a0 >> c;
1152     }
1153
1154     a->frac_hi = a0;
1155     a->frac_lo = a1;
1156 }
1157
1158 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1159
1160 static void frac64_shrjam(FloatParts64 *a, int c)
1161 {
1162     uint64_t a0 = a->frac;
1163
1164     if (likely(c != 0)) {
1165         if (likely(c < 64)) {
1166             a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1167         } else {
1168             a0 = a0 != 0;
1169         }
1170         a->frac = a0;
1171     }
1172 }
1173
1174 static void frac128_shrjam(FloatParts128 *a, int c)
1175 {
1176     uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1177     uint64_t sticky = 0;
1178
1179     if (unlikely(c == 0)) {
1180         return;
1181     } else if (likely(c < 64)) {
1182         /* nothing */
1183     } else if (likely(c < 128)) {
1184         sticky = a1;
1185         a1 = a0;
1186         a0 = 0;
1187         c &= 63;
1188         if (c == 0) {
1189             goto done;
1190         }
1191     } else {
1192         sticky = a0 | a1;
1193         a0 = a1 = 0;
1194         goto done;
1195     }
1196
1197     sticky |= shr_double(a1, 0, c);
1198     a1 = shr_double(a0, a1, c);
1199     a0 = a0 >> c;
1200
1201  done:
1202     a->frac_lo = a1 | (sticky != 0);
1203     a->frac_hi = a0;
1204 }
1205
/*
 * Shift the 256-bit fraction right by c bits, "jamming" any non-zero bits
 * that fall off into the lsb of frac_lo.  A shift of 256 or more leaves
 * only that sticky bit; a shift of 0 returns without touching A.
 */
static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        /* Whole-limb moves: bit 7 of c moves two limbs, bit 6 moves one. */
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            /* Shift was a multiple of 64; no sub-word shift remains. */
            goto done;
        }
    } else {
        /* Everything is shifted out. */
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    /* Sub-word shift; collect the bits leaving a3 before overwriting it. */
    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}
1247
1248 #define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1249
1250 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1251 {
1252     return usub64_overflow(a->frac, b->frac, &r->frac);
1253 }
1254
1255 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1256 {
1257     bool c = 0;
1258     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1259     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1260     return c;
1261 }
1262
1263 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1264 {
1265     bool c = 0;
1266     r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1267     r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1268     r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1269     r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1270     return c;
1271 }
1272
1273 #define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1274
1275 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1276 {
1277     r->frac = a->frac_hi | (a->frac_lo != 0);
1278 }
1279
1280 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1281 {
1282     r->frac_hi = a->frac_hi;
1283     r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1284 }
1285
1286 #define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1287
1288 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1289 {
1290     r->frac_hi = a->frac;
1291     r->frac_lo = 0;
1292 }
1293
1294 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1295 {
1296     r->frac_hi = a->frac_hi;
1297     r->frac_hm = a->frac_lo;
1298     r->frac_lm = 0;
1299     r->frac_lo = 0;
1300 }
1301
1302 #define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1303
/*
 * Instantiate the width-parameterised code in the .c.inc files.
 * While N and W are defined, partsN(NAME) names parts<N>_NAME,
 * FloatPartsN names FloatParts<N> and FloatPartsW names FloatParts<W>
 * (NOTE(review): assuming glue() is plain token pasting -- confirm).
 */
#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN    glue(FloatParts,N)
#define FloatPartsW    glue(FloatParts,W)

/* 64-bit parts, widening to 128 bits. */
#define N 64
#define W 128

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

/* 128-bit parts, widening to 256 bits. */
#undef  N
#undef  W
#define N 128
#define W 256

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

/*
 * 256-bit parts: only the add/sub template is instantiated, and no
 * wider type W is defined for it.
 */
#undef  N
#undef  W
#define N            256

#include "softfloat-parts-addsub.c.inc"

/* Tear down the template parameters so they do not leak further. */
#undef  N
#undef  W
#undef  partsN
#undef  FloatPartsN
#undef  FloatPartsW
1333
1334 /*
1335  * Pack/unpack routines with a specific FloatFmt.
1336  */
1337
1338 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1339                                       float_status *s, const FloatFmt *params)
1340 {
1341     float16_unpack_raw(p, f);
1342     parts_canonicalize(p, s, params);
1343 }
1344
1345 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1346                                      float_status *s)
1347 {
1348     float16a_unpack_canonical(p, f, s, &float16_params);
1349 }
1350
1351 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1352                                       float_status *s)
1353 {
1354     bfloat16_unpack_raw(p, f);
1355     parts_canonicalize(p, s, &bfloat16_params);
1356 }
1357
1358 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1359                                              float_status *s,
1360                                              const FloatFmt *params)
1361 {
1362     parts_uncanon(p, s, params);
1363     return float16_pack_raw(p);
1364 }
1365
1366 static float16 float16_round_pack_canonical(FloatParts64 *p,
1367                                             float_status *s)
1368 {
1369     return float16a_round_pack_canonical(p, s, &float16_params);
1370 }
1371
1372 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1373                                               float_status *s)
1374 {
1375     parts_uncanon(p, s, &bfloat16_params);
1376     return bfloat16_pack_raw(p);
1377 }
1378
1379 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1380                                      float_status *s)
1381 {
1382     float32_unpack_raw(p, f);
1383     parts_canonicalize(p, s, &float32_params);
1384 }
1385
1386 static float32 float32_round_pack_canonical(FloatParts64 *p,
1387                                             float_status *s)
1388 {
1389     parts_uncanon(p, s, &float32_params);
1390     return float32_pack_raw(p);
1391 }
1392
1393 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1394                                      float_status *s)
1395 {
1396     float64_unpack_raw(p, f);
1397     parts_canonicalize(p, s, &float64_params);
1398 }
1399
1400 static float64 float64_round_pack_canonical(FloatParts64 *p,
1401                                             float_status *s)
1402 {
1403     parts_uncanon(p, s, &float64_params);
1404     return float64_pack_raw(p);
1405 }
1406
1407 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1408                                       float_status *s)
1409 {
1410     float128_unpack_raw(p, f);
1411     parts_canonicalize(p, s, &float128_params);
1412 }
1413
1414 static float128 float128_round_pack_canonical(FloatParts128 *p,
1415                                               float_status *s)
1416 {
1417     parts_uncanon(p, s, &float128_params);
1418     return float128_pack_raw(p);
1419 }
1420
1421 /*
1422  * Addition and subtraction
1423  */
1424
1425 static float16 QEMU_FLATTEN
1426 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1427 {
1428     FloatParts64 pa, pb, *pr;
1429
1430     float16_unpack_canonical(&pa, a, status);
1431     float16_unpack_canonical(&pb, b, status);
1432     pr = parts_addsub(&pa, &pb, status, subtract);
1433
1434     return float16_round_pack_canonical(pr, status);
1435 }
1436
1437 float16 float16_add(float16 a, float16 b, float_status *status)
1438 {
1439     return float16_addsub(a, b, status, false);
1440 }
1441
1442 float16 float16_sub(float16 a, float16 b, float_status *status)
1443 {
1444     return float16_addsub(a, b, status, true);
1445 }
1446
1447 static float32 QEMU_SOFTFLOAT_ATTR
1448 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1449 {
1450     FloatParts64 pa, pb, *pr;
1451
1452     float32_unpack_canonical(&pa, a, status);
1453     float32_unpack_canonical(&pb, b, status);
1454     pr = parts_addsub(&pa, &pb, status, subtract);
1455
1456     return float32_round_pack_canonical(pr, status);
1457 }
1458
1459 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1460 {
1461     return soft_f32_addsub(a, b, status, false);
1462 }
1463
1464 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1465 {
1466     return soft_f32_addsub(a, b, status, true);
1467 }
1468
1469 static float64 QEMU_SOFTFLOAT_ATTR
1470 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1471 {
1472     FloatParts64 pa, pb, *pr;
1473
1474     float64_unpack_canonical(&pa, a, status);
1475     float64_unpack_canonical(&pb, b, status);
1476     pr = parts_addsub(&pa, &pb, status, subtract);
1477
1478     return float64_round_pack_canonical(pr, status);
1479 }
1480
1481 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1482 {
1483     return soft_f64_addsub(a, b, status, false);
1484 }
1485
1486 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1487 {
1488     return soft_f64_addsub(a, b, status, true);
1489 }
1490
/* Host-FPU single-precision addition. */
static float hard_f32_add(float a, float b)
{
    float sum = a + b;

    return sum;
}
1495
/* Host-FPU single-precision subtraction. */
static float hard_f32_sub(float a, float b)
{
    float diff = a - b;

    return diff;
}
1500
/* Host-FPU double-precision addition. */
static double hard_f64_add(double a, double b)
{
    double sum = a + b;

    return sum;
}
1505
/* Host-FPU double-precision subtraction. */
static double hard_f64_sub(double a, double b)
{
    double diff = a - b;

    return diff;
}
1510
1511 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1512 {
1513     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1514         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1515     }
1516     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1517 }
1518
1519 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1520 {
1521     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1522         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1523     } else {
1524         return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1525     }
1526 }
1527
1528 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1529                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1530 {
1531     return float32_gen2(a, b, s, hard, soft,
1532                         f32_is_zon2, f32_addsubmul_post);
1533 }
1534
1535 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1536                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1537 {
1538     return float64_gen2(a, b, s, hard, soft,
1539                         f64_is_zon2, f64_addsubmul_post);
1540 }
1541
1542 float32 QEMU_FLATTEN
1543 float32_add(float32 a, float32 b, float_status *s)
1544 {
1545     return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1546 }
1547
1548 float32 QEMU_FLATTEN
1549 float32_sub(float32 a, float32 b, float_status *s)
1550 {
1551     return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1552 }
1553
1554 float64 QEMU_FLATTEN
1555 float64_add(float64 a, float64 b, float_status *s)
1556 {
1557     return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1558 }
1559
1560 float64 QEMU_FLATTEN
1561 float64_sub(float64 a, float64 b, float_status *s)
1562 {
1563     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1564 }
1565
1566 static bfloat16 QEMU_FLATTEN
1567 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1568 {
1569     FloatParts64 pa, pb, *pr;
1570
1571     bfloat16_unpack_canonical(&pa, a, status);
1572     bfloat16_unpack_canonical(&pb, b, status);
1573     pr = parts_addsub(&pa, &pb, status, subtract);
1574
1575     return bfloat16_round_pack_canonical(pr, status);
1576 }
1577
1578 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1579 {
1580     return bfloat16_addsub(a, b, status, false);
1581 }
1582
1583 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1584 {
1585     return bfloat16_addsub(a, b, status, true);
1586 }
1587
1588 static float128 QEMU_FLATTEN
1589 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1590 {
1591     FloatParts128 pa, pb, *pr;
1592
1593     float128_unpack_canonical(&pa, a, status);
1594     float128_unpack_canonical(&pb, b, status);
1595     pr = parts_addsub(&pa, &pb, status, subtract);
1596
1597     return float128_round_pack_canonical(pr, status);
1598 }
1599
1600 float128 float128_add(float128 a, float128 b, float_status *status)
1601 {
1602     return float128_addsub(a, b, status, false);
1603 }
1604
1605 float128 float128_sub(float128 a, float128 b, float_status *status)
1606 {
1607     return float128_addsub(a, b, status, true);
1608 }
1609
1610 /*
1611  * Multiplication
1612  */
1613
1614 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
1615 {
1616     FloatParts64 pa, pb, *pr;
1617
1618     float16_unpack_canonical(&pa, a, status);
1619     float16_unpack_canonical(&pb, b, status);
1620     pr = parts_mul(&pa, &pb, status);
1621
1622     return float16_round_pack_canonical(pr, status);
1623 }
1624
1625 static float32 QEMU_SOFTFLOAT_ATTR
1626 soft_f32_mul(float32 a, float32 b, float_status *status)
1627 {
1628     FloatParts64 pa, pb, *pr;
1629
1630     float32_unpack_canonical(&pa, a, status);
1631     float32_unpack_canonical(&pb, b, status);
1632     pr = parts_mul(&pa, &pb, status);
1633
1634     return float32_round_pack_canonical(pr, status);
1635 }
1636
1637 static float64 QEMU_SOFTFLOAT_ATTR
1638 soft_f64_mul(float64 a, float64 b, float_status *status)
1639 {
1640     FloatParts64 pa, pb, *pr;
1641
1642     float64_unpack_canonical(&pa, a, status);
1643     float64_unpack_canonical(&pb, b, status);
1644     pr = parts_mul(&pa, &pb, status);
1645
1646     return float64_round_pack_canonical(pr, status);
1647 }
1648
/* Host-FPU single-precision multiplication. */
static float hard_f32_mul(float a, float b)
{
    float prod = a * b;

    return prod;
}
1653
/* Host-FPU double-precision multiplication. */
static double hard_f64_mul(double a, double b)
{
    double prod = a * b;

    return prod;
}
1658
1659 float32 QEMU_FLATTEN
1660 float32_mul(float32 a, float32 b, float_status *s)
1661 {
1662     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
1663                         f32_is_zon2, f32_addsubmul_post);
1664 }
1665
1666 float64 QEMU_FLATTEN
1667 float64_mul(float64 a, float64 b, float_status *s)
1668 {
1669     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
1670                         f64_is_zon2, f64_addsubmul_post);
1671 }
1672
1673 bfloat16 QEMU_FLATTEN
1674 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1675 {
1676     FloatParts64 pa, pb, *pr;
1677
1678     bfloat16_unpack_canonical(&pa, a, status);
1679     bfloat16_unpack_canonical(&pb, b, status);
1680     pr = parts_mul(&pa, &pb, status);
1681
1682     return bfloat16_round_pack_canonical(pr, status);
1683 }
1684
1685 float128 QEMU_FLATTEN
1686 float128_mul(float128 a, float128 b, float_status *status)
1687 {
1688     FloatParts128 pa, pb, *pr;
1689
1690     float128_unpack_canonical(&pa, a, status);
1691     float128_unpack_canonical(&pb, b, status);
1692     pr = parts_mul(&pa, &pb, status);
1693
1694     return float128_round_pack_canonical(pr, status);
1695 }
1696
1697 /*
1698  * Fused multiply-add
1699  */
1700
1701 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1702                                     int flags, float_status *status)
1703 {
1704     FloatParts64 pa, pb, pc, *pr;
1705
1706     float16_unpack_canonical(&pa, a, status);
1707     float16_unpack_canonical(&pb, b, status);
1708     float16_unpack_canonical(&pc, c, status);
1709     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1710
1711     return float16_round_pack_canonical(pr, status);
1712 }
1713
1714 static float32 QEMU_SOFTFLOAT_ATTR
1715 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1716                 float_status *status)
1717 {
1718     FloatParts64 pa, pb, pc, *pr;
1719
1720     float32_unpack_canonical(&pa, a, status);
1721     float32_unpack_canonical(&pb, b, status);
1722     float32_unpack_canonical(&pc, c, status);
1723     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1724
1725     return float32_round_pack_canonical(pr, status);
1726 }
1727
1728 static float64 QEMU_SOFTFLOAT_ATTR
1729 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1730                 float_status *status)
1731 {
1732     FloatParts64 pa, pb, pc, *pr;
1733
1734     float64_unpack_canonical(&pa, a, status);
1735     float64_unpack_canonical(&pb, b, status);
1736     float64_unpack_canonical(&pc, c, status);
1737     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1738
1739     return float64_round_pack_canonical(pr, status);
1740 }
1741
/*
 * When true, float32_muladd() and float64_muladd() skip the host fma
 * fast path and use the softfloat implementation instead.
 * NOTE(review): the code that sets this flag is outside this chunk.
 */
static bool force_soft_fma;
1743
/*
 * float32 fused multiply-add with an optional host-FPU fast path.
 *
 * The host path is used only when can_use_fpu() allows it, FLAGS does not
 * request float_muladd_halve_result, f32_is_zon3() accepts the (flushed)
 * operands and force_soft_fma is clear.  In every other case, and when
 * the host result has magnitude <= FLT_MIN, the work is handed to
 * soft_f32_muladd() with the original FLAGS.
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    /*
     * Runs before the remaining checks; any later jump to the soft path
     * therefore passes the operands as left by this call.
     */
    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        /* Build the signed-zero product by hand, then add the addend. */
        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        /*
         * Keep the un-negated inputs: the soft path applies the negate
         * flags itself, so it must not see operands already negated here.
         */
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /* Result is at or below the smallest normal: redo in softfloat. */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
1814
1815 float64 QEMU_FLATTEN
1816 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1817 {
1818     union_float64 ua, ub, uc, ur;
1819
1820     ua.s = xa;
1821     ub.s = xb;
1822     uc.s = xc;
1823
1824     if (unlikely(!can_use_fpu(s))) {
1825         goto soft;
1826     }
1827     if (unlikely(flags & float_muladd_halve_result)) {
1828         goto soft;
1829     }
1830
1831     float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1832     if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1833         goto soft;
1834     }
1835
1836     if (unlikely(force_soft_fma)) {
1837         goto soft;
1838     }
1839
1840     /*
1841      * When (a || b) == 0, there's no need to check for under/over flow,
1842      * since we know the addend is (normal || 0) and the product is 0.
1843      */
1844     if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1845         union_float64 up;
1846         bool prod_sign;
1847
1848         prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1849         prod_sign ^= !!(flags & float_muladd_negate_product);
1850         up.s = float64_set_sign(float64_zero, prod_sign);
1851
1852         if (flags & float_muladd_negate_c) {
1853             uc.h = -uc.h;
1854         }
1855         ur.h = up.h + uc.h;
1856     } else {
1857         union_float64 ua_orig = ua;
1858         union_float64 uc_orig = uc;
1859
1860         if (flags & float_muladd_negate_product) {
1861             ua.h = -ua.h;
1862         }
1863         if (flags & float_muladd_negate_c) {
1864             uc.h = -uc.h;
1865         }
1866
1867         ur.h = fma(ua.h, ub.h, uc.h);
1868
1869         if (unlikely(f64_is_inf(ur))) {
1870             float_raise(float_flag_overflow, s);
1871         } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
1872             ua = ua_orig;
1873             uc = uc_orig;
1874             goto soft;
1875         }
1876     }
1877     if (flags & float_muladd_negate_result) {
1878         return float64_chs(ur.s);
1879     }
1880     return ur.s;
1881
1882  soft:
1883     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1884 }
1885
1886 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1887                                       int flags, float_status *status)
1888 {
1889     FloatParts64 pa, pb, pc, *pr;
1890
1891     bfloat16_unpack_canonical(&pa, a, status);
1892     bfloat16_unpack_canonical(&pb, b, status);
1893     bfloat16_unpack_canonical(&pc, c, status);
1894     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1895
1896     return bfloat16_round_pack_canonical(pr, status);
1897 }
1898
1899 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
1900                                       int flags, float_status *status)
1901 {
1902     FloatParts128 pa, pb, pc, *pr;
1903
1904     float128_unpack_canonical(&pa, a, status);
1905     float128_unpack_canonical(&pb, b, status);
1906     float128_unpack_canonical(&pc, c, status);
1907     pr = parts_muladd(&pa, &pb, &pc, flags, status);
1908
1909     return float128_round_pack_canonical(pr, status);
1910 }
1911
1912 /*
1913  * Division
1914  */
1915
1916 float16 float16_div(float16 a, float16 b, float_status *status)
1917 {
1918     FloatParts64 pa, pb, *pr;
1919
1920     float16_unpack_canonical(&pa, a, status);
1921     float16_unpack_canonical(&pb, b, status);
1922     pr = parts_div(&pa, &pb, status);
1923
1924     return float16_round_pack_canonical(pr, status);
1925 }
1926
1927 static float32 QEMU_SOFTFLOAT_ATTR
1928 soft_f32_div(float32 a, float32 b, float_status *status)
1929 {
1930     FloatParts64 pa, pb, *pr;
1931
1932     float32_unpack_canonical(&pa, a, status);
1933     float32_unpack_canonical(&pb, b, status);
1934     pr = parts_div(&pa, &pb, status);
1935
1936     return float32_round_pack_canonical(pr, status);
1937 }
1938
1939 static float64 QEMU_SOFTFLOAT_ATTR
1940 soft_f64_div(float64 a, float64 b, float_status *status)
1941 {
1942     FloatParts64 pa, pb, *pr;
1943
1944     float64_unpack_canonical(&pa, a, status);
1945     float64_unpack_canonical(&pb, b, status);
1946     pr = parts_div(&pa, &pb, status);
1947
1948     return float64_round_pack_canonical(pr, status);
1949 }
1950
/* Host-FPU single-precision division: returns a / b. */
static float hard_f32_div(float a, float b)
{
    const float quotient = a / b;

    return quotient;
}
1955
/* Host-FPU double-precision division: returns a / b. */
static double hard_f64_div(double a, double b)
{
    const double quotient = a / b;

    return quotient;
}
1960
1961 static bool f32_div_pre(union_float32 a, union_float32 b)
1962 {
1963     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1964         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1965                fpclassify(b.h) == FP_NORMAL;
1966     }
1967     return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1968 }
1969
1970 static bool f64_div_pre(union_float64 a, union_float64 b)
1971 {
1972     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1973         return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1974                fpclassify(b.h) == FP_NORMAL;
1975     }
1976     return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1977 }
1978
1979 static bool f32_div_post(union_float32 a, union_float32 b)
1980 {
1981     if (QEMU_HARDFLOAT_2F32_USE_FP) {
1982         return fpclassify(a.h) != FP_ZERO;
1983     }
1984     return !float32_is_zero(a.s);
1985 }
1986
1987 static bool f64_div_post(union_float64 a, union_float64 b)
1988 {
1989     if (QEMU_HARDFLOAT_2F64_USE_FP) {
1990         return fpclassify(a.h) != FP_ZERO;
1991     }
1992     return !float64_is_zero(a.s);
1993 }
1994
1995 float32 QEMU_FLATTEN
1996 float32_div(float32 a, float32 b, float_status *s)
1997 {
1998     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
1999                         f32_div_pre, f32_div_post);
2000 }
2001
2002 float64 QEMU_FLATTEN
2003 float64_div(float64 a, float64 b, float_status *s)
2004 {
2005     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2006                         f64_div_pre, f64_div_post);
2007 }
2008
2009 bfloat16 QEMU_FLATTEN
2010 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2011 {
2012     FloatParts64 pa, pb, *pr;
2013
2014     bfloat16_unpack_canonical(&pa, a, status);
2015     bfloat16_unpack_canonical(&pb, b, status);
2016     pr = parts_div(&pa, &pb, status);
2017
2018     return bfloat16_round_pack_canonical(pr, status);
2019 }
2020
2021 float128 QEMU_FLATTEN
2022 float128_div(float128 a, float128 b, float_status *status)
2023 {
2024     FloatParts128 pa, pb, *pr;
2025
2026     float128_unpack_canonical(&pa, a, status);
2027     float128_unpack_canonical(&pb, b, status);
2028     pr = parts_div(&pa, &pb, status);
2029
2030     return float128_round_pack_canonical(pr, status);
2031 }
2032
2033 /*
2034  * Float to Float conversions
2035  *
2036  * Returns the result of converting one float format to another. The
2037  * conversion is performed according to the IEC/IEEE Standard for
2038  * Binary Floating-Point Arithmetic.
2039  *
2040  * Usually this only needs to take care of raising invalid exceptions
2041  * and handling the conversion on NaNs.
2042  */
2043
2044 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2045 {
2046     switch (a->cls) {
2047     case float_class_qnan:
2048     case float_class_snan:
2049         /*
2050          * There is no NaN in the destination format.  Raise Invalid
2051          * and return a zero with the sign of the input NaN.
2052          */
2053         float_raise(float_flag_invalid, s);
2054         a->cls = float_class_zero;
2055         break;
2056
2057     case float_class_inf:
2058         /*
2059          * There is no Inf in the destination format.  Raise Invalid
2060          * and return the maximum normal with the correct sign.
2061          */
2062         float_raise(float_flag_invalid, s);
2063         a->cls = float_class_normal;
2064         a->exp = float16_params_ahp.exp_max;
2065         a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2066                                   float16_params_ahp.frac_size + 1);
2067         break;
2068
2069     case float_class_normal:
2070     case float_class_zero:
2071         break;
2072
2073     default:
2074         g_assert_not_reached();
2075     }
2076 }
2077
2078 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2079 {
2080     if (is_nan(a->cls)) {
2081         parts_return_nan(a, s);
2082     }
2083 }
2084
2085 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2086 {
2087     if (is_nan(a->cls)) {
2088         parts_return_nan(a, s);
2089     }
2090 }
2091
/*
 * Generic front end for parts64_float_to_float()/parts128_float_to_float().
 * NOTE(review): PARTS_GENERIC_64_128 is defined outside this section; it
 * presumably picks the variant matching the pointer type of P -- confirm.
 */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2094
2095 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2096                                         float_status *s)
2097 {
2098     a->cls = b->cls;
2099     a->sign = b->sign;
2100     a->exp = b->exp;
2101
2102     if (a->cls == float_class_normal) {
2103         frac_truncjam(a, b);
2104     } else if (is_nan(a->cls)) {
2105         /* Discard the low bits of the NaN. */
2106         a->frac = b->frac_hi;
2107         parts_return_nan(a, s);
2108     }
2109 }
2110
2111 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2112                                        float_status *s)
2113 {
2114     a->cls = b->cls;
2115     a->sign = b->sign;
2116     a->exp = b->exp;
2117     frac_widen(a, b);
2118
2119     if (is_nan(a->cls)) {
2120         parts_return_nan(a, s);
2121     }
2122 }
2123
2124 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2125 {
2126     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2127     FloatParts64 p;
2128
2129     float16a_unpack_canonical(&p, a, s, fmt16);
2130     parts_float_to_float(&p, s);
2131     return float32_round_pack_canonical(&p, s);
2132 }
2133
2134 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2135 {
2136     const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2137     FloatParts64 p;
2138
2139     float16a_unpack_canonical(&p, a, s, fmt16);
2140     parts_float_to_float(&p, s);
2141     return float64_round_pack_canonical(&p, s);
2142 }
2143
2144 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2145 {
2146     FloatParts64 p;
2147     const FloatFmt *fmt;
2148
2149     float32_unpack_canonical(&p, a, s);
2150     if (ieee) {
2151         parts_float_to_float(&p, s);
2152         fmt = &float16_params;
2153     } else {
2154         parts_float_to_ahp(&p, s);
2155         fmt = &float16_params_ahp;
2156     }
2157     return float16a_round_pack_canonical(&p, s, fmt);
2158 }
2159
2160 static float64 QEMU_SOFTFLOAT_ATTR
2161 soft_float32_to_float64(float32 a, float_status *s)
2162 {
2163     FloatParts64 p;
2164
2165     float32_unpack_canonical(&p, a, s);
2166     parts_float_to_float(&p, s);
2167     return float64_round_pack_canonical(&p, s);
2168 }
2169
2170 float64 float32_to_float64(float32 a, float_status *s)
2171 {
2172     if (likely(float32_is_normal(a))) {
2173         /* Widening conversion can never produce inexact results.  */
2174         union_float32 uf;
2175         union_float64 ud;
2176         uf.s = a;
2177         ud.h = uf.h;
2178         return ud.s;
2179     } else if (float32_is_zero(a)) {
2180         return float64_set_sign(float64_zero, float32_is_neg(a));
2181     } else {
2182         return soft_float32_to_float64(a, s);
2183     }
2184 }
2185
2186 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2187 {
2188     FloatParts64 p;
2189     const FloatFmt *fmt;
2190
2191     float64_unpack_canonical(&p, a, s);
2192     if (ieee) {
2193         parts_float_to_float(&p, s);
2194         fmt = &float16_params;
2195     } else {
2196         parts_float_to_ahp(&p, s);
2197         fmt = &float16_params_ahp;
2198     }
2199     return float16a_round_pack_canonical(&p, s, fmt);
2200 }
2201
2202 float32 float64_to_float32(float64 a, float_status *s)
2203 {
2204     FloatParts64 p;
2205
2206     float64_unpack_canonical(&p, a, s);
2207     parts_float_to_float(&p, s);
2208     return float32_round_pack_canonical(&p, s);
2209 }
2210
2211 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2212 {
2213     FloatParts64 p;
2214
2215     bfloat16_unpack_canonical(&p, a, s);
2216     parts_float_to_float(&p, s);
2217     return float32_round_pack_canonical(&p, s);
2218 }
2219
2220 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2221 {
2222     FloatParts64 p;
2223
2224     bfloat16_unpack_canonical(&p, a, s);
2225     parts_float_to_float(&p, s);
2226     return float64_round_pack_canonical(&p, s);
2227 }
2228
2229 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2230 {
2231     FloatParts64 p;
2232
2233     float32_unpack_canonical(&p, a, s);
2234     parts_float_to_float(&p, s);
2235     return bfloat16_round_pack_canonical(&p, s);
2236 }
2237
2238 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2239 {
2240     FloatParts64 p;
2241
2242     float64_unpack_canonical(&p, a, s);
2243     parts_float_to_float(&p, s);
2244     return bfloat16_round_pack_canonical(&p, s);
2245 }
2246
2247 float32 float128_to_float32(float128 a, float_status *s)
2248 {
2249     FloatParts64 p64;
2250     FloatParts128 p128;
2251
2252     float128_unpack_canonical(&p128, a, s);
2253     parts_float_to_float_narrow(&p64, &p128, s);
2254     return float32_round_pack_canonical(&p64, s);
2255 }
2256
2257 float64 float128_to_float64(float128 a, float_status *s)
2258 {
2259     FloatParts64 p64;
2260     FloatParts128 p128;
2261
2262     float128_unpack_canonical(&p128, a, s);
2263     parts_float_to_float_narrow(&p64, &p128, s);
2264     return float64_round_pack_canonical(&p64, s);
2265 }
2266
2267 float128 float32_to_float128(float32 a, float_status *s)
2268 {
2269     FloatParts64 p64;
2270     FloatParts128 p128;
2271
2272     float32_unpack_canonical(&p64, a, s);
2273     parts_float_to_float_widen(&p128, &p64, s);
2274     return float128_round_pack_canonical(&p128, s);
2275 }
2276
2277 float128 float64_to_float128(float64 a, float_status *s)
2278 {
2279     FloatParts64 p64;
2280     FloatParts128 p128;
2281
2282     float64_unpack_canonical(&p64, a, s);
2283     parts_float_to_float_widen(&p128, &p64, s);
2284     return float128_round_pack_canonical(&p128, s);
2285 }
2286
2287 /*
2288  * Rounds the floating-point value `a' to an integer, and returns the
2289  * result as a floating-point value. The operation is performed
2290  * according to the IEC/IEEE Standard for Binary Floating-Point
2291  * Arithmetic.
2292  */
2293
/*
 * Round the decomposed value @a, scaled by 2**@scale, to an integral
 * value using rounding mode @rmode, and return the resulting parts.
 * float_flag_inexact is raised in @s whenever fractional bits are
 * discarded.  NaNs are passed to parts_return_nan(); zeros and
 * infinities are returned unchanged.
 */
static FloatParts64 round_to_int(FloatParts64 a, FloatRoundMode rmode,
                               int scale, float_status *s)
{
    switch (a.cls) {
    case float_class_qnan:
    case float_class_snan:
        parts_return_nan(&a, s);
        break;

    case float_class_zero:
    case float_class_inf:
        /* already "integral" */
        break;

    case float_class_normal:
        /*
         * Clamp the scale to [-0x10000, 0x10000] before applying it;
         * presumably this keeps a.exp += scale from overflowing.
         */
        scale = MIN(MAX(scale, -0x10000), 0x10000);
        a.exp += scale;

        if (a.exp >= DECOMPOSED_BINARY_POINT) {
            /* already integral */
            break;
        }
        if (a.exp < 0) {
            /* 'one' records whether the value rounds to 1 rather than 0. */
            bool one;
            /* all fractional */
            float_raise(float_flag_inexact, s);
            switch (rmode) {
            case float_round_nearest_even:
                /* Only exp == -1 with fraction bits set rounds up. */
                one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
                break;
            case float_round_ties_away:
                /* Any value with exp == -1 rounds up. */
                one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
                break;
            case float_round_to_zero:
                one = false;
                break;
            case float_round_up:
                one = !a.sign;
                break;
            case float_round_down:
                one = a.sign;
                break;
            case float_round_to_odd:
                one = true;
                break;
            default:
                g_assert_not_reached();
            }

            if (one) {
                /* Result is 1 (sign unchanged): implicit bit, exponent 0. */
                a.frac = DECOMPOSED_IMPLICIT_BIT;
                a.exp = 0;
            } else {
                a.cls = float_class_zero;
            }
        } else {
            /*
             * frac_lsb:      fraction bit of the integer units place
             * frac_lsbm1:    the bit just below it (one half)
             * rnd_mask:      all bits below frac_lsb
             * rnd_even_mask: rnd_mask plus frac_lsb itself
             */
            uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
            uint64_t frac_lsbm1 = frac_lsb >> 1;
            uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
            uint64_t rnd_mask = rnd_even_mask >> 1;
            /* Amount added to the fraction before truncating. */
            uint64_t inc;

            switch (rmode) {
            case float_round_nearest_even:
                /* Add one half, except on an exact tie with an even LSB. */
                inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
                break;
            case float_round_ties_away:
                inc = frac_lsbm1;
                break;
            case float_round_to_zero:
                inc = 0;
                break;
            case float_round_up:
                inc = a.sign ? 0 : rnd_mask;
                break;
            case float_round_down:
                inc = a.sign ? rnd_mask : 0;
                break;
            case float_round_to_odd:
                /* Bump only when the integer LSB is currently clear. */
                inc = a.frac & frac_lsb ? 0 : rnd_mask;
                break;
            default:
                g_assert_not_reached();
            }

            /* Nothing to do (and nothing inexact) if no fraction bits set. */
            if (a.frac & rnd_mask) {
                float_raise(float_flag_inexact, s);
                if (uadd64_overflow(a.frac, inc, &a.frac)) {
                    /* Carry out of bit 63: renormalize and bump exponent. */
                    a.frac >>= 1;
                    a.frac |= DECOMPOSED_IMPLICIT_BIT;
                    a.exp++;
                }
                /* Drop the fractional bits. */
                a.frac &= ~rnd_mask;
            }
        }
        break;
    default:
        g_assert_not_reached();
    }
    return a;
}
2395
2396 float16 float16_round_to_int(float16 a, float_status *s)
2397 {
2398     FloatParts64 pa, pr;
2399
2400     float16_unpack_canonical(&pa, a, s);
2401     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2402     return float16_round_pack_canonical(&pr, s);
2403 }
2404
2405 float32 float32_round_to_int(float32 a, float_status *s)
2406 {
2407     FloatParts64 pa, pr;
2408
2409     float32_unpack_canonical(&pa, a, s);
2410     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2411     return float32_round_pack_canonical(&pr, s);
2412 }
2413
2414 float64 float64_round_to_int(float64 a, float_status *s)
2415 {
2416     FloatParts64 pa, pr;
2417
2418     float64_unpack_canonical(&pa, a, s);
2419     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2420     return float64_round_pack_canonical(&pr, s);
2421 }
2422
2423 /*
2424  * Rounds the bfloat16 value `a' to an integer, and returns the
2425  * result as a bfloat16 value.
2426  */
2427
2428 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2429 {
2430     FloatParts64 pa, pr;
2431
2432     bfloat16_unpack_canonical(&pa, a, s);
2433     pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2434     return bfloat16_round_pack_canonical(&pr, s);
2435 }
2436
2437 /*
2438  * Returns the result of converting the floating-point value `a' to
2439  * the two's complement integer format. The conversion is performed
2440  * according to the IEC/IEEE Standard for Binary Floating-Point
2441  * Arithmetic---which means in particular that the conversion is
2442  * rounded according to the current rounding mode. If `a' is a NaN,
2443  * the largest positive integer is returned. Otherwise, if the
2444  * conversion overflows, the largest integer with the same sign as `a'
2445  * is returned.
2446 */
2447
2448 static int64_t round_to_int_and_pack(FloatParts64 in, FloatRoundMode rmode,
2449                                      int scale, int64_t min, int64_t max,
2450                                      float_status *s)
2451 {
2452     uint64_t r;
2453     int orig_flags = get_float_exception_flags(s);
2454     FloatParts64 p = round_to_int(in, rmode, scale, s);
2455
2456     switch (p.cls) {
2457     case float_class_snan:
2458     case float_class_qnan:
2459         s->float_exception_flags = orig_flags | float_flag_invalid;
2460         return max;
2461     case float_class_inf:
2462         s->float_exception_flags = orig_flags | float_flag_invalid;
2463         return p.sign ? min : max;
2464     case float_class_zero:
2465         return 0;
2466     case float_class_normal:
2467         if (p.exp <= DECOMPOSED_BINARY_POINT) {
2468             r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2469         } else {
2470             r = UINT64_MAX;
2471         }
2472         if (p.sign) {
2473             if (r <= -(uint64_t) min) {
2474                 return -r;
2475             } else {
2476                 s->float_exception_flags = orig_flags | float_flag_invalid;
2477                 return min;
2478             }
2479         } else {
2480             if (r <= max) {
2481                 return r;
2482             } else {
2483                 s->float_exception_flags = orig_flags | float_flag_invalid;
2484                 return max;
2485             }
2486         }
2487     default:
2488         g_assert_not_reached();
2489     }
2490 }
2491
2492 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2493                               float_status *s)
2494 {
2495     FloatParts64 p;
2496
2497     float16_unpack_canonical(&p, a, s);
2498     return round_to_int_and_pack(p, rmode, scale, INT8_MIN, INT8_MAX, s);
2499 }
2500
2501 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2502                                 float_status *s)
2503 {
2504     FloatParts64 p;
2505
2506     float16_unpack_canonical(&p, a, s);
2507     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2508 }
2509
2510 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2511                                 float_status *s)
2512 {
2513     FloatParts64 p;
2514
2515     float16_unpack_canonical(&p, a, s);
2516     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2517 }
2518
2519 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2520                                 float_status *s)
2521 {
2522     FloatParts64 p;
2523
2524     float16_unpack_canonical(&p, a, s);
2525     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2526 }
2527
2528 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2529                                 float_status *s)
2530 {
2531     FloatParts64 p;
2532
2533     float32_unpack_canonical(&p, a, s);
2534     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2535 }
2536
2537 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2538                                 float_status *s)
2539 {
2540     FloatParts64 p;
2541
2542     float32_unpack_canonical(&p, a, s);
2543     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2544 }
2545
2546 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2547                                 float_status *s)
2548 {
2549     FloatParts64 p;
2550
2551     float32_unpack_canonical(&p, a, s);
2552     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2553 }
2554
2555 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2556                                 float_status *s)
2557 {
2558     FloatParts64 p;
2559
2560     float64_unpack_canonical(&p, a, s);
2561     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2562 }
2563
2564 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2565                                 float_status *s)
2566 {
2567     FloatParts64 p;
2568
2569     float64_unpack_canonical(&p, a, s);
2570     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2571 }
2572
2573 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2574                                 float_status *s)
2575 {
2576     FloatParts64 p;
2577
2578     float64_unpack_canonical(&p, a, s);
2579     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2580 }
2581
2582 int8_t float16_to_int8(float16 a, float_status *s)
2583 {
2584     return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2585 }
2586
2587 int16_t float16_to_int16(float16 a, float_status *s)
2588 {
2589     return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2590 }
2591
2592 int32_t float16_to_int32(float16 a, float_status *s)
2593 {
2594     return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2595 }
2596
2597 int64_t float16_to_int64(float16 a, float_status *s)
2598 {
2599     return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2600 }
2601
2602 int16_t float32_to_int16(float32 a, float_status *s)
2603 {
2604     return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2605 }
2606
2607 int32_t float32_to_int32(float32 a, float_status *s)
2608 {
2609     return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2610 }
2611
2612 int64_t float32_to_int64(float32 a, float_status *s)
2613 {
2614     return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2615 }
2616
2617 int16_t float64_to_int16(float64 a, float_status *s)
2618 {
2619     return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2620 }
2621
2622 int32_t float64_to_int32(float64 a, float_status *s)
2623 {
2624     return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2625 }
2626
2627 int64_t float64_to_int64(float64 a, float_status *s)
2628 {
2629     return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2630 }
2631
2632 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2633 {
2634     return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2635 }
2636
2637 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2638 {
2639     return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2640 }
2641
2642 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2643 {
2644     return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2645 }
2646
2647 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2648 {
2649     return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2650 }
2651
2652 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2653 {
2654     return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2655 }
2656
2657 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2658 {
2659     return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2660 }
2661
2662 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2663 {
2664     return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2665 }
2666
2667 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2668 {
2669     return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2670 }
2671
2672 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2673 {
2674     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2675 }
2676
2677 /*
2678  * Returns the result of converting the floating-point value `a' to
2679  * the two's complement integer format.
2680  */
2681
2682 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2683                                  float_status *s)
2684 {
2685     FloatParts64 p;
2686
2687     bfloat16_unpack_canonical(&p, a, s);
2688     return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2689 }
2690
2691 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2692                                  float_status *s)
2693 {
2694     FloatParts64 p;
2695
2696     bfloat16_unpack_canonical(&p, a, s);
2697     return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2698 }
2699
2700 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2701                                  float_status *s)
2702 {
2703     FloatParts64 p;
2704
2705     bfloat16_unpack_canonical(&p, a, s);
2706     return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2707 }
2708
2709 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2710 {
2711     return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2712 }
2713
2714 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2715 {
2716     return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2717 }
2718
2719 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2720 {
2721     return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2722 }
2723
2724 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2725 {
2726     return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2727 }
2728
2729 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2730 {
2731     return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2732 }
2733
2734 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2735 {
2736     return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2737 }
2738
/*
 *  Returns the result of converting the floating-point value `a' to
 *  the unsigned integer format. The conversion is performed according
 *  to the IEC/IEEE Standard for Binary Floating-Point
 *  Arithmetic---which means in particular that the conversion is
 *  rounded according to the current rounding mode. If `a' is a NaN,
 *  the largest unsigned integer is returned. Otherwise, if the
 *  conversion overflows, the largest unsigned integer is returned. If
 *  `a' is negative, the result is rounded and zero is returned;
 *  values that do not round to zero will raise the invalid exception
 *  flag.
 */
2751
2752 static uint64_t round_to_uint_and_pack(FloatParts64 in, FloatRoundMode rmode,
2753                                        int scale, uint64_t max,
2754                                        float_status *s)
2755 {
2756     int orig_flags = get_float_exception_flags(s);
2757     FloatParts64 p = round_to_int(in, rmode, scale, s);
2758     uint64_t r;
2759
2760     switch (p.cls) {
2761     case float_class_snan:
2762     case float_class_qnan:
2763         s->float_exception_flags = orig_flags | float_flag_invalid;
2764         return max;
2765     case float_class_inf:
2766         s->float_exception_flags = orig_flags | float_flag_invalid;
2767         return p.sign ? 0 : max;
2768     case float_class_zero:
2769         return 0;
2770     case float_class_normal:
2771         if (p.sign) {
2772             s->float_exception_flags = orig_flags | float_flag_invalid;
2773             return 0;
2774         }
2775
2776         if (p.exp <= DECOMPOSED_BINARY_POINT) {
2777             r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2778         } else {
2779             s->float_exception_flags = orig_flags | float_flag_invalid;
2780             return max;
2781         }
2782
2783         /* For uint64 this will never trip, but if p.exp is too large
2784          * to shift a decomposed fraction we shall have exited via the
2785          * 3rd leg above.
2786          */
2787         if (r > max) {
2788             s->float_exception_flags = orig_flags | float_flag_invalid;
2789             return max;
2790         }
2791         return r;
2792     default:
2793         g_assert_not_reached();
2794     }
2795 }
2796
2797 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2798                                 float_status *s)
2799 {
2800     FloatParts64 p;
2801
2802     float16_unpack_canonical(&p, a, s);
2803     return round_to_uint_and_pack(p, rmode, scale, UINT8_MAX, s);
2804 }
2805
2806 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2807                                   float_status *s)
2808 {
2809     FloatParts64 p;
2810
2811     float16_unpack_canonical(&p, a, s);
2812     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2813 }
2814
2815 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2816                                   float_status *s)
2817 {
2818     FloatParts64 p;
2819
2820     float16_unpack_canonical(&p, a, s);
2821     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2822 }
2823
2824 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2825                                   float_status *s)
2826 {
2827     FloatParts64 p;
2828
2829     float16_unpack_canonical(&p, a, s);
2830     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2831 }
2832
2833 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2834                                   float_status *s)
2835 {
2836     FloatParts64 p;
2837
2838     float32_unpack_canonical(&p, a, s);
2839     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2840 }
2841
2842 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2843                                   float_status *s)
2844 {
2845     FloatParts64 p;
2846
2847     float32_unpack_canonical(&p, a, s);
2848     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2849 }
2850
2851 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2852                                   float_status *s)
2853 {
2854     FloatParts64 p;
2855
2856     float32_unpack_canonical(&p, a, s);
2857     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2858 }
2859
2860 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2861                                   float_status *s)
2862 {
2863     FloatParts64 p;
2864
2865     float64_unpack_canonical(&p, a, s);
2866     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2867 }
2868
2869 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2870                                   float_status *s)
2871 {
2872     FloatParts64 p;
2873
2874     float64_unpack_canonical(&p, a, s);
2875     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2876 }
2877
2878 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2879                                   float_status *s)
2880 {
2881     FloatParts64 p;
2882
2883     float64_unpack_canonical(&p, a, s);
2884     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2885 }
2886
2887 uint8_t float16_to_uint8(float16 a, float_status *s)
2888 {
2889     return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2890 }
2891
2892 uint16_t float16_to_uint16(float16 a, float_status *s)
2893 {
2894     return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2895 }
2896
2897 uint32_t float16_to_uint32(float16 a, float_status *s)
2898 {
2899     return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2900 }
2901
2902 uint64_t float16_to_uint64(float16 a, float_status *s)
2903 {
2904     return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2905 }
2906
2907 uint16_t float32_to_uint16(float32 a, float_status *s)
2908 {
2909     return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2910 }
2911
2912 uint32_t float32_to_uint32(float32 a, float_status *s)
2913 {
2914     return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2915 }
2916
2917 uint64_t float32_to_uint64(float32 a, float_status *s)
2918 {
2919     return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2920 }
2921
2922 uint16_t float64_to_uint16(float64 a, float_status *s)
2923 {
2924     return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2925 }
2926
2927 uint32_t float64_to_uint32(float64 a, float_status *s)
2928 {
2929     return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2930 }
2931
2932 uint64_t float64_to_uint64(float64 a, float_status *s)
2933 {
2934     return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2935 }
2936
2937 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2938 {
2939     return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2940 }
2941
2942 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2943 {
2944     return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2945 }
2946
2947 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2948 {
2949     return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2950 }
2951
2952 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2953 {
2954     return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2955 }
2956
2957 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2958 {
2959     return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2960 }
2961
2962 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2963 {
2964     return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2965 }
2966
2967 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2968 {
2969     return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2970 }
2971
2972 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2973 {
2974     return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2975 }
2976
2977 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2978 {
2979     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2980 }
2981
2982 /*
2983  *  Returns the result of converting the bfloat16 value `a' to
2984  *  the unsigned integer format.
2985  */
2986
2987 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2988                                    int scale, float_status *s)
2989 {
2990     FloatParts64 p;
2991
2992     bfloat16_unpack_canonical(&p, a, s);
2993     return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2994 }
2995
2996 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2997                                    int scale, float_status *s)
2998 {
2999     FloatParts64 p;
3000
3001     bfloat16_unpack_canonical(&p, a, s);
3002     return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
3003 }
3004
3005 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3006                                    int scale, float_status *s)
3007 {
3008     FloatParts64 p;
3009
3010     bfloat16_unpack_canonical(&p, a, s);
3011     return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
3012 }
3013
3014 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3015 {
3016     return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3017 }
3018
3019 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3020 {
3021     return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3022 }
3023
3024 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3025 {
3026     return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3027 }
3028
3029 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3030 {
3031     return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3032 }
3033
3034 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3035 {
3036     return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3037 }
3038
3039 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3040 {
3041     return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3042 }
3043
3044 /*
3045  * Integer to float conversions
3046  *
3047  * Returns the result of converting the two's complement integer `a'
3048  * to the floating-point format. The conversion is performed according
3049  * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3050  */
3051
3052 static FloatParts64 int_to_float(int64_t a, int scale, float_status *status)
3053 {
3054     FloatParts64 r = { .sign = false };
3055
3056     if (a == 0) {
3057         r.cls = float_class_zero;
3058     } else {
3059         uint64_t f = a;
3060         int shift;
3061
3062         r.cls = float_class_normal;
3063         if (a < 0) {
3064             f = -f;
3065             r.sign = true;
3066         }
3067         shift = clz64(f);
3068         scale = MIN(MAX(scale, -0x10000), 0x10000);
3069
3070         r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
3071         r.frac = f << shift;
3072     }
3073
3074     return r;
3075 }
3076
3077 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3078 {
3079     FloatParts64 pa = int_to_float(a, scale, status);
3080     return float16_round_pack_canonical(&pa, status);
3081 }
3082
3083 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3084 {
3085     return int64_to_float16_scalbn(a, scale, status);
3086 }
3087
3088 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3089 {
3090     return int64_to_float16_scalbn(a, scale, status);
3091 }
3092
3093 float16 int64_to_float16(int64_t a, float_status *status)
3094 {
3095     return int64_to_float16_scalbn(a, 0, status);
3096 }
3097
3098 float16 int32_to_float16(int32_t a, float_status *status)
3099 {
3100     return int64_to_float16_scalbn(a, 0, status);
3101 }
3102
3103 float16 int16_to_float16(int16_t a, float_status *status)
3104 {
3105     return int64_to_float16_scalbn(a, 0, status);
3106 }
3107
3108 float16 int8_to_float16(int8_t a, float_status *status)
3109 {
3110     return int64_to_float16_scalbn(a, 0, status);
3111 }
3112
3113 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3114 {
3115     FloatParts64 pa = int_to_float(a, scale, status);
3116     return float32_round_pack_canonical(&pa, status);
3117 }
3118
3119 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3120 {
3121     return int64_to_float32_scalbn(a, scale, status);
3122 }
3123
3124 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3125 {
3126     return int64_to_float32_scalbn(a, scale, status);
3127 }
3128
3129 float32 int64_to_float32(int64_t a, float_status *status)
3130 {
3131     return int64_to_float32_scalbn(a, 0, status);
3132 }
3133
3134 float32 int32_to_float32(int32_t a, float_status *status)
3135 {
3136     return int64_to_float32_scalbn(a, 0, status);
3137 }
3138
3139 float32 int16_to_float32(int16_t a, float_status *status)
3140 {
3141     return int64_to_float32_scalbn(a, 0, status);
3142 }
3143
3144 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3145 {
3146     FloatParts64 pa = int_to_float(a, scale, status);
3147     return float64_round_pack_canonical(&pa, status);
3148 }
3149
3150 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3151 {
3152     return int64_to_float64_scalbn(a, scale, status);
3153 }
3154
3155 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3156 {
3157     return int64_to_float64_scalbn(a, scale, status);
3158 }
3159
3160 float64 int64_to_float64(int64_t a, float_status *status)
3161 {
3162     return int64_to_float64_scalbn(a, 0, status);
3163 }
3164
3165 float64 int32_to_float64(int32_t a, float_status *status)
3166 {
3167     return int64_to_float64_scalbn(a, 0, status);
3168 }
3169
3170 float64 int16_to_float64(int16_t a, float_status *status)
3171 {
3172     return int64_to_float64_scalbn(a, 0, status);
3173 }
3174
3175 /*
3176  * Returns the result of converting the two's complement integer `a'
3177  * to the bfloat16 format.
3178  */
3179
3180 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3181 {
3182     FloatParts64 pa = int_to_float(a, scale, status);
3183     return bfloat16_round_pack_canonical(&pa, status);
3184 }
3185
3186 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3187 {
3188     return int64_to_bfloat16_scalbn(a, scale, status);
3189 }
3190
3191 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3192 {
3193     return int64_to_bfloat16_scalbn(a, scale, status);
3194 }
3195
3196 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3197 {
3198     return int64_to_bfloat16_scalbn(a, 0, status);
3199 }
3200
3201 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3202 {
3203     return int64_to_bfloat16_scalbn(a, 0, status);
3204 }
3205
3206 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3207 {
3208     return int64_to_bfloat16_scalbn(a, 0, status);
3209 }
3210
3211 /*
3212  * Unsigned Integer to float conversions
3213  *
3214  * Returns the result of converting the unsigned integer `a' to the
3215  * floating-point format. The conversion is performed according to the
3216  * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3217  */
3218
3219 static FloatParts64 uint_to_float(uint64_t a, int scale, float_status *status)
3220 {
3221     FloatParts64 r = { .sign = false };
3222     int shift;
3223
3224     if (a == 0) {
3225         r.cls = float_class_zero;
3226     } else {
3227         scale = MIN(MAX(scale, -0x10000), 0x10000);
3228         shift = clz64(a);
3229         r.cls = float_class_normal;
3230         r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
3231         r.frac = a << shift;
3232     }
3233
3234     return r;
3235 }
3236
3237 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3238 {
3239     FloatParts64 pa = uint_to_float(a, scale, status);
3240     return float16_round_pack_canonical(&pa, status);
3241 }
3242
3243 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3244 {
3245     return uint64_to_float16_scalbn(a, scale, status);
3246 }
3247
3248 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3249 {
3250     return uint64_to_float16_scalbn(a, scale, status);
3251 }
3252
3253 float16 uint64_to_float16(uint64_t a, float_status *status)
3254 {
3255     return uint64_to_float16_scalbn(a, 0, status);
3256 }
3257
3258 float16 uint32_to_float16(uint32_t a, float_status *status)
3259 {
3260     return uint64_to_float16_scalbn(a, 0, status);
3261 }
3262
3263 float16 uint16_to_float16(uint16_t a, float_status *status)
3264 {
3265     return uint64_to_float16_scalbn(a, 0, status);
3266 }
3267
3268 float16 uint8_to_float16(uint8_t a, float_status *status)
3269 {
3270     return uint64_to_float16_scalbn(a, 0, status);
3271 }
3272
3273 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3274 {
3275     FloatParts64 pa = uint_to_float(a, scale, status);
3276     return float32_round_pack_canonical(&pa, status);
3277 }
3278
3279 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3280 {
3281     return uint64_to_float32_scalbn(a, scale, status);
3282 }
3283
3284 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3285 {
3286     return uint64_to_float32_scalbn(a, scale, status);
3287 }
3288
3289 float32 uint64_to_float32(uint64_t a, float_status *status)
3290 {
3291     return uint64_to_float32_scalbn(a, 0, status);
3292 }
3293
3294 float32 uint32_to_float32(uint32_t a, float_status *status)
3295 {
3296     return uint64_to_float32_scalbn(a, 0, status);
3297 }
3298
3299 float32 uint16_to_float32(uint16_t a, float_status *status)
3300 {
3301     return uint64_to_float32_scalbn(a, 0, status);
3302 }
3303
3304 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3305 {
3306     FloatParts64 pa = uint_to_float(a, scale, status);
3307     return float64_round_pack_canonical(&pa, status);
3308 }
3309
3310 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3311 {
3312     return uint64_to_float64_scalbn(a, scale, status);
3313 }
3314
3315 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3316 {
3317     return uint64_to_float64_scalbn(a, scale, status);
3318 }
3319
3320 float64 uint64_to_float64(uint64_t a, float_status *status)
3321 {
3322     return uint64_to_float64_scalbn(a, 0, status);
3323 }
3324
3325 float64 uint32_to_float64(uint32_t a, float_status *status)
3326 {
3327     return uint64_to_float64_scalbn(a, 0, status);
3328 }
3329
3330 float64 uint16_to_float64(uint16_t a, float_status *status)
3331 {
3332     return uint64_to_float64_scalbn(a, 0, status);
3333 }
3334
3335 /*
3336  * Returns the result of converting the unsigned integer `a' to the
3337  * bfloat16 format.
3338  */
3339
3340 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3341 {
3342     FloatParts64 pa = uint_to_float(a, scale, status);
3343     return bfloat16_round_pack_canonical(&pa, status);
3344 }
3345
3346 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3347 {
3348     return uint64_to_bfloat16_scalbn(a, scale, status);
3349 }
3350
3351 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3352 {
3353     return uint64_to_bfloat16_scalbn(a, scale, status);
3354 }
3355
3356 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3357 {
3358     return uint64_to_bfloat16_scalbn(a, 0, status);
3359 }
3360
3361 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3362 {
3363     return uint64_to_bfloat16_scalbn(a, 0, status);
3364 }
3365
3366 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3367 {
3368     return uint64_to_bfloat16_scalbn(a, 0, status);
3369 }
3370
3371 /* Float Min/Max */
3372 /* min() and max() functions. These can't be implemented as
3373  * 'compare and pick one input' because that would mishandle
3374  * NaNs and +0 vs -0.
3375  *
3376  * minnum() and maxnum() functions. These are similar to the min()
3377  * and max() functions but if one of the arguments is a QNaN and
3378  * the other is numerical then the numerical argument is returned.
3379  * SNaNs will get quietened before being returned.
3380  * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
3381  * and maxNum() operations. min() and max() are the typical min/max
3382  * semantics provided by many CPUs which predate that specification.
3383  *
 * minnummag() and maxnummag() functions correspond to minNumMag()
 * and maxNumMag() from IEEE 754-2008.
3386  */
3387 static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
3388                                 bool ieee, bool ismag, float_status *s)
3389 {
3390     if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
3391         if (ieee) {
3392             /* Takes two floating-point values `a' and `b', one of
3393              * which is a NaN, and returns the appropriate NaN
3394              * result. If either `a' or `b' is a signaling NaN,
3395              * the invalid exception is raised.
3396              */
3397             if (is_snan(a.cls) || is_snan(b.cls)) {
3398                 return *parts_pick_nan(&a, &b, s);
3399             } else if (is_nan(a.cls) && !is_nan(b.cls)) {
3400                 return b;
3401             } else if (is_nan(b.cls) && !is_nan(a.cls)) {
3402                 return a;
3403             }
3404         }
3405         return *parts_pick_nan(&a, &b, s);
3406     } else {
3407         int a_exp, b_exp;
3408
3409         switch (a.cls) {
3410         case float_class_normal:
3411             a_exp = a.exp;
3412             break;
3413         case float_class_inf:
3414             a_exp = INT_MAX;
3415             break;
3416         case float_class_zero:
3417             a_exp = INT_MIN;
3418             break;
3419         default:
3420             g_assert_not_reached();
3421             break;
3422         }
3423         switch (b.cls) {
3424         case float_class_normal:
3425             b_exp = b.exp;
3426             break;
3427         case float_class_inf:
3428             b_exp = INT_MAX;
3429             break;
3430         case float_class_zero:
3431             b_exp = INT_MIN;
3432             break;
3433         default:
3434             g_assert_not_reached();
3435             break;
3436         }
3437
3438         if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
3439             bool a_less = a_exp < b_exp;
3440             if (a_exp == b_exp) {
3441                 a_less = a.frac < b.frac;
3442             }
3443             return a_less ^ ismin ? b : a;
3444         }
3445
3446         if (a.sign == b.sign) {
3447             bool a_less = a_exp < b_exp;
3448             if (a_exp == b_exp) {
3449                 a_less = a.frac < b.frac;
3450             }
3451             return a.sign ^ a_less ^ ismin ? b : a;
3452         } else {
3453             return a.sign ^ ismin ? b : a;
3454         }
3455     }
3456 }
3457
3458 #define MINMAX(sz, name, ismin, isiee, ismag)                           \
3459 float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b,      \
3460                                      float_status *s)                   \
3461 {                                                                       \
3462     FloatParts64 pa, pb, pr;                                            \
3463     float ## sz ## _unpack_canonical(&pa, a, s);                        \
3464     float ## sz ## _unpack_canonical(&pb, b, s);                        \
3465     pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);                 \
3466     return float ## sz ## _round_pack_canonical(&pr, s);                \
3467 }
3468
3469 MINMAX(16, min, true, false, false)
3470 MINMAX(16, minnum, true, true, false)
3471 MINMAX(16, minnummag, true, true, true)
3472 MINMAX(16, max, false, false, false)
3473 MINMAX(16, maxnum, false, true, false)
3474 MINMAX(16, maxnummag, false, true, true)
3475
3476 MINMAX(32, min, true, false, false)
3477 MINMAX(32, minnum, true, true, false)
3478 MINMAX(32, minnummag, true, true, true)
3479 MINMAX(32, max, false, false, false)
3480 MINMAX(32, maxnum, false, true, false)
3481 MINMAX(32, maxnummag, false, true, true)
3482
3483 MINMAX(64, min, true, false, false)
3484 MINMAX(64, minnum, true, true, false)
3485 MINMAX(64, minnummag, true, true, true)
3486 MINMAX(64, max, false, false, false)
3487 MINMAX(64, maxnum, false, true, false)
3488 MINMAX(64, maxnummag, false, true, true)
3489
3490 #undef MINMAX
3491
3492 #define BF16_MINMAX(name, ismin, isiee, ismag)                          \
3493 bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s)     \
3494 {                                                                       \
3495     FloatParts64 pa, pb, pr;                                            \
3496     bfloat16_unpack_canonical(&pa, a, s);                               \
3497     bfloat16_unpack_canonical(&pb, b, s);                               \
3498     pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);                 \
3499     return bfloat16_round_pack_canonical(&pr, s);                       \
3500 }
3501
3502 BF16_MINMAX(min, true, false, false)
3503 BF16_MINMAX(minnum, true, true, false)
3504 BF16_MINMAX(minnummag, true, true, true)
3505 BF16_MINMAX(max, false, false, false)
3506 BF16_MINMAX(maxnum, false, true, false)
3507 BF16_MINMAX(maxnummag, false, true, true)
3508
3509 #undef BF16_MINMAX
3510
3511 /* Floating point compare */
3512 static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
3513                                     float_status *s)
3514 {
3515     if (is_nan(a.cls) || is_nan(b.cls)) {
3516         if (!is_quiet ||
3517             a.cls == float_class_snan ||
3518             b.cls == float_class_snan) {
3519             float_raise(float_flag_invalid, s);
3520         }
3521         return float_relation_unordered;
3522     }
3523
3524     if (a.cls == float_class_zero) {
3525         if (b.cls == float_class_zero) {
3526             return float_relation_equal;
3527         }
3528         return b.sign ? float_relation_greater : float_relation_less;
3529     } else if (b.cls == float_class_zero) {
3530         return a.sign ? float_relation_less : float_relation_greater;
3531     }
3532
3533     /* The only really important thing about infinity is its sign. If
3534      * both are infinities the sign marks the smallest of the two.
3535      */
3536     if (a.cls == float_class_inf) {
3537         if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
3538             return float_relation_equal;
3539         }
3540         return a.sign ? float_relation_less : float_relation_greater;
3541     } else if (b.cls == float_class_inf) {
3542         return b.sign ? float_relation_greater : float_relation_less;
3543     }
3544
3545     if (a.sign != b.sign) {
3546         return a.sign ? float_relation_less : float_relation_greater;
3547     }
3548
3549     if (a.exp == b.exp) {
3550         if (a.frac == b.frac) {
3551             return float_relation_equal;
3552         }
3553         if (a.sign) {
3554             return a.frac > b.frac ?
3555                 float_relation_less : float_relation_greater;
3556         } else {
3557             return a.frac > b.frac ?
3558                 float_relation_greater : float_relation_less;
3559         }
3560     } else {
3561         if (a.sign) {
3562             return a.exp > b.exp ? float_relation_less : float_relation_greater;
3563         } else {
3564             return a.exp > b.exp ? float_relation_greater : float_relation_less;
3565         }
3566     }
3567 }
3568
3569 #define COMPARE(name, attr, sz)                                         \
3570 static int attr                                                         \
3571 name(float ## sz a, float ## sz b, bool is_quiet, float_status *s)      \
3572 {                                                                       \
3573     FloatParts64 pa, pb;                                                \
3574     float ## sz ## _unpack_canonical(&pa, a, s);                        \
3575     float ## sz ## _unpack_canonical(&pb, b, s);                        \
3576     return compare_floats(pa, pb, is_quiet, s);                         \
3577 }
3578
3579 COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
3580 COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
3581 COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
3582
3583 #undef COMPARE
3584
3585 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
3586 {
3587     return soft_f16_compare(a, b, false, s);
3588 }
3589
3590 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
3591 {
3592     return soft_f16_compare(a, b, true, s);
3593 }
3594
3595 static FloatRelation QEMU_FLATTEN
3596 f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
3597 {
3598     union_float32 ua, ub;
3599
3600     ua.s = xa;
3601     ub.s = xb;
3602
3603     if (QEMU_NO_HARDFLOAT) {
3604         goto soft;
3605     }
3606
3607     float32_input_flush2(&ua.s, &ub.s, s);
3608     if (isgreaterequal(ua.h, ub.h)) {
3609         if (isgreater(ua.h, ub.h)) {
3610             return float_relation_greater;
3611         }
3612         return float_relation_equal;
3613     }
3614     if (likely(isless(ua.h, ub.h))) {
3615         return float_relation_less;
3616     }
3617     /* The only condition remaining is unordered.
3618      * Fall through to set flags.
3619      */
3620  soft:
3621     return soft_f32_compare(ua.s, ub.s, is_quiet, s);
3622 }
3623
3624 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
3625 {
3626     return f32_compare(a, b, false, s);
3627 }
3628
3629 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
3630 {
3631     return f32_compare(a, b, true, s);
3632 }
3633
3634 static FloatRelation QEMU_FLATTEN
3635 f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
3636 {
3637     union_float64 ua, ub;
3638
3639     ua.s = xa;
3640     ub.s = xb;
3641
3642     if (QEMU_NO_HARDFLOAT) {
3643         goto soft;
3644     }
3645
3646     float64_input_flush2(&ua.s, &ub.s, s);
3647     if (isgreaterequal(ua.h, ub.h)) {
3648         if (isgreater(ua.h, ub.h)) {
3649             return float_relation_greater;
3650         }
3651         return float_relation_equal;
3652     }
3653     if (likely(isless(ua.h, ub.h))) {
3654         return float_relation_less;
3655     }
3656     /* The only condition remaining is unordered.
3657      * Fall through to set flags.
3658      */
3659  soft:
3660     return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3661 }
3662
3663 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
3664 {
3665     return f64_compare(a, b, false, s);
3666 }
3667
3668 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
3669 {
3670     return f64_compare(a, b, true, s);
3671 }
3672
3673 static FloatRelation QEMU_FLATTEN
3674 soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
3675 {
3676     FloatParts64 pa, pb;
3677
3678     bfloat16_unpack_canonical(&pa, a, s);
3679     bfloat16_unpack_canonical(&pb, b, s);
3680     return compare_floats(pa, pb, is_quiet, s);
3681 }
3682
3683 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3684 {
3685     return soft_bf16_compare(a, b, false, s);
3686 }
3687
3688 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3689 {
3690     return soft_bf16_compare(a, b, true, s);
3691 }
3692
3693 /* Multiply A by 2 raised to the power N.  */
3694 static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
3695 {
3696     if (unlikely(is_nan(a.cls))) {
3697         parts_return_nan(&a, s);
3698     }
3699     if (a.cls == float_class_normal) {
3700         /* The largest float type (even though not supported by FloatParts64)
3701          * is float128, which has a 15 bit exponent.  Bounding N to 16 bits
3702          * still allows rounding to infinity, without allowing overflow
3703          * within the int32_t that backs FloatParts64.exp.
3704          */
3705         n = MIN(MAX(n, -0x10000), 0x10000);
3706         a.exp += n;
3707     }
3708     return a;
3709 }
3710
3711 float16 float16_scalbn(float16 a, int n, float_status *status)
3712 {
3713     FloatParts64 pa, pr;
3714
3715     float16_unpack_canonical(&pa, a, status);
3716     pr = scalbn_decomposed(pa, n, status);
3717     return float16_round_pack_canonical(&pr, status);
3718 }
3719
3720 float32 float32_scalbn(float32 a, int n, float_status *status)
3721 {
3722     FloatParts64 pa, pr;
3723
3724     float32_unpack_canonical(&pa, a, status);
3725     pr = scalbn_decomposed(pa, n, status);
3726     return float32_round_pack_canonical(&pr, status);
3727 }
3728
3729 float64 float64_scalbn(float64 a, int n, float_status *status)
3730 {
3731     FloatParts64 pa, pr;
3732
3733     float64_unpack_canonical(&pa, a, status);
3734     pr = scalbn_decomposed(pa, n, status);
3735     return float64_round_pack_canonical(&pr, status);
3736 }
3737
3738 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3739 {
3740     FloatParts64 pa, pr;
3741
3742     bfloat16_unpack_canonical(&pa, a, status);
3743     pr = scalbn_decomposed(pa, n, status);
3744     return bfloat16_round_pack_canonical(&pr, status);
3745 }
3746
3747 /*
3748  * Square Root
3749  *
3750  * The old softfloat code did an approximation step before zeroing in
3751  * on the final result. However for simpleness we just compute the
3752  * square root by iterating down from the implicit bit to enough extra
3753  * bits to ensure we get a correctly rounded result.
3754  *
3755  * This does mean however the calculation is slower than before,
3756  * especially for 64 bit floats.
3757  */
3758
/*
 * Compute the square root of the decomposed value `a'.
 *
 * Special cases, in the order they are tested:
 *   - NaN: handed to parts_return_nan() and returned;
 *   - zero of either sign: returned unchanged;
 *   - any remaining value with the sign set: float_flag_invalid is
 *     raised and the default NaN is returned;
 *   - +infinity: returned unchanged.
 * Everything else must be float_class_normal and goes through the
 * bit-by-bit loop below.
 *
 * `p' is only consulted for frac_shift, which decides how far down the
 * loop iterates.  The result is returned in decomposed form; the low
 * bit of the fraction is set when the remainder is non-zero.
 */
static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
{
    uint64_t a_frac, r_frac, s_frac;
    int bit, last_bit;

    if (is_nan(a.cls)) {
        parts_return_nan(&a, s);
        return a;
    }
    if (a.cls == float_class_zero) {
        return a;  /* sqrt(+-0) = +-0 */
    }
    if (a.sign) {
        /* Negative and non-zero (zero was handled above). */
        float_raise(float_flag_invalid, s);
        parts_default_nan(&a, s);
        return a;
    }
    if (a.cls == float_class_inf) {
        return a;  /* sqrt(+inf) = +inf */
    }

    assert(a.cls == float_class_normal);

    /* We need two overflow bits at the top. Adding room for that is a
     * right shift. If the exponent is odd, we can discard the low bit
     * by multiplying the fraction by 2; that's a left shift. Combine
     * those and we shift right by 1 if the exponent is odd, otherwise 2.
     */
    a_frac = a.frac >> (2 - (a.exp & 1));
    /* Halve the exponent; the odd bit was folded into a_frac above. */
    a.exp >>= 1;

    /* Bit-by-bit computation of sqrt.  */
    r_frac = 0;     /* result bits accepted so far */
    s_frac = 0;     /* running value the next trial bit is added to */

    /* Iterate from implicit bit down to the 3 extra bits to compute a
     * properly rounded result. Remember we've inserted two more bits
     * at the top, so these positions are two less.
     */
    bit = DECOMPOSED_BINARY_POINT - 2;
    last_bit = MAX(p->frac_shift - 4, 0);
    do {
        uint64_t q = 1ULL << bit;
        uint64_t t_frac = s_frac + q;
        /* Accept this result bit if the trial value fits the remainder. */
        if (t_frac <= a_frac) {
            s_frac = t_frac + q;
            a_frac -= t_frac;
            r_frac += q;
        }
        a_frac <<= 1;
    } while (--bit >= last_bit);

    /* Undo the right shift done above. If there is any remaining
     * fraction, the result is inexact. Set the sticky bit.
     */
    a.frac = (r_frac << 2) + (a_frac != 0);

    return a;
}
3818
3819 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
3820 {
3821     FloatParts64 pa, pr;
3822
3823     float16_unpack_canonical(&pa, a, status);
3824     pr = sqrt_float(pa, status, &float16_params);
3825     return float16_round_pack_canonical(&pr, status);
3826 }
3827
3828 static float32 QEMU_SOFTFLOAT_ATTR
3829 soft_f32_sqrt(float32 a, float_status *status)
3830 {
3831     FloatParts64 pa, pr;
3832
3833     float32_unpack_canonical(&pa, a, status);
3834     pr = sqrt_float(pa, status, &float32_params);
3835     return float32_round_pack_canonical(&pr, status);
3836 }
3837
3838 static float64 QEMU_SOFTFLOAT_ATTR
3839 soft_f64_sqrt(float64 a, float_status *status)
3840 {
3841     FloatParts64 pa, pr;
3842
3843     float64_unpack_canonical(&pa, a, status);
3844     pr = sqrt_float(pa, status, &float64_params);
3845     return float64_round_pack_canonical(&pr, status);
3846 }
3847
3848 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3849 {
3850     union_float32 ua, ur;
3851
3852     ua.s = xa;
3853     if (unlikely(!can_use_fpu(s))) {
3854         goto soft;
3855     }
3856
3857     float32_input_flush1(&ua.s, s);
3858     if (QEMU_HARDFLOAT_1F32_USE_FP) {
3859         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3860                        fpclassify(ua.h) == FP_ZERO) ||
3861                      signbit(ua.h))) {
3862             goto soft;
3863         }
3864     } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3865                         float32_is_neg(ua.s))) {
3866         goto soft;
3867     }
3868     ur.h = sqrtf(ua.h);
3869     return ur.s;
3870
3871  soft:
3872     return soft_f32_sqrt(ua.s, s);
3873 }
3874
3875 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3876 {
3877     union_float64 ua, ur;
3878
3879     ua.s = xa;
3880     if (unlikely(!can_use_fpu(s))) {
3881         goto soft;
3882     }
3883
3884     float64_input_flush1(&ua.s, s);
3885     if (QEMU_HARDFLOAT_1F64_USE_FP) {
3886         if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3887                        fpclassify(ua.h) == FP_ZERO) ||
3888                      signbit(ua.h))) {
3889             goto soft;
3890         }
3891     } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3892                         float64_is_neg(ua.s))) {
3893         goto soft;
3894     }
3895     ur.h = sqrt(ua.h);
3896     return ur.s;
3897
3898  soft:
3899     return soft_f64_sqrt(ua.s, s);
3900 }
3901
3902 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3903 {
3904     FloatParts64 pa, pr;
3905
3906     bfloat16_unpack_canonical(&pa, a, status);
3907     pr = sqrt_float(pa, status, &bfloat16_params);
3908     return bfloat16_round_pack_canonical(&pr, status);
3909 }
3910
3911 /*----------------------------------------------------------------------------
3912 | The pattern for a default generated NaN.
3913 *----------------------------------------------------------------------------*/
3914
3915 float16 float16_default_nan(float_status *status)
3916 {
3917     FloatParts64 p;
3918
3919     parts_default_nan(&p, status);
3920     p.frac >>= float16_params.frac_shift;
3921     return float16_pack_raw(&p);
3922 }
3923
3924 float32 float32_default_nan(float_status *status)
3925 {
3926     FloatParts64 p;
3927
3928     parts_default_nan(&p, status);
3929     p.frac >>= float32_params.frac_shift;
3930     return float32_pack_raw(&p);
3931 }
3932
3933 float64 float64_default_nan(float_status *status)
3934 {
3935     FloatParts64 p;
3936
3937     parts_default_nan(&p, status);
3938     p.frac >>= float64_params.frac_shift;
3939     return float64_pack_raw(&p);
3940 }
3941
3942 float128 float128_default_nan(float_status *status)
3943 {
3944     FloatParts128 p;
3945
3946     parts_default_nan(&p, status);
3947     frac_shr(&p, float128_params.frac_shift);
3948     return float128_pack_raw(&p);
3949 }
3950
3951 bfloat16 bfloat16_default_nan(float_status *status)
3952 {
3953     FloatParts64 p;
3954
3955     parts_default_nan(&p, status);
3956     p.frac >>= bfloat16_params.frac_shift;
3957     return bfloat16_pack_raw(&p);
3958 }
3959
3960 /*----------------------------------------------------------------------------
3961 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3962 *----------------------------------------------------------------------------*/
3963
3964 float16 float16_silence_nan(float16 a, float_status *status)
3965 {
3966     FloatParts64 p;
3967
3968     float16_unpack_raw(&p, a);
3969     p.frac <<= float16_params.frac_shift;
3970     parts_silence_nan(&p, status);
3971     p.frac >>= float16_params.frac_shift;
3972     return float16_pack_raw(&p);
3973 }
3974
3975 float32 float32_silence_nan(float32 a, float_status *status)
3976 {
3977     FloatParts64 p;
3978
3979     float32_unpack_raw(&p, a);
3980     p.frac <<= float32_params.frac_shift;
3981     parts_silence_nan(&p, status);
3982     p.frac >>= float32_params.frac_shift;
3983     return float32_pack_raw(&p);
3984 }
3985
3986 float64 float64_silence_nan(float64 a, float_status *status)
3987 {
3988     FloatParts64 p;
3989
3990     float64_unpack_raw(&p, a);
3991     p.frac <<= float64_params.frac_shift;
3992     parts_silence_nan(&p, status);
3993     p.frac >>= float64_params.frac_shift;
3994     return float64_pack_raw(&p);
3995 }
3996
3997 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3998 {
3999     FloatParts64 p;
4000
4001     bfloat16_unpack_raw(&p, a);
4002     p.frac <<= bfloat16_params.frac_shift;
4003     parts_silence_nan(&p, status);
4004     p.frac >>= bfloat16_params.frac_shift;
4005     return bfloat16_pack_raw(&p);
4006 }
4007
4008 float128 float128_silence_nan(float128 a, float_status *status)
4009 {
4010     FloatParts128 p;
4011
4012     float128_unpack_raw(&p, a);
4013     frac_shl(&p, float128_params.frac_shift);
4014     parts_silence_nan(&p, status);
4015     frac_shr(&p, float128_params.frac_shift);
4016     return float128_pack_raw(&p);
4017 }
4018
4019 /*----------------------------------------------------------------------------
4020 | If `a' is denormal and we are in flush-to-zero mode then set the
4021 | input-denormal exception and return zero. Otherwise just return the value.
4022 *----------------------------------------------------------------------------*/
4023
4024 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4025 {
4026     if (p.exp == 0 && p.frac != 0) {
4027         float_raise(float_flag_input_denormal, status);
4028         return true;
4029     }
4030
4031     return false;
4032 }
4033
4034 float16 float16_squash_input_denormal(float16 a, float_status *status)
4035 {
4036     if (status->flush_inputs_to_zero) {
4037         FloatParts64 p;
4038
4039         float16_unpack_raw(&p, a);
4040         if (parts_squash_denormal(p, status)) {
4041             return float16_set_sign(float16_zero, p.sign);
4042         }
4043     }
4044     return a;
4045 }
4046
4047 float32 float32_squash_input_denormal(float32 a, float_status *status)
4048 {
4049     if (status->flush_inputs_to_zero) {
4050         FloatParts64 p;
4051
4052         float32_unpack_raw(&p, a);
4053         if (parts_squash_denormal(p, status)) {
4054             return float32_set_sign(float32_zero, p.sign);
4055         }
4056     }
4057     return a;
4058 }
4059
4060 float64 float64_squash_input_denormal(float64 a, float_status *status)
4061 {
4062     if (status->flush_inputs_to_zero) {
4063         FloatParts64 p;
4064
4065         float64_unpack_raw(&p, a);
4066         if (parts_squash_denormal(p, status)) {
4067             return float64_set_sign(float64_zero, p.sign);
4068         }
4069     }
4070     return a;
4071 }
4072
4073 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4074 {
4075     if (status->flush_inputs_to_zero) {
4076         FloatParts64 p;
4077
4078         bfloat16_unpack_raw(&p, a);
4079         if (parts_squash_denormal(p, status)) {
4080             return bfloat16_set_sign(bfloat16_zero, p.sign);
4081         }
4082     }
4083     return a;
4084 }
4085
4086 /*----------------------------------------------------------------------------
4087 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
4088 | and 7, and returns the properly rounded 32-bit integer corresponding to the
4089 | input.  If `zSign' is 1, the input is negated before being converted to an
4090 | integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
4091 | is simply rounded to an integer, with the inexact exception raised if the
4092 | input cannot be represented exactly as an integer.  However, if the fixed-
4093 | point input is too large, the invalid exception is raised and the largest
4094 | positive or negative integer is returned.
4095 *----------------------------------------------------------------------------*/
4096
4097 static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
4098                                  float_status *status)
4099 {
4100     int8_t roundingMode;
4101     bool roundNearestEven;
4102     int8_t roundIncrement, roundBits;
4103     int32_t z;
4104
4105     roundingMode = status->float_rounding_mode;
4106     roundNearestEven = ( roundingMode == float_round_nearest_even );
4107     switch (roundingMode) {
4108     case float_round_nearest_even:
4109     case float_round_ties_away:
4110         roundIncrement = 0x40;
4111         break;
4112     case float_round_to_zero:
4113         roundIncrement = 0;
4114         break;
4115     case float_round_up:
4116         roundIncrement = zSign ? 0 : 0x7f;
4117         break;
4118     case float_round_down:
4119         roundIncrement = zSign ? 0x7f : 0;
4120         break;
4121     case float_round_to_odd:
4122         roundIncrement = absZ & 0x80 ? 0 : 0x7f;
4123         break;
4124     default:
4125         abort();
4126     }
4127     roundBits = absZ & 0x7F;
4128     absZ = ( absZ + roundIncrement )>>7;
4129     if (!(roundBits ^ 0x40) && roundNearestEven) {
4130         absZ &= ~1;
4131     }
4132     z = absZ;
4133     if ( zSign ) z = - z;
4134     if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
4135         float_raise(float_flag_invalid, status);
4136         return zSign ? INT32_MIN : INT32_MAX;
4137     }
4138     if (roundBits) {
4139         float_raise(float_flag_inexact, status);
4140     }
4141     return z;
4142
4143 }
4144
4145 /*----------------------------------------------------------------------------
4146 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4147 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4148 | and returns the properly rounded 64-bit integer corresponding to the input.
4149 | If `zSign' is 1, the input is negated before being converted to an integer.
4150 | Ordinarily, the fixed-point input is simply rounded to an integer, with
4151 | the inexact exception raised if the input cannot be represented exactly as
4152 | an integer.  However, if the fixed-point input is too large, the invalid
4153 | exception is raised and the largest positive or negative integer is
4154 | returned.
4155 *----------------------------------------------------------------------------*/
4156
4157 static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
4158                                float_status *status)
4159 {
4160     int8_t roundingMode;
4161     bool roundNearestEven, increment;
4162     int64_t z;
4163
4164     roundingMode = status->float_rounding_mode;
4165     roundNearestEven = ( roundingMode == float_round_nearest_even );
4166     switch (roundingMode) {
4167     case float_round_nearest_even:
4168     case float_round_ties_away:
4169         increment = ((int64_t) absZ1 < 0);
4170         break;
4171     case float_round_to_zero:
4172         increment = 0;
4173         break;
4174     case float_round_up:
4175         increment = !zSign && absZ1;
4176         break;
4177     case float_round_down:
4178         increment = zSign && absZ1;
4179         break;
4180     case float_round_to_odd:
4181         increment = !(absZ0 & 1) && absZ1;
4182         break;
4183     default:
4184         abort();
4185     }
4186     if ( increment ) {
4187         ++absZ0;
4188         if ( absZ0 == 0 ) goto overflow;
4189         if (!(absZ1 << 1) && roundNearestEven) {
4190             absZ0 &= ~1;
4191         }
4192     }
4193     z = absZ0;
4194     if ( zSign ) z = - z;
4195     if ( z && ( ( z < 0 ) ^ zSign ) ) {
4196  overflow:
4197         float_raise(float_flag_invalid, status);
4198         return zSign ? INT64_MIN : INT64_MAX;
4199     }
4200     if (absZ1) {
4201         float_raise(float_flag_inexact, status);
4202     }
4203     return z;
4204
4205 }
4206
4207 /*----------------------------------------------------------------------------
4208 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4209 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4210 | and returns the properly rounded 64-bit unsigned integer corresponding to the
4211 | input.  Ordinarily, the fixed-point input is simply rounded to an integer,
4212 | with the inexact exception raised if the input cannot be represented exactly
4213 | as an integer.  However, if the fixed-point input is too large, the invalid
4214 | exception is raised and the largest unsigned integer is returned.
4215 *----------------------------------------------------------------------------*/
4216
4217 static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
4218                                 uint64_t absZ1, float_status *status)
4219 {
4220     int8_t roundingMode;
4221     bool roundNearestEven, increment;
4222
4223     roundingMode = status->float_rounding_mode;
4224     roundNearestEven = (roundingMode == float_round_nearest_even);
4225     switch (roundingMode) {
4226     case float_round_nearest_even:
4227     case float_round_ties_away:
4228         increment = ((int64_t)absZ1 < 0);
4229         break;
4230     case float_round_to_zero:
4231         increment = 0;
4232         break;
4233     case float_round_up:
4234         increment = !zSign && absZ1;
4235         break;
4236     case float_round_down:
4237         increment = zSign && absZ1;
4238         break;
4239     case float_round_to_odd:
4240         increment = !(absZ0 & 1) && absZ1;
4241         break;
4242     default:
4243         abort();
4244     }
4245     if (increment) {
4246         ++absZ0;
4247         if (absZ0 == 0) {
4248             float_raise(float_flag_invalid, status);
4249             return UINT64_MAX;
4250         }
4251         if (!(absZ1 << 1) && roundNearestEven) {
4252             absZ0 &= ~1;
4253         }
4254     }
4255
4256     if (zSign && absZ0) {
4257         float_raise(float_flag_invalid, status);
4258         return 0;
4259     }
4260
4261     if (absZ1) {
4262         float_raise(float_flag_inexact, status);
4263     }
4264     return absZ0;
4265 }
4266
4267 /*----------------------------------------------------------------------------
4268 | Normalizes the subnormal single-precision floating-point value represented
4269 | by the denormalized significand `aSig'.  The normalized exponent and
4270 | significand are stored at the locations pointed to by `zExpPtr' and
4271 | `zSigPtr', respectively.
4272 *----------------------------------------------------------------------------*/
4273
static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /*
     * With clz32() counting leading zero bits, shifting left by this
     * amount places the most significant set bit of aSig at bit 23.
     */
    const int8_t lshift = clz32(aSig) - 8;

    *zSigPtr = aSig << lshift;
    /* Each position shifted lowers the exponent by one, starting from 1. */
    *zExpPtr = 1 - lshift;
}
4284
4285 /*----------------------------------------------------------------------------
4286 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4287 | and significand `zSig', and returns the proper single-precision floating-
4288 | point value corresponding to the abstract input.  Ordinarily, the abstract
4289 | value is simply rounded and packed into the single-precision format, with
4290 | the inexact exception raised if the abstract input cannot be represented
4291 | exactly.  However, if the abstract value is too large, the overflow and
4292 | inexact exceptions are raised and an infinity or maximal finite value is
4293 | returned.  If the abstract value is too small, the input value is rounded to
4294 | a subnormal number, and the underflow and inexact exceptions are raised if
4295 | the abstract input cannot be represented exactly as a subnormal single-
4296 | precision floating-point number.
4297 |     The input significand `zSig' has its binary point between bits 30
4298 | and 29, which is 7 bits to the left of the usual location.  This shifted
4299 | significand must be normalized or smaller.  If `zSig' is not normalized,
4300 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4301 | and it must not require rounding.  In the usual case that `zSig' is
4302 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4303 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4304 | Binary Floating-Point Arithmetic.
4305 *----------------------------------------------------------------------------*/
4306
static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * The low 7 bits of zSig are discarded at the end; pick the amount
     * added to them first so that truncation implements the rounding mode.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Bump only when the lowest kept bit (0x80) is currently clear. */
        roundIncrement = zSig & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
        break;
    }
    roundBits = zSig & 0x7F;
    /*
     * The uint16_t cast lets one comparison catch both ends: exponents at
     * or above 0xFD, and small negative exponents, which wrap to large
     * unsigned values.
     */
    if ( 0xFD <= (uint16_t) zExp ) {
        /* Overflow: exponent too large, or rounding carries out of zSig. */
        if (    ( 0xFD < zExp )
             || (    ( zExp == 0xFD )
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /*
             * Significand argument is 0 when overflowing to infinity and
             * -1 (all ones) otherwise; per the function's header comment
             * the latter yields the maximal finite value.
             */
            return packFloat32(zSign, 0xFF, -!overflow_to_inf);
        }
        if ( zExp < 0 ) {
            /* Result is below the normal range. */
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat32(zSign, 0, 0);
            }
            /*
             * Tininess is decided here, before zSig is shifted, using
             * either the before-rounding setting or whether the rounded
             * value would still fall short of the top bit.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < 0x80000000);
            /* Shift right by -zExp, jamming lost bits into the low bit. */
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x7F;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = zSig & 0x80 ? 0 : 0x7f;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>7;
    /* roundBits == 0x40 is an exact tie: nearest-even clears the low bit. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        zSig &= ~1;
    }
    /* A zero significand is always packed with a zero exponent. */
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );

}
4383
4384 /*----------------------------------------------------------------------------
4385 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4386 | and significand `zSig', and returns the proper single-precision floating-
4387 | point value corresponding to the abstract input.  This routine is just like
4388 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4389 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4390 | floating-point exponent.
4391 *----------------------------------------------------------------------------*/
4392
4393 static float32
4394  normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4395                               float_status *status)
4396 {
4397     int8_t shiftCount;
4398
4399     shiftCount = clz32(zSig) - 1;
4400     return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4401                                status);
4402
4403 }
4404
4405 /*----------------------------------------------------------------------------
4406 | Normalizes the subnormal double-precision floating-point value represented
4407 | by the denormalized significand `aSig'.  The normalized exponent and
4408 | significand are stored at the locations pointed to by `zExpPtr' and
4409 | `zSigPtr', respectively.
4410 *----------------------------------------------------------------------------*/
4411
static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /*
     * With clz64() counting leading zero bits, shifting left by this
     * amount places the most significant set bit of aSig at bit 52.
     */
    const int8_t lshift = clz64(aSig) - 11;

    *zSigPtr = aSig << lshift;
    /* Each position shifted lowers the exponent by one, starting from 1. */
    *zExpPtr = 1 - lshift;
}
4422
4423 /*----------------------------------------------------------------------------
4424 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4425 | double-precision floating-point value, returning the result.  After being
4426 | shifted into the proper positions, the three fields are simply added
4427 | together to form the result.  This means that any integer portion of `zSig'
4428 | will be added into the exponent.  Since a properly normalized significand
4429 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4430 | than the desired result exponent whenever `zSig' is a complete, normalized
4431 | significand.
4432 *----------------------------------------------------------------------------*/
4433
4434 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4435 {
4436
4437     return make_float64(
4438         ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4439
4440 }
4441
4442 /*----------------------------------------------------------------------------
4443 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4444 | and significand `zSig', and returns the proper double-precision floating-
4445 | point value corresponding to the abstract input.  Ordinarily, the abstract
4446 | value is simply rounded and packed into the double-precision format, with
4447 | the inexact exception raised if the abstract input cannot be represented
4448 | exactly.  However, if the abstract value is too large, the overflow and
4449 | inexact exceptions are raised and an infinity or maximal finite value is
4450 | returned.  If the abstract value is too small, the input value is rounded to
4451 | a subnormal number, and the underflow and inexact exceptions are raised if
4452 | the abstract input cannot be represented exactly as a subnormal double-
4453 | precision floating-point number.
4454 |     The input significand `zSig' has its binary point between bits 62
4455 | and 61, which is 10 bits to the left of the usual location.  This shifted
4456 | significand must be normalized or smaller.  If `zSig' is not normalized,
4457 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4458 | and it must not require rounding.  In the usual case that `zSig' is
4459 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4460 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4461 | Binary Floating-Point Arithmetic.
4462 *----------------------------------------------------------------------------*/
4463
static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * The low 10 bits of zSig are discarded at the end; pick the amount
     * added to them first so that truncation implements the rounding mode.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x200;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x3ff;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x3ff : 0;
        break;
    case float_round_to_odd:
        /* Bump only when the lowest kept bit (0x400) is currently clear. */
        roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
        break;
    default:
        abort();
    }
    roundBits = zSig & 0x3FF;
    /*
     * The uint16_t cast lets one comparison catch both ends: exponents at
     * or above 0x7FD, and small negative exponents, which wrap to large
     * unsigned values.
     */
    if ( 0x7FD <= (uint16_t) zExp ) {
        /* Overflow: exponent too large, or rounding carries out of zSig. */
        if (    ( 0x7FD < zExp )
             || (    ( zExp == 0x7FD )
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /*
             * Significand argument is 0 for infinity.  Otherwise it is -1
             * (all ones); since packFloat64() adds its fields, that gives
             * one less than the infinity encoding, i.e. the largest finite
             * value.
             */
            return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
        }
        if ( zExp < 0 ) {
            /* Result is below the normal range. */
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat64(zSign, 0, 0);
            }
            /*
             * Tininess is decided here, before zSig is shifted, using
             * either the before-rounding setting or whether the rounded
             * value would still fall short of the top bit.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
            /* Shift right by -zExp, jamming lost bits into the low bit. */
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x3FF;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>10;
    /* roundBits == 0x200 is an exact tie: nearest-even clears the low bit. */
    if (!(roundBits ^ 0x200) && roundNearestEven) {
        zSig &= ~1;
    }
    /* A zero significand is always packed with a zero exponent. */
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );

}
4539
4540 /*----------------------------------------------------------------------------
4541 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4542 | and significand `zSig', and returns the proper double-precision floating-
4543 | point value corresponding to the abstract input.  This routine is just like
4544 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4545 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4546 | floating-point exponent.
4547 *----------------------------------------------------------------------------*/
4548
4549 static float64
4550  normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4551                               float_status *status)
4552 {
4553     int8_t shiftCount;
4554
4555     shiftCount = clz64(zSig) - 1;
4556     return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4557                                status);
4558
4559 }
4560
4561 /*----------------------------------------------------------------------------
4562 | Normalizes the subnormal extended double-precision floating-point value
4563 | represented by the denormalized significand `aSig'.  The normalized exponent
4564 | and significand are stored at the locations pointed to by `zExpPtr' and
4565 | `zSigPtr', respectively.
4566 *----------------------------------------------------------------------------*/
4567
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /*
     * With clz64() counting leading zero bits, shifting left by this
     * amount places the most significant set bit of aSig at bit 63.
     */
    const int8_t lshift = clz64(aSig);

    *zSigPtr = aSig << lshift;
    /* Each position shifted lowers the exponent by one, starting from 1. */
    *zExpPtr = 1 - lshift;
}
4577
4578 /*----------------------------------------------------------------------------
4579 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4580 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4581 | and returns the proper extended double-precision floating-point value
4582 | corresponding to the abstract input.  Ordinarily, the abstract value is
4583 | rounded and packed into the extended double-precision format, with the
4584 | inexact exception raised if the abstract input cannot be represented
4585 | exactly.  However, if the abstract value is too large, the overflow and
4586 | inexact exceptions are raised and an infinity or maximal finite value is
4587 | returned.  If the abstract value is too small, the input value is rounded to
4588 | a subnormal number, and the underflow and inexact exceptions are raised if
4589 | the abstract input cannot be represented exactly as a subnormal extended
4590 | double-precision floating-point number.
4591 |     If `roundingPrecision' is 32 or 64, the result is rounded to the same
4592 | number of bits as single or double precision, respectively.  Otherwise, the
4593 | result is rounded to the full precision of the extended double-precision
4594 | format.
4595 |     The input significand must be normalized or smaller.  If the input
4596 | significand is not normalized, `zExp' must be 0; in that case, the result
4597 | returned is a subnormal number, and it must not require rounding.  The
4598 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4599 | Floating-Point Arithmetic.
4600 *----------------------------------------------------------------------------*/
4601
4602 floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
4603                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4604                               float_status *status)
4605 {
4606     int8_t roundingMode;
4607     bool roundNearestEven, increment, isTiny;
4608     int64_t roundIncrement, roundMask, roundBits;
4609
4610     roundingMode = status->float_rounding_mode;
4611     roundNearestEven = ( roundingMode == float_round_nearest_even );
4612     if ( roundingPrecision == 80 ) goto precision80;
4613     if ( roundingPrecision == 64 ) {
4614         roundIncrement = UINT64_C(0x0000000000000400);
4615         roundMask = UINT64_C(0x00000000000007FF);
4616     }
4617     else if ( roundingPrecision == 32 ) {
4618         roundIncrement = UINT64_C(0x0000008000000000);
4619         roundMask = UINT64_C(0x000000FFFFFFFFFF);
4620     }
4621     else {
4622         goto precision80;
4623     }
4624     zSig0 |= ( zSig1 != 0 );
4625     switch (roundingMode) {
4626     case float_round_nearest_even:
4627     case float_round_ties_away:
4628         break;
4629     case float_round_to_zero:
4630         roundIncrement = 0;
4631         break;
4632     case float_round_up:
4633         roundIncrement = zSign ? 0 : roundMask;
4634         break;
4635     case float_round_down:
4636         roundIncrement = zSign ? roundMask : 0;
4637         break;
4638     default:
4639         abort();
4640     }
4641     roundBits = zSig0 & roundMask;
4642     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4643         if (    ( 0x7FFE < zExp )
4644              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4645            ) {
4646             goto overflow;
4647         }
4648         if ( zExp <= 0 ) {
4649             if (status->flush_to_zero) {
4650                 float_raise(float_flag_output_denormal, status);
4651                 return packFloatx80(zSign, 0, 0);
4652             }
4653             isTiny = status->tininess_before_rounding
4654                   || (zExp < 0 )
4655                   || (zSig0 <= zSig0 + roundIncrement);
4656             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4657             zExp = 0;
4658             roundBits = zSig0 & roundMask;
4659             if (isTiny && roundBits) {
4660                 float_raise(float_flag_underflow, status);
4661             }
4662             if (roundBits) {
4663                 float_raise(float_flag_inexact, status);
4664             }
4665             zSig0 += roundIncrement;
4666             if ( (int64_t) zSig0 < 0 ) zExp = 1;
4667             roundIncrement = roundMask + 1;
4668             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4669                 roundMask |= roundIncrement;
4670             }
4671             zSig0 &= ~ roundMask;
4672             return packFloatx80( zSign, zExp, zSig0 );
4673         }
4674     }
4675     if (roundBits) {
4676         float_raise(float_flag_inexact, status);
4677     }
4678     zSig0 += roundIncrement;
4679     if ( zSig0 < roundIncrement ) {
4680         ++zExp;
4681         zSig0 = UINT64_C(0x8000000000000000);
4682     }
4683     roundIncrement = roundMask + 1;
4684     if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4685         roundMask |= roundIncrement;
4686     }
4687     zSig0 &= ~ roundMask;
4688     if ( zSig0 == 0 ) zExp = 0;
4689     return packFloatx80( zSign, zExp, zSig0 );
4690  precision80:
4691     switch (roundingMode) {
4692     case float_round_nearest_even:
4693     case float_round_ties_away:
4694         increment = ((int64_t)zSig1 < 0);
4695         break;
4696     case float_round_to_zero:
4697         increment = 0;
4698         break;
4699     case float_round_up:
4700         increment = !zSign && zSig1;
4701         break;
4702     case float_round_down:
4703         increment = zSign && zSig1;
4704         break;
4705     default:
4706         abort();
4707     }
4708     if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4709         if (    ( 0x7FFE < zExp )
4710              || (    ( zExp == 0x7FFE )
4711                   && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4712                   && increment
4713                 )
4714            ) {
4715             roundMask = 0;
4716  overflow:
4717             float_raise(float_flag_overflow | float_flag_inexact, status);
4718             if (    ( roundingMode == float_round_to_zero )
4719                  || ( zSign && ( roundingMode == float_round_up ) )
4720                  || ( ! zSign && ( roundingMode == float_round_down ) )
4721                ) {
4722                 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4723             }
4724             return packFloatx80(zSign,
4725                                 floatx80_infinity_high,
4726                                 floatx80_infinity_low);
4727         }
4728         if ( zExp <= 0 ) {
4729             isTiny = status->tininess_before_rounding
4730                   || (zExp < 0)
4731                   || !increment
4732                   || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4733             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4734             zExp = 0;
4735             if (isTiny && zSig1) {
4736                 float_raise(float_flag_underflow, status);
4737             }
4738             if (zSig1) {
4739                 float_raise(float_flag_inexact, status);
4740             }
4741             switch (roundingMode) {
4742             case float_round_nearest_even:
4743             case float_round_ties_away:
4744                 increment = ((int64_t)zSig1 < 0);
4745                 break;
4746             case float_round_to_zero:
4747                 increment = 0;
4748                 break;
4749             case float_round_up:
4750                 increment = !zSign && zSig1;
4751                 break;
4752             case float_round_down:
4753                 increment = zSign && zSig1;
4754                 break;
4755             default:
4756                 abort();
4757             }
4758             if ( increment ) {
4759                 ++zSig0;
4760                 if (!(zSig1 << 1) && roundNearestEven) {
4761                     zSig0 &= ~1;
4762                 }
4763                 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4764             }
4765             return packFloatx80( zSign, zExp, zSig0 );
4766         }
4767     }
4768     if (zSig1) {
4769         float_raise(float_flag_inexact, status);
4770     }
4771     if ( increment ) {
4772         ++zSig0;
4773         if ( zSig0 == 0 ) {
4774             ++zExp;
4775             zSig0 = UINT64_C(0x8000000000000000);
4776         }
4777         else {
4778             if (!(zSig1 << 1) && roundNearestEven) {
4779                 zSig0 &= ~1;
4780             }
4781         }
4782     }
4783     else {
4784         if ( zSig0 == 0 ) zExp = 0;
4785     }
4786     return packFloatx80( zSign, zExp, zSig0 );
4787
4788 }
4789
4790 /*----------------------------------------------------------------------------
4791 | Takes an abstract floating-point value having sign `zSign', exponent
4792 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4793 | and returns the proper extended double-precision floating-point value
4794 | corresponding to the abstract input.  This routine is just like
4795 | `roundAndPackFloatx80' except that the input significand does not have to be
4796 | normalized.
4797 *----------------------------------------------------------------------------*/
4798
4799 floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4800                                        bool zSign, int32_t zExp,
4801                                        uint64_t zSig0, uint64_t zSig1,
4802                                        float_status *status)
4803 {
4804     int8_t shiftCount;
4805
4806     if ( zSig0 == 0 ) {
4807         zSig0 = zSig1;
4808         zSig1 = 0;
4809         zExp -= 64;
4810     }
4811     shiftCount = clz64(zSig0);
4812     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4813     zExp -= shiftCount;
4814     return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4815                                 zSig0, zSig1, status);
4816
4817 }
4818
4819 /*----------------------------------------------------------------------------
4820 | Returns the least-significant 64 fraction bits of the quadruple-precision
4821 | floating-point value `a'.
4822 *----------------------------------------------------------------------------*/
4823
4824 static inline uint64_t extractFloat128Frac1( float128 a )
4825 {
4826
4827     return a.low;
4828
4829 }
4830
4831 /*----------------------------------------------------------------------------
4832 | Returns the most-significant 48 fraction bits of the quadruple-precision
4833 | floating-point value `a'.
4834 *----------------------------------------------------------------------------*/
4835
4836 static inline uint64_t extractFloat128Frac0( float128 a )
4837 {
4838
4839     return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4840
4841 }
4842
4843 /*----------------------------------------------------------------------------
4844 | Returns the exponent bits of the quadruple-precision floating-point value
4845 | `a'.
4846 *----------------------------------------------------------------------------*/
4847
4848 static inline int32_t extractFloat128Exp( float128 a )
4849 {
4850
4851     return ( a.high>>48 ) & 0x7FFF;
4852
4853 }
4854
4855 /*----------------------------------------------------------------------------
4856 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4857 *----------------------------------------------------------------------------*/
4858
4859 static inline bool extractFloat128Sign(float128 a)
4860 {
4861     return a.high >> 63;
4862 }
4863
/*----------------------------------------------------------------------------
| Normalizes the subnormal quadruple-precision floating-point value whose
| denormalized significand is the concatenation of `aSig0' and `aSig1'.
| The normalized exponent is written through `zExpPtr'.  The most significant
| 49 bits of the normalized significand are written through `zSig0Ptr' and
| the least significant 64 bits through `zSig1Ptr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shift;

    if (aSig0 != 0) {
        /* High word is populated: one short 128-bit left shift suffices. */
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /* Only the low word is populated; it also supplies the high word. */
    shift = clz64(aSig1) - 15;
    if (shift >= 0) {
        /* Everything fits in the high word after the shift. */
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    } else {
        /* Negative shift: split aSig1 between the high and low words. */
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    }
    *zExpPtr = -shift - 63;
}
4904
4905 /*----------------------------------------------------------------------------
4906 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4907 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4908 | floating-point value, returning the result.  After being shifted into the
4909 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4910 | added together to form the most significant 32 bits of the result.  This
4911 | means that any integer portion of `zSig0' will be added into the exponent.
4912 | Since a properly normalized significand will have an integer portion equal
4913 | to 1, the `zExp' input should be 1 less than the desired result exponent
4914 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4915 | significand.
4916 *----------------------------------------------------------------------------*/
4917
4918 static inline float128
4919 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4920 {
4921     float128 z;
4922
4923     z.low = zSig1;
4924     z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4925     return z;
4926 }
4927
4928 /*----------------------------------------------------------------------------
4929 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4930 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4931 | and `zSig2', and returns the proper quadruple-precision floating-point value
4932 | corresponding to the abstract input.  Ordinarily, the abstract value is
4933 | simply rounded and packed into the quadruple-precision format, with the
4934 | inexact exception raised if the abstract input cannot be represented
4935 | exactly.  However, if the abstract value is too large, the overflow and
4936 | inexact exceptions are raised and an infinity or maximal finite value is
4937 | returned.  If the abstract value is too small, the input value is rounded to
4938 | a subnormal number, and the underflow and inexact exceptions are raised if
4939 | the abstract input cannot be represented exactly as a subnormal quadruple-
4940 | precision floating-point number.
4941 |     The input significand must be normalized or smaller.  If the input
4942 | significand is not normalized, `zExp' must be 0; in that case, the result
4943 | returned is a subnormal number, and it must not require rounding.  In the
4944 | usual case that the input significand is normalized, `zExp' must be 1 less
4945 | than the ``true'' floating-point exponent.  The handling of underflow and
4946 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4947 *----------------------------------------------------------------------------*/
4948
4949 static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
4950                                      uint64_t zSig0, uint64_t zSig1,
4951                                      uint64_t zSig2, float_status *status)
4952 {
4953     int8_t roundingMode;
4954     bool roundNearestEven, increment, isTiny;
4955
4956     roundingMode = status->float_rounding_mode;
4957     roundNearestEven = ( roundingMode == float_round_nearest_even );
4958     switch (roundingMode) {
4959     case float_round_nearest_even:
4960     case float_round_ties_away:
4961         increment = ((int64_t)zSig2 < 0);
4962         break;
4963     case float_round_to_zero:
4964         increment = 0;
4965         break;
4966     case float_round_up:
4967         increment = !zSign && zSig2;
4968         break;
4969     case float_round_down:
4970         increment = zSign && zSig2;
4971         break;
4972     case float_round_to_odd:
4973         increment = !(zSig1 & 0x1) && zSig2;
4974         break;
4975     default:
4976         abort();
4977     }
4978     if ( 0x7FFD <= (uint32_t) zExp ) {
4979         if (    ( 0x7FFD < zExp )
4980              || (    ( zExp == 0x7FFD )
4981                   && eq128(
4982                          UINT64_C(0x0001FFFFFFFFFFFF),
4983                          UINT64_C(0xFFFFFFFFFFFFFFFF),
4984                          zSig0,
4985                          zSig1
4986                      )
4987                   && increment
4988                 )
4989            ) {
4990             float_raise(float_flag_overflow | float_flag_inexact, status);
4991             if (    ( roundingMode == float_round_to_zero )
4992                  || ( zSign && ( roundingMode == float_round_up ) )
4993                  || ( ! zSign && ( roundingMode == float_round_down ) )
4994                  || (roundingMode == float_round_to_odd)
4995                ) {
4996                 return
4997                     packFloat128(
4998                         zSign,
4999                         0x7FFE,
5000                         UINT64_C(0x0000FFFFFFFFFFFF),
5001                         UINT64_C(0xFFFFFFFFFFFFFFFF)
5002                     );
5003             }
5004             return packFloat128( zSign, 0x7FFF, 0, 0 );
5005         }
5006         if ( zExp < 0 ) {
5007             if (status->flush_to_zero) {
5008                 float_raise(float_flag_output_denormal, status);
5009                 return packFloat128(zSign, 0, 0, 0);
5010             }
5011             isTiny = status->tininess_before_rounding
5012                   || (zExp < -1)
5013                   || !increment
5014                   || lt128(zSig0, zSig1,
5015                            UINT64_C(0x0001FFFFFFFFFFFF),
5016                            UINT64_C(0xFFFFFFFFFFFFFFFF));
5017             shift128ExtraRightJamming(
5018                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
5019             zExp = 0;
5020             if (isTiny && zSig2) {
5021                 float_raise(float_flag_underflow, status);
5022             }
5023             switch (roundingMode) {
5024             case float_round_nearest_even:
5025             case float_round_ties_away:
5026                 increment = ((int64_t)zSig2 < 0);
5027                 break;
5028             case float_round_to_zero:
5029                 increment = 0;
5030                 break;
5031             case float_round_up:
5032                 increment = !zSign && zSig2;
5033                 break;
5034             case float_round_down:
5035                 increment = zSign && zSig2;
5036                 break;
5037             case float_round_to_odd:
5038                 increment = !(zSig1 & 0x1) && zSig2;
5039                 break;
5040             default:
5041                 abort();
5042             }
5043         }
5044     }
5045     if (zSig2) {
5046         float_raise(float_flag_inexact, status);
5047     }
5048     if ( increment ) {
5049         add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
5050         if ((zSig2 + zSig2 == 0) && roundNearestEven) {
5051             zSig1 &= ~1;
5052         }
5053     }
5054     else {
5055         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
5056     }
5057     return packFloat128( zSign, zExp, zSig0, zSig1 );
5058
5059 }
5060
5061 /*----------------------------------------------------------------------------
5062 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
5063 | and significand formed by the concatenation of `zSig0' and `zSig1', and
5064 | returns the proper quadruple-precision floating-point value corresponding
5065 | to the abstract input.  This routine is just like `roundAndPackFloat128'
5066 | except that the input significand has fewer bits and does not have to be
5067 | normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
5068 | point exponent.
5069 *----------------------------------------------------------------------------*/
5070
5071 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
5072                                               uint64_t zSig0, uint64_t zSig1,
5073                                               float_status *status)
5074 {
5075     int8_t shiftCount;
5076     uint64_t zSig2;
5077
5078     if ( zSig0 == 0 ) {
5079         zSig0 = zSig1;
5080         zSig1 = 0;
5081         zExp -= 64;
5082     }
5083     shiftCount = clz64(zSig0) - 15;
5084     if ( 0 <= shiftCount ) {
5085         zSig2 = 0;
5086         shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5087     }
5088     else {
5089         shift128ExtraRightJamming(
5090             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
5091     }
5092     zExp -= shiftCount;
5093     return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
5094
5095 }
5096
5097
5098 /*----------------------------------------------------------------------------
5099 | Returns the result of converting the 32-bit two's complement integer `a'
5100 | to the extended double-precision floating-point format.  The conversion
5101 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5102 | Arithmetic.
5103 *----------------------------------------------------------------------------*/
5104
5105 floatx80 int32_to_floatx80(int32_t a, float_status *status)
5106 {
5107     bool zSign;
5108     uint32_t absA;
5109     int8_t shiftCount;
5110     uint64_t zSig;
5111
5112     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5113     zSign = ( a < 0 );
5114     absA = zSign ? - a : a;
5115     shiftCount = clz32(absA) + 32;
5116     zSig = absA;
5117     return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
5118
5119 }
5120
5121 /*----------------------------------------------------------------------------
5122 | Returns the result of converting the 32-bit two's complement integer `a' to
5123 | the quadruple-precision floating-point format.  The conversion is performed
5124 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5125 *----------------------------------------------------------------------------*/
5126
5127 float128 int32_to_float128(int32_t a, float_status *status)
5128 {
5129     bool zSign;
5130     uint32_t absA;
5131     int8_t shiftCount;
5132     uint64_t zSig0;
5133
5134     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
5135     zSign = ( a < 0 );
5136     absA = zSign ? - a : a;
5137     shiftCount = clz32(absA) + 17;
5138     zSig0 = absA;
5139     return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
5140
5141 }
5142
5143 /*----------------------------------------------------------------------------
5144 | Returns the result of converting the 64-bit two's complement integer `a'
5145 | to the extended double-precision floating-point format.  The conversion
5146 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5147 | Arithmetic.
5148 *----------------------------------------------------------------------------*/
5149
5150 floatx80 int64_to_floatx80(int64_t a, float_status *status)
5151 {
5152     bool zSign;
5153     uint64_t absA;
5154     int8_t shiftCount;
5155
5156     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5157     zSign = ( a < 0 );
5158     absA = zSign ? - a : a;
5159     shiftCount = clz64(absA);
5160     return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
5161
5162 }
5163
5164 /*----------------------------------------------------------------------------
5165 | Returns the result of converting the 64-bit two's complement integer `a' to
5166 | the quadruple-precision floating-point format.  The conversion is performed
5167 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5168 *----------------------------------------------------------------------------*/
5169
5170 float128 int64_to_float128(int64_t a, float_status *status)
5171 {
5172     bool zSign;
5173     uint64_t absA;
5174     int8_t shiftCount;
5175     int32_t zExp;
5176     uint64_t zSig0, zSig1;
5177
5178     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
5179     zSign = ( a < 0 );
5180     absA = zSign ? - a : a;
5181     shiftCount = clz64(absA) + 49;
5182     zExp = 0x406E - shiftCount;
5183     if ( 64 <= shiftCount ) {
5184         zSig1 = 0;
5185         zSig0 = absA;
5186         shiftCount -= 64;
5187     }
5188     else {
5189         zSig1 = absA;
5190         zSig0 = 0;
5191     }
5192     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5193     return packFloat128( zSign, zExp, zSig0, zSig1 );
5194
5195 }
5196
5197 /*----------------------------------------------------------------------------
5198 | Returns the result of converting the 64-bit unsigned integer `a'
5199 | to the quadruple-precision floating-point format.  The conversion is performed
5200 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5201 *----------------------------------------------------------------------------*/
5202
5203 float128 uint64_to_float128(uint64_t a, float_status *status)
5204 {
5205     if (a == 0) {
5206         return float128_zero;
5207     }
5208     return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
5209 }
5210
5211 /*----------------------------------------------------------------------------
5212 | Returns the result of converting the single-precision floating-point value
5213 | `a' to the extended double-precision floating-point format.  The conversion
5214 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5215 | Arithmetic.
5216 *----------------------------------------------------------------------------*/
5217
5218 floatx80 float32_to_floatx80(float32 a, float_status *status)
5219 {
5220     bool aSign;
5221     int aExp;
5222     uint32_t aSig;
5223
5224     a = float32_squash_input_denormal(a, status);
5225     aSig = extractFloat32Frac( a );
5226     aExp = extractFloat32Exp( a );
5227     aSign = extractFloat32Sign( a );
5228     if ( aExp == 0xFF ) {
5229         if (aSig) {
5230             floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
5231                                                status);
5232             return floatx80_silence_nan(res, status);
5233         }
5234         return packFloatx80(aSign,
5235                             floatx80_infinity_high,
5236                             floatx80_infinity_low);
5237     }
5238     if ( aExp == 0 ) {
5239         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5240         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5241     }
5242     aSig |= 0x00800000;
5243     return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
5244
5245 }
5246
5247 /*----------------------------------------------------------------------------
5248 | Returns the remainder of the single-precision floating-point value `a'
5249 | with respect to the corresponding value `b'.  The operation is performed
5250 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5251 *----------------------------------------------------------------------------*/
5252
5253 float32 float32_rem(float32 a, float32 b, float_status *status)
5254 {
5255     bool aSign, zSign;
5256     int aExp, bExp, expDiff;
5257     uint32_t aSig, bSig;
5258     uint32_t q;
5259     uint64_t aSig64, bSig64, q64;
5260     uint32_t alternateASig;
5261     int32_t sigMean;
5262     a = float32_squash_input_denormal(a, status);
5263     b = float32_squash_input_denormal(b, status);
5264
5265     aSig = extractFloat32Frac( a );
5266     aExp = extractFloat32Exp( a );
5267     aSign = extractFloat32Sign( a );
5268     bSig = extractFloat32Frac( b );
5269     bExp = extractFloat32Exp( b );
5270     if ( aExp == 0xFF ) {
5271         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
5272             return propagateFloat32NaN(a, b, status);
5273         }
5274         float_raise(float_flag_invalid, status);
5275         return float32_default_nan(status);
5276     }
5277     if ( bExp == 0xFF ) {
5278         if (bSig) {
5279             return propagateFloat32NaN(a, b, status);
5280         }
5281         return a;
5282     }
5283     if ( bExp == 0 ) {
5284         if ( bSig == 0 ) {
5285             float_raise(float_flag_invalid, status);
5286             return float32_default_nan(status);
5287         }
5288         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
5289     }
5290     if ( aExp == 0 ) {
5291         if ( aSig == 0 ) return a;
5292         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5293     }
5294     expDiff = aExp - bExp;
5295     aSig |= 0x00800000;
5296     bSig |= 0x00800000;
5297     if ( expDiff < 32 ) {
5298         aSig <<= 8;
5299         bSig <<= 8;
5300         if ( expDiff < 0 ) {
5301             if ( expDiff < -1 ) return a;
5302             aSig >>= 1;
5303         }
5304         q = ( bSig <= aSig );
5305         if ( q ) aSig -= bSig;
5306         if ( 0 < expDiff ) {
5307             q = ( ( (uint64_t) aSig )<<32 ) / bSig;
5308             q >>= 32 - expDiff;
5309             bSig >>= 2;
5310             aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5311         }
5312         else {
5313             aSig >>= 2;
5314             bSig >>= 2;
5315         }
5316     }
5317     else {
5318         if ( bSig <= aSig ) aSig -= bSig;
5319         aSig64 = ( (uint64_t) aSig )<<40;
5320         bSig64 = ( (uint64_t) bSig )<<40;
5321         expDiff -= 64;
5322         while ( 0 < expDiff ) {
5323             q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5324             q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5325             aSig64 = - ( ( bSig * q64 )<<38 );
5326             expDiff -= 62;
5327         }
5328         expDiff += 64;
5329         q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5330         q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5331         q = q64>>( 64 - expDiff );
5332         bSig <<= 6;
5333         aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5334     }
5335     do {
5336         alternateASig = aSig;
5337         ++q;
5338         aSig -= bSig;
5339     } while ( 0 <= (int32_t) aSig );
5340     sigMean = aSig + alternateASig;
5341     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5342         aSig = alternateASig;
5343     }
5344     zSign = ( (int32_t) aSig < 0 );
5345     if ( zSign ) aSig = - aSig;
5346     return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
5347 }
5348
5349
5350
5351 /*----------------------------------------------------------------------------
5352 | Returns the binary exponential of the single-precision floating-point value
5353 | `a'. The operation is performed according to the IEC/IEEE Standard for
5354 | Binary Floating-Point Arithmetic.
5355 |
5356 | Uses the following identities:
5357 |
5358 | 1. -------------------------------------------------------------------------
5359 |      x    x*ln(2)
5360 |     2  = e
5361 |
5362 | 2. -------------------------------------------------------------------------
5363 |                      2     3     4     5           n
5364 |      x        x     x     x     x     x           x
5365 |     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5366 |               1!    2!    3!    4!    5!          n!
5367 *----------------------------------------------------------------------------*/
5368
5369 static const float64 float32_exp2_coefficients[15] =
5370 {
5371     const_float64( 0x3ff0000000000000ll ), /*  1 */
5372     const_float64( 0x3fe0000000000000ll ), /*  2 */
5373     const_float64( 0x3fc5555555555555ll ), /*  3 */
5374     const_float64( 0x3fa5555555555555ll ), /*  4 */
5375     const_float64( 0x3f81111111111111ll ), /*  5 */
5376     const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
5377     const_float64( 0x3f2a01a01a01a01all ), /*  7 */
5378     const_float64( 0x3efa01a01a01a01all ), /*  8 */
5379     const_float64( 0x3ec71de3a556c734ll ), /*  9 */
5380     const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5381     const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5382     const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5383     const_float64( 0x3de6124613a86d09ll ), /* 13 */
5384     const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5385     const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5386 };
5387
5388 float32 float32_exp2(float32 a, float_status *status)
5389 {
5390     bool aSign;
5391     int aExp;
5392     uint32_t aSig;
5393     float64 r, x, xn;
5394     int i;
5395     a = float32_squash_input_denormal(a, status);
5396
5397     aSig = extractFloat32Frac( a );
5398     aExp = extractFloat32Exp( a );
5399     aSign = extractFloat32Sign( a );
5400
5401     if ( aExp == 0xFF) {
5402         if (aSig) {
5403             return propagateFloat32NaN(a, float32_zero, status);
5404         }
5405         return (aSign) ? float32_zero : a;
5406     }
5407     if (aExp == 0) {
5408         if (aSig == 0) return float32_one;
5409     }
5410
5411     float_raise(float_flag_inexact, status);
5412
5413     /* ******************************* */
5414     /* using float64 for approximation */
5415     /* ******************************* */
5416     x = float32_to_float64(a, status);
5417     x = float64_mul(x, float64_ln2, status);
5418
5419     xn = x;
5420     r = float64_one;
5421     for (i = 0 ; i < 15 ; i++) {
5422         float64 f;
5423
5424         f = float64_mul(xn, float32_exp2_coefficients[i], status);
5425         r = float64_add(r, f, status);
5426
5427         xn = float64_mul(xn, x, status);
5428     }
5429
5430     return float64_to_float32(r, status);
5431 }
5432
5433 /*----------------------------------------------------------------------------
5434 | Returns the binary log of the single-precision floating-point value `a'.
5435 | The operation is performed according to the IEC/IEEE Standard for Binary
5436 | Floating-Point Arithmetic.
5437 *----------------------------------------------------------------------------*/
5438 float32 float32_log2(float32 a, float_status *status)
5439 {
5440     bool aSign, zSign;
5441     int aExp;
5442     uint32_t aSig, zSig, i;
5443
5444     a = float32_squash_input_denormal(a, status);
5445     aSig = extractFloat32Frac( a );
5446     aExp = extractFloat32Exp( a );
5447     aSign = extractFloat32Sign( a );
5448
5449     if ( aExp == 0 ) {
5450         if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5451         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5452     }
5453     if ( aSign ) {
5454         float_raise(float_flag_invalid, status);
5455         return float32_default_nan(status);
5456     }
5457     if ( aExp == 0xFF ) {
5458         if (aSig) {
5459             return propagateFloat32NaN(a, float32_zero, status);
5460         }
5461         return a;
5462     }
5463
5464     aExp -= 0x7F;
5465     aSig |= 0x00800000;
5466     zSign = aExp < 0;
5467     zSig = aExp << 23;
5468
5469     for (i = 1 << 22; i > 0; i >>= 1) {
5470         aSig = ( (uint64_t)aSig * aSig ) >> 23;
5471         if ( aSig & 0x01000000 ) {
5472             aSig >>= 1;
5473             zSig |= i;
5474         }
5475     }
5476
5477     if ( zSign )
5478         zSig = -zSig;
5479
5480     return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
5481 }
5482
5483 /*----------------------------------------------------------------------------
5484 | Returns the result of converting the double-precision floating-point value
5485 | `a' to the extended double-precision floating-point format.  The conversion
5486 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5487 | Arithmetic.
5488 *----------------------------------------------------------------------------*/
5489
5490 floatx80 float64_to_floatx80(float64 a, float_status *status)
5491 {
5492     bool aSign;
5493     int aExp;
5494     uint64_t aSig;
5495
5496     a = float64_squash_input_denormal(a, status);
5497     aSig = extractFloat64Frac( a );
5498     aExp = extractFloat64Exp( a );
5499     aSign = extractFloat64Sign( a );
5500     if ( aExp == 0x7FF ) {
5501         if (aSig) {
5502             floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5503                                                status);
5504             return floatx80_silence_nan(res, status);
5505         }
5506         return packFloatx80(aSign,
5507                             floatx80_infinity_high,
5508                             floatx80_infinity_low);
5509     }
5510     if ( aExp == 0 ) {
5511         if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5512         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5513     }
5514     return
5515         packFloatx80(
5516             aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
5517
5518 }
5519
/*----------------------------------------------------------------------------
| Returns the remainder of the double-precision floating-point value `a'
| with respect to the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
| Special cases handled below: NaN operands are propagated; an infinite `a'
| or a zero `b' raises the invalid flag and returns the default NaN; an
| infinite `b' or a zero `a' returns `a' unchanged.
*----------------------------------------------------------------------------*/

float64 float64_rem(float64 a, float64 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint64_t aSig, bSig;
    uint64_t q, alternateASig;
    int64_t sigMean;

    a = float64_squash_input_denormal(a, status);
    b = float64_squash_input_denormal(b, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    /* `a' is a NaN or an infinity. */
    if ( aExp == 0x7FF ) {
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    /* `b' is a NaN or an infinity (and `a' is finite). */
    if ( bExp == 0x7FF ) {
        if (bSig) {
            return propagateFloat64NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float64_default_nan(status);
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Set the implicit leading one and left-align both significands. */
    aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
    bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
    if ( expDiff < 0 ) {
        /* Exponent of `a' is at least two below that of `b': `a' is
         * returned as the remainder. */
        if ( expDiff < -1 ) return a;
        aSig >>= 1;
    }
    q = ( bSig <= aSig );
    if ( q ) aSig -= bSig;
    /* Reduce the exponent difference 62 bits at a time using an estimated
     * quotient that is deliberately lowered by 2. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        aSig = - ( ( bSig>>2 ) * q );
        expDiff -= 62;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        /* Final partial step covering the remaining expDiff bits. */
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        bSig >>= 2;
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
    }
    else {
        aSig >>= 2;
        bSig >>= 2;
    }
    /* Keep subtracting bSig until the partial remainder turns negative;
     * alternateASig holds the last non-negative value. */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int64_t) aSig );
    /* Pick whichever candidate has the smaller magnitude; on a tie, fall
     * back to the non-negative candidate when q is odd. */
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int64_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);

}
5607
5608 /*----------------------------------------------------------------------------
5609 | Returns the binary log of the double-precision floating-point value `a'.
5610 | The operation is performed according to the IEC/IEEE Standard for Binary
5611 | Floating-Point Arithmetic.
5612 *----------------------------------------------------------------------------*/
5613 float64 float64_log2(float64 a, float_status *status)
5614 {
5615     bool aSign, zSign;
5616     int aExp;
5617     uint64_t aSig, aSig0, aSig1, zSig, i;
5618     a = float64_squash_input_denormal(a, status);
5619
5620     aSig = extractFloat64Frac( a );
5621     aExp = extractFloat64Exp( a );
5622     aSign = extractFloat64Sign( a );
5623
5624     if ( aExp == 0 ) {
5625         if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5626         normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5627     }
5628     if ( aSign ) {
5629         float_raise(float_flag_invalid, status);
5630         return float64_default_nan(status);
5631     }
5632     if ( aExp == 0x7FF ) {
5633         if (aSig) {
5634             return propagateFloat64NaN(a, float64_zero, status);
5635         }
5636         return a;
5637     }
5638
5639     aExp -= 0x3FF;
5640     aSig |= UINT64_C(0x0010000000000000);
5641     zSign = aExp < 0;
5642     zSig = (uint64_t)aExp << 52;
5643     for (i = 1LL << 51; i > 0; i >>= 1) {
5644         mul64To128( aSig, aSig, &aSig0, &aSig1 );
5645         aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
5646         if ( aSig & UINT64_C(0x0020000000000000) ) {
5647             aSig >>= 1;
5648             zSig |= i;
5649         }
5650     }
5651
5652     if ( zSign )
5653         zSig = -zSig;
5654     return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
5655 }
5656
5657 /*----------------------------------------------------------------------------
5658 | Returns the result of converting the extended double-precision floating-
5659 | point value `a' to the 32-bit two's complement integer format.  The
5660 | conversion is performed according to the IEC/IEEE Standard for Binary
5661 | Floating-Point Arithmetic---which means in particular that the conversion
5662 | is rounded according to the current rounding mode.  If `a' is a NaN, the
5663 | largest positive integer is returned.  Otherwise, if the conversion
5664 | overflows, the largest integer with the same sign as `a' is returned.
5665 *----------------------------------------------------------------------------*/
5666
5667 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5668 {
5669     bool aSign;
5670     int32_t aExp, shiftCount;
5671     uint64_t aSig;
5672
5673     if (floatx80_invalid_encoding(a)) {
5674         float_raise(float_flag_invalid, status);
5675         return 1 << 31;
5676     }
5677     aSig = extractFloatx80Frac( a );
5678     aExp = extractFloatx80Exp( a );
5679     aSign = extractFloatx80Sign( a );
5680     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5681     shiftCount = 0x4037 - aExp;
5682     if ( shiftCount <= 0 ) shiftCount = 1;
5683     shift64RightJamming( aSig, shiftCount, &aSig );
5684     return roundAndPackInt32(aSign, aSig, status);
5685
5686 }
5687
5688 /*----------------------------------------------------------------------------
5689 | Returns the result of converting the extended double-precision floating-
5690 | point value `a' to the 32-bit two's complement integer format.  The
5691 | conversion is performed according to the IEC/IEEE Standard for Binary
5692 | Floating-Point Arithmetic, except that the conversion is always rounded
5693 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5694 | Otherwise, if the conversion overflows, the largest integer with the same
5695 | sign as `a' is returned.
5696 *----------------------------------------------------------------------------*/
5697
5698 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5699 {
5700     bool aSign;
5701     int32_t aExp, shiftCount;
5702     uint64_t aSig, savedASig;
5703     int32_t z;
5704
5705     if (floatx80_invalid_encoding(a)) {
5706         float_raise(float_flag_invalid, status);
5707         return 1 << 31;
5708     }
5709     aSig = extractFloatx80Frac( a );
5710     aExp = extractFloatx80Exp( a );
5711     aSign = extractFloatx80Sign( a );
5712     if ( 0x401E < aExp ) {
5713         if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5714         goto invalid;
5715     }
5716     else if ( aExp < 0x3FFF ) {
5717         if (aExp || aSig) {
5718             float_raise(float_flag_inexact, status);
5719         }
5720         return 0;
5721     }
5722     shiftCount = 0x403E - aExp;
5723     savedASig = aSig;
5724     aSig >>= shiftCount;
5725     z = aSig;
5726     if ( aSign ) z = - z;
5727     if ( ( z < 0 ) ^ aSign ) {
5728  invalid:
5729         float_raise(float_flag_invalid, status);
5730         return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5731     }
5732     if ( ( aSig<<shiftCount ) != savedASig ) {
5733         float_raise(float_flag_inexact, status);
5734     }
5735     return z;
5736
5737 }
5738
5739 /*----------------------------------------------------------------------------
5740 | Returns the result of converting the extended double-precision floating-
5741 | point value `a' to the 64-bit two's complement integer format.  The
5742 | conversion is performed according to the IEC/IEEE Standard for Binary
5743 | Floating-Point Arithmetic---which means in particular that the conversion
5744 | is rounded according to the current rounding mode.  If `a' is a NaN,
5745 | the largest positive integer is returned.  Otherwise, if the conversion
5746 | overflows, the largest integer with the same sign as `a' is returned.
5747 *----------------------------------------------------------------------------*/
5748
5749 int64_t floatx80_to_int64(floatx80 a, float_status *status)
5750 {
5751     bool aSign;
5752     int32_t aExp, shiftCount;
5753     uint64_t aSig, aSigExtra;
5754
5755     if (floatx80_invalid_encoding(a)) {
5756         float_raise(float_flag_invalid, status);
5757         return 1ULL << 63;
5758     }
5759     aSig = extractFloatx80Frac( a );
5760     aExp = extractFloatx80Exp( a );
5761     aSign = extractFloatx80Sign( a );
5762     shiftCount = 0x403E - aExp;
5763     if ( shiftCount <= 0 ) {
5764         if ( shiftCount ) {
5765             float_raise(float_flag_invalid, status);
5766             if (!aSign || floatx80_is_any_nan(a)) {
5767                 return INT64_MAX;
5768             }
5769             return INT64_MIN;
5770         }
5771         aSigExtra = 0;
5772     }
5773     else {
5774         shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5775     }
5776     return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5777
5778 }
5779
5780 /*----------------------------------------------------------------------------
5781 | Returns the result of converting the extended double-precision floating-
5782 | point value `a' to the 64-bit two's complement integer format.  The
5783 | conversion is performed according to the IEC/IEEE Standard for Binary
5784 | Floating-Point Arithmetic, except that the conversion is always rounded
5785 | toward zero.  If `a' is a NaN, the largest positive integer is returned.
5786 | Otherwise, if the conversion overflows, the largest integer with the same
5787 | sign as `a' is returned.
5788 *----------------------------------------------------------------------------*/
5789
5790 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
5791 {
5792     bool aSign;
5793     int32_t aExp, shiftCount;
5794     uint64_t aSig;
5795     int64_t z;
5796
5797     if (floatx80_invalid_encoding(a)) {
5798         float_raise(float_flag_invalid, status);
5799         return 1ULL << 63;
5800     }
5801     aSig = extractFloatx80Frac( a );
5802     aExp = extractFloatx80Exp( a );
5803     aSign = extractFloatx80Sign( a );
5804     shiftCount = aExp - 0x403E;
5805     if ( 0 <= shiftCount ) {
5806         aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
5807         if ( ( a.high != 0xC03E ) || aSig ) {
5808             float_raise(float_flag_invalid, status);
5809             if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5810                 return INT64_MAX;
5811             }
5812         }
5813         return INT64_MIN;
5814     }
5815     else if ( aExp < 0x3FFF ) {
5816         if (aExp | aSig) {
5817             float_raise(float_flag_inexact, status);
5818         }
5819         return 0;
5820     }
5821     z = aSig>>( - shiftCount );
5822     if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
5823         float_raise(float_flag_inexact, status);
5824     }
5825     if ( aSign ) z = - z;
5826     return z;
5827
5828 }
5829
5830 /*----------------------------------------------------------------------------
5831 | Returns the result of converting the extended double-precision floating-
5832 | point value `a' to the single-precision floating-point format.  The
5833 | conversion is performed according to the IEC/IEEE Standard for Binary
5834 | Floating-Point Arithmetic.
5835 *----------------------------------------------------------------------------*/
5836
5837 float32 floatx80_to_float32(floatx80 a, float_status *status)
5838 {
5839     bool aSign;
5840     int32_t aExp;
5841     uint64_t aSig;
5842
5843     if (floatx80_invalid_encoding(a)) {
5844         float_raise(float_flag_invalid, status);
5845         return float32_default_nan(status);
5846     }
5847     aSig = extractFloatx80Frac( a );
5848     aExp = extractFloatx80Exp( a );
5849     aSign = extractFloatx80Sign( a );
5850     if ( aExp == 0x7FFF ) {
5851         if ( (uint64_t) ( aSig<<1 ) ) {
5852             float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5853                                              status);
5854             return float32_silence_nan(res, status);
5855         }
5856         return packFloat32( aSign, 0xFF, 0 );
5857     }
5858     shift64RightJamming( aSig, 33, &aSig );
5859     if ( aExp || aSig ) aExp -= 0x3F81;
5860     return roundAndPackFloat32(aSign, aExp, aSig, status);
5861
5862 }
5863
5864 /*----------------------------------------------------------------------------
5865 | Returns the result of converting the extended double-precision floating-
5866 | point value `a' to the double-precision floating-point format.  The
5867 | conversion is performed according to the IEC/IEEE Standard for Binary
5868 | Floating-Point Arithmetic.
5869 *----------------------------------------------------------------------------*/
5870
5871 float64 floatx80_to_float64(floatx80 a, float_status *status)
5872 {
5873     bool aSign;
5874     int32_t aExp;
5875     uint64_t aSig, zSig;
5876
5877     if (floatx80_invalid_encoding(a)) {
5878         float_raise(float_flag_invalid, status);
5879         return float64_default_nan(status);
5880     }
5881     aSig = extractFloatx80Frac( a );
5882     aExp = extractFloatx80Exp( a );
5883     aSign = extractFloatx80Sign( a );
5884     if ( aExp == 0x7FFF ) {
5885         if ( (uint64_t) ( aSig<<1 ) ) {
5886             float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5887                                              status);
5888             return float64_silence_nan(res, status);
5889         }
5890         return packFloat64( aSign, 0x7FF, 0 );
5891     }
5892     shift64RightJamming( aSig, 1, &zSig );
5893     if ( aExp || aSig ) aExp -= 0x3C01;
5894     return roundAndPackFloat64(aSign, aExp, zSig, status);
5895
5896 }
5897
5898 /*----------------------------------------------------------------------------
5899 | Returns the result of converting the extended double-precision floating-
5900 | point value `a' to the quadruple-precision floating-point format.  The
5901 | conversion is performed according to the IEC/IEEE Standard for Binary
5902 | Floating-Point Arithmetic.
5903 *----------------------------------------------------------------------------*/
5904
5905 float128 floatx80_to_float128(floatx80 a, float_status *status)
5906 {
5907     bool aSign;
5908     int aExp;
5909     uint64_t aSig, zSig0, zSig1;
5910
5911     if (floatx80_invalid_encoding(a)) {
5912         float_raise(float_flag_invalid, status);
5913         return float128_default_nan(status);
5914     }
5915     aSig = extractFloatx80Frac( a );
5916     aExp = extractFloatx80Exp( a );
5917     aSign = extractFloatx80Sign( a );
5918     if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5919         float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5920                                            status);
5921         return float128_silence_nan(res, status);
5922     }
5923     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5924     return packFloat128( aSign, aExp, zSig0, zSig1 );
5925
5926 }
5927
5928 /*----------------------------------------------------------------------------
5929 | Rounds the extended double-precision floating-point value `a'
5930 | to the precision provided by floatx80_rounding_precision and returns the
5931 | result as an extended double-precision floating-point value.
5932 | The operation is performed according to the IEC/IEEE Standard for Binary
5933 | Floating-Point Arithmetic.
5934 *----------------------------------------------------------------------------*/
5935
5936 floatx80 floatx80_round(floatx80 a, float_status *status)
5937 {
5938     return roundAndPackFloatx80(status->floatx80_rounding_precision,
5939                                 extractFloatx80Sign(a),
5940                                 extractFloatx80Exp(a),
5941                                 extractFloatx80Frac(a), 0, status);
5942 }
5943
5944 /*----------------------------------------------------------------------------
5945 | Rounds the extended double-precision floating-point value `a' to an integer,
5946 | and returns the result as an extended quadruple-precision floating-point
5947 | value.  The operation is performed according to the IEC/IEEE Standard for
5948 | Binary Floating-Point Arithmetic.
5949 *----------------------------------------------------------------------------*/
5950
5951 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5952 {
5953     bool aSign;
5954     int32_t aExp;
5955     uint64_t lastBitMask, roundBitsMask;
5956     floatx80 z;
5957
5958     if (floatx80_invalid_encoding(a)) {
5959         float_raise(float_flag_invalid, status);
5960         return floatx80_default_nan(status);
5961     }
5962     aExp = extractFloatx80Exp( a );
5963     if ( 0x403E <= aExp ) {
5964         if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5965             return propagateFloatx80NaN(a, a, status);
5966         }
5967         return a;
5968     }
5969     if ( aExp < 0x3FFF ) {
5970         if (    ( aExp == 0 )
5971              && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
5972             return a;
5973         }
5974         float_raise(float_flag_inexact, status);
5975         aSign = extractFloatx80Sign( a );
5976         switch (status->float_rounding_mode) {
5977          case float_round_nearest_even:
5978             if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5979                ) {
5980                 return
5981                     packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5982             }
5983             break;
5984         case float_round_ties_away:
5985             if (aExp == 0x3FFE) {
5986                 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5987             }
5988             break;
5989          case float_round_down:
5990             return
5991                   aSign ?
5992                       packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5993                 : packFloatx80( 0, 0, 0 );
5994          case float_round_up:
5995             return
5996                   aSign ? packFloatx80( 1, 0, 0 )
5997                 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5998
5999         case float_round_to_zero:
6000             break;
6001         default:
6002             g_assert_not_reached();
6003         }
6004         return packFloatx80( aSign, 0, 0 );
6005     }
6006     lastBitMask = 1;
6007     lastBitMask <<= 0x403E - aExp;
6008     roundBitsMask = lastBitMask - 1;
6009     z = a;
6010     switch (status->float_rounding_mode) {
6011     case float_round_nearest_even:
6012         z.low += lastBitMask>>1;
6013         if ((z.low & roundBitsMask) == 0) {
6014             z.low &= ~lastBitMask;
6015         }
6016         break;
6017     case float_round_ties_away:
6018         z.low += lastBitMask >> 1;
6019         break;
6020     case float_round_to_zero:
6021         break;
6022     case float_round_up:
6023         if (!extractFloatx80Sign(z)) {
6024             z.low += roundBitsMask;
6025         }
6026         break;
6027     case float_round_down:
6028         if (extractFloatx80Sign(z)) {
6029             z.low += roundBitsMask;
6030         }
6031         break;
6032     default:
6033         abort();
6034     }
6035     z.low &= ~ roundBitsMask;
6036     if ( z.low == 0 ) {
6037         ++z.high;
6038         z.low = UINT64_C(0x8000000000000000);
6039     }
6040     if (z.low != a.low) {
6041         float_raise(float_flag_inexact, status);
6042     }
6043     return z;
6044
6045 }
6046
/*----------------------------------------------------------------------------
| Returns the result of adding the absolute values of the extended double-
| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
| negated before being returned.  `zSign' is ignored if the result is a NaN.
| The addition is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
|
| The operand with the smaller exponent is shifted right (with jamming) to
| align it with the other; the bits shifted out are kept in `zSig1' and
| passed on to the final rounding step.
*----------------------------------------------------------------------------*/

static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* `a' has the larger exponent: align `b' to it. */
        if ( aExp == 0x7FFF ) {
            if ((uint64_t)(aSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        /* A zero exponent field gets one less shift. */
        if ( bExp == 0 ) --expDiff;
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* `b' has the larger exponent: align `a' to it. */
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponents: no alignment shift is required. */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
                /* At least one of the values is a pseudo-denormal,
                 * and there is a carry out of the result.  */
                zExp = 1;
                goto shiftRight1;
            }
            if (zSig0 == 0) {
                return packFloatx80(zSign, 0, 0);
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }
    zSig0 = aSig + bSig;
    /* Top bit set: the sum needs no renormalizing shift. */
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    /* Shift right by one, force the top bit and bump the exponent. */
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= UINT64_C(0x8000000000000000);
    ++zExp;
 roundAndPack:
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6126
/*----------------------------------------------------------------------------
| Returns the result of subtracting the absolute values of the extended
| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
| difference is negated before being returned.  `zSign' is ignored if the
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
| Standard for Binary Floating-Point Arithmetic.
|
| The smaller magnitude is always subtracted from the larger one; when `b'
| is the larger operand the result sign `zSign' is inverted.
*----------------------------------------------------------------------------*/

static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponents from here on. */
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Infinity minus infinity is an invalid operation. */
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        /* A zero exponent field is handled as exponent 1 below. */
        aExp = 1;
        bExp = 1;
    }
    zSig1 = 0;
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Equal magnitudes: zero, with the sign bit set only in round-down mode. */
    return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
 bExpBigger:
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Finite minus infinity: infinity with the opposite sign. */
        return packFloatx80(zSign ^ 1, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) ++expDiff;
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
 bBigger:
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
    zExp = bExp;
    zSign ^= 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) --expDiff;
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
 aBigger:
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
    zExp = aExp;
 normalizeRoundAndPack:
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         zSign, zExp, zSig0, zSig1, status);
}
6195
6196 /*----------------------------------------------------------------------------
6197 | Returns the result of adding the extended double-precision floating-point
6198 | values `a' and `b'.  The operation is performed according to the IEC/IEEE
6199 | Standard for Binary Floating-Point Arithmetic.
6200 *----------------------------------------------------------------------------*/
6201
6202 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
6203 {
6204     bool aSign, bSign;
6205
6206     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6207         float_raise(float_flag_invalid, status);
6208         return floatx80_default_nan(status);
6209     }
6210     aSign = extractFloatx80Sign( a );
6211     bSign = extractFloatx80Sign( b );
6212     if ( aSign == bSign ) {
6213         return addFloatx80Sigs(a, b, aSign, status);
6214     }
6215     else {
6216         return subFloatx80Sigs(a, b, aSign, status);
6217     }
6218
6219 }
6220
6221 /*----------------------------------------------------------------------------
6222 | Returns the result of subtracting the extended double-precision floating-
6223 | point values `a' and `b'.  The operation is performed according to the
6224 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6225 *----------------------------------------------------------------------------*/
6226
6227 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
6228 {
6229     bool aSign, bSign;
6230
6231     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6232         float_raise(float_flag_invalid, status);
6233         return floatx80_default_nan(status);
6234     }
6235     aSign = extractFloatx80Sign( a );
6236     bSign = extractFloatx80Sign( b );
6237     if ( aSign == bSign ) {
6238         return subFloatx80Sigs(a, b, aSign, status);
6239     }
6240     else {
6241         return addFloatx80Sigs(a, b, aSign, status);
6242     }
6243
6244 }
6245
6246 /*----------------------------------------------------------------------------
6247 | Returns the result of multiplying the extended double-precision floating-
6248 | point values `a' and `b'.  The operation is performed according to the
6249 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6250 *----------------------------------------------------------------------------*/
6251
6252 floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
6253 {
6254     bool aSign, bSign, zSign;
6255     int32_t aExp, bExp, zExp;
6256     uint64_t aSig, bSig, zSig0, zSig1;
6257
6258     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6259         float_raise(float_flag_invalid, status);
6260         return floatx80_default_nan(status);
6261     }
6262     aSig = extractFloatx80Frac( a );
6263     aExp = extractFloatx80Exp( a );
6264     aSign = extractFloatx80Sign( a );
6265     bSig = extractFloatx80Frac( b );
6266     bExp = extractFloatx80Exp( b );
6267     bSign = extractFloatx80Sign( b );
6268     zSign = aSign ^ bSign;
6269     if ( aExp == 0x7FFF ) {
6270         if (    (uint64_t) ( aSig<<1 )
6271              || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
6272             return propagateFloatx80NaN(a, b, status);
6273         }
6274         if ( ( bExp | bSig ) == 0 ) goto invalid;
6275         return packFloatx80(zSign, floatx80_infinity_high,
6276                                    floatx80_infinity_low);
6277     }
6278     if ( bExp == 0x7FFF ) {
6279         if ((uint64_t)(bSig << 1)) {
6280             return propagateFloatx80NaN(a, b, status);
6281         }
6282         if ( ( aExp | aSig ) == 0 ) {
6283  invalid:
6284             float_raise(float_flag_invalid, status);
6285             return floatx80_default_nan(status);
6286         }
6287         return packFloatx80(zSign, floatx80_infinity_high,
6288                                    floatx80_infinity_low);
6289     }
6290     if ( aExp == 0 ) {
6291         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6292         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6293     }
6294     if ( bExp == 0 ) {
6295         if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
6296         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6297     }
6298     zExp = aExp + bExp - 0x3FFE;
6299     mul64To128( aSig, bSig, &zSig0, &zSig1 );
6300     if ( 0 < (int64_t) zSig0 ) {
6301         shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6302         --zExp;
6303     }
6304     return roundAndPackFloatx80(status->floatx80_rounding_precision,
6305                                 zSign, zExp, zSig0, zSig1, status);
6306 }
6307
6308 /*----------------------------------------------------------------------------
6309 | Returns the result of dividing the extended double-precision floating-point
6310 | value `a' by the corresponding value `b'.  The operation is performed
6311 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6312 *----------------------------------------------------------------------------*/
6313
6314 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
6315 {
6316     bool aSign, bSign, zSign;
6317     int32_t aExp, bExp, zExp;
6318     uint64_t aSig, bSig, zSig0, zSig1;
6319     uint64_t rem0, rem1, rem2, term0, term1, term2;
6320
6321     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6322         float_raise(float_flag_invalid, status);
6323         return floatx80_default_nan(status);
6324     }
6325     aSig = extractFloatx80Frac( a );
6326     aExp = extractFloatx80Exp( a );
6327     aSign = extractFloatx80Sign( a );
6328     bSig = extractFloatx80Frac( b );
6329     bExp = extractFloatx80Exp( b );
6330     bSign = extractFloatx80Sign( b );
6331     zSign = aSign ^ bSign;
6332     if ( aExp == 0x7FFF ) {
6333         if ((uint64_t)(aSig << 1)) {
6334             return propagateFloatx80NaN(a, b, status);
6335         }
6336         if ( bExp == 0x7FFF ) {
6337             if ((uint64_t)(bSig << 1)) {
6338                 return propagateFloatx80NaN(a, b, status);
6339             }
6340             goto invalid;
6341         }
6342         return packFloatx80(zSign, floatx80_infinity_high,
6343                                    floatx80_infinity_low);
6344     }
6345     if ( bExp == 0x7FFF ) {
6346         if ((uint64_t)(bSig << 1)) {
6347             return propagateFloatx80NaN(a, b, status);
6348         }
6349         return packFloatx80( zSign, 0, 0 );
6350     }
6351     if ( bExp == 0 ) {
6352         if ( bSig == 0 ) {
6353             if ( ( aExp | aSig ) == 0 ) {
6354  invalid:
6355                 float_raise(float_flag_invalid, status);
6356                 return floatx80_default_nan(status);
6357             }
6358             float_raise(float_flag_divbyzero, status);
6359             return packFloatx80(zSign, floatx80_infinity_high,
6360                                        floatx80_infinity_low);
6361         }
6362         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6363     }
6364     if ( aExp == 0 ) {
6365         if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6366         normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6367     }
6368     zExp = aExp - bExp + 0x3FFE;
6369     rem1 = 0;
6370     if ( bSig <= aSig ) {
6371         shift128Right( aSig, 0, 1, &aSig, &rem1 );
6372         ++zExp;
6373     }
6374     zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6375     mul64To128( bSig, zSig0, &term0, &term1 );
6376     sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
6377     while ( (int64_t) rem0 < 0 ) {
6378         --zSig0;
6379         add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6380     }
6381     zSig1 = estimateDiv128To64( rem1, 0, bSig );
6382     if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
6383         mul64To128( bSig, zSig1, &term1, &term2 );
6384         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6385         while ( (int64_t) rem1 < 0 ) {
6386             --zSig1;
6387             add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6388         }
6389         zSig1 |= ( ( rem1 | rem2 ) != 0 );
6390     }
6391     return roundAndPackFloatx80(status->floatx80_rounding_precision,
6392                                 zSign, zExp, zSig0, zSig1, status);
6393 }
6394
6395 /*----------------------------------------------------------------------------
6396 | Returns the remainder of the extended double-precision floating-point value
6397 | `a' with respect to the corresponding value `b'.  The operation is performed
6398 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6399 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
6400 | the quotient toward zero instead.  '*quotient' is set to the low 64 bits of
6401 | the absolute value of the integer quotient.
6402 *----------------------------------------------------------------------------*/
6403
6404 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
6405                          float_status *status)
6406 {
6407     bool aSign, zSign;
6408     int32_t aExp, bExp, expDiff, aExpOrig;
6409     uint64_t aSig0, aSig1, bSig;
6410     uint64_t q, term0, term1, alternateASig0, alternateASig1;
6411
6412     *quotient = 0;
6413     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6414         float_raise(float_flag_invalid, status);
6415         return floatx80_default_nan(status);
6416     }
6417     aSig0 = extractFloatx80Frac( a );
6418     aExpOrig = aExp = extractFloatx80Exp( a );
6419     aSign = extractFloatx80Sign( a );
6420     bSig = extractFloatx80Frac( b );
6421     bExp = extractFloatx80Exp( b );
6422     if ( aExp == 0x7FFF ) {
6423         if (    (uint64_t) ( aSig0<<1 )
6424              || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
6425             return propagateFloatx80NaN(a, b, status);
6426         }
6427         goto invalid;
6428     }
6429     if ( bExp == 0x7FFF ) {
6430         if ((uint64_t)(bSig << 1)) {
6431             return propagateFloatx80NaN(a, b, status);
6432         }
6433         if (aExp == 0 && aSig0 >> 63) {
6434             /*
6435              * Pseudo-denormal argument must be returned in normalized
6436              * form.
6437              */
6438             return packFloatx80(aSign, 1, aSig0);
6439         }
6440         return a;
6441     }
6442     if ( bExp == 0 ) {
6443         if ( bSig == 0 ) {
6444  invalid:
6445             float_raise(float_flag_invalid, status);
6446             return floatx80_default_nan(status);
6447         }
6448         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6449     }
6450     if ( aExp == 0 ) {
6451         if ( aSig0 == 0 ) return a;
6452         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6453     }
6454     zSign = aSign;
6455     expDiff = aExp - bExp;
6456     aSig1 = 0;
6457     if ( expDiff < 0 ) {
6458         if ( mod || expDiff < -1 ) {
6459             if (aExp == 1 && aExpOrig == 0) {
6460                 /*
6461                  * Pseudo-denormal argument must be returned in
6462                  * normalized form.
6463                  */
6464                 return packFloatx80(aSign, aExp, aSig0);
6465             }
6466             return a;
6467         }
6468         shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6469         expDiff = 0;
6470     }
6471     *quotient = q = ( bSig <= aSig0 );
6472     if ( q ) aSig0 -= bSig;
6473     expDiff -= 64;
6474     while ( 0 < expDiff ) {
6475         q = estimateDiv128To64( aSig0, aSig1, bSig );
6476         q = ( 2 < q ) ? q - 2 : 0;
6477         mul64To128( bSig, q, &term0, &term1 );
6478         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6479         shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6480         expDiff -= 62;
6481         *quotient <<= 62;
6482         *quotient += q;
6483     }
6484     expDiff += 64;
6485     if ( 0 < expDiff ) {
6486         q = estimateDiv128To64( aSig0, aSig1, bSig );
6487         q = ( 2 < q ) ? q - 2 : 0;
6488         q >>= 64 - expDiff;
6489         mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6490         sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6491         shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6492         while ( le128( term0, term1, aSig0, aSig1 ) ) {
6493             ++q;
6494             sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6495         }
6496         if (expDiff < 64) {
6497             *quotient <<= expDiff;
6498         } else {
6499             *quotient = 0;
6500         }
6501         *quotient += q;
6502     }
6503     else {
6504         term1 = 0;
6505         term0 = bSig;
6506     }
6507     if (!mod) {
6508         sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6509         if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6510                 || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6511                         && ( q & 1 ) )
6512             ) {
6513             aSig0 = alternateASig0;
6514             aSig1 = alternateASig1;
6515             zSign = ! zSign;
6516             ++*quotient;
6517         }
6518     }
6519     return
6520         normalizeRoundAndPackFloatx80(
6521             80, zSign, bExp + expDiff, aSig0, aSig1, status);
6522
6523 }
6524
6525 /*----------------------------------------------------------------------------
6526 | Returns the remainder of the extended double-precision floating-point value
6527 | `a' with respect to the corresponding value `b'.  The operation is performed
6528 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6529 *----------------------------------------------------------------------------*/
6530
6531 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6532 {
6533     uint64_t quotient;
6534     return floatx80_modrem(a, b, false, &quotient, status);
6535 }
6536
6537 /*----------------------------------------------------------------------------
6538 | Returns the remainder of the extended double-precision floating-point value
6539 | `a' with respect to the corresponding value `b', with the quotient truncated
6540 | toward zero.
6541 *----------------------------------------------------------------------------*/
6542
6543 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6544 {
6545     uint64_t quotient;
6546     return floatx80_modrem(a, b, true, &quotient, status);
6547 }
6548
6549 /*----------------------------------------------------------------------------
6550 | Returns the square root of the extended double-precision floating-point
6551 | value `a'.  The operation is performed according to the IEC/IEEE Standard
6552 | for Binary Floating-Point Arithmetic.
6553 *----------------------------------------------------------------------------*/
6554
6555 floatx80 floatx80_sqrt(floatx80 a, float_status *status)
6556 {
6557     bool aSign;
6558     int32_t aExp, zExp;
6559     uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6560     uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6561
6562     if (floatx80_invalid_encoding(a)) {
6563         float_raise(float_flag_invalid, status);
6564         return floatx80_default_nan(status);
6565     }
6566     aSig0 = extractFloatx80Frac( a );
6567     aExp = extractFloatx80Exp( a );
6568     aSign = extractFloatx80Sign( a );
6569     if ( aExp == 0x7FFF ) {
6570         if ((uint64_t)(aSig0 << 1)) {
6571             return propagateFloatx80NaN(a, a, status);
6572         }
6573         if ( ! aSign ) return a;
6574         goto invalid;
6575     }
6576     if ( aSign ) {
6577         if ( ( aExp | aSig0 ) == 0 ) return a;
6578  invalid:
6579         float_raise(float_flag_invalid, status);
6580         return floatx80_default_nan(status);
6581     }
6582     if ( aExp == 0 ) {
6583         if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6584         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6585     }
6586     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6587     zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6588     shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6589     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6590     doubleZSig0 = zSig0<<1;
6591     mul64To128( zSig0, zSig0, &term0, &term1 );
6592     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
6593     while ( (int64_t) rem0 < 0 ) {
6594         --zSig0;
6595         doubleZSig0 -= 2;
6596         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6597     }
6598     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6599     if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
6600         if ( zSig1 == 0 ) zSig1 = 1;
6601         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6602         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6603         mul64To128( zSig1, zSig1, &term2, &term3 );
6604         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
6605         while ( (int64_t) rem1 < 0 ) {
6606             --zSig1;
6607             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6608             term3 |= 1;
6609             term2 |= doubleZSig0;
6610             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6611         }
6612         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6613     }
6614     shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6615     zSig0 |= doubleZSig0;
6616     return roundAndPackFloatx80(status->floatx80_rounding_precision,
6617                                 0, zExp, zSig0, zSig1, status);
6618 }
6619
6620 /*----------------------------------------------------------------------------
6621 | Returns the result of converting the quadruple-precision floating-point
6622 | value `a' to the 32-bit two's complement integer format.  The conversion
6623 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6624 | Arithmetic---which means in particular that the conversion is rounded
6625 | according to the current rounding mode.  If `a' is a NaN, the largest
6626 | positive integer is returned.  Otherwise, if the conversion overflows, the
6627 | largest integer with the same sign as `a' is returned.
6628 *----------------------------------------------------------------------------*/
6629
6630 int32_t float128_to_int32(float128 a, float_status *status)
6631 {
6632     bool aSign;
6633     int32_t aExp, shiftCount;
6634     uint64_t aSig0, aSig1;
6635
6636     aSig1 = extractFloat128Frac1( a );
6637     aSig0 = extractFloat128Frac0( a );
6638     aExp = extractFloat128Exp( a );
6639     aSign = extractFloat128Sign( a );
6640     if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
6641     if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
6642     aSig0 |= ( aSig1 != 0 );
6643     shiftCount = 0x4028 - aExp;
6644     if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
6645     return roundAndPackInt32(aSign, aSig0, status);
6646
6647 }
6648
6649 /*----------------------------------------------------------------------------
6650 | Returns the result of converting the quadruple-precision floating-point
6651 | value `a' to the 32-bit two's complement integer format.  The conversion
6652 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6653 | Arithmetic, except that the conversion is always rounded toward zero.  If
6654 | `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
6655 | conversion overflows, the largest integer with the same sign as `a' is
6656 | returned.
6657 *----------------------------------------------------------------------------*/
6658
6659 int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
6660 {
6661     bool aSign;
6662     int32_t aExp, shiftCount;
6663     uint64_t aSig0, aSig1, savedASig;
6664     int32_t z;
6665
6666     aSig1 = extractFloat128Frac1( a );
6667     aSig0 = extractFloat128Frac0( a );
6668     aExp = extractFloat128Exp( a );
6669     aSign = extractFloat128Sign( a );
6670     aSig0 |= ( aSig1 != 0 );
6671     if ( 0x401E < aExp ) {
6672         if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
6673         goto invalid;
6674     }
6675     else if ( aExp < 0x3FFF ) {
6676         if (aExp || aSig0) {
6677             float_raise(float_flag_inexact, status);
6678         }
6679         return 0;
6680     }
6681     aSig0 |= UINT64_C(0x0001000000000000);
6682     shiftCount = 0x402F - aExp;
6683     savedASig = aSig0;
6684     aSig0 >>= shiftCount;
6685     z = aSig0;
6686     if ( aSign ) z = - z;
6687     if ( ( z < 0 ) ^ aSign ) {
6688  invalid:
6689         float_raise(float_flag_invalid, status);
6690         return aSign ? INT32_MIN : INT32_MAX;
6691     }
6692     if ( ( aSig0<<shiftCount ) != savedASig ) {
6693         float_raise(float_flag_inexact, status);
6694     }
6695     return z;
6696
6697 }
6698
6699 /*----------------------------------------------------------------------------
6700 | Returns the result of converting the quadruple-precision floating-point
6701 | value `a' to the 64-bit two's complement integer format.  The conversion
6702 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6703 | Arithmetic---which means in particular that the conversion is rounded
6704 | according to the current rounding mode.  If `a' is a NaN, the largest
6705 | positive integer is returned.  Otherwise, if the conversion overflows, the
6706 | largest integer with the same sign as `a' is returned.
6707 *----------------------------------------------------------------------------*/
6708
6709 int64_t float128_to_int64(float128 a, float_status *status)
6710 {
6711     bool aSign;
6712     int32_t aExp, shiftCount;
6713     uint64_t aSig0, aSig1;
6714
6715     aSig1 = extractFloat128Frac1( a );
6716     aSig0 = extractFloat128Frac0( a );
6717     aExp = extractFloat128Exp( a );
6718     aSign = extractFloat128Sign( a );
6719     if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
6720     shiftCount = 0x402F - aExp;
6721     if ( shiftCount <= 0 ) {
6722         if ( 0x403E < aExp ) {
6723             float_raise(float_flag_invalid, status);
6724             if (    ! aSign
6725                  || (    ( aExp == 0x7FFF )
6726                       && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
6727                     )
6728                ) {
6729                 return INT64_MAX;
6730             }
6731             return INT64_MIN;
6732         }
6733         shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6734     }
6735     else {
6736         shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6737     }
6738     return roundAndPackInt64(aSign, aSig0, aSig1, status);
6739
6740 }
6741
6742 /*----------------------------------------------------------------------------
6743 | Returns the result of converting the quadruple-precision floating-point
6744 | value `a' to the 64-bit two's complement integer format.  The conversion
6745 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6746 | Arithmetic, except that the conversion is always rounded toward zero.
6747 | If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
6748 | the conversion overflows, the largest integer with the same sign as `a' is
6749 | returned.
6750 *----------------------------------------------------------------------------*/
6751
6752 int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
6753 {
6754     bool aSign;
6755     int32_t aExp, shiftCount;
6756     uint64_t aSig0, aSig1;
6757     int64_t z;
6758
6759     aSig1 = extractFloat128Frac1( a );
6760     aSig0 = extractFloat128Frac0( a );
6761     aExp = extractFloat128Exp( a );
6762     aSign = extractFloat128Sign( a );
6763     if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
6764     shiftCount = aExp - 0x402F;
6765     if ( 0 < shiftCount ) {
6766         if ( 0x403E <= aExp ) {
6767             aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
6768             if (    ( a.high == UINT64_C(0xC03E000000000000) )
6769                  && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
6770                 if (aSig1) {
6771                     float_raise(float_flag_inexact, status);
6772                 }
6773             }
6774             else {
6775                 float_raise(float_flag_invalid, status);
6776                 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
6777                     return INT64_MAX;
6778                 }
6779             }
6780             return INT64_MIN;
6781         }
6782         z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
6783         if ( (uint64_t) ( aSig1<<shiftCount ) ) {
6784             float_raise(float_flag_inexact, status);
6785         }
6786     }
6787     else {
6788         if ( aExp < 0x3FFF ) {
6789             if ( aExp | aSig0 | aSig1 ) {
6790                 float_raise(float_flag_inexact, status);
6791             }
6792             return 0;
6793         }
6794         z = aSig0>>( - shiftCount );
6795         if (    aSig1
6796              || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
6797             float_raise(float_flag_inexact, status);
6798         }
6799     }
6800     if ( aSign ) z = - z;
6801     return z;
6802
6803 }
6804
6805 /*----------------------------------------------------------------------------
6806 | Returns the result of converting the quadruple-precision floating-point value
6807 | `a' to the 64-bit unsigned integer format.  The conversion is
6808 | performed according to the IEC/IEEE Standard for Binary Floating-Point
6809 | Arithmetic---which means in particular that the conversion is rounded
6810 | according to the current rounding mode.  If `a' is a NaN, the largest
6811 | positive integer is returned.  If the conversion overflows, the
6812 | largest unsigned integer is returned.  If 'a' is negative, the value is
6813 | rounded and zero is returned; negative values that do not round to zero
6814 | will raise the inexact exception.
6815 *----------------------------------------------------------------------------*/
6816
6817 uint64_t float128_to_uint64(float128 a, float_status *status)
6818 {
6819     bool aSign;
6820     int aExp;
6821     int shiftCount;
6822     uint64_t aSig0, aSig1;
6823
6824     aSig0 = extractFloat128Frac0(a);
6825     aSig1 = extractFloat128Frac1(a);
6826     aExp = extractFloat128Exp(a);
6827     aSign = extractFloat128Sign(a);
6828     if (aSign && (aExp > 0x3FFE)) {
6829         float_raise(float_flag_invalid, status);
6830         if (float128_is_any_nan(a)) {
6831             return UINT64_MAX;
6832         } else {
6833             return 0;
6834         }
6835     }
6836     if (aExp) {
6837         aSig0 |= UINT64_C(0x0001000000000000);
6838     }
6839     shiftCount = 0x402F - aExp;
6840     if (shiftCount <= 0) {
6841         if (0x403E < aExp) {
6842             float_raise(float_flag_invalid, status);
6843             return UINT64_MAX;
6844         }
6845         shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6846     } else {
6847         shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6848     }
6849     return roundAndPackUint64(aSign, aSig0, aSig1, status);
6850 }
6851
6852 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6853 {
6854     uint64_t v;
6855     signed char current_rounding_mode = status->float_rounding_mode;
6856
6857     set_float_rounding_mode(float_round_to_zero, status);
6858     v = float128_to_uint64(a, status);
6859     set_float_rounding_mode(current_rounding_mode, status);
6860
6861     return v;
6862 }
6863
6864 /*----------------------------------------------------------------------------
6865 | Returns the result of converting the quadruple-precision floating-point
6866 | value `a' to the 32-bit unsigned integer format.  The conversion
6867 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
6868 | Arithmetic except that the conversion is always rounded toward zero.
6869 | If `a' is a NaN, the largest positive integer is returned.  Otherwise,
6870 | if the conversion overflows, the largest unsigned integer is returned.
6871 | If 'a' is negative, the value is rounded and zero is returned; negative
6872 | values that do not round to zero will raise the inexact exception.
6873 *----------------------------------------------------------------------------*/
6874
6875 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6876 {
6877     uint64_t v;
6878     uint32_t res;
6879     int old_exc_flags = get_float_exception_flags(status);
6880
6881     v = float128_to_uint64_round_to_zero(a, status);
6882     if (v > 0xffffffff) {
6883         res = 0xffffffff;
6884     } else {
6885         return v;
6886     }
6887     set_float_exception_flags(old_exc_flags, status);
6888     float_raise(float_flag_invalid, status);
6889     return res;
6890 }
6891
6892 /*----------------------------------------------------------------------------
6893 | Returns the result of converting the quadruple-precision floating-point value
6894 | `a' to the 32-bit unsigned integer format.  The conversion is
6895 | performed according to the IEC/IEEE Standard for Binary Floating-Point
6896 | Arithmetic---which means in particular that the conversion is rounded
6897 | according to the current rounding mode.  If `a' is a NaN, the largest
6898 | positive integer is returned.  If the conversion overflows, the
6899 | largest unsigned integer is returned.  If 'a' is negative, the value is
6900 | rounded and zero is returned; negative values that do not round to zero
6901 | will raise the inexact exception.
6902 *----------------------------------------------------------------------------*/
6903
6904 uint32_t float128_to_uint32(float128 a, float_status *status)
6905 {
6906     uint64_t v;
6907     uint32_t res;
6908     int old_exc_flags = get_float_exception_flags(status);
6909
6910     v = float128_to_uint64(a, status);
6911     if (v > 0xffffffff) {
6912         res = 0xffffffff;
6913     } else {
6914         return v;
6915     }
6916     set_float_exception_flags(old_exc_flags, status);
6917     float_raise(float_flag_invalid, status);
6918     return res;
6919 }
6920
6921 /*----------------------------------------------------------------------------
6922 | Returns the result of converting the quadruple-precision floating-point
6923 | value `a' to the extended double-precision floating-point format.  The
6924 | conversion is performed according to the IEC/IEEE Standard for Binary
6925 | Floating-Point Arithmetic.
6926 *----------------------------------------------------------------------------*/
6927
6928 floatx80 float128_to_floatx80(float128 a, float_status *status)
6929 {
6930     bool aSign;
6931     int32_t aExp;
6932     uint64_t aSig0, aSig1;
6933
6934     aSig1 = extractFloat128Frac1( a );
6935     aSig0 = extractFloat128Frac0( a );
6936     aExp = extractFloat128Exp( a );
6937     aSign = extractFloat128Sign( a );
6938     if ( aExp == 0x7FFF ) {
6939         if ( aSig0 | aSig1 ) {
6940             floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6941                                                status);
6942             return floatx80_silence_nan(res, status);
6943         }
6944         return packFloatx80(aSign, floatx80_infinity_high,
6945                                    floatx80_infinity_low);
6946     }
6947     if ( aExp == 0 ) {
6948         if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6949         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6950     }
6951     else {
6952         aSig0 |= UINT64_C(0x0001000000000000);
6953     }
6954     shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
6955     return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
6956
6957 }
6958
6959 /*----------------------------------------------------------------------------
6960 | Rounds the quadruple-precision floating-point value `a' to an integer, and
6961 | returns the result as a quadruple-precision floating-point value.  The
6962 | operation is performed according to the IEC/IEEE Standard for Binary
6963 | Floating-Point Arithmetic.
6964 *----------------------------------------------------------------------------*/
6965
6966 float128 float128_round_to_int(float128 a, float_status *status)
6967 {
6968     bool aSign;
6969     int32_t aExp;
6970     uint64_t lastBitMask, roundBitsMask;
6971     float128 z;
6972
6973     aExp = extractFloat128Exp( a );
6974     if ( 0x402F <= aExp ) {
6975         if ( 0x406F <= aExp ) {
6976             if (    ( aExp == 0x7FFF )
6977                  && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6978                ) {
6979                 return propagateFloat128NaN(a, a, status);
6980             }
6981             return a;
6982         }
6983         lastBitMask = 1;
6984         lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6985         roundBitsMask = lastBitMask - 1;
6986         z = a;
6987         switch (status->float_rounding_mode) {
6988         case float_round_nearest_even:
6989             if ( lastBitMask ) {
6990                 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6991                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6992             }
6993             else {
6994                 if ( (int64_t) z.low < 0 ) {
6995                     ++z.high;
6996                     if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
6997                 }
6998             }
6999             break;
7000         case float_round_ties_away:
7001             if (lastBitMask) {
7002                 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
7003             } else {
7004                 if ((int64_t) z.low < 0) {
7005                     ++z.high;
7006                 }
7007             }
7008             break;
7009         case float_round_to_zero:
7010             break;
7011         case float_round_up:
7012             if (!extractFloat128Sign(z)) {
7013                 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
7014             }
7015             break;
7016         case float_round_down:
7017             if (extractFloat128Sign(z)) {
7018                 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
7019             }
7020             break;
7021         case float_round_to_odd:
7022             /*
7023              * Note that if lastBitMask == 0, the last bit is the lsb
7024              * of high, and roundBitsMask == -1.
7025              */
7026             if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
7027                 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
7028             }
7029             break;
7030         default:
7031             abort();
7032         }
7033         z.low &= ~ roundBitsMask;
7034     }
7035     else {
7036         if ( aExp < 0x3FFF ) {
7037             if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
7038             float_raise(float_flag_inexact, status);
7039             aSign = extractFloat128Sign( a );
7040             switch (status->float_rounding_mode) {
7041             case float_round_nearest_even:
7042                 if (    ( aExp == 0x3FFE )
7043                      && (   extractFloat128Frac0( a )
7044                           | extractFloat128Frac1( a ) )
7045                    ) {
7046                     return packFloat128( aSign, 0x3FFF, 0, 0 );
7047                 }
7048                 break;
7049             case float_round_ties_away:
7050                 if (aExp == 0x3FFE) {
7051                     return packFloat128(aSign, 0x3FFF, 0, 0);
7052                 }
7053                 break;
7054             case float_round_down:
7055                 return
7056                       aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
7057                     : packFloat128( 0, 0, 0, 0 );
7058             case float_round_up:
7059                 return
7060                       aSign ? packFloat128( 1, 0, 0, 0 )
7061                     : packFloat128( 0, 0x3FFF, 0, 0 );
7062
7063             case float_round_to_odd:
7064                 return packFloat128(aSign, 0x3FFF, 0, 0);
7065
7066             case float_round_to_zero:
7067                 break;
7068             }
7069             return packFloat128( aSign, 0, 0, 0 );
7070         }
7071         lastBitMask = 1;
7072         lastBitMask <<= 0x402F - aExp;
7073         roundBitsMask = lastBitMask - 1;
7074         z.low = 0;
7075         z.high = a.high;
7076         switch (status->float_rounding_mode) {
7077         case float_round_nearest_even:
7078             z.high += lastBitMask>>1;
7079             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
7080                 z.high &= ~ lastBitMask;
7081             }
7082             break;
7083         case float_round_ties_away:
7084             z.high += lastBitMask>>1;
7085             break;
7086         case float_round_to_zero:
7087             break;
7088         case float_round_up:
7089             if (!extractFloat128Sign(z)) {
7090                 z.high |= ( a.low != 0 );
7091                 z.high += roundBitsMask;
7092             }
7093             break;
7094         case float_round_down:
7095             if (extractFloat128Sign(z)) {
7096                 z.high |= (a.low != 0);
7097                 z.high += roundBitsMask;
7098             }
7099             break;
7100         case float_round_to_odd:
7101             if ((z.high & lastBitMask) == 0) {
7102                 z.high |= (a.low != 0);
7103                 z.high += roundBitsMask;
7104             }
7105             break;
7106         default:
7107             abort();
7108         }
7109         z.high &= ~ roundBitsMask;
7110     }
7111     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
7112         float_raise(float_flag_inexact, status);
7113     }
7114     return z;
7115
7116 }
7117
7118 /*----------------------------------------------------------------------------
7119 | Returns the remainder of the quadruple-precision floating-point value `a'
7120 | with respect to the corresponding value `b'.  The operation is performed
7121 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7122 *----------------------------------------------------------------------------*/
7123
7124 float128 float128_rem(float128 a, float128 b, float_status *status)
7125 {
7126     bool aSign, zSign;
7127     int32_t aExp, bExp, expDiff;
7128     uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
7129     uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
7130     int64_t sigMean0;
7131
7132     aSig1 = extractFloat128Frac1( a );
7133     aSig0 = extractFloat128Frac0( a );
7134     aExp = extractFloat128Exp( a );
7135     aSign = extractFloat128Sign( a );
7136     bSig1 = extractFloat128Frac1( b );
7137     bSig0 = extractFloat128Frac0( b );
7138     bExp = extractFloat128Exp( b );
7139     if ( aExp == 0x7FFF ) {
7140         if (    ( aSig0 | aSig1 )
7141              || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
7142             return propagateFloat128NaN(a, b, status);
7143         }
7144         goto invalid;
7145     }
7146     if ( bExp == 0x7FFF ) {
7147         if (bSig0 | bSig1) {
7148             return propagateFloat128NaN(a, b, status);
7149         }
7150         return a;
7151     }
7152     if ( bExp == 0 ) {
7153         if ( ( bSig0 | bSig1 ) == 0 ) {
7154  invalid:
7155             float_raise(float_flag_invalid, status);
7156             return float128_default_nan(status);
7157         }
7158         normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7159     }
7160     if ( aExp == 0 ) {
7161         if ( ( aSig0 | aSig1 ) == 0 ) return a;
7162         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7163     }
7164     expDiff = aExp - bExp;
7165     if ( expDiff < -1 ) return a;
7166     shortShift128Left(
7167         aSig0 | UINT64_C(0x0001000000000000),
7168         aSig1,
7169         15 - ( expDiff < 0 ),
7170         &aSig0,
7171         &aSig1
7172     );
7173     shortShift128Left(
7174         bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
7175     q = le128( bSig0, bSig1, aSig0, aSig1 );
7176     if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7177     expDiff -= 64;
7178     while ( 0 < expDiff ) {
7179         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7180         q = ( 4 < q ) ? q - 4 : 0;
7181         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7182         shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
7183         shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
7184         sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
7185         expDiff -= 61;
7186     }
7187     if ( -64 < expDiff ) {
7188         q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7189         q = ( 4 < q ) ? q - 4 : 0;
7190         q >>= - expDiff;
7191         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7192         expDiff += 52;
7193         if ( expDiff < 0 ) {
7194             shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7195         }
7196         else {
7197             shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
7198         }
7199         mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7200         sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
7201     }
7202     else {
7203         shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
7204         shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7205     }
7206     do {
7207         alternateASig0 = aSig0;
7208         alternateASig1 = aSig1;
7209         ++q;
7210         sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7211     } while ( 0 <= (int64_t) aSig0 );
7212     add128(
7213         aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
7214     if (    ( sigMean0 < 0 )
7215          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
7216         aSig0 = alternateASig0;
7217         aSig1 = alternateASig1;
7218     }
7219     zSign = ( (int64_t) aSig0 < 0 );
7220     if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
7221     return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
7222                                          status);
7223 }
7224
7225 /*----------------------------------------------------------------------------
7226 | Returns the square root of the quadruple-precision floating-point value `a'.
7227 | The operation is performed according to the IEC/IEEE Standard for Binary
7228 | Floating-Point Arithmetic.
7229 *----------------------------------------------------------------------------*/
7230
7231 float128 float128_sqrt(float128 a, float_status *status)
7232 {
7233     bool aSign;
7234     int32_t aExp, zExp;
7235     uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
7236     uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
7237
7238     aSig1 = extractFloat128Frac1( a );
7239     aSig0 = extractFloat128Frac0( a );
7240     aExp = extractFloat128Exp( a );
7241     aSign = extractFloat128Sign( a );
7242     if ( aExp == 0x7FFF ) {
7243         if (aSig0 | aSig1) {
7244             return propagateFloat128NaN(a, a, status);
7245         }
7246         if ( ! aSign ) return a;
7247         goto invalid;
7248     }
7249     if ( aSign ) {
7250         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
7251  invalid:
7252         float_raise(float_flag_invalid, status);
7253         return float128_default_nan(status);
7254     }
7255     if ( aExp == 0 ) {
7256         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
7257         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7258     }
7259     zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
7260     aSig0 |= UINT64_C(0x0001000000000000);
7261     zSig0 = estimateSqrt32( aExp, aSig0>>17 );
7262     shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
7263     zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
7264     doubleZSig0 = zSig0<<1;
7265     mul64To128( zSig0, zSig0, &term0, &term1 );
7266     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
7267     while ( (int64_t) rem0 < 0 ) {
7268         --zSig0;
7269         doubleZSig0 -= 2;
7270         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
7271     }
7272     zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
7273     if ( ( zSig1 & 0x1FFF ) <= 5 ) {
7274         if ( zSig1 == 0 ) zSig1 = 1;
7275         mul64To128( doubleZSig0, zSig1, &term1, &term2 );
7276         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
7277         mul64To128( zSig1, zSig1, &term2, &term3 );
7278         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
7279         while ( (int64_t) rem1 < 0 ) {
7280             --zSig1;
7281             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
7282             term3 |= 1;
7283             term2 |= doubleZSig0;
7284             add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
7285         }
7286         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7287     }
7288     shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
7289     return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
7290
7291 }
7292
7293 static inline FloatRelation
7294 floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
7295                           float_status *status)
7296 {
7297     bool aSign, bSign;
7298
7299     if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7300         float_raise(float_flag_invalid, status);
7301         return float_relation_unordered;
7302     }
7303     if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7304           ( extractFloatx80Frac( a )<<1 ) ) ||
7305         ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7306           ( extractFloatx80Frac( b )<<1 ) )) {
7307         if (!is_quiet ||
7308             floatx80_is_signaling_nan(a, status) ||
7309             floatx80_is_signaling_nan(b, status)) {
7310             float_raise(float_flag_invalid, status);
7311         }
7312         return float_relation_unordered;
7313     }
7314     aSign = extractFloatx80Sign( a );
7315     bSign = extractFloatx80Sign( b );
7316     if ( aSign != bSign ) {
7317
7318         if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7319              ( ( a.low | b.low ) == 0 ) ) {
7320             /* zero case */
7321             return float_relation_equal;
7322         } else {
7323             return 1 - (2 * aSign);
7324         }
7325     } else {
7326         /* Normalize pseudo-denormals before comparison.  */
7327         if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
7328             ++a.high;
7329         }
7330         if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
7331             ++b.high;
7332         }
7333         if (a.low == b.low && a.high == b.high) {
7334             return float_relation_equal;
7335         } else {
7336             return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7337         }
7338     }
7339 }
7340
7341 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
7342 {
7343     return floatx80_compare_internal(a, b, 0, status);
7344 }
7345
7346 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
7347                                      float_status *status)
7348 {
7349     return floatx80_compare_internal(a, b, 1, status);
7350 }
7351
7352 static inline FloatRelation
7353 float128_compare_internal(float128 a, float128 b, bool is_quiet,
7354                           float_status *status)
7355 {
7356     bool aSign, bSign;
7357
7358     if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7359           ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7360         ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7361           ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7362         if (!is_quiet ||
7363             float128_is_signaling_nan(a, status) ||
7364             float128_is_signaling_nan(b, status)) {
7365             float_raise(float_flag_invalid, status);
7366         }
7367         return float_relation_unordered;
7368     }
7369     aSign = extractFloat128Sign( a );
7370     bSign = extractFloat128Sign( b );
7371     if ( aSign != bSign ) {
7372         if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7373             /* zero case */
7374             return float_relation_equal;
7375         } else {
7376             return 1 - (2 * aSign);
7377         }
7378     } else {
7379         if (a.low == b.low && a.high == b.high) {
7380             return float_relation_equal;
7381         } else {
7382             return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7383         }
7384     }
7385 }
7386
7387 FloatRelation float128_compare(float128 a, float128 b, float_status *status)
7388 {
7389     return float128_compare_internal(a, b, 0, status);
7390 }
7391
7392 FloatRelation float128_compare_quiet(float128 a, float128 b,
7393                                      float_status *status)
7394 {
7395     return float128_compare_internal(a, b, 1, status);
7396 }
7397
7398 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
7399 {
7400     bool aSign;
7401     int32_t aExp;
7402     uint64_t aSig;
7403
7404     if (floatx80_invalid_encoding(a)) {
7405         float_raise(float_flag_invalid, status);
7406         return floatx80_default_nan(status);
7407     }
7408     aSig = extractFloatx80Frac( a );
7409     aExp = extractFloatx80Exp( a );
7410     aSign = extractFloatx80Sign( a );
7411
7412     if ( aExp == 0x7FFF ) {
7413         if ( aSig<<1 ) {
7414             return propagateFloatx80NaN(a, a, status);
7415         }
7416         return a;
7417     }
7418
7419     if (aExp == 0) {
7420         if (aSig == 0) {
7421             return a;
7422         }
7423         aExp++;
7424     }
7425
7426     if (n > 0x10000) {
7427         n = 0x10000;
7428     } else if (n < -0x10000) {
7429         n = -0x10000;
7430     }
7431
7432     aExp += n;
7433     return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7434                                          aSign, aExp, aSig, 0, status);
7435 }
7436
7437 float128 float128_scalbn(float128 a, int n, float_status *status)
7438 {
7439     bool aSign;
7440     int32_t aExp;
7441     uint64_t aSig0, aSig1;
7442
7443     aSig1 = extractFloat128Frac1( a );
7444     aSig0 = extractFloat128Frac0( a );
7445     aExp = extractFloat128Exp( a );
7446     aSign = extractFloat128Sign( a );
7447     if ( aExp == 0x7FFF ) {
7448         if ( aSig0 | aSig1 ) {
7449             return propagateFloat128NaN(a, a, status);
7450         }
7451         return a;
7452     }
7453     if (aExp != 0) {
7454         aSig0 |= UINT64_C(0x0001000000000000);
7455     } else if (aSig0 == 0 && aSig1 == 0) {
7456         return a;
7457     } else {
7458         aExp++;
7459     }
7460
7461     if (n > 0x10000) {
7462         n = 0x10000;
7463     } else if (n < -0x10000) {
7464         n = -0x10000;
7465     }
7466
7467     aExp += n - 1;
7468     return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
7469                                          , status);
7470
7471 }
7472
7473 static void __attribute__((constructor)) softfloat_init(void)
7474 {
7475     union_float64 ua, ub, uc, ur;
7476
7477     if (QEMU_NO_HARDFLOAT) {
7478         return;
7479     }
7480     /*
7481      * Test that the host's FMA is not obviously broken. For example,
7482      * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
7483      *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
7484      */
7485     ua.s = 0x0020000000000001ULL;
7486     ub.s = 0x3ca0000000000000ULL;
7487     uc.s = 0x0020000000000000ULL;
7488     ur.h = fma(ua.h, ub.h, uc.h);
7489     if (ur.s != 0x0020000000000001ULL) {
7490         force_soft_fma = true;
7491     }
7492 }