target/arm/vfp_helper.c

   1 /*
   2  * ARM VFP floating-point operations
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "qemu/log.h"
  22 #include "cpu.h"
  23 #include "exec/helper-proto.h"
  24 #include "fpu/softfloat.h"
  25 #include "internals.h"
  26
  27
  28 /* VFP support.  We follow the convention used for VFP instructions:
  29    Single precision routines have a "s" suffix, double precision a
  30    "d" suffix.  */
  31
  32 /* Convert host exception flags to vfp form.  */
  33 static inline int vfp_exceptbits_from_host(int host_bits)
  34 {
  35     int target_bits = 0;
  36
  37     if (host_bits & float_flag_invalid) {
  38         target_bits |= 1;
  39     }
  40     if (host_bits & float_flag_divbyzero) {
  41         target_bits |= 2;
  42     }
  43     if (host_bits & float_flag_overflow) {
  44         target_bits |= 4;
  45     }
  46     if (host_bits & (float_flag_underflow | float_flag_output_denormal)) {
  47         target_bits |= 8;
  48     }
  49     if (host_bits & float_flag_inexact) {
  50         target_bits |= 0x10;
  51     }
  52     if (host_bits & float_flag_input_denormal) {
  53         target_bits |= 0x80;
  54     }
  55     return target_bits;
  56 }
  57
  58 /* Convert vfp exception flags to target form.  */
  59 static inline int vfp_exceptbits_to_host(int target_bits)
  60 {
  61     int host_bits = 0;
  62
  63     if (target_bits & 1) {
  64         host_bits |= float_flag_invalid;
  65     }
  66     if (target_bits & 2) {
  67         host_bits |= float_flag_divbyzero;
  68     }
  69     if (target_bits & 4) {
  70         host_bits |= float_flag_overflow;
  71     }
  72     if (target_bits & 8) {
  73         host_bits |= float_flag_underflow;
  74     }
  75     if (target_bits & 0x10) {
  76         host_bits |= float_flag_inexact;
  77     }
  78     if (target_bits & 0x80) {
  79         host_bits |= float_flag_input_denormal;
  80     }
  81     return host_bits;
  82 }
  83
  84 static uint32_t vfp_get_fpscr_from_host(CPUARMState *env)
  85 {
  86     uint32_t i;
  87
  88     i = get_float_exception_flags(&env->vfp.fp_status);
  89     i |= get_float_exception_flags(&env->vfp.standard_fp_status);
  90     /* FZ16 does not generate an input denormal exception.  */
  91     i |= (get_float_exception_flags(&env->vfp.fp_status_f16)
  92           & ~float_flag_input_denormal);
  93     return vfp_exceptbits_from_host(i);
  94 }
  95
  96 static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
  97 {
  98     int i;
  99     uint32_t changed = env->vfp.xregs[ARM_VFP_FPSCR];
 100
 101     changed ^= val;
 102     if (changed & (3 << 22)) {
 103         i = (val >> 22) & 3;
 104         switch (i) {
 105         case FPROUNDING_TIEEVEN:
 106             i = float_round_nearest_even;
 107             break;
 108         case FPROUNDING_POSINF:
 109             i = float_round_up;
 110             break;
 111         case FPROUNDING_NEGINF:
 112             i = float_round_down;
 113             break;
 114         case FPROUNDING_ZERO:
 115             i = float_round_to_zero;
 116             break;
 117         }
 118         set_float_rounding_mode(i, &env->vfp.fp_status);
 119         set_float_rounding_mode(i, &env->vfp.fp_status_f16);
 120     }
 121     if (changed & FPCR_FZ16) {
 122         bool ftz_enabled = val & FPCR_FZ16;
 123         set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
 124         set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
 125     }
 126     if (changed & FPCR_FZ) {
 127         bool ftz_enabled = val & FPCR_FZ;
 128         set_flush_to_zero(ftz_enabled, &env->vfp.fp_status);
 129         set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status);
 130     }
 131     if (changed & FPCR_DN) {
 132         bool dnan_enabled = val & FPCR_DN;
 133         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status);
 134         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16);
 135     }
 136
 137     /*
 138      * The exception flags are ORed together when we read fpscr so we
 139      * only need to preserve the current state in one of our
 140      * float_status values.
 141      */
 142     i = vfp_exceptbits_to_host(val);
 143     set_float_exception_flags(i, &env->vfp.fp_status);
 144     set_float_exception_flags(0, &env->vfp.fp_status_f16);
 145     set_float_exception_flags(0, &env->vfp.standard_fp_status);
 146 }
 147
 148 uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
 149 {
 150     uint32_t i, fpscr;
 151
 152     fpscr = env->vfp.xregs[ARM_VFP_FPSCR]
 153             | (env->vfp.vec_len << 16)
 154             | (env->vfp.vec_stride << 20);
 155
 156     fpscr |= vfp_get_fpscr_from_host(env);
 157
 158     i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3];
 159     fpscr |= i ? FPCR_QC : 0;
 160
 161     return fpscr;
 162 }
 163
 164 uint32_t vfp_get_fpscr(CPUARMState *env)
 165 {
 166     return HELPER(vfp_get_fpscr)(env);
 167 }
 168
 169 void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
 170 {
 171     /* When ARMv8.2-FP16 is not supported, FZ16 is RES0.  */
 172     if (!cpu_isar_feature(aa64_fp16, env_archcpu(env))) {
 173         val &= ~FPCR_FZ16;
 174     }
 175
 176     if (arm_feature(env, ARM_FEATURE_M)) {
 177         /*
 178          * M profile FPSCR is RES0 for the QC, STRIDE, FZ16, LEN bits
 179          * and also for the trapped-exception-handling bits IxE.
 180          */
 181         val &= 0xf7c0009f;
 182     }
 183
 184     /*
 185      * We don't implement trapped exception handling, so the
 186      * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
 187      *
 188      * If we exclude the exception flags, IOC|DZC|OFC|UFC|IXC|IDC
 189      * (which are stored in fp_status), and the other RES0 bits
 190      * in between, then we clear all of the low 16 bits.
 191      */
 192     env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xf7c80000;
 193     env->vfp.vec_len = (val >> 16) & 7;
 194     env->vfp.vec_stride = (val >> 20) & 3;
 195
 196     /*
 197      * The bit we set within fpscr_q is arbitrary; the register as a
 198      * whole being zero/non-zero is what counts.
 199      */
 200     env->vfp.qc[0] = val & FPCR_QC;
 201     env->vfp.qc[1] = 0;
 202     env->vfp.qc[2] = 0;
 203     env->vfp.qc[3] = 0;
 204
 205     vfp_set_fpscr_to_host(env, val);
 206 }
 207
/* Plain C entry point that forwards to HELPER(vfp_set_fpscr). */
void vfp_set_fpscr(CPUARMState *env, uint32_t val)
{
    HELPER(vfp_set_fpscr)(env, val);
}
 212
 213 #define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
 214
 215 #define VFP_BINOP(name) \
 216 float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \
 217 { \
 218     float_status *fpst = fpstp; \
 219     return float32_ ## name(a, b, fpst); \
 220 } \
 221 float64 VFP_HELPER(name, d)(float64 a, float64 b, void *fpstp) \
 222 { \
 223     float_status *fpst = fpstp; \
 224     return float64_ ## name(a, b, fpst); \
 225 }
 226 VFP_BINOP(add)
 227 VFP_BINOP(sub)
 228 VFP_BINOP(mul)
 229 VFP_BINOP(div)
 230 VFP_BINOP(min)
 231 VFP_BINOP(max)
 232 VFP_BINOP(minnum)
 233 VFP_BINOP(maxnum)
 234 #undef VFP_BINOP
 235
 236 float32 VFP_HELPER(neg, s)(float32 a)
 237 {
 238     return float32_chs(a);
 239 }
 240
 241 float64 VFP_HELPER(neg, d)(float64 a)
 242 {
 243     return float64_chs(a);
 244 }
 245
 246 float32 VFP_HELPER(abs, s)(float32 a)
 247 {
 248     return float32_abs(a);
 249 }
 250
 251 float64 VFP_HELPER(abs, d)(float64 a)
 252 {
 253     return float64_abs(a);
 254 }
 255
 256 float32 VFP_HELPER(sqrt, s)(float32 a, CPUARMState *env)
 257 {
 258     return float32_sqrt(a, &env->vfp.fp_status);
 259 }
 260
 261 float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env)
 262 {
 263     return float64_sqrt(a, &env->vfp.fp_status);
 264 }
 265
 266 static void softfloat_to_vfp_compare(CPUARMState *env, int cmp)
 267 {
 268     uint32_t flags;
 269     switch (cmp) {
 270     case float_relation_equal:
 271         flags = 0x6;
 272         break;
 273     case float_relation_less:
 274         flags = 0x8;
 275         break;
 276     case float_relation_greater:
 277         flags = 0x2;
 278         break;
 279     case float_relation_unordered:
 280         flags = 0x3;
 281         break;
 282     default:
 283         g_assert_not_reached();
 284     }
 285     env->vfp.xregs[ARM_VFP_FPSCR] =
 286         deposit32(env->vfp.xregs[ARM_VFP_FPSCR], 28, 4, flags);
 287 }
 288
 289 /* XXX: check quiet/signaling case */
 290 #define DO_VFP_cmp(p, type) \
 291 void VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env)  \
 292 { \
 293     softfloat_to_vfp_compare(env, \
 294         type ## _compare_quiet(a, b, &env->vfp.fp_status)); \
 295 } \
 296 void VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \
 297 { \
 298     softfloat_to_vfp_compare(env, \
 299         type ## _compare(a, b, &env->vfp.fp_status)); \
 300 }
 301 DO_VFP_cmp(s, float32)
 302 DO_VFP_cmp(d, float64)
 303 #undef DO_VFP_cmp
 304
 305 /* Integer to float and float to integer conversions */
 306
 307 #define CONV_ITOF(name, ftype, fsz, sign)                           \
 308 ftype HELPER(name)(uint32_t x, void *fpstp)                         \
 309 {                                                                   \
 310     float_status *fpst = fpstp;                                     \
 311     return sign##int32_to_##float##fsz((sign##int32_t)x, fpst);     \
 312 }
 313
 314 #define CONV_FTOI(name, ftype, fsz, sign, round)                \
 315 sign##int32_t HELPER(name)(ftype x, void *fpstp)                \
 316 {                                                               \
 317     float_status *fpst = fpstp;                                 \
 318     if (float##fsz##_is_any_nan(x)) {                           \
 319         float_raise(float_flag_invalid, fpst);                  \
 320         return 0;                                               \
 321     }                                                           \
 322     return float##fsz##_to_##sign##int32##round(x, fpst);       \
 323 }
 324
 325 #define FLOAT_CONVS(name, p, ftype, fsz, sign)            \
 326     CONV_ITOF(vfp_##name##to##p, ftype, fsz, sign)        \
 327     CONV_FTOI(vfp_to##name##p, ftype, fsz, sign, )        \
 328     CONV_FTOI(vfp_to##name##z##p, ftype, fsz, sign, _round_to_zero)
 329
 330 FLOAT_CONVS(si, h, uint32_t, 16, )
 331 FLOAT_CONVS(si, s, float32, 32, )
 332 FLOAT_CONVS(si, d, float64, 64, )
 333 FLOAT_CONVS(ui, h, uint32_t, 16, u)
 334 FLOAT_CONVS(ui, s, float32, 32, u)
 335 FLOAT_CONVS(ui, d, float64, 64, u)
 336
 337 #undef CONV_ITOF
 338 #undef CONV_FTOI
 339 #undef FLOAT_CONVS
 340
 341 /* floating point conversion */
 342 float64 VFP_HELPER(fcvtd, s)(float32 x, CPUARMState *env)
 343 {
 344     return float32_to_float64(x, &env->vfp.fp_status);
 345 }
 346
 347 float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env)
 348 {
 349     return float64_to_float32(x, &env->vfp.fp_status);
 350 }
 351
 352 /* VFP3 fixed point conversion.  */
 353 #define VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \
 354 float##fsz HELPER(vfp_##name##to##p)(uint##isz##_t  x, uint32_t shift, \
 355                                      void *fpstp) \
 356 { return itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); }
 357
 358 #define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, ROUND, suff)   \
 359 uint##isz##_t HELPER(vfp_to##name##p##suff)(float##fsz x, uint32_t shift, \
 360                                             void *fpst)                   \
 361 {                                                                         \
 362     if (unlikely(float##fsz##_is_any_nan(x))) {                           \
 363         float_raise(float_flag_invalid, fpst);                            \
 364         return 0;                                                         \
 365     }                                                                     \
 366     return float##fsz##_to_##itype##_scalbn(x, ROUND, shift, fpst);       \
 367 }
 368
 369 #define VFP_CONV_FIX(name, p, fsz, isz, itype)                   \
 370 VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype)                     \
 371 VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype,               \
 372                          float_round_to_zero, _round_to_zero)    \
 373 VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype,               \
 374                          get_float_rounding_mode(fpst), )
 375
 376 #define VFP_CONV_FIX_A64(name, p, fsz, isz, itype)               \
 377 VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype)                     \
 378 VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype,               \
 379                          get_float_rounding_mode(fpst), )
 380
 381 VFP_CONV_FIX(sh, d, 64, 64, int16)
 382 VFP_CONV_FIX(sl, d, 64, 64, int32)
 383 VFP_CONV_FIX_A64(sq, d, 64, 64, int64)
 384 VFP_CONV_FIX(uh, d, 64, 64, uint16)
 385 VFP_CONV_FIX(ul, d, 64, 64, uint32)
 386 VFP_CONV_FIX_A64(uq, d, 64, 64, uint64)
 387 VFP_CONV_FIX(sh, s, 32, 32, int16)
 388 VFP_CONV_FIX(sl, s, 32, 32, int32)
 389 VFP_CONV_FIX_A64(sq, s, 32, 64, int64)
 390 VFP_CONV_FIX(uh, s, 32, 32, uint16)
 391 VFP_CONV_FIX(ul, s, 32, 32, uint32)
 392 VFP_CONV_FIX_A64(uq, s, 32, 64, uint64)
 393
 394 #undef VFP_CONV_FIX
 395 #undef VFP_CONV_FIX_FLOAT
 396 #undef VFP_CONV_FLOAT_FIX_ROUND
 397 #undef VFP_CONV_FIX_A64
 398
 399 uint32_t HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst)
 400 {
 401     return int32_to_float16_scalbn(x, -shift, fpst);
 402 }
 403
 404 uint32_t HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst)
 405 {
 406     return uint32_to_float16_scalbn(x, -shift, fpst);
 407 }
 408
 409 uint32_t HELPER(vfp_sqtoh)(uint64_t x, uint32_t shift, void *fpst)
 410 {
 411     return int64_to_float16_scalbn(x, -shift, fpst);
 412 }
 413
 414 uint32_t HELPER(vfp_uqtoh)(uint64_t x, uint32_t shift, void *fpst)
 415 {
 416     return uint64_to_float16_scalbn(x, -shift, fpst);
 417 }
 418
 419 uint32_t HELPER(vfp_toshh)(uint32_t x, uint32_t shift, void *fpst)
 420 {
 421     if (unlikely(float16_is_any_nan(x))) {
 422         float_raise(float_flag_invalid, fpst);
 423         return 0;
 424     }
 425     return float16_to_int16_scalbn(x, get_float_rounding_mode(fpst),
 426                                    shift, fpst);
 427 }
 428
 429 uint32_t HELPER(vfp_touhh)(uint32_t x, uint32_t shift, void *fpst)
 430 {
 431     if (unlikely(float16_is_any_nan(x))) {
 432         float_raise(float_flag_invalid, fpst);
 433         return 0;
 434     }
 435     return float16_to_uint16_scalbn(x, get_float_rounding_mode(fpst),
 436                                     shift, fpst);
 437 }
 438
 439 uint32_t HELPER(vfp_toslh)(uint32_t x, uint32_t shift, void *fpst)
 440 {
 441     if (unlikely(float16_is_any_nan(x))) {
 442         float_raise(float_flag_invalid, fpst);
 443         return 0;
 444     }
 445     return float16_to_int32_scalbn(x, get_float_rounding_mode(fpst),
 446                                    shift, fpst);
 447 }
 448
 449 uint32_t HELPER(vfp_toulh)(uint32_t x, uint32_t shift, void *fpst)
 450 {
 451     if (unlikely(float16_is_any_nan(x))) {
 452         float_raise(float_flag_invalid, fpst);
 453         return 0;
 454     }
 455     return float16_to_uint32_scalbn(x, get_float_rounding_mode(fpst),
 456                                     shift, fpst);
 457 }
 458
 459 uint64_t HELPER(vfp_tosqh)(uint32_t x, uint32_t shift, void *fpst)
 460 {
 461     if (unlikely(float16_is_any_nan(x))) {
 462         float_raise(float_flag_invalid, fpst);
 463         return 0;
 464     }
 465     return float16_to_int64_scalbn(x, get_float_rounding_mode(fpst),
 466                                    shift, fpst);
 467 }
 468
 469 uint64_t HELPER(vfp_touqh)(uint32_t x, uint32_t shift, void *fpst)
 470 {
 471     if (unlikely(float16_is_any_nan(x))) {
 472         float_raise(float_flag_invalid, fpst);
 473         return 0;
 474     }
 475     return float16_to_uint64_scalbn(x, get_float_rounding_mode(fpst),
 476                                     shift, fpst);
 477 }
 478
 479 /* Set the current fp rounding mode and return the old one.
 480  * The argument is a softfloat float_round_ value.
 481  */
 482 uint32_t HELPER(set_rmode)(uint32_t rmode, void *fpstp)
 483 {
 484     float_status *fp_status = fpstp;
 485
 486     uint32_t prev_rmode = get_float_rounding_mode(fp_status);
 487     set_float_rounding_mode(rmode, fp_status);
 488
 489     return prev_rmode;
 490 }
 491
 492 /* Set the current fp rounding mode in the standard fp status and return
 493  * the old one. This is for NEON instructions that need to change the
 494  * rounding mode but wish to use the standard FPSCR values for everything
 495  * else. Always set the rounding mode back to the correct value after
 496  * modifying it.
 497  * The argument is a softfloat float_round_ value.
 498  */
 499 uint32_t HELPER(set_neon_rmode)(uint32_t rmode, CPUARMState *env)
 500 {
 501     float_status *fp_status = &env->vfp.standard_fp_status;
 502
 503     uint32_t prev_rmode = get_float_rounding_mode(fp_status);
 504     set_float_rounding_mode(rmode, fp_status);
 505
 506     return prev_rmode;
 507 }
 508
 509 /* Half precision conversions.  */
 510 float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode)
 511 {
 512     /* Squash FZ16 to 0 for the duration of conversion.  In this case,
 513      * it would affect flushing input denormals.
 514      */
 515     float_status *fpst = fpstp;
 516     flag save = get_flush_inputs_to_zero(fpst);
 517     set_flush_inputs_to_zero(false, fpst);
 518     float32 r = float16_to_float32(a, !ahp_mode, fpst);
 519     set_flush_inputs_to_zero(save, fpst);
 520     return r;
 521 }
 522
 523 uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode)
 524 {
 525     /* Squash FZ16 to 0 for the duration of conversion.  In this case,
 526      * it would affect flushing output denormals.
 527      */
 528     float_status *fpst = fpstp;
 529     flag save = get_flush_to_zero(fpst);
 530     set_flush_to_zero(false, fpst);
 531     float16 r = float32_to_float16(a, !ahp_mode, fpst);
 532     set_flush_to_zero(save, fpst);
 533     return r;
 534 }
 535
 536 float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t ahp_mode)
 537 {
 538     /* Squash FZ16 to 0 for the duration of conversion.  In this case,
 539      * it would affect flushing input denormals.
 540      */
 541     float_status *fpst = fpstp;
 542     flag save = get_flush_inputs_to_zero(fpst);
 543     set_flush_inputs_to_zero(false, fpst);
 544     float64 r = float16_to_float64(a, !ahp_mode, fpst);
 545     set_flush_inputs_to_zero(save, fpst);
 546     return r;
 547 }
 548
 549 uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
 550 {
 551     /* Squash FZ16 to 0 for the duration of conversion.  In this case,
 552      * it would affect flushing output denormals.
 553      */
 554     float_status *fpst = fpstp;
 555     flag save = get_flush_to_zero(fpst);
 556     set_flush_to_zero(false, fpst);
 557     float16 r = float64_to_float16(a, !ahp_mode, fpst);
 558     set_flush_to_zero(save, fpst);
 559     return r;
 560 }
 561
/* Single precision bit patterns used by the recps/rsqrts step helpers. */
#define float32_two make_float32(0x40000000)              /* 2.0 */
#define float32_three make_float32(0x40400000)            /* 3.0 */
#define float32_one_point_five make_float32(0x3fc00000)   /* 1.5 */
 565
 566 float32 HELPER(recps_f32)(float32 a, float32 b, CPUARMState *env)
 567 {
 568     float_status *s = &env->vfp.standard_fp_status;
 569     if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
 570         (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
 571         if (!(float32_is_zero(a) || float32_is_zero(b))) {
 572             float_raise(float_flag_input_denormal, s);
 573         }
 574         return float32_two;
 575     }
 576     return float32_sub(float32_two, float32_mul(a, b, s), s);
 577 }
 578
 579 float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUARMState *env)
 580 {
 581     float_status *s = &env->vfp.standard_fp_status;
 582     float32 product;
 583     if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
 584         (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
 585         if (!(float32_is_zero(a) || float32_is_zero(b))) {
 586             float_raise(float_flag_input_denormal, s);
 587         }
 588         return float32_one_point_five;
 589     }
 590     product = float32_mul(a, b, s);
 591     return float32_div(float32_sub(float32_three, product, s), float32_two, s);
 592 }
 593
 594 /* NEON helpers.  */
 595
/* Constants 256 and 512 are used in some helpers; we avoid relying on
 * int->float conversions at run-time.  The *_maxnorm constants are the
 * largest finite value of each format (all-ones fraction, exponent one
 * below the infinity/NaN encoding).  */
#define float64_256 make_float64(0x4070000000000000LL)      /* 256.0 */
#define float64_512 make_float64(0x4080000000000000LL)      /* 512.0 */
#define float16_maxnorm make_float16(0x7bff)
#define float32_maxnorm make_float32(0x7f7fffff)
#define float64_maxnorm make_float64(0x7fefffffffffffffLL)
 603
 604 /* Reciprocal functions
 605  *
 606  * The algorithm that must be used to calculate the estimate
 607  * is specified by the ARM ARM, see FPRecipEstimate()/RecipEstimate
 608  */
 609
 610 /* See RecipEstimate()
 611  *
 612  * input is a 9 bit fixed point number
 613  * input range 256 .. 511 for a number from 0.5 <= x < 1.0.
 614  * result range 256 .. 511 for a number from 1.0 to 511/256.
 615  */
 616
 617 static int recip_estimate(int input)
 618 {
 619     int a, b, r;
 620     assert(256 <= input && input < 512);
 621     a = (input * 2) + 1;
 622     b = (1 << 19) / a;
 623     r = (b + 1) >> 1;
 624     assert(256 <= r && r < 512);
 625     return r;
 626 }
 627
 628 /*
 629  * Common wrapper to call recip_estimate
 630  *
 631  * The parameters are exponent and 64 bit fraction (without implicit
 632  * bit) where the binary point is nominally at bit 52. Returns a
 633  * float64 which can then be rounded to the appropriate size by the
 634  * callee.
 635  */
 636
 637 static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac)
 638 {
 639     uint32_t scaled, estimate;
 640     uint64_t result_frac;
 641     int result_exp;
 642
 643     /* Handle sub-normals */
 644     if (*exp == 0) {
 645         if (extract64(frac, 51, 1) == 0) {
 646             *exp = -1;
 647             frac <<= 2;
 648         } else {
 649             frac <<= 1;
 650         }
 651     }
 652
 653     /* scaled = UInt('1':fraction<51:44>) */
 654     scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
 655     estimate = recip_estimate(scaled);
 656
 657     result_exp = exp_off - *exp;
 658     result_frac = deposit64(0, 44, 8, estimate);
 659     if (result_exp == 0) {
 660         result_frac = deposit64(result_frac >> 1, 51, 1, 1);
 661     } else if (result_exp == -1) {
 662         result_frac = deposit64(result_frac >> 2, 50, 2, 1);
 663         result_exp = 0;
 664     }
 665
 666     *exp = result_exp;
 667
 668     return result_frac;
 669 }
 670
 671 static bool round_to_inf(float_status *fpst, bool sign_bit)
 672 {
 673     switch (fpst->float_rounding_mode) {
 674     case float_round_nearest_even: /* Round to Nearest */
 675         return true;
 676     case float_round_up: /* Round to +Inf */
 677         return !sign_bit;
 678     case float_round_down: /* Round to -Inf */
 679         return sign_bit;
 680     case float_round_to_zero: /* Round to Zero */
 681         return false;
 682     }
 683
 684     g_assert_not_reached();
 685 }
 686
 687 uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp)
 688 {
 689     float_status *fpst = fpstp;
 690     float16 f16 = float16_squash_input_denormal(input, fpst);
 691     uint32_t f16_val = float16_val(f16);
 692     uint32_t f16_sign = float16_is_neg(f16);
 693     int f16_exp = extract32(f16_val, 10, 5);
 694     uint32_t f16_frac = extract32(f16_val, 0, 10);
 695     uint64_t f64_frac;
 696
 697     if (float16_is_any_nan(f16)) {
 698         float16 nan = f16;
 699         if (float16_is_signaling_nan(f16, fpst)) {
 700             float_raise(float_flag_invalid, fpst);
 701             nan = float16_silence_nan(f16, fpst);
 702         }
 703         if (fpst->default_nan_mode) {
 704             nan =  float16_default_nan(fpst);
 705         }
 706         return nan;
 707     } else if (float16_is_infinity(f16)) {
 708         return float16_set_sign(float16_zero, float16_is_neg(f16));
 709     } else if (float16_is_zero(f16)) {
 710         float_raise(float_flag_divbyzero, fpst);
 711         return float16_set_sign(float16_infinity, float16_is_neg(f16));
 712     } else if (float16_abs(f16) < (1 << 8)) {
 713         /* Abs(value) < 2.0^-16 */
 714         float_raise(float_flag_overflow | float_flag_inexact, fpst);
 715         if (round_to_inf(fpst, f16_sign)) {
 716             return float16_set_sign(float16_infinity, f16_sign);
 717         } else {
 718             return float16_set_sign(float16_maxnorm, f16_sign);
 719         }
 720     } else if (f16_exp >= 29 && fpst->flush_to_zero) {
 721         float_raise(float_flag_underflow, fpst);
 722         return float16_set_sign(float16_zero, float16_is_neg(f16));
 723     }
 724
 725     f64_frac = call_recip_estimate(&f16_exp, 29,
 726                                    ((uint64_t) f16_frac) << (52 - 10));
 727
 728     /* result = sign : result_exp<4:0> : fraction<51:42> */
 729     f16_val = deposit32(0, 15, 1, f16_sign);
 730     f16_val = deposit32(f16_val, 10, 5, f16_exp);
 731     f16_val = deposit32(f16_val, 0, 10, extract64(f64_frac, 52 - 10, 10));
 732     return make_float16(f16_val);
 733 }
 734
 735 float32 HELPER(recpe_f32)(float32 input, void *fpstp)
 736 {
 737     float_status *fpst = fpstp;
 738     float32 f32 = float32_squash_input_denormal(input, fpst);
 739     uint32_t f32_val = float32_val(f32);
 740     bool f32_sign = float32_is_neg(f32);
 741     int f32_exp = extract32(f32_val, 23, 8);
 742     uint32_t f32_frac = extract32(f32_val, 0, 23);
 743     uint64_t f64_frac;
 744
 745     if (float32_is_any_nan(f32)) {
 746         float32 nan = f32;
 747         if (float32_is_signaling_nan(f32, fpst)) {
 748             float_raise(float_flag_invalid, fpst);
 749             nan = float32_silence_nan(f32, fpst);
 750         }
 751         if (fpst->default_nan_mode) {
 752             nan =  float32_default_nan(fpst);
 753         }
 754         return nan;
 755     } else if (float32_is_infinity(f32)) {
 756         return float32_set_sign(float32_zero, float32_is_neg(f32));
 757     } else if (float32_is_zero(f32)) {
 758         float_raise(float_flag_divbyzero, fpst);
 759         return float32_set_sign(float32_infinity, float32_is_neg(f32));
 760     } else if (float32_abs(f32) < (1ULL << 21)) {
 761         /* Abs(value) < 2.0^-128 */
 762         float_raise(float_flag_overflow | float_flag_inexact, fpst);
 763         if (round_to_inf(fpst, f32_sign)) {
 764             return float32_set_sign(float32_infinity, f32_sign);
 765         } else {
 766             return float32_set_sign(float32_maxnorm, f32_sign);
 767         }
 768     } else if (f32_exp >= 253 && fpst->flush_to_zero) {
 769         float_raise(float_flag_underflow, fpst);
 770         return float32_set_sign(float32_zero, float32_is_neg(f32));
 771     }
 772
 773     f64_frac = call_recip_estimate(&f32_exp, 253,
 774                                    ((uint64_t) f32_frac) << (52 - 23));
 775
 776     /* result = sign : result_exp<7:0> : fraction<51:29> */
 777     f32_val = deposit32(0, 31, 1, f32_sign);
 778     f32_val = deposit32(f32_val, 23, 8, f32_exp);
 779     f32_val = deposit32(f32_val, 0, 23, extract64(f64_frac, 52 - 23, 23));
 780     return make_float32(f32_val);
 781 }
 782
 783 float64 HELPER(recpe_f64)(float64 input, void *fpstp)
 784 {
 785     float_status *fpst = fpstp;
 786     float64 f64 = float64_squash_input_denormal(input, fpst);
 787     uint64_t f64_val = float64_val(f64);
 788     bool f64_sign = float64_is_neg(f64);
 789     int f64_exp = extract64(f64_val, 52, 11);
 790     uint64_t f64_frac = extract64(f64_val, 0, 52);
 791
 792     /* Deal with any special cases */
 793     if (float64_is_any_nan(f64)) {
 794         float64 nan = f64;
 795         if (float64_is_signaling_nan(f64, fpst)) {
 796             float_raise(float_flag_invalid, fpst);
 797             nan = float64_silence_nan(f64, fpst);
 798         }
 799         if (fpst->default_nan_mode) {
 800             nan =  float64_default_nan(fpst);
 801         }
 802         return nan;
 803     } else if (float64_is_infinity(f64)) {
 804         return float64_set_sign(float64_zero, float64_is_neg(f64));
 805     } else if (float64_is_zero(f64)) {
 806         float_raise(float_flag_divbyzero, fpst);
 807         return float64_set_sign(float64_infinity, float64_is_neg(f64));
 808     } else if ((f64_val & ~(1ULL << 63)) < (1ULL << 50)) {
 809         /* Abs(value) < 2.0^-1024 */
 810         float_raise(float_flag_overflow | float_flag_inexact, fpst);
 811         if (round_to_inf(fpst, f64_sign)) {
 812             return float64_set_sign(float64_infinity, f64_sign);
 813         } else {
 814             return float64_set_sign(float64_maxnorm, f64_sign);
 815         }
 816     } else if (f64_exp >= 2045 && fpst->flush_to_zero) {
 817         float_raise(float_flag_underflow, fpst);
 818         return float64_set_sign(float64_zero, float64_is_neg(f64));
 819     }
 820
 821     f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac);
 822
 823     /* result = sign : result_exp<10:0> : fraction<51:0>; */
 824     f64_val = deposit64(0, 63, 1, f64_sign);
 825     f64_val = deposit64(f64_val, 52, 11, f64_exp);
 826     f64_val = deposit64(f64_val, 0, 52, f64_frac);
 827     return make_float64(f64_val);
 828 }
 829
 830 /* The algorithm that must be used to calculate the estimate
 831  * is specified by the ARM ARM.
 832  */
 833
 834 static int do_recip_sqrt_estimate(int a)
 835 {
 836     int b, estimate;
 837
 838     assert(128 <= a && a < 512);
 839     if (a < 256) {
 840         a = a * 2 + 1;
 841     } else {
 842         a = (a >> 1) << 1;
 843         a = (a + 1) * 2;
 844     }
 845     b = 512;
 846     while (a * (b + 1) * (b + 1) < (1 << 28)) {
 847         b += 1;
 848     }
 849     estimate = (b + 1) / 2;
 850     assert(256 <= estimate && estimate < 512);
 851
 852     return estimate;
 853 }
 854
 855
 856 static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
 857 {
 858     int estimate;
 859     uint32_t scaled;
 860
 861     if (*exp == 0) {
 862         while (extract64(frac, 51, 1) == 0) {
 863             frac = frac << 1;
 864             *exp -= 1;
 865         }
 866         frac = extract64(frac, 0, 51) << 1;
 867     }
 868
 869     if (*exp & 1) {
 870         /* scaled = UInt('01':fraction<51:45>) */
 871         scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
 872     } else {
 873         /* scaled = UInt('1':fraction<51:44>) */
 874         scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
 875     }
 876     estimate = do_recip_sqrt_estimate(scaled);
 877
 878     *exp = (exp_off - *exp) / 2;
 879     return extract64(estimate, 0, 8) << 44;
 880 }
 881
 882 uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp)
 883 {
 884     float_status *s = fpstp;
 885     float16 f16 = float16_squash_input_denormal(input, s);
 886     uint16_t val = float16_val(f16);
 887     bool f16_sign = float16_is_neg(f16);
 888     int f16_exp = extract32(val, 10, 5);
 889     uint16_t f16_frac = extract32(val, 0, 10);
 890     uint64_t f64_frac;
 891
 892     if (float16_is_any_nan(f16)) {
 893         float16 nan = f16;
 894         if (float16_is_signaling_nan(f16, s)) {
 895             float_raise(float_flag_invalid, s);
 896             nan = float16_silence_nan(f16, s);
 897         }
 898         if (s->default_nan_mode) {
 899             nan =  float16_default_nan(s);
 900         }
 901         return nan;
 902     } else if (float16_is_zero(f16)) {
 903         float_raise(float_flag_divbyzero, s);
 904         return float16_set_sign(float16_infinity, f16_sign);
 905     } else if (f16_sign) {
 906         float_raise(float_flag_invalid, s);
 907         return float16_default_nan(s);
 908     } else if (float16_is_infinity(f16)) {
 909         return float16_zero;
 910     }
 911
 912     /* Scale and normalize to a double-precision value between 0.25 and 1.0,
 913      * preserving the parity of the exponent.  */
 914
 915     f64_frac = ((uint64_t) f16_frac) << (52 - 10);
 916
 917     f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac);
 918
 919     /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
 920     val = deposit32(0, 15, 1, f16_sign);
 921     val = deposit32(val, 10, 5, f16_exp);
 922     val = deposit32(val, 2, 8, extract64(f64_frac, 52 - 8, 8));
 923     return make_float16(val);
 924 }
 925
 926 float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
 927 {
 928     float_status *s = fpstp;
 929     float32 f32 = float32_squash_input_denormal(input, s);
 930     uint32_t val = float32_val(f32);
 931     uint32_t f32_sign = float32_is_neg(f32);
 932     int f32_exp = extract32(val, 23, 8);
 933     uint32_t f32_frac = extract32(val, 0, 23);
 934     uint64_t f64_frac;
 935
 936     if (float32_is_any_nan(f32)) {
 937         float32 nan = f32;
 938         if (float32_is_signaling_nan(f32, s)) {
 939             float_raise(float_flag_invalid, s);
 940             nan = float32_silence_nan(f32, s);
 941         }
 942         if (s->default_nan_mode) {
 943             nan =  float32_default_nan(s);
 944         }
 945         return nan;
 946     } else if (float32_is_zero(f32)) {
 947         float_raise(float_flag_divbyzero, s);
 948         return float32_set_sign(float32_infinity, float32_is_neg(f32));
 949     } else if (float32_is_neg(f32)) {
 950         float_raise(float_flag_invalid, s);
 951         return float32_default_nan(s);
 952     } else if (float32_is_infinity(f32)) {
 953         return float32_zero;
 954     }
 955
 956     /* Scale and normalize to a double-precision value between 0.25 and 1.0,
 957      * preserving the parity of the exponent.  */
 958
 959     f64_frac = ((uint64_t) f32_frac) << 29;
 960
 961     f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac);
 962
 963     /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */
 964     val = deposit32(0, 31, 1, f32_sign);
 965     val = deposit32(val, 23, 8, f32_exp);
 966     val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
 967     return make_float32(val);
 968 }
 969
 970 float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
 971 {
 972     float_status *s = fpstp;
 973     float64 f64 = float64_squash_input_denormal(input, s);
 974     uint64_t val = float64_val(f64);
 975     bool f64_sign = float64_is_neg(f64);
 976     int f64_exp = extract64(val, 52, 11);
 977     uint64_t f64_frac = extract64(val, 0, 52);
 978
 979     if (float64_is_any_nan(f64)) {
 980         float64 nan = f64;
 981         if (float64_is_signaling_nan(f64, s)) {
 982             float_raise(float_flag_invalid, s);
 983             nan = float64_silence_nan(f64, s);
 984         }
 985         if (s->default_nan_mode) {
 986             nan =  float64_default_nan(s);
 987         }
 988         return nan;
 989     } else if (float64_is_zero(f64)) {
 990         float_raise(float_flag_divbyzero, s);
 991         return float64_set_sign(float64_infinity, float64_is_neg(f64));
 992     } else if (float64_is_neg(f64)) {
 993         float_raise(float_flag_invalid, s);
 994         return float64_default_nan(s);
 995     } else if (float64_is_infinity(f64)) {
 996         return float64_zero;
 997     }
 998
 999     f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac);
1000
1001     /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(44) */
1002     val = deposit64(0, 61, 1, f64_sign);
1003     val = deposit64(val, 52, 11, f64_exp);
1004     val = deposit64(val, 44, 8, extract64(f64_frac, 52 - 8, 8));
1005     return make_float64(val);
1006 }
1007
1008 uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
1009 {
1010     /* float_status *s = fpstp; */
1011     int input, estimate;
1012
1013     if ((a & 0x80000000) == 0) {
1014         return 0xffffffff;
1015     }
1016
1017     input = extract32(a, 23, 9);
1018     estimate = recip_estimate(input);
1019
1020     return deposit32(0, (32 - 9), 9, estimate);
1021 }
1022
1023 uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp)
1024 {
1025     int estimate;
1026
1027     if ((a & 0xc0000000) == 0) {
1028         return 0xffffffff;
1029     }
1030
1031     estimate = do_recip_sqrt_estimate(extract32(a, 23, 9));
1032
1033     return deposit32(0, 23, 9, estimate);
1034 }
1035
1036 /* VFPv4 fused multiply-accumulate */
1037 float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, void *fpstp)
1038 {
1039     float_status *fpst = fpstp;
1040     return float32_muladd(a, b, c, 0, fpst);
1041 }
1042
1043 float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp)
1044 {
1045     float_status *fpst = fpstp;
1046     return float64_muladd(a, b, c, 0, fpst);
1047 }
1048
1049 /* ARMv8 round to integral */
1050 float32 HELPER(rints_exact)(float32 x, void *fp_status)
1051 {
1052     return float32_round_to_int(x, fp_status);
1053 }
1054
1055 float64 HELPER(rintd_exact)(float64 x, void *fp_status)
1056 {
1057     return float64_round_to_int(x, fp_status);
1058 }
1059
1060 float32 HELPER(rints)(float32 x, void *fp_status)
1061 {
1062     int old_flags = get_float_exception_flags(fp_status), new_flags;
1063     float32 ret;
1064
1065     ret = float32_round_to_int(x, fp_status);
1066
1067     /* Suppress any inexact exceptions the conversion produced */
1068     if (!(old_flags & float_flag_inexact)) {
1069         new_flags = get_float_exception_flags(fp_status);
1070         set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
1071     }
1072
1073     return ret;
1074 }
1075
1076 float64 HELPER(rintd)(float64 x, void *fp_status)
1077 {
1078     int old_flags = get_float_exception_flags(fp_status), new_flags;
1079     float64 ret;
1080
1081     ret = float64_round_to_int(x, fp_status);
1082
1083     new_flags = get_float_exception_flags(fp_status);
1084
1085     /* Suppress any inexact exceptions the conversion produced */
1086     if (!(old_flags & float_flag_inexact)) {
1087         new_flags = get_float_exception_flags(fp_status);
1088         set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
1089     }
1090
1091     return ret;
1092 }
1093
1094 /* Convert ARM rounding mode to softfloat */
1095 int arm_rmode_to_sf(int rmode)
1096 {
1097     switch (rmode) {
1098     case FPROUNDING_TIEAWAY:
1099         rmode = float_round_ties_away;
1100         break;
1101     case FPROUNDING_ODD:
1102         /* FIXME: add support for TIEAWAY and ODD */
1103         qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n",
1104                       rmode);
1105         /* fall through for now */
1106     case FPROUNDING_TIEEVEN:
1107     default:
1108         rmode = float_round_nearest_even;
1109         break;
1110     case FPROUNDING_POSINF:
1111         rmode = float_round_up;
1112         break;
1113     case FPROUNDING_NEGINF:
1114         rmode = float_round_down;
1115         break;
1116     case FPROUNDING_ZERO:
1117         rmode = float_round_to_zero;
1118         break;
1119     }
1120     return rmode;
1121 }
1122
1123 /*
1124  * Implement float64 to int32_t conversion without saturation;
1125  * the result is supplied modulo 2^32.
1126  */
1127 uint64_t HELPER(fjcvtzs)(float64 value, void *vstatus)
1128 {
1129     float_status *status = vstatus;
1130     uint32_t exp, sign;
1131     uint64_t frac;
1132     uint32_t inexact = 1; /* !Z */
1133
1134     sign = extract64(value, 63, 1);
1135     exp = extract64(value, 52, 11);
1136     frac = extract64(value, 0, 52);
1137
1138     if (exp == 0) {
1139         /* While not inexact for IEEE FP, -0.0 is inexact for JavaScript.  */
1140         inexact = sign;
1141         if (frac != 0) {
1142             if (status->flush_inputs_to_zero) {
1143                 float_raise(float_flag_input_denormal, status);
1144             } else {
1145                 float_raise(float_flag_inexact, status);
1146                 inexact = 1;
1147             }
1148         }
1149         frac = 0;
1150     } else if (exp == 0x7ff) {
1151         /* This operation raises Invalid for both NaN and overflow (Inf).  */
1152         float_raise(float_flag_invalid, status);
1153         frac = 0;
1154     } else {
1155         int true_exp = exp - 1023;
1156         int shift = true_exp - 52;
1157
1158         /* Restore implicit bit.  */
1159         frac |= 1ull << 52;
1160
1161         /* Shift the fraction into place.  */
1162         if (shift >= 0) {
1163             /* The number is so large we must shift the fraction left.  */
1164             if (shift >= 64) {
1165                 /* The fraction is shifted out entirely.  */
1166                 frac = 0;
1167             } else {
1168                 frac <<= shift;
1169             }
1170         } else if (shift > -64) {
1171             /* Normal case -- shift right and notice if bits shift out.  */
1172             inexact = (frac << (64 + shift)) != 0;
1173             frac >>= -shift;
1174         } else {
1175             /* The fraction is shifted out entirely.  */
1176             frac = 0;
1177         }
1178
1179         /* Notice overflow or inexact exceptions.  */
1180         if (true_exp > 31 || frac > (sign ? 0x80000000ull : 0x7fffffff)) {
1181             /* Overflow, for which this operation raises invalid.  */
1182             float_raise(float_flag_invalid, status);
1183             inexact = 1;
1184         } else if (inexact) {
1185             float_raise(float_flag_inexact, status);
1186         }
1187
1188         /* Honor the sign.  */
1189         if (sign) {
1190             frac = -frac;
1191         }
1192     }
1193
1194     /* Pack the result and the env->ZF representation of Z together.  */
1195     return deposit64(frac, 32, 32, inexact);
1196 }
1197
1198 uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env)
1199 {
1200     uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status);
1201     uint32_t result = pair;
1202     uint32_t z = (pair >> 32) == 0;
1203
1204     /* Store Z, clear NCV, in FPSCR.NZCV.  */
1205     env->vfp.xregs[ARM_VFP_FPSCR]
1206         = (env->vfp.xregs[ARM_VFP_FPSCR] & ~CPSR_NZCV) | (z * CPSR_Z);
1207
1208     return result;
1209 }
1210
1211 /* Round a float32 to an integer that fits in int32_t or int64_t.  */
1212 static float32 frint_s(float32 f, float_status *fpst, int intsize)
1213 {
1214     int old_flags = get_float_exception_flags(fpst);
1215     uint32_t exp = extract32(f, 23, 8);
1216
1217     if (unlikely(exp == 0xff)) {
1218         /* NaN or Inf.  */
1219         goto overflow;
1220     }
1221
1222     /* Round and re-extract the exponent.  */
1223     f = float32_round_to_int(f, fpst);
1224     exp = extract32(f, 23, 8);
1225
1226     /* Validate the range of the result.  */
1227     if (exp < 126 + intsize) {
1228         /* abs(F) <= INT{N}_MAX */
1229         return f;
1230     }
1231     if (exp == 126 + intsize) {
1232         uint32_t sign = extract32(f, 31, 1);
1233         uint32_t frac = extract32(f, 0, 23);
1234         if (sign && frac == 0) {
1235             /* F == INT{N}_MIN */
1236             return f;
1237         }
1238     }
1239
1240  overflow:
1241     /*
1242      * Raise Invalid and return INT{N}_MIN as a float.  Revert any
1243      * inexact exception float32_round_to_int may have raised.
1244      */
1245     set_float_exception_flags(old_flags | float_flag_invalid, fpst);
1246     return (0x100u + 126u + intsize) << 23;
1247 }
1248
1249 float32 HELPER(frint32_s)(float32 f, void *fpst)
1250 {
1251     return frint_s(f, fpst, 32);
1252 }
1253
1254 float32 HELPER(frint64_s)(float32 f, void *fpst)
1255 {
1256     return frint_s(f, fpst, 64);
1257 }
1258
1259 /* Round a float64 to an integer that fits in int32_t or int64_t.  */
1260 static float64 frint_d(float64 f, float_status *fpst, int intsize)
1261 {
1262     int old_flags = get_float_exception_flags(fpst);
1263     uint32_t exp = extract64(f, 52, 11);
1264
1265     if (unlikely(exp == 0x7ff)) {
1266         /* NaN or Inf.  */
1267         goto overflow;
1268     }
1269
1270     /* Round and re-extract the exponent.  */
1271     f = float64_round_to_int(f, fpst);
1272     exp = extract64(f, 52, 11);
1273
1274     /* Validate the range of the result.  */
1275     if (exp < 1022 + intsize) {
1276         /* abs(F) <= INT{N}_MAX */
1277         return f;
1278     }
1279     if (exp == 1022 + intsize) {
1280         uint64_t sign = extract64(f, 63, 1);
1281         uint64_t frac = extract64(f, 0, 52);
1282         if (sign && frac == 0) {
1283             /* F == INT{N}_MIN */
1284             return f;
1285         }
1286     }
1287
1288  overflow:
1289     /*
1290      * Raise Invalid and return INT{N}_MIN as a float.  Revert any
1291      * inexact exception float64_round_to_int may have raised.
1292      */
1293     set_float_exception_flags(old_flags | float_flag_invalid, fpst);
1294     return (uint64_t)(0x800 + 1022 + intsize) << 52;
1295 }
1296
1297 float64 HELPER(frint32_d)(float64 f, void *fpst)
1298 {
1299     return frint_d(f, fpst, 32);
1300 }
1301
1302 float64 HELPER(frint64_d)(float64 f, void *fpst)
1303 {
1304     return frint_d(f, fpst, 64);
1305 }