mirror_qemu.git: target/i386/fpu_helper.c
1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "qemu/host-utils.h"
25 #include "exec/exec-all.h"
26 #include "exec/cpu_ldst.h"
27
28 #define FPU_RC_MASK 0xc00
29 #define FPU_RC_NEAR 0x000
30 #define FPU_RC_DOWN 0x400
31 #define FPU_RC_UP 0x800
32 #define FPU_RC_CHOP 0xc00
33
34 #define MAXTAN 9223372036854775808.0
35
36 /* the following deal with x86 long double-precision numbers */
37 #define MAXEXPD 0x7fff
38 #define EXPBIAS 16383
39 #define EXPD(fp) (fp.l.upper & 0x7fff)
40 #define SIGND(fp) ((fp.l.upper) & 0x8000)
41 #define MANTD(fp) (fp.l.lower)
42 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
43
44 #define FPUS_IE (1 << 0)
45 #define FPUS_DE (1 << 1)
46 #define FPUS_ZE (1 << 2)
47 #define FPUS_OE (1 << 3)
48 #define FPUS_UE (1 << 4)
49 #define FPUS_PE (1 << 5)
50 #define FPUS_SF (1 << 6)
51 #define FPUS_SE (1 << 7)
52 #define FPUS_B (1 << 15)
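/*
 * Layout of the x87 status word, for reference: bits 0..7 are the
 * exception/summary flags defined above, C0/C1/C2 are bits 8/9/10
 * (0x100/0x200/0x400), bits 11..13 hold TOP (0x3800) and C3 is bit 14
 * (0x4000).  This is why the helpers below mask with 0x4500 (C3|C2|C0),
 * 0x4700 (all four condition codes) and 0x3800 (TOP).
 */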
53
54 #define FPUC_EM 0x3f
55
56 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
57 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
58 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
59
60 static inline void fpush(CPUX86State *env)
61 {
62 env->fpstt = (env->fpstt - 1) & 7;
63 env->fptags[env->fpstt] = 0; /* validate stack entry */
64 }
65
66 static inline void fpop(CPUX86State *env)
67 {
68 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
69 env->fpstt = (env->fpstt + 1) & 7;
70 }
71
72 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
73 uintptr_t retaddr)
74 {
75 CPU_LDoubleU temp;
76
77 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
78 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
79 return temp.d;
80 }
81
82 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
83 uintptr_t retaddr)
84 {
85 CPU_LDoubleU temp;
86
87 temp.d = f;
88 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
89 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
90 }
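/*
 * An 80-bit extended value is stored in memory as a 10-byte field: the
 * 64-bit significand at offset 0 and the 16-bit sign+exponent word at
 * offset 8, which is why helper_fldt()/helper_fstt() issue one 64-bit
 * and one 16-bit access.
 */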
91
92 /* x87 FPU helpers */
93
94 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
95 {
96 union {
97 float64 f64;
98 double d;
99 } u;
100
101 u.f64 = floatx80_to_float64(a, &env->fp_status);
102 return u.d;
103 }
104
105 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
106 {
107 union {
108 float64 f64;
109 double d;
110 } u;
111
112 u.d = a;
113 return float64_to_floatx80(u.f64, &env->fp_status);
114 }
115
116 static void fpu_set_exception(CPUX86State *env, int mask)
117 {
118 env->fpus |= mask;
119 if (env->fpus & (~env->fpuc & FPUC_EM)) {
120 env->fpus |= FPUS_SE | FPUS_B;
121 }
122 }
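/*
 * FPUC_EM (0x3f) covers the six exception mask bits in the control
 * word, so "~fpuc & FPUC_EM" is the set of unmasked exceptions; if any
 * pending status flag is unmasked, the error summary (ES) and busy
 * bits are set so a later FWAIT or FP instruction reports the fault.
 */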
123
124 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
125 {
126 if (floatx80_is_zero(b)) {
127 fpu_set_exception(env, FPUS_ZE);
128 }
129 return floatx80_div(a, b, &env->fp_status);
130 }
131
132 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
133 {
134 if (env->cr[0] & CR0_NE_MASK) {
135 raise_exception_ra(env, EXCP10_COPR, retaddr);
136 }
137 #if !defined(CONFIG_USER_ONLY)
138 else {
139 cpu_set_ferr(env);
140 }
141 #endif
142 }
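/*
 * With CR0.NE set, a pending x87 error is delivered as #MF (vector 16).
 * With CR0.NE clear the error is reported the legacy way through the
 * FERR# output (cpu_set_ferr), which PC chipsets traditionally route
 * to IRQ13.
 */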
143
144 void helper_flds_FT0(CPUX86State *env, uint32_t val)
145 {
146 union {
147 float32 f;
148 uint32_t i;
149 } u;
150
151 u.i = val;
152 FT0 = float32_to_floatx80(u.f, &env->fp_status);
153 }
154
155 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
156 {
157 union {
158 float64 f;
159 uint64_t i;
160 } u;
161
162 u.i = val;
163 FT0 = float64_to_floatx80(u.f, &env->fp_status);
164 }
165
166 void helper_fildl_FT0(CPUX86State *env, int32_t val)
167 {
168 FT0 = int32_to_floatx80(val, &env->fp_status);
169 }
170
171 void helper_flds_ST0(CPUX86State *env, uint32_t val)
172 {
173 int new_fpstt;
174 union {
175 float32 f;
176 uint32_t i;
177 } u;
178
179 new_fpstt = (env->fpstt - 1) & 7;
180 u.i = val;
181 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
182 env->fpstt = new_fpstt;
183 env->fptags[new_fpstt] = 0; /* validate stack entry */
184 }
185
186 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
187 {
188 int new_fpstt;
189 union {
190 float64 f;
191 uint64_t i;
192 } u;
193
194 new_fpstt = (env->fpstt - 1) & 7;
195 u.i = val;
196 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
197 env->fpstt = new_fpstt;
198 env->fptags[new_fpstt] = 0; /* validate stack entry */
199 }
200
201 void helper_fildl_ST0(CPUX86State *env, int32_t val)
202 {
203 int new_fpstt;
204
205 new_fpstt = (env->fpstt - 1) & 7;
206 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
207 env->fpstt = new_fpstt;
208 env->fptags[new_fpstt] = 0; /* validate stack entry */
209 }
210
211 void helper_fildll_ST0(CPUX86State *env, int64_t val)
212 {
213 int new_fpstt;
214
215 new_fpstt = (env->fpstt - 1) & 7;
216 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
217 env->fpstt = new_fpstt;
218 env->fptags[new_fpstt] = 0; /* validate stack entry */
219 }
220
221 uint32_t helper_fsts_ST0(CPUX86State *env)
222 {
223 union {
224 float32 f;
225 uint32_t i;
226 } u;
227
228 u.f = floatx80_to_float32(ST0, &env->fp_status);
229 return u.i;
230 }
231
232 uint64_t helper_fstl_ST0(CPUX86State *env)
233 {
234 union {
235 float64 f;
236 uint64_t i;
237 } u;
238
239 u.f = floatx80_to_float64(ST0, &env->fp_status);
240 return u.i;
241 }
242
243 int32_t helper_fist_ST0(CPUX86State *env)
244 {
245 int32_t val;
246
247 val = floatx80_to_int32(ST0, &env->fp_status);
248 if (val != (int16_t)val) {
249 val = -32768;
250 }
251 return val;
252 }
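/*
 * A 16-bit FIST/FISTP whose result does not fit stores the "integer
 * indefinite" encoding 0x8000 (-32768); helper_fistt_ST0() below does
 * the same for the truncating FISTTP form.
 */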
253
254 int32_t helper_fistl_ST0(CPUX86State *env)
255 {
256 int32_t val;
257 signed char old_exp_flags;
258
259 old_exp_flags = get_float_exception_flags(&env->fp_status);
260 set_float_exception_flags(0, &env->fp_status);
261
262 val = floatx80_to_int32(ST0, &env->fp_status);
263 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
264 val = 0x80000000;
265 }
266 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
267 | old_exp_flags, &env->fp_status);
268 return val;
269 }
270
271 int64_t helper_fistll_ST0(CPUX86State *env)
272 {
273 int64_t val;
274 signed char old_exp_flags;
275
276 old_exp_flags = get_float_exception_flags(&env->fp_status);
277 set_float_exception_flags(0, &env->fp_status);
278
279 val = floatx80_to_int64(ST0, &env->fp_status);
280 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
281 val = 0x8000000000000000ULL;
282 }
283 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
284 | old_exp_flags, &env->fp_status);
285 return val;
286 }
287
288 int32_t helper_fistt_ST0(CPUX86State *env)
289 {
290 int32_t val;
291
292 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
293 if (val != (int16_t)val) {
294 val = -32768;
295 }
296 return val;
297 }
298
299 int32_t helper_fisttl_ST0(CPUX86State *env)
300 {
301 return floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
302 }
303
304 int64_t helper_fisttll_ST0(CPUX86State *env)
305 {
306 return floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
307 }
308
309 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
310 {
311 int new_fpstt;
312
313 new_fpstt = (env->fpstt - 1) & 7;
314 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
315 env->fpstt = new_fpstt;
316 env->fptags[new_fpstt] = 0; /* validate stack entry */
317 }
318
319 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
320 {
321 helper_fstt(env, ST0, ptr, GETPC());
322 }
323
324 void helper_fpush(CPUX86State *env)
325 {
326 fpush(env);
327 }
328
329 void helper_fpop(CPUX86State *env)
330 {
331 fpop(env);
332 }
333
334 void helper_fdecstp(CPUX86State *env)
335 {
336 env->fpstt = (env->fpstt - 1) & 7;
337 env->fpus &= ~0x4700;
338 }
339
340 void helper_fincstp(CPUX86State *env)
341 {
342 env->fpstt = (env->fpstt + 1) & 7;
343 env->fpus &= ~0x4700;
344 }
345
346 /* FPU move */
347
348 void helper_ffree_STN(CPUX86State *env, int st_index)
349 {
350 env->fptags[(env->fpstt + st_index) & 7] = 1;
351 }
352
353 void helper_fmov_ST0_FT0(CPUX86State *env)
354 {
355 ST0 = FT0;
356 }
357
358 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
359 {
360 FT0 = ST(st_index);
361 }
362
363 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
364 {
365 ST0 = ST(st_index);
366 }
367
368 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
369 {
370 ST(st_index) = ST0;
371 }
372
373 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
374 {
375 floatx80 tmp;
376
377 tmp = ST(st_index);
378 ST(st_index) = ST0;
379 ST0 = tmp;
380 }
381
382 /* FPU operations */
383
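/*
 * floatx80_compare() returns -1/0/1/2 for less/equal/greater/unordered,
 * so the tables below are indexed with ret + 1.  FCOM sets the x87
 * condition codes (less -> C0, equal -> C3, unordered -> C3|C2|C0),
 * while FCOMI maps the same outcomes onto CF/ZF/PF in EFLAGS.
 */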
384 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
385
386 void helper_fcom_ST0_FT0(CPUX86State *env)
387 {
388 int ret;
389
390 ret = floatx80_compare(ST0, FT0, &env->fp_status);
391 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
392 }
393
394 void helper_fucom_ST0_FT0(CPUX86State *env)
395 {
396 int ret;
397
398 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
399 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
400 }
401
402 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
403
404 void helper_fcomi_ST0_FT0(CPUX86State *env)
405 {
406 int eflags;
407 int ret;
408
409 ret = floatx80_compare(ST0, FT0, &env->fp_status);
410 eflags = cpu_cc_compute_all(env, CC_OP);
411 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
412 CC_SRC = eflags;
413 }
414
415 void helper_fucomi_ST0_FT0(CPUX86State *env)
416 {
417 int eflags;
418 int ret;
419
420 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
421 eflags = cpu_cc_compute_all(env, CC_OP);
422 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
423 CC_SRC = eflags;
424 }
425
426 void helper_fadd_ST0_FT0(CPUX86State *env)
427 {
428 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
429 }
430
431 void helper_fmul_ST0_FT0(CPUX86State *env)
432 {
433 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
434 }
435
436 void helper_fsub_ST0_FT0(CPUX86State *env)
437 {
438 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
439 }
440
441 void helper_fsubr_ST0_FT0(CPUX86State *env)
442 {
443 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
444 }
445
446 void helper_fdiv_ST0_FT0(CPUX86State *env)
447 {
448 ST0 = helper_fdiv(env, ST0, FT0);
449 }
450
451 void helper_fdivr_ST0_FT0(CPUX86State *env)
452 {
453 ST0 = helper_fdiv(env, FT0, ST0);
454 }
455
456 /* fp operations between STN and ST0 */
457
458 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
459 {
460 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
461 }
462
463 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
464 {
465 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
466 }
467
468 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
469 {
470 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
471 }
472
473 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
474 {
475 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
476 }
477
478 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
479 {
480 floatx80 *p;
481
482 p = &ST(st_index);
483 *p = helper_fdiv(env, *p, ST0);
484 }
485
486 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
487 {
488 floatx80 *p;
489
490 p = &ST(st_index);
491 *p = helper_fdiv(env, ST0, *p);
492 }
493
494 /* misc FPU operations */
495 void helper_fchs_ST0(CPUX86State *env)
496 {
497 ST0 = floatx80_chs(ST0);
498 }
499
500 void helper_fabs_ST0(CPUX86State *env)
501 {
502 ST0 = floatx80_abs(ST0);
503 }
504
505 void helper_fld1_ST0(CPUX86State *env)
506 {
507 ST0 = floatx80_one;
508 }
509
510 void helper_fldl2t_ST0(CPUX86State *env)
511 {
512 ST0 = floatx80_l2t;
513 }
514
515 void helper_fldl2e_ST0(CPUX86State *env)
516 {
517 ST0 = floatx80_l2e;
518 }
519
520 void helper_fldpi_ST0(CPUX86State *env)
521 {
522 ST0 = floatx80_pi;
523 }
524
525 void helper_fldlg2_ST0(CPUX86State *env)
526 {
527 ST0 = floatx80_lg2;
528 }
529
530 void helper_fldln2_ST0(CPUX86State *env)
531 {
532 ST0 = floatx80_ln2;
533 }
534
535 void helper_fldz_ST0(CPUX86State *env)
536 {
537 ST0 = floatx80_zero;
538 }
539
540 void helper_fldz_FT0(CPUX86State *env)
541 {
542 FT0 = floatx80_zero;
543 }
544
545 uint32_t helper_fnstsw(CPUX86State *env)
546 {
547 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
548 }
549
550 uint32_t helper_fnstcw(CPUX86State *env)
551 {
552 return env->fpuc;
553 }
554
555 void update_fp_status(CPUX86State *env)
556 {
557 int rnd_type;
558
559 /* set rounding mode */
560 switch (env->fpuc & FPU_RC_MASK) {
561 default:
562 case FPU_RC_NEAR:
563 rnd_type = float_round_nearest_even;
564 break;
565 case FPU_RC_DOWN:
566 rnd_type = float_round_down;
567 break;
568 case FPU_RC_UP:
569 rnd_type = float_round_up;
570 break;
571 case FPU_RC_CHOP:
572 rnd_type = float_round_to_zero;
573 break;
574 }
575 set_float_rounding_mode(rnd_type, &env->fp_status);
576 switch ((env->fpuc >> 8) & 3) {
577 case 0:
578 rnd_type = 32;
579 break;
580 case 2:
581 rnd_type = 64;
582 break;
583 case 3:
584 default:
585 rnd_type = 80;
586 break;
587 }
588 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
589 }
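/*
 * Bits 9..8 of the control word are the precision-control field:
 * 0 selects a 24-bit (single) significand, 2 selects 53-bit (double)
 * and 3 the full 64-bit significand; value 1 is reserved and treated
 * like the default here.
 */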
590
591 void helper_fldcw(CPUX86State *env, uint32_t val)
592 {
593 cpu_set_fpuc(env, val);
594 }
595
596 void helper_fclex(CPUX86State *env)
597 {
598 env->fpus &= 0x7f00;
599 }
600
601 void helper_fwait(CPUX86State *env)
602 {
603 if (env->fpus & FPUS_SE) {
604 fpu_raise_exception(env, GETPC());
605 }
606 }
607
608 void helper_fninit(CPUX86State *env)
609 {
610 env->fpus = 0;
611 env->fpstt = 0;
612 cpu_set_fpuc(env, 0x37f);
613 env->fptags[0] = 1;
614 env->fptags[1] = 1;
615 env->fptags[2] = 1;
616 env->fptags[3] = 1;
617 env->fptags[4] = 1;
618 env->fptags[5] = 1;
619 env->fptags[6] = 1;
620 env->fptags[7] = 1;
621 }
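/*
 * These are the architectural FNINIT defaults: control word 0x037f
 * (all exceptions masked, round to nearest, 64-bit precision), status
 * word 0, TOP 0 and every register tagged empty.
 */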
622
623 /* BCD ops */
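/*
 * FBLD/FBSTP operate on an 80-bit packed BCD value: bytes 0..8 hold 18
 * decimal digits, two per byte with the low nibble less significant,
 * and bit 7 of byte 9 is the sign.  For example, +1234 is stored as
 * 34 12 00 00 00 00 00 00 00 00 (least significant byte first).
 */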
624
625 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
626 {
627 floatx80 tmp;
628 uint64_t val;
629 unsigned int v;
630 int i;
631
632 val = 0;
633 for (i = 8; i >= 0; i--) {
634 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
635 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
636 }
637 tmp = int64_to_floatx80(val, &env->fp_status);
638 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
639 tmp = floatx80_chs(tmp);
640 }
641 fpush(env);
642 ST0 = tmp;
643 }
644
645 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
646 {
647 int v;
648 target_ulong mem_ref, mem_end;
649 int64_t val;
650
651 val = floatx80_to_int64(ST0, &env->fp_status);
652 mem_ref = ptr;
653 mem_end = mem_ref + 9;
654 if (val < 0) {
655 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
656 val = -val;
657 } else {
658 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
659 }
660 while (mem_ref < mem_end) {
661 if (val == 0) {
662 break;
663 }
664 v = val % 100;
665 val = val / 100;
666 v = ((v / 10) << 4) | (v % 10);
667 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
668 }
669 while (mem_ref < mem_end) {
670 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
671 }
672 }
673
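/*
 * The transcendental helpers below (f2xm1, fyl2x, fptan, fpatan, fsin,
 * fcos, fsincos, fprem, fprem1, fyl2xp1) convert to the host's double
 * and use libm, so results carry at most 53 bits of precision instead
 * of the 64-bit significand real hardware computes, and the exception
 * flags are only approximated.
 */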
674 void helper_f2xm1(CPUX86State *env)
675 {
676 double val = floatx80_to_double(env, ST0);
677
678 val = pow(2.0, val) - 1.0;
679 ST0 = double_to_floatx80(env, val);
680 }
681
682 void helper_fyl2x(CPUX86State *env)
683 {
684 double fptemp = floatx80_to_double(env, ST0);
685
686 if (fptemp > 0.0) {
687 fptemp = log(fptemp) / log(2.0); /* log2(ST) */
688 fptemp *= floatx80_to_double(env, ST1);
689 ST1 = double_to_floatx80(env, fptemp);
690 fpop(env);
691 } else {
692 env->fpus &= ~0x4700;
693 env->fpus |= 0x400;
694 }
695 }
696
697 void helper_fptan(CPUX86State *env)
698 {
699 double fptemp = floatx80_to_double(env, ST0);
700
701 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
702 env->fpus |= 0x400;
703 } else {
704 fptemp = tan(fptemp);
705 ST0 = double_to_floatx80(env, fptemp);
706 fpush(env);
707 ST0 = floatx80_one;
708 env->fpus &= ~0x400; /* C2 <-- 0 */
709 /* the above code is for |arg| < 2**52 only */
710 }
711 }
712
713 void helper_fpatan(CPUX86State *env)
714 {
715 double fptemp, fpsrcop;
716
717 fpsrcop = floatx80_to_double(env, ST1);
718 fptemp = floatx80_to_double(env, ST0);
719 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
720 fpop(env);
721 }
722
723 void helper_fxtract(CPUX86State *env)
724 {
725 CPU_LDoubleU temp;
726
727 temp.d = ST0;
728
729 if (floatx80_is_zero(ST0)) {
730 /* Easy way to generate -inf and raise the division-by-zero exception */
731 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
732 &env->fp_status);
733 fpush(env);
734 ST0 = temp.d;
735 } else {
736 int expdif;
737
738 expdif = EXPD(temp) - EXPBIAS;
739 /* DP exponent bias */
740 ST0 = int32_to_floatx80(expdif, &env->fp_status);
741 fpush(env);
742 BIASEXPONENT(temp);
743 ST0 = temp.d;
744 }
745 }
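/*
 * FXTRACT splits ST0 into exponent and significand: afterwards ST1
 * holds the unbiased exponent as a float and ST0 the significand with
 * its exponent rebiased to zero (magnitude in [1, 2)).  For example,
 * 6.0 = 1.5 * 2^2 yields ST1 = 2.0 and ST0 = 1.5.
 */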
746
747 void helper_fprem1(CPUX86State *env)
748 {
749 double st0, st1, dblq, fpsrcop, fptemp;
750 CPU_LDoubleU fpsrcop1, fptemp1;
751 int expdif;
752 signed long long int q;
753
754 st0 = floatx80_to_double(env, ST0);
755 st1 = floatx80_to_double(env, ST1);
756
757 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
758 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
759 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
760 return;
761 }
762
763 fpsrcop = st0;
764 fptemp = st1;
765 fpsrcop1.d = ST0;
766 fptemp1.d = ST1;
767 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
768
769 if (expdif < 0) {
770 /* optimisation? taken from the AMD docs */
771 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
772 /* ST0 is unchanged */
773 return;
774 }
775
776 if (expdif < 53) {
777 dblq = fpsrcop / fptemp;
778 /* round dblq towards nearest integer */
779 dblq = rint(dblq);
780 st0 = fpsrcop - fptemp * dblq;
781
782 /* convert dblq to q by truncating towards zero */
783 if (dblq < 0.0) {
784 q = (signed long long int)(-dblq);
785 } else {
786 q = (signed long long int)dblq;
787 }
788
789 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
790 /* (C0,C3,C1) <-- (q2,q1,q0) */
791 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
792 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
793 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
794 } else {
795 env->fpus |= 0x400; /* C2 <-- 1 */
796 fptemp = pow(2.0, expdif - 50);
797 fpsrcop = (st0 / st1) / fptemp;
798 /* fpsrcop = integer obtained by chopping */
799 fpsrcop = (fpsrcop < 0.0) ?
800 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
801 st0 -= (st1 * fpsrcop * fptemp);
802 }
803 ST0 = double_to_floatx80(env, st0);
804 }
805
806 void helper_fprem(CPUX86State *env)
807 {
808 double st0, st1, dblq, fpsrcop, fptemp;
809 CPU_LDoubleU fpsrcop1, fptemp1;
810 int expdif;
811 signed long long int q;
812
813 st0 = floatx80_to_double(env, ST0);
814 st1 = floatx80_to_double(env, ST1);
815
816 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
817 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
818 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
819 return;
820 }
821
822 fpsrcop = st0;
823 fptemp = st1;
824 fpsrcop1.d = ST0;
825 fptemp1.d = ST1;
826 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
827
828 if (expdif < 0) {
829 /* optimisation? taken from the AMD docs */
830 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
831 /* ST0 is unchanged */
832 return;
833 }
834
835 if (expdif < 53) {
836 dblq = fpsrcop / fptemp; /* ST0 / ST1 */
837 /* round dblq towards zero */
838 dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
839 st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
840
841 /* convert dblq to q by truncating towards zero */
842 if (dblq < 0.0) {
843 q = (signed long long int)(-dblq);
844 } else {
845 q = (signed long long int)dblq;
846 }
847
848 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
849 /* (C0,C3,C1) <-- (q2,q1,q0) */
850 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
851 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
852 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
853 } else {
854 int N = 32 + (expdif % 32); /* as per AMD docs */
855
856 env->fpus |= 0x400; /* C2 <-- 1 */
857 fptemp = pow(2.0, (double)(expdif - N));
858 fpsrcop = (st0 / st1) / fptemp;
859 /* fpsrcop = integer obtained by chopping */
860 fpsrcop = (fpsrcop < 0.0) ?
861 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
862 st0 -= (st1 * fpsrcop * fptemp);
863 }
864 ST0 = double_to_floatx80(env, st0);
865 }
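/*
 * FPREM truncates the quotient towards zero while FPREM1 rounds it to
 * nearest (the IEEE remainder); both report the low three quotient bits
 * in C0/C3/C1 and set C2 when the reduction is only partial and the
 * instruction must be re-executed.  Hardware takes the partial path
 * when the exponent difference is 64 or more; the double-based
 * emulation above switches at 53.
 */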
866
867 void helper_fyl2xp1(CPUX86State *env)
868 {
869 double fptemp = floatx80_to_double(env, ST0);
870
871 if ((fptemp + 1.0) > 0.0) {
872 fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
873 fptemp *= floatx80_to_double(env, ST1);
874 ST1 = double_to_floatx80(env, fptemp);
875 fpop(env);
876 } else {
877 env->fpus &= ~0x4700;
878 env->fpus |= 0x400;
879 }
880 }
881
882 void helper_fsqrt(CPUX86State *env)
883 {
884 if (floatx80_is_neg(ST0)) {
885 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
886 env->fpus |= 0x400;
887 }
888 ST0 = floatx80_sqrt(ST0, &env->fp_status);
889 }
890
891 void helper_fsincos(CPUX86State *env)
892 {
893 double fptemp = floatx80_to_double(env, ST0);
894
895 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
896 env->fpus |= 0x400;
897 } else {
898 ST0 = double_to_floatx80(env, sin(fptemp));
899 fpush(env);
900 ST0 = double_to_floatx80(env, cos(fptemp));
901 env->fpus &= ~0x400; /* C2 <-- 0 */
902 /* the above code is for |arg| < 2**63 only */
903 }
904 }
905
906 void helper_frndint(CPUX86State *env)
907 {
908 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
909 }
910
911 void helper_fscale(CPUX86State *env)
912 {
913 if (floatx80_is_any_nan(ST1)) {
914 ST0 = ST1;
915 } else {
916 int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
917 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
918 }
919 }
920
921 void helper_fsin(CPUX86State *env)
922 {
923 double fptemp = floatx80_to_double(env, ST0);
924
925 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
926 env->fpus |= 0x400;
927 } else {
928 ST0 = double_to_floatx80(env, sin(fptemp));
929 env->fpus &= ~0x400; /* C2 <-- 0 */
930 /* the above code is for |arg| < 2**53 only */
931 }
932 }
933
934 void helper_fcos(CPUX86State *env)
935 {
936 double fptemp = floatx80_to_double(env, ST0);
937
938 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
939 env->fpus |= 0x400;
940 } else {
941 ST0 = double_to_floatx80(env, cos(fptemp));
942 env->fpus &= ~0x400; /* C2 <-- 0 */
943 /* the above code is for |arg| < 2**63 only */
944 }
945 }
946
947 void helper_fxam_ST0(CPUX86State *env)
948 {
949 CPU_LDoubleU temp;
950 int expdif;
951
952 temp.d = ST0;
953
954 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
955 if (SIGND(temp)) {
956 env->fpus |= 0x200; /* C1 <-- 1 */
957 }
958
959 /* XXX: test fptags too */
960 expdif = EXPD(temp);
961 if (expdif == MAXEXPD) {
962 if (MANTD(temp) == 0x8000000000000000ULL) {
963 env->fpus |= 0x500; /* Infinity */
964 } else {
965 env->fpus |= 0x100; /* NaN */
966 }
967 } else if (expdif == 0) {
968 if (MANTD(temp) == 0) {
969 env->fpus |= 0x4000; /* Zero */
970 } else {
971 env->fpus |= 0x4400; /* Denormal */
972 }
973 } else {
974 env->fpus |= 0x400;
975 }
976 }
977
978 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
979 uintptr_t retaddr)
980 {
981 int fpus, fptag, exp, i;
982 uint64_t mant;
983 CPU_LDoubleU tmp;
984
985 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
986 fptag = 0;
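/*
 * The environment image uses the full two-bit tag encoding (00 valid,
 * 01 zero, 10 special, 11 empty); env->fptags[] only tracks
 * empty/non-empty, so the finer classes are recomputed by inspecting
 * each register below.
 */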
987 for (i = 7; i >= 0; i--) {
988 fptag <<= 2;
989 if (env->fptags[i]) {
990 fptag |= 3;
991 } else {
992 tmp.d = env->fpregs[i].d;
993 exp = EXPD(tmp);
994 mant = MANTD(tmp);
995 if (exp == 0 && mant == 0) {
996 /* zero */
997 fptag |= 1;
998 } else if (exp == 0 || exp == MAXEXPD
999 || (mant & (1LL << 63)) == 0) {
1000 /* NaNs, infinity, denormal */
1001 fptag |= 2;
1002 }
1003 }
1004 }
1005 if (data32) {
1006 /* 32 bit */
1007 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1008 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1009 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1010 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1011 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1012 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1013 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1014 } else {
1015 /* 16 bit */
1016 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1017 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1018 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1019 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1020 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1021 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1022 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1023 }
1024 }
1025
1026 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1027 {
1028 do_fstenv(env, ptr, data32, GETPC());
1029 }
1030
1031 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1032 uintptr_t retaddr)
1033 {
1034 int i, fpus, fptag;
1035
1036 if (data32) {
1037 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1038 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1039 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1040 } else {
1041 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1042 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1043 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1044 }
1045 env->fpstt = (fpus >> 11) & 7;
1046 env->fpus = fpus & ~0x3800;
1047 for (i = 0; i < 8; i++) {
1048 env->fptags[i] = ((fptag & 3) == 3);
1049 fptag >>= 2;
1050 }
1051 }
1052
1053 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1054 {
1055 do_fldenv(env, ptr, data32, GETPC());
1056 }
1057
1058 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1059 {
1060 floatx80 tmp;
1061 int i;
1062
1063 do_fstenv(env, ptr, data32, GETPC());
1064
1065 ptr += (14 << data32);
1066 for (i = 0; i < 8; i++) {
1067 tmp = ST(i);
1068 helper_fstt(env, tmp, ptr, GETPC());
1069 ptr += 10;
1070 }
1071
1072 /* fninit */
1073 env->fpus = 0;
1074 env->fpstt = 0;
1075 cpu_set_fpuc(env, 0x37f);
1076 env->fptags[0] = 1;
1077 env->fptags[1] = 1;
1078 env->fptags[2] = 1;
1079 env->fptags[3] = 1;
1080 env->fptags[4] = 1;
1081 env->fptags[5] = 1;
1082 env->fptags[6] = 1;
1083 env->fptags[7] = 1;
1084 }
1085
1086 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1087 {
1088 floatx80 tmp;
1089 int i;
1090
1091 do_fldenv(env, ptr, data32, GETPC());
1092 ptr += (14 << data32);
1093
1094 for (i = 0; i < 8; i++) {
1095 tmp = helper_fldt(env, ptr, GETPC());
1096 ST(i) = tmp;
1097 ptr += 10;
1098 }
1099 }
1100
1101 #if defined(CONFIG_USER_ONLY)
1102 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
1103 {
1104 helper_fsave(env, ptr, data32);
1105 }
1106
1107 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
1108 {
1109 helper_frstor(env, ptr, data32);
1110 }
1111 #endif
1112
1113 #define XO(X) offsetof(X86XSaveArea, X)
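/*
 * X86XSaveArea follows the FXSAVE/XSAVE memory layout: a 512-byte
 * legacy region, the 64-byte XSAVE header (xstate_bv/xcomp_bv) and then
 * the extended component areas (AVX, MPX, PKRU, ...).  XO(field) is the
 * byte offset of a field within that image.
 */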
1114
1115 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1116 {
1117 int fpus, fptag, i;
1118 target_ulong addr;
1119
1120 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1121 fptag = 0;
1122 for (i = 0; i < 8; i++) {
1123 fptag |= (env->fptags[i] << i);
1124 }
1125
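/*
 * legacy.ftw holds the abridged tag word: one bit per register,
 * 1 = valid, 0 = empty, hence the XOR with 0xff below since
 * env->fptags uses 1 for empty.
 */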
1126 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
1127 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
1128 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
1129
1130 /* In 32-bit mode this is eip, sel, dp, sel.
1131 In 64-bit mode this is rip, rdp.
1132 But in either case we don't write actual data, just zeros. */
1133 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
1134 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
1135
1136 addr = ptr + XO(legacy.fpregs);
1137 for (i = 0; i < 8; i++) {
1138 floatx80 tmp = ST(i);
1139 helper_fstt(env, tmp, addr, ra);
1140 addr += 16;
1141 }
1142 }
1143
1144 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1145 {
1146 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
1147 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
1148 }
1149
1150 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1151 {
1152 int i, nb_xmm_regs;
1153 target_ulong addr;
1154
1155 if (env->hflags & HF_CS64_MASK) {
1156 nb_xmm_regs = 16;
1157 } else {
1158 nb_xmm_regs = 8;
1159 }
1160
1161 addr = ptr + XO(legacy.xmm_regs);
1162 for (i = 0; i < nb_xmm_regs; i++) {
1163 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1164 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1165 addr += 16;
1166 }
1167 }
1168
1169 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1170 {
1171 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1172 int i;
1173
1174 for (i = 0; i < 4; i++, addr += 16) {
1175 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1176 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1177 }
1178 }
1179
1180 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1181 {
1182 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
1183 env->bndcs_regs.cfgu, ra);
1184 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
1185 env->bndcs_regs.sts, ra);
1186 }
1187
1188 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1189 {
1190 cpu_stq_data_ra(env, ptr, env->pkru, ra);
1191 }
1192
1193 void helper_fxsave(CPUX86State *env, target_ulong ptr)
1194 {
1195 uintptr_t ra = GETPC();
1196
1197 /* The operand must be 16 byte aligned */
1198 if (ptr & 0xf) {
1199 raise_exception_ra(env, EXCP0D_GPF, ra);
1200 }
1201
1202 do_xsave_fpu(env, ptr, ra);
1203
1204 if (env->cr[4] & CR4_OSFXSR_MASK) {
1205 do_xsave_mxcsr(env, ptr, ra);
1206 /* Fast FXSAVE leaves out the XMM registers */
1207 if (!(env->efer & MSR_EFER_FFXSR)
1208 || (env->hflags & HF_CPL_MASK)
1209 || !(env->hflags & HF_LMA_MASK)) {
1210 do_xsave_sse(env, ptr, ra);
1211 }
1212 }
1213 }
1214
1215 static uint64_t get_xinuse(CPUX86State *env)
1216 {
1217 uint64_t inuse = -1;
1218
1219 /* For the most part, we don't track XINUSE. We could calculate it
1220 here for all components, but it's probably less work to simply
1221 indicate everything is in use. That said, the state of BNDREGS is important
1222 enough to track in HFLAGS, so we might as well use that here. */
1223 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1224 inuse &= ~XSTATE_BNDREGS_MASK;
1225 }
1226 return inuse;
1227 }
1228
1229 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1230 uint64_t inuse, uint64_t opt, uintptr_t ra)
1231 {
1232 uint64_t old_bv, new_bv;
1233
1234 /* The OS must have enabled XSAVE. */
1235 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1236 raise_exception_ra(env, EXCP06_ILLOP, ra);
1237 }
1238
1239 /* The operand must be 64 byte aligned. */
1240 if (ptr & 63) {
1241 raise_exception_ra(env, EXCP0D_GPF, ra);
1242 }
1243
1244 /* Never save anything not enabled by XCR0. */
1245 rfbm &= env->xcr0;
1246 opt &= rfbm;
1247
1248 if (opt & XSTATE_FP_MASK) {
1249 do_xsave_fpu(env, ptr, ra);
1250 }
1251 if (rfbm & XSTATE_SSE_MASK) {
1252 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
1253 do_xsave_mxcsr(env, ptr, ra);
1254 }
1255 if (opt & XSTATE_SSE_MASK) {
1256 do_xsave_sse(env, ptr, ra);
1257 }
1258 if (opt & XSTATE_BNDREGS_MASK) {
1259 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
1260 }
1261 if (opt & XSTATE_BNDCSR_MASK) {
1262 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
1263 }
1264 if (opt & XSTATE_PKRU_MASK) {
1265 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
1266 }
1267
1268 /* Update the XSTATE_BV field. */
1269 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1270 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1271 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
1272 }
1273
1274 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1275 {
1276 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1277 }
1278
1279 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1280 {
1281 uint64_t inuse = get_xinuse(env);
1282 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1283 }
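/*
 * XSAVE passes opt = -1 so every component selected by RFBM and XCR0 is
 * written, while XSAVEOPT passes the XINUSE estimate and may skip
 * components that are not in use; either way XSTATE_BV is rewritten so
 * it records which components of the area hold valid data.
 */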
1284
1285 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1286 {
1287 int i, fpuc, fpus, fptag;
1288 target_ulong addr;
1289
1290 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1291 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1292 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1293 cpu_set_fpuc(env, fpuc);
1294 env->fpstt = (fpus >> 11) & 7;
1295 env->fpus = fpus & ~0x3800;
1296 fptag ^= 0xff;
1297 for (i = 0; i < 8; i++) {
1298 env->fptags[i] = ((fptag >> i) & 1);
1299 }
1300
1301 addr = ptr + XO(legacy.fpregs);
1302 for (i = 0; i < 8; i++) {
1303 floatx80 tmp = helper_fldt(env, addr, ra);
1304 ST(i) = tmp;
1305 addr += 16;
1306 }
1307 }
1308
1309 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1310 {
1311 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
1312 }
1313
1314 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1315 {
1316 int i, nb_xmm_regs;
1317 target_ulong addr;
1318
1319 if (env->hflags & HF_CS64_MASK) {
1320 nb_xmm_regs = 16;
1321 } else {
1322 nb_xmm_regs = 8;
1323 }
1324
1325 addr = ptr + XO(legacy.xmm_regs);
1326 for (i = 0; i < nb_xmm_regs; i++) {
1327 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1328 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1329 addr += 16;
1330 }
1331 }
1332
1333 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1334 {
1335 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1336 int i;
1337
1338 for (i = 0; i < 4; i++, addr += 16) {
1339 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1340 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1341 }
1342 }
1343
1344 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1345 {
1346 /* FIXME: Extend highest implemented bit of linear address. */
1347 env->bndcs_regs.cfgu
1348 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1349 env->bndcs_regs.sts
1350 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
1351 }
1352
1353 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1354 {
1355 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
1356 }
1357
1358 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1359 {
1360 uintptr_t ra = GETPC();
1361
1362 /* The operand must be 16 byte aligned */
1363 if (ptr & 0xf) {
1364 raise_exception_ra(env, EXCP0D_GPF, ra);
1365 }
1366
1367 do_xrstor_fpu(env, ptr, ra);
1368
1369 if (env->cr[4] & CR4_OSFXSR_MASK) {
1370 do_xrstor_mxcsr(env, ptr, ra);
1371 /* Fast FXRSTOR leaves out the XMM registers */
1372 if (!(env->efer & MSR_EFER_FFXSR)
1373 || (env->hflags & HF_CPL_MASK)
1374 || !(env->hflags & HF_LMA_MASK)) {
1375 do_xrstor_sse(env, ptr, ra);
1376 }
1377 }
1378 }
1379
1380 #if defined(CONFIG_USER_ONLY)
1381 void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
1382 {
1383 helper_fxsave(env, ptr);
1384 }
1385
1386 void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
1387 {
1388 helper_fxrstor(env, ptr);
1389 }
1390 #endif
1391
1392 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1393 {
1394 uintptr_t ra = GETPC();
1395 uint64_t xstate_bv, xcomp_bv, reserve0;
1396
1397 rfbm &= env->xcr0;
1398
1399 /* The OS must have enabled XSAVE. */
1400 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1401 raise_exception_ra(env, EXCP06_ILLOP, ra);
1402 }
1403
1404 /* The operand must be 64 byte aligned. */
1405 if (ptr & 63) {
1406 raise_exception_ra(env, EXCP0D_GPF, ra);
1407 }
1408
1409 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1410
1411 if ((int64_t)xstate_bv < 0) {
1412 /* FIXME: Compact form. */
1413 raise_exception_ra(env, EXCP0D_GPF, ra);
1414 }
1415
1416 /* Standard form. */
1417
1418 /* The XSTATE_BV field must not set bits not present in XCR0. */
1419 if (xstate_bv & ~env->xcr0) {
1420 raise_exception_ra(env, EXCP0D_GPF, ra);
1421 }
1422
1423 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
1424 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
1425 describes only XCOMP_BV, but the description of the standard form
1426 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
1427 includes the next 64-bit field. */
1428 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
1429 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
1430 if (xcomp_bv || reserve0) {
1431 raise_exception_ra(env, EXCP0D_GPF, ra);
1432 }
1433
1434 if (rfbm & XSTATE_FP_MASK) {
1435 if (xstate_bv & XSTATE_FP_MASK) {
1436 do_xrstor_fpu(env, ptr, ra);
1437 } else {
1438 helper_fninit(env);
1439 memset(env->fpregs, 0, sizeof(env->fpregs));
1440 }
1441 }
1442 if (rfbm & XSTATE_SSE_MASK) {
1443 /* Note that the standard form of XRSTOR loads MXCSR from memory
1444 whether or not the XSTATE_BV bit is set. */
1445 do_xrstor_mxcsr(env, ptr, ra);
1446 if (xstate_bv & XSTATE_SSE_MASK) {
1447 do_xrstor_sse(env, ptr, ra);
1448 } else {
1449 /* ??? When AVX is implemented, we may have to be more
1450 selective in the clearing. */
1451 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1452 }
1453 }
1454 if (rfbm & XSTATE_BNDREGS_MASK) {
1455 if (xstate_bv & XSTATE_BNDREGS_MASK) {
1456 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
1457 env->hflags |= HF_MPX_IU_MASK;
1458 } else {
1459 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1460 env->hflags &= ~HF_MPX_IU_MASK;
1461 }
1462 }
1463 if (rfbm & XSTATE_BNDCSR_MASK) {
1464 if (xstate_bv & XSTATE_BNDCSR_MASK) {
1465 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
1466 } else {
1467 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1468 }
1469 cpu_sync_bndcs_hflags(env);
1470 }
1471 if (rfbm & XSTATE_PKRU_MASK) {
1472 uint64_t old_pkru = env->pkru;
1473 if (xstate_bv & XSTATE_PKRU_MASK) {
1474 do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
1475 } else {
1476 env->pkru = 0;
1477 }
1478 if (env->pkru != old_pkru) {
1479 CPUState *cs = CPU(x86_env_get_cpu(env));
1480 tlb_flush(cs);
1481 }
1482 }
1483 }
1484
1485 #undef XO
1486
1487 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1488 {
1489 /* The OS must have enabled XSAVE. */
1490 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1491 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1492 }
1493
1494 switch (ecx) {
1495 case 0:
1496 return env->xcr0;
1497 case 1:
1498 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1499 return env->xcr0 & get_xinuse(env);
1500 }
1501 break;
1502 }
1503 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1504 }
1505
1506 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1507 {
1508 uint32_t dummy, ena_lo, ena_hi;
1509 uint64_t ena;
1510
1511 /* The OS must have enabled XSAVE. */
1512 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1513 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1514 }
1515
1516 /* Only XCR0 is defined at present; the FPU may not be disabled. */
1517 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
1518 goto do_gpf;
1519 }
1520
1521 /* Disallow enabling unimplemented features. */
1522 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1523 ena = ((uint64_t)ena_hi << 32) | ena_lo;
1524 if (mask & ~ena) {
1525 goto do_gpf;
1526 }
1527
1528 /* Disallow enabling only half of MPX. */
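/*
 * BNDREGS is XCR0 bit 3 and BNDCSR bit 4, so multiplying by the ratio
 * of the two masks shifts the BNDREGS bit into the BNDCSR position;
 * the XOR then leaves BNDCSR set exactly when only one of the pair
 * was requested.
 */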
1529 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1530 & XSTATE_BNDCSR_MASK) {
1531 goto do_gpf;
1532 }
1533
1534 env->xcr0 = mask;
1535 cpu_sync_bndcs_hflags(env);
1536 return;
1537
1538 do_gpf:
1539 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1540 }
1541
1542 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
1543 {
1544 CPU_LDoubleU temp;
1545
1546 temp.d = f;
1547 *pmant = temp.l.lower;
1548 *pexp = temp.l.upper;
1549 }
1550
1551 floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
1552 {
1553 CPU_LDoubleU temp;
1554
1555 temp.l.upper = upper;
1556 temp.l.lower = mant;
1557 return temp.d;
1558 }
1559
1560 /* MMX/SSE */
1561 /* XXX: optimize by storing fptt and fptags in the static cpu state */
1562
1563 #define SSE_DAZ 0x0040
1564 #define SSE_RC_MASK 0x6000
1565 #define SSE_RC_NEAR 0x0000
1566 #define SSE_RC_DOWN 0x2000
1567 #define SSE_RC_UP 0x4000
1568 #define SSE_RC_CHOP 0x6000
1569 #define SSE_FZ 0x8000
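/*
 * MXCSR layout: bits 0..5 are the exception flags, bit 6 is DAZ
 * (denormals are zeros), bits 7..12 the exception masks, bits 13..14
 * the rounding control and bit 15 FZ (flush results to zero).
 */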
1570
1571 void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr)
1572 {
1573 int rnd_type;
1574
1575 env->mxcsr = mxcsr;
1576
1577 /* set rounding mode */
1578 switch (mxcsr & SSE_RC_MASK) {
1579 default:
1580 case SSE_RC_NEAR:
1581 rnd_type = float_round_nearest_even;
1582 break;
1583 case SSE_RC_DOWN:
1584 rnd_type = float_round_down;
1585 break;
1586 case SSE_RC_UP:
1587 rnd_type = float_round_up;
1588 break;
1589 case SSE_RC_CHOP:
1590 rnd_type = float_round_to_zero;
1591 break;
1592 }
1593 set_float_rounding_mode(rnd_type, &env->sse_status);
1594
1595 /* set denormals are zero */
1596 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1597
1598 /* set flush to zero */
1599 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
1600 }
1601
1602 void cpu_set_fpuc(CPUX86State *env, uint16_t val)
1603 {
1604 env->fpuc = val;
1605 update_fp_status(env);
1606 }
1607
1608 void helper_ldmxcsr(CPUX86State *env, uint32_t val)
1609 {
1610 cpu_set_mxcsr(env, val);
1611 }
1612
1613 void helper_enter_mmx(CPUX86State *env)
1614 {
1615 env->fpstt = 0;
1616 *(uint32_t *)(env->fptags) = 0;
1617 *(uint32_t *)(env->fptags + 4) = 0;
1618 }
1619
1620 void helper_emms(CPUX86State *env)
1621 {
1622 /* set to empty state */
1623 *(uint32_t *)(env->fptags) = 0x01010101;
1624 *(uint32_t *)(env->fptags + 4) = 0x01010101;
1625 }
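/*
 * The MMX registers alias the low 64 bits of the x87 data registers:
 * executing an MMX instruction resets TOP and marks every register
 * valid (helper_enter_mmx), while EMMS tags them all empty again so
 * later x87 code sees a clean stack.  fptags[] is eight bytes, so the
 * two 32-bit stores of 0x01010101 set all eight tags to 1.
 */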
1626
1627 /* XXX: suppress */
1628 void helper_movq(CPUX86State *env, void *d, void *s)
1629 {
1630 *(uint64_t *)d = *(uint64_t *)s;
1631 }
1632
1633 #define SHIFT 0
1634 #include "ops_sse.h"
1635
1636 #define SHIFT 1
1637 #include "ops_sse.h"