/*
 *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
20 #include "qemu/osdep.h"
24 #include "exec/helper-proto.h"
25 #include "fpu/softfloat.h"
26 #include "fpu/softfloat-macros.h"
27 #include "helper-tcg.h"
30 #define FT0 (env->ft0)
31 #define ST0 (env->fpregs[env->fpstt].d)
32 #define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d)
35 #define FPU_RC_MASK 0xc00
36 #define FPU_RC_NEAR 0x000
37 #define FPU_RC_DOWN 0x400
38 #define FPU_RC_UP 0x800
39 #define FPU_RC_CHOP 0xc00
41 #define MAXTAN 9223372036854775808.0
43 /* the following deal with x86 long double-precision numbers */
44 #define MAXEXPD 0x7fff
46 #define EXPD(fp) (fp.l.upper & 0x7fff)
47 #define SIGND(fp) ((fp.l.upper) & 0x8000)
48 #define MANTD(fp) (fp.l.lower)
49 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
51 #define FPUS_IE (1 << 0)
52 #define FPUS_DE (1 << 1)
53 #define FPUS_ZE (1 << 2)
54 #define FPUS_OE (1 << 3)
55 #define FPUS_UE (1 << 4)
56 #define FPUS_PE (1 << 5)
57 #define FPUS_SF (1 << 6)
58 #define FPUS_SE (1 << 7)
59 #define FPUS_B (1 << 15)
63 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
64 #define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
65 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
66 #define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
67 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
68 #define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
69 #define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
70 #define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)
72 static inline void fpush(CPUX86State
*env
)
74 env
->fpstt
= (env
->fpstt
- 1) & 7;
75 env
->fptags
[env
->fpstt
] = 0; /* validate stack entry */
78 static inline void fpop(CPUX86State
*env
)
80 env
->fptags
[env
->fpstt
] = 1; /* invalidate stack entry */
81 env
->fpstt
= (env
->fpstt
+ 1) & 7;
/*
 * Read a floatx80 image from guest memory at @ptr: the `.l.lower` part is
 * fetched with cpu_ldq_data_ra() from @ptr and the `.l.upper` part with
 * cpu_lduw_data_ra() from @ptr + 8.  @retaddr is forwarded to both
 * accessors.
 *
 * NOTE(review): the declaration of `temp` and the return statement are
 * not present in this view of the file; presumably the assembled value
 * is returned -- confirm against the complete source.
 */
84 static floatx80
do_fldt(CPUX86State
*env
, target_ulong ptr
, uintptr_t retaddr
)
88 temp
.l
.lower
= cpu_ldq_data_ra(env
, ptr
, retaddr
);
89 temp
.l
.upper
= cpu_lduw_data_ra(env
, ptr
+ 8, retaddr
);
93 static void do_fstt(CPUX86State
*env
, floatx80 f
, target_ulong ptr
,
99 cpu_stq_data_ra(env
, ptr
, temp
.l
.lower
, retaddr
);
100 cpu_stw_data_ra(env
, ptr
+ 8, temp
.l
.upper
, retaddr
);
103 /* x87 FPU helpers */
105 static inline double floatx80_to_double(CPUX86State
*env
, floatx80 a
)
112 u
.f64
= floatx80_to_float64(a
, &env
->fp_status
);
116 static inline floatx80
double_to_floatx80(CPUX86State
*env
, double a
)
124 return float64_to_floatx80(u
.f64
, &env
->fp_status
);
/*
 * When env->fpus has a bit set that lies within FPUC_EM and is clear in
 * env->fpuc, additionally set FPUS_SE and FPUS_B in env->fpus.
 *
 * NOTE(review): @mask is not referenced by any line visible here;
 * presumably it is merged into env->fpus by a statement missing from
 * this view -- confirm against the complete source.
 */
127 static void fpu_set_exception(CPUX86State
*env
, int mask
)
130 if (env
->fpus
& (~env
->fpuc
& FPUC_EM
)) {
131 env
->fpus
|= FPUS_SE
| FPUS_B
;
/*
 * Capture the current softfloat exception flags of env->fp_status in
 * old_flags, then reset the flags in env->fp_status to 0.
 *
 * NOTE(review): the return statement is not present in this view;
 * callers assign the result to their own `old_flags`, so presumably the
 * captured value is returned -- confirm against the complete source.
 */
135 static inline uint8_t save_exception_flags(CPUX86State
*env
)
137 uint8_t old_flags
= get_float_exception_flags(&env
->fp_status
);
138 set_float_exception_flags(0, &env
->fp_status
);
142 static void merge_exception_flags(CPUX86State
*env
, uint8_t old_flags
)
144 uint8_t new_flags
= get_float_exception_flags(&env
->fp_status
);
145 float_raise(old_flags
, &env
->fp_status
);
146 fpu_set_exception(env
,
147 ((new_flags
& float_flag_invalid
? FPUS_IE
: 0) |
148 (new_flags
& float_flag_divbyzero
? FPUS_ZE
: 0) |
149 (new_flags
& float_flag_overflow
? FPUS_OE
: 0) |
150 (new_flags
& float_flag_underflow
? FPUS_UE
: 0) |
151 (new_flags
& float_flag_inexact
? FPUS_PE
: 0) |
152 (new_flags
& float_flag_input_denormal
? FPUS_DE
: 0)));
/*
 * Divide @a by @b with floatx80_div() under env->fp_status, bracketing
 * the operation with save_exception_flags() / merge_exception_flags().
 *
 * NOTE(review): the return statement is not present in this view;
 * presumably `ret` is returned -- confirm against the complete source.
 */
155 static inline floatx80
helper_fdiv(CPUX86State
*env
, floatx80 a
, floatx80 b
)
157 uint8_t old_flags
= save_exception_flags(env
);
158 floatx80 ret
= floatx80_div(a
, b
, &env
->fp_status
);
159 merge_exception_flags(env
, old_flags
);
163 static void fpu_raise_exception(CPUX86State
*env
, uintptr_t retaddr
)
165 if (env
->cr
[0] & CR0_NE_MASK
) {
166 raise_exception_ra(env
, EXCP10_COPR
, retaddr
);
168 #if !defined(CONFIG_USER_ONLY)
170 fpu_check_raise_ferr_irq(env
);
175 void helper_flds_FT0(CPUX86State
*env
, uint32_t val
)
177 uint8_t old_flags
= save_exception_flags(env
);
184 FT0
= float32_to_floatx80(u
.f
, &env
->fp_status
);
185 merge_exception_flags(env
, old_flags
);
188 void helper_fldl_FT0(CPUX86State
*env
, uint64_t val
)
190 uint8_t old_flags
= save_exception_flags(env
);
197 FT0
= float64_to_floatx80(u
.f
, &env
->fp_status
);
198 merge_exception_flags(env
, old_flags
);
201 void helper_fildl_FT0(CPUX86State
*env
, int32_t val
)
203 FT0
= int32_to_floatx80(val
, &env
->fp_status
);
206 void helper_flds_ST0(CPUX86State
*env
, uint32_t val
)
208 uint8_t old_flags
= save_exception_flags(env
);
215 new_fpstt
= (env
->fpstt
- 1) & 7;
217 env
->fpregs
[new_fpstt
].d
= float32_to_floatx80(u
.f
, &env
->fp_status
);
218 env
->fpstt
= new_fpstt
;
219 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
220 merge_exception_flags(env
, old_flags
);
223 void helper_fldl_ST0(CPUX86State
*env
, uint64_t val
)
225 uint8_t old_flags
= save_exception_flags(env
);
232 new_fpstt
= (env
->fpstt
- 1) & 7;
234 env
->fpregs
[new_fpstt
].d
= float64_to_floatx80(u
.f
, &env
->fp_status
);
235 env
->fpstt
= new_fpstt
;
236 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
237 merge_exception_flags(env
, old_flags
);
240 static FloatX80RoundPrec
tmp_maximise_precision(float_status
*st
)
242 FloatX80RoundPrec old
= get_floatx80_rounding_precision(st
);
243 set_floatx80_rounding_precision(floatx80_precision_x
, st
);
247 void helper_fildl_ST0(CPUX86State
*env
, int32_t val
)
250 FloatX80RoundPrec old
= tmp_maximise_precision(&env
->fp_status
);
252 new_fpstt
= (env
->fpstt
- 1) & 7;
253 env
->fpregs
[new_fpstt
].d
= int32_to_floatx80(val
, &env
->fp_status
);
254 env
->fpstt
= new_fpstt
;
255 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
257 set_floatx80_rounding_precision(old
, &env
->fp_status
);
260 void helper_fildll_ST0(CPUX86State
*env
, int64_t val
)
263 FloatX80RoundPrec old
= tmp_maximise_precision(&env
->fp_status
);
265 new_fpstt
= (env
->fpstt
- 1) & 7;
266 env
->fpregs
[new_fpstt
].d
= int64_to_floatx80(val
, &env
->fp_status
);
267 env
->fpstt
= new_fpstt
;
268 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
270 set_floatx80_rounding_precision(old
, &env
->fp_status
);
273 uint32_t helper_fsts_ST0(CPUX86State
*env
)
275 uint8_t old_flags
= save_exception_flags(env
);
281 u
.f
= floatx80_to_float32(ST0
, &env
->fp_status
);
282 merge_exception_flags(env
, old_flags
);
286 uint64_t helper_fstl_ST0(CPUX86State
*env
)
288 uint8_t old_flags
= save_exception_flags(env
);
294 u
.f
= floatx80_to_float64(ST0
, &env
->fp_status
);
295 merge_exception_flags(env
, old_flags
);
/*
 * Convert ST0 to an integer with floatx80_to_int32() under
 * env->fp_status.  If the converted value does not survive a round trip
 * through int16_t, the softfloat exception flags are replaced by
 * float_flag_invalid.  The operation is bracketed by
 * save_exception_flags() / merge_exception_flags().
 *
 * NOTE(review): the declaration of `val`, whatever else happens inside
 * the out-of-range branch, and the return statement are not present in
 * this view -- confirm against the complete source.
 */
299 int32_t helper_fist_ST0(CPUX86State
*env
)
301 uint8_t old_flags
= save_exception_flags(env
);
304 val
= floatx80_to_int32(ST0
, &env
->fp_status
);
305 if (val
!= (int16_t)val
) {
306 set_float_exception_flags(float_flag_invalid
, &env
->fp_status
);
309 merge_exception_flags(env
, old_flags
);
313 int32_t helper_fistl_ST0(CPUX86State
*env
)
315 uint8_t old_flags
= save_exception_flags(env
);
318 val
= floatx80_to_int32(ST0
, &env
->fp_status
);
319 if (get_float_exception_flags(&env
->fp_status
) & float_flag_invalid
) {
322 merge_exception_flags(env
, old_flags
);
326 int64_t helper_fistll_ST0(CPUX86State
*env
)
328 uint8_t old_flags
= save_exception_flags(env
);
331 val
= floatx80_to_int64(ST0
, &env
->fp_status
);
332 if (get_float_exception_flags(&env
->fp_status
) & float_flag_invalid
) {
333 val
= 0x8000000000000000ULL
;
335 merge_exception_flags(env
, old_flags
);
339 int32_t helper_fistt_ST0(CPUX86State
*env
)
341 uint8_t old_flags
= save_exception_flags(env
);
344 val
= floatx80_to_int32_round_to_zero(ST0
, &env
->fp_status
);
345 if (val
!= (int16_t)val
) {
346 set_float_exception_flags(float_flag_invalid
, &env
->fp_status
);
349 merge_exception_flags(env
, old_flags
);
353 int32_t helper_fisttl_ST0(CPUX86State
*env
)
355 uint8_t old_flags
= save_exception_flags(env
);
358 val
= floatx80_to_int32_round_to_zero(ST0
, &env
->fp_status
);
359 if (get_float_exception_flags(&env
->fp_status
) & float_flag_invalid
) {
362 merge_exception_flags(env
, old_flags
);
366 int64_t helper_fisttll_ST0(CPUX86State
*env
)
368 uint8_t old_flags
= save_exception_flags(env
);
371 val
= floatx80_to_int64_round_to_zero(ST0
, &env
->fp_status
);
372 if (get_float_exception_flags(&env
->fp_status
) & float_flag_invalid
) {
373 val
= 0x8000000000000000ULL
;
375 merge_exception_flags(env
, old_flags
);
379 void helper_fldt_ST0(CPUX86State
*env
, target_ulong ptr
)
383 new_fpstt
= (env
->fpstt
- 1) & 7;
384 env
->fpregs
[new_fpstt
].d
= do_fldt(env
, ptr
, GETPC());
385 env
->fpstt
= new_fpstt
;
386 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
389 void helper_fstt_ST0(CPUX86State
*env
, target_ulong ptr
)
391 do_fstt(env
, ST0
, ptr
, GETPC());
394 void helper_fpush(CPUX86State
*env
)
399 void helper_fpop(CPUX86State
*env
)
404 void helper_fdecstp(CPUX86State
*env
)
406 env
->fpstt
= (env
->fpstt
- 1) & 7;
407 env
->fpus
&= ~0x4700;
410 void helper_fincstp(CPUX86State
*env
)
412 env
->fpstt
= (env
->fpstt
+ 1) & 7;
413 env
->fpus
&= ~0x4700;
418 void helper_ffree_STN(CPUX86State
*env
, int st_index
)
420 env
->fptags
[(env
->fpstt
+ st_index
) & 7] = 1;
423 void helper_fmov_ST0_FT0(CPUX86State
*env
)
428 void helper_fmov_FT0_STN(CPUX86State
*env
, int st_index
)
433 void helper_fmov_ST0_STN(CPUX86State
*env
, int st_index
)
438 void helper_fmov_STN_ST0(CPUX86State
*env
, int st_index
)
443 void helper_fxchg_ST0_STN(CPUX86State
*env
, int st_index
)
454 static const int fcom_ccval
[4] = {0x0100, 0x4000, 0x0000, 0x4500};
456 void helper_fcom_ST0_FT0(CPUX86State
*env
)
458 uint8_t old_flags
= save_exception_flags(env
);
461 ret
= floatx80_compare(ST0
, FT0
, &env
->fp_status
);
462 env
->fpus
= (env
->fpus
& ~0x4500) | fcom_ccval
[ret
+ 1];
463 merge_exception_flags(env
, old_flags
);
466 void helper_fucom_ST0_FT0(CPUX86State
*env
)
468 uint8_t old_flags
= save_exception_flags(env
);
471 ret
= floatx80_compare_quiet(ST0
, FT0
, &env
->fp_status
);
472 env
->fpus
= (env
->fpus
& ~0x4500) | fcom_ccval
[ret
+ 1];
473 merge_exception_flags(env
, old_flags
);
476 static const int fcomi_ccval
[4] = {CC_C
, CC_Z
, 0, CC_Z
| CC_P
| CC_C
};
478 void helper_fcomi_ST0_FT0(CPUX86State
*env
)
480 uint8_t old_flags
= save_exception_flags(env
);
484 ret
= floatx80_compare(ST0
, FT0
, &env
->fp_status
);
485 eflags
= cpu_cc_compute_all(env
, CC_OP
);
486 eflags
= (eflags
& ~(CC_Z
| CC_P
| CC_C
)) | fcomi_ccval
[ret
+ 1];
488 merge_exception_flags(env
, old_flags
);
491 void helper_fucomi_ST0_FT0(CPUX86State
*env
)
493 uint8_t old_flags
= save_exception_flags(env
);
497 ret
= floatx80_compare_quiet(ST0
, FT0
, &env
->fp_status
);
498 eflags
= cpu_cc_compute_all(env
, CC_OP
);
499 eflags
= (eflags
& ~(CC_Z
| CC_P
| CC_C
)) | fcomi_ccval
[ret
+ 1];
501 merge_exception_flags(env
, old_flags
);
504 void helper_fadd_ST0_FT0(CPUX86State
*env
)
506 uint8_t old_flags
= save_exception_flags(env
);
507 ST0
= floatx80_add(ST0
, FT0
, &env
->fp_status
);
508 merge_exception_flags(env
, old_flags
);
511 void helper_fmul_ST0_FT0(CPUX86State
*env
)
513 uint8_t old_flags
= save_exception_flags(env
);
514 ST0
= floatx80_mul(ST0
, FT0
, &env
->fp_status
);
515 merge_exception_flags(env
, old_flags
);
518 void helper_fsub_ST0_FT0(CPUX86State
*env
)
520 uint8_t old_flags
= save_exception_flags(env
);
521 ST0
= floatx80_sub(ST0
, FT0
, &env
->fp_status
);
522 merge_exception_flags(env
, old_flags
);
525 void helper_fsubr_ST0_FT0(CPUX86State
*env
)
527 uint8_t old_flags
= save_exception_flags(env
);
528 ST0
= floatx80_sub(FT0
, ST0
, &env
->fp_status
);
529 merge_exception_flags(env
, old_flags
);
532 void helper_fdiv_ST0_FT0(CPUX86State
*env
)
534 ST0
= helper_fdiv(env
, ST0
, FT0
);
537 void helper_fdivr_ST0_FT0(CPUX86State
*env
)
539 ST0
= helper_fdiv(env
, FT0
, ST0
);
542 /* fp operations between STN and ST0 */
544 void helper_fadd_STN_ST0(CPUX86State
*env
, int st_index
)
546 uint8_t old_flags
= save_exception_flags(env
);
547 ST(st_index
) = floatx80_add(ST(st_index
), ST0
, &env
->fp_status
);
548 merge_exception_flags(env
, old_flags
);
551 void helper_fmul_STN_ST0(CPUX86State
*env
, int st_index
)
553 uint8_t old_flags
= save_exception_flags(env
);
554 ST(st_index
) = floatx80_mul(ST(st_index
), ST0
, &env
->fp_status
);
555 merge_exception_flags(env
, old_flags
);
558 void helper_fsub_STN_ST0(CPUX86State
*env
, int st_index
)
560 uint8_t old_flags
= save_exception_flags(env
);
561 ST(st_index
) = floatx80_sub(ST(st_index
), ST0
, &env
->fp_status
);
562 merge_exception_flags(env
, old_flags
);
565 void helper_fsubr_STN_ST0(CPUX86State
*env
, int st_index
)
567 uint8_t old_flags
= save_exception_flags(env
);
568 ST(st_index
) = floatx80_sub(ST0
, ST(st_index
), &env
->fp_status
);
569 merge_exception_flags(env
, old_flags
);
572 void helper_fdiv_STN_ST0(CPUX86State
*env
, int st_index
)
577 *p
= helper_fdiv(env
, *p
, ST0
);
580 void helper_fdivr_STN_ST0(CPUX86State
*env
, int st_index
)
585 *p
= helper_fdiv(env
, ST0
, *p
);
588 /* misc FPU operations */
589 void helper_fchs_ST0(CPUX86State
*env
)
591 ST0
= floatx80_chs(ST0
);
594 void helper_fabs_ST0(CPUX86State
*env
)
596 ST0
= floatx80_abs(ST0
);
599 void helper_fld1_ST0(CPUX86State
*env
)
604 void helper_fldl2t_ST0(CPUX86State
*env
)
606 switch (env
->fpuc
& FPU_RC_MASK
) {
608 ST0
= floatx80_l2t_u
;
616 void helper_fldl2e_ST0(CPUX86State
*env
)
618 switch (env
->fpuc
& FPU_RC_MASK
) {
621 ST0
= floatx80_l2e_d
;
629 void helper_fldpi_ST0(CPUX86State
*env
)
631 switch (env
->fpuc
& FPU_RC_MASK
) {
642 void helper_fldlg2_ST0(CPUX86State
*env
)
644 switch (env
->fpuc
& FPU_RC_MASK
) {
647 ST0
= floatx80_lg2_d
;
655 void helper_fldln2_ST0(CPUX86State
*env
)
657 switch (env
->fpuc
& FPU_RC_MASK
) {
660 ST0
= floatx80_ln2_d
;
668 void helper_fldz_ST0(CPUX86State
*env
)
673 void helper_fldz_FT0(CPUX86State
*env
)
678 uint32_t helper_fnstsw(CPUX86State
*env
)
680 return (env
->fpus
& ~0x3800) | (env
->fpstt
& 0x7) << 11;
683 uint32_t helper_fnstcw(CPUX86State
*env
)
688 void update_fp_status(CPUX86State
*env
)
690 FloatRoundMode rnd_mode
;
691 FloatX80RoundPrec rnd_prec
;
693 /* set rounding mode */
694 switch (env
->fpuc
& FPU_RC_MASK
) {
697 rnd_mode
= float_round_nearest_even
;
700 rnd_mode
= float_round_down
;
703 rnd_mode
= float_round_up
;
706 rnd_mode
= float_round_to_zero
;
709 set_float_rounding_mode(rnd_mode
, &env
->fp_status
);
711 switch ((env
->fpuc
>> 8) & 3) {
713 rnd_prec
= floatx80_precision_s
;
716 rnd_prec
= floatx80_precision_d
;
720 rnd_prec
= floatx80_precision_x
;
723 set_floatx80_rounding_precision(rnd_prec
, &env
->fp_status
);
726 void helper_fldcw(CPUX86State
*env
, uint32_t val
)
728 cpu_set_fpuc(env
, val
);
731 void helper_fclex(CPUX86State
*env
)
736 void helper_fwait(CPUX86State
*env
)
738 if (env
->fpus
& FPUS_SE
) {
739 fpu_raise_exception(env
, GETPC());
743 static void do_fninit(CPUX86State
*env
)
751 cpu_set_fpuc(env
, 0x37f);
762 void helper_fninit(CPUX86State
*env
)
/*
 * Load a packed-decimal operand from guest memory at @ptr.
 *
 * Bytes @ptr + 8 down to @ptr are read in that order; each byte adds two
 * decimal digits to the running value (high nibble as tens, low nibble
 * as units).  The accumulated integer is converted with
 * int64_to_floatx80() and negated with floatx80_chs() when bit 7 of the
 * byte at @ptr + 9 is set.
 *
 * NOTE(review): the visible lines do not check that nibbles are <= 9.
 * NOTE(review): the declarations, the initialisation of `val`, and the
 * code that places `tmp` on the register stack are not present in this
 * view -- confirm against the complete source.
 */
769 void helper_fbld_ST0(CPUX86State
*env
, target_ulong ptr
)
777 for (i
= 8; i
>= 0; i
--) {
/* One byte holds two BCD digits; most significant byte first. */
778 v
= cpu_ldub_data_ra(env
, ptr
+ i
, GETPC());
779 val
= (val
* 100) + ((v
>> 4) * 10) + (v
& 0xf);
781 tmp
= int64_to_floatx80(val
, &env
->fp_status
);
/* Bit 7 of the tenth byte selects a negative result. */
782 if (cpu_ldub_data_ra(env
, ptr
+ 9, GETPC()) & 0x80) {
783 tmp
= floatx80_chs(tmp
);
789 void helper_fbst_ST0(CPUX86State
*env
, target_ulong ptr
)
791 uint8_t old_flags
= save_exception_flags(env
);
793 target_ulong mem_ref
, mem_end
;
799 val
= floatx80_to_int64(ST0
, &env
->fp_status
);
801 if (val
>= 1000000000000000000LL || val
<= -1000000000000000000LL) {
802 set_float_exception_flags(float_flag_invalid
, &env
->fp_status
);
803 while (mem_ref
< ptr
+ 7) {
804 cpu_stb_data_ra(env
, mem_ref
++, 0, GETPC());
806 cpu_stb_data_ra(env
, mem_ref
++, 0xc0, GETPC());
807 cpu_stb_data_ra(env
, mem_ref
++, 0xff, GETPC());
808 cpu_stb_data_ra(env
, mem_ref
++, 0xff, GETPC());
809 merge_exception_flags(env
, old_flags
);
812 mem_end
= mem_ref
+ 9;
814 cpu_stb_data_ra(env
, mem_end
, 0x80, GETPC());
817 cpu_stb_data_ra(env
, mem_end
, 0x00, GETPC());
819 while (mem_ref
< mem_end
) {
825 v
= ((v
/ 10) << 4) | (v
% 10);
826 cpu_stb_data_ra(env
, mem_ref
++, v
, GETPC());
828 while (mem_ref
< mem_end
) {
829 cpu_stb_data_ra(env
, mem_ref
++, 0, GETPC());
831 merge_exception_flags(env
, old_flags
);
834 /* 128-bit significand of log(2). */
835 #define ln2_sig_high 0xb17217f7d1cf79abULL
836 #define ln2_sig_low 0xc9e3b39803f2f6afULL
839 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
840 * the interval [-1/64, 1/64].
842 #define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
843 #define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
844 #define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
845 #define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
846 #define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
847 #define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
848 #define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
849 #define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
850 #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
854 * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
855 * are very close to exact floatx80 values.
858 /* The value of 2^t. */
860 /* The value of 2^t - 1. */
864 static const struct f2xm1_data f2xm1_table
[65] = {
865 { make_floatx80_init(0xbfff, 0x8000000000000000ULL
),
866 make_floatx80_init(0x3ffe, 0x8000000000000000ULL
),
867 make_floatx80_init(0xbffe, 0x8000000000000000ULL
) },
868 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL
),
869 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL
),
870 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL
) },
871 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL
),
872 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL
),
873 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL
) },
874 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL
),
875 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL
),
876 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL
) },
877 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL
),
878 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL
),
879 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL
) },
880 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL
),
881 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL
),
882 make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL
) },
883 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL
),
884 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL
),
885 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL
) },
886 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL
),
887 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL
),
888 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL
) },
889 { make_floatx80_init(0xbffe, 0xc000000000006530ULL
),
890 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL
),
891 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL
) },
892 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL
),
893 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL
),
894 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL
) },
895 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL
),
896 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL
),
897 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL
) },
898 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL
),
899 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL
),
900 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL
) },
901 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL
),
902 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL
),
903 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL
) },
904 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL
),
905 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL
),
906 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL
) },
907 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL
),
908 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL
),
909 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL
) },
910 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL
),
911 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL
),
912 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL
) },
913 { make_floatx80_init(0xbffe, 0x800000000000227dULL
),
914 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL
),
915 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL
) },
916 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL
),
917 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL
),
918 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL
) },
919 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL
),
920 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL
),
921 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL
) },
922 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL
),
923 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL
),
924 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL
) },
925 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL
),
926 make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL
),
927 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL
) },
928 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL
),
929 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL
),
930 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL
) },
931 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL
),
932 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL
),
933 make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL
) },
934 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL
),
935 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL
),
936 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL
) },
937 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL
),
938 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL
),
939 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL
) },
940 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL
),
941 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL
),
942 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL
) },
943 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL
),
944 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL
),
945 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL
) },
946 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL
),
947 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL
),
948 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL
) },
949 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL
),
950 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL
),
951 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL
) },
952 { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL
),
953 make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL
),
954 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL
) },
955 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL
),
956 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL
),
957 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL
) },
958 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL
),
959 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL
),
960 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL
) },
961 { floatx80_zero_init
,
962 make_floatx80_init(0x3fff, 0x8000000000000000ULL
),
963 floatx80_zero_init
},
964 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL
),
965 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL
),
966 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL
) },
967 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL
),
968 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL
),
969 make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL
) },
970 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL
),
971 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL
),
972 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL
) },
973 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL
),
974 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL
),
975 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL
) },
976 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL
),
977 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL
),
978 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL
) },
979 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL
),
980 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL
),
981 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL
) },
982 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL
),
983 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL
),
984 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL
) },
985 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL
),
986 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL
),
987 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL
) },
988 { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL
),
989 make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL
),
990 make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL
) },
991 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL
),
992 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL
),
993 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL
) },
994 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL
),
995 make_floatx80_init(0x3fff, 0xa27043030c49370aULL
),
996 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL
) },
997 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL
),
998 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL
),
999 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL
) },
1000 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL
),
1001 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL
),
1002 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL
) },
1003 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL
),
1004 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL
),
1005 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL
) },
1006 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL
),
1007 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL
),
1008 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL
) },
1009 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL
),
1010 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL
),
1011 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL
) },
1012 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL
),
1013 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL
),
1014 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL
) },
1015 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL
),
1016 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL
),
1017 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL
) },
1018 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL
),
1019 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL
),
1020 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL
) },
1021 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL
),
1022 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL
),
1023 make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL
) },
1024 { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL
),
1025 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL
),
1026 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL
) },
1027 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL
),
1028 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL
),
1029 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL
) },
1030 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL
),
1031 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL
),
1032 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL
) },
1033 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL
),
1034 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL
),
1035 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL
) },
1036 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL
),
1037 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL
),
1038 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL
) },
1039 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL
),
1040 make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL
),
1041 make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL
) },
1042 { make_floatx80_init(0x3ffe, 0xd800000000004165ULL
),
1043 make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL
),
1044 make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL
) },
1045 { make_floatx80_init(0x3ffe, 0xe00000000000582cULL
),
1046 make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL
),
1047 make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL
) },
1048 { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL
),
1049 make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL
),
1050 make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL
) },
1051 { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL
),
1052 make_floatx80_init(0x3fff, 0xf5257d152486a2faULL
),
1053 make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL
) },
1054 { make_floatx80_init(0x3ffe, 0xf800000000001069ULL
),
1055 make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL
),
1056 make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL
) },
1057 { make_floatx80_init(0x3fff, 0x8000000000000000ULL
),
1058 make_floatx80_init(0x4000, 0x8000000000000000ULL
),
1059 make_floatx80_init(0x3fff, 0x8000000000000000ULL
) },
1062 void helper_f2xm1(CPUX86State
*env
)
1064 uint8_t old_flags
= save_exception_flags(env
);
1065 uint64_t sig
= extractFloatx80Frac(ST0
);
1066 int32_t exp
= extractFloatx80Exp(ST0
);
1067 bool sign
= extractFloatx80Sign(ST0
);
1069 if (floatx80_invalid_encoding(ST0
)) {
1070 float_raise(float_flag_invalid
, &env
->fp_status
);
1071 ST0
= floatx80_default_nan(&env
->fp_status
);
1072 } else if (floatx80_is_any_nan(ST0
)) {
1073 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
1074 float_raise(float_flag_invalid
, &env
->fp_status
);
1075 ST0
= floatx80_silence_nan(ST0
, &env
->fp_status
);
1077 } else if (exp
> 0x3fff ||
1078 (exp
== 0x3fff && sig
!= (0x8000000000000000ULL
))) {
1079 /* Out of range for the instruction, treat as invalid. */
1080 float_raise(float_flag_invalid
, &env
->fp_status
);
1081 ST0
= floatx80_default_nan(&env
->fp_status
);
1082 } else if (exp
== 0x3fff) {
1083 /* Argument 1 or -1, exact result 1 or -0.5. */
1085 ST0
= make_floatx80(0xbffe, 0x8000000000000000ULL
);
1087 } else if (exp
< 0x3fb0) {
1088 if (!floatx80_is_zero(ST0
)) {
1090 * Multiplying the argument by an extra-precision version
1091 * of log(2) is sufficiently precise. Zero arguments are
1092 * returned unchanged.
1094 uint64_t sig0
, sig1
, sig2
;
1096 normalizeFloatx80Subnormal(sig
, &exp
, &sig
);
1098 mul128By64To192(ln2_sig_high
, ln2_sig_low
, sig
, &sig0
, &sig1
,
1100 /* This result is inexact. */
1102 ST0
= normalizeRoundAndPackFloatx80(floatx80_precision_x
,
1103 sign
, exp
, sig0
, sig1
,
1107 floatx80 tmp
, y
, accum
;
1109 int32_t n
, aexp
, bexp
;
1110 uint64_t asig0
, asig1
, asig2
, bsig0
, bsig1
;
1111 FloatRoundMode save_mode
= env
->fp_status
.float_rounding_mode
;
1112 FloatX80RoundPrec save_prec
=
1113 env
->fp_status
.floatx80_rounding_precision
;
1114 env
->fp_status
.float_rounding_mode
= float_round_nearest_even
;
1115 env
->fp_status
.floatx80_rounding_precision
= floatx80_precision_x
;
1117 /* Find the nearest multiple of 1/32 to the argument. */
1118 tmp
= floatx80_scalbn(ST0
, 5, &env
->fp_status
);
1119 n
= 32 + floatx80_to_int32(tmp
, &env
->fp_status
);
1120 y
= floatx80_sub(ST0
, f2xm1_table
[n
].t
, &env
->fp_status
);
1122 if (floatx80_is_zero(y
)) {
1124 * Use the value of 2^t - 1 from the table, to avoid
1125 * needing to special-case zero as a result of
1126 * multiplication below.
1128 ST0
= f2xm1_table
[n
].t
;
1129 set_float_exception_flags(float_flag_inexact
, &env
->fp_status
);
1130 env
->fp_status
.float_rounding_mode
= save_mode
;
1133 * Compute the lower parts of a polynomial expansion for
1136 accum
= floatx80_mul(f2xm1_coeff_7
, y
, &env
->fp_status
);
1137 accum
= floatx80_add(f2xm1_coeff_6
, accum
, &env
->fp_status
);
1138 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1139 accum
= floatx80_add(f2xm1_coeff_5
, accum
, &env
->fp_status
);
1140 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1141 accum
= floatx80_add(f2xm1_coeff_4
, accum
, &env
->fp_status
);
1142 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1143 accum
= floatx80_add(f2xm1_coeff_3
, accum
, &env
->fp_status
);
1144 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1145 accum
= floatx80_add(f2xm1_coeff_2
, accum
, &env
->fp_status
);
1146 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1147 accum
= floatx80_add(f2xm1_coeff_1
, accum
, &env
->fp_status
);
1148 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1149 accum
= floatx80_add(f2xm1_coeff_0_low
, accum
, &env
->fp_status
);
1152 * The full polynomial expansion is f2xm1_coeff_0 + accum
1153 * (where accum has much lower magnitude, and so, in
1154 * particular, carry out of the addition is not possible).
1155 * (This expansion is only accurate to about 70 bits, not
1158 aexp
= extractFloatx80Exp(f2xm1_coeff_0
);
1159 asign
= extractFloatx80Sign(f2xm1_coeff_0
);
1160 shift128RightJamming(extractFloatx80Frac(accum
), 0,
1161 aexp
- extractFloatx80Exp(accum
),
1163 bsig0
= extractFloatx80Frac(f2xm1_coeff_0
);
1165 if (asign
== extractFloatx80Sign(accum
)) {
1166 add128(bsig0
, bsig1
, asig0
, asig1
, &asig0
, &asig1
);
1168 sub128(bsig0
, bsig1
, asig0
, asig1
, &asig0
, &asig1
);
1170 /* And thus compute an approximation to 2^y - 1. */
1171 mul128By64To192(asig0
, asig1
, extractFloatx80Frac(y
),
1172 &asig0
, &asig1
, &asig2
);
1173 aexp
+= extractFloatx80Exp(y
) - 0x3ffe;
1174 asign
^= extractFloatx80Sign(y
);
1177 * Multiply this by the precomputed value of 2^t and
1178 * add that of 2^t - 1.
1180 mul128By64To192(asig0
, asig1
,
1181 extractFloatx80Frac(f2xm1_table
[n
].exp2
),
1182 &asig0
, &asig1
, &asig2
);
1183 aexp
+= extractFloatx80Exp(f2xm1_table
[n
].exp2
) - 0x3ffe;
1184 bexp
= extractFloatx80Exp(f2xm1_table
[n
].exp2m1
);
1185 bsig0
= extractFloatx80Frac(f2xm1_table
[n
].exp2m1
);
1188 shift128RightJamming(bsig0
, bsig1
, aexp
- bexp
,
1190 } else if (aexp
< bexp
) {
1191 shift128RightJamming(asig0
, asig1
, bexp
- aexp
,
1195 /* The sign of 2^t - 1 is always that of the result. */
1196 bsign
= extractFloatx80Sign(f2xm1_table
[n
].exp2m1
);
1197 if (asign
== bsign
) {
1198 /* Avoid possible carry out of the addition. */
1199 shift128RightJamming(asig0
, asig1
, 1,
1201 shift128RightJamming(bsig0
, bsig1
, 1,
1204 add128(asig0
, asig1
, bsig0
, bsig1
, &asig0
, &asig1
);
1206 sub128(bsig0
, bsig1
, asig0
, asig1
, &asig0
, &asig1
);
1210 env
->fp_status
.float_rounding_mode
= save_mode
;
1211 /* This result is inexact. */
1213 ST0
= normalizeRoundAndPackFloatx80(floatx80_precision_x
,
1214 asign
, aexp
, asig0
, asig1
,
1218 env
->fp_status
.floatx80_rounding_precision
= save_prec
;
1220 merge_exception_flags(env
, old_flags
);
1223 void helper_fptan(CPUX86State
*env
)
1225 double fptemp
= floatx80_to_double(env
, ST0
);
1227 if ((fptemp
> MAXTAN
) || (fptemp
< -MAXTAN
)) {
1230 fptemp
= tan(fptemp
);
1231 ST0
= double_to_floatx80(env
, fptemp
);
1234 env
->fpus
&= ~0x400; /* C2 <-- 0 */
1235 /* the above code is for |arg| < 2**52 only */
/*
 * Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision.
 *
 * Each constant is an exponent (in the same biased form that
 * extractFloatx80Exp() returns) plus a significand split into a high and a
 * low 64-bit word.  pi/4, pi/2 and pi differ only by powers of two, so they
 * share a single significand and differ in exponent alone.
 */
#define pi_exp          0x4000
#define pi_sig_high     0xc90fdaa22168c234ULL
#define pi_sig_low      0xc4c6628b80dc1cd1ULL

#define pi_2_exp        0x3fff
#define pi_2_sig_high   pi_sig_high
#define pi_2_sig_low    pi_sig_low

#define pi_4_exp        0x3ffe
#define pi_4_sig_high   pi_sig_high
#define pi_4_sig_low    pi_sig_low

#define pi_34_exp       0x4000
#define pi_34_sig_high  0x96cbe3f9990e91a7ULL
#define pi_34_sig_low   0x9394c9e8a0a5159dULL
/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 *
 * fpatan_coeff_N multiplies x^(2N+1); helper_fpatan() evaluates the
 * series in z^2 by Horner's scheme and multiplies by z at the end.
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)
1269 struct fpatan_data
{
1270 /* High and low parts of atan(x). */
1271 floatx80 atan_high
, atan_low
;
1274 static const struct fpatan_data fpatan_table
[9] = {
1275 { floatx80_zero_init
,
1276 floatx80_zero_init
},
1277 { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL
),
1278 make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL
) },
1279 { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL
),
1280 make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL
) },
1281 { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL
),
1282 make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL
) },
1283 { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL
),
1284 make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL
) },
1285 { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL
),
1286 make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL
) },
1287 { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL
),
1288 make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL
) },
1289 { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL
),
1290 make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL
) },
1291 { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL
),
1292 make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL
) },
1295 void helper_fpatan(CPUX86State
*env
)
1297 uint8_t old_flags
= save_exception_flags(env
);
1298 uint64_t arg0_sig
= extractFloatx80Frac(ST0
);
1299 int32_t arg0_exp
= extractFloatx80Exp(ST0
);
1300 bool arg0_sign
= extractFloatx80Sign(ST0
);
1301 uint64_t arg1_sig
= extractFloatx80Frac(ST1
);
1302 int32_t arg1_exp
= extractFloatx80Exp(ST1
);
1303 bool arg1_sign
= extractFloatx80Sign(ST1
);
1305 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
1306 float_raise(float_flag_invalid
, &env
->fp_status
);
1307 ST1
= floatx80_silence_nan(ST0
, &env
->fp_status
);
1308 } else if (floatx80_is_signaling_nan(ST1
, &env
->fp_status
)) {
1309 float_raise(float_flag_invalid
, &env
->fp_status
);
1310 ST1
= floatx80_silence_nan(ST1
, &env
->fp_status
);
1311 } else if (floatx80_invalid_encoding(ST0
) ||
1312 floatx80_invalid_encoding(ST1
)) {
1313 float_raise(float_flag_invalid
, &env
->fp_status
);
1314 ST1
= floatx80_default_nan(&env
->fp_status
);
1315 } else if (floatx80_is_any_nan(ST0
)) {
1317 } else if (floatx80_is_any_nan(ST1
)) {
1318 /* Pass this NaN through. */
1319 } else if (floatx80_is_zero(ST1
) && !arg0_sign
) {
1320 /* Pass this zero through. */
1321 } else if (((floatx80_is_infinity(ST0
) && !floatx80_is_infinity(ST1
)) ||
1322 arg0_exp
- arg1_exp
>= 80) &&
1325 * Dividing ST1 by ST0 gives the correct result up to
1326 * rounding, and avoids spurious underflow exceptions that
1327 * might result from passing some small values through the
1328 * polynomial approximation, but if a finite nonzero result of
1329 * division is exact, the result of fpatan is still inexact
1330 * (and underflowing where appropriate).
1332 FloatX80RoundPrec save_prec
=
1333 env
->fp_status
.floatx80_rounding_precision
;
1334 env
->fp_status
.floatx80_rounding_precision
= floatx80_precision_x
;
1335 ST1
= floatx80_div(ST1
, ST0
, &env
->fp_status
);
1336 env
->fp_status
.floatx80_rounding_precision
= save_prec
;
1337 if (!floatx80_is_zero(ST1
) &&
1338 !(get_float_exception_flags(&env
->fp_status
) &
1339 float_flag_inexact
)) {
1341 * The mathematical result is very slightly closer to zero
1342 * than this exact result. Round a value with the
1343 * significand adjusted accordingly to get the correct
1344 * exceptions, and possibly an adjusted result depending
1345 * on the rounding mode.
1347 uint64_t sig
= extractFloatx80Frac(ST1
);
1348 int32_t exp
= extractFloatx80Exp(ST1
);
1349 bool sign
= extractFloatx80Sign(ST1
);
1351 normalizeFloatx80Subnormal(sig
, &exp
, &sig
);
1353 ST1
= normalizeRoundAndPackFloatx80(floatx80_precision_x
,
1355 -1, &env
->fp_status
);
1358 /* The result is inexact. */
1359 bool rsign
= arg1_sign
;
1361 uint64_t rsig0
, rsig1
;
1362 if (floatx80_is_zero(ST1
)) {
1364 * ST0 is negative. The result is pi with the sign of
1368 rsig0
= pi_sig_high
;
1370 } else if (floatx80_is_infinity(ST1
)) {
1371 if (floatx80_is_infinity(ST0
)) {
1374 rsig0
= pi_34_sig_high
;
1375 rsig1
= pi_34_sig_low
;
1378 rsig0
= pi_4_sig_high
;
1379 rsig1
= pi_4_sig_low
;
1383 rsig0
= pi_2_sig_high
;
1384 rsig1
= pi_2_sig_low
;
1386 } else if (floatx80_is_zero(ST0
) || arg1_exp
- arg0_exp
>= 80) {
1388 rsig0
= pi_2_sig_high
;
1389 rsig1
= pi_2_sig_low
;
1390 } else if (floatx80_is_infinity(ST0
) || arg0_exp
- arg1_exp
>= 80) {
1391 /* ST0 is negative. */
1393 rsig0
= pi_sig_high
;
1397 * ST0 and ST1 are finite, nonzero and with exponents not
1400 int32_t adj_exp
, num_exp
, den_exp
, xexp
, yexp
, n
, texp
, zexp
, aexp
;
1401 int32_t azexp
, axexp
;
1402 bool adj_sub
, ysign
, zsign
;
1403 uint64_t adj_sig0
, adj_sig1
, num_sig
, den_sig
, xsig0
, xsig1
;
1404 uint64_t msig0
, msig1
, msig2
, remsig0
, remsig1
, remsig2
;
1405 uint64_t ysig0
, ysig1
, tsig
, zsig0
, zsig1
, asig0
, asig1
;
1406 uint64_t azsig0
, azsig1
;
1407 uint64_t azsig2
, azsig3
, axsig0
, axsig1
;
1409 FloatRoundMode save_mode
= env
->fp_status
.float_rounding_mode
;
1410 FloatX80RoundPrec save_prec
=
1411 env
->fp_status
.floatx80_rounding_precision
;
1412 env
->fp_status
.float_rounding_mode
= float_round_nearest_even
;
1413 env
->fp_status
.floatx80_rounding_precision
= floatx80_precision_x
;
1415 if (arg0_exp
== 0) {
1416 normalizeFloatx80Subnormal(arg0_sig
, &arg0_exp
, &arg0_sig
);
1418 if (arg1_exp
== 0) {
1419 normalizeFloatx80Subnormal(arg1_sig
, &arg1_exp
, &arg1_sig
);
1421 if (arg0_exp
> arg1_exp
||
1422 (arg0_exp
== arg1_exp
&& arg0_sig
>= arg1_sig
)) {
1423 /* Work with abs(ST1) / abs(ST0). */
1429 /* The result is subtracted from pi. */
1431 adj_sig0
= pi_sig_high
;
1432 adj_sig1
= pi_sig_low
;
1435 /* The result is used as-is. */
1442 /* Work with abs(ST0) / abs(ST1). */
1447 /* The result is added to or subtracted from pi/2. */
1449 adj_sig0
= pi_2_sig_high
;
1450 adj_sig1
= pi_2_sig_low
;
1451 adj_sub
= !arg0_sign
;
1455 * Compute x = num/den, where 0 < x <= 1 and x is not too
1458 xexp
= num_exp
- den_exp
+ 0x3ffe;
1461 if (den_sig
<= remsig0
) {
1462 shift128Right(remsig0
, remsig1
, 1, &remsig0
, &remsig1
);
1465 xsig0
= estimateDiv128To64(remsig0
, remsig1
, den_sig
);
1466 mul64To128(den_sig
, xsig0
, &msig0
, &msig1
);
1467 sub128(remsig0
, remsig1
, msig0
, msig1
, &remsig0
, &remsig1
);
1468 while ((int64_t) remsig0
< 0) {
1470 add128(remsig0
, remsig1
, 0, den_sig
, &remsig0
, &remsig1
);
1472 xsig1
= estimateDiv128To64(remsig1
, 0, den_sig
);
1474 * No need to correct any estimation error in xsig1; even
1475 * with such error, it is accurate enough.
1479 * Split x as x = t + y, where t = n/8 is the nearest
1480 * multiple of 1/8 to x.
1482 x8
= normalizeRoundAndPackFloatx80(floatx80_precision_x
,
1483 false, xexp
+ 3, xsig0
,
1484 xsig1
, &env
->fp_status
);
1485 n
= floatx80_to_int32(x8
, &env
->fp_status
);
1494 int shift
= clz32(n
) + 32;
1495 texp
= 0x403b - shift
;
1499 sub128(xsig0
, xsig1
, tsig
, 0, &ysig0
, &ysig1
);
1500 if ((int64_t) ysig0
>= 0) {
1506 shift
= clz64(ysig1
) + 64;
1507 yexp
= xexp
- shift
;
1508 shift128Left(ysig0
, ysig1
, shift
,
1512 shift
= clz64(ysig0
);
1513 yexp
= xexp
- shift
;
1514 shift128Left(ysig0
, ysig1
, shift
, &ysig0
, &ysig1
);
1518 sub128(0, 0, ysig0
, ysig1
, &ysig0
, &ysig1
);
1520 shift
= clz64(ysig1
) + 64;
1522 shift
= clz64(ysig0
);
1524 yexp
= xexp
- shift
;
1525 shift128Left(ysig0
, ysig1
, shift
, &ysig0
, &ysig1
);
1529 * t's exponent must be greater than x's because t
1530 * is positive and the nearest multiple of 1/8 to
1531 * x, and if x has a greater exponent, the power
1532 * of 2 with that exponent is also a multiple of
1535 uint64_t usig0
, usig1
;
1536 shift128RightJamming(xsig0
, xsig1
, texp
- xexp
,
1539 sub128(tsig
, 0, usig0
, usig1
, &ysig0
, &ysig1
);
1541 shift
= clz64(ysig1
) + 64;
1543 shift
= clz64(ysig0
);
1545 yexp
= texp
- shift
;
1546 shift128Left(ysig0
, ysig1
, shift
, &ysig0
, &ysig1
);
1551 * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
1555 if (texp
== 0 || yexp
== 0) {
1561 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
1563 int32_t dexp
= texp
+ xexp
- 0x3ffe;
1564 uint64_t dsig0
, dsig1
, dsig2
;
1565 mul128By64To192(xsig0
, xsig1
, tsig
, &dsig0
, &dsig1
, &dsig2
);
1567 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
1568 * bit). Add 1 to produce the denominator 1+tx.
1570 shift128RightJamming(dsig0
, dsig1
, 0x3fff - dexp
,
1572 dsig0
|= 0x8000000000000000ULL
;
1577 if (dsig0
<= remsig0
) {
1578 shift128Right(remsig0
, remsig1
, 1, &remsig0
, &remsig1
);
1581 zsig0
= estimateDiv128To64(remsig0
, remsig1
, dsig0
);
1582 mul128By64To192(dsig0
, dsig1
, zsig0
, &msig0
, &msig1
, &msig2
);
1583 sub192(remsig0
, remsig1
, remsig2
, msig0
, msig1
, msig2
,
1584 &remsig0
, &remsig1
, &remsig2
);
1585 while ((int64_t) remsig0
< 0) {
1587 add192(remsig0
, remsig1
, remsig2
, 0, dsig0
, dsig1
,
1588 &remsig0
, &remsig1
, &remsig2
);
1590 zsig1
= estimateDiv128To64(remsig1
, remsig2
, dsig0
);
1591 /* No need to correct any estimation error in zsig1. */
1600 uint64_t z2sig0
, z2sig1
, z2sig2
, z2sig3
;
1602 mul128To256(zsig0
, zsig1
, zsig0
, zsig1
,
1603 &z2sig0
, &z2sig1
, &z2sig2
, &z2sig3
);
1604 z2
= normalizeRoundAndPackFloatx80(floatx80_precision_x
, false,
1605 zexp
+ zexp
- 0x3ffe,
1609 /* Compute the lower parts of the polynomial expansion. */
1610 accum
= floatx80_mul(fpatan_coeff_6
, z2
, &env
->fp_status
);
1611 accum
= floatx80_add(fpatan_coeff_5
, accum
, &env
->fp_status
);
1612 accum
= floatx80_mul(accum
, z2
, &env
->fp_status
);
1613 accum
= floatx80_add(fpatan_coeff_4
, accum
, &env
->fp_status
);
1614 accum
= floatx80_mul(accum
, z2
, &env
->fp_status
);
1615 accum
= floatx80_add(fpatan_coeff_3
, accum
, &env
->fp_status
);
1616 accum
= floatx80_mul(accum
, z2
, &env
->fp_status
);
1617 accum
= floatx80_add(fpatan_coeff_2
, accum
, &env
->fp_status
);
1618 accum
= floatx80_mul(accum
, z2
, &env
->fp_status
);
1619 accum
= floatx80_add(fpatan_coeff_1
, accum
, &env
->fp_status
);
1620 accum
= floatx80_mul(accum
, z2
, &env
->fp_status
);
1623 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
1624 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
1626 aexp
= extractFloatx80Exp(fpatan_coeff_0
);
1627 shift128RightJamming(extractFloatx80Frac(accum
), 0,
1628 aexp
- extractFloatx80Exp(accum
),
1630 sub128(extractFloatx80Frac(fpatan_coeff_0
), 0, asig0
, asig1
,
1632 /* Multiply by z to compute arctan(z). */
1633 azexp
= aexp
+ zexp
- 0x3ffe;
1634 mul128To256(asig0
, asig1
, zsig0
, zsig1
, &azsig0
, &azsig1
,
1638 /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */
1640 /* z is positive. */
1645 bool low_sign
= extractFloatx80Sign(fpatan_table
[n
].atan_low
);
1646 int32_t low_exp
= extractFloatx80Exp(fpatan_table
[n
].atan_low
);
1648 extractFloatx80Frac(fpatan_table
[n
].atan_low
);
1649 uint64_t low_sig1
= 0;
1650 axexp
= extractFloatx80Exp(fpatan_table
[n
].atan_high
);
1651 axsig0
= extractFloatx80Frac(fpatan_table
[n
].atan_high
);
1653 shift128RightJamming(low_sig0
, low_sig1
, axexp
- low_exp
,
1654 &low_sig0
, &low_sig1
);
1656 sub128(axsig0
, axsig1
, low_sig0
, low_sig1
,
1659 add128(axsig0
, axsig1
, low_sig0
, low_sig1
,
1662 if (azexp
>= axexp
) {
1663 shift128RightJamming(axsig0
, axsig1
, azexp
- axexp
+ 1,
1666 shift128RightJamming(azsig0
, azsig1
, 1,
1669 shift128RightJamming(axsig0
, axsig1
, 1,
1671 shift128RightJamming(azsig0
, azsig1
, axexp
- azexp
+ 1,
1676 sub128(axsig0
, axsig1
, azsig0
, azsig1
,
1679 add128(axsig0
, axsig1
, azsig0
, azsig1
,
1690 * Add or subtract arctan(x) (exponent axexp,
1691 * significand axsig0 and axsig1, positive, not
1692 * necessarily normalized) to the number given by
1693 * adj_exp, adj_sig0 and adj_sig1, according to
1696 if (adj_exp
>= axexp
) {
1697 shift128RightJamming(axsig0
, axsig1
, adj_exp
- axexp
+ 1,
1700 shift128RightJamming(adj_sig0
, adj_sig1
, 1,
1701 &adj_sig0
, &adj_sig1
);
1703 shift128RightJamming(axsig0
, axsig1
, 1,
1705 shift128RightJamming(adj_sig0
, adj_sig1
,
1706 axexp
- adj_exp
+ 1,
1707 &adj_sig0
, &adj_sig1
);
1711 sub128(adj_sig0
, adj_sig1
, axsig0
, axsig1
,
1714 add128(adj_sig0
, adj_sig1
, axsig0
, axsig1
,
1719 env
->fp_status
.float_rounding_mode
= save_mode
;
1720 env
->fp_status
.floatx80_rounding_precision
= save_prec
;
1722 /* This result is inexact. */
1724 ST1
= normalizeRoundAndPackFloatx80(floatx80_precision_x
, rsign
, rexp
,
1725 rsig0
, rsig1
, &env
->fp_status
);
1729 merge_exception_flags(env
, old_flags
);
1732 void helper_fxtract(CPUX86State
*env
)
1734 uint8_t old_flags
= save_exception_flags(env
);
1739 if (floatx80_is_zero(ST0
)) {
1740 /* Easy way to generate -inf and raising division by 0 exception */
1741 ST0
= floatx80_div(floatx80_chs(floatx80_one
), floatx80_zero
,
1745 } else if (floatx80_invalid_encoding(ST0
)) {
1746 float_raise(float_flag_invalid
, &env
->fp_status
);
1747 ST0
= floatx80_default_nan(&env
->fp_status
);
1750 } else if (floatx80_is_any_nan(ST0
)) {
1751 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
1752 float_raise(float_flag_invalid
, &env
->fp_status
);
1753 ST0
= floatx80_silence_nan(ST0
, &env
->fp_status
);
1757 } else if (floatx80_is_infinity(ST0
)) {
1760 ST1
= floatx80_infinity
;
1764 if (EXPD(temp
) == 0) {
1765 int shift
= clz64(temp
.l
.lower
);
1766 temp
.l
.lower
<<= shift
;
1767 expdif
= 1 - EXPBIAS
- shift
;
1768 float_raise(float_flag_input_denormal
, &env
->fp_status
);
1770 expdif
= EXPD(temp
) - EXPBIAS
;
1772 /* DP exponent bias */
1773 ST0
= int32_to_floatx80(expdif
, &env
->fp_status
);
1778 merge_exception_flags(env
, old_flags
);
1781 static void helper_fprem_common(CPUX86State
*env
, bool mod
)
1783 uint8_t old_flags
= save_exception_flags(env
);
1785 CPU_LDoubleU temp0
, temp1
;
1786 int exp0
, exp1
, expdiff
;
1793 env
->fpus
&= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1794 if (floatx80_is_zero(ST0
) || floatx80_is_zero(ST1
) ||
1795 exp0
== 0x7fff || exp1
== 0x7fff ||
1796 floatx80_invalid_encoding(ST0
) || floatx80_invalid_encoding(ST1
)) {
1797 ST0
= floatx80_modrem(ST0
, ST1
, mod
, "ient
, &env
->fp_status
);
1800 exp0
= 1 - clz64(temp0
.l
.lower
);
1803 exp1
= 1 - clz64(temp1
.l
.lower
);
1805 expdiff
= exp0
- exp1
;
1807 ST0
= floatx80_modrem(ST0
, ST1
, mod
, "ient
, &env
->fp_status
);
1808 env
->fpus
|= (quotient
& 0x4) << (8 - 2); /* (C0) <-- q2 */
1809 env
->fpus
|= (quotient
& 0x2) << (14 - 1); /* (C3) <-- q1 */
1810 env
->fpus
|= (quotient
& 0x1) << (9 - 0); /* (C1) <-- q0 */
1813 * Partial remainder. This choice of how many bits to
1814 * process at once is specified in AMD instruction set
1815 * manuals, and empirically is followed by Intel
1816 * processors as well; it ensures that the final remainder
1817 * operation in a loop does produce the correct low three
1818 * bits of the quotient. AMD manuals specify that the
1819 * flags other than C2 are cleared, and empirically Intel
1820 * processors clear them as well.
1822 int n
= 32 + (expdiff
% 32);
1823 temp1
.d
= floatx80_scalbn(temp1
.d
, expdiff
- n
, &env
->fp_status
);
1824 ST0
= floatx80_mod(ST0
, temp1
.d
, &env
->fp_status
);
1825 env
->fpus
|= 0x400; /* C2 <-- 1 */
1828 merge_exception_flags(env
, old_flags
);
1831 void helper_fprem1(CPUX86State
*env
)
1833 helper_fprem_common(env
, false);
1836 void helper_fprem(CPUX86State
*env
)
1838 helper_fprem_common(env
, true);
/* 128-bit significand of log2(e), as high and low 64-bit words. */
#define log2_e_sig_high 0xb8aa3b295c17f0bbULL
#define log2_e_sig_low 0xbe87fed0691d3e89ULL

/*
 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
 * interval [sqrt(2)/2, sqrt(2)].
 *
 * The leading coefficient is stored as a high part (fyl2x_coeff_0) plus a
 * small correction (fyl2x_coeff_0_low) to carry extra precision.
 */
#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
1864 * Compute an approximation of log2(1+arg), where 1+arg is in the
1865 * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this
1866 * function is called, rounding precision is set to 80 and the
1867 * round-to-nearest mode is in effect. arg must not be exactly zero,
1868 * and must not be so close to zero that underflow might occur.
1870 static void helper_fyl2x_common(CPUX86State
*env
, floatx80 arg
, int32_t *exp
,
1871 uint64_t *sig0
, uint64_t *sig1
)
1873 uint64_t arg0_sig
= extractFloatx80Frac(arg
);
1874 int32_t arg0_exp
= extractFloatx80Exp(arg
);
1875 bool arg0_sign
= extractFloatx80Sign(arg
);
1877 int32_t dexp
, texp
, aexp
;
1878 uint64_t dsig0
, dsig1
, tsig0
, tsig1
, rsig0
, rsig1
, rsig2
;
1879 uint64_t msig0
, msig1
, msig2
, t2sig0
, t2sig1
, t2sig2
, t2sig3
;
1880 uint64_t asig0
, asig1
, asig2
, asig3
, bsig0
, bsig1
;
1884 * Compute an approximation of arg/(2+arg), with extra precision,
1885 * as the argument to a polynomial approximation. The extra
1886 * precision is only needed for the first term of the
1887 * approximation, with subsequent terms being significantly
1888 * smaller; the approximation only uses odd exponents, and the
1889 * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
1893 shift128RightJamming(arg0_sig
, 0, dexp
- arg0_exp
, &dsig0
, &dsig1
);
1894 sub128(0, 0, dsig0
, dsig1
, &dsig0
, &dsig1
);
1897 shift128RightJamming(arg0_sig
, 0, dexp
- arg0_exp
, &dsig0
, &dsig1
);
1898 dsig0
|= 0x8000000000000000ULL
;
1900 texp
= arg0_exp
- dexp
+ 0x3ffe;
1904 if (dsig0
<= rsig0
) {
1905 shift128Right(rsig0
, rsig1
, 1, &rsig0
, &rsig1
);
1908 tsig0
= estimateDiv128To64(rsig0
, rsig1
, dsig0
);
1909 mul128By64To192(dsig0
, dsig1
, tsig0
, &msig0
, &msig1
, &msig2
);
1910 sub192(rsig0
, rsig1
, rsig2
, msig0
, msig1
, msig2
,
1911 &rsig0
, &rsig1
, &rsig2
);
1912 while ((int64_t) rsig0
< 0) {
1914 add192(rsig0
, rsig1
, rsig2
, 0, dsig0
, dsig1
,
1915 &rsig0
, &rsig1
, &rsig2
);
1917 tsig1
= estimateDiv128To64(rsig1
, rsig2
, dsig0
);
1919 * No need to correct any estimation error in tsig1; even with
1920 * such error, it is accurate enough. Now compute the square of
1921 * that approximation.
1923 mul128To256(tsig0
, tsig1
, tsig0
, tsig1
,
1924 &t2sig0
, &t2sig1
, &t2sig2
, &t2sig3
);
1925 t2
= normalizeRoundAndPackFloatx80(floatx80_precision_x
, false,
1926 texp
+ texp
- 0x3ffe,
1927 t2sig0
, t2sig1
, &env
->fp_status
);
1929 /* Compute the lower parts of the polynomial expansion. */
1930 accum
= floatx80_mul(fyl2x_coeff_9
, t2
, &env
->fp_status
);
1931 accum
= floatx80_add(fyl2x_coeff_8
, accum
, &env
->fp_status
);
1932 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1933 accum
= floatx80_add(fyl2x_coeff_7
, accum
, &env
->fp_status
);
1934 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1935 accum
= floatx80_add(fyl2x_coeff_6
, accum
, &env
->fp_status
);
1936 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1937 accum
= floatx80_add(fyl2x_coeff_5
, accum
, &env
->fp_status
);
1938 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1939 accum
= floatx80_add(fyl2x_coeff_4
, accum
, &env
->fp_status
);
1940 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1941 accum
= floatx80_add(fyl2x_coeff_3
, accum
, &env
->fp_status
);
1942 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1943 accum
= floatx80_add(fyl2x_coeff_2
, accum
, &env
->fp_status
);
1944 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1945 accum
= floatx80_add(fyl2x_coeff_1
, accum
, &env
->fp_status
);
1946 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1947 accum
= floatx80_add(fyl2x_coeff_0_low
, accum
, &env
->fp_status
);
1950 * The full polynomial expansion is fyl2x_coeff_0 + accum (where
1951 * accum has much lower magnitude, and so, in particular, carry
1952 * out of the addition is not possible), multiplied by t. (This
1953 * expansion is only accurate to about 70 bits, not 128 bits.)
1955 aexp
= extractFloatx80Exp(fyl2x_coeff_0
);
1956 asign
= extractFloatx80Sign(fyl2x_coeff_0
);
1957 shift128RightJamming(extractFloatx80Frac(accum
), 0,
1958 aexp
- extractFloatx80Exp(accum
),
1960 bsig0
= extractFloatx80Frac(fyl2x_coeff_0
);
1962 if (asign
== extractFloatx80Sign(accum
)) {
1963 add128(bsig0
, bsig1
, asig0
, asig1
, &asig0
, &asig1
);
1965 sub128(bsig0
, bsig1
, asig0
, asig1
, &asig0
, &asig1
);
1967 /* Multiply by t to compute the required result. */
1968 mul128To256(asig0
, asig1
, tsig0
, tsig1
,
1969 &asig0
, &asig1
, &asig2
, &asig3
);
1970 aexp
+= texp
- 0x3ffe;
1976 void helper_fyl2xp1(CPUX86State
*env
)
1978 uint8_t old_flags
= save_exception_flags(env
);
1979 uint64_t arg0_sig
= extractFloatx80Frac(ST0
);
1980 int32_t arg0_exp
= extractFloatx80Exp(ST0
);
1981 bool arg0_sign
= extractFloatx80Sign(ST0
);
1982 uint64_t arg1_sig
= extractFloatx80Frac(ST1
);
1983 int32_t arg1_exp
= extractFloatx80Exp(ST1
);
1984 bool arg1_sign
= extractFloatx80Sign(ST1
);
1986 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
1987 float_raise(float_flag_invalid
, &env
->fp_status
);
1988 ST1
= floatx80_silence_nan(ST0
, &env
->fp_status
);
1989 } else if (floatx80_is_signaling_nan(ST1
, &env
->fp_status
)) {
1990 float_raise(float_flag_invalid
, &env
->fp_status
);
1991 ST1
= floatx80_silence_nan(ST1
, &env
->fp_status
);
1992 } else if (floatx80_invalid_encoding(ST0
) ||
1993 floatx80_invalid_encoding(ST1
)) {
1994 float_raise(float_flag_invalid
, &env
->fp_status
);
1995 ST1
= floatx80_default_nan(&env
->fp_status
);
1996 } else if (floatx80_is_any_nan(ST0
)) {
1998 } else if (floatx80_is_any_nan(ST1
)) {
1999 /* Pass this NaN through. */
2000 } else if (arg0_exp
> 0x3ffd ||
2001 (arg0_exp
== 0x3ffd && arg0_sig
> (arg0_sign
?
2002 0x95f619980c4336f7ULL
:
2003 0xd413cccfe7799211ULL
))) {
2005 * Out of range for the instruction (ST0 must have absolute
2006 * value less than 1 - sqrt(2)/2 = 0.292..., according to
2007 * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
2008 * to sqrt(2) - 1, which we allow here), treat as invalid.
2010 float_raise(float_flag_invalid
, &env
->fp_status
);
2011 ST1
= floatx80_default_nan(&env
->fp_status
);
2012 } else if (floatx80_is_zero(ST0
) || floatx80_is_zero(ST1
) ||
2013 arg1_exp
== 0x7fff) {
2015 * One argument is zero, or multiplying by infinity; correct
2016 * result is exact and can be obtained by multiplying the
2019 ST1
= floatx80_mul(ST0
, ST1
, &env
->fp_status
);
2020 } else if (arg0_exp
< 0x3fb0) {
2022 * Multiplying both arguments and an extra-precision version
2023 * of log2(e) is sufficiently precise.
2025 uint64_t sig0
, sig1
, sig2
;
2027 if (arg0_exp
== 0) {
2028 normalizeFloatx80Subnormal(arg0_sig
, &arg0_exp
, &arg0_sig
);
2030 if (arg1_exp
== 0) {
2031 normalizeFloatx80Subnormal(arg1_sig
, &arg1_exp
, &arg1_sig
);
2033 mul128By64To192(log2_e_sig_high
, log2_e_sig_low
, arg0_sig
,
2034 &sig0
, &sig1
, &sig2
);
2036 mul128By64To192(sig0
, sig1
, arg1_sig
, &sig0
, &sig1
, &sig2
);
2037 exp
+= arg1_exp
- 0x3ffe;
2038 /* This result is inexact. */
2040 ST1
= normalizeRoundAndPackFloatx80(floatx80_precision_x
,
2041 arg0_sign
^ arg1_sign
, exp
,
2042 sig0
, sig1
, &env
->fp_status
);
2045 uint64_t asig0
, asig1
, asig2
;
2046 FloatRoundMode save_mode
= env
->fp_status
.float_rounding_mode
;
2047 FloatX80RoundPrec save_prec
=
2048 env
->fp_status
.floatx80_rounding_precision
;
2049 env
->fp_status
.float_rounding_mode
= float_round_nearest_even
;
2050 env
->fp_status
.floatx80_rounding_precision
= floatx80_precision_x
;
2052 helper_fyl2x_common(env
, ST0
, &aexp
, &asig0
, &asig1
);
2054 * Multiply by the second argument to compute the required
2057 if (arg1_exp
== 0) {
2058 normalizeFloatx80Subnormal(arg1_sig
, &arg1_exp
, &arg1_sig
);
2060 mul128By64To192(asig0
, asig1
, arg1_sig
, &asig0
, &asig1
, &asig2
);
2061 aexp
+= arg1_exp
- 0x3ffe;
2062 /* This result is inexact. */
2064 env
->fp_status
.float_rounding_mode
= save_mode
;
2065 ST1
= normalizeRoundAndPackFloatx80(floatx80_precision_x
,
2066 arg0_sign
^ arg1_sign
, aexp
,
2067 asig0
, asig1
, &env
->fp_status
);
2068 env
->fp_status
.floatx80_rounding_precision
= save_prec
;
2071 merge_exception_flags(env
, old_flags
);
2074 void helper_fyl2x(CPUX86State
*env
)
2076 uint8_t old_flags
= save_exception_flags(env
);
2077 uint64_t arg0_sig
= extractFloatx80Frac(ST0
);
2078 int32_t arg0_exp
= extractFloatx80Exp(ST0
);
2079 bool arg0_sign
= extractFloatx80Sign(ST0
);
2080 uint64_t arg1_sig
= extractFloatx80Frac(ST1
);
2081 int32_t arg1_exp
= extractFloatx80Exp(ST1
);
2082 bool arg1_sign
= extractFloatx80Sign(ST1
);
2084 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
2085 float_raise(float_flag_invalid
, &env
->fp_status
);
2086 ST1
= floatx80_silence_nan(ST0
, &env
->fp_status
);
2087 } else if (floatx80_is_signaling_nan(ST1
, &env
->fp_status
)) {
2088 float_raise(float_flag_invalid
, &env
->fp_status
);
2089 ST1
= floatx80_silence_nan(ST1
, &env
->fp_status
);
2090 } else if (floatx80_invalid_encoding(ST0
) ||
2091 floatx80_invalid_encoding(ST1
)) {
2092 float_raise(float_flag_invalid
, &env
->fp_status
);
2093 ST1
= floatx80_default_nan(&env
->fp_status
);
2094 } else if (floatx80_is_any_nan(ST0
)) {
2096 } else if (floatx80_is_any_nan(ST1
)) {
2097 /* Pass this NaN through. */
2098 } else if (arg0_sign
&& !floatx80_is_zero(ST0
)) {
2099 float_raise(float_flag_invalid
, &env
->fp_status
);
2100 ST1
= floatx80_default_nan(&env
->fp_status
);
2101 } else if (floatx80_is_infinity(ST1
)) {
2102 FloatRelation cmp
= floatx80_compare(ST0
, floatx80_one
,
2105 case float_relation_less
:
2106 ST1
= floatx80_chs(ST1
);
2108 case float_relation_greater
:
2109 /* Result is infinity of the same sign as ST1. */
2112 float_raise(float_flag_invalid
, &env
->fp_status
);
2113 ST1
= floatx80_default_nan(&env
->fp_status
);
2116 } else if (floatx80_is_infinity(ST0
)) {
2117 if (floatx80_is_zero(ST1
)) {
2118 float_raise(float_flag_invalid
, &env
->fp_status
);
2119 ST1
= floatx80_default_nan(&env
->fp_status
);
2120 } else if (arg1_sign
) {
2121 ST1
= floatx80_chs(ST0
);
2125 } else if (floatx80_is_zero(ST0
)) {
2126 if (floatx80_is_zero(ST1
)) {
2127 float_raise(float_flag_invalid
, &env
->fp_status
);
2128 ST1
= floatx80_default_nan(&env
->fp_status
);
2130 /* Result is infinity with opposite sign to ST1. */
2131 float_raise(float_flag_divbyzero
, &env
->fp_status
);
2132 ST1
= make_floatx80(arg1_sign
? 0x7fff : 0xffff,
2133 0x8000000000000000ULL
);
2135 } else if (floatx80_is_zero(ST1
)) {
2136 if (floatx80_lt(ST0
, floatx80_one
, &env
->fp_status
)) {
2137 ST1
= floatx80_chs(ST1
);
2139 /* Otherwise, ST1 is already the correct result. */
2140 } else if (floatx80_eq(ST0
, floatx80_one
, &env
->fp_status
)) {
2142 ST1
= floatx80_chs(floatx80_zero
);
2144 ST1
= floatx80_zero
;
2149 FloatRoundMode save_mode
= env
->fp_status
.float_rounding_mode
;
2150 FloatX80RoundPrec save_prec
=
2151 env
->fp_status
.floatx80_rounding_precision
;
2152 env
->fp_status
.float_rounding_mode
= float_round_nearest_even
;
2153 env
->fp_status
.floatx80_rounding_precision
= floatx80_precision_x
;
2155 if (arg0_exp
== 0) {
2156 normalizeFloatx80Subnormal(arg0_sig
, &arg0_exp
, &arg0_sig
);
2158 if (arg1_exp
== 0) {
2159 normalizeFloatx80Subnormal(arg1_sig
, &arg1_exp
, &arg1_sig
);
2161 int_exp
= arg0_exp
- 0x3fff;
2162 if (arg0_sig
> 0xb504f333f9de6484ULL
) {
2165 arg0_m1
= floatx80_sub(floatx80_scalbn(ST0
, -int_exp
,
2167 floatx80_one
, &env
->fp_status
);
2168 if (floatx80_is_zero(arg0_m1
)) {
2169 /* Exact power of 2; multiply by ST1. */
2170 env
->fp_status
.float_rounding_mode
= save_mode
;
2171 ST1
= floatx80_mul(int32_to_floatx80(int_exp
, &env
->fp_status
),
2172 ST1
, &env
->fp_status
);
2174 bool asign
= extractFloatx80Sign(arg0_m1
);
2176 uint64_t asig0
, asig1
, asig2
;
2177 helper_fyl2x_common(env
, arg0_m1
, &aexp
, &asig0
, &asig1
);
2179 bool isign
= (int_exp
< 0);
2183 int_exp
= isign
? -int_exp
: int_exp
;
2184 shift
= clz32(int_exp
) + 32;
2187 iexp
= 0x403e - shift
;
2188 shift128RightJamming(asig0
, asig1
, iexp
- aexp
,
2190 if (asign
== isign
) {
2191 add128(isig
, 0, asig0
, asig1
, &asig0
, &asig1
);
2193 sub128(isig
, 0, asig0
, asig1
, &asig0
, &asig1
);
2199 * Multiply by the second argument to compute the required
2202 if (arg1_exp
== 0) {
2203 normalizeFloatx80Subnormal(arg1_sig
, &arg1_exp
, &arg1_sig
);
2205 mul128By64To192(asig0
, asig1
, arg1_sig
, &asig0
, &asig1
, &asig2
);
2206 aexp
+= arg1_exp
- 0x3ffe;
2207 /* This result is inexact. */
2209 env
->fp_status
.float_rounding_mode
= save_mode
;
2210 ST1
= normalizeRoundAndPackFloatx80(floatx80_precision_x
,
2211 asign
^ arg1_sign
, aexp
,
2212 asig0
, asig1
, &env
->fp_status
);
2215 env
->fp_status
.floatx80_rounding_precision
= save_prec
;
2218 merge_exception_flags(env
, old_flags
);
2221 void helper_fsqrt(CPUX86State
*env
)
2223 uint8_t old_flags
= save_exception_flags(env
);
2224 if (floatx80_is_neg(ST0
)) {
2225 env
->fpus
&= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
2228 ST0
= floatx80_sqrt(ST0
, &env
->fp_status
);
2229 merge_exception_flags(env
, old_flags
);
2232 void helper_fsincos(CPUX86State
*env
)
2234 double fptemp
= floatx80_to_double(env
, ST0
);
2236 if ((fptemp
> MAXTAN
) || (fptemp
< -MAXTAN
)) {
2239 ST0
= double_to_floatx80(env
, sin(fptemp
));
2241 ST0
= double_to_floatx80(env
, cos(fptemp
));
2242 env
->fpus
&= ~0x400; /* C2 <-- 0 */
2243 /* the above code is for |arg| < 2**63 only */
2247 void helper_frndint(CPUX86State
*env
)
2249 uint8_t old_flags
= save_exception_flags(env
);
2250 ST0
= floatx80_round_to_int(ST0
, &env
->fp_status
);
2251 merge_exception_flags(env
, old_flags
);
2254 void helper_fscale(CPUX86State
*env
)
2256 uint8_t old_flags
= save_exception_flags(env
);
2257 if (floatx80_invalid_encoding(ST1
) || floatx80_invalid_encoding(ST0
)) {
2258 float_raise(float_flag_invalid
, &env
->fp_status
);
2259 ST0
= floatx80_default_nan(&env
->fp_status
);
2260 } else if (floatx80_is_any_nan(ST1
)) {
2261 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
2262 float_raise(float_flag_invalid
, &env
->fp_status
);
2265 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
2266 float_raise(float_flag_invalid
, &env
->fp_status
);
2267 ST0
= floatx80_silence_nan(ST0
, &env
->fp_status
);
2269 } else if (floatx80_is_infinity(ST1
) &&
2270 !floatx80_invalid_encoding(ST0
) &&
2271 !floatx80_is_any_nan(ST0
)) {
2272 if (floatx80_is_neg(ST1
)) {
2273 if (floatx80_is_infinity(ST0
)) {
2274 float_raise(float_flag_invalid
, &env
->fp_status
);
2275 ST0
= floatx80_default_nan(&env
->fp_status
);
2277 ST0
= (floatx80_is_neg(ST0
) ?
2278 floatx80_chs(floatx80_zero
) :
2282 if (floatx80_is_zero(ST0
)) {
2283 float_raise(float_flag_invalid
, &env
->fp_status
);
2284 ST0
= floatx80_default_nan(&env
->fp_status
);
2286 ST0
= (floatx80_is_neg(ST0
) ?
2287 floatx80_chs(floatx80_infinity
) :
2293 FloatX80RoundPrec save
= env
->fp_status
.floatx80_rounding_precision
;
2294 uint8_t save_flags
= get_float_exception_flags(&env
->fp_status
);
2295 set_float_exception_flags(0, &env
->fp_status
);
2296 n
= floatx80_to_int32_round_to_zero(ST1
, &env
->fp_status
);
2297 set_float_exception_flags(save_flags
, &env
->fp_status
);
2298 env
->fp_status
.floatx80_rounding_precision
= floatx80_precision_x
;
2299 ST0
= floatx80_scalbn(ST0
, n
, &env
->fp_status
);
2300 env
->fp_status
.floatx80_rounding_precision
= save
;
2302 merge_exception_flags(env
, old_flags
);
2305 void helper_fsin(CPUX86State
*env
)
2307 double fptemp
= floatx80_to_double(env
, ST0
);
2309 if ((fptemp
> MAXTAN
) || (fptemp
< -MAXTAN
)) {
2312 ST0
= double_to_floatx80(env
, sin(fptemp
));
2313 env
->fpus
&= ~0x400; /* C2 <-- 0 */
2314 /* the above code is for |arg| < 2**63 only */
2318 void helper_fcos(CPUX86State
*env
)
2320 double fptemp
= floatx80_to_double(env
, ST0
);
2322 if ((fptemp
> MAXTAN
) || (fptemp
< -MAXTAN
)) {
2325 ST0
= double_to_floatx80(env
, cos(fptemp
));
2326 env
->fpus
&= ~0x400; /* C2 <-- 0 */
2327 /* the above code is for |arg| < 2**63 only */
2331 void helper_fxam_ST0(CPUX86State
*env
)
2338 env
->fpus
&= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
2340 env
->fpus
|= 0x200; /* C1 <-- 1 */
2343 if (env
->fptags
[env
->fpstt
]) {
2344 env
->fpus
|= 0x4100; /* Empty */
2348 expdif
= EXPD(temp
);
2349 if (expdif
== MAXEXPD
) {
2350 if (MANTD(temp
) == 0x8000000000000000ULL
) {
2351 env
->fpus
|= 0x500; /* Infinity */
2352 } else if (MANTD(temp
) & 0x8000000000000000ULL
) {
2353 env
->fpus
|= 0x100; /* NaN */
2355 } else if (expdif
== 0) {
2356 if (MANTD(temp
) == 0) {
2357 env
->fpus
|= 0x4000; /* Zero */
2359 env
->fpus
|= 0x4400; /* Denormal */
2361 } else if (MANTD(temp
) & 0x8000000000000000ULL
) {
2366 static void do_fstenv(CPUX86State
*env
, target_ulong ptr
, int data32
,
2369 int fpus
, fptag
, exp
, i
;
2373 fpus
= (env
->fpus
& ~0x3800) | (env
->fpstt
& 0x7) << 11;
2375 for (i
= 7; i
>= 0; i
--) {
2377 if (env
->fptags
[i
]) {
2380 tmp
.d
= env
->fpregs
[i
].d
;
2383 if (exp
== 0 && mant
== 0) {
2386 } else if (exp
== 0 || exp
== MAXEXPD
2387 || (mant
& (1LL << 63)) == 0) {
2388 /* NaNs, infinity, denormal */
2395 cpu_stl_data_ra(env
, ptr
, env
->fpuc
, retaddr
);
2396 cpu_stl_data_ra(env
, ptr
+ 4, fpus
, retaddr
);
2397 cpu_stl_data_ra(env
, ptr
+ 8, fptag
, retaddr
);
2398 cpu_stl_data_ra(env
, ptr
+ 12, env
->fpip
, retaddr
); /* fpip */
2399 cpu_stl_data_ra(env
, ptr
+ 16, env
->fpcs
, retaddr
); /* fpcs */
2400 cpu_stl_data_ra(env
, ptr
+ 20, env
->fpdp
, retaddr
); /* fpoo */
2401 cpu_stl_data_ra(env
, ptr
+ 24, env
->fpds
, retaddr
); /* fpos */
2404 cpu_stw_data_ra(env
, ptr
, env
->fpuc
, retaddr
);
2405 cpu_stw_data_ra(env
, ptr
+ 2, fpus
, retaddr
);
2406 cpu_stw_data_ra(env
, ptr
+ 4, fptag
, retaddr
);
2407 cpu_stw_data_ra(env
, ptr
+ 6, env
->fpip
, retaddr
);
2408 cpu_stw_data_ra(env
, ptr
+ 8, env
->fpcs
, retaddr
);
2409 cpu_stw_data_ra(env
, ptr
+ 10, env
->fpdp
, retaddr
);
2410 cpu_stw_data_ra(env
, ptr
+ 12, env
->fpds
, retaddr
);
2414 void helper_fstenv(CPUX86State
*env
, target_ulong ptr
, int data32
)
2416 do_fstenv(env
, ptr
, data32
, GETPC());
2419 static void cpu_set_fpus(CPUX86State
*env
, uint16_t fpus
)
2421 env
->fpstt
= (fpus
>> 11) & 7;
2422 env
->fpus
= fpus
& ~0x3800 & ~FPUS_B
;
2423 env
->fpus
|= env
->fpus
& FPUS_SE
? FPUS_B
: 0;
2424 #if !defined(CONFIG_USER_ONLY)
2425 if (!(env
->fpus
& FPUS_SE
)) {
2427 * Here the processor deasserts FERR#; in response, the chipset deasserts
2435 static void do_fldenv(CPUX86State
*env
, target_ulong ptr
, int data32
,
2441 cpu_set_fpuc(env
, cpu_lduw_data_ra(env
, ptr
, retaddr
));
2442 fpus
= cpu_lduw_data_ra(env
, ptr
+ 4, retaddr
);
2443 fptag
= cpu_lduw_data_ra(env
, ptr
+ 8, retaddr
);
2445 cpu_set_fpuc(env
, cpu_lduw_data_ra(env
, ptr
, retaddr
));
2446 fpus
= cpu_lduw_data_ra(env
, ptr
+ 2, retaddr
);
2447 fptag
= cpu_lduw_data_ra(env
, ptr
+ 4, retaddr
);
2449 cpu_set_fpus(env
, fpus
);
2450 for (i
= 0; i
< 8; i
++) {
2451 env
->fptags
[i
] = ((fptag
& 3) == 3);
2456 void helper_fldenv(CPUX86State
*env
, target_ulong ptr
, int data32
)
2458 do_fldenv(env
, ptr
, data32
, GETPC());
2461 static void do_fsave(CPUX86State
*env
, target_ulong ptr
, int data32
,
2467 do_fstenv(env
, ptr
, data32
, retaddr
);
2469 ptr
+= (14 << data32
);
2470 for (i
= 0; i
< 8; i
++) {
2472 do_fstt(env
, tmp
, ptr
, retaddr
);
2479 void helper_fsave(CPUX86State
*env
, target_ulong ptr
, int data32
)
2481 do_fsave(env
, ptr
, data32
, GETPC());
2484 static void do_frstor(CPUX86State
*env
, target_ulong ptr
, int data32
,
2490 do_fldenv(env
, ptr
, data32
, retaddr
);
2491 ptr
+= (14 << data32
);
2493 for (i
= 0; i
< 8; i
++) {
2494 tmp
= do_fldt(env
, ptr
, retaddr
);
2500 void helper_frstor(CPUX86State
*env
, target_ulong ptr
, int data32
)
2502 do_frstor(env
, ptr
, data32
, GETPC());
#if defined(CONFIG_USER_ONLY)
/*
 * Built only when CONFIG_USER_ONLY is defined: runs do_fsave() with a
 * return address of 0.
 */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fsave(env, ptr, data32, 0);
}

/*
 * Built only when CONFIG_USER_ONLY is defined: runs do_frstor() with a
 * return address of 0.
 */
void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    do_frstor(env, ptr, data32, 0);
}
#endif
/* Byte offset of member X inside X86XSaveArea. */
#define XO(X) offsetof(X86XSaveArea, X)
2519 static void do_xsave_fpu(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2524 fpus
= (env
->fpus
& ~0x3800) | (env
->fpstt
& 0x7) << 11;
2526 for (i
= 0; i
< 8; i
++) {
2527 fptag
|= (env
->fptags
[i
] << i
);
2530 cpu_stw_data_ra(env
, ptr
+ XO(legacy
.fcw
), env
->fpuc
, ra
);
2531 cpu_stw_data_ra(env
, ptr
+ XO(legacy
.fsw
), fpus
, ra
);
2532 cpu_stw_data_ra(env
, ptr
+ XO(legacy
.ftw
), fptag
^ 0xff, ra
);
2534 /* In 32-bit mode this is eip, sel, dp, sel.
2535 In 64-bit mode this is rip, rdp.
2536 But in either case we don't write actual data, just zeros. */
2537 cpu_stq_data_ra(env
, ptr
+ XO(legacy
.fpip
), 0, ra
); /* eip+sel; rip */
2538 cpu_stq_data_ra(env
, ptr
+ XO(legacy
.fpdp
), 0, ra
); /* edp+sel; rdp */
2540 addr
= ptr
+ XO(legacy
.fpregs
);
2541 for (i
= 0; i
< 8; i
++) {
2542 floatx80 tmp
= ST(i
);
2543 do_fstt(env
, tmp
, addr
, ra
);
2548 static void do_xsave_mxcsr(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2550 update_mxcsr_from_sse_status(env
);
2551 cpu_stl_data_ra(env
, ptr
+ XO(legacy
.mxcsr
), env
->mxcsr
, ra
);
2552 cpu_stl_data_ra(env
, ptr
+ XO(legacy
.mxcsr_mask
), 0x0000ffff, ra
);
2555 static void do_xsave_sse(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2560 if (env
->hflags
& HF_CS64_MASK
) {
2566 addr
= ptr
+ XO(legacy
.xmm_regs
);
2567 for (i
= 0; i
< nb_xmm_regs
; i
++) {
2568 cpu_stq_data_ra(env
, addr
, env
->xmm_regs
[i
].ZMM_Q(0), ra
);
2569 cpu_stq_data_ra(env
, addr
+ 8, env
->xmm_regs
[i
].ZMM_Q(1), ra
);
2574 static void do_xsave_bndregs(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2576 target_ulong addr
= ptr
+ offsetof(XSaveBNDREG
, bnd_regs
);
2579 for (i
= 0; i
< 4; i
++, addr
+= 16) {
2580 cpu_stq_data_ra(env
, addr
, env
->bnd_regs
[i
].lb
, ra
);
2581 cpu_stq_data_ra(env
, addr
+ 8, env
->bnd_regs
[i
].ub
, ra
);
2585 static void do_xsave_bndcsr(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2587 cpu_stq_data_ra(env
, ptr
+ offsetof(XSaveBNDCSR
, bndcsr
.cfgu
),
2588 env
->bndcs_regs
.cfgu
, ra
);
2589 cpu_stq_data_ra(env
, ptr
+ offsetof(XSaveBNDCSR
, bndcsr
.sts
),
2590 env
->bndcs_regs
.sts
, ra
);
2593 static void do_xsave_pkru(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2595 cpu_stq_data_ra(env
, ptr
, env
->pkru
, ra
);
2598 static void do_fxsave(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2600 /* The operand must be 16 byte aligned */
2602 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2605 do_xsave_fpu(env
, ptr
, ra
);
2607 if (env
->cr
[4] & CR4_OSFXSR_MASK
) {
2608 do_xsave_mxcsr(env
, ptr
, ra
);
2609 /* Fast FXSAVE leaves out the XMM registers */
2610 if (!(env
->efer
& MSR_EFER_FFXSR
)
2611 || (env
->hflags
& HF_CPL_MASK
)
2612 || !(env
->hflags
& HF_LMA_MASK
)) {
2613 do_xsave_sse(env
, ptr
, ra
);
2618 void helper_fxsave(CPUX86State
*env
, target_ulong ptr
)
2620 do_fxsave(env
, ptr
, GETPC());
2623 static uint64_t get_xinuse(CPUX86State
*env
)
2625 uint64_t inuse
= -1;
2627 /* For the most part, we don't track XINUSE. We could calculate it
2628 here for all components, but it's probably less work to simply
2629 indicate in use. That said, the state of BNDREGS is important
2630 enough to track in HFLAGS, so we might as well use that here. */
2631 if ((env
->hflags
& HF_MPX_IU_MASK
) == 0) {
2632 inuse
&= ~XSTATE_BNDREGS_MASK
;
2637 static void do_xsave(CPUX86State
*env
, target_ulong ptr
, uint64_t rfbm
,
2638 uint64_t inuse
, uint64_t opt
, uintptr_t ra
)
2640 uint64_t old_bv
, new_bv
;
2642 /* The OS must have enabled XSAVE. */
2643 if (!(env
->cr
[4] & CR4_OSXSAVE_MASK
)) {
2644 raise_exception_ra(env
, EXCP06_ILLOP
, ra
);
2647 /* The operand must be 64 byte aligned. */
2649 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2652 /* Never save anything not enabled by XCR0. */
2656 if (opt
& XSTATE_FP_MASK
) {
2657 do_xsave_fpu(env
, ptr
, ra
);
2659 if (rfbm
& XSTATE_SSE_MASK
) {
2660 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
2661 do_xsave_mxcsr(env
, ptr
, ra
);
2663 if (opt
& XSTATE_SSE_MASK
) {
2664 do_xsave_sse(env
, ptr
, ra
);
2666 if (opt
& XSTATE_BNDREGS_MASK
) {
2667 do_xsave_bndregs(env
, ptr
+ XO(bndreg_state
), ra
);
2669 if (opt
& XSTATE_BNDCSR_MASK
) {
2670 do_xsave_bndcsr(env
, ptr
+ XO(bndcsr_state
), ra
);
2672 if (opt
& XSTATE_PKRU_MASK
) {
2673 do_xsave_pkru(env
, ptr
+ XO(pkru_state
), ra
);
2676 /* Update the XSTATE_BV field. */
2677 old_bv
= cpu_ldq_data_ra(env
, ptr
+ XO(header
.xstate_bv
), ra
);
2678 new_bv
= (old_bv
& ~rfbm
) | (inuse
& rfbm
);
2679 cpu_stq_data_ra(env
, ptr
+ XO(header
.xstate_bv
), new_bv
, ra
);
2682 void helper_xsave(CPUX86State
*env
, target_ulong ptr
, uint64_t rfbm
)
2684 do_xsave(env
, ptr
, rfbm
, get_xinuse(env
), -1, GETPC());
2687 void helper_xsaveopt(CPUX86State
*env
, target_ulong ptr
, uint64_t rfbm
)
2689 uint64_t inuse
= get_xinuse(env
);
2690 do_xsave(env
, ptr
, rfbm
, inuse
, inuse
, GETPC());
2693 static void do_xrstor_fpu(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2695 int i
, fpuc
, fpus
, fptag
;
2698 fpuc
= cpu_lduw_data_ra(env
, ptr
+ XO(legacy
.fcw
), ra
);
2699 fpus
= cpu_lduw_data_ra(env
, ptr
+ XO(legacy
.fsw
), ra
);
2700 fptag
= cpu_lduw_data_ra(env
, ptr
+ XO(legacy
.ftw
), ra
);
2701 cpu_set_fpuc(env
, fpuc
);
2702 cpu_set_fpus(env
, fpus
);
2704 for (i
= 0; i
< 8; i
++) {
2705 env
->fptags
[i
] = ((fptag
>> i
) & 1);
2708 addr
= ptr
+ XO(legacy
.fpregs
);
2709 for (i
= 0; i
< 8; i
++) {
2710 floatx80 tmp
= do_fldt(env
, addr
, ra
);
2716 static void do_xrstor_mxcsr(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2718 cpu_set_mxcsr(env
, cpu_ldl_data_ra(env
, ptr
+ XO(legacy
.mxcsr
), ra
));
2721 static void do_xrstor_sse(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2726 if (env
->hflags
& HF_CS64_MASK
) {
2732 addr
= ptr
+ XO(legacy
.xmm_regs
);
2733 for (i
= 0; i
< nb_xmm_regs
; i
++) {
2734 env
->xmm_regs
[i
].ZMM_Q(0) = cpu_ldq_data_ra(env
, addr
, ra
);
2735 env
->xmm_regs
[i
].ZMM_Q(1) = cpu_ldq_data_ra(env
, addr
+ 8, ra
);
2740 static void do_xrstor_bndregs(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2742 target_ulong addr
= ptr
+ offsetof(XSaveBNDREG
, bnd_regs
);
2745 for (i
= 0; i
< 4; i
++, addr
+= 16) {
2746 env
->bnd_regs
[i
].lb
= cpu_ldq_data_ra(env
, addr
, ra
);
2747 env
->bnd_regs
[i
].ub
= cpu_ldq_data_ra(env
, addr
+ 8, ra
);
2751 static void do_xrstor_bndcsr(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2753 /* FIXME: Extend highest implemented bit of linear address. */
2754 env
->bndcs_regs
.cfgu
2755 = cpu_ldq_data_ra(env
, ptr
+ offsetof(XSaveBNDCSR
, bndcsr
.cfgu
), ra
);
2757 = cpu_ldq_data_ra(env
, ptr
+ offsetof(XSaveBNDCSR
, bndcsr
.sts
), ra
);
2760 static void do_xrstor_pkru(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2762 env
->pkru
= cpu_ldq_data_ra(env
, ptr
, ra
);
2765 static void do_fxrstor(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2767 /* The operand must be 16 byte aligned */
2769 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2772 do_xrstor_fpu(env
, ptr
, ra
);
2774 if (env
->cr
[4] & CR4_OSFXSR_MASK
) {
2775 do_xrstor_mxcsr(env
, ptr
, ra
);
2776 /* Fast FXRSTOR leaves out the XMM registers */
2777 if (!(env
->efer
& MSR_EFER_FFXSR
)
2778 || (env
->hflags
& HF_CPL_MASK
)
2779 || !(env
->hflags
& HF_LMA_MASK
)) {
2780 do_xrstor_sse(env
, ptr
, ra
);
2785 void helper_fxrstor(CPUX86State
*env
, target_ulong ptr
)
2787 do_fxrstor(env
, ptr
, GETPC());
#if defined(CONFIG_USER_ONLY)
/*
 * Built only when CONFIG_USER_ONLY is defined: runs do_fxsave() with a
 * return address of 0.
 */
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    do_fxsave(env, ptr, 0);
}

/*
 * Built only when CONFIG_USER_ONLY is defined: runs do_fxrstor() with a
 * return address of 0.
 */
void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    do_fxrstor(env, ptr, 0);
}
#endif
2802 void helper_xrstor(CPUX86State
*env
, target_ulong ptr
, uint64_t rfbm
)
2804 uintptr_t ra
= GETPC();
2805 uint64_t xstate_bv
, xcomp_bv
, reserve0
;
2809 /* The OS must have enabled XSAVE. */
2810 if (!(env
->cr
[4] & CR4_OSXSAVE_MASK
)) {
2811 raise_exception_ra(env
, EXCP06_ILLOP
, ra
);
2814 /* The operand must be 64 byte aligned. */
2816 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2819 xstate_bv
= cpu_ldq_data_ra(env
, ptr
+ XO(header
.xstate_bv
), ra
);
2821 if ((int64_t)xstate_bv
< 0) {
2822 /* FIXME: Compact form. */
2823 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2826 /* Standard form. */
2828 /* The XSTATE_BV field must not set bits not present in XCR0. */
2829 if (xstate_bv
& ~env
->xcr0
) {
2830 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2833 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
2834 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
2835 describes only XCOMP_BV, but the description of the standard form
2836 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
2837 includes the next 64-bit field. */
2838 xcomp_bv
= cpu_ldq_data_ra(env
, ptr
+ XO(header
.xcomp_bv
), ra
);
2839 reserve0
= cpu_ldq_data_ra(env
, ptr
+ XO(header
.reserve0
), ra
);
2840 if (xcomp_bv
|| reserve0
) {
2841 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2844 if (rfbm
& XSTATE_FP_MASK
) {
2845 if (xstate_bv
& XSTATE_FP_MASK
) {
2846 do_xrstor_fpu(env
, ptr
, ra
);
2849 memset(env
->fpregs
, 0, sizeof(env
->fpregs
));
2852 if (rfbm
& XSTATE_SSE_MASK
) {
2853 /* Note that the standard form of XRSTOR loads MXCSR from memory
2854 whether or not the XSTATE_BV bit is set. */
2855 do_xrstor_mxcsr(env
, ptr
, ra
);
2856 if (xstate_bv
& XSTATE_SSE_MASK
) {
2857 do_xrstor_sse(env
, ptr
, ra
);
2859 /* ??? When AVX is implemented, we may have to be more
2860 selective in the clearing. */
2861 memset(env
->xmm_regs
, 0, sizeof(env
->xmm_regs
));
2864 if (rfbm
& XSTATE_BNDREGS_MASK
) {
2865 if (xstate_bv
& XSTATE_BNDREGS_MASK
) {
2866 do_xrstor_bndregs(env
, ptr
+ XO(bndreg_state
), ra
);
2867 env
->hflags
|= HF_MPX_IU_MASK
;
2869 memset(env
->bnd_regs
, 0, sizeof(env
->bnd_regs
));
2870 env
->hflags
&= ~HF_MPX_IU_MASK
;
2873 if (rfbm
& XSTATE_BNDCSR_MASK
) {
2874 if (xstate_bv
& XSTATE_BNDCSR_MASK
) {
2875 do_xrstor_bndcsr(env
, ptr
+ XO(bndcsr_state
), ra
);
2877 memset(&env
->bndcs_regs
, 0, sizeof(env
->bndcs_regs
));
2879 cpu_sync_bndcs_hflags(env
);
2881 if (rfbm
& XSTATE_PKRU_MASK
) {
2882 uint64_t old_pkru
= env
->pkru
;
2883 if (xstate_bv
& XSTATE_PKRU_MASK
) {
2884 do_xrstor_pkru(env
, ptr
+ XO(pkru_state
), ra
);
2888 if (env
->pkru
!= old_pkru
) {
2889 CPUState
*cs
= env_cpu(env
);
2897 uint64_t helper_xgetbv(CPUX86State
*env
, uint32_t ecx
)
2899 /* The OS must have enabled XSAVE. */
2900 if (!(env
->cr
[4] & CR4_OSXSAVE_MASK
)) {
2901 raise_exception_ra(env
, EXCP06_ILLOP
, GETPC());
2908 if (env
->features
[FEAT_XSAVE
] & CPUID_XSAVE_XGETBV1
) {
2909 return env
->xcr0
& get_xinuse(env
);
2913 raise_exception_ra(env
, EXCP0D_GPF
, GETPC());
2916 void helper_xsetbv(CPUX86State
*env
, uint32_t ecx
, uint64_t mask
)
2918 uint32_t dummy
, ena_lo
, ena_hi
;
2921 /* The OS must have enabled XSAVE. */
2922 if (!(env
->cr
[4] & CR4_OSXSAVE_MASK
)) {
2923 raise_exception_ra(env
, EXCP06_ILLOP
, GETPC());
2926 /* Only XCR0 is defined at present; the FPU may not be disabled. */
2927 if (ecx
!= 0 || (mask
& XSTATE_FP_MASK
) == 0) {
2931 /* Disallow enabling unimplemented features. */
2932 cpu_x86_cpuid(env
, 0x0d, 0, &ena_lo
, &dummy
, &dummy
, &ena_hi
);
2933 ena
= ((uint64_t)ena_hi
<< 32) | ena_lo
;
2938 /* Disallow enabling only half of MPX. */
2939 if ((mask
^ (mask
* (XSTATE_BNDCSR_MASK
/ XSTATE_BNDREGS_MASK
)))
2940 & XSTATE_BNDCSR_MASK
) {
2945 cpu_sync_bndcs_hflags(env
);
2949 raise_exception_ra(env
, EXCP0D_GPF
, GETPC());
2953 /* XXX: optimize by storing fpstt and fptags in the static cpu state */
2955 #define SSE_DAZ 0x0040
2956 #define SSE_RC_MASK 0x6000
2957 #define SSE_RC_NEAR 0x0000
2958 #define SSE_RC_DOWN 0x2000
2959 #define SSE_RC_UP 0x4000
2960 #define SSE_RC_CHOP 0x6000
2961 #define SSE_FZ 0x8000
2963 void update_mxcsr_status(CPUX86State
*env
)
2965 uint32_t mxcsr
= env
->mxcsr
;
2968 /* set rounding mode */
2969 switch (mxcsr
& SSE_RC_MASK
) {
2972 rnd_type
= float_round_nearest_even
;
2975 rnd_type
= float_round_down
;
2978 rnd_type
= float_round_up
;
2981 rnd_type
= float_round_to_zero
;
2984 set_float_rounding_mode(rnd_type
, &env
->sse_status
);
2986 /* Set exception flags. */
2987 set_float_exception_flags((mxcsr
& FPUS_IE
? float_flag_invalid
: 0) |
2988 (mxcsr
& FPUS_ZE
? float_flag_divbyzero
: 0) |
2989 (mxcsr
& FPUS_OE
? float_flag_overflow
: 0) |
2990 (mxcsr
& FPUS_UE
? float_flag_underflow
: 0) |
2991 (mxcsr
& FPUS_PE
? float_flag_inexact
: 0),
2994 /* set denormals are zero */
2995 set_flush_inputs_to_zero((mxcsr
& SSE_DAZ
) ? 1 : 0, &env
->sse_status
);
2997 /* set flush to zero */
2998 set_flush_to_zero((mxcsr
& SSE_FZ
) ? 1 : 0, &env
->sse_status
);
3001 void update_mxcsr_from_sse_status(CPUX86State
*env
)
3003 uint8_t flags
= get_float_exception_flags(&env
->sse_status
);
3005 * The MXCSR denormal flag has opposite semantics to
3006 * float_flag_input_denormal (the softfloat code sets that flag
3007 * only when flushing input denormals to zero, but SSE sets it
3008 * only when not flushing them to zero), so is not converted
3011 env
->mxcsr
|= ((flags
& float_flag_invalid
? FPUS_IE
: 0) |
3012 (flags
& float_flag_divbyzero
? FPUS_ZE
: 0) |
3013 (flags
& float_flag_overflow
? FPUS_OE
: 0) |
3014 (flags
& float_flag_underflow
? FPUS_UE
: 0) |
3015 (flags
& float_flag_inexact
? FPUS_PE
: 0) |
3016 (flags
& float_flag_output_denormal
? FPUS_UE
| FPUS_PE
:
3020 void helper_update_mxcsr(CPUX86State
*env
)
3022 update_mxcsr_from_sse_status(env
);
3025 void helper_ldmxcsr(CPUX86State
*env
, uint32_t val
)
3027 cpu_set_mxcsr(env
, val
);
3030 void helper_enter_mmx(CPUX86State
*env
)
3033 *(uint32_t *)(env
->fptags
) = 0;
3034 *(uint32_t *)(env
->fptags
+ 4) = 0;
3037 void helper_emms(CPUX86State
*env
)
3039 /* set to empty state */
3040 *(uint32_t *)(env
->fptags
) = 0x01010101;
3041 *(uint32_t *)(env
->fptags
+ 4) = 0x01010101;
3045 void helper_movq(CPUX86State
*env
, void *d
, void *s
)
3047 *(uint64_t *)d
= *(uint64_t *)s
;
3051 #include "ops_sse.h"
3054 #include "ops_sse.h"