]> git.proxmox.com Git - mirror_qemu.git/blame - target-i386/fpu_helper.c
Open 2.9 development tree
[mirror_qemu.git] / target-i386 / fpu_helper.c
CommitLineData
f299f437
BS
1/*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
b6a0aa05 20#include "qemu/osdep.h"
f299f437
BS
21#include <math.h>
22#include "cpu.h"
2ef6175a 23#include "exec/helper-proto.h"
c334a388 24#include "qemu/host-utils.h"
63c91552 25#include "exec/exec-all.h"
f08b6170 26#include "exec/cpu_ldst.h"
92fc4b58 27
f299f437
BS
/* x87 control word: rounding-control field mask and its four encodings */
#define FPU_RC_MASK         0xc00
#define FPU_RC_NEAR         0x000
#define FPU_RC_DOWN         0x400
#define FPU_RC_UP           0x800
#define FPU_RC_CHOP         0xc00

/* 2**63; operand magnitude limit tested by the trigonometric helpers */
#define MAXTAN 9223372036854775808.0

/*
 * The following deal with x86 long double-precision numbers.
 * "fp" is any lvalue with .l.upper (sign + 15-bit exponent) and
 * .l.lower (64-bit mantissa) members; it is parenthesised so that
 * expressions such as *p can be passed.
 */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp)        ((fp).l.upper & 0x7fff)
#define SIGND(fp)       ((fp).l.upper & 0x8000)
#define MANTD(fp)       ((fp).l.lower)
/* replace the exponent field with the bias, keeping the sign bit */
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)

/* x87 status word bits */
#define FPUS_IE (1 << 0)
#define FPUS_DE (1 << 1)
#define FPUS_ZE (1 << 2)
#define FPUS_OE (1 << 3)
#define FPUS_UE (1 << 4)
#define FPUS_PE (1 << 5)
#define FPUS_SF (1 << 6)
#define FPUS_SE (1 << 7)
#define FPUS_B  (1 << 15)

/* exception mask bits of the control word */
#define FPUC_EM 0x3f

/* floatx80 constants loaded by the fldlg2 / fldl2e / fldl2t helpers */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
59
d3eb5eae 60static inline void fpush(CPUX86State *env)
f299f437
BS
61{
62 env->fpstt = (env->fpstt - 1) & 7;
63 env->fptags[env->fpstt] = 0; /* validate stack entry */
64}
65
d3eb5eae 66static inline void fpop(CPUX86State *env)
f299f437
BS
67{
68 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
69 env->fpstt = (env->fpstt + 1) & 7;
70}
71
6cad09d2
PD
72static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
73 uintptr_t retaddr)
f299f437
BS
74{
75 CPU_LDoubleU temp;
76
6cad09d2
PD
77 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
78 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
f299f437
BS
79 return temp.d;
80}
81
6cad09d2
PD
82static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
83 uintptr_t retaddr)
f299f437
BS
84{
85 CPU_LDoubleU temp;
86
87 temp.d = f;
6cad09d2
PD
88 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
89 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
f299f437
BS
90}
91
92/* x87 FPU helpers */
93
d3eb5eae 94static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
f299f437
BS
95{
96 union {
97 float64 f64;
98 double d;
99 } u;
100
101 u.f64 = floatx80_to_float64(a, &env->fp_status);
102 return u.d;
103}
104
d3eb5eae 105static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
f299f437
BS
106{
107 union {
108 float64 f64;
109 double d;
110 } u;
111
112 u.d = a;
113 return float64_to_floatx80(u.f64, &env->fp_status);
114}
115
d3eb5eae 116static void fpu_set_exception(CPUX86State *env, int mask)
f299f437
BS
117{
118 env->fpus |= mask;
119 if (env->fpus & (~env->fpuc & FPUC_EM)) {
120 env->fpus |= FPUS_SE | FPUS_B;
121 }
122}
123
d3eb5eae 124static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
f299f437
BS
125{
126 if (floatx80_is_zero(b)) {
d3eb5eae 127 fpu_set_exception(env, FPUS_ZE);
f299f437
BS
128 }
129 return floatx80_div(a, b, &env->fp_status);
130}
131
6cad09d2 132static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
f299f437
BS
133{
134 if (env->cr[0] & CR0_NE_MASK) {
6cad09d2 135 raise_exception_ra(env, EXCP10_COPR, retaddr);
f299f437
BS
136 }
137#if !defined(CONFIG_USER_ONLY)
138 else {
139 cpu_set_ferr(env);
140 }
141#endif
142}
143
d3eb5eae 144void helper_flds_FT0(CPUX86State *env, uint32_t val)
f299f437
BS
145{
146 union {
147 float32 f;
148 uint32_t i;
149 } u;
150
151 u.i = val;
152 FT0 = float32_to_floatx80(u.f, &env->fp_status);
153}
154
d3eb5eae 155void helper_fldl_FT0(CPUX86State *env, uint64_t val)
f299f437
BS
156{
157 union {
158 float64 f;
159 uint64_t i;
160 } u;
161
162 u.i = val;
163 FT0 = float64_to_floatx80(u.f, &env->fp_status);
164}
165
d3eb5eae 166void helper_fildl_FT0(CPUX86State *env, int32_t val)
f299f437
BS
167{
168 FT0 = int32_to_floatx80(val, &env->fp_status);
169}
170
d3eb5eae 171void helper_flds_ST0(CPUX86State *env, uint32_t val)
f299f437
BS
172{
173 int new_fpstt;
174 union {
175 float32 f;
176 uint32_t i;
177 } u;
178
179 new_fpstt = (env->fpstt - 1) & 7;
180 u.i = val;
181 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
182 env->fpstt = new_fpstt;
183 env->fptags[new_fpstt] = 0; /* validate stack entry */
184}
185
d3eb5eae 186void helper_fldl_ST0(CPUX86State *env, uint64_t val)
f299f437
BS
187{
188 int new_fpstt;
189 union {
190 float64 f;
191 uint64_t i;
192 } u;
193
194 new_fpstt = (env->fpstt - 1) & 7;
195 u.i = val;
196 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
197 env->fpstt = new_fpstt;
198 env->fptags[new_fpstt] = 0; /* validate stack entry */
199}
200
d3eb5eae 201void helper_fildl_ST0(CPUX86State *env, int32_t val)
f299f437
BS
202{
203 int new_fpstt;
204
205 new_fpstt = (env->fpstt - 1) & 7;
206 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
207 env->fpstt = new_fpstt;
208 env->fptags[new_fpstt] = 0; /* validate stack entry */
209}
210
d3eb5eae 211void helper_fildll_ST0(CPUX86State *env, int64_t val)
f299f437
BS
212{
213 int new_fpstt;
214
215 new_fpstt = (env->fpstt - 1) & 7;
216 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
217 env->fpstt = new_fpstt;
218 env->fptags[new_fpstt] = 0; /* validate stack entry */
219}
220
d3eb5eae 221uint32_t helper_fsts_ST0(CPUX86State *env)
f299f437
BS
222{
223 union {
224 float32 f;
225 uint32_t i;
226 } u;
227
228 u.f = floatx80_to_float32(ST0, &env->fp_status);
229 return u.i;
230}
231
d3eb5eae 232uint64_t helper_fstl_ST0(CPUX86State *env)
f299f437
BS
233{
234 union {
235 float64 f;
236 uint64_t i;
237 } u;
238
239 u.f = floatx80_to_float64(ST0, &env->fp_status);
240 return u.i;
241}
242
d3eb5eae 243int32_t helper_fist_ST0(CPUX86State *env)
f299f437
BS
244{
245 int32_t val;
246
247 val = floatx80_to_int32(ST0, &env->fp_status);
248 if (val != (int16_t)val) {
249 val = -32768;
250 }
251 return val;
252}
253
d3eb5eae 254int32_t helper_fistl_ST0(CPUX86State *env)
f299f437
BS
255{
256 int32_t val;
ea32aaf1
DP
257 signed char old_exp_flags;
258
259 old_exp_flags = get_float_exception_flags(&env->fp_status);
260 set_float_exception_flags(0, &env->fp_status);
f299f437
BS
261
262 val = floatx80_to_int32(ST0, &env->fp_status);
ea32aaf1
DP
263 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
264 val = 0x80000000;
265 }
266 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
267 | old_exp_flags, &env->fp_status);
f299f437
BS
268 return val;
269}
270
d3eb5eae 271int64_t helper_fistll_ST0(CPUX86State *env)
f299f437
BS
272{
273 int64_t val;
ea32aaf1
DP
274 signed char old_exp_flags;
275
276 old_exp_flags = get_float_exception_flags(&env->fp_status);
277 set_float_exception_flags(0, &env->fp_status);
f299f437 278
178846bd 279 val = floatx80_to_int64(ST0, &env->fp_status);
ea32aaf1
DP
280 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
281 val = 0x8000000000000000ULL;
282 }
283 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
284 | old_exp_flags, &env->fp_status);
f299f437
BS
285 return val;
286}
287
d3eb5eae 288int32_t helper_fistt_ST0(CPUX86State *env)
f299f437
BS
289{
290 int32_t val;
291
292 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
293 if (val != (int16_t)val) {
294 val = -32768;
295 }
296 return val;
297}
298
d3eb5eae 299int32_t helper_fisttl_ST0(CPUX86State *env)
f299f437 300{
9be38598 301 return floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
f299f437
BS
302}
303
d3eb5eae 304int64_t helper_fisttll_ST0(CPUX86State *env)
f299f437 305{
9be38598 306 return floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
f299f437
BS
307}
308
d3eb5eae 309void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
f299f437
BS
310{
311 int new_fpstt;
312
313 new_fpstt = (env->fpstt - 1) & 7;
6cad09d2 314 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
f299f437
BS
315 env->fpstt = new_fpstt;
316 env->fptags[new_fpstt] = 0; /* validate stack entry */
317}
318
d3eb5eae 319void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
f299f437 320{
6cad09d2 321 helper_fstt(env, ST0, ptr, GETPC());
f299f437
BS
322}
323
d3eb5eae 324void helper_fpush(CPUX86State *env)
f299f437 325{
d3eb5eae 326 fpush(env);
f299f437
BS
327}
328
d3eb5eae 329void helper_fpop(CPUX86State *env)
f299f437 330{
d3eb5eae 331 fpop(env);
f299f437
BS
332}
333
d3eb5eae 334void helper_fdecstp(CPUX86State *env)
f299f437
BS
335{
336 env->fpstt = (env->fpstt - 1) & 7;
337 env->fpus &= ~0x4700;
338}
339
d3eb5eae 340void helper_fincstp(CPUX86State *env)
f299f437
BS
341{
342 env->fpstt = (env->fpstt + 1) & 7;
343 env->fpus &= ~0x4700;
344}
345
346/* FPU move */
347
d3eb5eae 348void helper_ffree_STN(CPUX86State *env, int st_index)
f299f437
BS
349{
350 env->fptags[(env->fpstt + st_index) & 7] = 1;
351}
352
d3eb5eae 353void helper_fmov_ST0_FT0(CPUX86State *env)
f299f437
BS
354{
355 ST0 = FT0;
356}
357
d3eb5eae 358void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
f299f437
BS
359{
360 FT0 = ST(st_index);
361}
362
d3eb5eae 363void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
f299f437
BS
364{
365 ST0 = ST(st_index);
366}
367
d3eb5eae 368void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
369{
370 ST(st_index) = ST0;
371}
372
d3eb5eae 373void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
f299f437
BS
374{
375 floatx80 tmp;
376
377 tmp = ST(st_index);
378 ST(st_index) = ST0;
379 ST0 = tmp;
380}
381
382/* FPU operations */
383
/*
 * Status-word condition bits for fcom/fucom, indexed by compare result + 1.
 * NOTE(review): the four slots presumably correspond to less, equal,
 * greater and unordered; confirm against softfloat's compare return values.
 */
static const int fcom_ccval[4] = {
    0x0100,
    0x4000,
    0x0000,
    0x4500,
};
385
d3eb5eae 386void helper_fcom_ST0_FT0(CPUX86State *env)
f299f437
BS
387{
388 int ret;
389
390 ret = floatx80_compare(ST0, FT0, &env->fp_status);
391 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
392}
393
d3eb5eae 394void helper_fucom_ST0_FT0(CPUX86State *env)
f299f437
BS
395{
396 int ret;
397
398 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
399 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
400}
401
402static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
403
d3eb5eae 404void helper_fcomi_ST0_FT0(CPUX86State *env)
f299f437
BS
405{
406 int eflags;
407 int ret;
408
409 ret = floatx80_compare(ST0, FT0, &env->fp_status);
d3eb5eae 410 eflags = cpu_cc_compute_all(env, CC_OP);
f299f437
BS
411 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
412 CC_SRC = eflags;
413}
414
d3eb5eae 415void helper_fucomi_ST0_FT0(CPUX86State *env)
f299f437
BS
416{
417 int eflags;
418 int ret;
419
420 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
d3eb5eae 421 eflags = cpu_cc_compute_all(env, CC_OP);
f299f437
BS
422 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
423 CC_SRC = eflags;
424}
425
d3eb5eae 426void helper_fadd_ST0_FT0(CPUX86State *env)
f299f437
BS
427{
428 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
429}
430
d3eb5eae 431void helper_fmul_ST0_FT0(CPUX86State *env)
f299f437
BS
432{
433 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
434}
435
d3eb5eae 436void helper_fsub_ST0_FT0(CPUX86State *env)
f299f437
BS
437{
438 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
439}
440
d3eb5eae 441void helper_fsubr_ST0_FT0(CPUX86State *env)
f299f437
BS
442{
443 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
444}
445
d3eb5eae 446void helper_fdiv_ST0_FT0(CPUX86State *env)
f299f437 447{
d3eb5eae 448 ST0 = helper_fdiv(env, ST0, FT0);
f299f437
BS
449}
450
d3eb5eae 451void helper_fdivr_ST0_FT0(CPUX86State *env)
f299f437 452{
d3eb5eae 453 ST0 = helper_fdiv(env, FT0, ST0);
f299f437
BS
454}
455
456/* fp operations between STN and ST0 */
457
d3eb5eae 458void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
459{
460 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
461}
462
d3eb5eae 463void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
464{
465 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
466}
467
d3eb5eae 468void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
469{
470 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
471}
472
d3eb5eae 473void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
474{
475 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
476}
477
d3eb5eae 478void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
479{
480 floatx80 *p;
481
482 p = &ST(st_index);
d3eb5eae 483 *p = helper_fdiv(env, *p, ST0);
f299f437
BS
484}
485
d3eb5eae 486void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
487{
488 floatx80 *p;
489
490 p = &ST(st_index);
d3eb5eae 491 *p = helper_fdiv(env, ST0, *p);
f299f437
BS
492}
493
494/* misc FPU operations */
d3eb5eae 495void helper_fchs_ST0(CPUX86State *env)
f299f437
BS
496{
497 ST0 = floatx80_chs(ST0);
498}
499
d3eb5eae 500void helper_fabs_ST0(CPUX86State *env)
f299f437
BS
501{
502 ST0 = floatx80_abs(ST0);
503}
504
d3eb5eae 505void helper_fld1_ST0(CPUX86State *env)
f299f437
BS
506{
507 ST0 = floatx80_one;
508}
509
d3eb5eae 510void helper_fldl2t_ST0(CPUX86State *env)
f299f437
BS
511{
512 ST0 = floatx80_l2t;
513}
514
d3eb5eae 515void helper_fldl2e_ST0(CPUX86State *env)
f299f437
BS
516{
517 ST0 = floatx80_l2e;
518}
519
d3eb5eae 520void helper_fldpi_ST0(CPUX86State *env)
f299f437
BS
521{
522 ST0 = floatx80_pi;
523}
524
d3eb5eae 525void helper_fldlg2_ST0(CPUX86State *env)
f299f437
BS
526{
527 ST0 = floatx80_lg2;
528}
529
d3eb5eae 530void helper_fldln2_ST0(CPUX86State *env)
f299f437
BS
531{
532 ST0 = floatx80_ln2;
533}
534
d3eb5eae 535void helper_fldz_ST0(CPUX86State *env)
f299f437
BS
536{
537 ST0 = floatx80_zero;
538}
539
d3eb5eae 540void helper_fldz_FT0(CPUX86State *env)
f299f437
BS
541{
542 FT0 = floatx80_zero;
543}
544
d3eb5eae 545uint32_t helper_fnstsw(CPUX86State *env)
f299f437
BS
546{
547 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
548}
549
d3eb5eae 550uint32_t helper_fnstcw(CPUX86State *env)
f299f437
BS
551{
552 return env->fpuc;
553}
554
5bde1407 555void update_fp_status(CPUX86State *env)
f299f437
BS
556{
557 int rnd_type;
558
559 /* set rounding mode */
560 switch (env->fpuc & FPU_RC_MASK) {
561 default:
562 case FPU_RC_NEAR:
563 rnd_type = float_round_nearest_even;
564 break;
565 case FPU_RC_DOWN:
566 rnd_type = float_round_down;
567 break;
568 case FPU_RC_UP:
569 rnd_type = float_round_up;
570 break;
571 case FPU_RC_CHOP:
572 rnd_type = float_round_to_zero;
573 break;
574 }
575 set_float_rounding_mode(rnd_type, &env->fp_status);
576 switch ((env->fpuc >> 8) & 3) {
577 case 0:
578 rnd_type = 32;
579 break;
580 case 2:
581 rnd_type = 64;
582 break;
583 case 3:
584 default:
585 rnd_type = 80;
586 break;
587 }
588 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
589}
590
d3eb5eae 591void helper_fldcw(CPUX86State *env, uint32_t val)
f299f437 592{
5bde1407 593 cpu_set_fpuc(env, val);
f299f437
BS
594}
595
d3eb5eae 596void helper_fclex(CPUX86State *env)
f299f437
BS
597{
598 env->fpus &= 0x7f00;
599}
600
d3eb5eae 601void helper_fwait(CPUX86State *env)
f299f437
BS
602{
603 if (env->fpus & FPUS_SE) {
6cad09d2 604 fpu_raise_exception(env, GETPC());
f299f437
BS
605 }
606}
607
d3eb5eae 608void helper_fninit(CPUX86State *env)
f299f437
BS
609{
610 env->fpus = 0;
611 env->fpstt = 0;
5bde1407 612 cpu_set_fpuc(env, 0x37f);
f299f437
BS
613 env->fptags[0] = 1;
614 env->fptags[1] = 1;
615 env->fptags[2] = 1;
616 env->fptags[3] = 1;
617 env->fptags[4] = 1;
618 env->fptags[5] = 1;
619 env->fptags[6] = 1;
620 env->fptags[7] = 1;
621}
622
623/* BCD ops */
624
d3eb5eae 625void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
f299f437
BS
626{
627 floatx80 tmp;
628 uint64_t val;
629 unsigned int v;
630 int i;
631
632 val = 0;
633 for (i = 8; i >= 0; i--) {
6cad09d2 634 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
f299f437
BS
635 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
636 }
637 tmp = int64_to_floatx80(val, &env->fp_status);
6cad09d2 638 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
18b41f95 639 tmp = floatx80_chs(tmp);
f299f437 640 }
d3eb5eae 641 fpush(env);
f299f437
BS
642 ST0 = tmp;
643}
644
d3eb5eae 645void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
f299f437
BS
646{
647 int v;
648 target_ulong mem_ref, mem_end;
649 int64_t val;
650
651 val = floatx80_to_int64(ST0, &env->fp_status);
652 mem_ref = ptr;
653 mem_end = mem_ref + 9;
654 if (val < 0) {
6cad09d2 655 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
f299f437
BS
656 val = -val;
657 } else {
6cad09d2 658 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
f299f437
BS
659 }
660 while (mem_ref < mem_end) {
661 if (val == 0) {
662 break;
663 }
664 v = val % 100;
665 val = val / 100;
666 v = ((v / 10) << 4) | (v % 10);
6cad09d2 667 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
f299f437
BS
668 }
669 while (mem_ref < mem_end) {
6cad09d2 670 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
f299f437
BS
671 }
672}
673
d3eb5eae 674void helper_f2xm1(CPUX86State *env)
f299f437 675{
d3eb5eae 676 double val = floatx80_to_double(env, ST0);
f299f437
BS
677
678 val = pow(2.0, val) - 1.0;
d3eb5eae 679 ST0 = double_to_floatx80(env, val);
f299f437
BS
680}
681
d3eb5eae 682void helper_fyl2x(CPUX86State *env)
f299f437 683{
d3eb5eae 684 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
685
686 if (fptemp > 0.0) {
687 fptemp = log(fptemp) / log(2.0); /* log2(ST) */
d3eb5eae
BS
688 fptemp *= floatx80_to_double(env, ST1);
689 ST1 = double_to_floatx80(env, fptemp);
690 fpop(env);
f299f437
BS
691 } else {
692 env->fpus &= ~0x4700;
693 env->fpus |= 0x400;
694 }
695}
696
d3eb5eae 697void helper_fptan(CPUX86State *env)
f299f437 698{
d3eb5eae 699 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
700
701 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
702 env->fpus |= 0x400;
703 } else {
704 fptemp = tan(fptemp);
d3eb5eae
BS
705 ST0 = double_to_floatx80(env, fptemp);
706 fpush(env);
f299f437
BS
707 ST0 = floatx80_one;
708 env->fpus &= ~0x400; /* C2 <-- 0 */
709 /* the above code is for |arg| < 2**52 only */
710 }
711}
712
d3eb5eae 713void helper_fpatan(CPUX86State *env)
f299f437
BS
714{
715 double fptemp, fpsrcop;
716
d3eb5eae
BS
717 fpsrcop = floatx80_to_double(env, ST1);
718 fptemp = floatx80_to_double(env, ST0);
719 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
720 fpop(env);
f299f437
BS
721}
722
d3eb5eae 723void helper_fxtract(CPUX86State *env)
f299f437
BS
724{
725 CPU_LDoubleU temp;
726
727 temp.d = ST0;
728
729 if (floatx80_is_zero(ST0)) {
730 /* Easy way to generate -inf and raising division by 0 exception */
731 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
732 &env->fp_status);
d3eb5eae 733 fpush(env);
f299f437
BS
734 ST0 = temp.d;
735 } else {
736 int expdif;
737
738 expdif = EXPD(temp) - EXPBIAS;
739 /* DP exponent bias */
740 ST0 = int32_to_floatx80(expdif, &env->fp_status);
d3eb5eae 741 fpush(env);
f299f437
BS
742 BIASEXPONENT(temp);
743 ST0 = temp.d;
744 }
745}
746
d3eb5eae 747void helper_fprem1(CPUX86State *env)
f299f437
BS
748{
749 double st0, st1, dblq, fpsrcop, fptemp;
750 CPU_LDoubleU fpsrcop1, fptemp1;
751 int expdif;
752 signed long long int q;
753
d3eb5eae
BS
754 st0 = floatx80_to_double(env, ST0);
755 st1 = floatx80_to_double(env, ST1);
f299f437
BS
756
757 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
d3eb5eae 758 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
f299f437
BS
759 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
760 return;
761 }
762
763 fpsrcop = st0;
764 fptemp = st1;
765 fpsrcop1.d = ST0;
766 fptemp1.d = ST1;
767 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
768
769 if (expdif < 0) {
770 /* optimisation? taken from the AMD docs */
771 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
772 /* ST0 is unchanged */
773 return;
774 }
775
776 if (expdif < 53) {
777 dblq = fpsrcop / fptemp;
778 /* round dblq towards nearest integer */
779 dblq = rint(dblq);
780 st0 = fpsrcop - fptemp * dblq;
781
782 /* convert dblq to q by truncating towards zero */
783 if (dblq < 0.0) {
784 q = (signed long long int)(-dblq);
785 } else {
786 q = (signed long long int)dblq;
787 }
788
789 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
790 /* (C0,C3,C1) <-- (q2,q1,q0) */
791 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
792 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
793 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
794 } else {
795 env->fpus |= 0x400; /* C2 <-- 1 */
796 fptemp = pow(2.0, expdif - 50);
797 fpsrcop = (st0 / st1) / fptemp;
798 /* fpsrcop = integer obtained by chopping */
799 fpsrcop = (fpsrcop < 0.0) ?
800 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
801 st0 -= (st1 * fpsrcop * fptemp);
802 }
d3eb5eae 803 ST0 = double_to_floatx80(env, st0);
f299f437
BS
804}
805
d3eb5eae 806void helper_fprem(CPUX86State *env)
f299f437
BS
807{
808 double st0, st1, dblq, fpsrcop, fptemp;
809 CPU_LDoubleU fpsrcop1, fptemp1;
810 int expdif;
811 signed long long int q;
812
d3eb5eae
BS
813 st0 = floatx80_to_double(env, ST0);
814 st1 = floatx80_to_double(env, ST1);
f299f437
BS
815
816 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
d3eb5eae 817 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
f299f437
BS
818 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
819 return;
820 }
821
822 fpsrcop = st0;
823 fptemp = st1;
824 fpsrcop1.d = ST0;
825 fptemp1.d = ST1;
826 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
827
828 if (expdif < 0) {
829 /* optimisation? taken from the AMD docs */
830 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
831 /* ST0 is unchanged */
832 return;
833 }
834
835 if (expdif < 53) {
836 dblq = fpsrcop / fptemp; /* ST0 / ST1 */
837 /* round dblq towards zero */
838 dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
839 st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
840
841 /* convert dblq to q by truncating towards zero */
842 if (dblq < 0.0) {
843 q = (signed long long int)(-dblq);
844 } else {
845 q = (signed long long int)dblq;
846 }
847
848 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
849 /* (C0,C3,C1) <-- (q2,q1,q0) */
850 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
851 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
852 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
853 } else {
854 int N = 32 + (expdif % 32); /* as per AMD docs */
855
856 env->fpus |= 0x400; /* C2 <-- 1 */
857 fptemp = pow(2.0, (double)(expdif - N));
858 fpsrcop = (st0 / st1) / fptemp;
859 /* fpsrcop = integer obtained by chopping */
860 fpsrcop = (fpsrcop < 0.0) ?
861 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
862 st0 -= (st1 * fpsrcop * fptemp);
863 }
d3eb5eae 864 ST0 = double_to_floatx80(env, st0);
f299f437
BS
865}
866
d3eb5eae 867void helper_fyl2xp1(CPUX86State *env)
f299f437 868{
d3eb5eae 869 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
870
871 if ((fptemp + 1.0) > 0.0) {
872 fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
d3eb5eae
BS
873 fptemp *= floatx80_to_double(env, ST1);
874 ST1 = double_to_floatx80(env, fptemp);
875 fpop(env);
f299f437
BS
876 } else {
877 env->fpus &= ~0x4700;
878 env->fpus |= 0x400;
879 }
880}
881
d3eb5eae 882void helper_fsqrt(CPUX86State *env)
f299f437
BS
883{
884 if (floatx80_is_neg(ST0)) {
885 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
886 env->fpus |= 0x400;
887 }
888 ST0 = floatx80_sqrt(ST0, &env->fp_status);
889}
890
d3eb5eae 891void helper_fsincos(CPUX86State *env)
f299f437 892{
d3eb5eae 893 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
894
895 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
896 env->fpus |= 0x400;
897 } else {
d3eb5eae
BS
898 ST0 = double_to_floatx80(env, sin(fptemp));
899 fpush(env);
900 ST0 = double_to_floatx80(env, cos(fptemp));
f299f437
BS
901 env->fpus &= ~0x400; /* C2 <-- 0 */
902 /* the above code is for |arg| < 2**63 only */
903 }
904}
905
d3eb5eae 906void helper_frndint(CPUX86State *env)
f299f437
BS
907{
908 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
909}
910
d3eb5eae 911void helper_fscale(CPUX86State *env)
f299f437
BS
912{
913 if (floatx80_is_any_nan(ST1)) {
914 ST0 = ST1;
915 } else {
916 int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
917 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
918 }
919}
920
d3eb5eae 921void helper_fsin(CPUX86State *env)
f299f437 922{
d3eb5eae 923 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
924
925 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
926 env->fpus |= 0x400;
927 } else {
d3eb5eae 928 ST0 = double_to_floatx80(env, sin(fptemp));
f299f437
BS
929 env->fpus &= ~0x400; /* C2 <-- 0 */
930 /* the above code is for |arg| < 2**53 only */
931 }
932}
933
d3eb5eae 934void helper_fcos(CPUX86State *env)
f299f437 935{
d3eb5eae 936 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
937
938 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
939 env->fpus |= 0x400;
940 } else {
d3eb5eae 941 ST0 = double_to_floatx80(env, cos(fptemp));
f299f437
BS
942 env->fpus &= ~0x400; /* C2 <-- 0 */
943 /* the above code is for |arg| < 2**63 only */
944 }
945}
946
d3eb5eae 947void helper_fxam_ST0(CPUX86State *env)
f299f437
BS
948{
949 CPU_LDoubleU temp;
950 int expdif;
951
952 temp.d = ST0;
953
954 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
955 if (SIGND(temp)) {
956 env->fpus |= 0x200; /* C1 <-- 1 */
957 }
958
959 /* XXX: test fptags too */
960 expdif = EXPD(temp);
961 if (expdif == MAXEXPD) {
962 if (MANTD(temp) == 0x8000000000000000ULL) {
963 env->fpus |= 0x500; /* Infinity */
964 } else {
965 env->fpus |= 0x100; /* NaN */
966 }
967 } else if (expdif == 0) {
968 if (MANTD(temp) == 0) {
969 env->fpus |= 0x4000; /* Zero */
970 } else {
971 env->fpus |= 0x4400; /* Denormal */
972 }
973 } else {
974 env->fpus |= 0x400;
975 }
976}
977
6cad09d2
PD
978static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
979 uintptr_t retaddr)
f299f437
BS
980{
981 int fpus, fptag, exp, i;
982 uint64_t mant;
983 CPU_LDoubleU tmp;
984
985 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
986 fptag = 0;
987 for (i = 7; i >= 0; i--) {
988 fptag <<= 2;
989 if (env->fptags[i]) {
990 fptag |= 3;
991 } else {
992 tmp.d = env->fpregs[i].d;
993 exp = EXPD(tmp);
994 mant = MANTD(tmp);
995 if (exp == 0 && mant == 0) {
996 /* zero */
997 fptag |= 1;
998 } else if (exp == 0 || exp == MAXEXPD
999 || (mant & (1LL << 63)) == 0) {
1000 /* NaNs, infinity, denormal */
1001 fptag |= 2;
1002 }
1003 }
1004 }
1005 if (data32) {
1006 /* 32 bit */
6cad09d2
PD
1007 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1008 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1009 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1010 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1011 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1012 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1013 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
f299f437
BS
1014 } else {
1015 /* 16 bit */
6cad09d2
PD
1016 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1017 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1018 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1019 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1020 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1021 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1022 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
f299f437
BS
1023 }
1024}
1025
6cad09d2
PD
1026void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1027{
1028 do_fstenv(env, ptr, data32, GETPC());
1029}
1030
1031static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1032 uintptr_t retaddr)
f299f437
BS
1033{
1034 int i, fpus, fptag;
1035
1036 if (data32) {
6cad09d2
PD
1037 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1038 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1039 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
f299f437 1040 } else {
6cad09d2
PD
1041 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1042 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1043 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
f299f437
BS
1044 }
1045 env->fpstt = (fpus >> 11) & 7;
1046 env->fpus = fpus & ~0x3800;
1047 for (i = 0; i < 8; i++) {
1048 env->fptags[i] = ((fptag & 3) == 3);
1049 fptag >>= 2;
1050 }
1051}
1052
6cad09d2
PD
1053void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1054{
1055 do_fldenv(env, ptr, data32, GETPC());
1056}
1057
d3eb5eae 1058void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
f299f437
BS
1059{
1060 floatx80 tmp;
1061 int i;
1062
6cad09d2 1063 do_fstenv(env, ptr, data32, GETPC());
f299f437
BS
1064
1065 ptr += (14 << data32);
1066 for (i = 0; i < 8; i++) {
1067 tmp = ST(i);
6cad09d2 1068 helper_fstt(env, tmp, ptr, GETPC());
f299f437
BS
1069 ptr += 10;
1070 }
1071
1072 /* fninit */
1073 env->fpus = 0;
1074 env->fpstt = 0;
5bde1407 1075 cpu_set_fpuc(env, 0x37f);
f299f437
BS
1076 env->fptags[0] = 1;
1077 env->fptags[1] = 1;
1078 env->fptags[2] = 1;
1079 env->fptags[3] = 1;
1080 env->fptags[4] = 1;
1081 env->fptags[5] = 1;
1082 env->fptags[6] = 1;
1083 env->fptags[7] = 1;
1084}
1085
d3eb5eae 1086void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
f299f437
BS
1087{
1088 floatx80 tmp;
1089 int i;
1090
6cad09d2 1091 do_fldenv(env, ptr, data32, GETPC());
f299f437
BS
1092 ptr += (14 << data32);
1093
1094 for (i = 0; i < 8; i++) {
6cad09d2 1095 tmp = helper_fldt(env, ptr, GETPC());
f299f437
BS
1096 ST(i) = tmp;
1097 ptr += 10;
1098 }
1099}
1100
1101#if defined(CONFIG_USER_ONLY)
d3eb5eae 1102void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
f299f437 1103{
d3eb5eae 1104 helper_fsave(env, ptr, data32);
f299f437
BS
1105}
1106
d3eb5eae 1107void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
f299f437 1108{
d3eb5eae 1109 helper_frstor(env, ptr, data32);
f299f437
BS
1110}
1111#endif
1112
3f32bd21
RH
/* Byte offset of member X within X86XSaveArea. */
#define XO(X)  offsetof(X86XSaveArea, X)
1114
64dbaff0 1115static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
f299f437 1116{
64dbaff0 1117 int fpus, fptag, i;
f299f437
BS
1118 target_ulong addr;
1119
f299f437
BS
1120 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1121 fptag = 0;
1122 for (i = 0; i < 8; i++) {
1123 fptag |= (env->fptags[i] << i);
1124 }
3f32bd21
RH
1125
1126 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
1127 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
1128 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
64dbaff0
RH
1129
1130 /* In 32-bit mode this is eip, sel, dp, sel.
1131 In 64-bit mode this is rip, rdp.
1132 But in either case we don't write actual data, just zeros. */
3f32bd21
RH
1133 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
1134 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
f299f437 1135
3f32bd21 1136 addr = ptr + XO(legacy.fpregs);
f299f437 1137 for (i = 0; i < 8; i++) {
64dbaff0
RH
1138 floatx80 tmp = ST(i);
1139 helper_fstt(env, tmp, addr, ra);
f299f437
BS
1140 addr += 16;
1141 }
64dbaff0
RH
1142}
1143
1144static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1145{
3f32bd21
RH
1146 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
1147 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
64dbaff0
RH
1148}
1149
1150static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1151{
1152 int i, nb_xmm_regs;
1153 target_ulong addr;
1154
1155 if (env->hflags & HF_CS64_MASK) {
1156 nb_xmm_regs = 16;
1157 } else {
1158 nb_xmm_regs = 8;
1159 }
1160
3f32bd21 1161 addr = ptr + XO(legacy.xmm_regs);
64dbaff0
RH
1162 for (i = 0; i < nb_xmm_regs; i++) {
1163 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1164 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1165 addr += 16;
1166 }
1167}
1168
3f32bd21 1169static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
f4f1110e 1170{
3f32bd21 1171 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
f4f1110e
RH
1172 int i;
1173
1174 for (i = 0; i < 4; i++, addr += 16) {
1175 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1176 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1177 }
1178}
1179
3f32bd21 1180static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
f4f1110e 1181{
3f32bd21
RH
1182 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
1183 env->bndcs_regs.cfgu, ra);
1184 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
1185 env->bndcs_regs.sts, ra);
f4f1110e
RH
1186}
1187
3f32bd21 1188static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
0f70ed47 1189{
3f32bd21 1190 cpu_stq_data_ra(env, ptr, env->pkru, ra);
0f70ed47
PB
1191}
1192
64dbaff0
RH
1193void helper_fxsave(CPUX86State *env, target_ulong ptr)
1194{
1195 uintptr_t ra = GETPC();
1196
1197 /* The operand must be 16 byte aligned */
1198 if (ptr & 0xf) {
1199 raise_exception_ra(env, EXCP0D_GPF, ra);
1200 }
1201
1202 do_xsave_fpu(env, ptr, ra);
f299f437
BS
1203
1204 if (env->cr[4] & CR4_OSFXSR_MASK) {
64dbaff0 1205 do_xsave_mxcsr(env, ptr, ra);
f299f437
BS
1206 /* Fast FXSAVE leaves out the XMM registers */
1207 if (!(env->efer & MSR_EFER_FFXSR)
1208 || (env->hflags & HF_CPL_MASK)
1209 || !(env->hflags & HF_LMA_MASK)) {
64dbaff0 1210 do_xsave_sse(env, ptr, ra);
f299f437
BS
1211 }
1212 }
1213}
1214
19dc85db
RH
1215static uint64_t get_xinuse(CPUX86State *env)
1216{
f4f1110e
RH
1217 uint64_t inuse = -1;
1218
1219 /* For the most part, we don't track XINUSE. We could calculate it
1220 here for all components, but it's probably less work to simply
1221 indicate in use. That said, the state of BNDREGS is important
1222 enough to track in HFLAGS, so we might as well use that here. */
1223 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
cfc3b074 1224 inuse &= ~XSTATE_BNDREGS_MASK;
f4f1110e
RH
1225 }
1226 return inuse;
19dc85db
RH
1227}
1228
c9cfe8f9
RH
1229static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1230 uint64_t inuse, uint64_t opt, uintptr_t ra)
19dc85db 1231{
19dc85db
RH
1232 uint64_t old_bv, new_bv;
1233
1234 /* The OS must have enabled XSAVE. */
1235 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1236 raise_exception_ra(env, EXCP06_ILLOP, ra);
1237 }
1238
1239 /* The operand must be 64 byte aligned. */
1240 if (ptr & 63) {
1241 raise_exception_ra(env, EXCP0D_GPF, ra);
1242 }
1243
1244 /* Never save anything not enabled by XCR0. */
1245 rfbm &= env->xcr0;
c9cfe8f9 1246 opt &= rfbm;
19dc85db 1247
cfc3b074 1248 if (opt & XSTATE_FP_MASK) {
19dc85db
RH
1249 do_xsave_fpu(env, ptr, ra);
1250 }
cfc3b074 1251 if (rfbm & XSTATE_SSE_MASK) {
c9cfe8f9 1252 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
19dc85db 1253 do_xsave_mxcsr(env, ptr, ra);
c9cfe8f9 1254 }
cfc3b074 1255 if (opt & XSTATE_SSE_MASK) {
19dc85db
RH
1256 do_xsave_sse(env, ptr, ra);
1257 }
cfc3b074 1258 if (opt & XSTATE_BNDREGS_MASK) {
3f32bd21 1259 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
f4f1110e 1260 }
cfc3b074 1261 if (opt & XSTATE_BNDCSR_MASK) {
3f32bd21 1262 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
f4f1110e 1263 }
0f70ed47 1264 if (opt & XSTATE_PKRU_MASK) {
3f32bd21 1265 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
0f70ed47 1266 }
19dc85db
RH
1267
1268 /* Update the XSTATE_BV field. */
3f32bd21 1269 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
c9cfe8f9 1270 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
3f32bd21 1271 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
19dc85db
RH
1272}
1273
c9cfe8f9
RH
1274void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1275{
1276 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1277}
1278
1279void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1280{
1281 uint64_t inuse = get_xinuse(env);
1282 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1283}
1284
64dbaff0 1285static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
f299f437 1286{
3f32bd21 1287 int i, fpuc, fpus, fptag;
f299f437
BS
1288 target_ulong addr;
1289
3f32bd21
RH
1290 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1291 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1292 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1293 cpu_set_fpuc(env, fpuc);
f299f437
BS
1294 env->fpstt = (fpus >> 11) & 7;
1295 env->fpus = fpus & ~0x3800;
1296 fptag ^= 0xff;
1297 for (i = 0; i < 8; i++) {
1298 env->fptags[i] = ((fptag >> i) & 1);
1299 }
1300
3f32bd21 1301 addr = ptr + XO(legacy.fpregs);
f299f437 1302 for (i = 0; i < 8; i++) {
64dbaff0 1303 floatx80 tmp = helper_fldt(env, addr, ra);
f299f437
BS
1304 ST(i) = tmp;
1305 addr += 16;
1306 }
64dbaff0
RH
1307}
1308
1309static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1310{
3f32bd21 1311 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
64dbaff0
RH
1312}
1313
1314static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1315{
1316 int i, nb_xmm_regs;
1317 target_ulong addr;
1318
1319 if (env->hflags & HF_CS64_MASK) {
1320 nb_xmm_regs = 16;
1321 } else {
1322 nb_xmm_regs = 8;
1323 }
1324
3f32bd21 1325 addr = ptr + XO(legacy.xmm_regs);
64dbaff0
RH
1326 for (i = 0; i < nb_xmm_regs; i++) {
1327 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1328 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1329 addr += 16;
1330 }
1331}
1332
3f32bd21 1333static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
f4f1110e 1334{
3f32bd21 1335 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
f4f1110e
RH
1336 int i;
1337
1338 for (i = 0; i < 4; i++, addr += 16) {
1339 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1340 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1341 }
1342}
1343
3f32bd21 1344static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
f4f1110e
RH
1345{
1346 /* FIXME: Extend highest implemented bit of linear address. */
3f32bd21
RH
1347 env->bndcs_regs.cfgu
1348 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1349 env->bndcs_regs.sts
1350 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
f4f1110e
RH
1351}
1352
3f32bd21 1353static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
0f70ed47 1354{
3f32bd21 1355 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
0f70ed47
PB
1356}
1357
64dbaff0
RH
1358void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1359{
1360 uintptr_t ra = GETPC();
1361
1362 /* The operand must be 16 byte aligned */
1363 if (ptr & 0xf) {
1364 raise_exception_ra(env, EXCP0D_GPF, ra);
1365 }
1366
1367 do_xrstor_fpu(env, ptr, ra);
f299f437
BS
1368
1369 if (env->cr[4] & CR4_OSFXSR_MASK) {
64dbaff0
RH
1370 do_xrstor_mxcsr(env, ptr, ra);
1371 /* Fast FXRSTOR leaves out the XMM registers */
f299f437
BS
1372 if (!(env->efer & MSR_EFER_FFXSR)
1373 || (env->hflags & HF_CPL_MASK)
1374 || !(env->hflags & HF_LMA_MASK)) {
64dbaff0 1375 do_xrstor_sse(env, ptr, ra);
f299f437
BS
1376 }
1377 }
1378}
1379
19dc85db
RH
1380void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1381{
1382 uintptr_t ra = GETPC();
3f32bd21 1383 uint64_t xstate_bv, xcomp_bv, reserve0;
19dc85db
RH
1384
1385 rfbm &= env->xcr0;
1386
1387 /* The OS must have enabled XSAVE. */
1388 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1389 raise_exception_ra(env, EXCP06_ILLOP, ra);
1390 }
1391
1392 /* The operand must be 64 byte aligned. */
1393 if (ptr & 63) {
1394 raise_exception_ra(env, EXCP0D_GPF, ra);
1395 }
1396
3f32bd21 1397 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
19dc85db
RH
1398
1399 if ((int64_t)xstate_bv < 0) {
1400 /* FIXME: Compact form. */
1401 raise_exception_ra(env, EXCP0D_GPF, ra);
1402 }
1403
1404 /* Standard form. */
1405
3f32bd21 1406 /* The XSTATE_BV field must not set bits not present in XCR0. */
19dc85db
RH
1407 if (xstate_bv & ~env->xcr0) {
1408 raise_exception_ra(env, EXCP0D_GPF, ra);
1409 }
1410
3f32bd21
RH
1411 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
1412 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
1413 describes only XCOMP_BV, but the description of the standard form
1414 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
1415 includes the next 64-bit field. */
1416 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
1417 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
1418 if (xcomp_bv || reserve0) {
19dc85db
RH
1419 raise_exception_ra(env, EXCP0D_GPF, ra);
1420 }
1421
cfc3b074
PB
1422 if (rfbm & XSTATE_FP_MASK) {
1423 if (xstate_bv & XSTATE_FP_MASK) {
19dc85db
RH
1424 do_xrstor_fpu(env, ptr, ra);
1425 } else {
1426 helper_fninit(env);
1427 memset(env->fpregs, 0, sizeof(env->fpregs));
1428 }
1429 }
cfc3b074 1430 if (rfbm & XSTATE_SSE_MASK) {
19dc85db
RH
1431 /* Note that the standard form of XRSTOR loads MXCSR from memory
1432 whether or not the XSTATE_BV bit is set. */
1433 do_xrstor_mxcsr(env, ptr, ra);
cfc3b074 1434 if (xstate_bv & XSTATE_SSE_MASK) {
19dc85db
RH
1435 do_xrstor_sse(env, ptr, ra);
1436 } else {
1437 /* ??? When AVX is implemented, we may have to be more
1438 selective in the clearing. */
1439 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1440 }
1441 }
cfc3b074
PB
1442 if (rfbm & XSTATE_BNDREGS_MASK) {
1443 if (xstate_bv & XSTATE_BNDREGS_MASK) {
3f32bd21 1444 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
f4f1110e
RH
1445 env->hflags |= HF_MPX_IU_MASK;
1446 } else {
1447 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1448 env->hflags &= ~HF_MPX_IU_MASK;
1449 }
1450 }
cfc3b074
PB
1451 if (rfbm & XSTATE_BNDCSR_MASK) {
1452 if (xstate_bv & XSTATE_BNDCSR_MASK) {
3f32bd21 1453 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
f4f1110e
RH
1454 } else {
1455 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1456 }
1457 cpu_sync_bndcs_hflags(env);
1458 }
0f70ed47
PB
1459 if (rfbm & XSTATE_PKRU_MASK) {
1460 uint64_t old_pkru = env->pkru;
1461 if (xstate_bv & XSTATE_PKRU_MASK) {
3f32bd21 1462 do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
0f70ed47
PB
1463 } else {
1464 env->pkru = 0;
1465 }
1466 if (env->pkru != old_pkru) {
1467 CPUState *cs = CPU(x86_env_get_cpu(env));
1468 tlb_flush(cs, 1);
1469 }
1470 }
19dc85db
RH
1471}
1472
3f32bd21
RH
1473#undef XO
1474
19dc85db
RH
1475uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1476{
1477 /* The OS must have enabled XSAVE. */
1478 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1479 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1480 }
1481
1482 switch (ecx) {
1483 case 0:
1484 return env->xcr0;
1485 case 1:
c9cfe8f9
RH
1486 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1487 return env->xcr0 & get_xinuse(env);
1488 }
1489 break;
19dc85db
RH
1490 }
1491 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1492}
1493
1494void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1495{
1496 uint32_t dummy, ena_lo, ena_hi;
1497 uint64_t ena;
1498
1499 /* The OS must have enabled XSAVE. */
1500 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1501 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1502 }
1503
1504 /* Only XCR0 is defined at present; the FPU may not be disabled. */
cfc3b074 1505 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
19dc85db
RH
1506 goto do_gpf;
1507 }
1508
1509 /* Disallow enabling unimplemented features. */
1510 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1511 ena = ((uint64_t)ena_hi << 32) | ena_lo;
1512 if (mask & ~ena) {
1513 goto do_gpf;
1514 }
1515
f4f1110e 1516 /* Disallow enabling only half of MPX. */
cfc3b074
PB
1517 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1518 & XSTATE_BNDCSR_MASK) {
f4f1110e
RH
1519 goto do_gpf;
1520 }
1521
19dc85db 1522 env->xcr0 = mask;
f4f1110e 1523 cpu_sync_bndcs_hflags(env);
19dc85db
RH
1524 return;
1525
1526 do_gpf:
1527 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1528}
1529
f299f437
BS
1530void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
1531{
1532 CPU_LDoubleU temp;
1533
1534 temp.d = f;
1535 *pmant = temp.l.lower;
1536 *pexp = temp.l.upper;
1537}
1538
1539floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
1540{
1541 CPU_LDoubleU temp;
1542
1543 temp.l.upper = upper;
1544 temp.l.lower = mant;
1545 return temp.d;
1546}
1547
1548/* MMX/SSE */
1549/* XXX: optimize by storing fptt and fptags in the static cpu state */
1550
/* MXCSR control bits used by cpu_set_mxcsr(). */
#define SSE_DAZ     (1 << 6)    /* denormals are zero */
#define SSE_RC_MASK (3 << 13)   /* rounding-control field */
#define SSE_RC_NEAR (0 << 13)
#define SSE_RC_DOWN (1 << 13)
#define SSE_RC_UP   (2 << 13)
#define SSE_RC_CHOP (3 << 13)
#define SSE_FZ      (1 << 15)   /* flush to zero */
1558
4e47e39a 1559void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr)
f299f437
BS
1560{
1561 int rnd_type;
1562
4e47e39a
RH
1563 env->mxcsr = mxcsr;
1564
f299f437 1565 /* set rounding mode */
4e47e39a 1566 switch (mxcsr & SSE_RC_MASK) {
f299f437
BS
1567 default:
1568 case SSE_RC_NEAR:
1569 rnd_type = float_round_nearest_even;
1570 break;
1571 case SSE_RC_DOWN:
1572 rnd_type = float_round_down;
1573 break;
1574 case SSE_RC_UP:
1575 rnd_type = float_round_up;
1576 break;
1577 case SSE_RC_CHOP:
1578 rnd_type = float_round_to_zero;
1579 break;
1580 }
1581 set_float_rounding_mode(rnd_type, &env->sse_status);
1582
1583 /* set denormals are zero */
4e47e39a 1584 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
f299f437
BS
1585
1586 /* set flush to zero */
4e47e39a 1587 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
f299f437
BS
1588}
1589
5bde1407
PD
1590void cpu_set_fpuc(CPUX86State *env, uint16_t val)
1591{
1592 env->fpuc = val;
1593 update_fp_status(env);
1594}
1595
d3eb5eae 1596void helper_ldmxcsr(CPUX86State *env, uint32_t val)
f299f437 1597{
4e47e39a 1598 cpu_set_mxcsr(env, val);
f299f437
BS
1599}
1600
d3eb5eae 1601void helper_enter_mmx(CPUX86State *env)
f299f437
BS
1602{
1603 env->fpstt = 0;
1604 *(uint32_t *)(env->fptags) = 0;
1605 *(uint32_t *)(env->fptags + 4) = 0;
1606}
1607
d3eb5eae 1608void helper_emms(CPUX86State *env)
f299f437
BS
1609{
1610 /* set to empty state */
1611 *(uint32_t *)(env->fptags) = 0x01010101;
1612 *(uint32_t *)(env->fptags + 4) = 0x01010101;
1613}
1614
1615/* XXX: suppress */
d3eb5eae 1616void helper_movq(CPUX86State *env, void *d, void *s)
f299f437
BS
1617{
1618 *(uint64_t *)d = *(uint64_t *)s;
1619}
1620
1621#define SHIFT 0
1622#include "ops_sse.h"
1623
1624#define SHIFT 1
1625#include "ops_sse.h"