mirror_qemu.git: target/i386/fpu_helper.c
1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "qemu/host-utils.h"
25 #include "exec/exec-all.h"
26 #include "exec/cpu_ldst.h"
27
28 #define FPU_RC_MASK 0xc00
29 #define FPU_RC_NEAR 0x000
30 #define FPU_RC_DOWN 0x400
31 #define FPU_RC_UP 0x800
32 #define FPU_RC_CHOP 0xc00
33
34 #define MAXTAN 9223372036854775808.0
35
36 /* the following deal with x86 long double-precision numbers */
37 #define MAXEXPD 0x7fff
38 #define EXPBIAS 16383
39 #define EXPD(fp) (fp.l.upper & 0x7fff)
40 #define SIGND(fp) ((fp.l.upper) & 0x8000)
41 #define MANTD(fp) (fp.l.lower)
42 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
43
44 #define FPUS_IE (1 << 0)
45 #define FPUS_DE (1 << 1)
46 #define FPUS_ZE (1 << 2)
47 #define FPUS_OE (1 << 3)
48 #define FPUS_UE (1 << 4)
49 #define FPUS_PE (1 << 5)
50 #define FPUS_SF (1 << 6)
51 #define FPUS_SE (1 << 7)
52 #define FPUS_B (1 << 15)
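/*
 * Layout of the x87 status word, for reference: bits 0..7 are the
 * exception/summary flags defined above, C0/C1/C2 are bits 8/9/10
 * (0x100/0x200/0x400), bits 11..13 hold TOP (0x3800) and C3 is bit 14
 * (0x4000).  This is why the helpers below mask with 0x4500 (C3|C2|C0),
 * 0x4700 (all four condition codes) and 0x3800 (TOP).
 */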
53
54 #define FPUC_EM 0x3f
55
56 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
57 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
58 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
59
60 static inline void fpush(CPUX86State *env)
61 {
62 env->fpstt = (env->fpstt - 1) & 7;
63 env->fptags[env->fpstt] = 0; /* validate stack entry */
64 }
65
66 static inline void fpop(CPUX86State *env)
67 {
68 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
69 env->fpstt = (env->fpstt + 1) & 7;
70 }
71
72 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
73 uintptr_t retaddr)
74 {
75 CPU_LDoubleU temp;
76
77 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
78 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
79 return temp.d;
80 }
81
82 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
83 uintptr_t retaddr)
84 {
85 CPU_LDoubleU temp;
86
87 temp.d = f;
88 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
89 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
90 }
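/*
 * An 80-bit extended value is stored in memory as a 10-byte field: the
 * 64-bit significand at offset 0 and the 16-bit sign+exponent word at
 * offset 8, which is why helper_fldt()/helper_fstt() issue one 64-bit
 * and one 16-bit access.
 */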
91
92 /* x87 FPU helpers */
93
94 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
95 {
96 union {
97 float64 f64;
98 double d;
99 } u;
100
101 u.f64 = floatx80_to_float64(a, &env->fp_status);
102 return u.d;
103 }
104
105 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
106 {
107 union {
108 float64 f64;
109 double d;
110 } u;
111
112 u.d = a;
113 return float64_to_floatx80(u.f64, &env->fp_status);
114 }
115
116 static void fpu_set_exception(CPUX86State *env, int mask)
117 {
118 env->fpus |= mask;
119 if (env->fpus & (~env->fpuc & FPUC_EM)) {
120 env->fpus |= FPUS_SE | FPUS_B;
121 }
122 }
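/*
 * FPUC_EM (0x3f) covers the six exception mask bits in the control
 * word, so "~fpuc & FPUC_EM" is the set of unmasked exceptions; if any
 * pending status flag is unmasked, the error summary (ES) and busy
 * bits are set so a later FWAIT or FP instruction reports the fault.
 */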
123
124 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
125 {
126 if (floatx80_is_zero(b)) {
127 fpu_set_exception(env, FPUS_ZE);
128 }
129 return floatx80_div(a, b, &env->fp_status);
130 }
131
132 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
133 {
134 if (env->cr[0] & CR0_NE_MASK) {
135 raise_exception_ra(env, EXCP10_COPR, retaddr);
136 }
137 #if !defined(CONFIG_USER_ONLY)
138 else {
139 cpu_set_ferr(env);
140 }
141 #endif
142 }
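/*
 * With CR0.NE set, a pending x87 error is delivered as #MF (vector 16).
 * With CR0.NE clear the error is reported the legacy way through the
 * FERR# output (cpu_set_ferr), which PC chipsets traditionally route
 * to IRQ13.
 */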
143
144 void helper_flds_FT0(CPUX86State *env, uint32_t val)
145 {
146 union {
147 float32 f;
148 uint32_t i;
149 } u;
150
151 u.i = val;
152 FT0 = float32_to_floatx80(u.f, &env->fp_status);
153 }
154
155 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
156 {
157 union {
158 float64 f;
159 uint64_t i;
160 } u;
161
162 u.i = val;
163 FT0 = float64_to_floatx80(u.f, &env->fp_status);
164 }
165
166 void helper_fildl_FT0(CPUX86State *env, int32_t val)
167 {
168 FT0 = int32_to_floatx80(val, &env->fp_status);
169 }
170
171 void helper_flds_ST0(CPUX86State *env, uint32_t val)
172 {
173 int new_fpstt;
174 union {
175 float32 f;
176 uint32_t i;
177 } u;
178
179 new_fpstt = (env->fpstt - 1) & 7;
180 u.i = val;
181 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
182 env->fpstt = new_fpstt;
183 env->fptags[new_fpstt] = 0; /* validate stack entry */
184 }
185
186 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
187 {
188 int new_fpstt;
189 union {
190 float64 f;
191 uint64_t i;
192 } u;
193
194 new_fpstt = (env->fpstt - 1) & 7;
195 u.i = val;
196 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
197 env->fpstt = new_fpstt;
198 env->fptags[new_fpstt] = 0; /* validate stack entry */
199 }
200
201 void helper_fildl_ST0(CPUX86State *env, int32_t val)
202 {
203 int new_fpstt;
204
205 new_fpstt = (env->fpstt - 1) & 7;
206 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
207 env->fpstt = new_fpstt;
208 env->fptags[new_fpstt] = 0; /* validate stack entry */
209 }
210
211 void helper_fildll_ST0(CPUX86State *env, int64_t val)
212 {
213 int new_fpstt;
214
215 new_fpstt = (env->fpstt - 1) & 7;
216 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
217 env->fpstt = new_fpstt;
218 env->fptags[new_fpstt] = 0; /* validate stack entry */
219 }
220
221 uint32_t helper_fsts_ST0(CPUX86State *env)
222 {
223 union {
224 float32 f;
225 uint32_t i;
226 } u;
227
228 u.f = floatx80_to_float32(ST0, &env->fp_status);
229 return u.i;
230 }
231
232 uint64_t helper_fstl_ST0(CPUX86State *env)
233 {
234 union {
235 float64 f;
236 uint64_t i;
237 } u;
238
239 u.f = floatx80_to_float64(ST0, &env->fp_status);
240 return u.i;
241 }
242
243 int32_t helper_fist_ST0(CPUX86State *env)
244 {
245 int32_t val;
246
247 val = floatx80_to_int32(ST0, &env->fp_status);
248 if (val != (int16_t)val) {
249 val = -32768;
250 }
251 return val;
252 }
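/*
 * A 16-bit FIST/FISTP whose result does not fit stores the "integer
 * indefinite" encoding 0x8000 (-32768); helper_fistt_ST0() below does
 * the same for the truncating FISTTP form.
 */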
253
254 int32_t helper_fistl_ST0(CPUX86State *env)
255 {
256 int32_t val;
257 signed char old_exp_flags;
258
259 old_exp_flags = get_float_exception_flags(&env->fp_status);
260 set_float_exception_flags(0, &env->fp_status);
261
262 val = floatx80_to_int32(ST0, &env->fp_status);
263 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
264 val = 0x80000000;
265 }
266 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
267 | old_exp_flags, &env->fp_status);
268 return val;
269 }
270
271 int64_t helper_fistll_ST0(CPUX86State *env)
272 {
273 int64_t val;
274 signed char old_exp_flags;
275
276 old_exp_flags = get_float_exception_flags(&env->fp_status);
277 set_float_exception_flags(0, &env->fp_status);
278
279 val = floatx80_to_int64(ST0, &env->fp_status);
280 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
281 val = 0x8000000000000000ULL;
282 }
283 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
284 | old_exp_flags, &env->fp_status);
285 return val;
286 }
287
288 int32_t helper_fistt_ST0(CPUX86State *env)
289 {
290 int32_t val;
291
292 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
293 if (val != (int16_t)val) {
294 val = -32768;
295 }
296 return val;
297 }
298
299 int32_t helper_fisttl_ST0(CPUX86State *env)
300 {
301 return floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
302 }
303
304 int64_t helper_fisttll_ST0(CPUX86State *env)
305 {
306 return floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
307 }
308
309 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
310 {
311 int new_fpstt;
312
313 new_fpstt = (env->fpstt - 1) & 7;
314 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
315 env->fpstt = new_fpstt;
316 env->fptags[new_fpstt] = 0; /* validate stack entry */
317 }
318
319 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
320 {
321 helper_fstt(env, ST0, ptr, GETPC());
322 }
323
324 void helper_fpush(CPUX86State *env)
325 {
326 fpush(env);
327 }
328
329 void helper_fpop(CPUX86State *env)
330 {
331 fpop(env);
332 }
333
334 void helper_fdecstp(CPUX86State *env)
335 {
336 env->fpstt = (env->fpstt - 1) & 7;
337 env->fpus &= ~0x4700;
338 }
339
340 void helper_fincstp(CPUX86State *env)
341 {
342 env->fpstt = (env->fpstt + 1) & 7;
343 env->fpus &= ~0x4700;
344 }
345
346 /* FPU move */
347
348 void helper_ffree_STN(CPUX86State *env, int st_index)
349 {
350 env->fptags[(env->fpstt + st_index) & 7] = 1;
351 }
352
353 void helper_fmov_ST0_FT0(CPUX86State *env)
354 {
355 ST0 = FT0;
356 }
357
358 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
359 {
360 FT0 = ST(st_index);
361 }
362
363 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
364 {
365 ST0 = ST(st_index);
366 }
367
368 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
369 {
370 ST(st_index) = ST0;
371 }
372
373 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
374 {
375 floatx80 tmp;
376
377 tmp = ST(st_index);
378 ST(st_index) = ST0;
379 ST0 = tmp;
380 }
381
382 /* FPU operations */
383
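/*
 * floatx80_compare() returns -1/0/1/2 for less/equal/greater/unordered,
 * so the tables below are indexed with ret + 1.  FCOM sets the x87
 * condition codes (less -> C0, equal -> C3, unordered -> C3|C2|C0),
 * while FCOMI maps the same outcomes onto CF/ZF/PF in EFLAGS.
 */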
384 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
385
386 void helper_fcom_ST0_FT0(CPUX86State *env)
387 {
388 int ret;
389
390 ret = floatx80_compare(ST0, FT0, &env->fp_status);
391 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
392 }
393
394 void helper_fucom_ST0_FT0(CPUX86State *env)
395 {
396 int ret;
397
398 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
399 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
400 }
401
402 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
403
404 void helper_fcomi_ST0_FT0(CPUX86State *env)
405 {
406 int eflags;
407 int ret;
408
409 ret = floatx80_compare(ST0, FT0, &env->fp_status);
410 eflags = cpu_cc_compute_all(env, CC_OP);
411 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
412 CC_SRC = eflags;
413 }
414
415 void helper_fucomi_ST0_FT0(CPUX86State *env)
416 {
417 int eflags;
418 int ret;
419
420 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
421 eflags = cpu_cc_compute_all(env, CC_OP);
422 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
423 CC_SRC = eflags;
424 }
425
426 void helper_fadd_ST0_FT0(CPUX86State *env)
427 {
428 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
429 }
430
431 void helper_fmul_ST0_FT0(CPUX86State *env)
432 {
433 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
434 }
435
436 void helper_fsub_ST0_FT0(CPUX86State *env)
437 {
438 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
439 }
440
441 void helper_fsubr_ST0_FT0(CPUX86State *env)
442 {
443 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
444 }
445
446 void helper_fdiv_ST0_FT0(CPUX86State *env)
447 {
448 ST0 = helper_fdiv(env, ST0, FT0);
449 }
450
451 void helper_fdivr_ST0_FT0(CPUX86State *env)
452 {
453 ST0 = helper_fdiv(env, FT0, ST0);
454 }
455
456 /* fp operations between STN and ST0 */
457
458 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
459 {
460 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
461 }
462
463 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
464 {
465 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
466 }
467
468 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
469 {
470 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
471 }
472
473 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
474 {
475 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
476 }
477
478 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
479 {
480 floatx80 *p;
481
482 p = &ST(st_index);
483 *p = helper_fdiv(env, *p, ST0);
484 }
485
486 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
487 {
488 floatx80 *p;
489
490 p = &ST(st_index);
491 *p = helper_fdiv(env, ST0, *p);
492 }
493
494 /* misc FPU operations */
495 void helper_fchs_ST0(CPUX86State *env)
496 {
497 ST0 = floatx80_chs(ST0);
498 }
499
500 void helper_fabs_ST0(CPUX86State *env)
501 {
502 ST0 = floatx80_abs(ST0);
503 }
504
505 void helper_fld1_ST0(CPUX86State *env)
506 {
507 ST0 = floatx80_one;
508 }
509
510 void helper_fldl2t_ST0(CPUX86State *env)
511 {
512 ST0 = floatx80_l2t;
513 }
514
515 void helper_fldl2e_ST0(CPUX86State *env)
516 {
517 ST0 = floatx80_l2e;
518 }
519
520 void helper_fldpi_ST0(CPUX86State *env)
521 {
522 ST0 = floatx80_pi;
523 }
524
525 void helper_fldlg2_ST0(CPUX86State *env)
526 {
527 ST0 = floatx80_lg2;
528 }
529
530 void helper_fldln2_ST0(CPUX86State *env)
531 {
532 ST0 = floatx80_ln2;
533 }
534
535 void helper_fldz_ST0(CPUX86State *env)
536 {
537 ST0 = floatx80_zero;
538 }
539
540 void helper_fldz_FT0(CPUX86State *env)
541 {
542 FT0 = floatx80_zero;
543 }
544
545 uint32_t helper_fnstsw(CPUX86State *env)
546 {
547 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
548 }
549
550 uint32_t helper_fnstcw(CPUX86State *env)
551 {
552 return env->fpuc;
553 }
554
555 void update_fp_status(CPUX86State *env)
556 {
557 int rnd_type;
558
559 /* set rounding mode */
560 switch (env->fpuc & FPU_RC_MASK) {
561 default:
562 case FPU_RC_NEAR:
563 rnd_type = float_round_nearest_even;
564 break;
565 case FPU_RC_DOWN:
566 rnd_type = float_round_down;
567 break;
568 case FPU_RC_UP:
569 rnd_type = float_round_up;
570 break;
571 case FPU_RC_CHOP:
572 rnd_type = float_round_to_zero;
573 break;
574 }
575 set_float_rounding_mode(rnd_type, &env->fp_status);
576 switch ((env->fpuc >> 8) & 3) {
577 case 0:
578 rnd_type = 32;
579 break;
580 case 2:
581 rnd_type = 64;
582 break;
583 case 3:
584 default:
585 rnd_type = 80;
586 break;
587 }
588 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
589 }
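/*
 * Bits 9..8 of the control word are the precision-control field:
 * 0 selects a 24-bit (single) significand, 2 selects 53-bit (double)
 * and 3 the full 64-bit significand; value 1 is reserved and treated
 * like the default here.
 */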
590
591 void helper_fldcw(CPUX86State *env, uint32_t val)
592 {
593 cpu_set_fpuc(env, val);
594 }
595
596 void helper_fclex(CPUX86State *env)
597 {
598 env->fpus &= 0x7f00;
599 }
600
601 void helper_fwait(CPUX86State *env)
602 {
603 if (env->fpus & FPUS_SE) {
604 fpu_raise_exception(env, GETPC());
605 }
606 }
607
608 void helper_fninit(CPUX86State *env)
609 {
610 env->fpus = 0;
611 env->fpstt = 0;
612 cpu_set_fpuc(env, 0x37f);
613 env->fptags[0] = 1;
614 env->fptags[1] = 1;
615 env->fptags[2] = 1;
616 env->fptags[3] = 1;
617 env->fptags[4] = 1;
618 env->fptags[5] = 1;
619 env->fptags[6] = 1;
620 env->fptags[7] = 1;
621 }
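/*
 * These are the architectural FNINIT defaults: control word 0x037f
 * (all exceptions masked, round to nearest, 64-bit precision), status
 * word 0, TOP 0 and every register tagged empty.
 */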
622
623 /* BCD ops */
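/*
 * FBLD/FBSTP operate on an 80-bit packed BCD value: bytes 0..8 hold 18
 * decimal digits, two per byte with the low nibble less significant,
 * and bit 7 of byte 9 is the sign.  For example, +1234 is stored as
 * 34 12 00 00 00 00 00 00 00 00 (least significant byte first).
 */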
624
625 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
626 {
627 floatx80 tmp;
628 uint64_t val;
629 unsigned int v;
630 int i;
631
632 val = 0;
633 for (i = 8; i >= 0; i--) {
634 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
635 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
636 }
637 tmp = int64_to_floatx80(val, &env->fp_status);
638 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
639 tmp = floatx80_chs(tmp);
640 }
641 fpush(env);
642 ST0 = tmp;
643 }
644
645 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
646 {
647 int v;
648 target_ulong mem_ref, mem_end;
649 int64_t val;
650
651 val = floatx80_to_int64(ST0, &env->fp_status);
652 mem_ref = ptr;
653 mem_end = mem_ref + 9;
654 if (val < 0) {
655 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
656 val = -val;
657 } else {
658 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
659 }
660 while (mem_ref < mem_end) {
661 if (val == 0) {
662 break;
663 }
664 v = val % 100;
665 val = val / 100;
666 v = ((v / 10) << 4) | (v % 10);
667 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
668 }
669 while (mem_ref < mem_end) {
670 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
671 }
672 }
673
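/*
 * The transcendental helpers below (f2xm1, fyl2x, fptan, fpatan, fsin,
 * fcos, fsincos, fprem, fprem1, fyl2xp1) convert to the host's double
 * and use libm, so results carry at most 53 bits of precision instead
 * of the 64-bit significand real hardware computes, and the exception
 * flags are only approximated.
 */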
674 void helper_f2xm1(CPUX86State *env)
675 {
676 double val = floatx80_to_double(env, ST0);
677
678 val = pow(2.0, val) - 1.0;
679 ST0 = double_to_floatx80(env, val);
680 }
681
682 void helper_fyl2x(CPUX86State *env)
683 {
684 double fptemp = floatx80_to_double(env, ST0);
685
686 if (fptemp > 0.0) {
687 fptemp = log(fptemp) / log(2.0); /* log2(ST) */
688 fptemp *= floatx80_to_double(env, ST1);
689 ST1 = double_to_floatx80(env, fptemp);
690 fpop(env);
691 } else {
692 env->fpus &= ~0x4700;
693 env->fpus |= 0x400;
694 }
695 }
696
697 void helper_fptan(CPUX86State *env)
698 {
699 double fptemp = floatx80_to_double(env, ST0);
700
701 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
702 env->fpus |= 0x400;
703 } else {
704 fptemp = tan(fptemp);
705 ST0 = double_to_floatx80(env, fptemp);
706 fpush(env);
707 ST0 = floatx80_one;
708 env->fpus &= ~0x400; /* C2 <-- 0 */
709 /* the above code is for |arg| < 2**52 only */
710 }
711 }
712
713 void helper_fpatan(CPUX86State *env)
714 {
715 double fptemp, fpsrcop;
716
717 fpsrcop = floatx80_to_double(env, ST1);
718 fptemp = floatx80_to_double(env, ST0);
719 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
720 fpop(env);
721 }
722
723 void helper_fxtract(CPUX86State *env)
724 {
725 CPU_LDoubleU temp;
726
727 temp.d = ST0;
728
729 if (floatx80_is_zero(ST0)) {
730 /* Easy way to generate -inf and raise the division-by-zero exception */
731 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
732 &env->fp_status);
733 fpush(env);
734 ST0 = temp.d;
735 } else {
736 int expdif;
737
738 expdif = EXPD(temp) - EXPBIAS;
739 /* DP exponent bias */
740 ST0 = int32_to_floatx80(expdif, &env->fp_status);
741 fpush(env);
742 BIASEXPONENT(temp);
743 ST0 = temp.d;
744 }
745 }
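/*
 * FXTRACT splits ST0 into exponent and significand: afterwards ST1
 * holds the unbiased exponent as a float and ST0 the significand with
 * its exponent rebiased to zero (magnitude in [1, 2)).  For example,
 * 6.0 = 1.5 * 2^2 yields ST1 = 2.0 and ST0 = 1.5.
 */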
746
747 void helper_fprem1(CPUX86State *env)
748 {
749 double st0, st1, dblq, fpsrcop, fptemp;
750 CPU_LDoubleU fpsrcop1, fptemp1;
751 int expdif;
752 signed long long int q;
753
754 st0 = floatx80_to_double(env, ST0);
755 st1 = floatx80_to_double(env, ST1);
756
757 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
758 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
759 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
760 return;
761 }
762
763 fpsrcop = st0;
764 fptemp = st1;
765 fpsrcop1.d = ST0;
766 fptemp1.d = ST1;
767 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
768
769 if (expdif < 0) {
770 /* optimisation? taken from the AMD docs */
771 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
772 /* ST0 is unchanged */
773 return;
774 }
775
776 if (expdif < 53) {
777 dblq = fpsrcop / fptemp;
778 /* round dblq towards nearest integer */
779 dblq = rint(dblq);
780 st0 = fpsrcop - fptemp * dblq;
781
782 /* convert dblq to q by truncating towards zero */
783 if (dblq < 0.0) {
784 q = (signed long long int)(-dblq);
785 } else {
786 q = (signed long long int)dblq;
787 }
788
789 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
790 /* (C0,C3,C1) <-- (q2,q1,q0) */
791 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
792 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
793 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
794 } else {
795 env->fpus |= 0x400; /* C2 <-- 1 */
796 fptemp = pow(2.0, expdif - 50);
797 fpsrcop = (st0 / st1) / fptemp;
798 /* fpsrcop = integer obtained by chopping */
799 fpsrcop = (fpsrcop < 0.0) ?
800 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
801 st0 -= (st1 * fpsrcop * fptemp);
802 }
803 ST0 = double_to_floatx80(env, st0);
804 }
805
806 void helper_fprem(CPUX86State *env)
807 {
808 double st0, st1, dblq, fpsrcop, fptemp;
809 CPU_LDoubleU fpsrcop1, fptemp1;
810 int expdif;
811 signed long long int q;
812
813 st0 = floatx80_to_double(env, ST0);
814 st1 = floatx80_to_double(env, ST1);
815
816 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
817 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
818 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
819 return;
820 }
821
822 fpsrcop = st0;
823 fptemp = st1;
824 fpsrcop1.d = ST0;
825 fptemp1.d = ST1;
826 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
827
828 if (expdif < 0) {
829 /* optimisation? taken from the AMD docs */
830 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
831 /* ST0 is unchanged */
832 return;
833 }
834
835 if (expdif < 53) {
836 dblq = fpsrcop / fptemp; /* ST0 / ST1 */
837 /* round dblq towards zero */
838 dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
839 st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
840
841 /* convert dblq to q by truncating towards zero */
842 if (dblq < 0.0) {
843 q = (signed long long int)(-dblq);
844 } else {
845 q = (signed long long int)dblq;
846 }
847
848 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
849 /* (C0,C3,C1) <-- (q2,q1,q0) */
850 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
851 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
852 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
853 } else {
854 int N = 32 + (expdif % 32); /* as per AMD docs */
855
856 env->fpus |= 0x400; /* C2 <-- 1 */
857 fptemp = pow(2.0, (double)(expdif - N));
858 fpsrcop = (st0 / st1) / fptemp;
859 /* fpsrcop = integer obtained by chopping */
860 fpsrcop = (fpsrcop < 0.0) ?
861 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
862 st0 -= (st1 * fpsrcop * fptemp);
863 }
864 ST0 = double_to_floatx80(env, st0);
865 }
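/*
 * FPREM truncates the quotient towards zero while FPREM1 rounds it to
 * nearest (the IEEE remainder); both report the low three quotient bits
 * in C0/C3/C1 and set C2 when the reduction is only partial and the
 * instruction must be re-executed.  Hardware takes the partial path
 * when the exponent difference is 64 or more; the double-based
 * emulation above switches at 53.
 */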
866
867 void helper_fyl2xp1(CPUX86State *env)
868 {
869 double fptemp = floatx80_to_double(env, ST0);
870
871 if ((fptemp + 1.0) > 0.0) {
872 fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
873 fptemp *= floatx80_to_double(env, ST1);
874 ST1 = double_to_floatx80(env, fptemp);
875 fpop(env);
876 } else {
877 env->fpus &= ~0x4700;
878 env->fpus |= 0x400;
879 }
880 }
881
882 void helper_fsqrt(CPUX86State *env)
883 {
884 if (floatx80_is_neg(ST0)) {
885 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
886 env->fpus |= 0x400;
887 }
888 ST0 = floatx80_sqrt(ST0, &env->fp_status);
889 }
890
891 void helper_fsincos(CPUX86State *env)
892 {
893 double fptemp = floatx80_to_double(env, ST0);
894
895 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
896 env->fpus |= 0x400;
897 } else {
898 ST0 = double_to_floatx80(env, sin(fptemp));
899 fpush(env);
900 ST0 = double_to_floatx80(env, cos(fptemp));
901 env->fpus &= ~0x400; /* C2 <-- 0 */
902 /* the above code is for |arg| < 2**63 only */
903 }
904 }
905
906 void helper_frndint(CPUX86State *env)
907 {
908 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
909 }
910
911 void helper_fscale(CPUX86State *env)
912 {
913 if (floatx80_is_any_nan(ST1)) {
914 ST0 = ST1;
915 } else {
916 int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
917 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
918 }
919 }
920
921 void helper_fsin(CPUX86State *env)
922 {
923 double fptemp = floatx80_to_double(env, ST0);
924
925 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
926 env->fpus |= 0x400;
927 } else {
928 ST0 = double_to_floatx80(env, sin(fptemp));
929 env->fpus &= ~0x400; /* C2 <-- 0 */
930 /* the above code is for |arg| < 2**53 only */
931 }
932 }
933
934 void helper_fcos(CPUX86State *env)
935 {
936 double fptemp = floatx80_to_double(env, ST0);
937
938 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
939 env->fpus |= 0x400;
940 } else {
941 ST0 = double_to_floatx80(env, cos(fptemp));
942 env->fpus &= ~0x400; /* C2 <-- 0 */
943 /* the above code is for |arg| < 2**63 only */
944 }
945 }
946
947 void helper_fxam_ST0(CPUX86State *env)
948 {
949 CPU_LDoubleU temp;
950 int expdif;
951
952 temp.d = ST0;
953
954 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
955 if (SIGND(temp)) {
956 env->fpus |= 0x200; /* C1 <-- 1 */
957 }
958
959 /* XXX: test fptags too */
960 expdif = EXPD(temp);
961 if (expdif == MAXEXPD) {
962 if (MANTD(temp) == 0x8000000000000000ULL) {
963 env->fpus |= 0x500; /* Infinity */
964 } else {
965 env->fpus |= 0x100; /* NaN */
966 }
967 } else if (expdif == 0) {
968 if (MANTD(temp) == 0) {
969 env->fpus |= 0x4000; /* Zero */
970 } else {
971 env->fpus |= 0x4400; /* Denormal */
972 }
973 } else {
974 env->fpus |= 0x400;
975 }
976 }
977
978 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
979 uintptr_t retaddr)
980 {
981 int fpus, fptag, exp, i;
982 uint64_t mant;
983 CPU_LDoubleU tmp;
984
985 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
986 fptag = 0;
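/*
 * The environment image uses the full two-bit tag encoding (00 valid,
 * 01 zero, 10 special, 11 empty); env->fptags[] only tracks
 * empty/non-empty, so the finer classes are recomputed by inspecting
 * each register below.
 */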
987 for (i = 7; i >= 0; i--) {
988 fptag <<= 2;
989 if (env->fptags[i]) {
990 fptag |= 3;
991 } else {
992 tmp.d = env->fpregs[i].d;
993 exp = EXPD(tmp);
994 mant = MANTD(tmp);
995 if (exp == 0 && mant == 0) {
996 /* zero */
997 fptag |= 1;
998 } else if (exp == 0 || exp == MAXEXPD
999 || (mant & (1LL << 63)) == 0) {
1000 /* NaNs, infinity, denormal */
1001 fptag |= 2;
1002 }
1003 }
1004 }
1005 if (data32) {
1006 /* 32 bit */
1007 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1008 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1009 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1010 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1011 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1012 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1013 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1014 } else {
1015 /* 16 bit */
1016 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1017 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1018 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1019 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1020 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1021 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1022 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1023 }
1024 }
1025
1026 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1027 {
1028 do_fstenv(env, ptr, data32, GETPC());
1029 }
1030
1031 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1032 uintptr_t retaddr)
1033 {
1034 int i, fpus, fptag;
1035
1036 if (data32) {
1037 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1038 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1039 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1040 } else {
1041 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1042 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1043 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1044 }
1045 env->fpstt = (fpus >> 11) & 7;
1046 env->fpus = fpus & ~0x3800;
1047 for (i = 0; i < 8; i++) {
1048 env->fptags[i] = ((fptag & 3) == 3);
1049 fptag >>= 2;
1050 }
1051 }
1052
1053 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1054 {
1055 do_fldenv(env, ptr, data32, GETPC());
1056 }
1057
1058 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1059 {
1060 floatx80 tmp;
1061 int i;
1062
1063 do_fstenv(env, ptr, data32, GETPC());
1064
1065 ptr += (14 << data32);
1066 for (i = 0; i < 8; i++) {
1067 tmp = ST(i);
1068 helper_fstt(env, tmp, ptr, GETPC());
1069 ptr += 10;
1070 }
1071
1072 /* fninit */
1073 env->fpus = 0;
1074 env->fpstt = 0;
1075 cpu_set_fpuc(env, 0x37f);
1076 env->fptags[0] = 1;
1077 env->fptags[1] = 1;
1078 env->fptags[2] = 1;
1079 env->fptags[3] = 1;
1080 env->fptags[4] = 1;
1081 env->fptags[5] = 1;
1082 env->fptags[6] = 1;
1083 env->fptags[7] = 1;
1084 }
1085
1086 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1087 {
1088 floatx80 tmp;
1089 int i;
1090
1091 do_fldenv(env, ptr, data32, GETPC());
1092 ptr += (14 << data32);
1093
1094 for (i = 0; i < 8; i++) {
1095 tmp = helper_fldt(env, ptr, GETPC());
1096 ST(i) = tmp;
1097 ptr += 10;
1098 }
1099 }
1100
1101 #if defined(CONFIG_USER_ONLY)
1102 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
1103 {
1104 helper_fsave(env, ptr, data32);
1105 }
1106
1107 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
1108 {
1109 helper_frstor(env, ptr, data32);
1110 }
1111 #endif
1112
1113 #define XO(X) offsetof(X86XSaveArea, X)
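/*
 * X86XSaveArea follows the FXSAVE/XSAVE memory layout: a 512-byte
 * legacy region, the 64-byte XSAVE header (xstate_bv/xcomp_bv) and then
 * the extended component areas (AVX, MPX, PKRU, ...).  XO(field) is the
 * byte offset of a field within that image.
 */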
1114
1115 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1116 {
1117 int fpus, fptag, i;
1118 target_ulong addr;
1119
1120 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1121 fptag = 0;
1122 for (i = 0; i < 8; i++) {
1123 fptag |= (env->fptags[i] << i);
1124 }
1125
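/*
 * legacy.ftw holds the abridged tag word: one bit per register,
 * 1 = valid, 0 = empty, hence the XOR with 0xff below since
 * env->fptags uses 1 for empty.
 */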
1126 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
1127 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
1128 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
1129
1130 /* In 32-bit mode this is eip, sel, dp, sel.
1131 In 64-bit mode this is rip, rdp.
1132 But in either case we don't write actual data, just zeros. */
1133 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
1134 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
1135
1136 addr = ptr + XO(legacy.fpregs);
1137 for (i = 0; i < 8; i++) {
1138 floatx80 tmp = ST(i);
1139 helper_fstt(env, tmp, addr, ra);
1140 addr += 16;
1141 }
1142 }
1143
1144 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1145 {
1146 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
1147 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
1148 }
1149
1150 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1151 {
1152 int i, nb_xmm_regs;
1153 target_ulong addr;
1154
1155 if (env->hflags & HF_CS64_MASK) {
1156 nb_xmm_regs = 16;
1157 } else {
1158 nb_xmm_regs = 8;
1159 }
1160
1161 addr = ptr + XO(legacy.xmm_regs);
1162 for (i = 0; i < nb_xmm_regs; i++) {
1163 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1164 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1165 addr += 16;
1166 }
1167 }
1168
1169 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1170 {
1171 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1172 int i;
1173
1174 for (i = 0; i < 4; i++, addr += 16) {
1175 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1176 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1177 }
1178 }
1179
1180 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1181 {
1182 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
1183 env->bndcs_regs.cfgu, ra);
1184 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
1185 env->bndcs_regs.sts, ra);
1186 }
1187
1188 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1189 {
1190 cpu_stq_data_ra(env, ptr, env->pkru, ra);
1191 }
1192
1193 void helper_fxsave(CPUX86State *env, target_ulong ptr)
1194 {
1195 uintptr_t ra = GETPC();
1196
1197 /* The operand must be 16 byte aligned */
1198 if (ptr & 0xf) {
1199 raise_exception_ra(env, EXCP0D_GPF, ra);
1200 }
1201
1202 do_xsave_fpu(env, ptr, ra);
1203
1204 if (env->cr[4] & CR4_OSFXSR_MASK) {
1205 do_xsave_mxcsr(env, ptr, ra);
1206 /* Fast FXSAVE leaves out the XMM registers */
1207 if (!(env->efer & MSR_EFER_FFXSR)
1208 || (env->hflags & HF_CPL_MASK)
1209 || !(env->hflags & HF_LMA_MASK)) {
1210 do_xsave_sse(env, ptr, ra);
1211 }
1212 }
1213 }
1214
1215 static uint64_t get_xinuse(CPUX86State *env)
1216 {
1217 uint64_t inuse = -1;
1218
1219 /* For the most part, we don't track XINUSE. We could calculate it
1220 here for all components, but it's probably less work to simply
1221 indicate everything is in use. That said, the state of BNDREGS is important
1222 enough to track in HFLAGS, so we might as well use that here. */
1223 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1224 inuse &= ~XSTATE_BNDREGS_MASK;
1225 }
1226 return inuse;
1227 }
1228
1229 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1230 uint64_t inuse, uint64_t opt, uintptr_t ra)
1231 {
1232 uint64_t old_bv, new_bv;
1233
1234 /* The OS must have enabled XSAVE. */
1235 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1236 raise_exception_ra(env, EXCP06_ILLOP, ra);
1237 }
1238
1239 /* The operand must be 64 byte aligned. */
1240 if (ptr & 63) {
1241 raise_exception_ra(env, EXCP0D_GPF, ra);
1242 }
1243
1244 /* Never save anything not enabled by XCR0. */
1245 rfbm &= env->xcr0;
1246 opt &= rfbm;
1247
1248 if (opt & XSTATE_FP_MASK) {
1249 do_xsave_fpu(env, ptr, ra);
1250 }
1251 if (rfbm & XSTATE_SSE_MASK) {
1252 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
1253 do_xsave_mxcsr(env, ptr, ra);
1254 }
1255 if (opt & XSTATE_SSE_MASK) {
1256 do_xsave_sse(env, ptr, ra);
1257 }
1258 if (opt & XSTATE_BNDREGS_MASK) {
1259 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
1260 }
1261 if (opt & XSTATE_BNDCSR_MASK) {
1262 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
1263 }
1264 if (opt & XSTATE_PKRU_MASK) {
1265 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
1266 }
1267
1268 /* Update the XSTATE_BV field. */
1269 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1270 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1271 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
1272 }
1273
1274 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1275 {
1276 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1277 }
1278
1279 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1280 {
1281 uint64_t inuse = get_xinuse(env);
1282 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1283 }
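/*
 * XSAVE passes opt = -1 so every component selected by RFBM and XCR0 is
 * written, while XSAVEOPT passes the XINUSE estimate and may skip
 * components that are not in use; either way XSTATE_BV is rewritten so
 * it records which components of the area hold valid data.
 */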
1284
1285 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1286 {
1287 int i, fpuc, fpus, fptag;
1288 target_ulong addr;
1289
1290 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1291 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1292 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1293 cpu_set_fpuc(env, fpuc);
1294 env->fpstt = (fpus >> 11) & 7;
1295 env->fpus = fpus & ~0x3800;
1296 fptag ^= 0xff;
1297 for (i = 0; i < 8; i++) {
1298 env->fptags[i] = ((fptag >> i) & 1);
1299 }
1300
1301 addr = ptr + XO(legacy.fpregs);
1302 for (i = 0; i < 8; i++) {
1303 floatx80 tmp = helper_fldt(env, addr, ra);
1304 ST(i) = tmp;
1305 addr += 16;
1306 }
1307 }
1308
1309 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1310 {
1311 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
1312 }
1313
1314 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1315 {
1316 int i, nb_xmm_regs;
1317 target_ulong addr;
1318
1319 if (env->hflags & HF_CS64_MASK) {
1320 nb_xmm_regs = 16;
1321 } else {
1322 nb_xmm_regs = 8;
1323 }
1324
1325 addr = ptr + XO(legacy.xmm_regs);
1326 for (i = 0; i < nb_xmm_regs; i++) {
1327 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1328 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1329 addr += 16;
1330 }
1331 }
1332
1333 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1334 {
1335 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1336 int i;
1337
1338 for (i = 0; i < 4; i++, addr += 16) {
1339 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1340 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1341 }
1342 }
1343
1344 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1345 {
1346 /* FIXME: Extend highest implemented bit of linear address. */
1347 env->bndcs_regs.cfgu
1348 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1349 env->bndcs_regs.sts
1350 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
1351 }
1352
1353 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1354 {
1355 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
1356 }
1357
1358 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1359 {
1360 uintptr_t ra = GETPC();
1361
1362 /* The operand must be 16 byte aligned */
1363 if (ptr & 0xf) {
1364 raise_exception_ra(env, EXCP0D_GPF, ra);
1365 }
1366
1367 do_xrstor_fpu(env, ptr, ra);
1368
1369 if (env->cr[4] & CR4_OSFXSR_MASK) {
1370 do_xrstor_mxcsr(env, ptr, ra);
1371 /* Fast FXRSTOR leaves out the XMM registers */
1372 if (!(env->efer & MSR_EFER_FFXSR)
1373 || (env->hflags & HF_CPL_MASK)
1374 || !(env->hflags & HF_LMA_MASK)) {
1375 do_xrstor_sse(env, ptr, ra);
1376 }
1377 }
1378 }
1379
1380 #if defined(CONFIG_USER_ONLY)
1381 void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
1382 {
1383 helper_fxsave(env, ptr);
1384 }
1385
1386 void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
1387 {
1388 helper_fxrstor(env, ptr);
1389 }
1390 #endif
1391
1392 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1393 {
1394 uintptr_t ra = GETPC();
1395 uint64_t xstate_bv, xcomp_bv, reserve0;
1396
1397 rfbm &= env->xcr0;
1398
1399 /* The OS must have enabled XSAVE. */
1400 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1401 raise_exception_ra(env, EXCP06_ILLOP, ra);
1402 }
1403
1404 /* The operand must be 64 byte aligned. */
1405 if (ptr & 63) {
1406 raise_exception_ra(env, EXCP0D_GPF, ra);
1407 }
1408
1409 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1410
1411 if ((int64_t)xstate_bv < 0) {
1412 /* FIXME: Compact form. */
1413 raise_exception_ra(env, EXCP0D_GPF, ra);
1414 }
1415
1416 /* Standard form. */
1417
1418 /* The XSTATE_BV field must not set bits not present in XCR0. */
1419 if (xstate_bv & ~env->xcr0) {
1420 raise_exception_ra(env, EXCP0D_GPF, ra);
1421 }
1422
1423 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
1424 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
1425 describes only XCOMP_BV, but the description of the standard form
1426 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
1427 includes the next 64-bit field. */
1428 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
1429 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
1430 if (xcomp_bv || reserve0) {
1431 raise_exception_ra(env, EXCP0D_GPF, ra);
1432 }
1433
1434 if (rfbm & XSTATE_FP_MASK) {
1435 if (xstate_bv & XSTATE_FP_MASK) {
1436 do_xrstor_fpu(env, ptr, ra);
1437 } else {
1438 helper_fninit(env);
1439 memset(env->fpregs, 0, sizeof(env->fpregs));
1440 }
1441 }
1442 if (rfbm & XSTATE_SSE_MASK) {
1443 /* Note that the standard form of XRSTOR loads MXCSR from memory
1444 whether or not the XSTATE_BV bit is set. */
1445 do_xrstor_mxcsr(env, ptr, ra);
1446 if (xstate_bv & XSTATE_SSE_MASK) {
1447 do_xrstor_sse(env, ptr, ra);
1448 } else {
1449 /* ??? When AVX is implemented, we may have to be more
1450 selective in the clearing. */
1451 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1452 }
1453 }
1454 if (rfbm & XSTATE_BNDREGS_MASK) {
1455 if (xstate_bv & XSTATE_BNDREGS_MASK) {
1456 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
1457 env->hflags |= HF_MPX_IU_MASK;
1458 } else {
1459 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1460 env->hflags &= ~HF_MPX_IU_MASK;
1461 }
1462 }
1463 if (rfbm & XSTATE_BNDCSR_MASK) {
1464 if (xstate_bv & XSTATE_BNDCSR_MASK) {
1465 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
1466 } else {
1467 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1468 }
1469 cpu_sync_bndcs_hflags(env);
1470 }
1471 if (rfbm & XSTATE_PKRU_MASK) {
1472 uint64_t old_pkru = env->pkru;
1473 if (xstate_bv & XSTATE_PKRU_MASK) {
1474 do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
1475 } else {
1476 env->pkru = 0;
1477 }
1478 if (env->pkru != old_pkru) {
1479 CPUState *cs = CPU(x86_env_get_cpu(env));
1480 tlb_flush(cs);
1481 }
1482 }
1483 }
1484
1485 #undef XO
1486
1487 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1488 {
1489 /* The OS must have enabled XSAVE. */
1490 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1491 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1492 }
1493
1494 switch (ecx) {
1495 case 0:
1496 return env->xcr0;
1497 case 1:
1498 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1499 return env->xcr0 & get_xinuse(env);
1500 }
1501 break;
1502 }
1503 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1504 }
1505
1506 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1507 {
1508 uint32_t dummy, ena_lo, ena_hi;
1509 uint64_t ena;
1510
1511 /* The OS must have enabled XSAVE. */
1512 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1513 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1514 }
1515
1516 /* Only XCR0 is defined at present; the FPU may not be disabled. */
1517 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
1518 goto do_gpf;
1519 }
1520
1521 /* Disallow enabling unimplemented features. */
1522 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1523 ena = ((uint64_t)ena_hi << 32) | ena_lo;
1524 if (mask & ~ena) {
1525 goto do_gpf;
1526 }
1527
1528 /* Disallow enabling only half of MPX. */
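/*
 * BNDREGS is XCR0 bit 3 and BNDCSR bit 4, so multiplying by the ratio
 * of the two masks shifts the BNDREGS bit into the BNDCSR position;
 * the XOR then leaves BNDCSR set exactly when only one of the pair
 * was requested.
 */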
1529 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1530 & XSTATE_BNDCSR_MASK) {
1531 goto do_gpf;
1532 }
1533
1534 env->xcr0 = mask;
1535 cpu_sync_bndcs_hflags(env);
1536 return;
1537
1538 do_gpf:
1539 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1540 }
1541
1542 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
1543 {
1544 CPU_LDoubleU temp;
1545
1546 temp.d = f;
1547 *pmant = temp.l.lower;
1548 *pexp = temp.l.upper;
1549 }
1550
1551 floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
1552 {
1553 CPU_LDoubleU temp;
1554
1555 temp.l.upper = upper;
1556 temp.l.lower = mant;
1557 return temp.d;
1558 }
1559
1560 /* MMX/SSE */
1561 /* XXX: optimize by storing fptt and fptags in the static cpu state */
1562
1563 #define SSE_DAZ 0x0040
1564 #define SSE_RC_MASK 0x6000
1565 #define SSE_RC_NEAR 0x0000
1566 #define SSE_RC_DOWN 0x2000
1567 #define SSE_RC_UP 0x4000
1568 #define SSE_RC_CHOP 0x6000
1569 #define SSE_FZ 0x8000
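/*
 * MXCSR layout: bits 0..5 are the exception flags, bit 6 is DAZ
 * (denormals are zeros), bits 7..12 the exception masks, bits 13..14
 * the rounding control and bit 15 FZ (flush results to zero).
 */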
1570
1571 void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr)
1572 {
1573 int rnd_type;
1574
1575 env->mxcsr = mxcsr;
1576
1577 /* set rounding mode */
1578 switch (mxcsr & SSE_RC_MASK) {
1579 default:
1580 case SSE_RC_NEAR:
1581 rnd_type = float_round_nearest_even;
1582 break;
1583 case SSE_RC_DOWN:
1584 rnd_type = float_round_down;
1585 break;
1586 case SSE_RC_UP:
1587 rnd_type = float_round_up;
1588 break;
1589 case SSE_RC_CHOP:
1590 rnd_type = float_round_to_zero;
1591 break;
1592 }
1593 set_float_rounding_mode(rnd_type, &env->sse_status);
1594
1595 /* set denormals are zero */
1596 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1597
1598 /* set flush to zero */
1599 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
1600 }
1601
1602 void cpu_set_fpuc(CPUX86State *env, uint16_t val)
1603 {
1604 env->fpuc = val;
1605 update_fp_status(env);
1606 }
1607
1608 void helper_ldmxcsr(CPUX86State *env, uint32_t val)
1609 {
1610 cpu_set_mxcsr(env, val);
1611 }
1612
1613 void helper_enter_mmx(CPUX86State *env)
1614 {
1615 env->fpstt = 0;
1616 *(uint32_t *)(env->fptags) = 0;
1617 *(uint32_t *)(env->fptags + 4) = 0;
1618 }
1619
1620 void helper_emms(CPUX86State *env)
1621 {
1622 /* set to empty state */
1623 *(uint32_t *)(env->fptags) = 0x01010101;
1624 *(uint32_t *)(env->fptags + 4) = 0x01010101;
1625 }
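/*
 * The MMX registers alias the low 64 bits of the x87 data registers:
 * executing an MMX instruction resets TOP and marks every register
 * valid (helper_enter_mmx), while EMMS tags them all empty again so
 * later x87 code sees a clean stack.  fptags[] is eight bytes, so the
 * two 32-bit stores of 0x01010101 set all eight tags to 1.
 */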
1626
1627 /* XXX: suppress */
1628 void helper_movq(CPUX86State *env, void *d, void *s)
1629 {
1630 *(uint64_t *)d = *(uint64_t *)s;
1631 }
1632
1633 #define SHIFT 0
1634 #include "ops_sse.h"
1635
1636 #define SHIFT 1
1637 #include "ops_sse.h"