]> git.proxmox.com Git - mirror_qemu.git/blame - target-i386/fpu_helper.c
cpu: move exec-all.h inclusion out of cpu.h
[mirror_qemu.git] / target-i386 / fpu_helper.c
CommitLineData
f299f437
BS
1/*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
b6a0aa05 20#include "qemu/osdep.h"
f299f437
BS
21#include <math.h>
22#include "cpu.h"
2ef6175a 23#include "exec/helper-proto.h"
c334a388 24#include "qemu/host-utils.h"
63c91552 25#include "exec/exec-all.h"
f08b6170 26#include "exec/cpu_ldst.h"
92fc4b58 27
f299f437
BS
/* Rounding-control field of the x87 control word and its four encodings */
#define FPU_RC_MASK         0xc00
#define FPU_RC_NEAR         0x000
#define FPU_RC_DOWN         0x400
#define FPU_RC_UP           0x800
#define FPU_RC_CHOP         0xc00

/* 2**63: magnitude limit checked by the trigonometric helpers */
#define MAXTAN 9223372036854775808.0

/*
 * The following deal with x86 long double-precision numbers.
 * The argument of each accessor is parenthesised so that any lvalue
 * expression (e.g. *ptr or arr[i]) can be passed, not just a plain name.
 */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp)        ((fp).l.upper & 0x7fff)
#define SIGND(fp)       ((fp).l.upper & 0x8000)
#define MANTD(fp)       ((fp).l.lower)
/* Replace the exponent field with the bias, keeping the sign bit */
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)

/* Bits of the x87 status word */
#define FPUS_IE (1 << 0)
#define FPUS_DE (1 << 1)
#define FPUS_ZE (1 << 2)
#define FPUS_OE (1 << 3)
#define FPUS_UE (1 << 4)
#define FPUS_PE (1 << 5)
#define FPUS_SF (1 << 6)
#define FPUS_SE (1 << 7)
#define FPUS_B  (1 << 15)

/* Exception-mask bits of the x87 control word */
#define FPUC_EM 0x3f

#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
59
d3eb5eae 60static inline void fpush(CPUX86State *env)
f299f437
BS
61{
62 env->fpstt = (env->fpstt - 1) & 7;
63 env->fptags[env->fpstt] = 0; /* validate stack entry */
64}
65
d3eb5eae 66static inline void fpop(CPUX86State *env)
f299f437
BS
67{
68 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
69 env->fpstt = (env->fpstt + 1) & 7;
70}
71
6cad09d2
PD
72static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
73 uintptr_t retaddr)
f299f437
BS
74{
75 CPU_LDoubleU temp;
76
6cad09d2
PD
77 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
78 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
f299f437
BS
79 return temp.d;
80}
81
6cad09d2
PD
82static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
83 uintptr_t retaddr)
f299f437
BS
84{
85 CPU_LDoubleU temp;
86
87 temp.d = f;
6cad09d2
PD
88 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
89 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
f299f437
BS
90}
91
92/* x87 FPU helpers */
93
d3eb5eae 94static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
f299f437
BS
95{
96 union {
97 float64 f64;
98 double d;
99 } u;
100
101 u.f64 = floatx80_to_float64(a, &env->fp_status);
102 return u.d;
103}
104
d3eb5eae 105static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
f299f437
BS
106{
107 union {
108 float64 f64;
109 double d;
110 } u;
111
112 u.d = a;
113 return float64_to_floatx80(u.f64, &env->fp_status);
114}
115
d3eb5eae 116static void fpu_set_exception(CPUX86State *env, int mask)
f299f437
BS
117{
118 env->fpus |= mask;
119 if (env->fpus & (~env->fpuc & FPUC_EM)) {
120 env->fpus |= FPUS_SE | FPUS_B;
121 }
122}
123
d3eb5eae 124static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
f299f437
BS
125{
126 if (floatx80_is_zero(b)) {
d3eb5eae 127 fpu_set_exception(env, FPUS_ZE);
f299f437
BS
128 }
129 return floatx80_div(a, b, &env->fp_status);
130}
131
6cad09d2 132static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
f299f437
BS
133{
134 if (env->cr[0] & CR0_NE_MASK) {
6cad09d2 135 raise_exception_ra(env, EXCP10_COPR, retaddr);
f299f437
BS
136 }
137#if !defined(CONFIG_USER_ONLY)
138 else {
139 cpu_set_ferr(env);
140 }
141#endif
142}
143
d3eb5eae 144void helper_flds_FT0(CPUX86State *env, uint32_t val)
f299f437
BS
145{
146 union {
147 float32 f;
148 uint32_t i;
149 } u;
150
151 u.i = val;
152 FT0 = float32_to_floatx80(u.f, &env->fp_status);
153}
154
d3eb5eae 155void helper_fldl_FT0(CPUX86State *env, uint64_t val)
f299f437
BS
156{
157 union {
158 float64 f;
159 uint64_t i;
160 } u;
161
162 u.i = val;
163 FT0 = float64_to_floatx80(u.f, &env->fp_status);
164}
165
d3eb5eae 166void helper_fildl_FT0(CPUX86State *env, int32_t val)
f299f437
BS
167{
168 FT0 = int32_to_floatx80(val, &env->fp_status);
169}
170
d3eb5eae 171void helper_flds_ST0(CPUX86State *env, uint32_t val)
f299f437
BS
172{
173 int new_fpstt;
174 union {
175 float32 f;
176 uint32_t i;
177 } u;
178
179 new_fpstt = (env->fpstt - 1) & 7;
180 u.i = val;
181 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
182 env->fpstt = new_fpstt;
183 env->fptags[new_fpstt] = 0; /* validate stack entry */
184}
185
d3eb5eae 186void helper_fldl_ST0(CPUX86State *env, uint64_t val)
f299f437
BS
187{
188 int new_fpstt;
189 union {
190 float64 f;
191 uint64_t i;
192 } u;
193
194 new_fpstt = (env->fpstt - 1) & 7;
195 u.i = val;
196 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
197 env->fpstt = new_fpstt;
198 env->fptags[new_fpstt] = 0; /* validate stack entry */
199}
200
d3eb5eae 201void helper_fildl_ST0(CPUX86State *env, int32_t val)
f299f437
BS
202{
203 int new_fpstt;
204
205 new_fpstt = (env->fpstt - 1) & 7;
206 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
207 env->fpstt = new_fpstt;
208 env->fptags[new_fpstt] = 0; /* validate stack entry */
209}
210
d3eb5eae 211void helper_fildll_ST0(CPUX86State *env, int64_t val)
f299f437
BS
212{
213 int new_fpstt;
214
215 new_fpstt = (env->fpstt - 1) & 7;
216 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
217 env->fpstt = new_fpstt;
218 env->fptags[new_fpstt] = 0; /* validate stack entry */
219}
220
d3eb5eae 221uint32_t helper_fsts_ST0(CPUX86State *env)
f299f437
BS
222{
223 union {
224 float32 f;
225 uint32_t i;
226 } u;
227
228 u.f = floatx80_to_float32(ST0, &env->fp_status);
229 return u.i;
230}
231
d3eb5eae 232uint64_t helper_fstl_ST0(CPUX86State *env)
f299f437
BS
233{
234 union {
235 float64 f;
236 uint64_t i;
237 } u;
238
239 u.f = floatx80_to_float64(ST0, &env->fp_status);
240 return u.i;
241}
242
d3eb5eae 243int32_t helper_fist_ST0(CPUX86State *env)
f299f437
BS
244{
245 int32_t val;
246
247 val = floatx80_to_int32(ST0, &env->fp_status);
248 if (val != (int16_t)val) {
249 val = -32768;
250 }
251 return val;
252}
253
d3eb5eae 254int32_t helper_fistl_ST0(CPUX86State *env)
f299f437
BS
255{
256 int32_t val;
ea32aaf1
DP
257 signed char old_exp_flags;
258
259 old_exp_flags = get_float_exception_flags(&env->fp_status);
260 set_float_exception_flags(0, &env->fp_status);
f299f437
BS
261
262 val = floatx80_to_int32(ST0, &env->fp_status);
ea32aaf1
DP
263 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
264 val = 0x80000000;
265 }
266 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
267 | old_exp_flags, &env->fp_status);
f299f437
BS
268 return val;
269}
270
d3eb5eae 271int64_t helper_fistll_ST0(CPUX86State *env)
f299f437
BS
272{
273 int64_t val;
ea32aaf1
DP
274 signed char old_exp_flags;
275
276 old_exp_flags = get_float_exception_flags(&env->fp_status);
277 set_float_exception_flags(0, &env->fp_status);
f299f437 278
178846bd 279 val = floatx80_to_int64(ST0, &env->fp_status);
ea32aaf1
DP
280 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
281 val = 0x8000000000000000ULL;
282 }
283 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
284 | old_exp_flags, &env->fp_status);
f299f437
BS
285 return val;
286}
287
d3eb5eae 288int32_t helper_fistt_ST0(CPUX86State *env)
f299f437
BS
289{
290 int32_t val;
291
292 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
293 if (val != (int16_t)val) {
294 val = -32768;
295 }
296 return val;
297}
298
d3eb5eae 299int32_t helper_fisttl_ST0(CPUX86State *env)
f299f437
BS
300{
301 int32_t val;
302
303 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
304 return val;
305}
306
d3eb5eae 307int64_t helper_fisttll_ST0(CPUX86State *env)
f299f437
BS
308{
309 int64_t val;
310
311 val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
312 return val;
313}
314
d3eb5eae 315void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
f299f437
BS
316{
317 int new_fpstt;
318
319 new_fpstt = (env->fpstt - 1) & 7;
6cad09d2 320 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
f299f437
BS
321 env->fpstt = new_fpstt;
322 env->fptags[new_fpstt] = 0; /* validate stack entry */
323}
324
d3eb5eae 325void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
f299f437 326{
6cad09d2 327 helper_fstt(env, ST0, ptr, GETPC());
f299f437
BS
328}
329
d3eb5eae 330void helper_fpush(CPUX86State *env)
f299f437 331{
d3eb5eae 332 fpush(env);
f299f437
BS
333}
334
d3eb5eae 335void helper_fpop(CPUX86State *env)
f299f437 336{
d3eb5eae 337 fpop(env);
f299f437
BS
338}
339
d3eb5eae 340void helper_fdecstp(CPUX86State *env)
f299f437
BS
341{
342 env->fpstt = (env->fpstt - 1) & 7;
343 env->fpus &= ~0x4700;
344}
345
d3eb5eae 346void helper_fincstp(CPUX86State *env)
f299f437
BS
347{
348 env->fpstt = (env->fpstt + 1) & 7;
349 env->fpus &= ~0x4700;
350}
351
352/* FPU move */
353
d3eb5eae 354void helper_ffree_STN(CPUX86State *env, int st_index)
f299f437
BS
355{
356 env->fptags[(env->fpstt + st_index) & 7] = 1;
357}
358
d3eb5eae 359void helper_fmov_ST0_FT0(CPUX86State *env)
f299f437
BS
360{
361 ST0 = FT0;
362}
363
d3eb5eae 364void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
f299f437
BS
365{
366 FT0 = ST(st_index);
367}
368
d3eb5eae 369void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
f299f437
BS
370{
371 ST0 = ST(st_index);
372}
373
d3eb5eae 374void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
375{
376 ST(st_index) = ST0;
377}
378
d3eb5eae 379void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
f299f437
BS
380{
381 floatx80 tmp;
382
383 tmp = ST(st_index);
384 ST(st_index) = ST0;
385 ST0 = tmp;
386}
387
388/* FPU operations */
389
/*
 * Status-word condition bits for a compare result, indexed by
 * (compare result + 1).  Presumably the slots are less / equal /
 * greater / unordered in that order -- confirm against the softfloat
 * relation values.
 */
static const int fcom_ccval[4] = {
    [0] = 0x0100,
    [1] = 0x4000,
    [2] = 0x0000,
    [3] = 0x4500,
};
391
d3eb5eae 392void helper_fcom_ST0_FT0(CPUX86State *env)
f299f437
BS
393{
394 int ret;
395
396 ret = floatx80_compare(ST0, FT0, &env->fp_status);
397 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
398}
399
d3eb5eae 400void helper_fucom_ST0_FT0(CPUX86State *env)
f299f437
BS
401{
402 int ret;
403
404 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
405 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
406}
407
408static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
409
d3eb5eae 410void helper_fcomi_ST0_FT0(CPUX86State *env)
f299f437
BS
411{
412 int eflags;
413 int ret;
414
415 ret = floatx80_compare(ST0, FT0, &env->fp_status);
d3eb5eae 416 eflags = cpu_cc_compute_all(env, CC_OP);
f299f437
BS
417 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
418 CC_SRC = eflags;
419}
420
d3eb5eae 421void helper_fucomi_ST0_FT0(CPUX86State *env)
f299f437
BS
422{
423 int eflags;
424 int ret;
425
426 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
d3eb5eae 427 eflags = cpu_cc_compute_all(env, CC_OP);
f299f437
BS
428 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
429 CC_SRC = eflags;
430}
431
d3eb5eae 432void helper_fadd_ST0_FT0(CPUX86State *env)
f299f437
BS
433{
434 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
435}
436
d3eb5eae 437void helper_fmul_ST0_FT0(CPUX86State *env)
f299f437
BS
438{
439 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
440}
441
d3eb5eae 442void helper_fsub_ST0_FT0(CPUX86State *env)
f299f437
BS
443{
444 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
445}
446
d3eb5eae 447void helper_fsubr_ST0_FT0(CPUX86State *env)
f299f437
BS
448{
449 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
450}
451
d3eb5eae 452void helper_fdiv_ST0_FT0(CPUX86State *env)
f299f437 453{
d3eb5eae 454 ST0 = helper_fdiv(env, ST0, FT0);
f299f437
BS
455}
456
d3eb5eae 457void helper_fdivr_ST0_FT0(CPUX86State *env)
f299f437 458{
d3eb5eae 459 ST0 = helper_fdiv(env, FT0, ST0);
f299f437
BS
460}
461
462/* fp operations between STN and ST0 */
463
d3eb5eae 464void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
465{
466 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
467}
468
d3eb5eae 469void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
470{
471 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
472}
473
d3eb5eae 474void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
475{
476 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
477}
478
d3eb5eae 479void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
480{
481 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
482}
483
d3eb5eae 484void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
485{
486 floatx80 *p;
487
488 p = &ST(st_index);
d3eb5eae 489 *p = helper_fdiv(env, *p, ST0);
f299f437
BS
490}
491
d3eb5eae 492void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
493{
494 floatx80 *p;
495
496 p = &ST(st_index);
d3eb5eae 497 *p = helper_fdiv(env, ST0, *p);
f299f437
BS
498}
499
500/* misc FPU operations */
d3eb5eae 501void helper_fchs_ST0(CPUX86State *env)
f299f437
BS
502{
503 ST0 = floatx80_chs(ST0);
504}
505
d3eb5eae 506void helper_fabs_ST0(CPUX86State *env)
f299f437
BS
507{
508 ST0 = floatx80_abs(ST0);
509}
510
d3eb5eae 511void helper_fld1_ST0(CPUX86State *env)
f299f437
BS
512{
513 ST0 = floatx80_one;
514}
515
d3eb5eae 516void helper_fldl2t_ST0(CPUX86State *env)
f299f437
BS
517{
518 ST0 = floatx80_l2t;
519}
520
d3eb5eae 521void helper_fldl2e_ST0(CPUX86State *env)
f299f437
BS
522{
523 ST0 = floatx80_l2e;
524}
525
d3eb5eae 526void helper_fldpi_ST0(CPUX86State *env)
f299f437
BS
527{
528 ST0 = floatx80_pi;
529}
530
d3eb5eae 531void helper_fldlg2_ST0(CPUX86State *env)
f299f437
BS
532{
533 ST0 = floatx80_lg2;
534}
535
d3eb5eae 536void helper_fldln2_ST0(CPUX86State *env)
f299f437
BS
537{
538 ST0 = floatx80_ln2;
539}
540
d3eb5eae 541void helper_fldz_ST0(CPUX86State *env)
f299f437
BS
542{
543 ST0 = floatx80_zero;
544}
545
d3eb5eae 546void helper_fldz_FT0(CPUX86State *env)
f299f437
BS
547{
548 FT0 = floatx80_zero;
549}
550
d3eb5eae 551uint32_t helper_fnstsw(CPUX86State *env)
f299f437
BS
552{
553 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
554}
555
d3eb5eae 556uint32_t helper_fnstcw(CPUX86State *env)
f299f437
BS
557{
558 return env->fpuc;
559}
560
5bde1407 561void update_fp_status(CPUX86State *env)
f299f437
BS
562{
563 int rnd_type;
564
565 /* set rounding mode */
566 switch (env->fpuc & FPU_RC_MASK) {
567 default:
568 case FPU_RC_NEAR:
569 rnd_type = float_round_nearest_even;
570 break;
571 case FPU_RC_DOWN:
572 rnd_type = float_round_down;
573 break;
574 case FPU_RC_UP:
575 rnd_type = float_round_up;
576 break;
577 case FPU_RC_CHOP:
578 rnd_type = float_round_to_zero;
579 break;
580 }
581 set_float_rounding_mode(rnd_type, &env->fp_status);
582 switch ((env->fpuc >> 8) & 3) {
583 case 0:
584 rnd_type = 32;
585 break;
586 case 2:
587 rnd_type = 64;
588 break;
589 case 3:
590 default:
591 rnd_type = 80;
592 break;
593 }
594 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
595}
596
d3eb5eae 597void helper_fldcw(CPUX86State *env, uint32_t val)
f299f437 598{
5bde1407 599 cpu_set_fpuc(env, val);
f299f437
BS
600}
601
d3eb5eae 602void helper_fclex(CPUX86State *env)
f299f437
BS
603{
604 env->fpus &= 0x7f00;
605}
606
d3eb5eae 607void helper_fwait(CPUX86State *env)
f299f437
BS
608{
609 if (env->fpus & FPUS_SE) {
6cad09d2 610 fpu_raise_exception(env, GETPC());
f299f437
BS
611 }
612}
613
d3eb5eae 614void helper_fninit(CPUX86State *env)
f299f437
BS
615{
616 env->fpus = 0;
617 env->fpstt = 0;
5bde1407 618 cpu_set_fpuc(env, 0x37f);
f299f437
BS
619 env->fptags[0] = 1;
620 env->fptags[1] = 1;
621 env->fptags[2] = 1;
622 env->fptags[3] = 1;
623 env->fptags[4] = 1;
624 env->fptags[5] = 1;
625 env->fptags[6] = 1;
626 env->fptags[7] = 1;
627}
628
629/* BCD ops */
630
d3eb5eae 631void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
f299f437
BS
632{
633 floatx80 tmp;
634 uint64_t val;
635 unsigned int v;
636 int i;
637
638 val = 0;
639 for (i = 8; i >= 0; i--) {
6cad09d2 640 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
f299f437
BS
641 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
642 }
643 tmp = int64_to_floatx80(val, &env->fp_status);
6cad09d2 644 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
18b41f95 645 tmp = floatx80_chs(tmp);
f299f437 646 }
d3eb5eae 647 fpush(env);
f299f437
BS
648 ST0 = tmp;
649}
650
d3eb5eae 651void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
f299f437
BS
652{
653 int v;
654 target_ulong mem_ref, mem_end;
655 int64_t val;
656
657 val = floatx80_to_int64(ST0, &env->fp_status);
658 mem_ref = ptr;
659 mem_end = mem_ref + 9;
660 if (val < 0) {
6cad09d2 661 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
f299f437
BS
662 val = -val;
663 } else {
6cad09d2 664 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
f299f437
BS
665 }
666 while (mem_ref < mem_end) {
667 if (val == 0) {
668 break;
669 }
670 v = val % 100;
671 val = val / 100;
672 v = ((v / 10) << 4) | (v % 10);
6cad09d2 673 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
f299f437
BS
674 }
675 while (mem_ref < mem_end) {
6cad09d2 676 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
f299f437
BS
677 }
678}
679
d3eb5eae 680void helper_f2xm1(CPUX86State *env)
f299f437 681{
d3eb5eae 682 double val = floatx80_to_double(env, ST0);
f299f437
BS
683
684 val = pow(2.0, val) - 1.0;
d3eb5eae 685 ST0 = double_to_floatx80(env, val);
f299f437
BS
686}
687
d3eb5eae 688void helper_fyl2x(CPUX86State *env)
f299f437 689{
d3eb5eae 690 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
691
692 if (fptemp > 0.0) {
693 fptemp = log(fptemp) / log(2.0); /* log2(ST) */
d3eb5eae
BS
694 fptemp *= floatx80_to_double(env, ST1);
695 ST1 = double_to_floatx80(env, fptemp);
696 fpop(env);
f299f437
BS
697 } else {
698 env->fpus &= ~0x4700;
699 env->fpus |= 0x400;
700 }
701}
702
d3eb5eae 703void helper_fptan(CPUX86State *env)
f299f437 704{
d3eb5eae 705 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
706
707 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
708 env->fpus |= 0x400;
709 } else {
710 fptemp = tan(fptemp);
d3eb5eae
BS
711 ST0 = double_to_floatx80(env, fptemp);
712 fpush(env);
f299f437
BS
713 ST0 = floatx80_one;
714 env->fpus &= ~0x400; /* C2 <-- 0 */
715 /* the above code is for |arg| < 2**52 only */
716 }
717}
718
d3eb5eae 719void helper_fpatan(CPUX86State *env)
f299f437
BS
720{
721 double fptemp, fpsrcop;
722
d3eb5eae
BS
723 fpsrcop = floatx80_to_double(env, ST1);
724 fptemp = floatx80_to_double(env, ST0);
725 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
726 fpop(env);
f299f437
BS
727}
728
d3eb5eae 729void helper_fxtract(CPUX86State *env)
f299f437
BS
730{
731 CPU_LDoubleU temp;
732
733 temp.d = ST0;
734
735 if (floatx80_is_zero(ST0)) {
736 /* Easy way to generate -inf and raising division by 0 exception */
737 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
738 &env->fp_status);
d3eb5eae 739 fpush(env);
f299f437
BS
740 ST0 = temp.d;
741 } else {
742 int expdif;
743
744 expdif = EXPD(temp) - EXPBIAS;
745 /* DP exponent bias */
746 ST0 = int32_to_floatx80(expdif, &env->fp_status);
d3eb5eae 747 fpush(env);
f299f437
BS
748 BIASEXPONENT(temp);
749 ST0 = temp.d;
750 }
751}
752
d3eb5eae 753void helper_fprem1(CPUX86State *env)
f299f437
BS
754{
755 double st0, st1, dblq, fpsrcop, fptemp;
756 CPU_LDoubleU fpsrcop1, fptemp1;
757 int expdif;
758 signed long long int q;
759
d3eb5eae
BS
760 st0 = floatx80_to_double(env, ST0);
761 st1 = floatx80_to_double(env, ST1);
f299f437
BS
762
763 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
d3eb5eae 764 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
f299f437
BS
765 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
766 return;
767 }
768
769 fpsrcop = st0;
770 fptemp = st1;
771 fpsrcop1.d = ST0;
772 fptemp1.d = ST1;
773 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
774
775 if (expdif < 0) {
776 /* optimisation? taken from the AMD docs */
777 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
778 /* ST0 is unchanged */
779 return;
780 }
781
782 if (expdif < 53) {
783 dblq = fpsrcop / fptemp;
784 /* round dblq towards nearest integer */
785 dblq = rint(dblq);
786 st0 = fpsrcop - fptemp * dblq;
787
788 /* convert dblq to q by truncating towards zero */
789 if (dblq < 0.0) {
790 q = (signed long long int)(-dblq);
791 } else {
792 q = (signed long long int)dblq;
793 }
794
795 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
796 /* (C0,C3,C1) <-- (q2,q1,q0) */
797 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
798 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
799 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
800 } else {
801 env->fpus |= 0x400; /* C2 <-- 1 */
802 fptemp = pow(2.0, expdif - 50);
803 fpsrcop = (st0 / st1) / fptemp;
804 /* fpsrcop = integer obtained by chopping */
805 fpsrcop = (fpsrcop < 0.0) ?
806 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
807 st0 -= (st1 * fpsrcop * fptemp);
808 }
d3eb5eae 809 ST0 = double_to_floatx80(env, st0);
f299f437
BS
810}
811
d3eb5eae 812void helper_fprem(CPUX86State *env)
f299f437
BS
813{
814 double st0, st1, dblq, fpsrcop, fptemp;
815 CPU_LDoubleU fpsrcop1, fptemp1;
816 int expdif;
817 signed long long int q;
818
d3eb5eae
BS
819 st0 = floatx80_to_double(env, ST0);
820 st1 = floatx80_to_double(env, ST1);
f299f437
BS
821
822 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
d3eb5eae 823 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
f299f437
BS
824 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
825 return;
826 }
827
828 fpsrcop = st0;
829 fptemp = st1;
830 fpsrcop1.d = ST0;
831 fptemp1.d = ST1;
832 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
833
834 if (expdif < 0) {
835 /* optimisation? taken from the AMD docs */
836 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
837 /* ST0 is unchanged */
838 return;
839 }
840
841 if (expdif < 53) {
842 dblq = fpsrcop / fptemp; /* ST0 / ST1 */
843 /* round dblq towards zero */
844 dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
845 st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
846
847 /* convert dblq to q by truncating towards zero */
848 if (dblq < 0.0) {
849 q = (signed long long int)(-dblq);
850 } else {
851 q = (signed long long int)dblq;
852 }
853
854 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
855 /* (C0,C3,C1) <-- (q2,q1,q0) */
856 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
857 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
858 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
859 } else {
860 int N = 32 + (expdif % 32); /* as per AMD docs */
861
862 env->fpus |= 0x400; /* C2 <-- 1 */
863 fptemp = pow(2.0, (double)(expdif - N));
864 fpsrcop = (st0 / st1) / fptemp;
865 /* fpsrcop = integer obtained by chopping */
866 fpsrcop = (fpsrcop < 0.0) ?
867 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
868 st0 -= (st1 * fpsrcop * fptemp);
869 }
d3eb5eae 870 ST0 = double_to_floatx80(env, st0);
f299f437
BS
871}
872
d3eb5eae 873void helper_fyl2xp1(CPUX86State *env)
f299f437 874{
d3eb5eae 875 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
876
877 if ((fptemp + 1.0) > 0.0) {
878 fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
d3eb5eae
BS
879 fptemp *= floatx80_to_double(env, ST1);
880 ST1 = double_to_floatx80(env, fptemp);
881 fpop(env);
f299f437
BS
882 } else {
883 env->fpus &= ~0x4700;
884 env->fpus |= 0x400;
885 }
886}
887
d3eb5eae 888void helper_fsqrt(CPUX86State *env)
f299f437
BS
889{
890 if (floatx80_is_neg(ST0)) {
891 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
892 env->fpus |= 0x400;
893 }
894 ST0 = floatx80_sqrt(ST0, &env->fp_status);
895}
896
d3eb5eae 897void helper_fsincos(CPUX86State *env)
f299f437 898{
d3eb5eae 899 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
900
901 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
902 env->fpus |= 0x400;
903 } else {
d3eb5eae
BS
904 ST0 = double_to_floatx80(env, sin(fptemp));
905 fpush(env);
906 ST0 = double_to_floatx80(env, cos(fptemp));
f299f437
BS
907 env->fpus &= ~0x400; /* C2 <-- 0 */
908 /* the above code is for |arg| < 2**63 only */
909 }
910}
911
d3eb5eae 912void helper_frndint(CPUX86State *env)
f299f437
BS
913{
914 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
915}
916
d3eb5eae 917void helper_fscale(CPUX86State *env)
f299f437
BS
918{
919 if (floatx80_is_any_nan(ST1)) {
920 ST0 = ST1;
921 } else {
922 int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
923 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
924 }
925}
926
d3eb5eae 927void helper_fsin(CPUX86State *env)
f299f437 928{
d3eb5eae 929 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
930
931 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
932 env->fpus |= 0x400;
933 } else {
d3eb5eae 934 ST0 = double_to_floatx80(env, sin(fptemp));
f299f437
BS
935 env->fpus &= ~0x400; /* C2 <-- 0 */
936 /* the above code is for |arg| < 2**53 only */
937 }
938}
939
d3eb5eae 940void helper_fcos(CPUX86State *env)
f299f437 941{
d3eb5eae 942 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
943
944 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
945 env->fpus |= 0x400;
946 } else {
d3eb5eae 947 ST0 = double_to_floatx80(env, cos(fptemp));
f299f437
BS
948 env->fpus &= ~0x400; /* C2 <-- 0 */
949 /* the above code is for |arg| < 2**63 only */
950 }
951}
952
d3eb5eae 953void helper_fxam_ST0(CPUX86State *env)
f299f437
BS
954{
955 CPU_LDoubleU temp;
956 int expdif;
957
958 temp.d = ST0;
959
960 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
961 if (SIGND(temp)) {
962 env->fpus |= 0x200; /* C1 <-- 1 */
963 }
964
965 /* XXX: test fptags too */
966 expdif = EXPD(temp);
967 if (expdif == MAXEXPD) {
968 if (MANTD(temp) == 0x8000000000000000ULL) {
969 env->fpus |= 0x500; /* Infinity */
970 } else {
971 env->fpus |= 0x100; /* NaN */
972 }
973 } else if (expdif == 0) {
974 if (MANTD(temp) == 0) {
975 env->fpus |= 0x4000; /* Zero */
976 } else {
977 env->fpus |= 0x4400; /* Denormal */
978 }
979 } else {
980 env->fpus |= 0x400;
981 }
982}
983
6cad09d2
PD
984static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
985 uintptr_t retaddr)
f299f437
BS
986{
987 int fpus, fptag, exp, i;
988 uint64_t mant;
989 CPU_LDoubleU tmp;
990
991 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
992 fptag = 0;
993 for (i = 7; i >= 0; i--) {
994 fptag <<= 2;
995 if (env->fptags[i]) {
996 fptag |= 3;
997 } else {
998 tmp.d = env->fpregs[i].d;
999 exp = EXPD(tmp);
1000 mant = MANTD(tmp);
1001 if (exp == 0 && mant == 0) {
1002 /* zero */
1003 fptag |= 1;
1004 } else if (exp == 0 || exp == MAXEXPD
1005 || (mant & (1LL << 63)) == 0) {
1006 /* NaNs, infinity, denormal */
1007 fptag |= 2;
1008 }
1009 }
1010 }
1011 if (data32) {
1012 /* 32 bit */
6cad09d2
PD
1013 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1014 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1015 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1016 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1017 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1018 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1019 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
f299f437
BS
1020 } else {
1021 /* 16 bit */
6cad09d2
PD
1022 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1023 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1024 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1025 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1026 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1027 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1028 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
f299f437
BS
1029 }
1030}
1031
6cad09d2
PD
1032void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1033{
1034 do_fstenv(env, ptr, data32, GETPC());
1035}
1036
1037static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1038 uintptr_t retaddr)
f299f437
BS
1039{
1040 int i, fpus, fptag;
1041
1042 if (data32) {
6cad09d2
PD
1043 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1044 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1045 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
f299f437 1046 } else {
6cad09d2
PD
1047 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1048 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1049 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
f299f437
BS
1050 }
1051 env->fpstt = (fpus >> 11) & 7;
1052 env->fpus = fpus & ~0x3800;
1053 for (i = 0; i < 8; i++) {
1054 env->fptags[i] = ((fptag & 3) == 3);
1055 fptag >>= 2;
1056 }
1057}
1058
6cad09d2
PD
1059void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1060{
1061 do_fldenv(env, ptr, data32, GETPC());
1062}
1063
d3eb5eae 1064void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
f299f437
BS
1065{
1066 floatx80 tmp;
1067 int i;
1068
6cad09d2 1069 do_fstenv(env, ptr, data32, GETPC());
f299f437
BS
1070
1071 ptr += (14 << data32);
1072 for (i = 0; i < 8; i++) {
1073 tmp = ST(i);
6cad09d2 1074 helper_fstt(env, tmp, ptr, GETPC());
f299f437
BS
1075 ptr += 10;
1076 }
1077
1078 /* fninit */
1079 env->fpus = 0;
1080 env->fpstt = 0;
5bde1407 1081 cpu_set_fpuc(env, 0x37f);
f299f437
BS
1082 env->fptags[0] = 1;
1083 env->fptags[1] = 1;
1084 env->fptags[2] = 1;
1085 env->fptags[3] = 1;
1086 env->fptags[4] = 1;
1087 env->fptags[5] = 1;
1088 env->fptags[6] = 1;
1089 env->fptags[7] = 1;
1090}
1091
d3eb5eae 1092void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
f299f437
BS
1093{
1094 floatx80 tmp;
1095 int i;
1096
6cad09d2 1097 do_fldenv(env, ptr, data32, GETPC());
f299f437
BS
1098 ptr += (14 << data32);
1099
1100 for (i = 0; i < 8; i++) {
6cad09d2 1101 tmp = helper_fldt(env, ptr, GETPC());
f299f437
BS
1102 ST(i) = tmp;
1103 ptr += 10;
1104 }
1105}
1106
1107#if defined(CONFIG_USER_ONLY)
d3eb5eae 1108void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
f299f437 1109{
d3eb5eae 1110 helper_fsave(env, ptr, data32);
f299f437
BS
1111}
1112
d3eb5eae 1113void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
f299f437 1114{
d3eb5eae 1115 helper_frstor(env, ptr, data32);
f299f437
BS
1116}
1117#endif
1118
64dbaff0 1119static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
f299f437 1120{
64dbaff0 1121 int fpus, fptag, i;
f299f437
BS
1122 target_ulong addr;
1123
f299f437
BS
1124 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1125 fptag = 0;
1126 for (i = 0; i < 8; i++) {
1127 fptag |= (env->fptags[i] << i);
1128 }
64dbaff0
RH
1129 cpu_stw_data_ra(env, ptr, env->fpuc, ra);
1130 cpu_stw_data_ra(env, ptr + 2, fpus, ra);
1131 cpu_stw_data_ra(env, ptr + 4, fptag ^ 0xff, ra);
1132
1133 /* In 32-bit mode this is eip, sel, dp, sel.
1134 In 64-bit mode this is rip, rdp.
1135 But in either case we don't write actual data, just zeros. */
1136 cpu_stq_data_ra(env, ptr + 0x08, 0, ra); /* eip+sel; rip */
1137 cpu_stq_data_ra(env, ptr + 0x10, 0, ra); /* edp+sel; rdp */
f299f437
BS
1138
1139 addr = ptr + 0x20;
1140 for (i = 0; i < 8; i++) {
64dbaff0
RH
1141 floatx80 tmp = ST(i);
1142 helper_fstt(env, tmp, addr, ra);
f299f437
BS
1143 addr += 16;
1144 }
64dbaff0
RH
1145}
1146
1147static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1148{
1149 cpu_stl_data_ra(env, ptr + 0x18, env->mxcsr, ra); /* mxcsr */
1150 cpu_stl_data_ra(env, ptr + 0x1c, 0x0000ffff, ra); /* mxcsr_mask */
1151}
1152
1153static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1154{
1155 int i, nb_xmm_regs;
1156 target_ulong addr;
1157
1158 if (env->hflags & HF_CS64_MASK) {
1159 nb_xmm_regs = 16;
1160 } else {
1161 nb_xmm_regs = 8;
1162 }
1163
1164 addr = ptr + 0xa0;
1165 for (i = 0; i < nb_xmm_regs; i++) {
1166 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1167 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1168 addr += 16;
1169 }
1170}
1171
f4f1110e
RH
1172static void do_xsave_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
1173{
1174 int i;
1175
1176 for (i = 0; i < 4; i++, addr += 16) {
1177 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1178 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1179 }
1180}
1181
1182static void do_xsave_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
1183{
1184 cpu_stq_data_ra(env, addr, env->bndcs_regs.cfgu, ra);
1185 cpu_stq_data_ra(env, addr + 8, env->bndcs_regs.sts, ra);
1186}
1187
0f70ed47
PB
1188static void do_xsave_pkru(CPUX86State *env, target_ulong addr, uintptr_t ra)
1189{
1190 cpu_stq_data_ra(env, addr, env->pkru, ra);
1191}
1192
64dbaff0
RH
1193void helper_fxsave(CPUX86State *env, target_ulong ptr)
1194{
1195 uintptr_t ra = GETPC();
1196
1197 /* The operand must be 16 byte aligned */
1198 if (ptr & 0xf) {
1199 raise_exception_ra(env, EXCP0D_GPF, ra);
1200 }
1201
1202 do_xsave_fpu(env, ptr, ra);
f299f437
BS
1203
1204 if (env->cr[4] & CR4_OSFXSR_MASK) {
64dbaff0 1205 do_xsave_mxcsr(env, ptr, ra);
f299f437
BS
1206 /* Fast FXSAVE leaves out the XMM registers */
1207 if (!(env->efer & MSR_EFER_FFXSR)
1208 || (env->hflags & HF_CPL_MASK)
1209 || !(env->hflags & HF_LMA_MASK)) {
64dbaff0 1210 do_xsave_sse(env, ptr, ra);
f299f437
BS
1211 }
1212 }
1213}
1214
19dc85db
RH
1215static uint64_t get_xinuse(CPUX86State *env)
1216{
f4f1110e
RH
1217 uint64_t inuse = -1;
1218
1219 /* For the most part, we don't track XINUSE. We could calculate it
1220 here for all components, but it's probably less work to simply
1221 indicate in use. That said, the state of BNDREGS is important
1222 enough to track in HFLAGS, so we might as well use that here. */
1223 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
cfc3b074 1224 inuse &= ~XSTATE_BNDREGS_MASK;
f4f1110e
RH
1225 }
1226 return inuse;
19dc85db
RH
1227}
1228
c9cfe8f9
RH
1229static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1230 uint64_t inuse, uint64_t opt, uintptr_t ra)
19dc85db 1231{
19dc85db
RH
1232 uint64_t old_bv, new_bv;
1233
1234 /* The OS must have enabled XSAVE. */
1235 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1236 raise_exception_ra(env, EXCP06_ILLOP, ra);
1237 }
1238
1239 /* The operand must be 64 byte aligned. */
1240 if (ptr & 63) {
1241 raise_exception_ra(env, EXCP0D_GPF, ra);
1242 }
1243
1244 /* Never save anything not enabled by XCR0. */
1245 rfbm &= env->xcr0;
c9cfe8f9 1246 opt &= rfbm;
19dc85db 1247
cfc3b074 1248 if (opt & XSTATE_FP_MASK) {
19dc85db
RH
1249 do_xsave_fpu(env, ptr, ra);
1250 }
cfc3b074 1251 if (rfbm & XSTATE_SSE_MASK) {
c9cfe8f9 1252 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
19dc85db 1253 do_xsave_mxcsr(env, ptr, ra);
c9cfe8f9 1254 }
cfc3b074 1255 if (opt & XSTATE_SSE_MASK) {
19dc85db
RH
1256 do_xsave_sse(env, ptr, ra);
1257 }
cfc3b074
PB
1258 if (opt & XSTATE_BNDREGS_MASK) {
1259 target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset;
f4f1110e
RH
1260 do_xsave_bndregs(env, ptr + off, ra);
1261 }
cfc3b074
PB
1262 if (opt & XSTATE_BNDCSR_MASK) {
1263 target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset;
f4f1110e
RH
1264 do_xsave_bndcsr(env, ptr + off, ra);
1265 }
0f70ed47
PB
1266 if (opt & XSTATE_PKRU_MASK) {
1267 target_ulong off = x86_ext_save_areas[XSTATE_PKRU_BIT].offset;
1268 do_xsave_pkru(env, ptr + off, ra);
1269 }
19dc85db
RH
1270
1271 /* Update the XSTATE_BV field. */
1272 old_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
c9cfe8f9 1273 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
19dc85db
RH
1274 cpu_stq_data_ra(env, ptr + 512, new_bv, ra);
1275}
1276
c9cfe8f9
RH
1277void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1278{
1279 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1280}
1281
1282void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1283{
1284 uint64_t inuse = get_xinuse(env);
1285 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1286}
1287
64dbaff0 1288static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
f299f437 1289{
64dbaff0 1290 int i, fpus, fptag;
f299f437
BS
1291 target_ulong addr;
1292
64dbaff0
RH
1293 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, ra));
1294 fpus = cpu_lduw_data_ra(env, ptr + 2, ra);
1295 fptag = cpu_lduw_data_ra(env, ptr + 4, ra);
f299f437
BS
1296 env->fpstt = (fpus >> 11) & 7;
1297 env->fpus = fpus & ~0x3800;
1298 fptag ^= 0xff;
1299 for (i = 0; i < 8; i++) {
1300 env->fptags[i] = ((fptag >> i) & 1);
1301 }
1302
1303 addr = ptr + 0x20;
1304 for (i = 0; i < 8; i++) {
64dbaff0 1305 floatx80 tmp = helper_fldt(env, addr, ra);
f299f437
BS
1306 ST(i) = tmp;
1307 addr += 16;
1308 }
64dbaff0
RH
1309}
1310
1311static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1312{
1313 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + 0x18, ra));
1314}
1315
1316static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1317{
1318 int i, nb_xmm_regs;
1319 target_ulong addr;
1320
1321 if (env->hflags & HF_CS64_MASK) {
1322 nb_xmm_regs = 16;
1323 } else {
1324 nb_xmm_regs = 8;
1325 }
1326
1327 addr = ptr + 0xa0;
1328 for (i = 0; i < nb_xmm_regs; i++) {
1329 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1330 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1331 addr += 16;
1332 }
1333}
1334
f4f1110e
RH
1335static void do_xrstor_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
1336{
1337 int i;
1338
1339 for (i = 0; i < 4; i++, addr += 16) {
1340 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1341 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1342 }
1343}
1344
1345static void do_xrstor_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
1346{
1347 /* FIXME: Extend highest implemented bit of linear address. */
1348 env->bndcs_regs.cfgu = cpu_ldq_data_ra(env, addr, ra);
1349 env->bndcs_regs.sts = cpu_ldq_data_ra(env, addr + 8, ra);
1350}
1351
0f70ed47
PB
1352static void do_xrstor_pkru(CPUX86State *env, target_ulong addr, uintptr_t ra)
1353{
1354 env->pkru = cpu_ldq_data_ra(env, addr, ra);
1355}
1356
64dbaff0
RH
1357void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1358{
1359 uintptr_t ra = GETPC();
1360
1361 /* The operand must be 16 byte aligned */
1362 if (ptr & 0xf) {
1363 raise_exception_ra(env, EXCP0D_GPF, ra);
1364 }
1365
1366 do_xrstor_fpu(env, ptr, ra);
f299f437
BS
1367
1368 if (env->cr[4] & CR4_OSFXSR_MASK) {
64dbaff0
RH
1369 do_xrstor_mxcsr(env, ptr, ra);
1370 /* Fast FXRSTOR leaves out the XMM registers */
f299f437
BS
1371 if (!(env->efer & MSR_EFER_FFXSR)
1372 || (env->hflags & HF_CPL_MASK)
1373 || !(env->hflags & HF_LMA_MASK)) {
64dbaff0 1374 do_xrstor_sse(env, ptr, ra);
f299f437
BS
1375 }
1376 }
1377}
1378
19dc85db
RH
1379void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1380{
1381 uintptr_t ra = GETPC();
1382 uint64_t xstate_bv, xcomp_bv0, xcomp_bv1;
1383
1384 rfbm &= env->xcr0;
1385
1386 /* The OS must have enabled XSAVE. */
1387 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1388 raise_exception_ra(env, EXCP06_ILLOP, ra);
1389 }
1390
1391 /* The operand must be 64 byte aligned. */
1392 if (ptr & 63) {
1393 raise_exception_ra(env, EXCP0D_GPF, ra);
1394 }
1395
1396 xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
1397
1398 if ((int64_t)xstate_bv < 0) {
1399 /* FIXME: Compact form. */
1400 raise_exception_ra(env, EXCP0D_GPF, ra);
1401 }
1402
1403 /* Standard form. */
1404
1405 /* The XSTATE field must not set bits not present in XCR0. */
1406 if (xstate_bv & ~env->xcr0) {
1407 raise_exception_ra(env, EXCP0D_GPF, ra);
1408 }
1409
1410 /* The XCOMP field must be zero. */
1411 xcomp_bv0 = cpu_ldq_data_ra(env, ptr + 520, ra);
1412 xcomp_bv1 = cpu_ldq_data_ra(env, ptr + 528, ra);
1413 if (xcomp_bv0 || xcomp_bv1) {
1414 raise_exception_ra(env, EXCP0D_GPF, ra);
1415 }
1416
cfc3b074
PB
1417 if (rfbm & XSTATE_FP_MASK) {
1418 if (xstate_bv & XSTATE_FP_MASK) {
19dc85db
RH
1419 do_xrstor_fpu(env, ptr, ra);
1420 } else {
1421 helper_fninit(env);
1422 memset(env->fpregs, 0, sizeof(env->fpregs));
1423 }
1424 }
cfc3b074 1425 if (rfbm & XSTATE_SSE_MASK) {
19dc85db
RH
1426 /* Note that the standard form of XRSTOR loads MXCSR from memory
1427 whether or not the XSTATE_BV bit is set. */
1428 do_xrstor_mxcsr(env, ptr, ra);
cfc3b074 1429 if (xstate_bv & XSTATE_SSE_MASK) {
19dc85db
RH
1430 do_xrstor_sse(env, ptr, ra);
1431 } else {
1432 /* ??? When AVX is implemented, we may have to be more
1433 selective in the clearing. */
1434 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1435 }
1436 }
cfc3b074
PB
1437 if (rfbm & XSTATE_BNDREGS_MASK) {
1438 if (xstate_bv & XSTATE_BNDREGS_MASK) {
1439 target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset;
f4f1110e
RH
1440 do_xrstor_bndregs(env, ptr + off, ra);
1441 env->hflags |= HF_MPX_IU_MASK;
1442 } else {
1443 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1444 env->hflags &= ~HF_MPX_IU_MASK;
1445 }
1446 }
cfc3b074
PB
1447 if (rfbm & XSTATE_BNDCSR_MASK) {
1448 if (xstate_bv & XSTATE_BNDCSR_MASK) {
1449 target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset;
f4f1110e
RH
1450 do_xrstor_bndcsr(env, ptr + off, ra);
1451 } else {
1452 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1453 }
1454 cpu_sync_bndcs_hflags(env);
1455 }
0f70ed47
PB
1456 if (rfbm & XSTATE_PKRU_MASK) {
1457 uint64_t old_pkru = env->pkru;
1458 if (xstate_bv & XSTATE_PKRU_MASK) {
1459 target_ulong off = x86_ext_save_areas[XSTATE_PKRU_BIT].offset;
1460 do_xrstor_pkru(env, ptr + off, ra);
1461 } else {
1462 env->pkru = 0;
1463 }
1464 if (env->pkru != old_pkru) {
1465 CPUState *cs = CPU(x86_env_get_cpu(env));
1466 tlb_flush(cs, 1);
1467 }
1468 }
19dc85db
RH
1469}
1470
1471uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1472{
1473 /* The OS must have enabled XSAVE. */
1474 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1475 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1476 }
1477
1478 switch (ecx) {
1479 case 0:
1480 return env->xcr0;
1481 case 1:
c9cfe8f9
RH
1482 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1483 return env->xcr0 & get_xinuse(env);
1484 }
1485 break;
19dc85db
RH
1486 }
1487 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1488}
1489
1490void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1491{
1492 uint32_t dummy, ena_lo, ena_hi;
1493 uint64_t ena;
1494
1495 /* The OS must have enabled XSAVE. */
1496 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1497 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1498 }
1499
1500 /* Only XCR0 is defined at present; the FPU may not be disabled. */
cfc3b074 1501 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
19dc85db
RH
1502 goto do_gpf;
1503 }
1504
1505 /* Disallow enabling unimplemented features. */
1506 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1507 ena = ((uint64_t)ena_hi << 32) | ena_lo;
1508 if (mask & ~ena) {
1509 goto do_gpf;
1510 }
1511
f4f1110e 1512 /* Disallow enabling only half of MPX. */
cfc3b074
PB
1513 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1514 & XSTATE_BNDCSR_MASK) {
f4f1110e
RH
1515 goto do_gpf;
1516 }
1517
19dc85db 1518 env->xcr0 = mask;
f4f1110e 1519 cpu_sync_bndcs_hflags(env);
19dc85db
RH
1520 return;
1521
1522 do_gpf:
1523 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1524}
1525
f299f437
BS
1526void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
1527{
1528 CPU_LDoubleU temp;
1529
1530 temp.d = f;
1531 *pmant = temp.l.lower;
1532 *pexp = temp.l.upper;
1533}
1534
1535floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
1536{
1537 CPU_LDoubleU temp;
1538
1539 temp.l.upper = upper;
1540 temp.l.lower = mant;
1541 return temp.d;
1542}
1543
1544/* MMX/SSE */
1545/* XXX: optimize by storing fptt and fptags in the static cpu state */
1546
1547#define SSE_DAZ 0x0040
1548#define SSE_RC_MASK 0x6000
1549#define SSE_RC_NEAR 0x0000
1550#define SSE_RC_DOWN 0x2000
1551#define SSE_RC_UP 0x4000
1552#define SSE_RC_CHOP 0x6000
1553#define SSE_FZ 0x8000
1554
4e47e39a 1555void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr)
f299f437
BS
1556{
1557 int rnd_type;
1558
4e47e39a
RH
1559 env->mxcsr = mxcsr;
1560
f299f437 1561 /* set rounding mode */
4e47e39a 1562 switch (mxcsr & SSE_RC_MASK) {
f299f437
BS
1563 default:
1564 case SSE_RC_NEAR:
1565 rnd_type = float_round_nearest_even;
1566 break;
1567 case SSE_RC_DOWN:
1568 rnd_type = float_round_down;
1569 break;
1570 case SSE_RC_UP:
1571 rnd_type = float_round_up;
1572 break;
1573 case SSE_RC_CHOP:
1574 rnd_type = float_round_to_zero;
1575 break;
1576 }
1577 set_float_rounding_mode(rnd_type, &env->sse_status);
1578
1579 /* set denormals are zero */
4e47e39a 1580 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
f299f437
BS
1581
1582 /* set flush to zero */
4e47e39a 1583 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
f299f437
BS
1584}
1585
5bde1407
PD
1586void cpu_set_fpuc(CPUX86State *env, uint16_t val)
1587{
1588 env->fpuc = val;
1589 update_fp_status(env);
1590}
1591
d3eb5eae 1592void helper_ldmxcsr(CPUX86State *env, uint32_t val)
f299f437 1593{
4e47e39a 1594 cpu_set_mxcsr(env, val);
f299f437
BS
1595}
1596
d3eb5eae 1597void helper_enter_mmx(CPUX86State *env)
f299f437
BS
1598{
1599 env->fpstt = 0;
1600 *(uint32_t *)(env->fptags) = 0;
1601 *(uint32_t *)(env->fptags + 4) = 0;
1602}
1603
d3eb5eae 1604void helper_emms(CPUX86State *env)
f299f437
BS
1605{
1606 /* set to empty state */
1607 *(uint32_t *)(env->fptags) = 0x01010101;
1608 *(uint32_t *)(env->fptags + 4) = 0x01010101;
1609}
1610
1611/* XXX: suppress */
d3eb5eae 1612void helper_movq(CPUX86State *env, void *d, void *s)
f299f437
BS
1613{
1614 *(uint64_t *)d = *(uint64_t *)s;
1615}
1616
1617#define SHIFT 0
1618#include "ops_sse.h"
1619
1620#define SHIFT 1
1621#include "ops_sse.h"