]> git.proxmox.com Git - mirror_qemu.git/blame - target-i386/fpu_helper.c
spapr_pci: Switch to vfio_eeh_as_op() interface
[mirror_qemu.git] / target-i386 / fpu_helper.c
CommitLineData
f299f437
BS
1/*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
b6a0aa05 20#include "qemu/osdep.h"
f299f437
BS
21#include <math.h>
22#include "cpu.h"
2ef6175a 23#include "exec/helper-proto.h"
c334a388 24#include "qemu/host-utils.h"
f08b6170 25#include "exec/cpu_ldst.h"
92fc4b58 26
f299f437
BS
/* Rounding-control field of the x87 control word (see update_fp_status). */
#define FPU_RC_MASK 0xc00
#define FPU_RC_NEAR 0x000
#define FPU_RC_DOWN 0x400
#define FPU_RC_UP 0x800
#define FPU_RC_CHOP 0xc00

/* 2**63: largest magnitude accepted by the trigonometric helpers */
#define MAXTAN 9223372036854775808.0

/*
 * The following deal with x86 long double-precision numbers.
 * Each macro takes an lvalue of a CPU_LDoubleU-like type; the argument is
 * parenthesised so that expressions such as *ptr or array[i] work.
 */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp) ((fp).l.upper & 0x7fff)
#define SIGND(fp) ((fp).l.upper & 0x8000)
#define MANTD(fp) ((fp).l.lower)
/* Replace the exponent with the bias (i.e. 2**0), keeping the sign bit. */
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)

/* Bits of the x87 status word (env->fpus) */
#define FPUS_IE (1 << 0)
#define FPUS_DE (1 << 1)
#define FPUS_ZE (1 << 2)
#define FPUS_OE (1 << 3)
#define FPUS_UE (1 << 4)
#define FPUS_PE (1 << 5)
#define FPUS_SF (1 << 6)
#define FPUS_SE (1 << 7)
#define FPUS_B (1 << 15)

/* Exception-mask bits of the x87 control word (env->fpuc) */
#define FPUC_EM 0x3f

/* Constants pushed by FLDLG2, FLDL2E and FLDL2T respectively */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
58
d3eb5eae 59static inline void fpush(CPUX86State *env)
f299f437
BS
60{
61 env->fpstt = (env->fpstt - 1) & 7;
62 env->fptags[env->fpstt] = 0; /* validate stack entry */
63}
64
d3eb5eae 65static inline void fpop(CPUX86State *env)
f299f437
BS
66{
67 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
68 env->fpstt = (env->fpstt + 1) & 7;
69}
70
6cad09d2
PD
71static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
72 uintptr_t retaddr)
f299f437
BS
73{
74 CPU_LDoubleU temp;
75
6cad09d2
PD
76 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
77 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
f299f437
BS
78 return temp.d;
79}
80
6cad09d2
PD
81static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
82 uintptr_t retaddr)
f299f437
BS
83{
84 CPU_LDoubleU temp;
85
86 temp.d = f;
6cad09d2
PD
87 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
88 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
f299f437
BS
89}
90
91/* x87 FPU helpers */
92
d3eb5eae 93static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
f299f437
BS
94{
95 union {
96 float64 f64;
97 double d;
98 } u;
99
100 u.f64 = floatx80_to_float64(a, &env->fp_status);
101 return u.d;
102}
103
d3eb5eae 104static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
f299f437
BS
105{
106 union {
107 float64 f64;
108 double d;
109 } u;
110
111 u.d = a;
112 return float64_to_floatx80(u.f64, &env->fp_status);
113}
114
d3eb5eae 115static void fpu_set_exception(CPUX86State *env, int mask)
f299f437
BS
116{
117 env->fpus |= mask;
118 if (env->fpus & (~env->fpuc & FPUC_EM)) {
119 env->fpus |= FPUS_SE | FPUS_B;
120 }
121}
122
d3eb5eae 123static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
f299f437
BS
124{
125 if (floatx80_is_zero(b)) {
d3eb5eae 126 fpu_set_exception(env, FPUS_ZE);
f299f437
BS
127 }
128 return floatx80_div(a, b, &env->fp_status);
129}
130
6cad09d2 131static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
f299f437
BS
132{
133 if (env->cr[0] & CR0_NE_MASK) {
6cad09d2 134 raise_exception_ra(env, EXCP10_COPR, retaddr);
f299f437
BS
135 }
136#if !defined(CONFIG_USER_ONLY)
137 else {
138 cpu_set_ferr(env);
139 }
140#endif
141}
142
d3eb5eae 143void helper_flds_FT0(CPUX86State *env, uint32_t val)
f299f437
BS
144{
145 union {
146 float32 f;
147 uint32_t i;
148 } u;
149
150 u.i = val;
151 FT0 = float32_to_floatx80(u.f, &env->fp_status);
152}
153
d3eb5eae 154void helper_fldl_FT0(CPUX86State *env, uint64_t val)
f299f437
BS
155{
156 union {
157 float64 f;
158 uint64_t i;
159 } u;
160
161 u.i = val;
162 FT0 = float64_to_floatx80(u.f, &env->fp_status);
163}
164
d3eb5eae 165void helper_fildl_FT0(CPUX86State *env, int32_t val)
f299f437
BS
166{
167 FT0 = int32_to_floatx80(val, &env->fp_status);
168}
169
d3eb5eae 170void helper_flds_ST0(CPUX86State *env, uint32_t val)
f299f437
BS
171{
172 int new_fpstt;
173 union {
174 float32 f;
175 uint32_t i;
176 } u;
177
178 new_fpstt = (env->fpstt - 1) & 7;
179 u.i = val;
180 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
181 env->fpstt = new_fpstt;
182 env->fptags[new_fpstt] = 0; /* validate stack entry */
183}
184
d3eb5eae 185void helper_fldl_ST0(CPUX86State *env, uint64_t val)
f299f437
BS
186{
187 int new_fpstt;
188 union {
189 float64 f;
190 uint64_t i;
191 } u;
192
193 new_fpstt = (env->fpstt - 1) & 7;
194 u.i = val;
195 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
196 env->fpstt = new_fpstt;
197 env->fptags[new_fpstt] = 0; /* validate stack entry */
198}
199
d3eb5eae 200void helper_fildl_ST0(CPUX86State *env, int32_t val)
f299f437
BS
201{
202 int new_fpstt;
203
204 new_fpstt = (env->fpstt - 1) & 7;
205 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
206 env->fpstt = new_fpstt;
207 env->fptags[new_fpstt] = 0; /* validate stack entry */
208}
209
d3eb5eae 210void helper_fildll_ST0(CPUX86State *env, int64_t val)
f299f437
BS
211{
212 int new_fpstt;
213
214 new_fpstt = (env->fpstt - 1) & 7;
215 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
216 env->fpstt = new_fpstt;
217 env->fptags[new_fpstt] = 0; /* validate stack entry */
218}
219
d3eb5eae 220uint32_t helper_fsts_ST0(CPUX86State *env)
f299f437
BS
221{
222 union {
223 float32 f;
224 uint32_t i;
225 } u;
226
227 u.f = floatx80_to_float32(ST0, &env->fp_status);
228 return u.i;
229}
230
d3eb5eae 231uint64_t helper_fstl_ST0(CPUX86State *env)
f299f437
BS
232{
233 union {
234 float64 f;
235 uint64_t i;
236 } u;
237
238 u.f = floatx80_to_float64(ST0, &env->fp_status);
239 return u.i;
240}
241
d3eb5eae 242int32_t helper_fist_ST0(CPUX86State *env)
f299f437
BS
243{
244 int32_t val;
245
246 val = floatx80_to_int32(ST0, &env->fp_status);
247 if (val != (int16_t)val) {
248 val = -32768;
249 }
250 return val;
251}
252
d3eb5eae 253int32_t helper_fistl_ST0(CPUX86State *env)
f299f437
BS
254{
255 int32_t val;
ea32aaf1
DP
256 signed char old_exp_flags;
257
258 old_exp_flags = get_float_exception_flags(&env->fp_status);
259 set_float_exception_flags(0, &env->fp_status);
f299f437
BS
260
261 val = floatx80_to_int32(ST0, &env->fp_status);
ea32aaf1
DP
262 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
263 val = 0x80000000;
264 }
265 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
266 | old_exp_flags, &env->fp_status);
f299f437
BS
267 return val;
268}
269
d3eb5eae 270int64_t helper_fistll_ST0(CPUX86State *env)
f299f437
BS
271{
272 int64_t val;
ea32aaf1
DP
273 signed char old_exp_flags;
274
275 old_exp_flags = get_float_exception_flags(&env->fp_status);
276 set_float_exception_flags(0, &env->fp_status);
f299f437 277
178846bd 278 val = floatx80_to_int64(ST0, &env->fp_status);
ea32aaf1
DP
279 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
280 val = 0x8000000000000000ULL;
281 }
282 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
283 | old_exp_flags, &env->fp_status);
f299f437
BS
284 return val;
285}
286
d3eb5eae 287int32_t helper_fistt_ST0(CPUX86State *env)
f299f437
BS
288{
289 int32_t val;
290
291 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
292 if (val != (int16_t)val) {
293 val = -32768;
294 }
295 return val;
296}
297
d3eb5eae 298int32_t helper_fisttl_ST0(CPUX86State *env)
f299f437
BS
299{
300 int32_t val;
301
302 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
303 return val;
304}
305
d3eb5eae 306int64_t helper_fisttll_ST0(CPUX86State *env)
f299f437
BS
307{
308 int64_t val;
309
310 val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
311 return val;
312}
313
d3eb5eae 314void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
f299f437
BS
315{
316 int new_fpstt;
317
318 new_fpstt = (env->fpstt - 1) & 7;
6cad09d2 319 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
f299f437
BS
320 env->fpstt = new_fpstt;
321 env->fptags[new_fpstt] = 0; /* validate stack entry */
322}
323
d3eb5eae 324void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
f299f437 325{
6cad09d2 326 helper_fstt(env, ST0, ptr, GETPC());
f299f437
BS
327}
328
d3eb5eae 329void helper_fpush(CPUX86State *env)
f299f437 330{
d3eb5eae 331 fpush(env);
f299f437
BS
332}
333
d3eb5eae 334void helper_fpop(CPUX86State *env)
f299f437 335{
d3eb5eae 336 fpop(env);
f299f437
BS
337}
338
d3eb5eae 339void helper_fdecstp(CPUX86State *env)
f299f437
BS
340{
341 env->fpstt = (env->fpstt - 1) & 7;
342 env->fpus &= ~0x4700;
343}
344
d3eb5eae 345void helper_fincstp(CPUX86State *env)
f299f437
BS
346{
347 env->fpstt = (env->fpstt + 1) & 7;
348 env->fpus &= ~0x4700;
349}
350
351/* FPU move */
352
d3eb5eae 353void helper_ffree_STN(CPUX86State *env, int st_index)
f299f437
BS
354{
355 env->fptags[(env->fpstt + st_index) & 7] = 1;
356}
357
d3eb5eae 358void helper_fmov_ST0_FT0(CPUX86State *env)
f299f437
BS
359{
360 ST0 = FT0;
361}
362
d3eb5eae 363void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
f299f437
BS
364{
365 FT0 = ST(st_index);
366}
367
d3eb5eae 368void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
f299f437
BS
369{
370 ST0 = ST(st_index);
371}
372
d3eb5eae 373void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
374{
375 ST(st_index) = ST0;
376}
377
d3eb5eae 378void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
f299f437
BS
379{
380 floatx80 tmp;
381
382 tmp = ST(st_index);
383 ST(st_index) = ST0;
384 ST0 = tmp;
385}
386
387/* FPU operations */
388
389static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
390
d3eb5eae 391void helper_fcom_ST0_FT0(CPUX86State *env)
f299f437
BS
392{
393 int ret;
394
395 ret = floatx80_compare(ST0, FT0, &env->fp_status);
396 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
397}
398
d3eb5eae 399void helper_fucom_ST0_FT0(CPUX86State *env)
f299f437
BS
400{
401 int ret;
402
403 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
404 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
405}
406
407static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
408
d3eb5eae 409void helper_fcomi_ST0_FT0(CPUX86State *env)
f299f437
BS
410{
411 int eflags;
412 int ret;
413
414 ret = floatx80_compare(ST0, FT0, &env->fp_status);
d3eb5eae 415 eflags = cpu_cc_compute_all(env, CC_OP);
f299f437
BS
416 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
417 CC_SRC = eflags;
418}
419
d3eb5eae 420void helper_fucomi_ST0_FT0(CPUX86State *env)
f299f437
BS
421{
422 int eflags;
423 int ret;
424
425 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
d3eb5eae 426 eflags = cpu_cc_compute_all(env, CC_OP);
f299f437
BS
427 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
428 CC_SRC = eflags;
429}
430
d3eb5eae 431void helper_fadd_ST0_FT0(CPUX86State *env)
f299f437
BS
432{
433 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
434}
435
d3eb5eae 436void helper_fmul_ST0_FT0(CPUX86State *env)
f299f437
BS
437{
438 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
439}
440
d3eb5eae 441void helper_fsub_ST0_FT0(CPUX86State *env)
f299f437
BS
442{
443 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
444}
445
d3eb5eae 446void helper_fsubr_ST0_FT0(CPUX86State *env)
f299f437
BS
447{
448 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
449}
450
d3eb5eae 451void helper_fdiv_ST0_FT0(CPUX86State *env)
f299f437 452{
d3eb5eae 453 ST0 = helper_fdiv(env, ST0, FT0);
f299f437
BS
454}
455
d3eb5eae 456void helper_fdivr_ST0_FT0(CPUX86State *env)
f299f437 457{
d3eb5eae 458 ST0 = helper_fdiv(env, FT0, ST0);
f299f437
BS
459}
460
461/* fp operations between STN and ST0 */
462
d3eb5eae 463void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
464{
465 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
466}
467
d3eb5eae 468void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
469{
470 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
471}
472
d3eb5eae 473void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
474{
475 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
476}
477
d3eb5eae 478void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
479{
480 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
481}
482
d3eb5eae 483void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
484{
485 floatx80 *p;
486
487 p = &ST(st_index);
d3eb5eae 488 *p = helper_fdiv(env, *p, ST0);
f299f437
BS
489}
490
d3eb5eae 491void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
492{
493 floatx80 *p;
494
495 p = &ST(st_index);
d3eb5eae 496 *p = helper_fdiv(env, ST0, *p);
f299f437
BS
497}
498
499/* misc FPU operations */
d3eb5eae 500void helper_fchs_ST0(CPUX86State *env)
f299f437
BS
501{
502 ST0 = floatx80_chs(ST0);
503}
504
d3eb5eae 505void helper_fabs_ST0(CPUX86State *env)
f299f437
BS
506{
507 ST0 = floatx80_abs(ST0);
508}
509
d3eb5eae 510void helper_fld1_ST0(CPUX86State *env)
f299f437
BS
511{
512 ST0 = floatx80_one;
513}
514
d3eb5eae 515void helper_fldl2t_ST0(CPUX86State *env)
f299f437
BS
516{
517 ST0 = floatx80_l2t;
518}
519
d3eb5eae 520void helper_fldl2e_ST0(CPUX86State *env)
f299f437
BS
521{
522 ST0 = floatx80_l2e;
523}
524
d3eb5eae 525void helper_fldpi_ST0(CPUX86State *env)
f299f437
BS
526{
527 ST0 = floatx80_pi;
528}
529
d3eb5eae 530void helper_fldlg2_ST0(CPUX86State *env)
f299f437
BS
531{
532 ST0 = floatx80_lg2;
533}
534
d3eb5eae 535void helper_fldln2_ST0(CPUX86State *env)
f299f437
BS
536{
537 ST0 = floatx80_ln2;
538}
539
d3eb5eae 540void helper_fldz_ST0(CPUX86State *env)
f299f437
BS
541{
542 ST0 = floatx80_zero;
543}
544
d3eb5eae 545void helper_fldz_FT0(CPUX86State *env)
f299f437
BS
546{
547 FT0 = floatx80_zero;
548}
549
d3eb5eae 550uint32_t helper_fnstsw(CPUX86State *env)
f299f437
BS
551{
552 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
553}
554
d3eb5eae 555uint32_t helper_fnstcw(CPUX86State *env)
f299f437
BS
556{
557 return env->fpuc;
558}
559
5bde1407 560void update_fp_status(CPUX86State *env)
f299f437
BS
561{
562 int rnd_type;
563
564 /* set rounding mode */
565 switch (env->fpuc & FPU_RC_MASK) {
566 default:
567 case FPU_RC_NEAR:
568 rnd_type = float_round_nearest_even;
569 break;
570 case FPU_RC_DOWN:
571 rnd_type = float_round_down;
572 break;
573 case FPU_RC_UP:
574 rnd_type = float_round_up;
575 break;
576 case FPU_RC_CHOP:
577 rnd_type = float_round_to_zero;
578 break;
579 }
580 set_float_rounding_mode(rnd_type, &env->fp_status);
581 switch ((env->fpuc >> 8) & 3) {
582 case 0:
583 rnd_type = 32;
584 break;
585 case 2:
586 rnd_type = 64;
587 break;
588 case 3:
589 default:
590 rnd_type = 80;
591 break;
592 }
593 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
594}
595
d3eb5eae 596void helper_fldcw(CPUX86State *env, uint32_t val)
f299f437 597{
5bde1407 598 cpu_set_fpuc(env, val);
f299f437
BS
599}
600
d3eb5eae 601void helper_fclex(CPUX86State *env)
f299f437
BS
602{
603 env->fpus &= 0x7f00;
604}
605
d3eb5eae 606void helper_fwait(CPUX86State *env)
f299f437
BS
607{
608 if (env->fpus & FPUS_SE) {
6cad09d2 609 fpu_raise_exception(env, GETPC());
f299f437
BS
610 }
611}
612
d3eb5eae 613void helper_fninit(CPUX86State *env)
f299f437
BS
614{
615 env->fpus = 0;
616 env->fpstt = 0;
5bde1407 617 cpu_set_fpuc(env, 0x37f);
f299f437
BS
618 env->fptags[0] = 1;
619 env->fptags[1] = 1;
620 env->fptags[2] = 1;
621 env->fptags[3] = 1;
622 env->fptags[4] = 1;
623 env->fptags[5] = 1;
624 env->fptags[6] = 1;
625 env->fptags[7] = 1;
626}
627
628/* BCD ops */
629
d3eb5eae 630void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
f299f437
BS
631{
632 floatx80 tmp;
633 uint64_t val;
634 unsigned int v;
635 int i;
636
637 val = 0;
638 for (i = 8; i >= 0; i--) {
6cad09d2 639 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
f299f437
BS
640 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
641 }
642 tmp = int64_to_floatx80(val, &env->fp_status);
6cad09d2 643 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
18b41f95 644 tmp = floatx80_chs(tmp);
f299f437 645 }
d3eb5eae 646 fpush(env);
f299f437
BS
647 ST0 = tmp;
648}
649
d3eb5eae 650void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
f299f437
BS
651{
652 int v;
653 target_ulong mem_ref, mem_end;
654 int64_t val;
655
656 val = floatx80_to_int64(ST0, &env->fp_status);
657 mem_ref = ptr;
658 mem_end = mem_ref + 9;
659 if (val < 0) {
6cad09d2 660 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
f299f437
BS
661 val = -val;
662 } else {
6cad09d2 663 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
f299f437
BS
664 }
665 while (mem_ref < mem_end) {
666 if (val == 0) {
667 break;
668 }
669 v = val % 100;
670 val = val / 100;
671 v = ((v / 10) << 4) | (v % 10);
6cad09d2 672 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
f299f437
BS
673 }
674 while (mem_ref < mem_end) {
6cad09d2 675 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
f299f437
BS
676 }
677}
678
d3eb5eae 679void helper_f2xm1(CPUX86State *env)
f299f437 680{
d3eb5eae 681 double val = floatx80_to_double(env, ST0);
f299f437
BS
682
683 val = pow(2.0, val) - 1.0;
d3eb5eae 684 ST0 = double_to_floatx80(env, val);
f299f437
BS
685}
686
d3eb5eae 687void helper_fyl2x(CPUX86State *env)
f299f437 688{
d3eb5eae 689 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
690
691 if (fptemp > 0.0) {
692 fptemp = log(fptemp) / log(2.0); /* log2(ST) */
d3eb5eae
BS
693 fptemp *= floatx80_to_double(env, ST1);
694 ST1 = double_to_floatx80(env, fptemp);
695 fpop(env);
f299f437
BS
696 } else {
697 env->fpus &= ~0x4700;
698 env->fpus |= 0x400;
699 }
700}
701
d3eb5eae 702void helper_fptan(CPUX86State *env)
f299f437 703{
d3eb5eae 704 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
705
706 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
707 env->fpus |= 0x400;
708 } else {
709 fptemp = tan(fptemp);
d3eb5eae
BS
710 ST0 = double_to_floatx80(env, fptemp);
711 fpush(env);
f299f437
BS
712 ST0 = floatx80_one;
713 env->fpus &= ~0x400; /* C2 <-- 0 */
714 /* the above code is for |arg| < 2**52 only */
715 }
716}
717
d3eb5eae 718void helper_fpatan(CPUX86State *env)
f299f437
BS
719{
720 double fptemp, fpsrcop;
721
d3eb5eae
BS
722 fpsrcop = floatx80_to_double(env, ST1);
723 fptemp = floatx80_to_double(env, ST0);
724 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
725 fpop(env);
f299f437
BS
726}
727
d3eb5eae 728void helper_fxtract(CPUX86State *env)
f299f437
BS
729{
730 CPU_LDoubleU temp;
731
732 temp.d = ST0;
733
734 if (floatx80_is_zero(ST0)) {
735 /* Easy way to generate -inf and raising division by 0 exception */
736 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
737 &env->fp_status);
d3eb5eae 738 fpush(env);
f299f437
BS
739 ST0 = temp.d;
740 } else {
741 int expdif;
742
743 expdif = EXPD(temp) - EXPBIAS;
744 /* DP exponent bias */
745 ST0 = int32_to_floatx80(expdif, &env->fp_status);
d3eb5eae 746 fpush(env);
f299f437
BS
747 BIASEXPONENT(temp);
748 ST0 = temp.d;
749 }
750}
751
d3eb5eae 752void helper_fprem1(CPUX86State *env)
f299f437
BS
753{
754 double st0, st1, dblq, fpsrcop, fptemp;
755 CPU_LDoubleU fpsrcop1, fptemp1;
756 int expdif;
757 signed long long int q;
758
d3eb5eae
BS
759 st0 = floatx80_to_double(env, ST0);
760 st1 = floatx80_to_double(env, ST1);
f299f437
BS
761
762 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
d3eb5eae 763 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
f299f437
BS
764 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
765 return;
766 }
767
768 fpsrcop = st0;
769 fptemp = st1;
770 fpsrcop1.d = ST0;
771 fptemp1.d = ST1;
772 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
773
774 if (expdif < 0) {
775 /* optimisation? taken from the AMD docs */
776 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
777 /* ST0 is unchanged */
778 return;
779 }
780
781 if (expdif < 53) {
782 dblq = fpsrcop / fptemp;
783 /* round dblq towards nearest integer */
784 dblq = rint(dblq);
785 st0 = fpsrcop - fptemp * dblq;
786
787 /* convert dblq to q by truncating towards zero */
788 if (dblq < 0.0) {
789 q = (signed long long int)(-dblq);
790 } else {
791 q = (signed long long int)dblq;
792 }
793
794 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
795 /* (C0,C3,C1) <-- (q2,q1,q0) */
796 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
797 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
798 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
799 } else {
800 env->fpus |= 0x400; /* C2 <-- 1 */
801 fptemp = pow(2.0, expdif - 50);
802 fpsrcop = (st0 / st1) / fptemp;
803 /* fpsrcop = integer obtained by chopping */
804 fpsrcop = (fpsrcop < 0.0) ?
805 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
806 st0 -= (st1 * fpsrcop * fptemp);
807 }
d3eb5eae 808 ST0 = double_to_floatx80(env, st0);
f299f437
BS
809}
810
d3eb5eae 811void helper_fprem(CPUX86State *env)
f299f437
BS
812{
813 double st0, st1, dblq, fpsrcop, fptemp;
814 CPU_LDoubleU fpsrcop1, fptemp1;
815 int expdif;
816 signed long long int q;
817
d3eb5eae
BS
818 st0 = floatx80_to_double(env, ST0);
819 st1 = floatx80_to_double(env, ST1);
f299f437
BS
820
821 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
d3eb5eae 822 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
f299f437
BS
823 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
824 return;
825 }
826
827 fpsrcop = st0;
828 fptemp = st1;
829 fpsrcop1.d = ST0;
830 fptemp1.d = ST1;
831 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
832
833 if (expdif < 0) {
834 /* optimisation? taken from the AMD docs */
835 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
836 /* ST0 is unchanged */
837 return;
838 }
839
840 if (expdif < 53) {
841 dblq = fpsrcop / fptemp; /* ST0 / ST1 */
842 /* round dblq towards zero */
843 dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
844 st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
845
846 /* convert dblq to q by truncating towards zero */
847 if (dblq < 0.0) {
848 q = (signed long long int)(-dblq);
849 } else {
850 q = (signed long long int)dblq;
851 }
852
853 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
854 /* (C0,C3,C1) <-- (q2,q1,q0) */
855 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
856 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
857 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
858 } else {
859 int N = 32 + (expdif % 32); /* as per AMD docs */
860
861 env->fpus |= 0x400; /* C2 <-- 1 */
862 fptemp = pow(2.0, (double)(expdif - N));
863 fpsrcop = (st0 / st1) / fptemp;
864 /* fpsrcop = integer obtained by chopping */
865 fpsrcop = (fpsrcop < 0.0) ?
866 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
867 st0 -= (st1 * fpsrcop * fptemp);
868 }
d3eb5eae 869 ST0 = double_to_floatx80(env, st0);
f299f437
BS
870}
871
d3eb5eae 872void helper_fyl2xp1(CPUX86State *env)
f299f437 873{
d3eb5eae 874 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
875
876 if ((fptemp + 1.0) > 0.0) {
877 fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
d3eb5eae
BS
878 fptemp *= floatx80_to_double(env, ST1);
879 ST1 = double_to_floatx80(env, fptemp);
880 fpop(env);
f299f437
BS
881 } else {
882 env->fpus &= ~0x4700;
883 env->fpus |= 0x400;
884 }
885}
886
d3eb5eae 887void helper_fsqrt(CPUX86State *env)
f299f437
BS
888{
889 if (floatx80_is_neg(ST0)) {
890 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
891 env->fpus |= 0x400;
892 }
893 ST0 = floatx80_sqrt(ST0, &env->fp_status);
894}
895
d3eb5eae 896void helper_fsincos(CPUX86State *env)
f299f437 897{
d3eb5eae 898 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
899
900 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
901 env->fpus |= 0x400;
902 } else {
d3eb5eae
BS
903 ST0 = double_to_floatx80(env, sin(fptemp));
904 fpush(env);
905 ST0 = double_to_floatx80(env, cos(fptemp));
f299f437
BS
906 env->fpus &= ~0x400; /* C2 <-- 0 */
907 /* the above code is for |arg| < 2**63 only */
908 }
909}
910
d3eb5eae 911void helper_frndint(CPUX86State *env)
f299f437
BS
912{
913 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
914}
915
d3eb5eae 916void helper_fscale(CPUX86State *env)
f299f437
BS
917{
918 if (floatx80_is_any_nan(ST1)) {
919 ST0 = ST1;
920 } else {
921 int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
922 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
923 }
924}
925
d3eb5eae 926void helper_fsin(CPUX86State *env)
f299f437 927{
d3eb5eae 928 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
929
930 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
931 env->fpus |= 0x400;
932 } else {
d3eb5eae 933 ST0 = double_to_floatx80(env, sin(fptemp));
f299f437
BS
934 env->fpus &= ~0x400; /* C2 <-- 0 */
935 /* the above code is for |arg| < 2**53 only */
936 }
937}
938
d3eb5eae 939void helper_fcos(CPUX86State *env)
f299f437 940{
d3eb5eae 941 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
942
943 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
944 env->fpus |= 0x400;
945 } else {
d3eb5eae 946 ST0 = double_to_floatx80(env, cos(fptemp));
f299f437
BS
947 env->fpus &= ~0x400; /* C2 <-- 0 */
948 /* the above code is for |arg| < 2**63 only */
949 }
950}
951
d3eb5eae 952void helper_fxam_ST0(CPUX86State *env)
f299f437
BS
953{
954 CPU_LDoubleU temp;
955 int expdif;
956
957 temp.d = ST0;
958
959 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
960 if (SIGND(temp)) {
961 env->fpus |= 0x200; /* C1 <-- 1 */
962 }
963
964 /* XXX: test fptags too */
965 expdif = EXPD(temp);
966 if (expdif == MAXEXPD) {
967 if (MANTD(temp) == 0x8000000000000000ULL) {
968 env->fpus |= 0x500; /* Infinity */
969 } else {
970 env->fpus |= 0x100; /* NaN */
971 }
972 } else if (expdif == 0) {
973 if (MANTD(temp) == 0) {
974 env->fpus |= 0x4000; /* Zero */
975 } else {
976 env->fpus |= 0x4400; /* Denormal */
977 }
978 } else {
979 env->fpus |= 0x400;
980 }
981}
982
6cad09d2
PD
983static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
984 uintptr_t retaddr)
f299f437
BS
985{
986 int fpus, fptag, exp, i;
987 uint64_t mant;
988 CPU_LDoubleU tmp;
989
990 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
991 fptag = 0;
992 for (i = 7; i >= 0; i--) {
993 fptag <<= 2;
994 if (env->fptags[i]) {
995 fptag |= 3;
996 } else {
997 tmp.d = env->fpregs[i].d;
998 exp = EXPD(tmp);
999 mant = MANTD(tmp);
1000 if (exp == 0 && mant == 0) {
1001 /* zero */
1002 fptag |= 1;
1003 } else if (exp == 0 || exp == MAXEXPD
1004 || (mant & (1LL << 63)) == 0) {
1005 /* NaNs, infinity, denormal */
1006 fptag |= 2;
1007 }
1008 }
1009 }
1010 if (data32) {
1011 /* 32 bit */
6cad09d2
PD
1012 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1013 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1014 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1015 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1016 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1017 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1018 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
f299f437
BS
1019 } else {
1020 /* 16 bit */
6cad09d2
PD
1021 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1022 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1023 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1024 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1025 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1026 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1027 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
f299f437
BS
1028 }
1029}
1030
6cad09d2
PD
1031void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1032{
1033 do_fstenv(env, ptr, data32, GETPC());
1034}
1035
1036static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1037 uintptr_t retaddr)
f299f437
BS
1038{
1039 int i, fpus, fptag;
1040
1041 if (data32) {
6cad09d2
PD
1042 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1043 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1044 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
f299f437 1045 } else {
6cad09d2
PD
1046 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1047 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1048 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
f299f437
BS
1049 }
1050 env->fpstt = (fpus >> 11) & 7;
1051 env->fpus = fpus & ~0x3800;
1052 for (i = 0; i < 8; i++) {
1053 env->fptags[i] = ((fptag & 3) == 3);
1054 fptag >>= 2;
1055 }
1056}
1057
6cad09d2
PD
1058void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1059{
1060 do_fldenv(env, ptr, data32, GETPC());
1061}
1062
d3eb5eae 1063void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
f299f437
BS
1064{
1065 floatx80 tmp;
1066 int i;
1067
6cad09d2 1068 do_fstenv(env, ptr, data32, GETPC());
f299f437
BS
1069
1070 ptr += (14 << data32);
1071 for (i = 0; i < 8; i++) {
1072 tmp = ST(i);
6cad09d2 1073 helper_fstt(env, tmp, ptr, GETPC());
f299f437
BS
1074 ptr += 10;
1075 }
1076
1077 /* fninit */
1078 env->fpus = 0;
1079 env->fpstt = 0;
5bde1407 1080 cpu_set_fpuc(env, 0x37f);
f299f437
BS
1081 env->fptags[0] = 1;
1082 env->fptags[1] = 1;
1083 env->fptags[2] = 1;
1084 env->fptags[3] = 1;
1085 env->fptags[4] = 1;
1086 env->fptags[5] = 1;
1087 env->fptags[6] = 1;
1088 env->fptags[7] = 1;
1089}
1090
d3eb5eae 1091void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
f299f437
BS
1092{
1093 floatx80 tmp;
1094 int i;
1095
6cad09d2 1096 do_fldenv(env, ptr, data32, GETPC());
f299f437
BS
1097 ptr += (14 << data32);
1098
1099 for (i = 0; i < 8; i++) {
6cad09d2 1100 tmp = helper_fldt(env, ptr, GETPC());
f299f437
BS
1101 ST(i) = tmp;
1102 ptr += 10;
1103 }
1104}
1105
1106#if defined(CONFIG_USER_ONLY)
d3eb5eae 1107void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
f299f437 1108{
d3eb5eae 1109 helper_fsave(env, ptr, data32);
f299f437
BS
1110}
1111
d3eb5eae 1112void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
f299f437 1113{
d3eb5eae 1114 helper_frstor(env, ptr, data32);
f299f437
BS
1115}
1116#endif
1117
64dbaff0 1118static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
f299f437 1119{
64dbaff0 1120 int fpus, fptag, i;
f299f437
BS
1121 target_ulong addr;
1122
f299f437
BS
1123 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1124 fptag = 0;
1125 for (i = 0; i < 8; i++) {
1126 fptag |= (env->fptags[i] << i);
1127 }
64dbaff0
RH
1128 cpu_stw_data_ra(env, ptr, env->fpuc, ra);
1129 cpu_stw_data_ra(env, ptr + 2, fpus, ra);
1130 cpu_stw_data_ra(env, ptr + 4, fptag ^ 0xff, ra);
1131
1132 /* In 32-bit mode this is eip, sel, dp, sel.
1133 In 64-bit mode this is rip, rdp.
1134 But in either case we don't write actual data, just zeros. */
1135 cpu_stq_data_ra(env, ptr + 0x08, 0, ra); /* eip+sel; rip */
1136 cpu_stq_data_ra(env, ptr + 0x10, 0, ra); /* edp+sel; rdp */
f299f437
BS
1137
1138 addr = ptr + 0x20;
1139 for (i = 0; i < 8; i++) {
64dbaff0
RH
1140 floatx80 tmp = ST(i);
1141 helper_fstt(env, tmp, addr, ra);
f299f437
BS
1142 addr += 16;
1143 }
64dbaff0
RH
1144}
1145
1146static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1147{
1148 cpu_stl_data_ra(env, ptr + 0x18, env->mxcsr, ra); /* mxcsr */
1149 cpu_stl_data_ra(env, ptr + 0x1c, 0x0000ffff, ra); /* mxcsr_mask */
1150}
1151
1152static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1153{
1154 int i, nb_xmm_regs;
1155 target_ulong addr;
1156
1157 if (env->hflags & HF_CS64_MASK) {
1158 nb_xmm_regs = 16;
1159 } else {
1160 nb_xmm_regs = 8;
1161 }
1162
1163 addr = ptr + 0xa0;
1164 for (i = 0; i < nb_xmm_regs; i++) {
1165 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1166 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1167 addr += 16;
1168 }
1169}
1170
f4f1110e
RH
1171static void do_xsave_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
1172{
1173 int i;
1174
1175 for (i = 0; i < 4; i++, addr += 16) {
1176 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1177 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1178 }
1179}
1180
1181static void do_xsave_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
1182{
1183 cpu_stq_data_ra(env, addr, env->bndcs_regs.cfgu, ra);
1184 cpu_stq_data_ra(env, addr + 8, env->bndcs_regs.sts, ra);
1185}
1186
64dbaff0
RH
1187void helper_fxsave(CPUX86State *env, target_ulong ptr)
1188{
1189 uintptr_t ra = GETPC();
1190
1191 /* The operand must be 16 byte aligned */
1192 if (ptr & 0xf) {
1193 raise_exception_ra(env, EXCP0D_GPF, ra);
1194 }
1195
1196 do_xsave_fpu(env, ptr, ra);
f299f437
BS
1197
1198 if (env->cr[4] & CR4_OSFXSR_MASK) {
64dbaff0 1199 do_xsave_mxcsr(env, ptr, ra);
f299f437
BS
1200 /* Fast FXSAVE leaves out the XMM registers */
1201 if (!(env->efer & MSR_EFER_FFXSR)
1202 || (env->hflags & HF_CPL_MASK)
1203 || !(env->hflags & HF_LMA_MASK)) {
64dbaff0 1204 do_xsave_sse(env, ptr, ra);
f299f437
BS
1205 }
1206 }
1207}
1208
19dc85db
RH
1209static uint64_t get_xinuse(CPUX86State *env)
1210{
f4f1110e
RH
1211 uint64_t inuse = -1;
1212
1213 /* For the most part, we don't track XINUSE. We could calculate it
1214 here for all components, but it's probably less work to simply
1215 indicate in use. That said, the state of BNDREGS is important
1216 enough to track in HFLAGS, so we might as well use that here. */
1217 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
cfc3b074 1218 inuse &= ~XSTATE_BNDREGS_MASK;
f4f1110e
RH
1219 }
1220 return inuse;
19dc85db
RH
1221}
1222
c9cfe8f9
RH
1223static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1224 uint64_t inuse, uint64_t opt, uintptr_t ra)
19dc85db 1225{
19dc85db
RH
1226 uint64_t old_bv, new_bv;
1227
1228 /* The OS must have enabled XSAVE. */
1229 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1230 raise_exception_ra(env, EXCP06_ILLOP, ra);
1231 }
1232
1233 /* The operand must be 64 byte aligned. */
1234 if (ptr & 63) {
1235 raise_exception_ra(env, EXCP0D_GPF, ra);
1236 }
1237
1238 /* Never save anything not enabled by XCR0. */
1239 rfbm &= env->xcr0;
c9cfe8f9 1240 opt &= rfbm;
19dc85db 1241
cfc3b074 1242 if (opt & XSTATE_FP_MASK) {
19dc85db
RH
1243 do_xsave_fpu(env, ptr, ra);
1244 }
cfc3b074 1245 if (rfbm & XSTATE_SSE_MASK) {
c9cfe8f9 1246 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
19dc85db 1247 do_xsave_mxcsr(env, ptr, ra);
c9cfe8f9 1248 }
cfc3b074 1249 if (opt & XSTATE_SSE_MASK) {
19dc85db
RH
1250 do_xsave_sse(env, ptr, ra);
1251 }
cfc3b074
PB
1252 if (opt & XSTATE_BNDREGS_MASK) {
1253 target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset;
f4f1110e
RH
1254 do_xsave_bndregs(env, ptr + off, ra);
1255 }
cfc3b074
PB
1256 if (opt & XSTATE_BNDCSR_MASK) {
1257 target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset;
f4f1110e
RH
1258 do_xsave_bndcsr(env, ptr + off, ra);
1259 }
19dc85db
RH
1260
1261 /* Update the XSTATE_BV field. */
1262 old_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
c9cfe8f9 1263 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
19dc85db
RH
1264 cpu_stq_data_ra(env, ptr + 512, new_bv, ra);
1265}
1266
c9cfe8f9
RH
1267void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1268{
1269 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1270}
1271
1272void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1273{
1274 uint64_t inuse = get_xinuse(env);
1275 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1276}
1277
64dbaff0 1278static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
f299f437 1279{
64dbaff0 1280 int i, fpus, fptag;
f299f437
BS
1281 target_ulong addr;
1282
64dbaff0
RH
1283 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, ra));
1284 fpus = cpu_lduw_data_ra(env, ptr + 2, ra);
1285 fptag = cpu_lduw_data_ra(env, ptr + 4, ra);
f299f437
BS
1286 env->fpstt = (fpus >> 11) & 7;
1287 env->fpus = fpus & ~0x3800;
1288 fptag ^= 0xff;
1289 for (i = 0; i < 8; i++) {
1290 env->fptags[i] = ((fptag >> i) & 1);
1291 }
1292
1293 addr = ptr + 0x20;
1294 for (i = 0; i < 8; i++) {
64dbaff0 1295 floatx80 tmp = helper_fldt(env, addr, ra);
f299f437
BS
1296 ST(i) = tmp;
1297 addr += 16;
1298 }
64dbaff0
RH
1299}
1300
1301static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1302{
1303 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + 0x18, ra));
1304}
1305
1306static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1307{
1308 int i, nb_xmm_regs;
1309 target_ulong addr;
1310
1311 if (env->hflags & HF_CS64_MASK) {
1312 nb_xmm_regs = 16;
1313 } else {
1314 nb_xmm_regs = 8;
1315 }
1316
1317 addr = ptr + 0xa0;
1318 for (i = 0; i < nb_xmm_regs; i++) {
1319 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1320 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1321 addr += 16;
1322 }
1323}
1324
f4f1110e
RH
1325static void do_xrstor_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
1326{
1327 int i;
1328
1329 for (i = 0; i < 4; i++, addr += 16) {
1330 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1331 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1332 }
1333}
1334
1335static void do_xrstor_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
1336{
1337 /* FIXME: Extend highest implemented bit of linear address. */
1338 env->bndcs_regs.cfgu = cpu_ldq_data_ra(env, addr, ra);
1339 env->bndcs_regs.sts = cpu_ldq_data_ra(env, addr + 8, ra);
1340}
1341
64dbaff0
RH
1342void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1343{
1344 uintptr_t ra = GETPC();
1345
1346 /* The operand must be 16 byte aligned */
1347 if (ptr & 0xf) {
1348 raise_exception_ra(env, EXCP0D_GPF, ra);
1349 }
1350
1351 do_xrstor_fpu(env, ptr, ra);
f299f437
BS
1352
1353 if (env->cr[4] & CR4_OSFXSR_MASK) {
64dbaff0
RH
1354 do_xrstor_mxcsr(env, ptr, ra);
1355 /* Fast FXRSTOR leaves out the XMM registers */
f299f437
BS
1356 if (!(env->efer & MSR_EFER_FFXSR)
1357 || (env->hflags & HF_CPL_MASK)
1358 || !(env->hflags & HF_LMA_MASK)) {
64dbaff0 1359 do_xrstor_sse(env, ptr, ra);
f299f437
BS
1360 }
1361 }
1362}
1363
19dc85db
RH
1364void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1365{
1366 uintptr_t ra = GETPC();
1367 uint64_t xstate_bv, xcomp_bv0, xcomp_bv1;
1368
1369 rfbm &= env->xcr0;
1370
1371 /* The OS must have enabled XSAVE. */
1372 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1373 raise_exception_ra(env, EXCP06_ILLOP, ra);
1374 }
1375
1376 /* The operand must be 64 byte aligned. */
1377 if (ptr & 63) {
1378 raise_exception_ra(env, EXCP0D_GPF, ra);
1379 }
1380
1381 xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
1382
1383 if ((int64_t)xstate_bv < 0) {
1384 /* FIXME: Compact form. */
1385 raise_exception_ra(env, EXCP0D_GPF, ra);
1386 }
1387
1388 /* Standard form. */
1389
1390 /* The XSTATE field must not set bits not present in XCR0. */
1391 if (xstate_bv & ~env->xcr0) {
1392 raise_exception_ra(env, EXCP0D_GPF, ra);
1393 }
1394
1395 /* The XCOMP field must be zero. */
1396 xcomp_bv0 = cpu_ldq_data_ra(env, ptr + 520, ra);
1397 xcomp_bv1 = cpu_ldq_data_ra(env, ptr + 528, ra);
1398 if (xcomp_bv0 || xcomp_bv1) {
1399 raise_exception_ra(env, EXCP0D_GPF, ra);
1400 }
1401
cfc3b074
PB
1402 if (rfbm & XSTATE_FP_MASK) {
1403 if (xstate_bv & XSTATE_FP_MASK) {
19dc85db
RH
1404 do_xrstor_fpu(env, ptr, ra);
1405 } else {
1406 helper_fninit(env);
1407 memset(env->fpregs, 0, sizeof(env->fpregs));
1408 }
1409 }
cfc3b074 1410 if (rfbm & XSTATE_SSE_MASK) {
19dc85db
RH
1411 /* Note that the standard form of XRSTOR loads MXCSR from memory
1412 whether or not the XSTATE_BV bit is set. */
1413 do_xrstor_mxcsr(env, ptr, ra);
cfc3b074 1414 if (xstate_bv & XSTATE_SSE_MASK) {
19dc85db
RH
1415 do_xrstor_sse(env, ptr, ra);
1416 } else {
1417 /* ??? When AVX is implemented, we may have to be more
1418 selective in the clearing. */
1419 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1420 }
1421 }
cfc3b074
PB
1422 if (rfbm & XSTATE_BNDREGS_MASK) {
1423 if (xstate_bv & XSTATE_BNDREGS_MASK) {
1424 target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset;
f4f1110e
RH
1425 do_xrstor_bndregs(env, ptr + off, ra);
1426 env->hflags |= HF_MPX_IU_MASK;
1427 } else {
1428 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1429 env->hflags &= ~HF_MPX_IU_MASK;
1430 }
1431 }
cfc3b074
PB
1432 if (rfbm & XSTATE_BNDCSR_MASK) {
1433 if (xstate_bv & XSTATE_BNDCSR_MASK) {
1434 target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset;
f4f1110e
RH
1435 do_xrstor_bndcsr(env, ptr + off, ra);
1436 } else {
1437 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1438 }
1439 cpu_sync_bndcs_hflags(env);
1440 }
19dc85db
RH
1441}
1442
1443uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1444{
1445 /* The OS must have enabled XSAVE. */
1446 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1447 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1448 }
1449
1450 switch (ecx) {
1451 case 0:
1452 return env->xcr0;
1453 case 1:
c9cfe8f9
RH
1454 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1455 return env->xcr0 & get_xinuse(env);
1456 }
1457 break;
19dc85db
RH
1458 }
1459 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1460}
1461
1462void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1463{
1464 uint32_t dummy, ena_lo, ena_hi;
1465 uint64_t ena;
1466
1467 /* The OS must have enabled XSAVE. */
1468 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1469 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1470 }
1471
1472 /* Only XCR0 is defined at present; the FPU may not be disabled. */
cfc3b074 1473 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
19dc85db
RH
1474 goto do_gpf;
1475 }
1476
1477 /* Disallow enabling unimplemented features. */
1478 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1479 ena = ((uint64_t)ena_hi << 32) | ena_lo;
1480 if (mask & ~ena) {
1481 goto do_gpf;
1482 }
1483
f4f1110e 1484 /* Disallow enabling only half of MPX. */
cfc3b074
PB
1485 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1486 & XSTATE_BNDCSR_MASK) {
f4f1110e
RH
1487 goto do_gpf;
1488 }
1489
19dc85db 1490 env->xcr0 = mask;
f4f1110e 1491 cpu_sync_bndcs_hflags(env);
19dc85db
RH
1492 return;
1493
1494 do_gpf:
1495 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1496}
1497
f299f437
BS
1498void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
1499{
1500 CPU_LDoubleU temp;
1501
1502 temp.d = f;
1503 *pmant = temp.l.lower;
1504 *pexp = temp.l.upper;
1505}
1506
1507floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
1508{
1509 CPU_LDoubleU temp;
1510
1511 temp.l.upper = upper;
1512 temp.l.lower = mant;
1513 return temp.d;
1514}
1515
1516/* MMX/SSE */
1517/* XXX: optimize by storing fptt and fptags in the static cpu state */
1518
1519#define SSE_DAZ 0x0040
1520#define SSE_RC_MASK 0x6000
1521#define SSE_RC_NEAR 0x0000
1522#define SSE_RC_DOWN 0x2000
1523#define SSE_RC_UP 0x4000
1524#define SSE_RC_CHOP 0x6000
1525#define SSE_FZ 0x8000
1526
4e47e39a 1527void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr)
f299f437
BS
1528{
1529 int rnd_type;
1530
4e47e39a
RH
1531 env->mxcsr = mxcsr;
1532
f299f437 1533 /* set rounding mode */
4e47e39a 1534 switch (mxcsr & SSE_RC_MASK) {
f299f437
BS
1535 default:
1536 case SSE_RC_NEAR:
1537 rnd_type = float_round_nearest_even;
1538 break;
1539 case SSE_RC_DOWN:
1540 rnd_type = float_round_down;
1541 break;
1542 case SSE_RC_UP:
1543 rnd_type = float_round_up;
1544 break;
1545 case SSE_RC_CHOP:
1546 rnd_type = float_round_to_zero;
1547 break;
1548 }
1549 set_float_rounding_mode(rnd_type, &env->sse_status);
1550
1551 /* set denormals are zero */
4e47e39a 1552 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
f299f437
BS
1553
1554 /* set flush to zero */
4e47e39a 1555 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
f299f437
BS
1556}
1557
5bde1407
PD
1558void cpu_set_fpuc(CPUX86State *env, uint16_t val)
1559{
1560 env->fpuc = val;
1561 update_fp_status(env);
1562}
1563
d3eb5eae 1564void helper_ldmxcsr(CPUX86State *env, uint32_t val)
f299f437 1565{
4e47e39a 1566 cpu_set_mxcsr(env, val);
f299f437
BS
1567}
1568
d3eb5eae 1569void helper_enter_mmx(CPUX86State *env)
f299f437
BS
1570{
1571 env->fpstt = 0;
1572 *(uint32_t *)(env->fptags) = 0;
1573 *(uint32_t *)(env->fptags + 4) = 0;
1574}
1575
d3eb5eae 1576void helper_emms(CPUX86State *env)
f299f437
BS
1577{
1578 /* set to empty state */
1579 *(uint32_t *)(env->fptags) = 0x01010101;
1580 *(uint32_t *)(env->fptags + 4) = 0x01010101;
1581}
1582
1583/* XXX: suppress */
d3eb5eae 1584void helper_movq(CPUX86State *env, void *d, void *s)
f299f437
BS
1585{
1586 *(uint64_t *)d = *(uint64_t *)s;
1587}
1588
1589#define SHIFT 0
1590#include "ops_sse.h"
1591
1592#define SHIFT 1
1593#include "ops_sse.h"