]> git.proxmox.com Git - mirror_qemu.git/blame - target/i386/fpu_helper.c
i386: Fix signedness of hyperv_spinlock_attempts
[mirror_qemu.git] / target / i386 / fpu_helper.c
CommitLineData
f299f437
BS
1/*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
b6a0aa05 20#include "qemu/osdep.h"
f299f437
BS
21#include <math.h>
22#include "cpu.h"
2ef6175a 23#include "exec/helper-proto.h"
c334a388 24#include "qemu/host-utils.h"
63c91552 25#include "exec/exec-all.h"
f08b6170 26#include "exec/cpu_ldst.h"
24f91e81 27#include "fpu/softfloat.h"
92fc4b58 28
f299f437
BS
/* Rounding-control field of the x87 control word (bits 10-11). */
#define FPU_RC_MASK         0xc00
#define FPU_RC_NEAR         0x000
#define FPU_RC_DOWN         0x400
#define FPU_RC_UP           0x800
#define FPU_RC_CHOP         0xc00

/* Largest magnitude accepted by the trigonometric helpers (2**63). */
#define MAXTAN 9223372036854775808.0

/*
 * The following deal with x86 long double-precision numbers.  The "fp"
 * argument is an lvalue exposing the fields l.upper and l.lower; it is
 * parenthesised in every expansion so any lvalue expression can be passed.
 */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp)        ((fp).l.upper & 0x7fff)
#define SIGND(fp)       ((fp).l.upper & 0x8000)
#define MANTD(fp)       ((fp).l.lower)
/* Replace the exponent field with EXPBIAS, leaving the sign bit alone. */
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)

/* x87 status-word bits. */
#define FPUS_IE (1 << 0)
#define FPUS_DE (1 << 1)
#define FPUS_ZE (1 << 2)
#define FPUS_OE (1 << 3)
#define FPUS_UE (1 << 4)
#define FPUS_PE (1 << 5)
#define FPUS_SF (1 << 6)
#define FPUS_SE (1 << 7)
#define FPUS_B  (1 << 15)

/* Exception-mask bits of the x87 control word. */
#define FPUC_EM 0x3f

#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
60
d3eb5eae 61static inline void fpush(CPUX86State *env)
f299f437
BS
62{
63 env->fpstt = (env->fpstt - 1) & 7;
64 env->fptags[env->fpstt] = 0; /* validate stack entry */
65}
66
d3eb5eae 67static inline void fpop(CPUX86State *env)
f299f437
BS
68{
69 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
70 env->fpstt = (env->fpstt + 1) & 7;
71}
72
6cad09d2
PD
73static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
74 uintptr_t retaddr)
f299f437
BS
75{
76 CPU_LDoubleU temp;
77
6cad09d2
PD
78 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
79 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
f299f437
BS
80 return temp.d;
81}
82
6cad09d2
PD
83static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
84 uintptr_t retaddr)
f299f437
BS
85{
86 CPU_LDoubleU temp;
87
88 temp.d = f;
6cad09d2
PD
89 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
90 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
f299f437
BS
91}
92
93/* x87 FPU helpers */
94
d3eb5eae 95static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
f299f437
BS
96{
97 union {
98 float64 f64;
99 double d;
100 } u;
101
102 u.f64 = floatx80_to_float64(a, &env->fp_status);
103 return u.d;
104}
105
d3eb5eae 106static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
f299f437
BS
107{
108 union {
109 float64 f64;
110 double d;
111 } u;
112
113 u.d = a;
114 return float64_to_floatx80(u.f64, &env->fp_status);
115}
116
d3eb5eae 117static void fpu_set_exception(CPUX86State *env, int mask)
f299f437
BS
118{
119 env->fpus |= mask;
120 if (env->fpus & (~env->fpuc & FPUC_EM)) {
121 env->fpus |= FPUS_SE | FPUS_B;
122 }
123}
124
d3eb5eae 125static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
f299f437
BS
126{
127 if (floatx80_is_zero(b)) {
d3eb5eae 128 fpu_set_exception(env, FPUS_ZE);
f299f437
BS
129 }
130 return floatx80_div(a, b, &env->fp_status);
131}
132
6cad09d2 133static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
f299f437
BS
134{
135 if (env->cr[0] & CR0_NE_MASK) {
6cad09d2 136 raise_exception_ra(env, EXCP10_COPR, retaddr);
f299f437
BS
137 }
138#if !defined(CONFIG_USER_ONLY)
139 else {
140 cpu_set_ferr(env);
141 }
142#endif
143}
144
d3eb5eae 145void helper_flds_FT0(CPUX86State *env, uint32_t val)
f299f437
BS
146{
147 union {
148 float32 f;
149 uint32_t i;
150 } u;
151
152 u.i = val;
153 FT0 = float32_to_floatx80(u.f, &env->fp_status);
154}
155
d3eb5eae 156void helper_fldl_FT0(CPUX86State *env, uint64_t val)
f299f437
BS
157{
158 union {
159 float64 f;
160 uint64_t i;
161 } u;
162
163 u.i = val;
164 FT0 = float64_to_floatx80(u.f, &env->fp_status);
165}
166
d3eb5eae 167void helper_fildl_FT0(CPUX86State *env, int32_t val)
f299f437
BS
168{
169 FT0 = int32_to_floatx80(val, &env->fp_status);
170}
171
d3eb5eae 172void helper_flds_ST0(CPUX86State *env, uint32_t val)
f299f437
BS
173{
174 int new_fpstt;
175 union {
176 float32 f;
177 uint32_t i;
178 } u;
179
180 new_fpstt = (env->fpstt - 1) & 7;
181 u.i = val;
182 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
183 env->fpstt = new_fpstt;
184 env->fptags[new_fpstt] = 0; /* validate stack entry */
185}
186
d3eb5eae 187void helper_fldl_ST0(CPUX86State *env, uint64_t val)
f299f437
BS
188{
189 int new_fpstt;
190 union {
191 float64 f;
192 uint64_t i;
193 } u;
194
195 new_fpstt = (env->fpstt - 1) & 7;
196 u.i = val;
197 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
198 env->fpstt = new_fpstt;
199 env->fptags[new_fpstt] = 0; /* validate stack entry */
200}
201
d3eb5eae 202void helper_fildl_ST0(CPUX86State *env, int32_t val)
f299f437
BS
203{
204 int new_fpstt;
205
206 new_fpstt = (env->fpstt - 1) & 7;
207 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
208 env->fpstt = new_fpstt;
209 env->fptags[new_fpstt] = 0; /* validate stack entry */
210}
211
d3eb5eae 212void helper_fildll_ST0(CPUX86State *env, int64_t val)
f299f437
BS
213{
214 int new_fpstt;
215
216 new_fpstt = (env->fpstt - 1) & 7;
217 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
218 env->fpstt = new_fpstt;
219 env->fptags[new_fpstt] = 0; /* validate stack entry */
220}
221
d3eb5eae 222uint32_t helper_fsts_ST0(CPUX86State *env)
f299f437
BS
223{
224 union {
225 float32 f;
226 uint32_t i;
227 } u;
228
229 u.f = floatx80_to_float32(ST0, &env->fp_status);
230 return u.i;
231}
232
d3eb5eae 233uint64_t helper_fstl_ST0(CPUX86State *env)
f299f437
BS
234{
235 union {
236 float64 f;
237 uint64_t i;
238 } u;
239
240 u.f = floatx80_to_float64(ST0, &env->fp_status);
241 return u.i;
242}
243
d3eb5eae 244int32_t helper_fist_ST0(CPUX86State *env)
f299f437
BS
245{
246 int32_t val;
247
248 val = floatx80_to_int32(ST0, &env->fp_status);
249 if (val != (int16_t)val) {
250 val = -32768;
251 }
252 return val;
253}
254
d3eb5eae 255int32_t helper_fistl_ST0(CPUX86State *env)
f299f437
BS
256{
257 int32_t val;
ea32aaf1
DP
258 signed char old_exp_flags;
259
260 old_exp_flags = get_float_exception_flags(&env->fp_status);
261 set_float_exception_flags(0, &env->fp_status);
f299f437
BS
262
263 val = floatx80_to_int32(ST0, &env->fp_status);
ea32aaf1
DP
264 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
265 val = 0x80000000;
266 }
267 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
268 | old_exp_flags, &env->fp_status);
f299f437
BS
269 return val;
270}
271
d3eb5eae 272int64_t helper_fistll_ST0(CPUX86State *env)
f299f437
BS
273{
274 int64_t val;
ea32aaf1
DP
275 signed char old_exp_flags;
276
277 old_exp_flags = get_float_exception_flags(&env->fp_status);
278 set_float_exception_flags(0, &env->fp_status);
f299f437 279
178846bd 280 val = floatx80_to_int64(ST0, &env->fp_status);
ea32aaf1
DP
281 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
282 val = 0x8000000000000000ULL;
283 }
284 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
285 | old_exp_flags, &env->fp_status);
f299f437
BS
286 return val;
287}
288
d3eb5eae 289int32_t helper_fistt_ST0(CPUX86State *env)
f299f437
BS
290{
291 int32_t val;
292
293 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
294 if (val != (int16_t)val) {
295 val = -32768;
296 }
297 return val;
298}
299
d3eb5eae 300int32_t helper_fisttl_ST0(CPUX86State *env)
f299f437 301{
9be38598 302 return floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
f299f437
BS
303}
304
d3eb5eae 305int64_t helper_fisttll_ST0(CPUX86State *env)
f299f437 306{
9be38598 307 return floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
f299f437
BS
308}
309
d3eb5eae 310void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
f299f437
BS
311{
312 int new_fpstt;
313
314 new_fpstt = (env->fpstt - 1) & 7;
6cad09d2 315 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
f299f437
BS
316 env->fpstt = new_fpstt;
317 env->fptags[new_fpstt] = 0; /* validate stack entry */
318}
319
d3eb5eae 320void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
f299f437 321{
6cad09d2 322 helper_fstt(env, ST0, ptr, GETPC());
f299f437
BS
323}
324
d3eb5eae 325void helper_fpush(CPUX86State *env)
f299f437 326{
d3eb5eae 327 fpush(env);
f299f437
BS
328}
329
d3eb5eae 330void helper_fpop(CPUX86State *env)
f299f437 331{
d3eb5eae 332 fpop(env);
f299f437
BS
333}
334
d3eb5eae 335void helper_fdecstp(CPUX86State *env)
f299f437
BS
336{
337 env->fpstt = (env->fpstt - 1) & 7;
338 env->fpus &= ~0x4700;
339}
340
d3eb5eae 341void helper_fincstp(CPUX86State *env)
f299f437
BS
342{
343 env->fpstt = (env->fpstt + 1) & 7;
344 env->fpus &= ~0x4700;
345}
346
347/* FPU move */
348
d3eb5eae 349void helper_ffree_STN(CPUX86State *env, int st_index)
f299f437
BS
350{
351 env->fptags[(env->fpstt + st_index) & 7] = 1;
352}
353
d3eb5eae 354void helper_fmov_ST0_FT0(CPUX86State *env)
f299f437
BS
355{
356 ST0 = FT0;
357}
358
d3eb5eae 359void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
f299f437
BS
360{
361 FT0 = ST(st_index);
362}
363
d3eb5eae 364void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
f299f437
BS
365{
366 ST0 = ST(st_index);
367}
368
d3eb5eae 369void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
370{
371 ST(st_index) = ST0;
372}
373
d3eb5eae 374void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
f299f437
BS
375{
376 floatx80 tmp;
377
378 tmp = ST(st_index);
379 ST(st_index) = ST0;
380 ST0 = tmp;
381}
382
383/* FPU operations */
384
/*
 * Status-word condition bits (within mask 0x4500) for a comparison result.
 * The table is indexed by the compare function's return value plus one.
 */
static const int fcom_ccval[4] = {
    0x0100,
    0x4000,
    0x0000,
    0x4500,
};
386
d3eb5eae 387void helper_fcom_ST0_FT0(CPUX86State *env)
f299f437
BS
388{
389 int ret;
390
391 ret = floatx80_compare(ST0, FT0, &env->fp_status);
392 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
393}
394
d3eb5eae 395void helper_fucom_ST0_FT0(CPUX86State *env)
f299f437
BS
396{
397 int ret;
398
399 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
400 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
401}
402
403static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
404
d3eb5eae 405void helper_fcomi_ST0_FT0(CPUX86State *env)
f299f437
BS
406{
407 int eflags;
408 int ret;
409
410 ret = floatx80_compare(ST0, FT0, &env->fp_status);
d3eb5eae 411 eflags = cpu_cc_compute_all(env, CC_OP);
f299f437
BS
412 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
413 CC_SRC = eflags;
414}
415
d3eb5eae 416void helper_fucomi_ST0_FT0(CPUX86State *env)
f299f437
BS
417{
418 int eflags;
419 int ret;
420
421 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
d3eb5eae 422 eflags = cpu_cc_compute_all(env, CC_OP);
f299f437
BS
423 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
424 CC_SRC = eflags;
425}
426
d3eb5eae 427void helper_fadd_ST0_FT0(CPUX86State *env)
f299f437
BS
428{
429 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
430}
431
d3eb5eae 432void helper_fmul_ST0_FT0(CPUX86State *env)
f299f437
BS
433{
434 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
435}
436
d3eb5eae 437void helper_fsub_ST0_FT0(CPUX86State *env)
f299f437
BS
438{
439 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
440}
441
d3eb5eae 442void helper_fsubr_ST0_FT0(CPUX86State *env)
f299f437
BS
443{
444 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
445}
446
d3eb5eae 447void helper_fdiv_ST0_FT0(CPUX86State *env)
f299f437 448{
d3eb5eae 449 ST0 = helper_fdiv(env, ST0, FT0);
f299f437
BS
450}
451
d3eb5eae 452void helper_fdivr_ST0_FT0(CPUX86State *env)
f299f437 453{
d3eb5eae 454 ST0 = helper_fdiv(env, FT0, ST0);
f299f437
BS
455}
456
457/* fp operations between STN and ST0 */
458
d3eb5eae 459void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
460{
461 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
462}
463
d3eb5eae 464void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
465{
466 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
467}
468
d3eb5eae 469void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
470{
471 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
472}
473
d3eb5eae 474void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
475{
476 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
477}
478
d3eb5eae 479void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
480{
481 floatx80 *p;
482
483 p = &ST(st_index);
d3eb5eae 484 *p = helper_fdiv(env, *p, ST0);
f299f437
BS
485}
486
d3eb5eae 487void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
f299f437
BS
488{
489 floatx80 *p;
490
491 p = &ST(st_index);
d3eb5eae 492 *p = helper_fdiv(env, ST0, *p);
f299f437
BS
493}
494
495/* misc FPU operations */
d3eb5eae 496void helper_fchs_ST0(CPUX86State *env)
f299f437
BS
497{
498 ST0 = floatx80_chs(ST0);
499}
500
d3eb5eae 501void helper_fabs_ST0(CPUX86State *env)
f299f437
BS
502{
503 ST0 = floatx80_abs(ST0);
504}
505
d3eb5eae 506void helper_fld1_ST0(CPUX86State *env)
f299f437
BS
507{
508 ST0 = floatx80_one;
509}
510
d3eb5eae 511void helper_fldl2t_ST0(CPUX86State *env)
f299f437
BS
512{
513 ST0 = floatx80_l2t;
514}
515
d3eb5eae 516void helper_fldl2e_ST0(CPUX86State *env)
f299f437
BS
517{
518 ST0 = floatx80_l2e;
519}
520
d3eb5eae 521void helper_fldpi_ST0(CPUX86State *env)
f299f437
BS
522{
523 ST0 = floatx80_pi;
524}
525
d3eb5eae 526void helper_fldlg2_ST0(CPUX86State *env)
f299f437
BS
527{
528 ST0 = floatx80_lg2;
529}
530
d3eb5eae 531void helper_fldln2_ST0(CPUX86State *env)
f299f437
BS
532{
533 ST0 = floatx80_ln2;
534}
535
d3eb5eae 536void helper_fldz_ST0(CPUX86State *env)
f299f437
BS
537{
538 ST0 = floatx80_zero;
539}
540
d3eb5eae 541void helper_fldz_FT0(CPUX86State *env)
f299f437
BS
542{
543 FT0 = floatx80_zero;
544}
545
d3eb5eae 546uint32_t helper_fnstsw(CPUX86State *env)
f299f437
BS
547{
548 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
549}
550
d3eb5eae 551uint32_t helper_fnstcw(CPUX86State *env)
f299f437
BS
552{
553 return env->fpuc;
554}
555
5bde1407 556void update_fp_status(CPUX86State *env)
f299f437
BS
557{
558 int rnd_type;
559
560 /* set rounding mode */
561 switch (env->fpuc & FPU_RC_MASK) {
562 default:
563 case FPU_RC_NEAR:
564 rnd_type = float_round_nearest_even;
565 break;
566 case FPU_RC_DOWN:
567 rnd_type = float_round_down;
568 break;
569 case FPU_RC_UP:
570 rnd_type = float_round_up;
571 break;
572 case FPU_RC_CHOP:
573 rnd_type = float_round_to_zero;
574 break;
575 }
576 set_float_rounding_mode(rnd_type, &env->fp_status);
577 switch ((env->fpuc >> 8) & 3) {
578 case 0:
579 rnd_type = 32;
580 break;
581 case 2:
582 rnd_type = 64;
583 break;
584 case 3:
585 default:
586 rnd_type = 80;
587 break;
588 }
589 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
590}
591
d3eb5eae 592void helper_fldcw(CPUX86State *env, uint32_t val)
f299f437 593{
5bde1407 594 cpu_set_fpuc(env, val);
f299f437
BS
595}
596
d3eb5eae 597void helper_fclex(CPUX86State *env)
f299f437
BS
598{
599 env->fpus &= 0x7f00;
600}
601
d3eb5eae 602void helper_fwait(CPUX86State *env)
f299f437
BS
603{
604 if (env->fpus & FPUS_SE) {
6cad09d2 605 fpu_raise_exception(env, GETPC());
f299f437
BS
606 }
607}
608
d3eb5eae 609void helper_fninit(CPUX86State *env)
f299f437
BS
610{
611 env->fpus = 0;
612 env->fpstt = 0;
5bde1407 613 cpu_set_fpuc(env, 0x37f);
f299f437
BS
614 env->fptags[0] = 1;
615 env->fptags[1] = 1;
616 env->fptags[2] = 1;
617 env->fptags[3] = 1;
618 env->fptags[4] = 1;
619 env->fptags[5] = 1;
620 env->fptags[6] = 1;
621 env->fptags[7] = 1;
622}
623
624/* BCD ops */
625
d3eb5eae 626void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
f299f437
BS
627{
628 floatx80 tmp;
629 uint64_t val;
630 unsigned int v;
631 int i;
632
633 val = 0;
634 for (i = 8; i >= 0; i--) {
6cad09d2 635 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
f299f437
BS
636 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
637 }
638 tmp = int64_to_floatx80(val, &env->fp_status);
6cad09d2 639 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
18b41f95 640 tmp = floatx80_chs(tmp);
f299f437 641 }
d3eb5eae 642 fpush(env);
f299f437
BS
643 ST0 = tmp;
644}
645
d3eb5eae 646void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
f299f437
BS
647{
648 int v;
649 target_ulong mem_ref, mem_end;
650 int64_t val;
651
652 val = floatx80_to_int64(ST0, &env->fp_status);
653 mem_ref = ptr;
654 mem_end = mem_ref + 9;
655 if (val < 0) {
6cad09d2 656 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
f299f437
BS
657 val = -val;
658 } else {
6cad09d2 659 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
f299f437
BS
660 }
661 while (mem_ref < mem_end) {
662 if (val == 0) {
663 break;
664 }
665 v = val % 100;
666 val = val / 100;
667 v = ((v / 10) << 4) | (v % 10);
6cad09d2 668 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
f299f437
BS
669 }
670 while (mem_ref < mem_end) {
6cad09d2 671 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
f299f437
BS
672 }
673}
674
d3eb5eae 675void helper_f2xm1(CPUX86State *env)
f299f437 676{
d3eb5eae 677 double val = floatx80_to_double(env, ST0);
f299f437
BS
678
679 val = pow(2.0, val) - 1.0;
d3eb5eae 680 ST0 = double_to_floatx80(env, val);
f299f437
BS
681}
682
d3eb5eae 683void helper_fyl2x(CPUX86State *env)
f299f437 684{
d3eb5eae 685 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
686
687 if (fptemp > 0.0) {
688 fptemp = log(fptemp) / log(2.0); /* log2(ST) */
d3eb5eae
BS
689 fptemp *= floatx80_to_double(env, ST1);
690 ST1 = double_to_floatx80(env, fptemp);
691 fpop(env);
f299f437
BS
692 } else {
693 env->fpus &= ~0x4700;
694 env->fpus |= 0x400;
695 }
696}
697
d3eb5eae 698void helper_fptan(CPUX86State *env)
f299f437 699{
d3eb5eae 700 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
701
702 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
703 env->fpus |= 0x400;
704 } else {
705 fptemp = tan(fptemp);
d3eb5eae
BS
706 ST0 = double_to_floatx80(env, fptemp);
707 fpush(env);
f299f437
BS
708 ST0 = floatx80_one;
709 env->fpus &= ~0x400; /* C2 <-- 0 */
710 /* the above code is for |arg| < 2**52 only */
711 }
712}
713
d3eb5eae 714void helper_fpatan(CPUX86State *env)
f299f437
BS
715{
716 double fptemp, fpsrcop;
717
d3eb5eae
BS
718 fpsrcop = floatx80_to_double(env, ST1);
719 fptemp = floatx80_to_double(env, ST0);
720 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
721 fpop(env);
f299f437
BS
722}
723
d3eb5eae 724void helper_fxtract(CPUX86State *env)
f299f437
BS
725{
726 CPU_LDoubleU temp;
727
728 temp.d = ST0;
729
730 if (floatx80_is_zero(ST0)) {
731 /* Easy way to generate -inf and raising division by 0 exception */
732 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
733 &env->fp_status);
d3eb5eae 734 fpush(env);
f299f437
BS
735 ST0 = temp.d;
736 } else {
737 int expdif;
738
739 expdif = EXPD(temp) - EXPBIAS;
740 /* DP exponent bias */
741 ST0 = int32_to_floatx80(expdif, &env->fp_status);
d3eb5eae 742 fpush(env);
f299f437
BS
743 BIASEXPONENT(temp);
744 ST0 = temp.d;
745 }
746}
747
d3eb5eae 748void helper_fprem1(CPUX86State *env)
f299f437
BS
749{
750 double st0, st1, dblq, fpsrcop, fptemp;
751 CPU_LDoubleU fpsrcop1, fptemp1;
752 int expdif;
753 signed long long int q;
754
d3eb5eae
BS
755 st0 = floatx80_to_double(env, ST0);
756 st1 = floatx80_to_double(env, ST1);
f299f437
BS
757
758 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
d3eb5eae 759 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
f299f437
BS
760 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
761 return;
762 }
763
764 fpsrcop = st0;
765 fptemp = st1;
766 fpsrcop1.d = ST0;
767 fptemp1.d = ST1;
768 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
769
770 if (expdif < 0) {
771 /* optimisation? taken from the AMD docs */
772 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
773 /* ST0 is unchanged */
774 return;
775 }
776
777 if (expdif < 53) {
778 dblq = fpsrcop / fptemp;
779 /* round dblq towards nearest integer */
780 dblq = rint(dblq);
781 st0 = fpsrcop - fptemp * dblq;
782
783 /* convert dblq to q by truncating towards zero */
784 if (dblq < 0.0) {
785 q = (signed long long int)(-dblq);
786 } else {
787 q = (signed long long int)dblq;
788 }
789
790 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
791 /* (C0,C3,C1) <-- (q2,q1,q0) */
792 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
793 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
794 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
795 } else {
796 env->fpus |= 0x400; /* C2 <-- 1 */
797 fptemp = pow(2.0, expdif - 50);
798 fpsrcop = (st0 / st1) / fptemp;
799 /* fpsrcop = integer obtained by chopping */
800 fpsrcop = (fpsrcop < 0.0) ?
801 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
802 st0 -= (st1 * fpsrcop * fptemp);
803 }
d3eb5eae 804 ST0 = double_to_floatx80(env, st0);
f299f437
BS
805}
806
d3eb5eae 807void helper_fprem(CPUX86State *env)
f299f437
BS
808{
809 double st0, st1, dblq, fpsrcop, fptemp;
810 CPU_LDoubleU fpsrcop1, fptemp1;
811 int expdif;
812 signed long long int q;
813
d3eb5eae
BS
814 st0 = floatx80_to_double(env, ST0);
815 st1 = floatx80_to_double(env, ST1);
f299f437
BS
816
817 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
d3eb5eae 818 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
f299f437
BS
819 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
820 return;
821 }
822
823 fpsrcop = st0;
824 fptemp = st1;
825 fpsrcop1.d = ST0;
826 fptemp1.d = ST1;
827 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
828
829 if (expdif < 0) {
830 /* optimisation? taken from the AMD docs */
831 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
832 /* ST0 is unchanged */
833 return;
834 }
835
836 if (expdif < 53) {
837 dblq = fpsrcop / fptemp; /* ST0 / ST1 */
838 /* round dblq towards zero */
839 dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
840 st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
841
842 /* convert dblq to q by truncating towards zero */
843 if (dblq < 0.0) {
844 q = (signed long long int)(-dblq);
845 } else {
846 q = (signed long long int)dblq;
847 }
848
849 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
850 /* (C0,C3,C1) <-- (q2,q1,q0) */
851 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
852 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
853 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
854 } else {
855 int N = 32 + (expdif % 32); /* as per AMD docs */
856
857 env->fpus |= 0x400; /* C2 <-- 1 */
858 fptemp = pow(2.0, (double)(expdif - N));
859 fpsrcop = (st0 / st1) / fptemp;
860 /* fpsrcop = integer obtained by chopping */
861 fpsrcop = (fpsrcop < 0.0) ?
862 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
863 st0 -= (st1 * fpsrcop * fptemp);
864 }
d3eb5eae 865 ST0 = double_to_floatx80(env, st0);
f299f437
BS
866}
867
d3eb5eae 868void helper_fyl2xp1(CPUX86State *env)
f299f437 869{
d3eb5eae 870 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
871
872 if ((fptemp + 1.0) > 0.0) {
873 fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
d3eb5eae
BS
874 fptemp *= floatx80_to_double(env, ST1);
875 ST1 = double_to_floatx80(env, fptemp);
876 fpop(env);
f299f437
BS
877 } else {
878 env->fpus &= ~0x4700;
879 env->fpus |= 0x400;
880 }
881}
882
d3eb5eae 883void helper_fsqrt(CPUX86State *env)
f299f437
BS
884{
885 if (floatx80_is_neg(ST0)) {
886 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
887 env->fpus |= 0x400;
888 }
889 ST0 = floatx80_sqrt(ST0, &env->fp_status);
890}
891
d3eb5eae 892void helper_fsincos(CPUX86State *env)
f299f437 893{
d3eb5eae 894 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
895
896 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
897 env->fpus |= 0x400;
898 } else {
d3eb5eae
BS
899 ST0 = double_to_floatx80(env, sin(fptemp));
900 fpush(env);
901 ST0 = double_to_floatx80(env, cos(fptemp));
f299f437
BS
902 env->fpus &= ~0x400; /* C2 <-- 0 */
903 /* the above code is for |arg| < 2**63 only */
904 }
905}
906
d3eb5eae 907void helper_frndint(CPUX86State *env)
f299f437
BS
908{
909 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
910}
911
d3eb5eae 912void helper_fscale(CPUX86State *env)
f299f437
BS
913{
914 if (floatx80_is_any_nan(ST1)) {
915 ST0 = ST1;
916 } else {
917 int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
918 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
919 }
920}
921
d3eb5eae 922void helper_fsin(CPUX86State *env)
f299f437 923{
d3eb5eae 924 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
925
926 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
927 env->fpus |= 0x400;
928 } else {
d3eb5eae 929 ST0 = double_to_floatx80(env, sin(fptemp));
f299f437
BS
930 env->fpus &= ~0x400; /* C2 <-- 0 */
931 /* the above code is for |arg| < 2**53 only */
932 }
933}
934
d3eb5eae 935void helper_fcos(CPUX86State *env)
f299f437 936{
d3eb5eae 937 double fptemp = floatx80_to_double(env, ST0);
f299f437
BS
938
939 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
940 env->fpus |= 0x400;
941 } else {
d3eb5eae 942 ST0 = double_to_floatx80(env, cos(fptemp));
f299f437
BS
943 env->fpus &= ~0x400; /* C2 <-- 0 */
944 /* the above code is for |arg| < 2**63 only */
945 }
946}
947
d3eb5eae 948void helper_fxam_ST0(CPUX86State *env)
f299f437
BS
949{
950 CPU_LDoubleU temp;
951 int expdif;
952
953 temp.d = ST0;
954
955 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
956 if (SIGND(temp)) {
957 env->fpus |= 0x200; /* C1 <-- 1 */
958 }
959
960 /* XXX: test fptags too */
961 expdif = EXPD(temp);
962 if (expdif == MAXEXPD) {
963 if (MANTD(temp) == 0x8000000000000000ULL) {
964 env->fpus |= 0x500; /* Infinity */
965 } else {
966 env->fpus |= 0x100; /* NaN */
967 }
968 } else if (expdif == 0) {
969 if (MANTD(temp) == 0) {
970 env->fpus |= 0x4000; /* Zero */
971 } else {
972 env->fpus |= 0x4400; /* Denormal */
973 }
974 } else {
975 env->fpus |= 0x400;
976 }
977}
978
6cad09d2
PD
979static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
980 uintptr_t retaddr)
f299f437
BS
981{
982 int fpus, fptag, exp, i;
983 uint64_t mant;
984 CPU_LDoubleU tmp;
985
986 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
987 fptag = 0;
988 for (i = 7; i >= 0; i--) {
989 fptag <<= 2;
990 if (env->fptags[i]) {
991 fptag |= 3;
992 } else {
993 tmp.d = env->fpregs[i].d;
994 exp = EXPD(tmp);
995 mant = MANTD(tmp);
996 if (exp == 0 && mant == 0) {
997 /* zero */
998 fptag |= 1;
999 } else if (exp == 0 || exp == MAXEXPD
1000 || (mant & (1LL << 63)) == 0) {
1001 /* NaNs, infinity, denormal */
1002 fptag |= 2;
1003 }
1004 }
1005 }
1006 if (data32) {
1007 /* 32 bit */
6cad09d2
PD
1008 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1009 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1010 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1011 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1012 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1013 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1014 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
f299f437
BS
1015 } else {
1016 /* 16 bit */
6cad09d2
PD
1017 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1018 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1019 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1020 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1021 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1022 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1023 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
f299f437
BS
1024 }
1025}
1026
6cad09d2
PD
1027void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1028{
1029 do_fstenv(env, ptr, data32, GETPC());
1030}
1031
1032static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1033 uintptr_t retaddr)
f299f437
BS
1034{
1035 int i, fpus, fptag;
1036
1037 if (data32) {
6cad09d2
PD
1038 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1039 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1040 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
f299f437 1041 } else {
6cad09d2
PD
1042 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1043 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1044 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
f299f437
BS
1045 }
1046 env->fpstt = (fpus >> 11) & 7;
1047 env->fpus = fpus & ~0x3800;
1048 for (i = 0; i < 8; i++) {
1049 env->fptags[i] = ((fptag & 3) == 3);
1050 fptag >>= 2;
1051 }
1052}
1053
6cad09d2
PD
1054void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1055{
1056 do_fldenv(env, ptr, data32, GETPC());
1057}
1058
d3eb5eae 1059void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
f299f437
BS
1060{
1061 floatx80 tmp;
1062 int i;
1063
6cad09d2 1064 do_fstenv(env, ptr, data32, GETPC());
f299f437
BS
1065
1066 ptr += (14 << data32);
1067 for (i = 0; i < 8; i++) {
1068 tmp = ST(i);
6cad09d2 1069 helper_fstt(env, tmp, ptr, GETPC());
f299f437
BS
1070 ptr += 10;
1071 }
1072
1073 /* fninit */
1074 env->fpus = 0;
1075 env->fpstt = 0;
5bde1407 1076 cpu_set_fpuc(env, 0x37f);
f299f437
BS
1077 env->fptags[0] = 1;
1078 env->fptags[1] = 1;
1079 env->fptags[2] = 1;
1080 env->fptags[3] = 1;
1081 env->fptags[4] = 1;
1082 env->fptags[5] = 1;
1083 env->fptags[6] = 1;
1084 env->fptags[7] = 1;
1085}
1086
d3eb5eae 1087void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
f299f437
BS
1088{
1089 floatx80 tmp;
1090 int i;
1091
6cad09d2 1092 do_fldenv(env, ptr, data32, GETPC());
f299f437
BS
1093 ptr += (14 << data32);
1094
1095 for (i = 0; i < 8; i++) {
6cad09d2 1096 tmp = helper_fldt(env, ptr, GETPC());
f299f437
BS
1097 ST(i) = tmp;
1098 ptr += 10;
1099 }
1100}
1101
1102#if defined(CONFIG_USER_ONLY)
d3eb5eae 1103void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
f299f437 1104{
d3eb5eae 1105 helper_fsave(env, ptr, data32);
f299f437
BS
1106}
1107
d3eb5eae 1108void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
f299f437 1109{
d3eb5eae 1110 helper_frstor(env, ptr, data32);
f299f437
BS
1111}
1112#endif
1113
3f32bd21
RH
1114#define XO(X) offsetof(X86XSaveArea, X)
1115
64dbaff0 1116static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
f299f437 1117{
64dbaff0 1118 int fpus, fptag, i;
f299f437
BS
1119 target_ulong addr;
1120
f299f437
BS
1121 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1122 fptag = 0;
1123 for (i = 0; i < 8; i++) {
1124 fptag |= (env->fptags[i] << i);
1125 }
3f32bd21
RH
1126
1127 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
1128 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
1129 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
64dbaff0
RH
1130
1131 /* In 32-bit mode this is eip, sel, dp, sel.
1132 In 64-bit mode this is rip, rdp.
1133 But in either case we don't write actual data, just zeros. */
3f32bd21
RH
1134 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
1135 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
f299f437 1136
3f32bd21 1137 addr = ptr + XO(legacy.fpregs);
f299f437 1138 for (i = 0; i < 8; i++) {
64dbaff0
RH
1139 floatx80 tmp = ST(i);
1140 helper_fstt(env, tmp, addr, ra);
f299f437
BS
1141 addr += 16;
1142 }
64dbaff0
RH
1143}
1144
1145static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1146{
3f32bd21
RH
1147 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
1148 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
64dbaff0
RH
1149}
1150
1151static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1152{
1153 int i, nb_xmm_regs;
1154 target_ulong addr;
1155
1156 if (env->hflags & HF_CS64_MASK) {
1157 nb_xmm_regs = 16;
1158 } else {
1159 nb_xmm_regs = 8;
1160 }
1161
3f32bd21 1162 addr = ptr + XO(legacy.xmm_regs);
64dbaff0
RH
1163 for (i = 0; i < nb_xmm_regs; i++) {
1164 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1165 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1166 addr += 16;
1167 }
1168}
1169
3f32bd21 1170static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
f4f1110e 1171{
3f32bd21 1172 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
f4f1110e
RH
1173 int i;
1174
1175 for (i = 0; i < 4; i++, addr += 16) {
1176 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1177 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1178 }
1179}
1180
3f32bd21 1181static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
f4f1110e 1182{
3f32bd21
RH
1183 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
1184 env->bndcs_regs.cfgu, ra);
1185 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
1186 env->bndcs_regs.sts, ra);
f4f1110e
RH
1187}
1188
3f32bd21 1189static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
0f70ed47 1190{
3f32bd21 1191 cpu_stq_data_ra(env, ptr, env->pkru, ra);
0f70ed47
PB
1192}
1193
64dbaff0
RH
1194void helper_fxsave(CPUX86State *env, target_ulong ptr)
1195{
1196 uintptr_t ra = GETPC();
1197
1198 /* The operand must be 16 byte aligned */
1199 if (ptr & 0xf) {
1200 raise_exception_ra(env, EXCP0D_GPF, ra);
1201 }
1202
1203 do_xsave_fpu(env, ptr, ra);
f299f437
BS
1204
1205 if (env->cr[4] & CR4_OSFXSR_MASK) {
64dbaff0 1206 do_xsave_mxcsr(env, ptr, ra);
f299f437
BS
1207 /* Fast FXSAVE leaves out the XMM registers */
1208 if (!(env->efer & MSR_EFER_FFXSR)
1209 || (env->hflags & HF_CPL_MASK)
1210 || !(env->hflags & HF_LMA_MASK)) {
64dbaff0 1211 do_xsave_sse(env, ptr, ra);
f299f437
BS
1212 }
1213 }
1214}
1215
19dc85db
RH
1216static uint64_t get_xinuse(CPUX86State *env)
1217{
f4f1110e
RH
1218 uint64_t inuse = -1;
1219
1220 /* For the most part, we don't track XINUSE. We could calculate it
1221 here for all components, but it's probably less work to simply
1222 indicate in use. That said, the state of BNDREGS is important
1223 enough to track in HFLAGS, so we might as well use that here. */
1224 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
cfc3b074 1225 inuse &= ~XSTATE_BNDREGS_MASK;
f4f1110e
RH
1226 }
1227 return inuse;
19dc85db
RH
1228}
1229
c9cfe8f9
RH
1230static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1231 uint64_t inuse, uint64_t opt, uintptr_t ra)
19dc85db 1232{
19dc85db
RH
1233 uint64_t old_bv, new_bv;
1234
1235 /* The OS must have enabled XSAVE. */
1236 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1237 raise_exception_ra(env, EXCP06_ILLOP, ra);
1238 }
1239
1240 /* The operand must be 64 byte aligned. */
1241 if (ptr & 63) {
1242 raise_exception_ra(env, EXCP0D_GPF, ra);
1243 }
1244
1245 /* Never save anything not enabled by XCR0. */
1246 rfbm &= env->xcr0;
c9cfe8f9 1247 opt &= rfbm;
19dc85db 1248
cfc3b074 1249 if (opt & XSTATE_FP_MASK) {
19dc85db
RH
1250 do_xsave_fpu(env, ptr, ra);
1251 }
cfc3b074 1252 if (rfbm & XSTATE_SSE_MASK) {
c9cfe8f9 1253 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
19dc85db 1254 do_xsave_mxcsr(env, ptr, ra);
c9cfe8f9 1255 }
cfc3b074 1256 if (opt & XSTATE_SSE_MASK) {
19dc85db
RH
1257 do_xsave_sse(env, ptr, ra);
1258 }
cfc3b074 1259 if (opt & XSTATE_BNDREGS_MASK) {
3f32bd21 1260 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
f4f1110e 1261 }
cfc3b074 1262 if (opt & XSTATE_BNDCSR_MASK) {
3f32bd21 1263 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
f4f1110e 1264 }
0f70ed47 1265 if (opt & XSTATE_PKRU_MASK) {
3f32bd21 1266 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
0f70ed47 1267 }
19dc85db
RH
1268
1269 /* Update the XSTATE_BV field. */
3f32bd21 1270 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
c9cfe8f9 1271 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
3f32bd21 1272 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
19dc85db
RH
1273}
1274
c9cfe8f9
RH
1275void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1276{
1277 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1278}
1279
1280void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1281{
1282 uint64_t inuse = get_xinuse(env);
1283 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1284}
1285
64dbaff0 1286static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
f299f437 1287{
3f32bd21 1288 int i, fpuc, fpus, fptag;
f299f437
BS
1289 target_ulong addr;
1290
3f32bd21
RH
1291 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1292 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1293 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1294 cpu_set_fpuc(env, fpuc);
f299f437
BS
1295 env->fpstt = (fpus >> 11) & 7;
1296 env->fpus = fpus & ~0x3800;
1297 fptag ^= 0xff;
1298 for (i = 0; i < 8; i++) {
1299 env->fptags[i] = ((fptag >> i) & 1);
1300 }
1301
3f32bd21 1302 addr = ptr + XO(legacy.fpregs);
f299f437 1303 for (i = 0; i < 8; i++) {
64dbaff0 1304 floatx80 tmp = helper_fldt(env, addr, ra);
f299f437
BS
1305 ST(i) = tmp;
1306 addr += 16;
1307 }
64dbaff0
RH
1308}
1309
1310static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1311{
3f32bd21 1312 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
64dbaff0
RH
1313}
1314
1315static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1316{
1317 int i, nb_xmm_regs;
1318 target_ulong addr;
1319
1320 if (env->hflags & HF_CS64_MASK) {
1321 nb_xmm_regs = 16;
1322 } else {
1323 nb_xmm_regs = 8;
1324 }
1325
3f32bd21 1326 addr = ptr + XO(legacy.xmm_regs);
64dbaff0
RH
1327 for (i = 0; i < nb_xmm_regs; i++) {
1328 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1329 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1330 addr += 16;
1331 }
1332}
1333
3f32bd21 1334static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
f4f1110e 1335{
3f32bd21 1336 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
f4f1110e
RH
1337 int i;
1338
1339 for (i = 0; i < 4; i++, addr += 16) {
1340 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1341 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1342 }
1343}
1344
3f32bd21 1345static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
f4f1110e
RH
1346{
1347 /* FIXME: Extend highest implemented bit of linear address. */
3f32bd21
RH
1348 env->bndcs_regs.cfgu
1349 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1350 env->bndcs_regs.sts
1351 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
f4f1110e
RH
1352}
1353
3f32bd21 1354static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
0f70ed47 1355{
3f32bd21 1356 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
0f70ed47
PB
1357}
1358
64dbaff0
RH
1359void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1360{
1361 uintptr_t ra = GETPC();
1362
1363 /* The operand must be 16 byte aligned */
1364 if (ptr & 0xf) {
1365 raise_exception_ra(env, EXCP0D_GPF, ra);
1366 }
1367
1368 do_xrstor_fpu(env, ptr, ra);
f299f437
BS
1369
1370 if (env->cr[4] & CR4_OSFXSR_MASK) {
64dbaff0
RH
1371 do_xrstor_mxcsr(env, ptr, ra);
1372 /* Fast FXRSTOR leaves out the XMM registers */
f299f437
BS
1373 if (!(env->efer & MSR_EFER_FFXSR)
1374 || (env->hflags & HF_CPL_MASK)
1375 || !(env->hflags & HF_LMA_MASK)) {
64dbaff0 1376 do_xrstor_sse(env, ptr, ra);
f299f437
BS
1377 }
1378 }
1379}
1380
1c1df019
PK
#if defined(CONFIG_USER_ONLY)
/*
 * Entry points compiled only when CONFIG_USER_ONLY is defined; they
 * forward unchanged to the FXSAVE/FXRSTOR helpers.
 */

/* Forward to helper_fxsave() with the same arguments. */
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    helper_fxsave(env, ptr);
}

/* Forward to helper_fxrstor() with the same arguments. */
void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    helper_fxrstor(env, ptr);
}
#endif
1392
19dc85db
RH
1393void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1394{
1395 uintptr_t ra = GETPC();
3f32bd21 1396 uint64_t xstate_bv, xcomp_bv, reserve0;
19dc85db
RH
1397
1398 rfbm &= env->xcr0;
1399
1400 /* The OS must have enabled XSAVE. */
1401 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1402 raise_exception_ra(env, EXCP06_ILLOP, ra);
1403 }
1404
1405 /* The operand must be 64 byte aligned. */
1406 if (ptr & 63) {
1407 raise_exception_ra(env, EXCP0D_GPF, ra);
1408 }
1409
3f32bd21 1410 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
19dc85db
RH
1411
1412 if ((int64_t)xstate_bv < 0) {
1413 /* FIXME: Compact form. */
1414 raise_exception_ra(env, EXCP0D_GPF, ra);
1415 }
1416
1417 /* Standard form. */
1418
3f32bd21 1419 /* The XSTATE_BV field must not set bits not present in XCR0. */
19dc85db
RH
1420 if (xstate_bv & ~env->xcr0) {
1421 raise_exception_ra(env, EXCP0D_GPF, ra);
1422 }
1423
3f32bd21
RH
1424 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
1425 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
1426 describes only XCOMP_BV, but the description of the standard form
1427 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
1428 includes the next 64-bit field. */
1429 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
1430 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
1431 if (xcomp_bv || reserve0) {
19dc85db
RH
1432 raise_exception_ra(env, EXCP0D_GPF, ra);
1433 }
1434
cfc3b074
PB
1435 if (rfbm & XSTATE_FP_MASK) {
1436 if (xstate_bv & XSTATE_FP_MASK) {
19dc85db
RH
1437 do_xrstor_fpu(env, ptr, ra);
1438 } else {
1439 helper_fninit(env);
1440 memset(env->fpregs, 0, sizeof(env->fpregs));
1441 }
1442 }
cfc3b074 1443 if (rfbm & XSTATE_SSE_MASK) {
19dc85db
RH
1444 /* Note that the standard form of XRSTOR loads MXCSR from memory
1445 whether or not the XSTATE_BV bit is set. */
1446 do_xrstor_mxcsr(env, ptr, ra);
cfc3b074 1447 if (xstate_bv & XSTATE_SSE_MASK) {
19dc85db
RH
1448 do_xrstor_sse(env, ptr, ra);
1449 } else {
1450 /* ??? When AVX is implemented, we may have to be more
1451 selective in the clearing. */
1452 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1453 }
1454 }
cfc3b074
PB
1455 if (rfbm & XSTATE_BNDREGS_MASK) {
1456 if (xstate_bv & XSTATE_BNDREGS_MASK) {
3f32bd21 1457 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
f4f1110e
RH
1458 env->hflags |= HF_MPX_IU_MASK;
1459 } else {
1460 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1461 env->hflags &= ~HF_MPX_IU_MASK;
1462 }
1463 }
cfc3b074
PB
1464 if (rfbm & XSTATE_BNDCSR_MASK) {
1465 if (xstate_bv & XSTATE_BNDCSR_MASK) {
3f32bd21 1466 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
f4f1110e
RH
1467 } else {
1468 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1469 }
1470 cpu_sync_bndcs_hflags(env);
1471 }
0f70ed47
PB
1472 if (rfbm & XSTATE_PKRU_MASK) {
1473 uint64_t old_pkru = env->pkru;
1474 if (xstate_bv & XSTATE_PKRU_MASK) {
3f32bd21 1475 do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
0f70ed47
PB
1476 } else {
1477 env->pkru = 0;
1478 }
1479 if (env->pkru != old_pkru) {
6aa9e42f 1480 CPUState *cs = env_cpu(env);
d10eb08f 1481 tlb_flush(cs);
0f70ed47
PB
1482 }
1483 }
19dc85db
RH
1484}
1485
3f32bd21
RH
1486#undef XO
1487
19dc85db
RH
1488uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1489{
1490 /* The OS must have enabled XSAVE. */
1491 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1492 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1493 }
1494
1495 switch (ecx) {
1496 case 0:
1497 return env->xcr0;
1498 case 1:
c9cfe8f9
RH
1499 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1500 return env->xcr0 & get_xinuse(env);
1501 }
1502 break;
19dc85db
RH
1503 }
1504 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1505}
1506
1507void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1508{
1509 uint32_t dummy, ena_lo, ena_hi;
1510 uint64_t ena;
1511
1512 /* The OS must have enabled XSAVE. */
1513 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1514 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1515 }
1516
1517 /* Only XCR0 is defined at present; the FPU may not be disabled. */
cfc3b074 1518 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
19dc85db
RH
1519 goto do_gpf;
1520 }
1521
1522 /* Disallow enabling unimplemented features. */
1523 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1524 ena = ((uint64_t)ena_hi << 32) | ena_lo;
1525 if (mask & ~ena) {
1526 goto do_gpf;
1527 }
1528
f4f1110e 1529 /* Disallow enabling only half of MPX. */
cfc3b074
PB
1530 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1531 & XSTATE_BNDCSR_MASK) {
f4f1110e
RH
1532 goto do_gpf;
1533 }
1534
19dc85db 1535 env->xcr0 = mask;
f4f1110e 1536 cpu_sync_bndcs_hflags(env);
19dc85db
RH
1537 return;
1538
1539 do_gpf:
1540 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1541}
1542
f299f437
BS
1543/* MMX/SSE */
1544/* XXX: optimize by storing fptt and fptags in the static cpu state */
1545
1546#define SSE_DAZ 0x0040
1547#define SSE_RC_MASK 0x6000
1548#define SSE_RC_NEAR 0x0000
1549#define SSE_RC_DOWN 0x2000
1550#define SSE_RC_UP 0x4000
1551#define SSE_RC_CHOP 0x6000
1552#define SSE_FZ 0x8000
1553
1d8ad165 1554void update_mxcsr_status(CPUX86State *env)
f299f437 1555{
1d8ad165 1556 uint32_t mxcsr = env->mxcsr;
f299f437
BS
1557 int rnd_type;
1558
1559 /* set rounding mode */
4e47e39a 1560 switch (mxcsr & SSE_RC_MASK) {
f299f437
BS
1561 default:
1562 case SSE_RC_NEAR:
1563 rnd_type = float_round_nearest_even;
1564 break;
1565 case SSE_RC_DOWN:
1566 rnd_type = float_round_down;
1567 break;
1568 case SSE_RC_UP:
1569 rnd_type = float_round_up;
1570 break;
1571 case SSE_RC_CHOP:
1572 rnd_type = float_round_to_zero;
1573 break;
1574 }
1575 set_float_rounding_mode(rnd_type, &env->sse_status);
1576
1577 /* set denormals are zero */
4e47e39a 1578 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
f299f437
BS
1579
1580 /* set flush to zero */
4e47e39a 1581 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
f299f437
BS
1582}
1583
d3eb5eae 1584void helper_ldmxcsr(CPUX86State *env, uint32_t val)
f299f437 1585{
4e47e39a 1586 cpu_set_mxcsr(env, val);
f299f437
BS
1587}
1588
d3eb5eae 1589void helper_enter_mmx(CPUX86State *env)
f299f437
BS
1590{
1591 env->fpstt = 0;
1592 *(uint32_t *)(env->fptags) = 0;
1593 *(uint32_t *)(env->fptags + 4) = 0;
1594}
1595
d3eb5eae 1596void helper_emms(CPUX86State *env)
f299f437
BS
1597{
1598 /* set to empty state */
1599 *(uint32_t *)(env->fptags) = 0x01010101;
1600 *(uint32_t *)(env->fptags + 4) = 0x01010101;
1601}
1602
1603/* XXX: suppress */
d3eb5eae 1604void helper_movq(CPUX86State *env, void *d, void *s)
f299f437
BS
1605{
1606 *(uint64_t *)d = *(uint64_t *)s;
1607}
1608
1609#define SHIFT 0
1610#include "ops_sse.h"
1611
1612#define SHIFT 1
1613#include "ops_sse.h"