]> git.proxmox.com Git - mirror_qemu.git/blob - target/i386/tcg/fpu_helper.c
Merge tag 'linux-user-for-7.0-pull-request' of https://gitlab.com/laurent_vivier...
[mirror_qemu.git] / target / i386 / tcg / fpu_helper.c
1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "tcg-cpu.h"
24 #include "exec/helper-proto.h"
25 #include "fpu/softfloat.h"
26 #include "fpu/softfloat-macros.h"
27 #include "helper-tcg.h"
28
/* float macros */
/*
 * Accessors for the x87 operands.  All of them expand to expressions on a
 * variable named `env`, which must be in scope at the point of use.
 */
#define FT0 (env->ft0)                              /* scratch operand */
#define ST0 (env->fpregs[env->fpstt].d)             /* register at top of stack */
#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) /* n-th from top, mod 8 */
#define ST1 ST(1)

/* Rounding-control field of the FPU control word (env->fpuc), bits 10-11. */
#define FPU_RC_MASK 0xc00
#define FPU_RC_NEAR 0x000
#define FPU_RC_DOWN 0x400
#define FPU_RC_UP 0x800
#define FPU_RC_CHOP 0xc00

/* 2^63 as a double. */
#define MAXTAN 9223372036854775808.0
42
/*
 * The following deal with x86 long double-precision numbers.
 *
 * The field accessors take an lvalue that has an `l.upper` member (sign and
 * 15-bit exponent) and an `l.lower` member (64-bit significand).  The argument
 * is parenthesized so that any expression, e.g. `*ptr`, can be passed.
 * Note that BIASEXPONENT evaluates its argument twice.
 */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
/* Biased exponent field. */
#define EXPD(fp) ((fp).l.upper & 0x7fff)
/* Non-zero (0x8000) when the sign bit is set. */
#define SIGND(fp) ((fp).l.upper & 0x8000)
/* 64-bit significand. */
#define MANTD(fp) ((fp).l.lower)
/* Overwrite the exponent field with EXPBIAS, keeping the sign bit. */
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)
50
/*
 * Bits of the FPU status word (env->fpus).  merge_exception_flags() maps the
 * softfloat invalid/input-denormal/divbyzero/overflow/underflow/inexact flags
 * onto IE/DE/ZE/OE/UE/PE; fpu_set_exception() sets SE and B when an unmasked
 * exception is pending.
 */
#define FPUS_IE (1 << 0)
#define FPUS_DE (1 << 1)
#define FPUS_ZE (1 << 2)
#define FPUS_OE (1 << 3)
#define FPUS_UE (1 << 4)
#define FPUS_PE (1 << 5)
#define FPUS_SF (1 << 6)
#define FPUS_SE (1 << 7)
#define FPUS_B (1 << 15)

/* Exception mask bits of the control word (env->fpuc). */
#define FPUC_EM 0x3f

/*
 * Constants loaded by the FLDxx helpers.  The `_d` / `_u` variants differ
 * from the base value by one unit in the last place and are selected by the
 * helpers according to the rounding-control field.
 */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)
71
72 static inline void fpush(CPUX86State *env)
73 {
74 env->fpstt = (env->fpstt - 1) & 7;
75 env->fptags[env->fpstt] = 0; /* validate stack entry */
76 }
77
78 static inline void fpop(CPUX86State *env)
79 {
80 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
81 env->fpstt = (env->fpstt + 1) & 7;
82 }
83
84 static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr)
85 {
86 CPU_LDoubleU temp;
87
88 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
89 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
90 return temp.d;
91 }
92
93 static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
94 uintptr_t retaddr)
95 {
96 CPU_LDoubleU temp;
97
98 temp.d = f;
99 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
100 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
101 }
102
103 /* x87 FPU helpers */
104
105 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
106 {
107 union {
108 float64 f64;
109 double d;
110 } u;
111
112 u.f64 = floatx80_to_float64(a, &env->fp_status);
113 return u.d;
114 }
115
116 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
117 {
118 union {
119 float64 f64;
120 double d;
121 } u;
122
123 u.d = a;
124 return float64_to_floatx80(u.f64, &env->fp_status);
125 }
126
127 static void fpu_set_exception(CPUX86State *env, int mask)
128 {
129 env->fpus |= mask;
130 if (env->fpus & (~env->fpuc & FPUC_EM)) {
131 env->fpus |= FPUS_SE | FPUS_B;
132 }
133 }
134
135 static inline uint8_t save_exception_flags(CPUX86State *env)
136 {
137 uint8_t old_flags = get_float_exception_flags(&env->fp_status);
138 set_float_exception_flags(0, &env->fp_status);
139 return old_flags;
140 }
141
142 static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
143 {
144 uint8_t new_flags = get_float_exception_flags(&env->fp_status);
145 float_raise(old_flags, &env->fp_status);
146 fpu_set_exception(env,
147 ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
148 (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
149 (new_flags & float_flag_overflow ? FPUS_OE : 0) |
150 (new_flags & float_flag_underflow ? FPUS_UE : 0) |
151 (new_flags & float_flag_inexact ? FPUS_PE : 0) |
152 (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
153 }
154
155 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
156 {
157 uint8_t old_flags = save_exception_flags(env);
158 floatx80 ret = floatx80_div(a, b, &env->fp_status);
159 merge_exception_flags(env, old_flags);
160 return ret;
161 }
162
163 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
164 {
165 if (env->cr[0] & CR0_NE_MASK) {
166 raise_exception_ra(env, EXCP10_COPR, retaddr);
167 }
168 #if !defined(CONFIG_USER_ONLY)
169 else {
170 fpu_check_raise_ferr_irq(env);
171 }
172 #endif
173 }
174
175 void helper_flds_FT0(CPUX86State *env, uint32_t val)
176 {
177 uint8_t old_flags = save_exception_flags(env);
178 union {
179 float32 f;
180 uint32_t i;
181 } u;
182
183 u.i = val;
184 FT0 = float32_to_floatx80(u.f, &env->fp_status);
185 merge_exception_flags(env, old_flags);
186 }
187
188 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
189 {
190 uint8_t old_flags = save_exception_flags(env);
191 union {
192 float64 f;
193 uint64_t i;
194 } u;
195
196 u.i = val;
197 FT0 = float64_to_floatx80(u.f, &env->fp_status);
198 merge_exception_flags(env, old_flags);
199 }
200
201 void helper_fildl_FT0(CPUX86State *env, int32_t val)
202 {
203 FT0 = int32_to_floatx80(val, &env->fp_status);
204 }
205
206 void helper_flds_ST0(CPUX86State *env, uint32_t val)
207 {
208 uint8_t old_flags = save_exception_flags(env);
209 int new_fpstt;
210 union {
211 float32 f;
212 uint32_t i;
213 } u;
214
215 new_fpstt = (env->fpstt - 1) & 7;
216 u.i = val;
217 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
218 env->fpstt = new_fpstt;
219 env->fptags[new_fpstt] = 0; /* validate stack entry */
220 merge_exception_flags(env, old_flags);
221 }
222
223 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
224 {
225 uint8_t old_flags = save_exception_flags(env);
226 int new_fpstt;
227 union {
228 float64 f;
229 uint64_t i;
230 } u;
231
232 new_fpstt = (env->fpstt - 1) & 7;
233 u.i = val;
234 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
235 env->fpstt = new_fpstt;
236 env->fptags[new_fpstt] = 0; /* validate stack entry */
237 merge_exception_flags(env, old_flags);
238 }
239
240 static FloatX80RoundPrec tmp_maximise_precision(float_status *st)
241 {
242 FloatX80RoundPrec old = get_floatx80_rounding_precision(st);
243 set_floatx80_rounding_precision(floatx80_precision_x, st);
244 return old;
245 }
246
247 void helper_fildl_ST0(CPUX86State *env, int32_t val)
248 {
249 int new_fpstt;
250 FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);
251
252 new_fpstt = (env->fpstt - 1) & 7;
253 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
254 env->fpstt = new_fpstt;
255 env->fptags[new_fpstt] = 0; /* validate stack entry */
256
257 set_floatx80_rounding_precision(old, &env->fp_status);
258 }
259
260 void helper_fildll_ST0(CPUX86State *env, int64_t val)
261 {
262 int new_fpstt;
263 FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);
264
265 new_fpstt = (env->fpstt - 1) & 7;
266 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
267 env->fpstt = new_fpstt;
268 env->fptags[new_fpstt] = 0; /* validate stack entry */
269
270 set_floatx80_rounding_precision(old, &env->fp_status);
271 }
272
273 uint32_t helper_fsts_ST0(CPUX86State *env)
274 {
275 uint8_t old_flags = save_exception_flags(env);
276 union {
277 float32 f;
278 uint32_t i;
279 } u;
280
281 u.f = floatx80_to_float32(ST0, &env->fp_status);
282 merge_exception_flags(env, old_flags);
283 return u.i;
284 }
285
286 uint64_t helper_fstl_ST0(CPUX86State *env)
287 {
288 uint8_t old_flags = save_exception_flags(env);
289 union {
290 float64 f;
291 uint64_t i;
292 } u;
293
294 u.f = floatx80_to_float64(ST0, &env->fp_status);
295 merge_exception_flags(env, old_flags);
296 return u.i;
297 }
298
299 int32_t helper_fist_ST0(CPUX86State *env)
300 {
301 uint8_t old_flags = save_exception_flags(env);
302 int32_t val;
303
304 val = floatx80_to_int32(ST0, &env->fp_status);
305 if (val != (int16_t)val) {
306 set_float_exception_flags(float_flag_invalid, &env->fp_status);
307 val = -32768;
308 }
309 merge_exception_flags(env, old_flags);
310 return val;
311 }
312
313 int32_t helper_fistl_ST0(CPUX86State *env)
314 {
315 uint8_t old_flags = save_exception_flags(env);
316 int32_t val;
317
318 val = floatx80_to_int32(ST0, &env->fp_status);
319 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
320 val = 0x80000000;
321 }
322 merge_exception_flags(env, old_flags);
323 return val;
324 }
325
326 int64_t helper_fistll_ST0(CPUX86State *env)
327 {
328 uint8_t old_flags = save_exception_flags(env);
329 int64_t val;
330
331 val = floatx80_to_int64(ST0, &env->fp_status);
332 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
333 val = 0x8000000000000000ULL;
334 }
335 merge_exception_flags(env, old_flags);
336 return val;
337 }
338
339 int32_t helper_fistt_ST0(CPUX86State *env)
340 {
341 uint8_t old_flags = save_exception_flags(env);
342 int32_t val;
343
344 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
345 if (val != (int16_t)val) {
346 set_float_exception_flags(float_flag_invalid, &env->fp_status);
347 val = -32768;
348 }
349 merge_exception_flags(env, old_flags);
350 return val;
351 }
352
353 int32_t helper_fisttl_ST0(CPUX86State *env)
354 {
355 uint8_t old_flags = save_exception_flags(env);
356 int32_t val;
357
358 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
359 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
360 val = 0x80000000;
361 }
362 merge_exception_flags(env, old_flags);
363 return val;
364 }
365
366 int64_t helper_fisttll_ST0(CPUX86State *env)
367 {
368 uint8_t old_flags = save_exception_flags(env);
369 int64_t val;
370
371 val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
372 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
373 val = 0x8000000000000000ULL;
374 }
375 merge_exception_flags(env, old_flags);
376 return val;
377 }
378
379 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
380 {
381 int new_fpstt;
382
383 new_fpstt = (env->fpstt - 1) & 7;
384 env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC());
385 env->fpstt = new_fpstt;
386 env->fptags[new_fpstt] = 0; /* validate stack entry */
387 }
388
/* Store ST0 as a 10-byte extended-precision value at guest address @ptr. */
void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    do_fstt(env, ST0, ptr, GETPC());
}
393
/* Helper entry point for fpush(): push a slot onto the register stack. */
void helper_fpush(CPUX86State *env)
{
    fpush(env);
}
398
/* Helper entry point for fpop(): pop the top slot off the register stack. */
void helper_fpop(CPUX86State *env)
{
    fpop(env);
}
403
404 void helper_fdecstp(CPUX86State *env)
405 {
406 env->fpstt = (env->fpstt - 1) & 7;
407 env->fpus &= ~0x4700;
408 }
409
410 void helper_fincstp(CPUX86State *env)
411 {
412 env->fpstt = (env->fpstt + 1) & 7;
413 env->fpus &= ~0x4700;
414 }
415
416 /* FPU move */
417
418 void helper_ffree_STN(CPUX86State *env, int st_index)
419 {
420 env->fptags[(env->fpstt + st_index) & 7] = 1;
421 }
422
/* Copy the scratch operand FT0 into ST0. */
void helper_fmov_ST0_FT0(CPUX86State *env)
{
    ST0 = FT0;
}
427
/* Copy stack register ST(@st_index) into the scratch operand FT0. */
void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    FT0 = ST(st_index);
}
432
/* Copy stack register ST(@st_index) into ST0. */
void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    ST0 = ST(st_index);
}
437
/* Copy ST0 into stack register ST(@st_index). */
void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = ST0;
}
442
443 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
444 {
445 floatx80 tmp;
446
447 tmp = ST(st_index);
448 ST(st_index) = ST0;
449 ST0 = tmp;
450 }
451
452 /* FPU operations */
453
/*
 * Status-word condition bits for a comparison result, indexed by
 * (FloatRelation + 1).  Entries are within mask 0x4500, which the FCOM
 * helpers clear before OR-ing the entry in.
 * NOTE(review): presumably ordered less, equal, greater, unordered --
 * confirm against the FloatRelation definition.
 */
static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
455
456 void helper_fcom_ST0_FT0(CPUX86State *env)
457 {
458 uint8_t old_flags = save_exception_flags(env);
459 FloatRelation ret;
460
461 ret = floatx80_compare(ST0, FT0, &env->fp_status);
462 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
463 merge_exception_flags(env, old_flags);
464 }
465
466 void helper_fucom_ST0_FT0(CPUX86State *env)
467 {
468 uint8_t old_flags = save_exception_flags(env);
469 FloatRelation ret;
470
471 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
472 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
473 merge_exception_flags(env, old_flags);
474 }
475
/*
 * EFLAGS bits (subset of CC_Z | CC_P | CC_C) for a comparison result,
 * indexed by (FloatRelation + 1).  Used by the FCOMI/FUCOMI helpers.
 * NOTE(review): presumably ordered less, equal, greater, unordered --
 * confirm against the FloatRelation definition.
 */
static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
477
478 void helper_fcomi_ST0_FT0(CPUX86State *env)
479 {
480 uint8_t old_flags = save_exception_flags(env);
481 int eflags;
482 FloatRelation ret;
483
484 ret = floatx80_compare(ST0, FT0, &env->fp_status);
485 eflags = cpu_cc_compute_all(env, CC_OP);
486 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
487 CC_SRC = eflags;
488 merge_exception_flags(env, old_flags);
489 }
490
491 void helper_fucomi_ST0_FT0(CPUX86State *env)
492 {
493 uint8_t old_flags = save_exception_flags(env);
494 int eflags;
495 FloatRelation ret;
496
497 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
498 eflags = cpu_cc_compute_all(env, CC_OP);
499 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
500 CC_SRC = eflags;
501 merge_exception_flags(env, old_flags);
502 }
503
504 void helper_fadd_ST0_FT0(CPUX86State *env)
505 {
506 uint8_t old_flags = save_exception_flags(env);
507 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
508 merge_exception_flags(env, old_flags);
509 }
510
511 void helper_fmul_ST0_FT0(CPUX86State *env)
512 {
513 uint8_t old_flags = save_exception_flags(env);
514 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
515 merge_exception_flags(env, old_flags);
516 }
517
518 void helper_fsub_ST0_FT0(CPUX86State *env)
519 {
520 uint8_t old_flags = save_exception_flags(env);
521 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
522 merge_exception_flags(env, old_flags);
523 }
524
525 void helper_fsubr_ST0_FT0(CPUX86State *env)
526 {
527 uint8_t old_flags = save_exception_flags(env);
528 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
529 merge_exception_flags(env, old_flags);
530 }
531
/* ST0 = ST0 / FT0; exception handling is done inside helper_fdiv(). */
void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, ST0, FT0);
}
536
/* Reversed divide: ST0 = FT0 / ST0; exceptions handled in helper_fdiv(). */
void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, FT0, ST0);
}
541
542 /* fp operations between STN and ST0 */
543
544 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
545 {
546 uint8_t old_flags = save_exception_flags(env);
547 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
548 merge_exception_flags(env, old_flags);
549 }
550
551 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
552 {
553 uint8_t old_flags = save_exception_flags(env);
554 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
555 merge_exception_flags(env, old_flags);
556 }
557
558 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
559 {
560 uint8_t old_flags = save_exception_flags(env);
561 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
562 merge_exception_flags(env, old_flags);
563 }
564
565 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
566 {
567 uint8_t old_flags = save_exception_flags(env);
568 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
569 merge_exception_flags(env, old_flags);
570 }
571
572 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
573 {
574 floatx80 *p;
575
576 p = &ST(st_index);
577 *p = helper_fdiv(env, *p, ST0);
578 }
579
580 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
581 {
582 floatx80 *p;
583
584 p = &ST(st_index);
585 *p = helper_fdiv(env, ST0, *p);
586 }
587
588 /* misc FPU operations */
/* Replace ST0 with floatx80_chs(ST0), i.e. flip its sign. */
void helper_fchs_ST0(CPUX86State *env)
{
    ST0 = floatx80_chs(ST0);
}
593
/* Replace ST0 with floatx80_abs(ST0), i.e. its absolute value. */
void helper_fabs_ST0(CPUX86State *env)
{
    ST0 = floatx80_abs(ST0);
}
598
/* Overwrite ST0 with floatx80_one.  Does not push; the stack is unchanged. */
void helper_fld1_ST0(CPUX86State *env)
{
    ST0 = floatx80_one;
}
603
604 void helper_fldl2t_ST0(CPUX86State *env)
605 {
606 switch (env->fpuc & FPU_RC_MASK) {
607 case FPU_RC_UP:
608 ST0 = floatx80_l2t_u;
609 break;
610 default:
611 ST0 = floatx80_l2t;
612 break;
613 }
614 }
615
616 void helper_fldl2e_ST0(CPUX86State *env)
617 {
618 switch (env->fpuc & FPU_RC_MASK) {
619 case FPU_RC_DOWN:
620 case FPU_RC_CHOP:
621 ST0 = floatx80_l2e_d;
622 break;
623 default:
624 ST0 = floatx80_l2e;
625 break;
626 }
627 }
628
629 void helper_fldpi_ST0(CPUX86State *env)
630 {
631 switch (env->fpuc & FPU_RC_MASK) {
632 case FPU_RC_DOWN:
633 case FPU_RC_CHOP:
634 ST0 = floatx80_pi_d;
635 break;
636 default:
637 ST0 = floatx80_pi;
638 break;
639 }
640 }
641
642 void helper_fldlg2_ST0(CPUX86State *env)
643 {
644 switch (env->fpuc & FPU_RC_MASK) {
645 case FPU_RC_DOWN:
646 case FPU_RC_CHOP:
647 ST0 = floatx80_lg2_d;
648 break;
649 default:
650 ST0 = floatx80_lg2;
651 break;
652 }
653 }
654
655 void helper_fldln2_ST0(CPUX86State *env)
656 {
657 switch (env->fpuc & FPU_RC_MASK) {
658 case FPU_RC_DOWN:
659 case FPU_RC_CHOP:
660 ST0 = floatx80_ln2_d;
661 break;
662 default:
663 ST0 = floatx80_ln2;
664 break;
665 }
666 }
667
/* Overwrite ST0 with floatx80_zero.  Does not push; the stack is unchanged. */
void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}
672
/* Set the scratch operand FT0 to floatx80_zero. */
void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}
677
678 uint32_t helper_fnstsw(CPUX86State *env)
679 {
680 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
681 }
682
/* Return the FPU control word (env->fpuc). */
uint32_t helper_fnstcw(CPUX86State *env)
{
    return env->fpuc;
}
687
688 void update_fp_status(CPUX86State *env)
689 {
690 FloatRoundMode rnd_mode;
691 FloatX80RoundPrec rnd_prec;
692
693 /* set rounding mode */
694 switch (env->fpuc & FPU_RC_MASK) {
695 default:
696 case FPU_RC_NEAR:
697 rnd_mode = float_round_nearest_even;
698 break;
699 case FPU_RC_DOWN:
700 rnd_mode = float_round_down;
701 break;
702 case FPU_RC_UP:
703 rnd_mode = float_round_up;
704 break;
705 case FPU_RC_CHOP:
706 rnd_mode = float_round_to_zero;
707 break;
708 }
709 set_float_rounding_mode(rnd_mode, &env->fp_status);
710
711 switch ((env->fpuc >> 8) & 3) {
712 case 0:
713 rnd_prec = floatx80_precision_s;
714 break;
715 case 2:
716 rnd_prec = floatx80_precision_d;
717 break;
718 case 3:
719 default:
720 rnd_prec = floatx80_precision_x;
721 break;
722 }
723 set_floatx80_rounding_precision(rnd_prec, &env->fp_status);
724 }
725
/* Load the FPU control word from @val by delegating to cpu_set_fpuc(). */
void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}
730
/*
 * Clear the exception flags: keep only status-word bits 8-14 (mask 0x7f00),
 * which drops the exception bits 0-7 (including SF and SE) and the busy
 * bit 15.
 */
void helper_fclex(CPUX86State *env)
{
    env->fpus &= 0x7f00;
}
735
736 void helper_fwait(CPUX86State *env)
737 {
738 if (env->fpus & FPUS_SE) {
739 fpu_raise_exception(env, GETPC());
740 }
741 }
742
743 static void do_fninit(CPUX86State *env)
744 {
745 env->fpus = 0;
746 env->fpstt = 0;
747 env->fpcs = 0;
748 env->fpds = 0;
749 env->fpip = 0;
750 env->fpdp = 0;
751 cpu_set_fpuc(env, 0x37f);
752 env->fptags[0] = 1;
753 env->fptags[1] = 1;
754 env->fptags[2] = 1;
755 env->fptags[3] = 1;
756 env->fptags[4] = 1;
757 env->fptags[5] = 1;
758 env->fptags[6] = 1;
759 env->fptags[7] = 1;
760 }
761
/* Helper entry point for do_fninit(): reset the FPU state. */
void helper_fninit(CPUX86State *env)
{
    do_fninit(env);
}
766
767 /* BCD ops */
768
769 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
770 {
771 floatx80 tmp;
772 uint64_t val;
773 unsigned int v;
774 int i;
775
776 val = 0;
777 for (i = 8; i >= 0; i--) {
778 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
779 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
780 }
781 tmp = int64_to_floatx80(val, &env->fp_status);
782 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
783 tmp = floatx80_chs(tmp);
784 }
785 fpush(env);
786 ST0 = tmp;
787 }
788
789 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
790 {
791 uint8_t old_flags = save_exception_flags(env);
792 int v;
793 target_ulong mem_ref, mem_end;
794 int64_t val;
795 CPU_LDoubleU temp;
796
797 temp.d = ST0;
798
799 val = floatx80_to_int64(ST0, &env->fp_status);
800 mem_ref = ptr;
801 if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
802 set_float_exception_flags(float_flag_invalid, &env->fp_status);
803 while (mem_ref < ptr + 7) {
804 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
805 }
806 cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
807 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
808 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
809 merge_exception_flags(env, old_flags);
810 return;
811 }
812 mem_end = mem_ref + 9;
813 if (SIGND(temp)) {
814 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
815 val = -val;
816 } else {
817 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
818 }
819 while (mem_ref < mem_end) {
820 if (val == 0) {
821 break;
822 }
823 v = val % 100;
824 val = val / 100;
825 v = ((v / 10) << 4) | (v % 10);
826 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
827 }
828 while (mem_ref < mem_end) {
829 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
830 }
831 merge_exception_flags(env, old_flags);
832 }
833
/* 128-bit significand of log(2), split into high and low 64-bit halves. */
#define ln2_sig_high 0xb17217f7d1cf79abULL
#define ln2_sig_low 0xc9e3b39803f2f6afULL

/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].
 *
 * f2xm1_coeff_0 is the leading coefficient; f2xm1_coeff_0_low is a tiny
 * negative value (exponent 0xbfbc) accompanying it.
 * NOTE(review): presumably f2xm1_coeff_0_low is the low-order correction
 * that extends f2xm1_coeff_0 beyond 64 bits -- confirm against the code
 * that evaluates the polynomial.
 */
#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
851
/* One row of f2xm1_table: a sample point t with 2^t and 2^t - 1. */
struct f2xm1_data {
    /*
     * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
     * are very close to exact floatx80 values.
     */
    floatx80 t;
    /* The value of 2^t. */
    floatx80 exp2;
    /* The value of 2^t - 1. */
    floatx80 exp2m1;
};
863
/*
 * Table of 65 sample points for 2^t and 2^t - 1.  Entry i has t close to
 * (i - 32) / 32, so the table runs from t = -1 (entry 0) through t = 0
 * (entry 32) to t = 1 (entry 64).
 */
static const struct f2xm1_data f2xm1_table[65] = {
    { make_floatx80_init(0xbfff, 0x8000000000000000ULL),
      make_floatx80_init(0x3ffe, 0x8000000000000000ULL),
      make_floatx80_init(0xbffe, 0x8000000000000000ULL) },
    { make_floatx80_init(0xbffe, 0xf800000000002e7eULL),
      make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL),
      make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) },
    { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL),
      make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL),
      make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) },
    { make_floatx80_init(0xbffe, 0xe800000000006f10ULL),
      make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL),
      make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) },
    { make_floatx80_init(0xbffe, 0xe000000000008a45ULL),
      make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
      make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) },
    { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL),
      make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL),
      make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) },
    { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL),
      make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL),
      make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) },
    { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL),
      make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL),
      make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) },
    { make_floatx80_init(0xbffe, 0xc000000000006530ULL),
      make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL),
      make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) },
    { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL),
      make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL),
      make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) },
    { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL),
      make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL),
      make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) },
    { make_floatx80_init(0xbffe, 0xa800000000006f8aULL),
      make_floatx80_init(0x3ffe, 0xa27043030c49370aULL),
      make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) },
    { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL),
      make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL),
      make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) },
    { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL),
      make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
      make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) },
    { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL),
      make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL),
      make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) },
    { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL),
      make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL),
      make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) },
    { make_floatx80_init(0xbffe, 0x800000000000227dULL),
      make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL),
      make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) },
    { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL),
      make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
      make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) },
    { make_floatx80_init(0xbffd, 0xe00000000000df81ULL),
      make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL),
      make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) },
    { make_floatx80_init(0xbffd, 0xd00000000000bccfULL),
      make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL),
      make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) },
    { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL),
      make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL),
      make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) },
    { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL),
      make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL),
      make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) },
    { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL),
      make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL),
      make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) },
    { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL),
      make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL),
      make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) },
    { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL),
      make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL),
      make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
    { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL),
      make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL),
      make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) },
    { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL),
      make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL),
      make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) },
    { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL),
      make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL),
      make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) },
    { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL),
      make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL),
      make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) },
    { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL),
      make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL),
      make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) },
    { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL),
      make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL),
      make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) },
    { make_floatx80_init(0xbff9, 0xffffffffffff11feULL),
      make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL),
      make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) },
    { floatx80_zero_init,
      make_floatx80_init(0x3fff, 0x8000000000000000ULL),
      floatx80_zero_init },
    { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL),
      make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL),
      make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
    { make_floatx80_init(0x3ffb, 0x800000000000b500ULL),
      make_floatx80_init(0x3fff, 0x85aac367cc488345ULL),
      make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) },
    { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL),
      make_floatx80_init(0x3fff, 0x88980e8092da7cceULL),
      make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) },
    { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL),
      make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL),
      make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
    { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL),
      make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL),
      make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) },
    { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL),
      make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL),
      make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
    { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL),
      make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL),
      make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) },
    { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL),
      make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL),
      make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) },
    { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL),
      make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL),
      make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
    { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL),
      make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL),
      make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) },
    { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL),
      make_floatx80_init(0x3fff, 0xa27043030c49370aULL),
      make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) },
    { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL),
      make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL),
      make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) },
    { make_floatx80_init(0x3ffd, 0xd0000000000093beULL),
      make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL),
      make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) },
    { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL),
      make_floatx80_init(0x3fff, 0xad583eea42a17876ULL),
      make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) },
    { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL),
      make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL),
      make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) },
    { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL),
      make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL),
      make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) },
    { make_floatx80_init(0x3ffe, 0x8800000000006344ULL),
      make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL),
      make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) },
    { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL),
      make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL),
      make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) },
    { make_floatx80_init(0x3ffe, 0x9800000000009127ULL),
      make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL),
      make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) },
    { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL),
      make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL),
      make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) },
    { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL),
      make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL),
      make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) },
    { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL),
      make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL),
      make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) },
    { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL),
      make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL),
      make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) },
    { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL),
      make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL),
      make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) },
    { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL),
      make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL),
      make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
    { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL),
      make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL),
      make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) },
    { make_floatx80_init(0x3ffe, 0xd800000000004165ULL),
      make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL),
      make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) },
    { make_floatx80_init(0x3ffe, 0xe00000000000582cULL),
      make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL),
      make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) },
    { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL),
      make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL),
      make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) },
    { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL),
      make_floatx80_init(0x3fff, 0xf5257d152486a2faULL),
      make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) },
    { make_floatx80_init(0x3ffe, 0xf800000000001069ULL),
      make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL),
      make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) },
    { make_floatx80_init(0x3fff, 0x8000000000000000ULL),
      make_floatx80_init(0x4000, 0x8000000000000000ULL),
      make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
};
1061
1062 void helper_f2xm1(CPUX86State *env)
1063 {
1064 uint8_t old_flags = save_exception_flags(env);
1065 uint64_t sig = extractFloatx80Frac(ST0);
1066 int32_t exp = extractFloatx80Exp(ST0);
1067 bool sign = extractFloatx80Sign(ST0);
1068
1069 if (floatx80_invalid_encoding(ST0)) {
1070 float_raise(float_flag_invalid, &env->fp_status);
1071 ST0 = floatx80_default_nan(&env->fp_status);
1072 } else if (floatx80_is_any_nan(ST0)) {
1073 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1074 float_raise(float_flag_invalid, &env->fp_status);
1075 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1076 }
1077 } else if (exp > 0x3fff ||
1078 (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
1079 /* Out of range for the instruction, treat as invalid. */
1080 float_raise(float_flag_invalid, &env->fp_status);
1081 ST0 = floatx80_default_nan(&env->fp_status);
1082 } else if (exp == 0x3fff) {
1083 /* Argument 1 or -1, exact result 1 or -0.5. */
1084 if (sign) {
1085 ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
1086 }
1087 } else if (exp < 0x3fb0) {
1088 if (!floatx80_is_zero(ST0)) {
1089 /*
1090 * Multiplying the argument by an extra-precision version
1091 * of log(2) is sufficiently precise. Zero arguments are
1092 * returned unchanged.
1093 */
1094 uint64_t sig0, sig1, sig2;
1095 if (exp == 0) {
1096 normalizeFloatx80Subnormal(sig, &exp, &sig);
1097 }
1098 mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
1099 &sig2);
1100 /* This result is inexact. */
1101 sig1 |= 1;
1102 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
1103 sign, exp, sig0, sig1,
1104 &env->fp_status);
1105 }
1106 } else {
1107 floatx80 tmp, y, accum;
1108 bool asign, bsign;
1109 int32_t n, aexp, bexp;
1110 uint64_t asig0, asig1, asig2, bsig0, bsig1;
1111 FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
1112 FloatX80RoundPrec save_prec =
1113 env->fp_status.floatx80_rounding_precision;
1114 env->fp_status.float_rounding_mode = float_round_nearest_even;
1115 env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
1116
1117 /* Find the nearest multiple of 1/32 to the argument. */
1118 tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
1119 n = 32 + floatx80_to_int32(tmp, &env->fp_status);
1120 y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);
1121
1122 if (floatx80_is_zero(y)) {
1123 /*
1124 * Use the value of 2^t - 1 from the table, to avoid
1125 * needing to special-case zero as a result of
1126 * multiplication below.
1127 */
1128 ST0 = f2xm1_table[n].t;
1129 set_float_exception_flags(float_flag_inexact, &env->fp_status);
1130 env->fp_status.float_rounding_mode = save_mode;
1131 } else {
1132 /*
1133 * Compute the lower parts of a polynomial expansion for
1134 * (2^y - 1) / y.
1135 */
1136 accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
1137 accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
1138 accum = floatx80_mul(accum, y, &env->fp_status);
1139 accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
1140 accum = floatx80_mul(accum, y, &env->fp_status);
1141 accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
1142 accum = floatx80_mul(accum, y, &env->fp_status);
1143 accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
1144 accum = floatx80_mul(accum, y, &env->fp_status);
1145 accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
1146 accum = floatx80_mul(accum, y, &env->fp_status);
1147 accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
1148 accum = floatx80_mul(accum, y, &env->fp_status);
1149 accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);
1150
1151 /*
1152 * The full polynomial expansion is f2xm1_coeff_0 + accum
1153 * (where accum has much lower magnitude, and so, in
1154 * particular, carry out of the addition is not possible).
1155 * (This expansion is only accurate to about 70 bits, not
1156 * 128 bits.)
1157 */
1158 aexp = extractFloatx80Exp(f2xm1_coeff_0);
1159 asign = extractFloatx80Sign(f2xm1_coeff_0);
1160 shift128RightJamming(extractFloatx80Frac(accum), 0,
1161 aexp - extractFloatx80Exp(accum),
1162 &asig0, &asig1);
1163 bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
1164 bsig1 = 0;
1165 if (asign == extractFloatx80Sign(accum)) {
1166 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1167 } else {
1168 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1169 }
1170 /* And thus compute an approximation to 2^y - 1. */
1171 mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
1172 &asig0, &asig1, &asig2);
1173 aexp += extractFloatx80Exp(y) - 0x3ffe;
1174 asign ^= extractFloatx80Sign(y);
1175 if (n != 32) {
1176 /*
1177 * Multiply this by the precomputed value of 2^t and
1178 * add that of 2^t - 1.
1179 */
1180 mul128By64To192(asig0, asig1,
1181 extractFloatx80Frac(f2xm1_table[n].exp2),
1182 &asig0, &asig1, &asig2);
1183 aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
1184 bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
1185 bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
1186 bsig1 = 0;
1187 if (bexp < aexp) {
1188 shift128RightJamming(bsig0, bsig1, aexp - bexp,
1189 &bsig0, &bsig1);
1190 } else if (aexp < bexp) {
1191 shift128RightJamming(asig0, asig1, bexp - aexp,
1192 &asig0, &asig1);
1193 aexp = bexp;
1194 }
1195 /* The sign of 2^t - 1 is always that of the result. */
1196 bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
1197 if (asign == bsign) {
1198 /* Avoid possible carry out of the addition. */
1199 shift128RightJamming(asig0, asig1, 1,
1200 &asig0, &asig1);
1201 shift128RightJamming(bsig0, bsig1, 1,
1202 &bsig0, &bsig1);
1203 ++aexp;
1204 add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
1205 } else {
1206 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1207 asign = bsign;
1208 }
1209 }
1210 env->fp_status.float_rounding_mode = save_mode;
1211 /* This result is inexact. */
1212 asig1 |= 1;
1213 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
1214 asign, aexp, asig0, asig1,
1215 &env->fp_status);
1216 }
1217
1218 env->fp_status.floatx80_rounding_precision = save_prec;
1219 }
1220 merge_exception_flags(env, old_flags);
1221 }
1222
1223 void helper_fptan(CPUX86State *env)
1224 {
1225 double fptemp = floatx80_to_double(env, ST0);
1226
1227 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1228 env->fpus |= 0x400;
1229 } else {
1230 fptemp = tan(fptemp);
1231 ST0 = double_to_floatx80(env, fptemp);
1232 fpush(env);
1233 ST0 = floatx80_one;
1234 env->fpus &= ~0x400; /* C2 <-- 0 */
1235 /* the above code is for |arg| < 2**52 only */
1236 }
1237 }
1238
/*
 * 128-bit-precision values of pi/4, pi/2, 3pi/4 and pi.  Each constant
 * is given as a biased floatx80-style exponent plus the high and low
 * 64-bit halves of its significand.  pi/4, pi/2 and pi share the same
 * significand and differ only in the exponent.
 */

/* pi/4 */
#define pi_4_exp        0x3ffe
#define pi_4_sig_high   0xc90fdaa22168c234ULL
#define pi_4_sig_low    0xc4c6628b80dc1cd1ULL

/* pi/2 */
#define pi_2_exp        0x3fff
#define pi_2_sig_high   0xc90fdaa22168c234ULL
#define pi_2_sig_low    0xc4c6628b80dc1cd1ULL

/* 3pi/4 */
#define pi_34_exp       0x4000
#define pi_34_sig_high  0x96cbe3f9990e91a7ULL
#define pi_34_sig_low   0x9394c9e8a0a5159dULL

/* pi */
#define pi_exp          0x4000
#define pi_sig_high     0xc90fdaa22168c234ULL
#define pi_sig_low      0xc4c6628b80dc1cd1ULL
1252
/*
 * Coefficients of a minimax polynomial approximating atan(x) on
 * [-1/16, 1/16].  Only odd powers of x appear, so coefficient k
 * multiplies x^(2k+1).  The leading coefficient is so close to exactly
 * 1 that, unlike in some of the other approximations in this file, it
 * needs no separate low-order part to reach the required accuracy.
 */
#define fpatan_coeff_0  make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1  make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2  make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3  make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4  make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5  make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6  make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)
1268
1269 struct fpatan_data {
1270 /* High and low parts of atan(x). */
1271 floatx80 atan_high, atan_low;
1272 };
1273
1274 static const struct fpatan_data fpatan_table[9] = {
1275 { floatx80_zero_init,
1276 floatx80_zero_init },
1277 { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
1278 make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
1279 { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
1280 make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
1281 { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
1282 make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
1283 { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
1284 make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
1285 { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
1286 make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
1287 { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
1288 make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
1289 { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
1290 make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
1291 { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
1292 make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
1293 };
1294
1295 void helper_fpatan(CPUX86State *env)
1296 {
1297 uint8_t old_flags = save_exception_flags(env);
1298 uint64_t arg0_sig = extractFloatx80Frac(ST0);
1299 int32_t arg0_exp = extractFloatx80Exp(ST0);
1300 bool arg0_sign = extractFloatx80Sign(ST0);
1301 uint64_t arg1_sig = extractFloatx80Frac(ST1);
1302 int32_t arg1_exp = extractFloatx80Exp(ST1);
1303 bool arg1_sign = extractFloatx80Sign(ST1);
1304
1305 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1306 float_raise(float_flag_invalid, &env->fp_status);
1307 ST1 = floatx80_silence_nan(ST0, &env->fp_status);
1308 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
1309 float_raise(float_flag_invalid, &env->fp_status);
1310 ST1 = floatx80_silence_nan(ST1, &env->fp_status);
1311 } else if (floatx80_invalid_encoding(ST0) ||
1312 floatx80_invalid_encoding(ST1)) {
1313 float_raise(float_flag_invalid, &env->fp_status);
1314 ST1 = floatx80_default_nan(&env->fp_status);
1315 } else if (floatx80_is_any_nan(ST0)) {
1316 ST1 = ST0;
1317 } else if (floatx80_is_any_nan(ST1)) {
1318 /* Pass this NaN through. */
1319 } else if (floatx80_is_zero(ST1) && !arg0_sign) {
1320 /* Pass this zero through. */
1321 } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
1322 arg0_exp - arg1_exp >= 80) &&
1323 !arg0_sign) {
1324 /*
1325 * Dividing ST1 by ST0 gives the correct result up to
1326 * rounding, and avoids spurious underflow exceptions that
1327 * might result from passing some small values through the
1328 * polynomial approximation, but if a finite nonzero result of
1329 * division is exact, the result of fpatan is still inexact
1330 * (and underflowing where appropriate).
1331 */
1332 FloatX80RoundPrec save_prec =
1333 env->fp_status.floatx80_rounding_precision;
1334 env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
1335 ST1 = floatx80_div(ST1, ST0, &env->fp_status);
1336 env->fp_status.floatx80_rounding_precision = save_prec;
1337 if (!floatx80_is_zero(ST1) &&
1338 !(get_float_exception_flags(&env->fp_status) &
1339 float_flag_inexact)) {
1340 /*
1341 * The mathematical result is very slightly closer to zero
1342 * than this exact result. Round a value with the
1343 * significand adjusted accordingly to get the correct
1344 * exceptions, and possibly an adjusted result depending
1345 * on the rounding mode.
1346 */
1347 uint64_t sig = extractFloatx80Frac(ST1);
1348 int32_t exp = extractFloatx80Exp(ST1);
1349 bool sign = extractFloatx80Sign(ST1);
1350 if (exp == 0) {
1351 normalizeFloatx80Subnormal(sig, &exp, &sig);
1352 }
1353 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
1354 sign, exp, sig - 1,
1355 -1, &env->fp_status);
1356 }
1357 } else {
1358 /* The result is inexact. */
1359 bool rsign = arg1_sign;
1360 int32_t rexp;
1361 uint64_t rsig0, rsig1;
1362 if (floatx80_is_zero(ST1)) {
1363 /*
1364 * ST0 is negative. The result is pi with the sign of
1365 * ST1.
1366 */
1367 rexp = pi_exp;
1368 rsig0 = pi_sig_high;
1369 rsig1 = pi_sig_low;
1370 } else if (floatx80_is_infinity(ST1)) {
1371 if (floatx80_is_infinity(ST0)) {
1372 if (arg0_sign) {
1373 rexp = pi_34_exp;
1374 rsig0 = pi_34_sig_high;
1375 rsig1 = pi_34_sig_low;
1376 } else {
1377 rexp = pi_4_exp;
1378 rsig0 = pi_4_sig_high;
1379 rsig1 = pi_4_sig_low;
1380 }
1381 } else {
1382 rexp = pi_2_exp;
1383 rsig0 = pi_2_sig_high;
1384 rsig1 = pi_2_sig_low;
1385 }
1386 } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
1387 rexp = pi_2_exp;
1388 rsig0 = pi_2_sig_high;
1389 rsig1 = pi_2_sig_low;
1390 } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
1391 /* ST0 is negative. */
1392 rexp = pi_exp;
1393 rsig0 = pi_sig_high;
1394 rsig1 = pi_sig_low;
1395 } else {
1396 /*
1397 * ST0 and ST1 are finite, nonzero and with exponents not
1398 * too far apart.
1399 */
1400 int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
1401 int32_t azexp, axexp;
1402 bool adj_sub, ysign, zsign;
1403 uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
1404 uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
1405 uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
1406 uint64_t azsig0, azsig1;
1407 uint64_t azsig2, azsig3, axsig0, axsig1;
1408 floatx80 x8;
1409 FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
1410 FloatX80RoundPrec save_prec =
1411 env->fp_status.floatx80_rounding_precision;
1412 env->fp_status.float_rounding_mode = float_round_nearest_even;
1413 env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
1414
1415 if (arg0_exp == 0) {
1416 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
1417 }
1418 if (arg1_exp == 0) {
1419 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
1420 }
1421 if (arg0_exp > arg1_exp ||
1422 (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
1423 /* Work with abs(ST1) / abs(ST0). */
1424 num_exp = arg1_exp;
1425 num_sig = arg1_sig;
1426 den_exp = arg0_exp;
1427 den_sig = arg0_sig;
1428 if (arg0_sign) {
1429 /* The result is subtracted from pi. */
1430 adj_exp = pi_exp;
1431 adj_sig0 = pi_sig_high;
1432 adj_sig1 = pi_sig_low;
1433 adj_sub = true;
1434 } else {
1435 /* The result is used as-is. */
1436 adj_exp = 0;
1437 adj_sig0 = 0;
1438 adj_sig1 = 0;
1439 adj_sub = false;
1440 }
1441 } else {
1442 /* Work with abs(ST0) / abs(ST1). */
1443 num_exp = arg0_exp;
1444 num_sig = arg0_sig;
1445 den_exp = arg1_exp;
1446 den_sig = arg1_sig;
1447 /* The result is added to or subtracted from pi/2. */
1448 adj_exp = pi_2_exp;
1449 adj_sig0 = pi_2_sig_high;
1450 adj_sig1 = pi_2_sig_low;
1451 adj_sub = !arg0_sign;
1452 }
1453
1454 /*
1455 * Compute x = num/den, where 0 < x <= 1 and x is not too
1456 * small.
1457 */
1458 xexp = num_exp - den_exp + 0x3ffe;
1459 remsig0 = num_sig;
1460 remsig1 = 0;
1461 if (den_sig <= remsig0) {
1462 shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
1463 ++xexp;
1464 }
1465 xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
1466 mul64To128(den_sig, xsig0, &msig0, &msig1);
1467 sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
1468 while ((int64_t) remsig0 < 0) {
1469 --xsig0;
1470 add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
1471 }
1472 xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
1473 /*
1474 * No need to correct any estimation error in xsig1; even
1475 * with such error, it is accurate enough.
1476 */
1477
1478 /*
1479 * Split x as x = t + y, where t = n/8 is the nearest
1480 * multiple of 1/8 to x.
1481 */
1482 x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
1483 false, xexp + 3, xsig0,
1484 xsig1, &env->fp_status);
1485 n = floatx80_to_int32(x8, &env->fp_status);
1486 if (n == 0) {
1487 ysign = false;
1488 yexp = xexp;
1489 ysig0 = xsig0;
1490 ysig1 = xsig1;
1491 texp = 0;
1492 tsig = 0;
1493 } else {
1494 int shift = clz32(n) + 32;
1495 texp = 0x403b - shift;
1496 tsig = n;
1497 tsig <<= shift;
1498 if (texp == xexp) {
1499 sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
1500 if ((int64_t) ysig0 >= 0) {
1501 ysign = false;
1502 if (ysig0 == 0) {
1503 if (ysig1 == 0) {
1504 yexp = 0;
1505 } else {
1506 shift = clz64(ysig1) + 64;
1507 yexp = xexp - shift;
1508 shift128Left(ysig0, ysig1, shift,
1509 &ysig0, &ysig1);
1510 }
1511 } else {
1512 shift = clz64(ysig0);
1513 yexp = xexp - shift;
1514 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
1515 }
1516 } else {
1517 ysign = true;
1518 sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
1519 if (ysig0 == 0) {
1520 shift = clz64(ysig1) + 64;
1521 } else {
1522 shift = clz64(ysig0);
1523 }
1524 yexp = xexp - shift;
1525 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
1526 }
1527 } else {
1528 /*
1529 * t's exponent must be greater than x's because t
1530 * is positive and the nearest multiple of 1/8 to
1531 * x, and if x has a greater exponent, the power
1532 * of 2 with that exponent is also a multiple of
1533 * 1/8.
1534 */
1535 uint64_t usig0, usig1;
1536 shift128RightJamming(xsig0, xsig1, texp - xexp,
1537 &usig0, &usig1);
1538 ysign = true;
1539 sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
1540 if (ysig0 == 0) {
1541 shift = clz64(ysig1) + 64;
1542 } else {
1543 shift = clz64(ysig0);
1544 }
1545 yexp = texp - shift;
1546 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
1547 }
1548 }
1549
1550 /*
1551 * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
1552 * arctan(z).
1553 */
1554 zsign = ysign;
1555 if (texp == 0 || yexp == 0) {
1556 zexp = yexp;
1557 zsig0 = ysig0;
1558 zsig1 = ysig1;
1559 } else {
1560 /*
1561 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
1562 */
1563 int32_t dexp = texp + xexp - 0x3ffe;
1564 uint64_t dsig0, dsig1, dsig2;
1565 mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
1566 /*
1567 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
1568 * bit). Add 1 to produce the denominator 1+tx.
1569 */
1570 shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
1571 &dsig0, &dsig1);
1572 dsig0 |= 0x8000000000000000ULL;
1573 zexp = yexp - 1;
1574 remsig0 = ysig0;
1575 remsig1 = ysig1;
1576 remsig2 = 0;
1577 if (dsig0 <= remsig0) {
1578 shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
1579 ++zexp;
1580 }
1581 zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
1582 mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
1583 sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
1584 &remsig0, &remsig1, &remsig2);
1585 while ((int64_t) remsig0 < 0) {
1586 --zsig0;
1587 add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
1588 &remsig0, &remsig1, &remsig2);
1589 }
1590 zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
1591 /* No need to correct any estimation error in zsig1. */
1592 }
1593
1594 if (zexp == 0) {
1595 azexp = 0;
1596 azsig0 = 0;
1597 azsig1 = 0;
1598 } else {
1599 floatx80 z2, accum;
1600 uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
1601 /* Compute z^2. */
1602 mul128To256(zsig0, zsig1, zsig0, zsig1,
1603 &z2sig0, &z2sig1, &z2sig2, &z2sig3);
1604 z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
1605 zexp + zexp - 0x3ffe,
1606 z2sig0, z2sig1,
1607 &env->fp_status);
1608
1609 /* Compute the lower parts of the polynomial expansion. */
1610 accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
1611 accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
1612 accum = floatx80_mul(accum, z2, &env->fp_status);
1613 accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
1614 accum = floatx80_mul(accum, z2, &env->fp_status);
1615 accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
1616 accum = floatx80_mul(accum, z2, &env->fp_status);
1617 accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
1618 accum = floatx80_mul(accum, z2, &env->fp_status);
1619 accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
1620 accum = floatx80_mul(accum, z2, &env->fp_status);
1621
1622 /*
1623 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
1624 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
1625 */
1626 aexp = extractFloatx80Exp(fpatan_coeff_0);
1627 shift128RightJamming(extractFloatx80Frac(accum), 0,
1628 aexp - extractFloatx80Exp(accum),
1629 &asig0, &asig1);
1630 sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
1631 &asig0, &asig1);
1632 /* Multiply by z to compute arctan(z). */
1633 azexp = aexp + zexp - 0x3ffe;
1634 mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
1635 &azsig2, &azsig3);
1636 }
1637
1638 /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */
1639 if (texp == 0) {
1640 /* z is positive. */
1641 axexp = azexp;
1642 axsig0 = azsig0;
1643 axsig1 = azsig1;
1644 } else {
1645 bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
1646 int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
1647 uint64_t low_sig0 =
1648 extractFloatx80Frac(fpatan_table[n].atan_low);
1649 uint64_t low_sig1 = 0;
1650 axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
1651 axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
1652 axsig1 = 0;
1653 shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
1654 &low_sig0, &low_sig1);
1655 if (low_sign) {
1656 sub128(axsig0, axsig1, low_sig0, low_sig1,
1657 &axsig0, &axsig1);
1658 } else {
1659 add128(axsig0, axsig1, low_sig0, low_sig1,
1660 &axsig0, &axsig1);
1661 }
1662 if (azexp >= axexp) {
1663 shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
1664 &axsig0, &axsig1);
1665 axexp = azexp + 1;
1666 shift128RightJamming(azsig0, azsig1, 1,
1667 &azsig0, &azsig1);
1668 } else {
1669 shift128RightJamming(axsig0, axsig1, 1,
1670 &axsig0, &axsig1);
1671 shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
1672 &azsig0, &azsig1);
1673 ++axexp;
1674 }
1675 if (zsign) {
1676 sub128(axsig0, axsig1, azsig0, azsig1,
1677 &axsig0, &axsig1);
1678 } else {
1679 add128(axsig0, axsig1, azsig0, azsig1,
1680 &axsig0, &axsig1);
1681 }
1682 }
1683
1684 if (adj_exp == 0) {
1685 rexp = axexp;
1686 rsig0 = axsig0;
1687 rsig1 = axsig1;
1688 } else {
1689 /*
1690 * Add or subtract arctan(x) (exponent axexp,
1691 * significand axsig0 and axsig1, positive, not
1692 * necessarily normalized) to the number given by
1693 * adj_exp, adj_sig0 and adj_sig1, according to
1694 * adj_sub.
1695 */
1696 if (adj_exp >= axexp) {
1697 shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
1698 &axsig0, &axsig1);
1699 rexp = adj_exp + 1;
1700 shift128RightJamming(adj_sig0, adj_sig1, 1,
1701 &adj_sig0, &adj_sig1);
1702 } else {
1703 shift128RightJamming(axsig0, axsig1, 1,
1704 &axsig0, &axsig1);
1705 shift128RightJamming(adj_sig0, adj_sig1,
1706 axexp - adj_exp + 1,
1707 &adj_sig0, &adj_sig1);
1708 rexp = axexp + 1;
1709 }
1710 if (adj_sub) {
1711 sub128(adj_sig0, adj_sig1, axsig0, axsig1,
1712 &rsig0, &rsig1);
1713 } else {
1714 add128(adj_sig0, adj_sig1, axsig0, axsig1,
1715 &rsig0, &rsig1);
1716 }
1717 }
1718
1719 env->fp_status.float_rounding_mode = save_mode;
1720 env->fp_status.floatx80_rounding_precision = save_prec;
1721 }
1722 /* This result is inexact. */
1723 rsig1 |= 1;
1724 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp,
1725 rsig0, rsig1, &env->fp_status);
1726 }
1727
1728 fpop(env);
1729 merge_exception_flags(env, old_flags);
1730 }
1731
1732 void helper_fxtract(CPUX86State *env)
1733 {
1734 uint8_t old_flags = save_exception_flags(env);
1735 CPU_LDoubleU temp;
1736
1737 temp.d = ST0;
1738
1739 if (floatx80_is_zero(ST0)) {
1740 /* Easy way to generate -inf and raising division by 0 exception */
1741 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
1742 &env->fp_status);
1743 fpush(env);
1744 ST0 = temp.d;
1745 } else if (floatx80_invalid_encoding(ST0)) {
1746 float_raise(float_flag_invalid, &env->fp_status);
1747 ST0 = floatx80_default_nan(&env->fp_status);
1748 fpush(env);
1749 ST0 = ST1;
1750 } else if (floatx80_is_any_nan(ST0)) {
1751 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1752 float_raise(float_flag_invalid, &env->fp_status);
1753 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1754 }
1755 fpush(env);
1756 ST0 = ST1;
1757 } else if (floatx80_is_infinity(ST0)) {
1758 fpush(env);
1759 ST0 = ST1;
1760 ST1 = floatx80_infinity;
1761 } else {
1762 int expdif;
1763
1764 if (EXPD(temp) == 0) {
1765 int shift = clz64(temp.l.lower);
1766 temp.l.lower <<= shift;
1767 expdif = 1 - EXPBIAS - shift;
1768 float_raise(float_flag_input_denormal, &env->fp_status);
1769 } else {
1770 expdif = EXPD(temp) - EXPBIAS;
1771 }
1772 /* DP exponent bias */
1773 ST0 = int32_to_floatx80(expdif, &env->fp_status);
1774 fpush(env);
1775 BIASEXPONENT(temp);
1776 ST0 = temp.d;
1777 }
1778 merge_exception_flags(env, old_flags);
1779 }
1780
1781 static void helper_fprem_common(CPUX86State *env, bool mod)
1782 {
1783 uint8_t old_flags = save_exception_flags(env);
1784 uint64_t quotient;
1785 CPU_LDoubleU temp0, temp1;
1786 int exp0, exp1, expdiff;
1787
1788 temp0.d = ST0;
1789 temp1.d = ST1;
1790 exp0 = EXPD(temp0);
1791 exp1 = EXPD(temp1);
1792
1793 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1794 if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
1795 exp0 == 0x7fff || exp1 == 0x7fff ||
1796 floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
1797 ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
1798 } else {
1799 if (exp0 == 0) {
1800 exp0 = 1 - clz64(temp0.l.lower);
1801 }
1802 if (exp1 == 0) {
1803 exp1 = 1 - clz64(temp1.l.lower);
1804 }
1805 expdiff = exp0 - exp1;
1806 if (expdiff < 64) {
1807 ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
1808 env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */
1809 env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
1810 env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */
1811 } else {
1812 /*
1813 * Partial remainder. This choice of how many bits to
1814 * process at once is specified in AMD instruction set
1815 * manuals, and empirically is followed by Intel
1816 * processors as well; it ensures that the final remainder
1817 * operation in a loop does produce the correct low three
1818 * bits of the quotient. AMD manuals specify that the
1819 * flags other than C2 are cleared, and empirically Intel
1820 * processors clear them as well.
1821 */
1822 int n = 32 + (expdiff % 32);
1823 temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
1824 ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
1825 env->fpus |= 0x400; /* C2 <-- 1 */
1826 }
1827 }
1828 merge_exception_flags(env, old_flags);
1829 }
1830
1831 void helper_fprem1(CPUX86State *env)
1832 {
1833 helper_fprem_common(env, false);
1834 }
1835
1836 void helper_fprem(CPUX86State *env)
1837 {
1838 helper_fprem_common(env, true);
1839 }
1840
/* log2(e): high and low 64-bit halves of its 128-bit significand. */
#define log2_e_sig_high  0xb8aa3b295c17f0bbULL
#define log2_e_sig_low   0xbe87fed0691d3e89ULL
1844
/*
 * Coefficients of a polynomial approximating log2((1+x)/(1-x)) for x in
 * [2*sqrt(2)-3, 3-2*sqrt(2)], i.e. for logarithms of numbers between
 * sqrt(2)/2 and sqrt(2).  Only odd powers of x are used.  The leading
 * coefficient comes in two pieces: fyl2x_coeff_0 and the low-order
 * correction fyl2x_coeff_0_low.
 */
#define fyl2x_coeff_0      make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low  make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1      make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2      make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3      make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4      make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5      make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6      make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7      make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8      make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9      make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
1862
1863 /*
1864 * Compute an approximation of log2(1+arg), where 1+arg is in the
1865 * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this
1866 * function is called, rounding precision is set to 80 and the
1867 * round-to-nearest mode is in effect. arg must not be exactly zero,
1868 * and must not be so close to zero that underflow might occur.
1869 */
1870 static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
1871 uint64_t *sig0, uint64_t *sig1)
1872 {
1873 uint64_t arg0_sig = extractFloatx80Frac(arg);
1874 int32_t arg0_exp = extractFloatx80Exp(arg);
1875 bool arg0_sign = extractFloatx80Sign(arg);
1876 bool asign;
1877 int32_t dexp, texp, aexp;
1878 uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
1879 uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
1880 uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
1881 floatx80 t2, accum;
1882
1883 /*
1884 * Compute an approximation of arg/(2+arg), with extra precision,
1885 * as the argument to a polynomial approximation. The extra
1886 * precision is only needed for the first term of the
1887 * approximation, with subsequent terms being significantly
1888 * smaller; the approximation only uses odd exponents, and the
1889 * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
1890 */
1891 if (arg0_sign) {
1892 dexp = 0x3fff;
1893 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1894 sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
1895 } else {
1896 dexp = 0x4000;
1897 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1898 dsig0 |= 0x8000000000000000ULL;
1899 }
1900 texp = arg0_exp - dexp + 0x3ffe;
1901 rsig0 = arg0_sig;
1902 rsig1 = 0;
1903 rsig2 = 0;
1904 if (dsig0 <= rsig0) {
1905 shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
1906 ++texp;
1907 }
1908 tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
1909 mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
1910 sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
1911 &rsig0, &rsig1, &rsig2);
1912 while ((int64_t) rsig0 < 0) {
1913 --tsig0;
1914 add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
1915 &rsig0, &rsig1, &rsig2);
1916 }
1917 tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
1918 /*
1919 * No need to correct any estimation error in tsig1; even with
1920 * such error, it is accurate enough. Now compute the square of
1921 * that approximation.
1922 */
1923 mul128To256(tsig0, tsig1, tsig0, tsig1,
1924 &t2sig0, &t2sig1, &t2sig2, &t2sig3);
1925 t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
1926 texp + texp - 0x3ffe,
1927 t2sig0, t2sig1, &env->fp_status);
1928
1929 /* Compute the lower parts of the polynomial expansion. */
1930 accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
1931 accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
1932 accum = floatx80_mul(accum, t2, &env->fp_status);
1933 accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
1934 accum = floatx80_mul(accum, t2, &env->fp_status);
1935 accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
1936 accum = floatx80_mul(accum, t2, &env->fp_status);
1937 accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
1938 accum = floatx80_mul(accum, t2, &env->fp_status);
1939 accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
1940 accum = floatx80_mul(accum, t2, &env->fp_status);
1941 accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
1942 accum = floatx80_mul(accum, t2, &env->fp_status);
1943 accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
1944 accum = floatx80_mul(accum, t2, &env->fp_status);
1945 accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
1946 accum = floatx80_mul(accum, t2, &env->fp_status);
1947 accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);
1948
1949 /*
1950 * The full polynomial expansion is fyl2x_coeff_0 + accum (where
1951 * accum has much lower magnitude, and so, in particular, carry
1952 * out of the addition is not possible), multiplied by t. (This
1953 * expansion is only accurate to about 70 bits, not 128 bits.)
1954 */
1955 aexp = extractFloatx80Exp(fyl2x_coeff_0);
1956 asign = extractFloatx80Sign(fyl2x_coeff_0);
1957 shift128RightJamming(extractFloatx80Frac(accum), 0,
1958 aexp - extractFloatx80Exp(accum),
1959 &asig0, &asig1);
1960 bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
1961 bsig1 = 0;
1962 if (asign == extractFloatx80Sign(accum)) {
1963 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1964 } else {
1965 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1966 }
1967 /* Multiply by t to compute the required result. */
1968 mul128To256(asig0, asig1, tsig0, tsig1,
1969 &asig0, &asig1, &asig2, &asig3);
1970 aexp += texp - 0x3ffe;
1971 *exp = aexp;
1972 *sig0 = asig0;
1973 *sig1 = asig1;
1974 }
1975
1976 void helper_fyl2xp1(CPUX86State *env)
1977 {
1978 uint8_t old_flags = save_exception_flags(env);
1979 uint64_t arg0_sig = extractFloatx80Frac(ST0);
1980 int32_t arg0_exp = extractFloatx80Exp(ST0);
1981 bool arg0_sign = extractFloatx80Sign(ST0);
1982 uint64_t arg1_sig = extractFloatx80Frac(ST1);
1983 int32_t arg1_exp = extractFloatx80Exp(ST1);
1984 bool arg1_sign = extractFloatx80Sign(ST1);
1985
1986 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1987 float_raise(float_flag_invalid, &env->fp_status);
1988 ST1 = floatx80_silence_nan(ST0, &env->fp_status);
1989 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
1990 float_raise(float_flag_invalid, &env->fp_status);
1991 ST1 = floatx80_silence_nan(ST1, &env->fp_status);
1992 } else if (floatx80_invalid_encoding(ST0) ||
1993 floatx80_invalid_encoding(ST1)) {
1994 float_raise(float_flag_invalid, &env->fp_status);
1995 ST1 = floatx80_default_nan(&env->fp_status);
1996 } else if (floatx80_is_any_nan(ST0)) {
1997 ST1 = ST0;
1998 } else if (floatx80_is_any_nan(ST1)) {
1999 /* Pass this NaN through. */
2000 } else if (arg0_exp > 0x3ffd ||
2001 (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
2002 0x95f619980c4336f7ULL :
2003 0xd413cccfe7799211ULL))) {
2004 /*
2005 * Out of range for the instruction (ST0 must have absolute
2006 * value less than 1 - sqrt(2)/2 = 0.292..., according to
2007 * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
2008 * to sqrt(2) - 1, which we allow here), treat as invalid.
2009 */
2010 float_raise(float_flag_invalid, &env->fp_status);
2011 ST1 = floatx80_default_nan(&env->fp_status);
2012 } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
2013 arg1_exp == 0x7fff) {
2014 /*
2015 * One argument is zero, or multiplying by infinity; correct
2016 * result is exact and can be obtained by multiplying the
2017 * arguments.
2018 */
2019 ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
2020 } else if (arg0_exp < 0x3fb0) {
2021 /*
2022 * Multiplying both arguments and an extra-precision version
2023 * of log2(e) is sufficiently precise.
2024 */
2025 uint64_t sig0, sig1, sig2;
2026 int32_t exp;
2027 if (arg0_exp == 0) {
2028 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
2029 }
2030 if (arg1_exp == 0) {
2031 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2032 }
2033 mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
2034 &sig0, &sig1, &sig2);
2035 exp = arg0_exp + 1;
2036 mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
2037 exp += arg1_exp - 0x3ffe;
2038 /* This result is inexact. */
2039 sig1 |= 1;
2040 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
2041 arg0_sign ^ arg1_sign, exp,
2042 sig0, sig1, &env->fp_status);
2043 } else {
2044 int32_t aexp;
2045 uint64_t asig0, asig1, asig2;
2046 FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
2047 FloatX80RoundPrec save_prec =
2048 env->fp_status.floatx80_rounding_precision;
2049 env->fp_status.float_rounding_mode = float_round_nearest_even;
2050 env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
2051
2052 helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
2053 /*
2054 * Multiply by the second argument to compute the required
2055 * result.
2056 */
2057 if (arg1_exp == 0) {
2058 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2059 }
2060 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
2061 aexp += arg1_exp - 0x3ffe;
2062 /* This result is inexact. */
2063 asig1 |= 1;
2064 env->fp_status.float_rounding_mode = save_mode;
2065 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
2066 arg0_sign ^ arg1_sign, aexp,
2067 asig0, asig1, &env->fp_status);
2068 env->fp_status.floatx80_rounding_precision = save_prec;
2069 }
2070 fpop(env);
2071 merge_exception_flags(env, old_flags);
2072 }
2073
2074 void helper_fyl2x(CPUX86State *env)
2075 {
2076 uint8_t old_flags = save_exception_flags(env);
2077 uint64_t arg0_sig = extractFloatx80Frac(ST0);
2078 int32_t arg0_exp = extractFloatx80Exp(ST0);
2079 bool arg0_sign = extractFloatx80Sign(ST0);
2080 uint64_t arg1_sig = extractFloatx80Frac(ST1);
2081 int32_t arg1_exp = extractFloatx80Exp(ST1);
2082 bool arg1_sign = extractFloatx80Sign(ST1);
2083
2084 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
2085 float_raise(float_flag_invalid, &env->fp_status);
2086 ST1 = floatx80_silence_nan(ST0, &env->fp_status);
2087 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
2088 float_raise(float_flag_invalid, &env->fp_status);
2089 ST1 = floatx80_silence_nan(ST1, &env->fp_status);
2090 } else if (floatx80_invalid_encoding(ST0) ||
2091 floatx80_invalid_encoding(ST1)) {
2092 float_raise(float_flag_invalid, &env->fp_status);
2093 ST1 = floatx80_default_nan(&env->fp_status);
2094 } else if (floatx80_is_any_nan(ST0)) {
2095 ST1 = ST0;
2096 } else if (floatx80_is_any_nan(ST1)) {
2097 /* Pass this NaN through. */
2098 } else if (arg0_sign && !floatx80_is_zero(ST0)) {
2099 float_raise(float_flag_invalid, &env->fp_status);
2100 ST1 = floatx80_default_nan(&env->fp_status);
2101 } else if (floatx80_is_infinity(ST1)) {
2102 FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
2103 &env->fp_status);
2104 switch (cmp) {
2105 case float_relation_less:
2106 ST1 = floatx80_chs(ST1);
2107 break;
2108 case float_relation_greater:
2109 /* Result is infinity of the same sign as ST1. */
2110 break;
2111 default:
2112 float_raise(float_flag_invalid, &env->fp_status);
2113 ST1 = floatx80_default_nan(&env->fp_status);
2114 break;
2115 }
2116 } else if (floatx80_is_infinity(ST0)) {
2117 if (floatx80_is_zero(ST1)) {
2118 float_raise(float_flag_invalid, &env->fp_status);
2119 ST1 = floatx80_default_nan(&env->fp_status);
2120 } else if (arg1_sign) {
2121 ST1 = floatx80_chs(ST0);
2122 } else {
2123 ST1 = ST0;
2124 }
2125 } else if (floatx80_is_zero(ST0)) {
2126 if (floatx80_is_zero(ST1)) {
2127 float_raise(float_flag_invalid, &env->fp_status);
2128 ST1 = floatx80_default_nan(&env->fp_status);
2129 } else {
2130 /* Result is infinity with opposite sign to ST1. */
2131 float_raise(float_flag_divbyzero, &env->fp_status);
2132 ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
2133 0x8000000000000000ULL);
2134 }
2135 } else if (floatx80_is_zero(ST1)) {
2136 if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
2137 ST1 = floatx80_chs(ST1);
2138 }
2139 /* Otherwise, ST1 is already the correct result. */
2140 } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
2141 if (arg1_sign) {
2142 ST1 = floatx80_chs(floatx80_zero);
2143 } else {
2144 ST1 = floatx80_zero;
2145 }
2146 } else {
2147 int32_t int_exp;
2148 floatx80 arg0_m1;
2149 FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
2150 FloatX80RoundPrec save_prec =
2151 env->fp_status.floatx80_rounding_precision;
2152 env->fp_status.float_rounding_mode = float_round_nearest_even;
2153 env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
2154
2155 if (arg0_exp == 0) {
2156 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
2157 }
2158 if (arg1_exp == 0) {
2159 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2160 }
2161 int_exp = arg0_exp - 0x3fff;
2162 if (arg0_sig > 0xb504f333f9de6484ULL) {
2163 ++int_exp;
2164 }
2165 arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
2166 &env->fp_status),
2167 floatx80_one, &env->fp_status);
2168 if (floatx80_is_zero(arg0_m1)) {
2169 /* Exact power of 2; multiply by ST1. */
2170 env->fp_status.float_rounding_mode = save_mode;
2171 ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
2172 ST1, &env->fp_status);
2173 } else {
2174 bool asign = extractFloatx80Sign(arg0_m1);
2175 int32_t aexp;
2176 uint64_t asig0, asig1, asig2;
2177 helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
2178 if (int_exp != 0) {
2179 bool isign = (int_exp < 0);
2180 int32_t iexp;
2181 uint64_t isig;
2182 int shift;
2183 int_exp = isign ? -int_exp : int_exp;
2184 shift = clz32(int_exp) + 32;
2185 isig = int_exp;
2186 isig <<= shift;
2187 iexp = 0x403e - shift;
2188 shift128RightJamming(asig0, asig1, iexp - aexp,
2189 &asig0, &asig1);
2190 if (asign == isign) {
2191 add128(isig, 0, asig0, asig1, &asig0, &asig1);
2192 } else {
2193 sub128(isig, 0, asig0, asig1, &asig0, &asig1);
2194 }
2195 aexp = iexp;
2196 asign = isign;
2197 }
2198 /*
2199 * Multiply by the second argument to compute the required
2200 * result.
2201 */
2202 if (arg1_exp == 0) {
2203 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2204 }
2205 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
2206 aexp += arg1_exp - 0x3ffe;
2207 /* This result is inexact. */
2208 asig1 |= 1;
2209 env->fp_status.float_rounding_mode = save_mode;
2210 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
2211 asign ^ arg1_sign, aexp,
2212 asig0, asig1, &env->fp_status);
2213 }
2214
2215 env->fp_status.floatx80_rounding_precision = save_prec;
2216 }
2217 fpop(env);
2218 merge_exception_flags(env, old_flags);
2219 }
2220
2221 void helper_fsqrt(CPUX86State *env)
2222 {
2223 uint8_t old_flags = save_exception_flags(env);
2224 if (floatx80_is_neg(ST0)) {
2225 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
2226 env->fpus |= 0x400;
2227 }
2228 ST0 = floatx80_sqrt(ST0, &env->fp_status);
2229 merge_exception_flags(env, old_flags);
2230 }
2231
2232 void helper_fsincos(CPUX86State *env)
2233 {
2234 double fptemp = floatx80_to_double(env, ST0);
2235
2236 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
2237 env->fpus |= 0x400;
2238 } else {
2239 ST0 = double_to_floatx80(env, sin(fptemp));
2240 fpush(env);
2241 ST0 = double_to_floatx80(env, cos(fptemp));
2242 env->fpus &= ~0x400; /* C2 <-- 0 */
2243 /* the above code is for |arg| < 2**63 only */
2244 }
2245 }
2246
2247 void helper_frndint(CPUX86State *env)
2248 {
2249 uint8_t old_flags = save_exception_flags(env);
2250 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
2251 merge_exception_flags(env, old_flags);
2252 }
2253
2254 void helper_fscale(CPUX86State *env)
2255 {
2256 uint8_t old_flags = save_exception_flags(env);
2257 if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
2258 float_raise(float_flag_invalid, &env->fp_status);
2259 ST0 = floatx80_default_nan(&env->fp_status);
2260 } else if (floatx80_is_any_nan(ST1)) {
2261 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
2262 float_raise(float_flag_invalid, &env->fp_status);
2263 }
2264 ST0 = ST1;
2265 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
2266 float_raise(float_flag_invalid, &env->fp_status);
2267 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
2268 }
2269 } else if (floatx80_is_infinity(ST1) &&
2270 !floatx80_invalid_encoding(ST0) &&
2271 !floatx80_is_any_nan(ST0)) {
2272 if (floatx80_is_neg(ST1)) {
2273 if (floatx80_is_infinity(ST0)) {
2274 float_raise(float_flag_invalid, &env->fp_status);
2275 ST0 = floatx80_default_nan(&env->fp_status);
2276 } else {
2277 ST0 = (floatx80_is_neg(ST0) ?
2278 floatx80_chs(floatx80_zero) :
2279 floatx80_zero);
2280 }
2281 } else {
2282 if (floatx80_is_zero(ST0)) {
2283 float_raise(float_flag_invalid, &env->fp_status);
2284 ST0 = floatx80_default_nan(&env->fp_status);
2285 } else {
2286 ST0 = (floatx80_is_neg(ST0) ?
2287 floatx80_chs(floatx80_infinity) :
2288 floatx80_infinity);
2289 }
2290 }
2291 } else {
2292 int n;
2293 FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision;
2294 uint8_t save_flags = get_float_exception_flags(&env->fp_status);
2295 set_float_exception_flags(0, &env->fp_status);
2296 n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
2297 set_float_exception_flags(save_flags, &env->fp_status);
2298 env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
2299 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
2300 env->fp_status.floatx80_rounding_precision = save;
2301 }
2302 merge_exception_flags(env, old_flags);
2303 }
2304
2305 void helper_fsin(CPUX86State *env)
2306 {
2307 double fptemp = floatx80_to_double(env, ST0);
2308
2309 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
2310 env->fpus |= 0x400;
2311 } else {
2312 ST0 = double_to_floatx80(env, sin(fptemp));
2313 env->fpus &= ~0x400; /* C2 <-- 0 */
2314 /* the above code is for |arg| < 2**53 only */
2315 }
2316 }
2317
2318 void helper_fcos(CPUX86State *env)
2319 {
2320 double fptemp = floatx80_to_double(env, ST0);
2321
2322 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
2323 env->fpus |= 0x400;
2324 } else {
2325 ST0 = double_to_floatx80(env, cos(fptemp));
2326 env->fpus &= ~0x400; /* C2 <-- 0 */
2327 /* the above code is for |arg| < 2**63 only */
2328 }
2329 }
2330
2331 void helper_fxam_ST0(CPUX86State *env)
2332 {
2333 CPU_LDoubleU temp;
2334 int expdif;
2335
2336 temp.d = ST0;
2337
2338 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
2339 if (SIGND(temp)) {
2340 env->fpus |= 0x200; /* C1 <-- 1 */
2341 }
2342
2343 if (env->fptags[env->fpstt]) {
2344 env->fpus |= 0x4100; /* Empty */
2345 return;
2346 }
2347
2348 expdif = EXPD(temp);
2349 if (expdif == MAXEXPD) {
2350 if (MANTD(temp) == 0x8000000000000000ULL) {
2351 env->fpus |= 0x500; /* Infinity */
2352 } else if (MANTD(temp) & 0x8000000000000000ULL) {
2353 env->fpus |= 0x100; /* NaN */
2354 }
2355 } else if (expdif == 0) {
2356 if (MANTD(temp) == 0) {
2357 env->fpus |= 0x4000; /* Zero */
2358 } else {
2359 env->fpus |= 0x4400; /* Denormal */
2360 }
2361 } else if (MANTD(temp) & 0x8000000000000000ULL) {
2362 env->fpus |= 0x400;
2363 }
2364 }
2365
2366 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
2367 uintptr_t retaddr)
2368 {
2369 int fpus, fptag, exp, i;
2370 uint64_t mant;
2371 CPU_LDoubleU tmp;
2372
2373 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
2374 fptag = 0;
2375 for (i = 7; i >= 0; i--) {
2376 fptag <<= 2;
2377 if (env->fptags[i]) {
2378 fptag |= 3;
2379 } else {
2380 tmp.d = env->fpregs[i].d;
2381 exp = EXPD(tmp);
2382 mant = MANTD(tmp);
2383 if (exp == 0 && mant == 0) {
2384 /* zero */
2385 fptag |= 1;
2386 } else if (exp == 0 || exp == MAXEXPD
2387 || (mant & (1LL << 63)) == 0) {
2388 /* NaNs, infinity, denormal */
2389 fptag |= 2;
2390 }
2391 }
2392 }
2393 if (data32) {
2394 /* 32 bit */
2395 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
2396 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
2397 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
2398 cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */
2399 cpu_stl_data_ra(env, ptr + 16, env->fpcs, retaddr); /* fpcs */
2400 cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpoo */
2401 cpu_stl_data_ra(env, ptr + 24, env->fpds, retaddr); /* fpos */
2402 } else {
2403 /* 16 bit */
2404 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
2405 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
2406 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
2407 cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr);
2408 cpu_stw_data_ra(env, ptr + 8, env->fpcs, retaddr);
2409 cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr);
2410 cpu_stw_data_ra(env, ptr + 12, env->fpds, retaddr);
2411 }
2412 }
2413
2414 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
2415 {
2416 do_fstenv(env, ptr, data32, GETPC());
2417 }
2418
2419 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
2420 {
2421 env->fpstt = (fpus >> 11) & 7;
2422 env->fpus = fpus & ~0x3800 & ~FPUS_B;
2423 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
2424 #if !defined(CONFIG_USER_ONLY)
2425 if (!(env->fpus & FPUS_SE)) {
2426 /*
2427 * Here the processor deasserts FERR#; in response, the chipset deasserts
2428 * IGNNE#.
2429 */
2430 cpu_clear_ignne();
2431 }
2432 #endif
2433 }
2434
2435 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
2436 uintptr_t retaddr)
2437 {
2438 int i, fpus, fptag;
2439
2440 if (data32) {
2441 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
2442 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
2443 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
2444 } else {
2445 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
2446 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
2447 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
2448 }
2449 cpu_set_fpus(env, fpus);
2450 for (i = 0; i < 8; i++) {
2451 env->fptags[i] = ((fptag & 3) == 3);
2452 fptag >>= 2;
2453 }
2454 }
2455
2456 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
2457 {
2458 do_fldenv(env, ptr, data32, GETPC());
2459 }
2460
2461 static void do_fsave(CPUX86State *env, target_ulong ptr, int data32,
2462 uintptr_t retaddr)
2463 {
2464 floatx80 tmp;
2465 int i;
2466
2467 do_fstenv(env, ptr, data32, retaddr);
2468
2469 ptr += (14 << data32);
2470 for (i = 0; i < 8; i++) {
2471 tmp = ST(i);
2472 do_fstt(env, tmp, ptr, retaddr);
2473 ptr += 10;
2474 }
2475
2476 do_fninit(env);
2477 }
2478
2479 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
2480 {
2481 do_fsave(env, ptr, data32, GETPC());
2482 }
2483
2484 static void do_frstor(CPUX86State *env, target_ulong ptr, int data32,
2485 uintptr_t retaddr)
2486 {
2487 floatx80 tmp;
2488 int i;
2489
2490 do_fldenv(env, ptr, data32, retaddr);
2491 ptr += (14 << data32);
2492
2493 for (i = 0; i < 8; i++) {
2494 tmp = do_fldt(env, ptr, retaddr);
2495 ST(i) = tmp;
2496 ptr += 10;
2497 }
2498 }
2499
2500 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
2501 {
2502 do_frstor(env, ptr, data32, GETPC());
2503 }
2504
2505 #if defined(CONFIG_USER_ONLY)
2506 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
2507 {
2508 do_fsave(env, ptr, data32, 0);
2509 }
2510
2511 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
2512 {
2513 do_frstor(env, ptr, data32, 0);
2514 }
2515 #endif
2516
/* Byte offset of member FIELD within X86XSaveArea. */
#define XO(FIELD) offsetof(X86XSaveArea, FIELD)
2518
2519 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2520 {
2521 int fpus, fptag, i;
2522 target_ulong addr;
2523
2524 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
2525 fptag = 0;
2526 for (i = 0; i < 8; i++) {
2527 fptag |= (env->fptags[i] << i);
2528 }
2529
2530 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
2531 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
2532 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
2533
2534 /* In 32-bit mode this is eip, sel, dp, sel.
2535 In 64-bit mode this is rip, rdp.
2536 But in either case we don't write actual data, just zeros. */
2537 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
2538 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
2539
2540 addr = ptr + XO(legacy.fpregs);
2541 for (i = 0; i < 8; i++) {
2542 floatx80 tmp = ST(i);
2543 do_fstt(env, tmp, addr, ra);
2544 addr += 16;
2545 }
2546 }
2547
2548 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2549 {
2550 update_mxcsr_from_sse_status(env);
2551 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
2552 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
2553 }
2554
2555 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2556 {
2557 int i, nb_xmm_regs;
2558 target_ulong addr;
2559
2560 if (env->hflags & HF_CS64_MASK) {
2561 nb_xmm_regs = 16;
2562 } else {
2563 nb_xmm_regs = 8;
2564 }
2565
2566 addr = ptr + XO(legacy.xmm_regs);
2567 for (i = 0; i < nb_xmm_regs; i++) {
2568 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
2569 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
2570 addr += 16;
2571 }
2572 }
2573
2574 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2575 {
2576 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
2577 int i;
2578
2579 for (i = 0; i < 4; i++, addr += 16) {
2580 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
2581 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
2582 }
2583 }
2584
2585 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2586 {
2587 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
2588 env->bndcs_regs.cfgu, ra);
2589 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
2590 env->bndcs_regs.sts, ra);
2591 }
2592
2593 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2594 {
2595 cpu_stq_data_ra(env, ptr, env->pkru, ra);
2596 }
2597
2598 static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2599 {
2600 /* The operand must be 16 byte aligned */
2601 if (ptr & 0xf) {
2602 raise_exception_ra(env, EXCP0D_GPF, ra);
2603 }
2604
2605 do_xsave_fpu(env, ptr, ra);
2606
2607 if (env->cr[4] & CR4_OSFXSR_MASK) {
2608 do_xsave_mxcsr(env, ptr, ra);
2609 /* Fast FXSAVE leaves out the XMM registers */
2610 if (!(env->efer & MSR_EFER_FFXSR)
2611 || (env->hflags & HF_CPL_MASK)
2612 || !(env->hflags & HF_LMA_MASK)) {
2613 do_xsave_sse(env, ptr, ra);
2614 }
2615 }
2616 }
2617
2618 void helper_fxsave(CPUX86State *env, target_ulong ptr)
2619 {
2620 do_fxsave(env, ptr, GETPC());
2621 }
2622
2623 static uint64_t get_xinuse(CPUX86State *env)
2624 {
2625 uint64_t inuse = -1;
2626
2627 /* For the most part, we don't track XINUSE. We could calculate it
2628 here for all components, but it's probably less work to simply
2629 indicate in use. That said, the state of BNDREGS is important
2630 enough to track in HFLAGS, so we might as well use that here. */
2631 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
2632 inuse &= ~XSTATE_BNDREGS_MASK;
2633 }
2634 return inuse;
2635 }
2636
2637 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
2638 uint64_t inuse, uint64_t opt, uintptr_t ra)
2639 {
2640 uint64_t old_bv, new_bv;
2641
2642 /* The OS must have enabled XSAVE. */
2643 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2644 raise_exception_ra(env, EXCP06_ILLOP, ra);
2645 }
2646
2647 /* The operand must be 64 byte aligned. */
2648 if (ptr & 63) {
2649 raise_exception_ra(env, EXCP0D_GPF, ra);
2650 }
2651
2652 /* Never save anything not enabled by XCR0. */
2653 rfbm &= env->xcr0;
2654 opt &= rfbm;
2655
2656 if (opt & XSTATE_FP_MASK) {
2657 do_xsave_fpu(env, ptr, ra);
2658 }
2659 if (rfbm & XSTATE_SSE_MASK) {
2660 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
2661 do_xsave_mxcsr(env, ptr, ra);
2662 }
2663 if (opt & XSTATE_SSE_MASK) {
2664 do_xsave_sse(env, ptr, ra);
2665 }
2666 if (opt & XSTATE_BNDREGS_MASK) {
2667 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
2668 }
2669 if (opt & XSTATE_BNDCSR_MASK) {
2670 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
2671 }
2672 if (opt & XSTATE_PKRU_MASK) {
2673 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
2674 }
2675
2676 /* Update the XSTATE_BV field. */
2677 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
2678 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
2679 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
2680 }
2681
2682 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2683 {
2684 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
2685 }
2686
2687 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2688 {
2689 uint64_t inuse = get_xinuse(env);
2690 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
2691 }
2692
2693 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2694 {
2695 int i, fpuc, fpus, fptag;
2696 target_ulong addr;
2697
2698 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
2699 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
2700 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
2701 cpu_set_fpuc(env, fpuc);
2702 cpu_set_fpus(env, fpus);
2703 fptag ^= 0xff;
2704 for (i = 0; i < 8; i++) {
2705 env->fptags[i] = ((fptag >> i) & 1);
2706 }
2707
2708 addr = ptr + XO(legacy.fpregs);
2709 for (i = 0; i < 8; i++) {
2710 floatx80 tmp = do_fldt(env, addr, ra);
2711 ST(i) = tmp;
2712 addr += 16;
2713 }
2714 }
2715
2716 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2717 {
2718 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
2719 }
2720
2721 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2722 {
2723 int i, nb_xmm_regs;
2724 target_ulong addr;
2725
2726 if (env->hflags & HF_CS64_MASK) {
2727 nb_xmm_regs = 16;
2728 } else {
2729 nb_xmm_regs = 8;
2730 }
2731
2732 addr = ptr + XO(legacy.xmm_regs);
2733 for (i = 0; i < nb_xmm_regs; i++) {
2734 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
2735 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
2736 addr += 16;
2737 }
2738 }
2739
2740 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2741 {
2742 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
2743 int i;
2744
2745 for (i = 0; i < 4; i++, addr += 16) {
2746 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
2747 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
2748 }
2749 }
2750
2751 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2752 {
2753 /* FIXME: Extend highest implemented bit of linear address. */
2754 env->bndcs_regs.cfgu
2755 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
2756 env->bndcs_regs.sts
2757 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
2758 }
2759
2760 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2761 {
2762 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
2763 }
2764
2765 static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2766 {
2767 /* The operand must be 16 byte aligned */
2768 if (ptr & 0xf) {
2769 raise_exception_ra(env, EXCP0D_GPF, ra);
2770 }
2771
2772 do_xrstor_fpu(env, ptr, ra);
2773
2774 if (env->cr[4] & CR4_OSFXSR_MASK) {
2775 do_xrstor_mxcsr(env, ptr, ra);
2776 /* Fast FXRSTOR leaves out the XMM registers */
2777 if (!(env->efer & MSR_EFER_FFXSR)
2778 || (env->hflags & HF_CPL_MASK)
2779 || !(env->hflags & HF_LMA_MASK)) {
2780 do_xrstor_sse(env, ptr, ra);
2781 }
2782 }
2783 }
2784
2785 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
2786 {
2787 do_fxrstor(env, ptr, GETPC());
2788 }
2789
2790 #if defined(CONFIG_USER_ONLY)
2791 void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
2792 {
2793 do_fxsave(env, ptr, 0);
2794 }
2795
2796 void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
2797 {
2798 do_fxrstor(env, ptr, 0);
2799 }
2800 #endif
2801
2802 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2803 {
2804 uintptr_t ra = GETPC();
2805 uint64_t xstate_bv, xcomp_bv, reserve0;
2806
2807 rfbm &= env->xcr0;
2808
2809 /* The OS must have enabled XSAVE. */
2810 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2811 raise_exception_ra(env, EXCP06_ILLOP, ra);
2812 }
2813
2814 /* The operand must be 64 byte aligned. */
2815 if (ptr & 63) {
2816 raise_exception_ra(env, EXCP0D_GPF, ra);
2817 }
2818
2819 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
2820
2821 if ((int64_t)xstate_bv < 0) {
2822 /* FIXME: Compact form. */
2823 raise_exception_ra(env, EXCP0D_GPF, ra);
2824 }
2825
2826 /* Standard form. */
2827
2828 /* The XSTATE_BV field must not set bits not present in XCR0. */
2829 if (xstate_bv & ~env->xcr0) {
2830 raise_exception_ra(env, EXCP0D_GPF, ra);
2831 }
2832
2833 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
2834 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
2835 describes only XCOMP_BV, but the description of the standard form
2836 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
2837 includes the next 64-bit field. */
2838 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
2839 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
2840 if (xcomp_bv || reserve0) {
2841 raise_exception_ra(env, EXCP0D_GPF, ra);
2842 }
2843
2844 if (rfbm & XSTATE_FP_MASK) {
2845 if (xstate_bv & XSTATE_FP_MASK) {
2846 do_xrstor_fpu(env, ptr, ra);
2847 } else {
2848 do_fninit(env);
2849 memset(env->fpregs, 0, sizeof(env->fpregs));
2850 }
2851 }
2852 if (rfbm & XSTATE_SSE_MASK) {
2853 /* Note that the standard form of XRSTOR loads MXCSR from memory
2854 whether or not the XSTATE_BV bit is set. */
2855 do_xrstor_mxcsr(env, ptr, ra);
2856 if (xstate_bv & XSTATE_SSE_MASK) {
2857 do_xrstor_sse(env, ptr, ra);
2858 } else {
2859 /* ??? When AVX is implemented, we may have to be more
2860 selective in the clearing. */
2861 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
2862 }
2863 }
2864 if (rfbm & XSTATE_BNDREGS_MASK) {
2865 if (xstate_bv & XSTATE_BNDREGS_MASK) {
2866 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
2867 env->hflags |= HF_MPX_IU_MASK;
2868 } else {
2869 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
2870 env->hflags &= ~HF_MPX_IU_MASK;
2871 }
2872 }
2873 if (rfbm & XSTATE_BNDCSR_MASK) {
2874 if (xstate_bv & XSTATE_BNDCSR_MASK) {
2875 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
2876 } else {
2877 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
2878 }
2879 cpu_sync_bndcs_hflags(env);
2880 }
2881 if (rfbm & XSTATE_PKRU_MASK) {
2882 uint64_t old_pkru = env->pkru;
2883 if (xstate_bv & XSTATE_PKRU_MASK) {
2884 do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
2885 } else {
2886 env->pkru = 0;
2887 }
2888 if (env->pkru != old_pkru) {
2889 CPUState *cs = env_cpu(env);
2890 tlb_flush(cs);
2891 }
2892 }
2893 }
2894
2895 #undef XO
2896
2897 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
2898 {
2899 /* The OS must have enabled XSAVE. */
2900 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2901 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
2902 }
2903
2904 switch (ecx) {
2905 case 0:
2906 return env->xcr0;
2907 case 1:
2908 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
2909 return env->xcr0 & get_xinuse(env);
2910 }
2911 break;
2912 }
2913 raise_exception_ra(env, EXCP0D_GPF, GETPC());
2914 }
2915
2916 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
2917 {
2918 uint32_t dummy, ena_lo, ena_hi;
2919 uint64_t ena;
2920
2921 /* The OS must have enabled XSAVE. */
2922 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2923 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
2924 }
2925
2926 /* Only XCR0 is defined at present; the FPU may not be disabled. */
2927 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
2928 goto do_gpf;
2929 }
2930
2931 /* Disallow enabling unimplemented features. */
2932 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
2933 ena = ((uint64_t)ena_hi << 32) | ena_lo;
2934 if (mask & ~ena) {
2935 goto do_gpf;
2936 }
2937
2938 /* Disallow enabling only half of MPX. */
2939 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
2940 & XSTATE_BNDCSR_MASK) {
2941 goto do_gpf;
2942 }
2943
2944 env->xcr0 = mask;
2945 cpu_sync_bndcs_hflags(env);
2946 return;
2947
2948 do_gpf:
2949 raise_exception_ra(env, EXCP0D_GPF, GETPC());
2950 }
2951
2952 /* MMX/SSE */
/* XXX: optimize by storing fpstt and fptags in the static cpu state */
2954
/*
 * MXCSR control bits, written as shifts like the FPUS_* status bits
 * near the top of this file.  Values are unchanged.
 */
#define SSE_DAZ         (1 << 6)    /* denormals are zero */
#define SSE_RC_MASK     (3 << 13)   /* two-bit rounding-control field */
#define SSE_RC_NEAR     (0 << 13)   /* round to nearest even */
#define SSE_RC_DOWN     (1 << 13)   /* round down */
#define SSE_RC_UP       (2 << 13)   /* round up */
#define SSE_RC_CHOP     (3 << 13)   /* round toward zero */
#define SSE_FZ          (1 << 15)   /* flush to zero */
2962
2963 void update_mxcsr_status(CPUX86State *env)
2964 {
2965 uint32_t mxcsr = env->mxcsr;
2966 int rnd_type;
2967
2968 /* set rounding mode */
2969 switch (mxcsr & SSE_RC_MASK) {
2970 default:
2971 case SSE_RC_NEAR:
2972 rnd_type = float_round_nearest_even;
2973 break;
2974 case SSE_RC_DOWN:
2975 rnd_type = float_round_down;
2976 break;
2977 case SSE_RC_UP:
2978 rnd_type = float_round_up;
2979 break;
2980 case SSE_RC_CHOP:
2981 rnd_type = float_round_to_zero;
2982 break;
2983 }
2984 set_float_rounding_mode(rnd_type, &env->sse_status);
2985
2986 /* Set exception flags. */
2987 set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) |
2988 (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) |
2989 (mxcsr & FPUS_OE ? float_flag_overflow : 0) |
2990 (mxcsr & FPUS_UE ? float_flag_underflow : 0) |
2991 (mxcsr & FPUS_PE ? float_flag_inexact : 0),
2992 &env->sse_status);
2993
2994 /* set denormals are zero */
2995 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
2996
2997 /* set flush to zero */
2998 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
2999 }
3000
3001 void update_mxcsr_from_sse_status(CPUX86State *env)
3002 {
3003 uint8_t flags = get_float_exception_flags(&env->sse_status);
3004 /*
3005 * The MXCSR denormal flag has opposite semantics to
3006 * float_flag_input_denormal (the softfloat code sets that flag
3007 * only when flushing input denormals to zero, but SSE sets it
3008 * only when not flushing them to zero), so is not converted
3009 * here.
3010 */
3011 env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) |
3012 (flags & float_flag_divbyzero ? FPUS_ZE : 0) |
3013 (flags & float_flag_overflow ? FPUS_OE : 0) |
3014 (flags & float_flag_underflow ? FPUS_UE : 0) |
3015 (flags & float_flag_inexact ? FPUS_PE : 0) |
3016 (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE :
3017 0));
3018 }
3019
3020 void helper_update_mxcsr(CPUX86State *env)
3021 {
3022 update_mxcsr_from_sse_status(env);
3023 }
3024
3025 void helper_ldmxcsr(CPUX86State *env, uint32_t val)
3026 {
3027 cpu_set_mxcsr(env, val);
3028 }
3029
3030 void helper_enter_mmx(CPUX86State *env)
3031 {
3032 env->fpstt = 0;
3033 *(uint32_t *)(env->fptags) = 0;
3034 *(uint32_t *)(env->fptags + 4) = 0;
3035 }
3036
3037 void helper_emms(CPUX86State *env)
3038 {
3039 /* set to empty state */
3040 *(uint32_t *)(env->fptags) = 0x01010101;
3041 *(uint32_t *)(env->fptags + 4) = 0x01010101;
3042 }
3043
3044 /* XXX: suppress */
3045 void helper_movq(CPUX86State *env, void *d, void *s)
3046 {
3047 *(uint64_t *)d = *(uint64_t *)s;
3048 }
3049
3050 #define SHIFT 0
3051 #include "ops_sse.h"
3052
3053 #define SHIFT 1
3054 #include "ops_sse.h"