/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */

static void partsN(return_nan)(FloatPartsN *a, float_status *s)
{
    switch (a->cls) {
    case float_class_snan:
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        if (s->default_nan_mode) {
            parts_default_nan(a, s);
        } else {
            parts_silence_nan(a, s);
        }
        break;
    case float_class_qnan:
        if (s->default_nan_mode) {
            parts_default_nan(a, s);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s)
{
    if (is_snan(a->cls) || is_snan(b->cls)) {
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
    }

    if (s->default_nan_mode) {
        parts_default_nan(a, s);
    } else {
        int cmp = frac_cmp(a, b);
        if (cmp == 0) {
            cmp = a->sign < b->sign;
        }

        if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
            a = b;
        }
        if (is_snan(a->cls)) {
            parts_silence_nan(a, s);
        }
    }
    return a;
}
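
/*
 * Illustrative sketch (not part of this file): how the NaN propagation
 * above is reached through the public softfloat API.  The helpers used
 * (make_float32, float32_one, float32_add, set_default_nan_mode,
 * get_float_exception_flags) are the usual softfloat entry points; the
 * exact quiet NaN returned depends on the target's pickNaN() rules.
 *
 *     float_status st = { 0 };
 *     float32 snan = make_float32(0x7f800001);   // a signaling NaN
 *     float32 r = float32_add(snan, float32_one, &st);
 *     // get_float_exception_flags(&st) now includes float_flag_invalid;
 *     // r is a quiet NaN chosen by parts_pick_nan()/pickNaN().
 *
 *     set_default_nan_mode(true, &st);
 *     r = float32_add(snan, float32_one, &st);
 *     // With default_nan_mode set, r is the target's default NaN instead.
 */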

static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
                                            FloatPartsN *c, float_status *s,
                                            int ab_mask, int abc_mask)
{
    int which;

    if (unlikely(abc_mask & float_cmask_snan)) {
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
    }

    which = pickNaNMulAdd(a->cls, b->cls, c->cls,
                          ab_mask == float_cmask_infzero, s);

    if (s->default_nan_mode || which == 3) {
        /*
         * Note that this check is after pickNaNMulAdd so that function
         * has an opportunity to set the Invalid flag for infzero.
         */
        parts_default_nan(a, s);
        return a;
    }

    switch (which) {
    case 0:
        break;
    case 1:
        a = b;
        break;
    case 2:
        a = c;
        break;
    default:
        g_assert_not_reached();
    }
    if (is_snan(a->cls)) {
        parts_silence_nan(a, s);
    }
    return a;
}
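
/*
 * Illustrative sketch (not part of this file): the (Inf, 0, NaN) fused
 * multiply-add cases land here via entry points such as float32_muladd().
 * Whether Inf*0+QNaN raises InvalidOperation is target-defined through
 * pickNaNMulAdd(), as the comment in partsN(muladd) explains.
 *
 *     float_status st = { 0 };
 *     float32 qnan = float32_default_nan(&st);
 *     float32 r = float32_muladd(float32_infinity, float32_zero, qnan, 0, &st);
 *     // r is a NaN; float_flag_invalid may or may not be raised,
 *     // depending on the target's pickNaNMulAdd() choice for infzero.
 */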

/*
 * Canonicalize the FloatParts structure.  Determine the class,
 * unbias the exponent, and normalize the fraction.
 */
static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
                                 const FloatFmt *fmt)
{
    if (unlikely(p->exp == 0)) {
        if (likely(frac_eqz(p))) {
            p->cls = float_class_zero;
        } else if (status->flush_inputs_to_zero) {
            float_raise(float_flag_input_denormal, status);
            p->cls = float_class_zero;
            frac_clear(p);
        } else {
            int shift = frac_normalize(p);
            p->cls = float_class_normal;
            p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
        }
    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
        p->cls = float_class_normal;
        p->exp -= fmt->exp_bias;
        frac_shl(p, fmt->frac_shift);
        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
    } else if (likely(frac_eqz(p))) {
        p->cls = float_class_inf;
    } else {
        frac_shl(p, fmt->frac_shift);
        p->cls = (parts_is_snan_frac(p->frac_hi, status)
                  ? float_class_snan : float_class_qnan);
    }
}
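
/*
 * Worked example (illustrative, assuming float32's usual parameters:
 * exp_bias 127 and frac_shift 40, the shift that moves the 23-bit
 * fraction up under the implicit bit at bit 63).  Canonicalizing the
 * smallest subnormal 0x00000001: the exponent field is 0 and the raw
 * fraction is 1, so frac_normalize() shifts the single set bit up to
 * bit 63 (shift = 63) and
 *     p->exp = 40 - 127 - 63 + 1 = -149,
 * i.e. the decomposed value is 1.0 * 2^-149, which is indeed the value
 * of the smallest float32 subnormal.
 */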

/*
 * Round and uncanonicalize a floating-point number by parts. There
 * are FRAC_SHIFT bits that may require rounding at the bottom of the
 * fraction; these bits will be removed. The exponent will be biased
 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
 */
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
                                   const FloatFmt *fmt)
{
    const int exp_max = fmt->exp_max;
    const int frac_shift = fmt->frac_shift;
    const uint64_t round_mask = fmt->round_mask;
    const uint64_t frac_lsb = round_mask + 1;
    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
    const uint64_t roundeven_mask = round_mask | frac_lsb;
    uint64_t inc;
    bool overflow_norm = false;
    int exp, flags = 0;

    switch (s->float_rounding_mode) {
    case float_round_nearest_even:
        if (N > 64 && frac_lsb == 0) {
            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        } else {
            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        }
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        overflow_norm = true;
        inc = 0;
        break;
    case float_round_up:
        inc = p->sign ? 0 : round_mask;
        overflow_norm = p->sign;
        break;
    case float_round_down:
        inc = p->sign ? round_mask : 0;
        overflow_norm = !p->sign;
        break;
    case float_round_to_odd:
        overflow_norm = true;
        /* fall through */
    case float_round_to_odd_inf:
        if (N > 64 && frac_lsb == 0) {
            inc = p->frac_hi & 1 ? 0 : round_mask;
        } else {
            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
        }
        break;
    default:
        g_assert_not_reached();
    }

    exp = p->exp + fmt->exp_bias;
    if (likely(exp > 0)) {
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }

        if (fmt->arm_althp) {
            /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
            if (unlikely(exp > exp_max)) {
                /* Overflow.  Return the maximum normal. */
                flags = float_flag_invalid;
                exp = exp_max;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            }
        } else if (unlikely(exp >= exp_max)) {
            flags |= float_flag_overflow;
            if (s->rebias_overflow) {
                exp -= fmt->exp_re_bias;
            } else if (overflow_norm) {
                flags |= float_flag_inexact;
                exp = exp_max - 1;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            } else {
                flags |= float_flag_inexact;
                p->cls = float_class_inf;
                exp = exp_max;
                frac_clear(p);
            }
        }
        frac_shr(p, frac_shift);
    } else if (unlikely(s->rebias_underflow)) {
        flags |= float_flag_underflow;
        exp += fmt->exp_re_bias;
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }
        frac_shr(p, frac_shift);
    } else if (s->flush_to_zero) {
        flags |= float_flag_output_denormal;
        p->cls = float_class_zero;
        exp = 0;
        frac_clear(p);
    } else {
        bool is_tiny = s->tininess_before_rounding || exp < 0;

        if (!is_tiny) {
            FloatPartsN discard;
            is_tiny = !frac_addi(&discard, p, inc);
        }

        frac_shrjam(p, 1 - exp);

        if (p->frac_lo & round_mask) {
            /* Need to recompute round-to-even/round-to-odd. */
            switch (s->float_rounding_mode) {
            case float_round_nearest_even:
                if (N > 64 && frac_lsb == 0) {
                    inc = ((p->frac_hi & 1) ||
                           (p->frac_lo & round_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                } else {
                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                }
                break;
            case float_round_to_odd:
            case float_round_to_odd_inf:
                if (N > 64 && frac_lsb == 0) {
                    inc = p->frac_hi & 1 ? 0 : round_mask;
                } else {
                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
                }
                break;
            default:
                break;
            }
            flags |= float_flag_inexact;
            frac_addi(p, p, inc);
            p->frac_lo &= ~round_mask;
        }

        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
        frac_shr(p, frac_shift);

        if (is_tiny && (flags & float_flag_inexact)) {
            flags |= float_flag_underflow;
        }
        if (exp == 0 && frac_eqz(p)) {
            p->cls = float_class_zero;
        }
    }
    p->exp = exp;
    float_raise(flags, s);
}
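
/*
 * Worked example (illustrative, with a hypothetical 8-bit round_mask of
 * 0xff for readability): then frac_lsb = 0x100, frac_lsbm1 = 0x80 and
 * roundeven_mask = 0x1ff.  For nearest-even, a fraction whose low nine
 * bits are exactly 0x080 (discarded bits a half-way tie, kept lsb even)
 * takes inc = 0 and truncation keeps the even value, while 0x180 (a tie
 * with the kept lsb odd) takes inc = 0x80 and rounds up to the even
 * neighbour.  Any non-tie pattern also takes inc = 0x80, i.e. ordinary
 * add-half-then-truncate rounding.
 */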

static void partsN(uncanon)(FloatPartsN *p, float_status *s,
                            const FloatFmt *fmt)
{
    if (likely(p->cls == float_class_normal)) {
        parts_uncanon_normal(p, s, fmt);
    } else {
        switch (p->cls) {
        case float_class_zero:
            p->exp = 0;
            frac_clear(p);
            return;
        case float_class_inf:
            g_assert(!fmt->arm_althp);
            p->exp = fmt->exp_max;
            frac_clear(p);
            return;
        case float_class_qnan:
        case float_class_snan:
            g_assert(!fmt->arm_althp);
            p->exp = fmt->exp_max;
            frac_shr(p, fmt->frac_shift);
            return;
        default:
            break;
        }
        g_assert_not_reached();
    }
}

/*
 * Returns the result of adding or subtracting the values of the
 * floating-point values `a' and `b'. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 */
static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, bool subtract)
{
    bool b_sign = b->sign ^ subtract;
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (a->sign != b_sign) {
        /* Subtraction */
        if (likely(ab_mask == float_cmask_normal)) {
            if (parts_sub_normal(a, b)) {
                return a;
            }
            /* Subtract was exact, fall through to set sign. */
            ab_mask = float_cmask_zero;
        }

        if (ab_mask == float_cmask_zero) {
            a->sign = s->float_rounding_mode == float_round_down;
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (a->cls != float_class_inf) {
                /* N - Inf */
                goto return_b;
            }
            if (b->cls != float_class_inf) {
                /* Inf - N */
                return a;
            }
            /* Inf - Inf */
            float_raise(float_flag_invalid | float_flag_invalid_isi, s);
            parts_default_nan(a, s);
            return a;
        }
    } else {
        /* Addition */
        if (likely(ab_mask == float_cmask_normal)) {
            parts_add_normal(a, b);
            return a;
        }

        if (ab_mask == float_cmask_zero) {
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            a->cls = float_class_inf;
            return a;
        }
    }

    if (b->cls == float_class_zero) {
        g_assert(a->cls == float_class_normal);
        return a;
    }

    g_assert(a->cls == float_class_zero);
    g_assert(b->cls == float_class_normal);
 return_b:
    b->sign = b_sign;
    return b;

 p_nan:
    return parts_pick_nan(a, b, s);
}
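
/*
 * Illustrative examples (not part of this file) of the special cases
 * above, through the public API:
 *
 *     float_status st = { 0 };
 *     set_float_rounding_mode(float_round_down, &st);
 *     float32 r = float32_sub(float32_one, float32_one, &st);
 *     // Exact zero result: the sign comes from the rounding mode,
 *     // so r is -0.0 under round-down, +0.0 otherwise.
 *
 *     r = float32_sub(float32_infinity, float32_infinity, &st);
 *     // Inf - Inf: raises float_flag_invalid (invalid_isi) and returns
 *     // the target's default NaN.
 */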

/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b'. The operation is performed according to the IEC/IEEE Standard
 * for Binary Floating-Point Arithmetic.
 */
static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
                                float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    bool sign = a->sign ^ b->sign;

    if (likely(ab_mask == float_cmask_normal)) {
        FloatPartsW tmp;

        frac_mulw(&tmp, a, b);
        frac_truncjam(a, &tmp);

        a->exp += b->exp + 1;
        if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
            frac_add(a, a, a);
            a->exp -= 1;
        }

        a->sign = sign;
        return a;
    }

    /* Inf * Zero == NaN */
    if (unlikely(ab_mask == float_cmask_infzero)) {
        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
        parts_default_nan(a, s);
        return a;
    }

    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    /* Multiply by 0 or Inf */
    if (ab_mask & float_cmask_inf) {
        a->cls = float_class_inf;
        a->sign = sign;
        return a;
    }

    g_assert(ab_mask & float_cmask_zero);
    a->cls = float_class_zero;
    a->sign = sign;
    return a;
}
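
/*
 * Worked example (illustrative): both significands lie in [1, 2) in the
 * decomposed fixed-point form, so the double-width product from
 * frac_mulw() lies in [1, 4) with its binary point one position lower;
 * hence the unconditional `a->exp += b->exp + 1`.  E.g. 1.5 * 1.25 =
 * 1.875 < 2: the msb of the narrowed product is clear, so
 * frac_add(a, a, a) doubles the fraction back into [1, 2) and the
 * exponent adjustment nets out to exp_a + exp_b.  For 1.5 * 1.5 = 2.25
 * the msb is already set and the +1 stands, giving 1.125 * 2^(exp_a+exp_b+1).
 */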

/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b' then adding 'c', with no intermediate rounding step after the
 * multiplication. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
 * The flags argument allows the caller to select negation of the
 * addend, the intermediate product, or the final result. (The
 * difference between this and having the caller do a separate
 * negation is that negating externally will flip the sign bit on NaNs.)
 *
 * Requires A and C extracted into a double-sized structure to provide the
 * extra space for the widening multiply.
 */
static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
                                   FloatPartsN *c, int flags, float_status *s)
{
    int ab_mask, abc_mask;
    FloatPartsW p_widen, c_widen;

    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    abc_mask = float_cmask(c->cls) | ab_mask;

    /*
     * It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (unlikely(abc_mask & float_cmask_anynan)) {
        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
    }

    if (flags & float_muladd_negate_c) {
        c->sign ^= 1;
    }

    /* Compute the sign of the product into A. */
    a->sign ^= b->sign;
    if (flags & float_muladd_negate_product) {
        a->sign ^= 1;
    }

    if (unlikely(ab_mask != float_cmask_normal)) {
        if (unlikely(ab_mask == float_cmask_infzero)) {
            float_raise(float_flag_invalid | float_flag_invalid_imz, s);
            goto d_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (c->cls == float_class_inf && a->sign != c->sign) {
                float_raise(float_flag_invalid | float_flag_invalid_isi, s);
                goto d_nan;
            }
            goto return_inf;
        }

        g_assert(ab_mask & float_cmask_zero);
        if (c->cls == float_class_normal) {
            *a = *c;
            goto return_normal;
        }
        if (c->cls == float_class_zero) {
            if (a->sign != c->sign) {
                goto return_sub_zero;
            }
            goto return_zero;
        }
        g_assert(c->cls == float_class_inf);
    }

    if (unlikely(c->cls == float_class_inf)) {
        a->sign = c->sign;
        goto return_inf;
    }

    /* Perform the multiplication step. */
    p_widen.sign = a->sign;
    p_widen.exp = a->exp + b->exp + 1;
    frac_mulw(&p_widen, a, b);
    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        frac_add(&p_widen, &p_widen, &p_widen);
        p_widen.exp -= 1;
    }

    /* Perform the addition step. */
    if (c->cls != float_class_zero) {
        /* Zero-extend C to less significant bits. */
        frac_widen(&c_widen, c);
        c_widen.exp = c->exp;

        if (a->sign == c->sign) {
            parts_add_normal(&p_widen, &c_widen);
        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
            goto return_sub_zero;
        }
    }

    /* Narrow with sticky bit, for proper rounding later. */
    frac_truncjam(a, &p_widen);
    a->sign = p_widen.sign;
    a->exp = p_widen.exp;

 return_normal:
    if (flags & float_muladd_halve_result) {
        a->exp -= 1;
    }
 finish_sign:
    if (flags & float_muladd_negate_result) {
        a->sign ^= 1;
    }
    return a;

 return_sub_zero:
    a->sign = s->float_rounding_mode == float_round_down;
 return_zero:
    a->cls = float_class_zero;
    goto finish_sign;

 return_inf:
    a->cls = float_class_inf;
    goto finish_sign;

 d_nan:
    parts_default_nan(a, s);
    return a;
}
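
/*
 * Illustrative sketch (not part of this file): the negate flags map onto
 * the usual fused operations, e.g. for float64 operands a, b, c via
 * float64_muladd():
 *
 *     float_status st = { 0 };
 *     // a * b - c, by negating the addend
 *     float64 r1 = float64_muladd(a, b, c, float_muladd_negate_c, &st);
 *     // -(a * b) + c, by negating only the intermediate product
 *     float64 r2 = float64_muladd(a, b, c, float_muladd_negate_product, &st);
 *     // negate the final result instead; unlike an external negation,
 *     // this does not flip the sign of a NaN result
 *     float64 r3 = float64_muladd(a, b, c, float_muladd_negate_result, &st);
 */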

/*
 * Returns the result of dividing the floating-point value `a' by the
 * corresponding value `b'. The operation is performed according to
 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
                                float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    bool sign = a->sign ^ b->sign;

    if (likely(ab_mask == float_cmask_normal)) {
        a->sign = sign;
        a->exp -= b->exp + frac_div(a, b);
        return a;
    }

    /* 0/0 or Inf/Inf => NaN */
    if (unlikely(ab_mask == float_cmask_zero)) {
        float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
        goto d_nan;
    }
    if (unlikely(ab_mask == float_cmask_inf)) {
        float_raise(float_flag_invalid | float_flag_invalid_idi, s);
        goto d_nan;
    }

    /* All the NaN cases */
    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    a->sign = sign;

    /* Inf / X */
    if (a->cls == float_class_inf) {
        return a;
    }

    /* 0 / X */
    if (a->cls == float_class_zero) {
        return a;
    }

    /* X / Inf */
    if (b->cls == float_class_inf) {
        a->cls = float_class_zero;
        return a;
    }

    /* X / 0 => Inf */
    g_assert(b->cls == float_class_zero);
    float_raise(float_flag_divbyzero, s);
    a->cls = float_class_inf;
    return a;

 d_nan:
    parts_default_nan(a, s);
    return a;
}
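
/*
 * Illustrative examples (not part of this file), through the public API:
 *
 *     float_status st = { 0 };
 *     float32 r = float32_div(float32_one, float32_zero, &st);
 *     // 1/0: raises float_flag_divbyzero and returns +Inf.
 *     r = float32_div(float32_zero, float32_zero, &st);
 *     // 0/0: raises float_flag_invalid (invalid_zdz) and returns the
 *     // default NaN.
 */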

/*
 * Floating point remainder, per IEC/IEEE, or modulus.
 */
static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
                                   uint64_t *mod_quot, float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (likely(ab_mask == float_cmask_normal)) {
        frac_modrem(a, b, mod_quot);
        return a;
    }

    if (mod_quot) {
        *mod_quot = 0;
    }

    /* All the NaN cases */
    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    /* Inf % N; N % 0 */
    if (a->cls == float_class_inf || b->cls == float_class_zero) {
        float_raise(float_flag_invalid, s);
        parts_default_nan(a, s);
        return a;
    }

    /* N % Inf; 0 % N */
    g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
    return a;
}
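
/*
 * Illustrative note (not part of this file): the IEEE remainder is
 * exposed through entry points such as float32_rem(); mod_quot is
 * non-NULL only for variants that also need the low bits of the
 * quotient (e.g. the floatx80 modulus/remainder helpers).  The special
 * cases follow the code above: Inf % x and x % 0 raise InvalidOperation
 * and return the default NaN, while x % Inf and 0 % x return the
 * dividend unchanged.
 */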

/*
 * Square Root
 *
 * The base algorithm is lifted from
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
 * and is thus MIT licensed.
 */
static void partsN(sqrt)(FloatPartsN *a, float_status *status,
                         const FloatFmt *fmt)
{
    const uint32_t three32 = 3u << 30;
    const uint64_t three64 = 3ull << 62;
    uint32_t d32, m32, r32, s32, u32; /* 32-bit computation */
    uint64_t d64, m64, r64, s64, u64; /* 64-bit computation */
    uint64_t dh, dl, rh, rl, sh, sl, uh, ul; /* 128-bit computation */
    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
    uint64_t discard;
    bool exp_odd;
    size_t index;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, status);
            return;
        case float_class_zero:
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            g_assert_not_reached();
        }
    }

    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /*
     * Argument reduction.
     * x = 4^e frac; with integer e, and frac in [1, 4)
     * m = frac fixed point at bit 62, since we're in base 4.
     * If base-2 exponent is odd, exchange that for multiply by 2,
     * which results in no shift.
     */
    exp_odd = a->exp & 1;
    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
    if (!exp_odd) {
        frac_shr(a, 1);
    }

    /*
     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
     *
     * Initial estimate:
     * 7-bit lookup table (1-bit exponent and 6-bit significand).
     *
     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
     * a table lookup is faster and needs one less iteration.
     * The 7-bit table gives |e| < 0x1.fdp-9.
     *
     * A Newton-Raphson iteration for r is
     *   s = m*r
     *   d = s*r
     *   u = 3 - d
     *   r = r*u/2
     *
     * Fixed point representations:
     *   m, s, d, u, three are all 2.30; r is 0.32
     */
    m64 = a->frac_hi;
    m32 = m64 >> 32;

    r32 = rsqrt_tab[index] << 16;
    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */

    s32 = ((uint64_t)m32 * r32) >> 32;
    d32 = ((uint64_t)s32 * r32) >> 32;
    u32 = three32 - d32;

    if (N == 64) {
        /* float64 or smaller */

        r32 = ((uint64_t)r32 * u32) >> 31;
        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */

        s32 = ((uint64_t)m32 * r32) >> 32;
        d32 = ((uint64_t)s32 * r32) >> 32;
        u32 = three32 - d32;

        if (fmt->frac_size <= 23) {
            /* float32 or smaller */

            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
            s32 = (s32 - 1) >> 6;               /* 9.23 */
            /* s < sqrt(m) < s + 0x1.08p-23 */

            /* compute nearest rounded result to 2.23 bits */
            uint32_t d0 = (m32 << 16) - s32 * s32;
            uint32_t d1 = s32 - d0;
            uint32_t d2 = d1 + s32 + 1;
            s32 += d1 >> 31;
            a->frac_hi = (uint64_t)s32 << (64 - 25);

            /* increment or decrement for inexact */
            if (d2 != 0) {
                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
            }
            goto done;
        }

        /* float64 */

        r64 = (uint64_t)r32 * u32 * 2;
        /* |r*sqrt(m) - 1| < 0x1.37p-29; convert to 64-bit arithmetic */
        mul64To128(m64, r64, &s64, &discard);
        mul64To128(s64, r64, &d64, &discard);
        u64 = three64 - d64;

        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
        s64 = (s64 - 2) >> 9;                  /* 12.52 */

        /* Compute nearest rounded result */
        uint64_t d0 = (m64 << 42) - s64 * s64;
        uint64_t d1 = s64 - d0;
        uint64_t d2 = d1 + s64 + 1;
        s64 += d1 >> 63;
        a->frac_hi = s64 << (64 - 54);

        /* increment or decrement for inexact */
        if (d2 != 0) {
            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
        }
        goto done;
    }

    r64 = (uint64_t)r32 * u32 * 2;
    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &r64, &discard);
    r64 <<= 1;
    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &rh, &rl);
    add128(rh, rl, rh, rl, &rh, &rl);
    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */

    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
    sub128(three64, 0, dh, dl, &uh, &ul);
    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard); /* 3.125 */
    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */

    sub128(sh, sl, 0, 4, &sh, &sl);
    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
    /* s < sqrt(m) < s + 1ulp */

    /* Compute nearest rounded result */
    mul64To128(sl, sl, &d0h, &d0l);
    d0h += 2 * sh * sl;
    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
    add128(sh, sl, 0, 1, &d2h, &d2l);
    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
    shift128Left(sh, sl, 128 - 114, &sh, &sl);

    /* increment or decrement for inexact */
    if (d2h | d2l) {
        if ((int64_t)(d1h ^ d2h) < 0) {
            sub128(sh, sl, 0, 1, &sh, &sl);
        } else {
            add128(sh, sl, 0, 1, &sh, &sl);
        }
    }
    a->frac_lo = sl;
    a->frac_hi = sh;

 done:
    /* Convert back from base 4 to base 2. */
    a->exp >>= 1;
    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        frac_add(a, a, a);
    } else {
        a->exp += 1;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
    parts_default_nan(a, status);
}
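
/*
 * Illustrative examples (not part of this file) of the special cases
 * above, through the public API:
 *
 *     float_status st = { 0 };
 *     float32 r = float32_sqrt(make_float32(0x80000000), &st);
 *     // sqrt(-0): the zero class returns early with its sign intact,
 *     // so r is -0.0 and no flags are raised.
 *     r = float32_sqrt(make_float32(0xbf800000), &st);
 *     // sqrt(-1): raises float_flag_invalid (invalid_sqrt) and returns
 *     // the default NaN.
 */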

/*
 * Rounds the floating-point value `a' to an integer, and returns the
 * result as a floating-point value. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 *
 * parts_round_to_int_normal is an internal helper function for
 * normal numbers only, returning true for inexact but not directly
 * raising float_flag_inexact.
 */
static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
                                        int scale, int frac_size)
{
    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
    int shift_adj;

    scale = MIN(MAX(scale, -0x10000), 0x10000);
    a->exp += scale;

    if (a->exp < 0) {
        bool one;

        /* All fractional */
        switch (rmode) {
        case float_round_nearest_even:
            one = false;
            if (a->exp == -1) {
                FloatPartsN tmp;
                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
                frac_add(&tmp, a, a);
                /* Anything remaining means frac > 0.5. */
                one = !frac_eqz(&tmp);
            }
            break;
        case float_round_ties_away:
            one = a->exp == -1;
            break;
        case float_round_to_zero:
            one = false;
            break;
        case float_round_up:
            one = !a->sign;
            break;
        case float_round_down:
            one = a->sign;
            break;
        case float_round_to_odd:
            one = true;
            break;
        default:
            g_assert_not_reached();
        }

        frac_clear(a);
        a->exp = 0;
        if (one) {
            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
        } else {
            a->cls = float_class_zero;
        }
        return true;
    }

    if (a->exp >= frac_size) {
        /* All integral */
        return false;
    }

    if (N > 64 && a->exp < N - 64) {
        /*
         * Rounding is not in the low word -- shift lsb to bit 2,
         * which leaves room for sticky and rounding bit.
         */
        shift_adj = (N - 1) - (a->exp + 2);
        frac_shrjam(a, shift_adj);
        frac_lsb = 1 << 2;
    } else {
        shift_adj = 0;
        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
    }

    frac_lsbm1 = frac_lsb >> 1;
    rnd_mask = frac_lsb - 1;
    rnd_even_mask = rnd_mask | frac_lsb;

    if (!(a->frac_lo & rnd_mask)) {
        /* Fractional bits already clear, undo the shift above. */
        frac_shl(a, shift_adj);
        return false;
    }

    switch (rmode) {
    case float_round_nearest_even:
        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        inc = 0;
        break;
    case float_round_up:
        inc = a->sign ? 0 : rnd_mask;
        break;
    case float_round_down:
        inc = a->sign ? rnd_mask : 0;
        break;
    case float_round_to_odd:
        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
        break;
    default:
        g_assert_not_reached();
    }

    if (shift_adj == 0) {
        if (frac_addi(a, a, inc)) {
            frac_shr(a, 1);
            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
            a->exp++;
        }
        a->frac_lo &= ~rnd_mask;
    } else {
        frac_addi(a, a, inc);
        a->frac_lo &= ~rnd_mask;
        /* Be careful shifting back, not to overflow */
        frac_shl(a, shift_adj - 1);
        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
            a->exp++;
        } else {
            frac_add(a, a, a);
        }
    }
    return true;
}

static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt)
{
    switch (a->cls) {
    case float_class_qnan:
    case float_class_snan:
        parts_return_nan(a, s);
        break;
    case float_class_zero:
    case float_class_inf:
        break;
    case float_class_normal:
        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
            float_raise(float_flag_inexact, s);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
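
/*
 * Illustrative examples (not part of this file), through the public API:
 *
 *     float_status st = { 0 };
 *     float32 r = float32_round_to_int(make_float32(0x40200000), &st);
 *     // 2.5 under the default nearest-even mode rounds to 2.0 and
 *     // raises float_flag_inexact.
 *     set_float_rounding_mode(float_round_up, &st);
 *     r = float32_round_to_int(make_float32(0x40200000), &st);
 *     // 2.5 under round-up rounds to 3.0.
 */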

/*
 * Returns the result of converting the floating-point value `a' to
 * the two's complement integer format. The conversion is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode. If `a' is a NaN,
 * the largest positive integer is returned. Otherwise, if the
 * conversion overflows, the largest integer with the same sign as `a'
 * is returned.
 */
static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = max;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p->sign ? min : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
        }

        if (p->exp <= DECOMPOSED_BINARY_POINT) {
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
        } else {
            r = UINT64_MAX;
        }
        if (p->sign) {
            if (r <= -(uint64_t)min) {
                r = -r;
            } else {
                flags = float_flag_invalid | float_flag_invalid_cvti;
                r = min;
            }
        } else if (r > max) {
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = max;
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}
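
/*
 * Illustrative examples (not part of this file): via e.g.
 * float64_to_int32(), a NaN input converts to the largest positive
 * integer (INT32_MAX) with float_flag_invalid raised, and an
 * out-of-range value such as 3e10 saturates to INT32_MAX with
 * float_flag_invalid | float_flag_invalid_cvti, while -3e10 saturates
 * to INT32_MIN.  In-range values merely raise inexact when rounding
 * discarded a fractional part.
 */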

/*
 * Returns the result of converting the floating-point value `a' to
 * the unsigned integer format. The conversion is performed according
 * to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode. If `a' is a NaN,
 * the largest unsigned integer is returned. Otherwise, if the
 * conversion overflows, the largest unsigned integer is returned. If
 * `a' is negative, the result is rounded and zero is returned;
 * values that do not round to zero will raise the inexact exception
 * flag.
 */
static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
                                      int scale, uint64_t max, float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = max;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p->sign ? 0 : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
            if (p->cls == float_class_zero) {
                r = 0;
                break;
            }
        }

        if (p->sign) {
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = 0;
        } else if (p->exp > DECOMPOSED_BINARY_POINT) {
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = max;
        } else {
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
            if (r > max) {
                flags = float_flag_invalid | float_flag_invalid_cvti;
                r = max;
            }
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}
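
/*
 * Illustrative examples (not part of this file): via e.g.
 * float32_to_uint32(), -0.25 rounds to zero, so the result is 0 with
 * only float_flag_inexact raised; -1.5 does not round to zero, so the
 * result is 0 with float_flag_invalid | float_flag_invalid_cvti raised
 * instead.  A NaN converts to UINT32_MAX with float_flag_invalid.
 */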

/*
 * Integer to float conversions
 *
 * Returns the result of converting the two's complement integer `a'
 * to the floating-point format. The conversion is performed according
 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
                                  int scale, float_status *s)
{
    uint64_t f = a;
    int shift;

    memset(p, 0, sizeof(*p));

    if (a == 0) {
        p->cls = float_class_zero;
        return;
    }

    p->cls = float_class_normal;
    if (a < 0) {
        f = -f;
        p->sign = true;
    }
    shift = clz64(f);
    scale = MIN(MAX(scale, -0x10000), 0x10000);

    p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
    p->frac_hi = f << shift;
}
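
/*
 * Worked example (illustrative): converting a = -5 with scale = 0.
 * f becomes 5 and sign is set; clz64(5) = 61, so
 *     p->exp     = 63 - 61 = 2
 *     p->frac_hi = 5 << 61 = 0xa000000000000000
 * i.e. a significand of binary 1.01 (1.25) with the implicit bit at
 * bit 63, and 1.25 * 2^2 = 5 as required.
 */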

/*
 * Unsigned Integer to float conversions
 *
 * Returns the result of converting the unsigned integer `a' to the
 * floating-point format. The conversion is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
                                  int scale, float_status *status)
{
    memset(p, 0, sizeof(*p));

    if (a == 0) {
        p->cls = float_class_zero;
    } else {
        int shift = clz64(a);
        scale = MIN(MAX(scale, -0x10000), 0x10000);
        p->cls = float_class_normal;
        p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
        p->frac_hi = a << shift;
    }
}

/*
 * Float min/max.
 */
static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, int flags)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    int a_exp, b_exp, cmp;

    if (unlikely(ab_mask & float_cmask_anynan)) {
        /*
         * For minNum/maxNum (IEEE 754-2008)
         * or minimumNumber/maximumNumber (IEEE 754-2019),
         * if one operand is a QNaN, and the other
         * operand is numerical, then return the numerical argument.
         */
        if ((flags & (minmax_isnum | minmax_isnumber))
            && !(ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_qnan)) {
            return is_nan(a->cls) ? b : a;
        }

        /*
         * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
         * are removed and replaced with minimum, minimumNumber, maximum
         * and maximumNumber.
         * minimumNumber/maximumNumber behavior for SNaN is changed to:
         *   If both operands are NaNs, a QNaN is returned.
         *   If either operand is a SNaN,
         *   an invalid operation exception is signaled,
         *   but unless both operands are NaNs,
         *   the SNaN is otherwise ignored and not converted to a QNaN.
         */
        if ((flags & minmax_isnumber)
            && (ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_anynan)) {
            float_raise(float_flag_invalid, s);
            return is_nan(a->cls) ? b : a;
        }

        return parts_pick_nan(a, b, s);
    }

    a_exp = a->exp;
    b_exp = b->exp;

    if (unlikely(ab_mask != float_cmask_normal)) {
        switch (a->cls) {
        case float_class_normal:
            break;
        case float_class_inf:
            a_exp = INT16_MAX;
            break;
        case float_class_zero:
            a_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }
        switch (b->cls) {
        case float_class_normal:
            break;
        case float_class_inf:
            b_exp = INT16_MAX;
            break;
        case float_class_zero:
            b_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }
    }

    /* Compare magnitudes. */
    cmp = a_exp - b_exp;
    if (cmp == 0) {
        cmp = frac_cmp(a, b);
    }

    /*
     * Take the sign into account.
     * For ismag, only do this if the magnitudes are equal.
     */
    if (!(flags & minmax_ismag) || cmp == 0) {
        if (a->sign != b->sign) {
            /* For differing signs, the negative operand is less. */
            cmp = a->sign ? -1 : 1;
        } else if (a->sign) {
            /* For two negative operands, invert the magnitude comparison. */
            cmp = -cmp;
        }
    }

    if (flags & minmax_ismin) {
        cmp = -cmp;
    }
    return cmp < 0 ? b : a;
}
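
/*
 * Illustrative examples (not part of this file): with one QNaN and one
 * numerical operand, the isnum/isnumber variants (e.g. float32_minnum()
 * or float32_minimum_number()) return the numerical operand, so
 * minnum(QNaN, 1.0) is 1.0; variants with neither flag set (e.g.
 * float32_min()) fall through to parts_pick_nan() and propagate a NaN.
 */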

/*
 * Floating point compare
 */
static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s, bool is_quiet)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (likely(ab_mask == float_cmask_normal)) {
        FloatRelation cmp;

        if (a->sign != b->sign) {
            goto a_sign;
        }
        if (a->exp == b->exp) {
            cmp = frac_cmp(a, b);
        } else if (a->exp < b->exp) {
            cmp = float_relation_less;
        } else {
            cmp = float_relation_greater;
        }
        if (a->sign) {
            cmp = -cmp;
        }
        return cmp;
    }

    if (unlikely(ab_mask & float_cmask_anynan)) {
        if (ab_mask & float_cmask_snan) {
            float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        } else if (!is_quiet) {
            float_raise(float_flag_invalid, s);
        }
        return float_relation_unordered;
    }

    if (ab_mask & float_cmask_zero) {
        if (ab_mask == float_cmask_zero) {
            return float_relation_equal;
        } else if (a->cls == float_class_zero) {
            goto b_sign;
        } else {
            goto a_sign;
        }
    }

    if (ab_mask == float_cmask_inf) {
        if (a->sign == b->sign) {
            return float_relation_equal;
        }
    } else if (b->cls == float_class_inf) {
        goto b_sign;
    } else {
        g_assert(a->cls == float_class_inf);
    }

 a_sign:
    return a->sign ? float_relation_less : float_relation_greater;
 b_sign:
    return b->sign ? float_relation_greater : float_relation_less;
}
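
/*
 * Illustrative note (not part of this file): float32_compare() is the
 * signaling variant (is_quiet = false) and float32_compare_quiet() the
 * quiet one.  Comparing anything with a NaN yields
 * float_relation_unordered; the signaling variant also raises
 * float_flag_invalid for any NaN, while the quiet variant raises it
 * only for SNaN operands.  Note that +0 and -0 compare equal.
 */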

/*
 * Multiply A by 2 raised to the power N.
 */
static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
{
    switch (a->cls) {
    case float_class_snan:
    case float_class_qnan:
        parts_return_nan(a, s);
        break;
    case float_class_zero:
    case float_class_inf:
        break;
    case float_class_normal:
        a->exp += MIN(MAX(n, -0x10000), 0x10000);
        break;
    default:
        g_assert_not_reached();
    }
}
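
/*
 * Illustrative note (not part of this file): e.g. float32_scalbn(1.0, 10)
 * yields 1024.0 simply by adding 10 to the decomposed exponent; any
 * resulting overflow or underflow is detected later when parts_uncanon()
 * repacks the value.
 */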

/*
 * Return log2(A)
 */
static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
{
    uint64_t a0, a1, r, t, ign;
    FloatPartsN f;
    int i, n, a_exp, f_exp;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, s);
            return;
        case float_class_zero:
            float_raise(float_flag_divbyzero, s);
            /* log2(0) = -inf */
            a->cls = float_class_inf;
            a->sign = 1;
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            break;
        }
        g_assert_not_reached();
    }
    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /* TODO: This algorithm loses bits too quickly for float128. */
    g_assert(N == 64);

    a_exp = a->exp;
    f_exp = -1;

    r = 0;
    t = DECOMPOSED_IMPLICIT_BIT;
    a0 = a->frac_hi;
    a1 = 0;

    n = fmt->frac_size + 2;
    if (unlikely(a_exp == -1)) {
        /*
         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
         * When the value is very close to 1.0, there are lots of 1's in
         * the msb parts of the fraction.  At the end, when we subtract
         * this value from -1.0, we can see a catastrophic loss of precision,
         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
         * bits of y in the final result.  To minimize this, compute as many
         * digits as we can.
         * ??? This case needs another algorithm to avoid this.
         */
        n = fmt->frac_size * 2 + 2;
        /* Don't compute a value overlapping the sticky bit */
        n = MIN(n, 62);
    }

    for (i = 0; i < n; i++) {
        if (a1) {
            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
        } else if (a0 & 0xffffffffull) {
            mul64To128(a0, a0, &a0, &a1);
        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
            a0 >>= 32;
            a0 *= a0;
        } else {
            goto exact;
        }

        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
            if (unlikely(a_exp == 0 && r == 0)) {
                /*
                 * When a_exp == 0, we're computing the log2 of a value
                 * [1.0,2.0).  When the value is very close to 1.0, there
                 * are lots of 0's in the msb parts of the fraction.
                 * We need to compute more digits to produce a correct
                 * result -- restart at the top of the fraction.
                 * ??? This is likely to lose precision quickly, as for
                 * float128; we may need another method.
                 */
                f_exp -= i;
                t = r = DECOMPOSED_IMPLICIT_BIT;
                i = 0;
            } else {
                r |= t;
            }
        } else {
            add128(a0, a1, a0, a1, &a0, &a1);
        }
        t >>= 1;
    }

    /* Set sticky for inexact. */
    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);

 exact:
    parts_sint_to_float(a, a_exp, 0, s);
    if (r == 0) {
        return;
    }

    memset(&f, 0, sizeof(f));
    f.cls = float_class_normal;
    f.frac_hi = r;
    f.exp = f_exp - frac_normalize(&f);

    if (a_exp < 0) {
        parts_sub_normal(a, &f);
    } else if (a_exp > 0) {
        parts_add_normal(a, &f);
    } else {
        *a = f;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid, s);
    parts_default_nan(a, s);
}
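
/*
 * Illustrative note (not part of this file): through entry points such
 * as float32_log2(), the special cases follow the code above:
 * log2(0) raises float_flag_divbyzero and returns -Inf, log2 of a
 * negative value raises float_flag_invalid and returns the default NaN,
 * and log2(+Inf) is +Inf with no flags raised.
 */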