/* fpu/softfloat-parts.c.inc — QEMU softfloat, width-templated "parts" implementation */
1 /*
2 * QEMU float support
3 *
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
16 */
17
18 static void partsN(return_nan)(FloatPartsN *a, float_status *s)
19 {
20 switch (a->cls) {
21 case float_class_snan:
22 float_raise(float_flag_invalid | float_flag_invalid_snan, s);
23 if (s->default_nan_mode) {
24 parts_default_nan(a, s);
25 } else {
26 parts_silence_nan(a, s);
27 }
28 break;
29 case float_class_qnan:
30 if (s->default_nan_mode) {
31 parts_default_nan(a, s);
32 }
33 break;
34 default:
35 g_assert_not_reached();
36 }
37 }
38
39 static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
40 float_status *s)
41 {
42 if (is_snan(a->cls) || is_snan(b->cls)) {
43 float_raise(float_flag_invalid | float_flag_invalid_snan, s);
44 }
45
46 if (s->default_nan_mode) {
47 parts_default_nan(a, s);
48 } else {
49 int cmp = frac_cmp(a, b);
50 if (cmp == 0) {
51 cmp = a->sign < b->sign;
52 }
53
54 if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
55 a = b;
56 }
57 if (is_snan(a->cls)) {
58 parts_silence_nan(a, s);
59 }
60 }
61 return a;
62 }
63
64 static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
65 FloatPartsN *c, float_status *s,
66 int ab_mask, int abc_mask)
67 {
68 int which;
69
70 if (unlikely(abc_mask & float_cmask_snan)) {
71 float_raise(float_flag_invalid | float_flag_invalid_snan, s);
72 }
73
74 which = pickNaNMulAdd(a->cls, b->cls, c->cls,
75 ab_mask == float_cmask_infzero, s);
76
77 if (s->default_nan_mode || which == 3) {
78 /*
79 * Note that this check is after pickNaNMulAdd so that function
80 * has an opportunity to set the Invalid flag for infzero.
81 */
82 parts_default_nan(a, s);
83 return a;
84 }
85
86 switch (which) {
87 case 0:
88 break;
89 case 1:
90 a = b;
91 break;
92 case 2:
93 a = c;
94 break;
95 default:
96 g_assert_not_reached();
97 }
98 if (is_snan(a->cls)) {
99 parts_silence_nan(a, s);
100 }
101 return a;
102 }
103
/*
 * Canonicalize the FloatParts structure.  Determine the class,
 * unbias the exponent, and normalize the fraction so the msb sits
 * at DECOMPOSED_IMPLICIT_BIT.
 */
static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
                                 const FloatFmt *fmt)
{
    if (unlikely(p->exp == 0)) {
        if (likely(frac_eqz(p))) {
            /* Zero exponent, zero fraction: true zero. */
            p->cls = float_class_zero;
        } else if (status->flush_inputs_to_zero) {
            /* Denormal input flushed to zero, per status setting. */
            float_raise(float_flag_input_denormal, status);
            p->cls = float_class_zero;
            frac_clear(p);
        } else {
            /*
             * Denormal: shift the msb up to the implicit-bit position
             * and compensate in the exponent.  m68k denormals carry an
             * explicit integer bit, hence the +!m68k_denormal term.
             */
            int shift = frac_normalize(p);
            p->cls = float_class_normal;
            p->exp = fmt->frac_shift - fmt->exp_bias
                   - shift + !fmt->m68k_denormal;
        }
    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
        /*
         * Normal number.  ARM AltHP has no Inf/NaN encodings, so its
         * maximum exponent also denotes a normal number.
         */
        p->cls = float_class_normal;
        p->exp -= fmt->exp_bias;
        frac_shl(p, fmt->frac_shift);
        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
    } else if (likely(frac_eqz(p))) {
        /* Maximum exponent, zero fraction: infinity. */
        p->cls = float_class_inf;
    } else {
        /* Maximum exponent, non-zero fraction: NaN; classify qNaN/sNaN
           from the shifted fraction per the target's convention. */
        frac_shl(p, fmt->frac_shift);
        p->cls = (parts_is_snan_frac(p->frac_hi, status)
                  ? float_class_snan : float_class_qnan);
    }
}
137
/*
 * Round and uncanonicalize a floating-point number by parts. There
 * are FRAC_SHIFT bits that may require rounding at the bottom of the
 * fraction; these bits will be removed. The exponent will be biased
 * by EXP_BIAS and must be bounded by [0, EXP_MAX-1].
 */
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
                                   const FloatFmt *fmt)
{
    const int exp_max = fmt->exp_max;
    const int frac_shift = fmt->frac_shift;
    const uint64_t round_mask = fmt->round_mask;
    const uint64_t frac_lsb = round_mask + 1;
    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
    const uint64_t roundeven_mask = round_mask | frac_lsb;
    uint64_t inc;
    bool overflow_norm = false;
    int exp, flags = 0;

    /*
     * Pre-compute the rounding increment for the sticky bits, and
     * whether overflow should produce the maximum normal rather than
     * infinity.  When N > 64 and frac_lsb == 0, the result lsb lives
     * in frac_hi bit 0, not in frac_lo.
     */
    switch (s->float_rounding_mode) {
    case float_round_nearest_even:
        if (N > 64 && frac_lsb == 0) {
            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        } else {
            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        }
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        overflow_norm = true;
        inc = 0;
        break;
    case float_round_up:
        inc = p->sign ? 0 : round_mask;
        overflow_norm = p->sign;
        break;
    case float_round_down:
        inc = p->sign ? round_mask : 0;
        overflow_norm = !p->sign;
        break;
    case float_round_to_odd:
        overflow_norm = true;
        /* fall through */
    case float_round_to_odd_inf:
        if (N > 64 && frac_lsb == 0) {
            inc = p->frac_hi & 1 ? 0 : round_mask;
        } else {
            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
        }
        break;
    default:
        g_assert_not_reached();
    }

    exp = p->exp + fmt->exp_bias;
    if (likely(exp > 0)) {
        /* Normal result: round, then check for exponent overflow. */
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                /* Rounding carried out of the fraction; renormalize. */
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }

        if (fmt->arm_althp) {
            /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
            if (unlikely(exp > exp_max)) {
                /* Overflow. Return the maximum normal. */
                flags = float_flag_invalid;
                exp = exp_max;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            }
        } else if (unlikely(exp >= exp_max)) {
            flags |= float_flag_overflow;
            if (s->rebias_overflow) {
                /* x86-style re-bias: wrap the exponent back in range. */
                exp -= fmt->exp_re_bias;
            } else if (overflow_norm) {
                /* Rounding mode directs overflow toward max normal. */
                flags |= float_flag_inexact;
                exp = exp_max - 1;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            } else {
                flags |= float_flag_inexact;
                p->cls = float_class_inf;
                exp = exp_max;
                frac_clear(p);
            }
        }
        frac_shr(p, frac_shift);
    } else if (unlikely(s->rebias_underflow)) {
        /* x86-style re-bias of underflowed results. */
        flags |= float_flag_underflow;
        exp += fmt->exp_re_bias;
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }
        frac_shr(p, frac_shift);
    } else if (s->flush_to_zero) {
        /* Denormal output flushed to zero. */
        flags |= float_flag_output_denormal;
        p->cls = float_class_zero;
        exp = 0;
        frac_clear(p);
    } else {
        /* Denormal result path. */
        bool is_tiny = s->tininess_before_rounding || exp < 0;

        if (!is_tiny) {
            /* Tininess after rounding: tiny iff rounding at full
               precision would not carry into the exponent. */
            FloatPartsN discard;
            is_tiny = !frac_addi(&discard, p, inc);
        }

        /* Shift into denormal position, jamming shifted-out bits. */
        frac_shrjam(p, !fmt->m68k_denormal - exp);

        if (p->frac_lo & round_mask) {
            /* Need to recompute round-to-even/round-to-odd. */
            switch (s->float_rounding_mode) {
            case float_round_nearest_even:
                if (N > 64 && frac_lsb == 0) {
                    inc = ((p->frac_hi & 1) ||
                           (p->frac_lo & round_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                } else {
                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                }
                break;
            case float_round_to_odd:
            case float_round_to_odd_inf:
                if (N > 64 && frac_lsb == 0) {
                    inc = p->frac_hi & 1 ? 0 : round_mask;
                } else {
                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
                }
                break;
            default:
                break;
            }
            flags |= float_flag_inexact;
            frac_addi(p, p, inc);
            p->frac_lo &= ~round_mask;
        }

        /* Rounding may have produced the smallest normal. */
        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
        frac_shr(p, frac_shift);

        if (is_tiny && (flags & float_flag_inexact)) {
            flags |= float_flag_underflow;
        }
        if (exp == 0 && frac_eqz(p)) {
            p->cls = float_class_zero;
        }
    }
    p->exp = exp;
    float_raise(flags, s);
}
304
305 static void partsN(uncanon)(FloatPartsN *p, float_status *s,
306 const FloatFmt *fmt)
307 {
308 if (likely(p->cls == float_class_normal)) {
309 parts_uncanon_normal(p, s, fmt);
310 } else {
311 switch (p->cls) {
312 case float_class_zero:
313 p->exp = 0;
314 frac_clear(p);
315 return;
316 case float_class_inf:
317 g_assert(!fmt->arm_althp);
318 p->exp = fmt->exp_max;
319 frac_clear(p);
320 return;
321 case float_class_qnan:
322 case float_class_snan:
323 g_assert(!fmt->arm_althp);
324 p->exp = fmt->exp_max;
325 frac_shr(p, fmt->frac_shift);
326 return;
327 default:
328 break;
329 }
330 g_assert_not_reached();
331 }
332 }
333
/*
 * Returns the result of adding or subtracting the values of the
 * floating-point values `a' and `b'. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.  SUBTRACT selects b's effective sign.
 */
static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, bool subtract)
{
    /* Effective sign of b, after accounting for subtraction. */
    bool b_sign = b->sign ^ subtract;
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (a->sign != b_sign) {
        /* Subtraction */
        if (likely(ab_mask == float_cmask_normal)) {
            if (parts_sub_normal(a, b)) {
                return a;
            }
            /* Subtract was exact, fall through to set sign. */
            ab_mask = float_cmask_zero;
        }

        if (ab_mask == float_cmask_zero) {
            /* Exact zero difference is -0 only when rounding down. */
            a->sign = s->float_rounding_mode == float_round_down;
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (a->cls != float_class_inf) {
                /* N - Inf */
                goto return_b;
            }
            if (b->cls != float_class_inf) {
                /* Inf - N */
                return a;
            }
            /* Inf - Inf */
            float_raise(float_flag_invalid | float_flag_invalid_isi, s);
            parts_default_nan(a, s);
            return a;
        }
    } else {
        /* Addition */
        if (likely(ab_mask == float_cmask_normal)) {
            parts_add_normal(a, b);
            return a;
        }

        if (ab_mask == float_cmask_zero) {
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            a->cls = float_class_inf;
            return a;
        }
    }

    /* Remaining cases mix one zero with one normal operand. */
    if (b->cls == float_class_zero) {
        g_assert(a->cls == float_class_normal);
        return a;
    }

    g_assert(a->cls == float_class_zero);
    g_assert(b->cls == float_class_normal);
 return_b:
    b->sign = b_sign;
    return b;

 p_nan:
    return parts_pick_nan(a, b, s);
}
414
415 /*
416 * Returns the result of multiplying the floating-point values `a' and
417 * `b'. The operation is performed according to the IEC/IEEE Standard
418 * for Binary Floating-Point Arithmetic.
419 */
420 static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
421 float_status *s)
422 {
423 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
424 bool sign = a->sign ^ b->sign;
425
426 if (likely(ab_mask == float_cmask_normal)) {
427 FloatPartsW tmp;
428
429 frac_mulw(&tmp, a, b);
430 frac_truncjam(a, &tmp);
431
432 a->exp += b->exp + 1;
433 if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
434 frac_add(a, a, a);
435 a->exp -= 1;
436 }
437
438 a->sign = sign;
439 return a;
440 }
441
442 /* Inf * Zero == NaN */
443 if (unlikely(ab_mask == float_cmask_infzero)) {
444 float_raise(float_flag_invalid | float_flag_invalid_imz, s);
445 parts_default_nan(a, s);
446 return a;
447 }
448
449 if (unlikely(ab_mask & float_cmask_anynan)) {
450 return parts_pick_nan(a, b, s);
451 }
452
453 /* Multiply by 0 or Inf */
454 if (ab_mask & float_cmask_inf) {
455 a->cls = float_class_inf;
456 a->sign = sign;
457 return a;
458 }
459
460 g_assert(ab_mask & float_cmask_zero);
461 a->cls = float_class_zero;
462 a->sign = sign;
463 return a;
464 }
465
/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b' then adding 'c', with no intermediate rounding step after the
 * multiplication. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
 * The flags argument allows the caller to select negation of the
 * addend, the intermediate product, or the final result. (The
 * difference between this and having the caller do a separate
 * negation is that negating externally will flip the sign bit on NaNs.)
 *
 * Requires A and C extracted into a double-sized structure to provide the
 * extra space for the widening multiply.
 */
static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
                                   FloatPartsN *c, int flags, float_status *s)
{
    int ab_mask, abc_mask;
    FloatPartsW p_widen, c_widen;

    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    abc_mask = float_cmask(c->cls) | ab_mask;

    /*
     * It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (unlikely(abc_mask & float_cmask_anynan)) {
        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
    }

    if (flags & float_muladd_negate_c) {
        c->sign ^= 1;
    }

    /* Compute the sign of the product into A. */
    a->sign ^= b->sign;
    if (flags & float_muladd_negate_product) {
        a->sign ^= 1;
    }

    if (unlikely(ab_mask != float_cmask_normal)) {
        if (unlikely(ab_mask == float_cmask_infzero)) {
            /* Inf * 0 + c is invalid regardless of c. */
            float_raise(float_flag_invalid | float_flag_invalid_imz, s);
            goto d_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (c->cls == float_class_inf && a->sign != c->sign) {
                /* Inf - Inf is invalid. */
                float_raise(float_flag_invalid | float_flag_invalid_isi, s);
                goto d_nan;
            }
            goto return_inf;
        }

        /* Product is zero; result is determined by C. */
        g_assert(ab_mask & float_cmask_zero);
        if (c->cls == float_class_normal) {
            *a = *c;
            goto return_normal;
        }
        if (c->cls == float_class_zero) {
            if (a->sign != c->sign) {
                goto return_sub_zero;
            }
            goto return_zero;
        }
        g_assert(c->cls == float_class_inf);
    }

    if (unlikely(c->cls == float_class_inf)) {
        a->sign = c->sign;
        goto return_inf;
    }

    /* Perform the multiplication step. */
    p_widen.sign = a->sign;
    p_widen.exp = a->exp + b->exp + 1;
    frac_mulw(&p_widen, a, b);
    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        /* Product in [1,2): renormalize by one bit. */
        frac_add(&p_widen, &p_widen, &p_widen);
        p_widen.exp -= 1;
    }

    /* Perform the addition step. */
    if (c->cls != float_class_zero) {
        /* Zero-extend C to less significant bits. */
        frac_widen(&c_widen, c);
        c_widen.exp = c->exp;

        if (a->sign == c->sign) {
            parts_add_normal(&p_widen, &c_widen);
        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
            goto return_sub_zero;
        }
    }

    /* Narrow with sticky bit, for proper rounding later. */
    frac_truncjam(a, &p_widen);
    a->sign = p_widen.sign;
    a->exp = p_widen.exp;

 return_normal:
    if (flags & float_muladd_halve_result) {
        a->exp -= 1;
    }
 finish_sign:
    if (flags & float_muladd_negate_result) {
        a->sign ^= 1;
    }
    return a;

 return_sub_zero:
    /* Exact zero difference is -0 only when rounding down. */
    a->sign = s->float_rounding_mode == float_round_down;
 return_zero:
    a->cls = float_class_zero;
    goto finish_sign;

 return_inf:
    a->cls = float_class_inf;
    goto finish_sign;

 d_nan:
    parts_default_nan(a, s);
    return a;
}
592
593 /*
594 * Returns the result of dividing the floating-point value `a' by the
595 * corresponding value `b'. The operation is performed according to
596 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
597 */
598 static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
599 float_status *s)
600 {
601 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
602 bool sign = a->sign ^ b->sign;
603
604 if (likely(ab_mask == float_cmask_normal)) {
605 a->sign = sign;
606 a->exp -= b->exp + frac_div(a, b);
607 return a;
608 }
609
610 /* 0/0 or Inf/Inf => NaN */
611 if (unlikely(ab_mask == float_cmask_zero)) {
612 float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
613 goto d_nan;
614 }
615 if (unlikely(ab_mask == float_cmask_inf)) {
616 float_raise(float_flag_invalid | float_flag_invalid_idi, s);
617 goto d_nan;
618 }
619
620 /* All the NaN cases */
621 if (unlikely(ab_mask & float_cmask_anynan)) {
622 return parts_pick_nan(a, b, s);
623 }
624
625 a->sign = sign;
626
627 /* Inf / X */
628 if (a->cls == float_class_inf) {
629 return a;
630 }
631
632 /* 0 / X */
633 if (a->cls == float_class_zero) {
634 return a;
635 }
636
637 /* X / Inf */
638 if (b->cls == float_class_inf) {
639 a->cls = float_class_zero;
640 return a;
641 }
642
643 /* X / 0 => Inf */
644 g_assert(b->cls == float_class_zero);
645 float_raise(float_flag_divbyzero, s);
646 a->cls = float_class_inf;
647 return a;
648
649 d_nan:
650 parts_default_nan(a, s);
651 return a;
652 }
653
654 /*
655 * Floating point remainder, per IEC/IEEE, or modulus.
656 */
657 static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
658 uint64_t *mod_quot, float_status *s)
659 {
660 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
661
662 if (likely(ab_mask == float_cmask_normal)) {
663 frac_modrem(a, b, mod_quot);
664 return a;
665 }
666
667 if (mod_quot) {
668 *mod_quot = 0;
669 }
670
671 /* All the NaN cases */
672 if (unlikely(ab_mask & float_cmask_anynan)) {
673 return parts_pick_nan(a, b, s);
674 }
675
676 /* Inf % N; N % 0 */
677 if (a->cls == float_class_inf || b->cls == float_class_zero) {
678 float_raise(float_flag_invalid, s);
679 parts_default_nan(a, s);
680 return a;
681 }
682
683 /* N % Inf; 0 % N */
684 g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
685 return a;
686 }
687
/*
 * Square Root
 *
 * The base algorithm is lifted from
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
 * and is thus MIT licenced.
 *
 * Fixed-point Newton-Raphson iteration on 1/sqrt(m), seeded from a
 * 7-bit table, widened from 32-bit to 64-bit to 128-bit arithmetic
 * as required by the precision of the format.
 */
static void partsN(sqrt)(FloatPartsN *a, float_status *status,
                         const FloatFmt *fmt)
{
    const uint32_t three32 = 3u << 30;
    const uint64_t three64 = 3ull << 62;
    uint32_t d32, m32, r32, s32, u32; /* 32-bit computation */
    uint64_t d64, m64, r64, s64, u64; /* 64-bit computation */
    uint64_t dh, dl, rh, rl, sh, sl, uh, ul; /* 128-bit computation */
    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
    uint64_t discard;
    bool exp_odd;
    size_t index;

    /* Special cases: NaN propagates, +0/-0 and +Inf return as-is,
       -Inf is invalid. */
    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, status);
            return;
        case float_class_zero:
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            g_assert_not_reached();
        }
    }

    /* sqrt of a negative number is invalid. */
    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /*
     * Argument reduction.
     * x = 4^e frac; with integer e, and frac in [1, 4)
     * m = frac fixed point at bit 62, since we're in base 4.
     * If base-2 exponent is odd, exchange that for multiply by 2,
     * which results in no shift.
     */
    exp_odd = a->exp & 1;
    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
    if (!exp_odd) {
        frac_shr(a, 1);
    }

    /*
     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
     *
     * Initial estimate:
     * 7-bit lookup table (1-bit exponent and 6-bit significand).
     *
     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
     * a table lookup is faster and needs one less iteration.
     * The 7-bit table gives |e| < 0x1.fdp-9.
     *
     * A Newton-Raphson iteration for r is
     * s = m*r
     * d = s*r
     * u = 3 - d
     * r = r*u/2
     *
     * Fixed point representations:
     * m, s, d, u, three are all 2.30; r is 0.32
     */
    m64 = a->frac_hi;
    m32 = m64 >> 32;

    r32 = rsqrt_tab[index] << 16;
    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */

    s32 = ((uint64_t)m32 * r32) >> 32;
    d32 = ((uint64_t)s32 * r32) >> 32;
    u32 = three32 - d32;

    if (N == 64) {
        /* float64 or smaller */

        r32 = ((uint64_t)r32 * u32) >> 31;
        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */

        s32 = ((uint64_t)m32 * r32) >> 32;
        d32 = ((uint64_t)s32 * r32) >> 32;
        u32 = three32 - d32;

        if (fmt->frac_size <= 23) {
            /* float32 or smaller */

            s32 = ((uint64_t)s32 * u32) >> 32; /* 3.29 */
            s32 = (s32 - 1) >> 6; /* 9.23 */
            /* s < sqrt(m) < s + 0x1.08p-23 */

            /* compute nearest rounded result to 2.23 bits */
            uint32_t d0 = (m32 << 16) - s32 * s32;
            uint32_t d1 = s32 - d0;
            uint32_t d2 = d1 + s32 + 1;
            s32 += d1 >> 31;
            a->frac_hi = (uint64_t)s32 << (64 - 25);

            /* increment or decrement for inexact */
            if (d2 != 0) {
                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
            }
            goto done;
        }

        /* float64 */

        r64 = (uint64_t)r32 * u32 * 2;
        /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */
        mul64To128(m64, r64, &s64, &discard);
        mul64To128(s64, r64, &d64, &discard);
        u64 = three64 - d64;

        mul64To128(s64, u64, &s64, &discard); /* 3.61 */
        s64 = (s64 - 2) >> 9; /* 12.52 */

        /* Compute nearest rounded result */
        uint64_t d0 = (m64 << 42) - s64 * s64;
        uint64_t d1 = s64 - d0;
        uint64_t d2 = d1 + s64 + 1;
        s64 += d1 >> 63;
        a->frac_hi = s64 << (64 - 54);

        /* increment or decrement for inexact */
        if (d2 != 0) {
            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
        }
        goto done;
    }

    /* float128 path: two more 64-bit iterations, then 128-bit. */
    r64 = (uint64_t)r32 * u32 * 2;
    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &r64, &discard);
    r64 <<= 1;
    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &rh, &rl);
    add128(rh, rl, rh, rl, &rh, &rl);
    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */

    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
    sub128(three64, 0, dh, dl, &uh, &ul);
    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard); /* 3.125 */
    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */

    sub128(sh, sl, 0, 4, &sh, &sl);
    shift128Right(sh, sl, 13, &sh, &sl); /* 16.112 */
    /* s < sqrt(m) < s + 1ulp */

    /* Compute nearest rounded result */
    mul64To128(sl, sl, &d0h, &d0l);
    d0h += 2 * sh * sl;
    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
    add128(sh, sl, 0, 1, &d2h, &d2l);
    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
    shift128Left(sh, sl, 128 - 114, &sh, &sl);

    /* increment or decrement for inexact */
    if (d2h | d2l) {
        if ((int64_t)(d1h ^ d2h) < 0) {
            sub128(sh, sl, 0, 1, &sh, &sl);
        } else {
            add128(sh, sl, 0, 1, &sh, &sl);
        }
    }
    a->frac_lo = sl;
    a->frac_hi = sh;

 done:
    /* Convert back from base 4 to base 2. */
    a->exp >>= 1;
    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        frac_add(a, a, a);
    } else {
        a->exp += 1;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
    parts_default_nan(a, status);
}
893
/*
 * Rounds the floating-point value `a' to an integer, and returns the
 * result as a floating-point value. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 *
 * parts_round_to_int_normal is an internal helper function for
 * normal numbers only, returning true for inexact but not directly
 * raising float_flag_inexact.
 */
static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
                                        int scale, int frac_size)
{
    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
    int shift_adj;

    /* Clamp the scale so the exponent adjustment cannot overflow. */
    scale = MIN(MAX(scale, -0x10000), 0x10000);
    a->exp += scale;

    if (a->exp < 0) {
        bool one;

        /* All fractional */
        switch (rmode) {
        case float_round_nearest_even:
            one = false;
            if (a->exp == -1) {
                FloatPartsN tmp;
                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
                frac_add(&tmp, a, a);
                /* Anything remaining means frac > 0.5. */
                one = !frac_eqz(&tmp);
            }
            break;
        case float_round_ties_away:
            one = a->exp == -1;
            break;
        case float_round_to_zero:
            one = false;
            break;
        case float_round_up:
            one = !a->sign;
            break;
        case float_round_down:
            one = a->sign;
            break;
        case float_round_to_odd:
            one = true;
            break;
        default:
            g_assert_not_reached();
        }

        /* Result is either +/-1 or +/-0, preserving the sign. */
        frac_clear(a);
        a->exp = 0;
        if (one) {
            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
        } else {
            a->cls = float_class_zero;
        }
        return true;
    }

    if (a->exp >= frac_size) {
        /* All integral */
        return false;
    }

    if (N > 64 && a->exp < N - 64) {
        /*
         * Rounding is not in the low word -- shift lsb to bit 2,
         * which leaves room for sticky and rounding bit.
         */
        shift_adj = (N - 1) - (a->exp + 2);
        frac_shrjam(a, shift_adj);
        frac_lsb = 1 << 2;
    } else {
        shift_adj = 0;
        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
    }

    frac_lsbm1 = frac_lsb >> 1;
    rnd_mask = frac_lsb - 1;
    rnd_even_mask = rnd_mask | frac_lsb;

    if (!(a->frac_lo & rnd_mask)) {
        /* Fractional bits already clear, undo the shift above. */
        frac_shl(a, shift_adj);
        return false;
    }

    /* Select the rounding increment for the fractional bits. */
    switch (rmode) {
    case float_round_nearest_even:
        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        inc = 0;
        break;
    case float_round_up:
        inc = a->sign ? 0 : rnd_mask;
        break;
    case float_round_down:
        inc = a->sign ? rnd_mask : 0;
        break;
    case float_round_to_odd:
        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
        break;
    default:
        g_assert_not_reached();
    }

    if (shift_adj == 0) {
        if (frac_addi(a, a, inc)) {
            /* Rounding carried out of the fraction; renormalize. */
            frac_shr(a, 1);
            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
            a->exp++;
        }
        a->frac_lo &= ~rnd_mask;
    } else {
        frac_addi(a, a, inc);
        a->frac_lo &= ~rnd_mask;
        /* Be careful shifting back, not to overflow */
        frac_shl(a, shift_adj - 1);
        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
            a->exp++;
        } else {
            frac_add(a, a, a);
        }
    }
    return true;
}
1028
1029 static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
1030 int scale, float_status *s,
1031 const FloatFmt *fmt)
1032 {
1033 switch (a->cls) {
1034 case float_class_qnan:
1035 case float_class_snan:
1036 parts_return_nan(a, s);
1037 break;
1038 case float_class_zero:
1039 case float_class_inf:
1040 break;
1041 case float_class_normal:
1042 if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
1043 float_raise(float_flag_inexact, s);
1044 }
1045 break;
1046 default:
1047 g_assert_not_reached();
1048 }
1049 }
1050
/*
 * Returns the result of converting the floating-point value `a' to
 * the two's complement integer format. The conversion is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode. If `a' is a NaN,
 * the largest positive integer is returned. Otherwise, if the
 * conversion overflows, the largest integer with the same sign as `a'
 * is returned.
 */
static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = max;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p->sign ? min : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
        }

        if (p->exp <= DECOMPOSED_BINARY_POINT) {
            /* Integral value fits in frac_hi; shift it into place. */
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
        } else {
            /* Certain overflow; force saturation below. */
            r = UINT64_MAX;
        }
        if (p->sign) {
            /* Compare against -min (unsigned) to allow INT_MIN. */
            if (r <= -(uint64_t)min) {
                r = -r;
            } else {
                flags = float_flag_invalid | float_flag_invalid_cvti;
                r = min;
            }
        } else if (r > max) {
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = max;
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}
1116
/*
 * Returns the result of converting the floating-point value `a' to
 * the unsigned integer format. The conversion is performed according
 * to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode. If `a' is a NaN,
 * the largest unsigned integer is returned. Otherwise, if the
 * conversion overflows, the largest unsigned integer is returned. If
 * the 'a' is negative, the result is rounded and zero is returned;
 * values that do not round to zero will raise the inexact exception
 * flag.
 */
static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
                                      int scale, uint64_t max, float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = max;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p->sign ? 0 : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
            if (p->cls == float_class_zero) {
                /* Negative value rounded to zero: inexact but valid. */
                r = 0;
                break;
            }
        }

        if (p->sign) {
            /* Non-zero negative value cannot be represented. */
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = 0;
        } else if (p->exp > DECOMPOSED_BINARY_POINT) {
            /* Too large for uint64; saturate. */
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = max;
        } else {
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
            if (r > max) {
                flags = float_flag_invalid | float_flag_invalid_cvti;
                r = max;
            }
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}
1184
/*
 * Like partsN(float_to_sint), except do not saturate the result.
 * Instead, return the rounded unbounded precision two's complement result,
 * modulo 2**(bitsm1 + 1).
 */
static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p,
                                            FloatRoundMode rmode,
                                            int bitsm1, float_status *s)
{
    int flags = 0;
    uint64_t r;
    bool overflow = false;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = 0;
        break;

    case float_class_inf:
        overflow = true;
        r = 0;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, 0, N - 2)) {
            flags = float_flag_inexact;
        }

        if (p->exp <= DECOMPOSED_BINARY_POINT) {
            /*
             * Because we rounded to integral, and exp < 64,
             * we know frac_low is zero.
             */
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
            if (p->exp < bitsm1) {
                /* Result in range. */
            } else if (p->exp == bitsm1) {
                /* The only in-range value is INT_MIN. */
                overflow = !p->sign || p->frac_hi != DECOMPOSED_IMPLICIT_BIT;
            } else {
                overflow = true;
            }
        } else {
            /* Overflow, but there might still be bits to return. */
            int shl = p->exp - DECOMPOSED_BINARY_POINT;
            if (shl < N) {
                frac_shl(p, shl);
                r = p->frac_hi;
            } else {
                r = 0;
            }
            overflow = true;
        }

        if (p->sign) {
            r = -r;
        }
        break;

    default:
        g_assert_not_reached();
    }

    if (overflow) {
        flags = float_flag_invalid | float_flag_invalid_cvti;
    }
    float_raise(flags, s);
    return r;
}
1262
1263 /*
1264 * Integer to float conversions
1265 *
1266 * Returns the result of converting the two's complement integer `a'
1267 * to the floating-point format. The conversion is performed according
1268 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1269 */
1270 static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
1271 int scale, float_status *s)
1272 {
1273 uint64_t f = a;
1274 int shift;
1275
1276 memset(p, 0, sizeof(*p));
1277
1278 if (a == 0) {
1279 p->cls = float_class_zero;
1280 return;
1281 }
1282
1283 p->cls = float_class_normal;
1284 if (a < 0) {
1285 f = -f;
1286 p->sign = true;
1287 }
1288 shift = clz64(f);
1289 scale = MIN(MAX(scale, -0x10000), 0x10000);
1290
1291 p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1292 p->frac_hi = f << shift;
1293 }
1294
1295 /*
1296 * Unsigned Integer to float conversions
1297 *
1298 * Returns the result of converting the unsigned integer `a' to the
1299 * floating-point format. The conversion is performed according to the
1300 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1301 */
1302 static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
1303 int scale, float_status *status)
1304 {
1305 memset(p, 0, sizeof(*p));
1306
1307 if (a == 0) {
1308 p->cls = float_class_zero;
1309 } else {
1310 int shift = clz64(a);
1311 scale = MIN(MAX(scale, -0x10000), 0x10000);
1312 p->cls = float_class_normal;
1313 p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1314 p->frac_hi = a << shift;
1315 }
1316 }
1317
1318 /*
1319 * Float min/max.
1320 */
static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, int flags)
{
    /* Combined classification of both operands, as float_cmask_* bits. */
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    int a_exp, b_exp, cmp;

    if (unlikely(ab_mask & float_cmask_anynan)) {
        /*
         * For minNum/maxNum (IEEE 754-2008)
         * or minimumNumber/maximumNumber (IEEE 754-2019),
         * if one operand is a QNaN, and the other
         * operand is numerical, then return numerical argument.
         */
        if ((flags & (minmax_isnum | minmax_isnumber))
            && !(ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_qnan)) {
            return is_nan(a->cls) ? b : a;
        }

        /*
         * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
         * are removed and replaced with minimum, minimumNumber, maximum
         * and maximumNumber.
         * minimumNumber/maximumNumber behavior for SNaN is changed to:
         *   If both operands are NaNs, a QNaN is returned.
         *   If either operand is a SNaN,
         *   an invalid operation exception is signaled,
         *   but unless both operands are NaNs,
         *   the SNaN is otherwise ignored and not converted to a QNaN.
         */
        if ((flags & minmax_isnumber)
            && (ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_anynan)) {
            float_raise(float_flag_invalid, s);
            return is_nan(a->cls) ? b : a;
        }

        /* Both NaN, or SNaN without the isnumber exemption: propagate. */
        return parts_pick_nan(a, b, s);
    }

    a_exp = a->exp;
    b_exp = b->exp;

    if (unlikely(ab_mask != float_cmask_normal)) {
        /*
         * Substitute saturated exponents for infinities and zeros so
         * that they order correctly against normals in the exponent
         * comparison below.
         */
        switch (a->cls) {
        case float_class_normal:
            break;
        case float_class_inf:
            a_exp = INT16_MAX;
            break;
        case float_class_zero:
            a_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }
        switch (b->cls) {
        case float_class_normal:
            break;
        case float_class_inf:
            b_exp = INT16_MAX;
            break;
        case float_class_zero:
            b_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }
    }

    /* Compare magnitudes. */
    cmp = a_exp - b_exp;
    if (cmp == 0) {
        cmp = frac_cmp(a, b);
    }

    /*
     * Take the sign into account.
     * For ismag, only do this if the magnitudes are equal.
     */
    if (!(flags & minmax_ismag) || cmp == 0) {
        if (a->sign != b->sign) {
            /* For differing signs, the negative operand is less. */
            cmp = a->sign ? -1 : 1;
        } else if (a->sign) {
            /* For two negative operands, invert the magnitude comparison. */
            cmp = -cmp;
        }
    }

    /* cmp now orders a vs b for "max"; invert it for "min". */
    if (flags & minmax_ismin) {
        cmp = -cmp;
    }
    return cmp < 0 ? b : a;
}
1418
1419 /*
1420 * Floating point compare
1421 */
static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s, bool is_quiet)
{
    /* Combined classification of both operands, as float_cmask_* bits. */
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    /* Fast path: both operands are finite non-zero numbers. */
    if (likely(ab_mask == float_cmask_normal)) {
        FloatRelation cmp;

        if (a->sign != b->sign) {
            /* Differing signs of non-zero values decide the ordering. */
            goto a_sign;
        }
        /* Same sign: compare magnitudes by exponent, then by fraction. */
        if (a->exp == b->exp) {
            cmp = frac_cmp(a, b);
        } else if (a->exp < b->exp) {
            cmp = float_relation_less;
        } else {
            cmp = float_relation_greater;
        }
        if (a->sign) {
            /* Both negative: larger magnitude means smaller value. */
            cmp = -cmp;
        }
        return cmp;
    }

    if (unlikely(ab_mask & float_cmask_anynan)) {
        /*
         * Any NaN compares unordered.  An SNaN always raises invalid;
         * a QNaN raises invalid only for signaling (non-quiet) compares.
         */
        if (ab_mask & float_cmask_snan) {
            float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        } else if (!is_quiet) {
            float_raise(float_flag_invalid, s);
        }
        return float_relation_unordered;
    }

    if (ab_mask & float_cmask_zero) {
        if (ab_mask == float_cmask_zero) {
            /* +0 and -0 compare equal regardless of sign. */
            return float_relation_equal;
        } else if (a->cls == float_class_zero) {
            /* a == 0, b non-zero: b's sign decides. */
            goto b_sign;
        } else {
            /* b == 0, a non-zero: a's sign decides. */
            goto a_sign;
        }
    }

    if (ab_mask == float_cmask_inf) {
        if (a->sign == b->sign) {
            /* Like-signed infinities compare equal. */
            return float_relation_equal;
        }
        /* Oppositely-signed infinities: fall through to a's sign. */
    } else if (b->cls == float_class_inf) {
        /* b infinite, a finite: b's sign decides. */
        goto b_sign;
    } else {
        g_assert(a->cls == float_class_inf);
    }

 a_sign:
    /* Result determined by a's sign alone. */
    return a->sign ? float_relation_less : float_relation_greater;
 b_sign:
    /* Result determined by b's sign alone, inverted. */
    return b->sign ? float_relation_greater : float_relation_less;
}
1480
1481 /*
1482 * Multiply A by 2 raised to the power N.
1483 */
1484 static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
1485 {
1486 switch (a->cls) {
1487 case float_class_snan:
1488 case float_class_qnan:
1489 parts_return_nan(a, s);
1490 break;
1491 case float_class_zero:
1492 case float_class_inf:
1493 break;
1494 case float_class_normal:
1495 a->exp += MIN(MAX(n, -0x10000), 0x10000);
1496 break;
1497 default:
1498 g_assert_not_reached();
1499 }
1500 }
1501
1502 /*
1503 * Return log2(A)
1504 */
static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
{
    uint64_t a0, a1, r, t, ign;
    FloatPartsN f;
    int i, n, a_exp, f_exp;

    /* Dispose of the special cases before the main algorithm. */
    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, s);
            return;
        case float_class_zero:
            float_raise(float_flag_divbyzero, s);
            /* log2(0) = -inf */
            a->cls = float_class_inf;
            a->sign = 1;
            return;
        case float_class_inf:
            /* log2(+inf) = +inf; log2(-inf) is invalid. */
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            break;
        }
        g_assert_not_reached();
    }
    /* log2 of any negative value is invalid. */
    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /* TODO: This algorithm loses bits too quickly for float128. */
    g_assert(N == 64);

    /*
     * Split log2(a) = a_exp + log2(fraction).  The integer part a_exp is
     * converted exactly at the end; the loop below computes the fractional
     * part into r, a fixed-point value whose msb (t's initial position)
     * has weight 2**f_exp.
     */
    a_exp = a->exp;
    f_exp = -1;

    r = 0;                          /* accumulated fraction bits of log2 */
    t = DECOMPOSED_IMPLICIT_BIT;    /* weight of the bit currently computed */
    a0 = a->frac_hi;                /* working fraction, high word */
    a1 = 0;                         /* working fraction, low word */

    n = fmt->frac_size + 2;
    if (unlikely(a_exp == -1)) {
        /*
         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
         * When the value is very close to 1.0, there are lots of 1's in
         * the msb parts of the fraction.  At the end, when we subtract
         * this value from -1.0, we can see a catastrophic loss of precision,
         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
         * bits of y in the final result.  To minimize this, compute as many
         * digits as we can.
         * ??? This case needs another algorithm to avoid this.
         */
        n = fmt->frac_size * 2 + 2;
        /* Don't compute a value overlapping the sticky bit */
        n = MIN(n, 62);
    }

    /*
     * Produce one fraction bit of log2 per iteration by repeated squaring:
     * squaring the working value doubles its log2, and whether the square
     * reaches the implicit-bit position tells us the next bit.  The
     * multiply is chosen by how many significant low bits remain.
     */
    for (i = 0; i < n; i++) {
        if (a1) {
            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
        } else if (a0 & 0xffffffffull) {
            mul64To128(a0, a0, &a0, &a1);
        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
            /* Only high bits set: a 32x32 multiply loses nothing. */
            a0 >>= 32;
            a0 *= a0;
        } else {
            /* Working value is exactly a power of 2: the result is exact. */
            goto exact;
        }

        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
            if (unlikely(a_exp == 0 && r == 0)) {
                /*
                 * When a_exp == 0, we're computing the log2 of a value
                 * [1.0,2.0).  When the value is very close to 1.0, there
                 * are lots of 0's in the msb parts of the fraction.
                 * We need to compute more digits to produce a correct
                 * result -- restart at the top of the fraction.
                 * ??? This is likely to lose precision quickly, as for
                 * float128; we may need another method.
                 */
                f_exp -= i;
                t = r = DECOMPOSED_IMPLICIT_BIT;
                i = 0;
            } else {
                r |= t;
            }
        } else {
            /* Square fell below 2.0: renormalize by doubling, bit is 0. */
            add128(a0, a1, a0, a1, &a0, &a1);
        }
        t >>= 1;
    }

    /* Set sticky for inexact. */
    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);

 exact:
    /* Convert the exact integer part; the fraction is folded in below. */
    parts_sint_to_float(a, a_exp, 0, s);
    if (r == 0) {
        return;
    }

    /* Build the fractional part 2**f_exp * r as a normalized float. */
    memset(&f, 0, sizeof(f));
    f.cls = float_class_normal;
    f.frac_hi = r;
    f.exp = f_exp - frac_normalize(&f);

    /* Combine: integer part is negative, positive, or zero respectively. */
    if (a_exp < 0) {
        parts_sub_normal(a, &f);
    } else if (a_exp > 0) {
        parts_add_normal(a, &f);
    } else {
        *a = f;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid, s);
    parts_default_nan(a, s);
}