]> git.proxmox.com Git - mirror_qemu.git/blob - fpu/softfloat-parts.c.inc
keyval: Fix grammar comment to cover downstream prefix
[mirror_qemu.git] / fpu / softfloat-parts.c.inc
1 /*
2 * QEMU float support
3 *
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
16 */
17
18 static void partsN(return_nan)(FloatPartsN *a, float_status *s)
19 {
20 switch (a->cls) {
21 case float_class_snan:
22 float_raise(float_flag_invalid | float_flag_invalid_snan, s);
23 if (s->default_nan_mode) {
24 parts_default_nan(a, s);
25 } else {
26 parts_silence_nan(a, s);
27 }
28 break;
29 case float_class_qnan:
30 if (s->default_nan_mode) {
31 parts_default_nan(a, s);
32 }
33 break;
34 default:
35 g_assert_not_reached();
36 }
37 }
38
39 static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
40 float_status *s)
41 {
42 if (is_snan(a->cls) || is_snan(b->cls)) {
43 float_raise(float_flag_invalid | float_flag_invalid_snan, s);
44 }
45
46 if (s->default_nan_mode) {
47 parts_default_nan(a, s);
48 } else {
49 int cmp = frac_cmp(a, b);
50 if (cmp == 0) {
51 cmp = a->sign < b->sign;
52 }
53
54 if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
55 a = b;
56 }
57 if (is_snan(a->cls)) {
58 parts_silence_nan(a, s);
59 }
60 }
61 return a;
62 }
63
64 static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
65 FloatPartsN *c, float_status *s,
66 int ab_mask, int abc_mask)
67 {
68 int which;
69
70 if (unlikely(abc_mask & float_cmask_snan)) {
71 float_raise(float_flag_invalid | float_flag_invalid_snan, s);
72 }
73
74 which = pickNaNMulAdd(a->cls, b->cls, c->cls,
75 ab_mask == float_cmask_infzero, s);
76
77 if (s->default_nan_mode || which == 3) {
78 /*
79 * Note that this check is after pickNaNMulAdd so that function
80 * has an opportunity to set the Invalid flag for infzero.
81 */
82 parts_default_nan(a, s);
83 return a;
84 }
85
86 switch (which) {
87 case 0:
88 break;
89 case 1:
90 a = b;
91 break;
92 case 2:
93 a = c;
94 break;
95 default:
96 g_assert_not_reached();
97 }
98 if (is_snan(a->cls)) {
99 parts_silence_nan(a, s);
100 }
101 return a;
102 }
103
104 /*
105 * Canonicalize the FloatParts structure. Determine the class,
106 * unbias the exponent, and normalize the fraction.
107 */
108 static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
109 const FloatFmt *fmt)
110 {
111 if (unlikely(p->exp == 0)) {
112 if (likely(frac_eqz(p))) {
113 p->cls = float_class_zero;
114 } else if (status->flush_inputs_to_zero) {
115 float_raise(float_flag_input_denormal, status);
116 p->cls = float_class_zero;
117 frac_clear(p);
118 } else {
119 int shift = frac_normalize(p);
120 p->cls = float_class_normal;
121 p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
122 }
123 } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
124 p->cls = float_class_normal;
125 p->exp -= fmt->exp_bias;
126 frac_shl(p, fmt->frac_shift);
127 p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
128 } else if (likely(frac_eqz(p))) {
129 p->cls = float_class_inf;
130 } else {
131 frac_shl(p, fmt->frac_shift);
132 p->cls = (parts_is_snan_frac(p->frac_hi, status)
133 ? float_class_snan : float_class_qnan);
134 }
135 }
136
/*
 * Round and uncanonicalize a floating-point number by parts. There
 * are FRAC_SHIFT bits that may require rounding at the bottom of the
 * fraction; these bits will be removed. The exponent will be biased
 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
 *
 * @p:   normal-class value, rewritten in place; on return p->exp holds
 *       the biased exponent and the fraction has been shifted right by
 *       fmt->frac_shift.  p->cls may become zero or inf.
 * @s:   supplies the rounding mode, flush_to_zero and
 *       tininess_before_rounding; receives the accumulated flags via
 *       a single float_raise() at the end.
 * @fmt: supplies exp_max, exp_bias, frac_shift, round_mask, arm_althp.
 */
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
                                   const FloatFmt *fmt)
{
    const int exp_max = fmt->exp_max;
    const int frac_shift = fmt->frac_shift;
    const uint64_t round_mask = fmt->round_mask;
    /* Wraps to 0 when round_mask is all-ones; see the N > 64 tests. */
    const uint64_t frac_lsb = round_mask + 1;
    /* The topmost bit of round_mask, i.e. half of frac_lsb. */
    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
    const uint64_t roundeven_mask = round_mask | frac_lsb;
    /* Value added to the fraction before the round bits are cleared. */
    uint64_t inc;
    /* On overflow: true selects the largest normal, false infinity. */
    bool overflow_norm = false;
    int exp, flags = 0;

    switch (s->float_rounding_mode) {
    case float_round_nearest_even:
        if (N > 64 && frac_lsb == 0) {
            /* frac_lsb wrapped: the result lsb is bit 0 of frac_hi. */
            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        } else {
            /* No increment only for an exact tie with an even lsb. */
            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        }
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        overflow_norm = true;
        inc = 0;
        break;
    case float_round_up:
        inc = p->sign ? 0 : round_mask;
        overflow_norm = p->sign;
        break;
    case float_round_down:
        inc = p->sign ? round_mask : 0;
        overflow_norm = !p->sign;
        break;
    case float_round_to_odd:
        overflow_norm = true;
        /* fall through */
    case float_round_to_odd_inf:
        /* Increment only when the result lsb is currently clear. */
        if (N > 64 && frac_lsb == 0) {
            inc = p->frac_hi & 1 ? 0 : round_mask;
        } else {
            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
        }
        break;
    default:
        g_assert_not_reached();
    }

    exp = p->exp + fmt->exp_bias;
    if (likely(exp > 0)) {
        /* Biased exponent is positive: normal result, or overflow. */
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                /* Carry out of the fraction: renormalize. */
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }

        if (fmt->arm_althp) {
            /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
            if (unlikely(exp > exp_max)) {
                /* Overflow.  Return the maximum normal. */
                /* Note: assignment, so any inexact flag is dropped. */
                flags = float_flag_invalid;
                exp = exp_max;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            }
        } else if (unlikely(exp >= exp_max)) {
            flags |= float_flag_overflow | float_flag_inexact;
            if (overflow_norm) {
                exp = exp_max - 1;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            } else {
                p->cls = float_class_inf;
                exp = exp_max;
                frac_clear(p);
            }
        }
        frac_shr(p, frac_shift);
    } else if (s->flush_to_zero) {
        /* Biased exponent <= 0 and outputs are flushed to zero. */
        flags |= float_flag_output_denormal;
        p->cls = float_class_zero;
        exp = 0;
        frac_clear(p);
    } else {
        /* Biased exponent <= 0: produce a denormal (or round up). */
        bool is_tiny = s->tininess_before_rounding || exp < 0;

        if (!is_tiny) {
            /*
             * Here exp == 0 and tininess is judged after rounding:
             * tiny unless adding inc carries out of the fraction.
             */
            FloatPartsN discard;
            is_tiny = !frac_addi(&discard, p, inc);
        }

        frac_shrjam(p, 1 - exp);

        if (p->frac_lo & round_mask) {
            /* Need to recompute round-to-even/round-to-odd. */
            switch (s->float_rounding_mode) {
            case float_round_nearest_even:
                if (N > 64 && frac_lsb == 0) {
                    inc = ((p->frac_hi & 1) ||
                           (p->frac_lo & round_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                } else {
                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                }
                break;
            case float_round_to_odd:
            case float_round_to_odd_inf:
                if (N > 64 && frac_lsb == 0) {
                    inc = p->frac_hi & 1 ? 0 : round_mask;
                } else {
                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
                }
                break;
            default:
                /* Other modes: inc does not depend on the fraction. */
                break;
            }
            flags |= float_flag_inexact;
            frac_addi(p, p, inc);
            p->frac_lo &= ~round_mask;
        }

        /* Biased exponent is 1 if rounding set the implicit bit, else 0. */
        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
        frac_shr(p, frac_shift);

        if (is_tiny && (flags & float_flag_inexact)) {
            flags |= float_flag_underflow;
        }
        if (exp == 0 && frac_eqz(p)) {
            p->cls = float_class_zero;
        }
    }
    p->exp = exp;
    float_raise(flags, s);
}
286
287 static void partsN(uncanon)(FloatPartsN *p, float_status *s,
288 const FloatFmt *fmt)
289 {
290 if (likely(p->cls == float_class_normal)) {
291 parts_uncanon_normal(p, s, fmt);
292 } else {
293 switch (p->cls) {
294 case float_class_zero:
295 p->exp = 0;
296 frac_clear(p);
297 return;
298 case float_class_inf:
299 g_assert(!fmt->arm_althp);
300 p->exp = fmt->exp_max;
301 frac_clear(p);
302 return;
303 case float_class_qnan:
304 case float_class_snan:
305 g_assert(!fmt->arm_althp);
306 p->exp = fmt->exp_max;
307 frac_shr(p, fmt->frac_shift);
308 return;
309 default:
310 break;
311 }
312 g_assert_not_reached();
313 }
314 }
315
316 /*
317 * Returns the result of adding or subtracting the values of the
318 * floating-point values `a' and `b'. The operation is performed
319 * according to the IEC/IEEE Standard for Binary Floating-Point
320 * Arithmetic.
321 */
322 static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
323 float_status *s, bool subtract)
324 {
325 bool b_sign = b->sign ^ subtract;
326 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
327
328 if (a->sign != b_sign) {
329 /* Subtraction */
330 if (likely(ab_mask == float_cmask_normal)) {
331 if (parts_sub_normal(a, b)) {
332 return a;
333 }
334 /* Subtract was exact, fall through to set sign. */
335 ab_mask = float_cmask_zero;
336 }
337
338 if (ab_mask == float_cmask_zero) {
339 a->sign = s->float_rounding_mode == float_round_down;
340 return a;
341 }
342
343 if (unlikely(ab_mask & float_cmask_anynan)) {
344 goto p_nan;
345 }
346
347 if (ab_mask & float_cmask_inf) {
348 if (a->cls != float_class_inf) {
349 /* N - Inf */
350 goto return_b;
351 }
352 if (b->cls != float_class_inf) {
353 /* Inf - N */
354 return a;
355 }
356 /* Inf - Inf */
357 float_raise(float_flag_invalid | float_flag_invalid_isi, s);
358 parts_default_nan(a, s);
359 return a;
360 }
361 } else {
362 /* Addition */
363 if (likely(ab_mask == float_cmask_normal)) {
364 parts_add_normal(a, b);
365 return a;
366 }
367
368 if (ab_mask == float_cmask_zero) {
369 return a;
370 }
371
372 if (unlikely(ab_mask & float_cmask_anynan)) {
373 goto p_nan;
374 }
375
376 if (ab_mask & float_cmask_inf) {
377 a->cls = float_class_inf;
378 return a;
379 }
380 }
381
382 if (b->cls == float_class_zero) {
383 g_assert(a->cls == float_class_normal);
384 return a;
385 }
386
387 g_assert(a->cls == float_class_zero);
388 g_assert(b->cls == float_class_normal);
389 return_b:
390 b->sign = b_sign;
391 return b;
392
393 p_nan:
394 return parts_pick_nan(a, b, s);
395 }
396
397 /*
398 * Returns the result of multiplying the floating-point values `a' and
399 * `b'. The operation is performed according to the IEC/IEEE Standard
400 * for Binary Floating-Point Arithmetic.
401 */
402 static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
403 float_status *s)
404 {
405 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
406 bool sign = a->sign ^ b->sign;
407
408 if (likely(ab_mask == float_cmask_normal)) {
409 FloatPartsW tmp;
410
411 frac_mulw(&tmp, a, b);
412 frac_truncjam(a, &tmp);
413
414 a->exp += b->exp + 1;
415 if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
416 frac_add(a, a, a);
417 a->exp -= 1;
418 }
419
420 a->sign = sign;
421 return a;
422 }
423
424 /* Inf * Zero == NaN */
425 if (unlikely(ab_mask == float_cmask_infzero)) {
426 float_raise(float_flag_invalid | float_flag_invalid_imz, s);
427 parts_default_nan(a, s);
428 return a;
429 }
430
431 if (unlikely(ab_mask & float_cmask_anynan)) {
432 return parts_pick_nan(a, b, s);
433 }
434
435 /* Multiply by 0 or Inf */
436 if (ab_mask & float_cmask_inf) {
437 a->cls = float_class_inf;
438 a->sign = sign;
439 return a;
440 }
441
442 g_assert(ab_mask & float_cmask_zero);
443 a->cls = float_class_zero;
444 a->sign = sign;
445 return a;
446 }
447
/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b' then adding 'c', with no intermediate rounding step after the
 * multiplication. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
 * The flags argument allows the caller to select negation of the
 * addend, the intermediate product, or the final result. (The
 * difference between this and having the caller do a separate
 * negation is that negating externally will flip the sign bit on NaNs.)
 *
 * Requires A and C extracted into a double-sized structure to provide the
 * extra space for the widening multiply.
 *
 * The result is built in A and A is returned, except when an operand
 * is a NaN, in which case parts_pick_nan_muladd() chooses the return
 * value.  Note that C's sign is flipped in place when
 * float_muladd_negate_c is set.
 */
static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
                                   FloatPartsN *c, int flags, float_status *s)
{
    int ab_mask, abc_mask;
    FloatPartsW p_widen, c_widen;

    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    abc_mask = float_cmask(c->cls) | ab_mask;

    /*
     * It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (unlikely(abc_mask & float_cmask_anynan)) {
        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
    }

    if (flags & float_muladd_negate_c) {
        c->sign ^= 1;
    }

    /* Compute the sign of the product into A. */
    a->sign ^= b->sign;
    if (flags & float_muladd_negate_product) {
        a->sign ^= 1;
    }

    /* Special cases where A or B is zero or infinity. */
    if (unlikely(ab_mask != float_cmask_normal)) {
        if (unlikely(ab_mask == float_cmask_infzero)) {
            /* Inf * 0 */
            float_raise(float_flag_invalid | float_flag_invalid_imz, s);
            goto d_nan;
        }

        if (ab_mask & float_cmask_inf) {
            /* Infinite product: invalid only against an opposite Inf. */
            if (c->cls == float_class_inf && a->sign != c->sign) {
                float_raise(float_flag_invalid | float_flag_invalid_isi, s);
                goto d_nan;
            }
            goto return_inf;
        }

        /* Zero product: the result is determined by C alone. */
        g_assert(ab_mask & float_cmask_zero);
        if (c->cls == float_class_normal) {
            *a = *c;
            goto return_normal;
        }
        if (c->cls == float_class_zero) {
            if (a->sign != c->sign) {
                goto return_sub_zero;
            }
            goto return_zero;
        }
        /* C is infinite; handled by the test just below. */
        g_assert(c->cls == float_class_inf);
    }

    if (unlikely(c->cls == float_class_inf)) {
        a->sign = c->sign;
        goto return_inf;
    }

    /* Perform the multiplication step. */
    p_widen.sign = a->sign;
    p_widen.exp = a->exp + b->exp + 1;
    frac_mulw(&p_widen, a, b);
    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        /* Top bit clear: shift left one and undo the +1 above. */
        frac_add(&p_widen, &p_widen, &p_widen);
        p_widen.exp -= 1;
    }

    /* Perform the addition step. */
    if (c->cls != float_class_zero) {
        /* Zero-extend C to less significant bits. */
        frac_widen(&c_widen, c);
        c_widen.exp = c->exp;

        if (a->sign == c->sign) {
            parts_add_normal(&p_widen, &c_widen);
        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
            /* Product and addend cancelled exactly. */
            goto return_sub_zero;
        }
    }

    /* Narrow with sticky bit, for proper rounding later. */
    frac_truncjam(a, &p_widen);
    a->sign = p_widen.sign;
    a->exp = p_widen.exp;

    /* Normal result: apply the optional halving, then negation. */
 return_normal:
    if (flags & float_muladd_halve_result) {
        a->exp -= 1;
    }
    /* All non-NaN results funnel through here for optional negation. */
 finish_sign:
    if (flags & float_muladd_negate_result) {
        a->sign ^= 1;
    }
    return a;

    /* Zero from opposite signs: negative only when rounding down. */
 return_sub_zero:
    a->sign = s->float_rounding_mode == float_round_down;
    /* Zero result; a->sign has already been decided. */
 return_zero:
    a->cls = float_class_zero;
    goto finish_sign;

 return_inf:
    a->cls = float_class_inf;
    goto finish_sign;

    /* Invalid operation: default NaN, with no result negation. */
 d_nan:
    parts_default_nan(a, s);
    return a;
}
574
575 /*
576 * Returns the result of dividing the floating-point value `a' by the
577 * corresponding value `b'. The operation is performed according to
578 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
579 */
580 static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
581 float_status *s)
582 {
583 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
584 bool sign = a->sign ^ b->sign;
585
586 if (likely(ab_mask == float_cmask_normal)) {
587 a->sign = sign;
588 a->exp -= b->exp + frac_div(a, b);
589 return a;
590 }
591
592 /* 0/0 or Inf/Inf => NaN */
593 if (unlikely(ab_mask == float_cmask_zero)) {
594 float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
595 goto d_nan;
596 }
597 if (unlikely(ab_mask == float_cmask_inf)) {
598 float_raise(float_flag_invalid | float_flag_invalid_idi, s);
599 goto d_nan;
600 }
601
602 /* All the NaN cases */
603 if (unlikely(ab_mask & float_cmask_anynan)) {
604 return parts_pick_nan(a, b, s);
605 }
606
607 a->sign = sign;
608
609 /* Inf / X */
610 if (a->cls == float_class_inf) {
611 return a;
612 }
613
614 /* 0 / X */
615 if (a->cls == float_class_zero) {
616 return a;
617 }
618
619 /* X / Inf */
620 if (b->cls == float_class_inf) {
621 a->cls = float_class_zero;
622 return a;
623 }
624
625 /* X / 0 => Inf */
626 g_assert(b->cls == float_class_zero);
627 float_raise(float_flag_divbyzero, s);
628 a->cls = float_class_inf;
629 return a;
630
631 d_nan:
632 parts_default_nan(a, s);
633 return a;
634 }
635
636 /*
637 * Floating point remainder, per IEC/IEEE, or modulus.
638 */
639 static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
640 uint64_t *mod_quot, float_status *s)
641 {
642 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
643
644 if (likely(ab_mask == float_cmask_normal)) {
645 frac_modrem(a, b, mod_quot);
646 return a;
647 }
648
649 if (mod_quot) {
650 *mod_quot = 0;
651 }
652
653 /* All the NaN cases */
654 if (unlikely(ab_mask & float_cmask_anynan)) {
655 return parts_pick_nan(a, b, s);
656 }
657
658 /* Inf % N; N % 0 */
659 if (a->cls == float_class_inf || b->cls == float_class_zero) {
660 float_raise(float_flag_invalid, s);
661 parts_default_nan(a, s);
662 return a;
663 }
664
665 /* N % Inf; 0 % N */
666 g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
667 return a;
668 }
669
/*
 * Square Root
 *
 * The base algorithm is lifted from
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
 * and is thus MIT licenced.
 *
 * @a:      operand, overwritten in place with the result.
 * @status: receives Invalid for negative inputs; also passed to the
 *          NaN helpers.
 * @fmt:    only fmt->frac_size is consulted, to select the
 *          "float32 or smaller" path when N == 64.
 *
 * NaN inputs go through parts_return_nan(); zeros (either sign) and
 * +Inf are returned unchanged; -Inf and negative normals produce the
 * default NaN and raise Invalid.
 */
static void partsN(sqrt)(FloatPartsN *a, float_status *status,
                         const FloatFmt *fmt)
{
    const uint32_t three32 = 3u << 30;
    const uint64_t three64 = 3ull << 62;
    uint32_t d32, m32, r32, s32, u32;            /* 32-bit computation */
    uint64_t d64, m64, r64, s64, u64;            /* 64-bit computation */
    uint64_t dh, dl, rh, rl, sh, sl, uh, ul;     /* 128-bit computation */
    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
    uint64_t discard;
    bool exp_odd;
    size_t index;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, status);
            return;
        case float_class_zero:
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            g_assert_not_reached();
        }
    }

    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /*
     * Argument reduction.
     * x = 4^e frac; with integer e, and frac in [1, 4)
     * m = frac fixed point at bit 62, since we're in base 4.
     * If base-2 exponent is odd, exchange that for multiply by 2,
     * which results in no shift.
     */
    exp_odd = a->exp & 1;
    /* Table index: 6 fraction bits from frac_hi, plus !exp_odd as bit 6. */
    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
    if (!exp_odd) {
        frac_shr(a, 1);
    }

    /*
     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
     *
     * Initial estimate:
     * 7-bit lookup table (1-bit exponent and 6-bit significand).
     *
     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
     * a table lookup is faster and needs one less iteration.
     * The 7-bit table gives |e| < 0x1.fdp-9.
     *
     * A Newton-Raphson iteration for r is
     *   s = m*r
     *   d = s*r
     *   u = 3 - d
     *   r = r*u/2
     *
     * Fixed point representations:
     *   m, s, d, u, three are all 2.30; r is 0.32
     */
    m64 = a->frac_hi;
    m32 = m64 >> 32;

    r32 = rsqrt_tab[index] << 16;
    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */

    s32 = ((uint64_t)m32 * r32) >> 32;
    d32 = ((uint64_t)s32 * r32) >> 32;
    u32 = three32 - d32;

    if (N == 64) {
        /* float64 or smaller */

        r32 = ((uint64_t)r32 * u32) >> 31;
        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */

        s32 = ((uint64_t)m32 * r32) >> 32;
        d32 = ((uint64_t)s32 * r32) >> 32;
        u32 = three32 - d32;

        if (fmt->frac_size <= 23) {
            /* float32 or smaller */

            s32 = ((uint64_t)s32 * u32) >> 32;            /* 3.29 */
            s32 = (s32 - 1) >> 6;                         /* 9.23 */
            /* s < sqrt(m) < s + 0x1.08p-23 */

            /* compute nearest rounded result to 2.23 bits */
            uint32_t d0 = (m32 << 16) - s32 * s32;
            uint32_t d1 = s32 - d0;
            uint32_t d2 = d1 + s32 + 1;
            s32 += d1 >> 31;
            a->frac_hi = (uint64_t)s32 << (64 - 25);

            /* increment or decrement for inexact */
            if (d2 != 0) {
                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
            }
            goto done;
        }

        /* float64 */

        r64 = (uint64_t)r32 * u32 * 2;
        /* |r*sqrt(m) - 1| < 0x1.37p-29; convert to 64-bit arithmetic */
        mul64To128(m64, r64, &s64, &discard);
        mul64To128(s64, r64, &d64, &discard);
        u64 = three64 - d64;

        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
        s64 = (s64 - 2) >> 9;                  /* 12.52 */

        /* Compute nearest rounded result */
        uint64_t d0 = (m64 << 42) - s64 * s64;
        uint64_t d1 = s64 - d0;
        uint64_t d2 = d1 + s64 + 1;
        s64 += d1 >> 63;
        a->frac_hi = s64 << (64 - 54);

        /* increment or decrement for inexact */
        if (d2 != 0) {
            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
        }
        goto done;
    }

    /* N != 64: continue refining in 64-bit, then 128-bit, arithmetic. */
    r64 = (uint64_t)r32 * u32 * 2;
    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &r64, &discard);
    r64 <<= 1;
    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &rh, &rl);
    add128(rh, rl, rh, rl, &rh, &rl);
    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */

    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
    sub128(three64, 0, dh, dl, &uh, &ul);
    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */

    sub128(sh, sl, 0, 4, &sh, &sl);
    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
    /* s < sqrt(m) < s + 1ulp */

    /* Compute nearest rounded result */
    mul64To128(sl, sl, &d0h, &d0l);
    d0h += 2 * sh * sl;
    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
    add128(sh, sl, 0, 1, &d2h, &d2l);
    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
    shift128Left(sh, sl, 128 - 114, &sh, &sl);

    /* increment or decrement for inexact */
    if (d2h | d2l) {
        if ((int64_t)(d1h ^ d2h) < 0) {
            sub128(sh, sl, 0, 1, &sh, &sl);
        } else {
            add128(sh, sl, 0, 1, &sh, &sl);
        }
    }
    a->frac_lo = sl;
    a->frac_hi = sh;

 done:
    /* Convert back from base 4 to base 2. */
    a->exp >>= 1;
    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        /* Top bit clear: shift the fraction left by one. */
        frac_add(a, a, a);
    } else {
        a->exp += 1;
    }
    return;

    /* Square root of a negative value: Invalid, default NaN. */
 d_nan:
    float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
    parts_default_nan(a, status);
}
875
/*
 * Rounds the floating-point value `a' to an integer, and returns the
 * result as a floating-point value. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 *
 * parts_round_to_int_normal is an internal helper function for
 * normal numbers only, returning true for inexact but not directly
 * raising float_flag_inexact.
 *
 * @a:         normal-class value, rounded in place; a->cls may become
 *             float_class_zero.
 * @rmode:     rounding mode to apply.
 * @scale:     added to the exponent before rounding, after being
 *             clamped to [-0x10000, 0x10000].
 * @frac_size: exponent at or above which the value is treated as
 *             already integral.
 */
static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
                                        int scale, int frac_size)
{
    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
    int shift_adj;

    scale = MIN(MAX(scale, -0x10000), 0x10000);
    a->exp += scale;

    if (a->exp < 0) {
        /* True when the magnitude rounds to 1, false for 0. */
        bool one;

        /* All fractional */
        switch (rmode) {
        case float_round_nearest_even:
            one = false;
            if (a->exp == -1) {
                FloatPartsN tmp;
                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
                frac_add(&tmp, a, a);
                /* Anything remaining means frac > 0.5. */
                one = !frac_eqz(&tmp);
            }
            break;
        case float_round_ties_away:
            one = a->exp == -1;
            break;
        case float_round_to_zero:
            one = false;
            break;
        case float_round_up:
            one = !a->sign;
            break;
        case float_round_down:
            one = a->sign;
            break;
        case float_round_to_odd:
            one = true;
            break;
        default:
            g_assert_not_reached();
        }

        frac_clear(a);
        a->exp = 0;
        if (one) {
            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
        } else {
            a->cls = float_class_zero;
        }
        /* A purely fractional input is always reported as inexact. */
        return true;
    }

    if (a->exp >= frac_size) {
        /* All integral */
        return false;
    }

    if (N > 64 && a->exp < N - 64) {
        /*
         * Rounding is not in the low word -- shift lsb to bit 2,
         * which leaves room for sticky and rounding bit.
         */
        shift_adj = (N - 1) - (a->exp + 2);
        frac_shrjam(a, shift_adj);
        frac_lsb = 1 << 2;
    } else {
        shift_adj = 0;
        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
    }

    frac_lsbm1 = frac_lsb >> 1;
    rnd_mask = frac_lsb - 1;
    rnd_even_mask = rnd_mask | frac_lsb;

    if (!(a->frac_lo & rnd_mask)) {
        /* Fractional bits already clear, undo the shift above. */
        frac_shl(a, shift_adj);
        return false;
    }

    /* Choose the increment that implements the rounding mode. */
    switch (rmode) {
    case float_round_nearest_even:
        /* No increment only for an exact tie with an even lsb. */
        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        inc = 0;
        break;
    case float_round_up:
        inc = a->sign ? 0 : rnd_mask;
        break;
    case float_round_down:
        inc = a->sign ? rnd_mask : 0;
        break;
    case float_round_to_odd:
        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
        break;
    default:
        g_assert_not_reached();
    }

    if (shift_adj == 0) {
        if (frac_addi(a, a, inc)) {
            /* Carry out of the fraction: renormalize. */
            frac_shr(a, 1);
            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
            a->exp++;
        }
        a->frac_lo &= ~rnd_mask;
    } else {
        frac_addi(a, a, inc);
        a->frac_lo &= ~rnd_mask;
        /* Be careful shifting back, not to overflow */
        frac_shl(a, shift_adj - 1);
        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
            /* Rounding carried into the next binade. */
            a->exp++;
        } else {
            /* No carry: apply the one remaining left shift. */
            frac_add(a, a, a);
        }
    }
    return true;
}
1010
1011 static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
1012 int scale, float_status *s,
1013 const FloatFmt *fmt)
1014 {
1015 switch (a->cls) {
1016 case float_class_qnan:
1017 case float_class_snan:
1018 parts_return_nan(a, s);
1019 break;
1020 case float_class_zero:
1021 case float_class_inf:
1022 break;
1023 case float_class_normal:
1024 if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
1025 float_raise(float_flag_inexact, s);
1026 }
1027 break;
1028 default:
1029 g_assert_not_reached();
1030 }
1031 }
1032
1033 /*
1034 * Returns the result of converting the floating-point value `a' to
1035 * the two's complement integer format. The conversion is performed
1036 * according to the IEC/IEEE Standard for Binary Floating-Point
1037 * Arithmetic---which means in particular that the conversion is
1038 * rounded according to the current rounding mode. If `a' is a NaN,
1039 * the largest positive integer is returned. Otherwise, if the
1040 * conversion overflows, the largest integer with the same sign as `a'
1041 * is returned.
1042 */
1043 static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
1044 int scale, int64_t min, int64_t max,
1045 float_status *s)
1046 {
1047 int flags = 0;
1048 uint64_t r;
1049
1050 switch (p->cls) {
1051 case float_class_snan:
1052 flags |= float_flag_invalid_snan;
1053 /* fall through */
1054 case float_class_qnan:
1055 flags |= float_flag_invalid;
1056 r = max;
1057 break;
1058
1059 case float_class_inf:
1060 flags = float_flag_invalid | float_flag_invalid_cvti;
1061 r = p->sign ? min : max;
1062 break;
1063
1064 case float_class_zero:
1065 return 0;
1066
1067 case float_class_normal:
1068 /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1069 if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1070 flags = float_flag_inexact;
1071 }
1072
1073 if (p->exp <= DECOMPOSED_BINARY_POINT) {
1074 r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1075 } else {
1076 r = UINT64_MAX;
1077 }
1078 if (p->sign) {
1079 if (r <= -(uint64_t)min) {
1080 r = -r;
1081 } else {
1082 flags = float_flag_invalid | float_flag_invalid_cvti;
1083 r = min;
1084 }
1085 } else if (r > max) {
1086 flags = float_flag_invalid | float_flag_invalid_cvti;
1087 r = max;
1088 }
1089 break;
1090
1091 default:
1092 g_assert_not_reached();
1093 }
1094
1095 float_raise(flags, s);
1096 return r;
1097 }
1098
1099 /*
1100 * Returns the result of converting the floating-point value `a' to
1101 * the unsigned integer format. The conversion is performed according
1102 * to the IEC/IEEE Standard for Binary Floating-Point
1103 * Arithmetic---which means in particular that the conversion is
1104 * rounded according to the current rounding mode. If `a' is a NaN,
1105 * the largest unsigned integer is returned. Otherwise, if the
1106 * conversion overflows, the largest unsigned integer is returned. If
1107 * the 'a' is negative, the result is rounded and zero is returned;
1108 * values that do not round to zero will raise the inexact exception
1109 * flag.
1110 */
1111 static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
1112 int scale, uint64_t max, float_status *s)
1113 {
1114 int flags = 0;
1115 uint64_t r;
1116
1117 switch (p->cls) {
1118 case float_class_snan:
1119 flags |= float_flag_invalid_snan;
1120 /* fall through */
1121 case float_class_qnan:
1122 flags |= float_flag_invalid;
1123 r = max;
1124 break;
1125
1126 case float_class_inf:
1127 flags = float_flag_invalid | float_flag_invalid_cvti;
1128 r = p->sign ? 0 : max;
1129 break;
1130
1131 case float_class_zero:
1132 return 0;
1133
1134 case float_class_normal:
1135 /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1136 if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1137 flags = float_flag_inexact;
1138 if (p->cls == float_class_zero) {
1139 r = 0;
1140 break;
1141 }
1142 }
1143
1144 if (p->sign) {
1145 flags = float_flag_invalid | float_flag_invalid_cvti;
1146 r = 0;
1147 } else if (p->exp > DECOMPOSED_BINARY_POINT) {
1148 flags = float_flag_invalid | float_flag_invalid_cvti;
1149 r = max;
1150 } else {
1151 r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1152 if (r > max) {
1153 flags = float_flag_invalid | float_flag_invalid_cvti;
1154 r = max;
1155 }
1156 }
1157 break;
1158
1159 default:
1160 g_assert_not_reached();
1161 }
1162
1163 float_raise(flags, s);
1164 return r;
1165 }
1166
1167 /*
1168 * Integer to float conversions
1169 *
1170 * Returns the result of converting the two's complement integer `a'
1171 * to the floating-point format. The conversion is performed according
1172 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1173 */
1174 static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
1175 int scale, float_status *s)
1176 {
1177 uint64_t f = a;
1178 int shift;
1179
1180 memset(p, 0, sizeof(*p));
1181
1182 if (a == 0) {
1183 p->cls = float_class_zero;
1184 return;
1185 }
1186
1187 p->cls = float_class_normal;
1188 if (a < 0) {
1189 f = -f;
1190 p->sign = true;
1191 }
1192 shift = clz64(f);
1193 scale = MIN(MAX(scale, -0x10000), 0x10000);
1194
1195 p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1196 p->frac_hi = f << shift;
1197 }
1198
1199 /*
1200 * Unsigned Integer to float conversions
1201 *
1202 * Returns the result of converting the unsigned integer `a' to the
1203 * floating-point format. The conversion is performed according to the
1204 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1205 */
1206 static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
1207 int scale, float_status *status)
1208 {
1209 memset(p, 0, sizeof(*p));
1210
1211 if (a == 0) {
1212 p->cls = float_class_zero;
1213 } else {
1214 int shift = clz64(a);
1215 scale = MIN(MAX(scale, -0x10000), 0x10000);
1216 p->cls = float_class_normal;
1217 p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1218 p->frac_hi = a << shift;
1219 }
1220 }
1221
1222 /*
1223 * Float min/max.
1224 */
1225 static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
1226 float_status *s, int flags)
1227 {
1228 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1229 int a_exp, b_exp, cmp;
1230
1231 if (unlikely(ab_mask & float_cmask_anynan)) {
1232 /*
1233 * For minNum/maxNum (IEEE 754-2008)
1234 * or minimumNumber/maximumNumber (IEEE 754-2019),
1235 * if one operand is a QNaN, and the other
1236 * operand is numerical, then return numerical argument.
1237 */
1238 if ((flags & (minmax_isnum | minmax_isnumber))
1239 && !(ab_mask & float_cmask_snan)
1240 && (ab_mask & ~float_cmask_qnan)) {
1241 return is_nan(a->cls) ? b : a;
1242 }
1243
1244 /*
1245 * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
1246 * are removed and replaced with minimum, minimumNumber, maximum
1247 * and maximumNumber.
1248 * minimumNumber/maximumNumber behavior for SNaN is changed to:
1249 * If both operands are NaNs, a QNaN is returned.
1250 * If either operand is a SNaN,
1251 * an invalid operation exception is signaled,
1252 * but unless both operands are NaNs,
1253 * the SNaN is otherwise ignored and not converted to a QNaN.
1254 */
1255 if ((flags & minmax_isnumber)
1256 && (ab_mask & float_cmask_snan)
1257 && (ab_mask & ~float_cmask_anynan)) {
1258 float_raise(float_flag_invalid, s);
1259 return is_nan(a->cls) ? b : a;
1260 }
1261
1262 return parts_pick_nan(a, b, s);
1263 }
1264
1265 a_exp = a->exp;
1266 b_exp = b->exp;
1267
1268 if (unlikely(ab_mask != float_cmask_normal)) {
1269 switch (a->cls) {
1270 case float_class_normal:
1271 break;
1272 case float_class_inf:
1273 a_exp = INT16_MAX;
1274 break;
1275 case float_class_zero:
1276 a_exp = INT16_MIN;
1277 break;
1278 default:
1279 g_assert_not_reached();
1280 break;
1281 }
1282 switch (b->cls) {
1283 case float_class_normal:
1284 break;
1285 case float_class_inf:
1286 b_exp = INT16_MAX;
1287 break;
1288 case float_class_zero:
1289 b_exp = INT16_MIN;
1290 break;
1291 default:
1292 g_assert_not_reached();
1293 break;
1294 }
1295 }
1296
1297 /* Compare magnitudes. */
1298 cmp = a_exp - b_exp;
1299 if (cmp == 0) {
1300 cmp = frac_cmp(a, b);
1301 }
1302
1303 /*
1304 * Take the sign into account.
1305 * For ismag, only do this if the magnitudes are equal.
1306 */
1307 if (!(flags & minmax_ismag) || cmp == 0) {
1308 if (a->sign != b->sign) {
1309 /* For differing signs, the negative operand is less. */
1310 cmp = a->sign ? -1 : 1;
1311 } else if (a->sign) {
1312 /* For two negative operands, invert the magnitude comparison. */
1313 cmp = -cmp;
1314 }
1315 }
1316
1317 if (flags & minmax_ismin) {
1318 cmp = -cmp;
1319 }
1320 return cmp < 0 ? b : a;
1321 }
1322
1323 /*
1324 * Floating point compare
1325 */
1326 static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
1327 float_status *s, bool is_quiet)
1328 {
1329 int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1330 int cmp;
1331
1332 if (likely(ab_mask == float_cmask_normal)) {
1333 if (a->sign != b->sign) {
1334 goto a_sign;
1335 }
1336 if (a->exp != b->exp) {
1337 cmp = a->exp < b->exp ? -1 : 1;
1338 } else {
1339 cmp = frac_cmp(a, b);
1340 }
1341 if (a->sign) {
1342 cmp = -cmp;
1343 }
1344 return cmp;
1345 }
1346
1347 if (unlikely(ab_mask & float_cmask_anynan)) {
1348 if (ab_mask & float_cmask_snan) {
1349 float_raise(float_flag_invalid | float_flag_invalid_snan, s);
1350 } else if (!is_quiet) {
1351 float_raise(float_flag_invalid, s);
1352 }
1353 return float_relation_unordered;
1354 }
1355
1356 if (ab_mask & float_cmask_zero) {
1357 if (ab_mask == float_cmask_zero) {
1358 return float_relation_equal;
1359 } else if (a->cls == float_class_zero) {
1360 goto b_sign;
1361 } else {
1362 goto a_sign;
1363 }
1364 }
1365
1366 if (ab_mask == float_cmask_inf) {
1367 if (a->sign == b->sign) {
1368 return float_relation_equal;
1369 }
1370 } else if (b->cls == float_class_inf) {
1371 goto b_sign;
1372 } else {
1373 g_assert(a->cls == float_class_inf);
1374 }
1375
1376 a_sign:
1377 return a->sign ? float_relation_less : float_relation_greater;
1378 b_sign:
1379 return b->sign ? float_relation_greater : float_relation_less;
1380 }
1381
1382 /*
1383 * Multiply A by 2 raised to the power N.
1384 */
1385 static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
1386 {
1387 switch (a->cls) {
1388 case float_class_snan:
1389 case float_class_qnan:
1390 parts_return_nan(a, s);
1391 break;
1392 case float_class_zero:
1393 case float_class_inf:
1394 break;
1395 case float_class_normal:
1396 a->exp += MIN(MAX(n, -0x10000), 0x10000);
1397 break;
1398 default:
1399 g_assert_not_reached();
1400 }
1401 }
1402
1403 /*
1404 * Return log2(A)
1405 */
1406 static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
1407 {
1408 uint64_t a0, a1, r, t, ign;
1409 FloatPartsN f;
1410 int i, n, a_exp, f_exp;
1411
1412 if (unlikely(a->cls != float_class_normal)) {
1413 switch (a->cls) {
1414 case float_class_snan:
1415 case float_class_qnan:
1416 parts_return_nan(a, s);
1417 return;
1418 case float_class_zero:
1419 /* log2(0) = -inf */
1420 a->cls = float_class_inf;
1421 a->sign = 1;
1422 return;
1423 case float_class_inf:
1424 if (unlikely(a->sign)) {
1425 goto d_nan;
1426 }
1427 return;
1428 default:
1429 break;
1430 }
1431 g_assert_not_reached();
1432 }
1433 if (unlikely(a->sign)) {
1434 goto d_nan;
1435 }
1436
1437 /* TODO: This algorithm looses bits too quickly for float128. */
1438 g_assert(N == 64);
1439
1440 a_exp = a->exp;
1441 f_exp = -1;
1442
1443 r = 0;
1444 t = DECOMPOSED_IMPLICIT_BIT;
1445 a0 = a->frac_hi;
1446 a1 = 0;
1447
1448 n = fmt->frac_size + 2;
1449 if (unlikely(a_exp == -1)) {
1450 /*
1451 * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
1452 * When the value is very close to 1.0, there are lots of 1's in
1453 * the msb parts of the fraction. At the end, when we subtract
1454 * this value from -1.0, we can see a catastrophic loss of precision,
1455 * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
1456 * bits of y in the final result. To minimize this, compute as many
1457 * digits as we can.
1458 * ??? This case needs another algorithm to avoid this.
1459 */
1460 n = fmt->frac_size * 2 + 2;
1461 /* Don't compute a value overlapping the sticky bit */
1462 n = MIN(n, 62);
1463 }
1464
1465 for (i = 0; i < n; i++) {
1466 if (a1) {
1467 mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
1468 } else if (a0 & 0xffffffffull) {
1469 mul64To128(a0, a0, &a0, &a1);
1470 } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
1471 a0 >>= 32;
1472 a0 *= a0;
1473 } else {
1474 goto exact;
1475 }
1476
1477 if (a0 & DECOMPOSED_IMPLICIT_BIT) {
1478 if (unlikely(a_exp == 0 && r == 0)) {
1479 /*
1480 * When a_exp == 0, we're computing the log2 of a value
1481 * [1.0,2.0). When the value is very close to 1.0, there
1482 * are lots of 0's in the msb parts of the fraction.
1483 * We need to compute more digits to produce a correct
1484 * result -- restart at the top of the fraction.
1485 * ??? This is likely to lose precision quickly, as for
1486 * float128; we may need another method.
1487 */
1488 f_exp -= i;
1489 t = r = DECOMPOSED_IMPLICIT_BIT;
1490 i = 0;
1491 } else {
1492 r |= t;
1493 }
1494 } else {
1495 add128(a0, a1, a0, a1, &a0, &a1);
1496 }
1497 t >>= 1;
1498 }
1499
1500 /* Set sticky for inexact. */
1501 r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);
1502
1503 exact:
1504 parts_sint_to_float(a, a_exp, 0, s);
1505 if (r == 0) {
1506 return;
1507 }
1508
1509 memset(&f, 0, sizeof(f));
1510 f.cls = float_class_normal;
1511 f.frac_hi = r;
1512 f.exp = f_exp - frac_normalize(&f);
1513
1514 if (a_exp < 0) {
1515 parts_sub_normal(a, &f);
1516 } else if (a_exp > 0) {
1517 parts_add_normal(a, &f);
1518 } else {
1519 *a = f;
1520 }
1521 return;
1522
1523 d_nan:
1524 float_raise(float_flag_invalid, s);
1525 parts_default_nan(a, s);
1526 }