]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * QEMU float support | |
3 | * | |
4 | * The code in this source file is derived from release 2a of the SoftFloat | |
5 | * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and | |
6 | * some later contributions) are provided under that license, as detailed below. | |
7 | * It has subsequently been modified by contributors to the QEMU Project, | |
8 | * so some portions are provided under: | |
9 | * the SoftFloat-2a license | |
10 | * the BSD license | |
11 | * GPL-v2-or-later | |
12 | * | |
13 | * Any future contributions to this file after December 1st 2014 will be | |
14 | * taken to be licensed under the Softfloat-2a license unless specifically | |
15 | * indicated otherwise. | |
16 | */ | |
17 | ||
18 | static void partsN(return_nan)(FloatPartsN *a, float_status *s) | |
19 | { | |
20 | switch (a->cls) { | |
21 | case float_class_snan: | |
22 | float_raise(float_flag_invalid | float_flag_invalid_snan, s); | |
23 | if (s->default_nan_mode) { | |
24 | parts_default_nan(a, s); | |
25 | } else { | |
26 | parts_silence_nan(a, s); | |
27 | } | |
28 | break; | |
29 | case float_class_qnan: | |
30 | if (s->default_nan_mode) { | |
31 | parts_default_nan(a, s); | |
32 | } | |
33 | break; | |
34 | default: | |
35 | g_assert_not_reached(); | |
36 | } | |
37 | } | |
38 | ||
39 | static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b, | |
40 | float_status *s) | |
41 | { | |
42 | if (is_snan(a->cls) || is_snan(b->cls)) { | |
43 | float_raise(float_flag_invalid | float_flag_invalid_snan, s); | |
44 | } | |
45 | ||
46 | if (s->default_nan_mode) { | |
47 | parts_default_nan(a, s); | |
48 | } else { | |
49 | int cmp = frac_cmp(a, b); | |
50 | if (cmp == 0) { | |
51 | cmp = a->sign < b->sign; | |
52 | } | |
53 | ||
54 | if (pickNaN(a->cls, b->cls, cmp > 0, s)) { | |
55 | a = b; | |
56 | } | |
57 | if (is_snan(a->cls)) { | |
58 | parts_silence_nan(a, s); | |
59 | } | |
60 | } | |
61 | return a; | |
62 | } | |
63 | ||
64 | static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b, | |
65 | FloatPartsN *c, float_status *s, | |
66 | int ab_mask, int abc_mask) | |
67 | { | |
68 | int which; | |
69 | ||
70 | if (unlikely(abc_mask & float_cmask_snan)) { | |
71 | float_raise(float_flag_invalid | float_flag_invalid_snan, s); | |
72 | } | |
73 | ||
74 | which = pickNaNMulAdd(a->cls, b->cls, c->cls, | |
75 | ab_mask == float_cmask_infzero, s); | |
76 | ||
77 | if (s->default_nan_mode || which == 3) { | |
78 | /* | |
79 | * Note that this check is after pickNaNMulAdd so that function | |
80 | * has an opportunity to set the Invalid flag for infzero. | |
81 | */ | |
82 | parts_default_nan(a, s); | |
83 | return a; | |
84 | } | |
85 | ||
86 | switch (which) { | |
87 | case 0: | |
88 | break; | |
89 | case 1: | |
90 | a = b; | |
91 | break; | |
92 | case 2: | |
93 | a = c; | |
94 | break; | |
95 | default: | |
96 | g_assert_not_reached(); | |
97 | } | |
98 | if (is_snan(a->cls)) { | |
99 | parts_silence_nan(a, s); | |
100 | } | |
101 | return a; | |
102 | } | |
103 | ||
104 | /* | |
105 | * Canonicalize the FloatParts structure. Determine the class, | |
106 | * unbias the exponent, and normalize the fraction. | |
107 | */ | |
108 | static void partsN(canonicalize)(FloatPartsN *p, float_status *status, | |
109 | const FloatFmt *fmt) | |
110 | { | |
111 | if (unlikely(p->exp == 0)) { | |
112 | if (likely(frac_eqz(p))) { | |
113 | p->cls = float_class_zero; | |
114 | } else if (status->flush_inputs_to_zero) { | |
115 | float_raise(float_flag_input_denormal, status); | |
116 | p->cls = float_class_zero; | |
117 | frac_clear(p); | |
118 | } else { | |
119 | int shift = frac_normalize(p); | |
120 | p->cls = float_class_normal; | |
121 | p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1; | |
122 | } | |
123 | } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) { | |
124 | p->cls = float_class_normal; | |
125 | p->exp -= fmt->exp_bias; | |
126 | frac_shl(p, fmt->frac_shift); | |
127 | p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; | |
128 | } else if (likely(frac_eqz(p))) { | |
129 | p->cls = float_class_inf; | |
130 | } else { | |
131 | frac_shl(p, fmt->frac_shift); | |
132 | p->cls = (parts_is_snan_frac(p->frac_hi, status) | |
133 | ? float_class_snan : float_class_qnan); | |
134 | } | |
135 | } | |
136 | ||
137 | /* | |
138 | * Round and uncanonicalize a floating-point number by parts. There | |
139 | * are FRAC_SHIFT bits that may require rounding at the bottom of the | |
140 | * fraction; these bits will be removed. The exponent will be biased | |
141 | * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0]. | |
142 | */ | |
143 | static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, | |
144 | const FloatFmt *fmt) | |
145 | { | |
146 | const int exp_max = fmt->exp_max; | |
147 | const int frac_shift = fmt->frac_shift; | |
148 | const uint64_t round_mask = fmt->round_mask; | |
149 | const uint64_t frac_lsb = round_mask + 1; | |
150 | const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1); | |
151 | const uint64_t roundeven_mask = round_mask | frac_lsb; | |
152 | uint64_t inc; | |
153 | bool overflow_norm = false; | |
154 | int exp, flags = 0; | |
155 | ||
156 | switch (s->float_rounding_mode) { | |
157 | case float_round_nearest_even: | |
158 | if (N > 64 && frac_lsb == 0) { | |
159 | inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1 | |
160 | ? frac_lsbm1 : 0); | |
161 | } else { | |
162 | inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 | |
163 | ? frac_lsbm1 : 0); | |
164 | } | |
165 | break; | |
166 | case float_round_ties_away: | |
167 | inc = frac_lsbm1; | |
168 | break; | |
169 | case float_round_to_zero: | |
170 | overflow_norm = true; | |
171 | inc = 0; | |
172 | break; | |
173 | case float_round_up: | |
174 | inc = p->sign ? 0 : round_mask; | |
175 | overflow_norm = p->sign; | |
176 | break; | |
177 | case float_round_down: | |
178 | inc = p->sign ? round_mask : 0; | |
179 | overflow_norm = !p->sign; | |
180 | break; | |
181 | case float_round_to_odd: | |
182 | overflow_norm = true; | |
183 | /* fall through */ | |
184 | case float_round_to_odd_inf: | |
185 | if (N > 64 && frac_lsb == 0) { | |
186 | inc = p->frac_hi & 1 ? 0 : round_mask; | |
187 | } else { | |
188 | inc = p->frac_lo & frac_lsb ? 0 : round_mask; | |
189 | } | |
190 | break; | |
191 | default: | |
192 | g_assert_not_reached(); | |
193 | } | |
194 | ||
195 | exp = p->exp + fmt->exp_bias; | |
196 | if (likely(exp > 0)) { | |
197 | if (p->frac_lo & round_mask) { | |
198 | flags |= float_flag_inexact; | |
199 | if (frac_addi(p, p, inc)) { | |
200 | frac_shr(p, 1); | |
201 | p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; | |
202 | exp++; | |
203 | } | |
204 | p->frac_lo &= ~round_mask; | |
205 | } | |
206 | ||
207 | if (fmt->arm_althp) { | |
208 | /* ARM Alt HP eschews Inf and NaN for a wider exponent. */ | |
209 | if (unlikely(exp > exp_max)) { | |
210 | /* Overflow. Return the maximum normal. */ | |
211 | flags = float_flag_invalid; | |
212 | exp = exp_max; | |
213 | frac_allones(p); | |
214 | p->frac_lo &= ~round_mask; | |
215 | } | |
216 | } else if (unlikely(exp >= exp_max)) { | |
217 | flags |= float_flag_overflow; | |
218 | if (s->rebias_overflow) { | |
219 | exp -= fmt->exp_re_bias; | |
220 | } else if (overflow_norm) { | |
221 | flags |= float_flag_inexact; | |
222 | exp = exp_max - 1; | |
223 | frac_allones(p); | |
224 | p->frac_lo &= ~round_mask; | |
225 | } else { | |
226 | flags |= float_flag_inexact; | |
227 | p->cls = float_class_inf; | |
228 | exp = exp_max; | |
229 | frac_clear(p); | |
230 | } | |
231 | } | |
232 | frac_shr(p, frac_shift); | |
233 | } else if (unlikely(s->rebias_underflow)) { | |
234 | flags |= float_flag_underflow; | |
235 | exp += fmt->exp_re_bias; | |
236 | if (p->frac_lo & round_mask) { | |
237 | flags |= float_flag_inexact; | |
238 | if (frac_addi(p, p, inc)) { | |
239 | frac_shr(p, 1); | |
240 | p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; | |
241 | exp++; | |
242 | } | |
243 | p->frac_lo &= ~round_mask; | |
244 | } | |
245 | frac_shr(p, frac_shift); | |
246 | } else if (s->flush_to_zero) { | |
247 | flags |= float_flag_output_denormal; | |
248 | p->cls = float_class_zero; | |
249 | exp = 0; | |
250 | frac_clear(p); | |
251 | } else { | |
252 | bool is_tiny = s->tininess_before_rounding || exp < 0; | |
253 | ||
254 | if (!is_tiny) { | |
255 | FloatPartsN discard; | |
256 | is_tiny = !frac_addi(&discard, p, inc); | |
257 | } | |
258 | ||
259 | frac_shrjam(p, 1 - exp); | |
260 | ||
261 | if (p->frac_lo & round_mask) { | |
262 | /* Need to recompute round-to-even/round-to-odd. */ | |
263 | switch (s->float_rounding_mode) { | |
264 | case float_round_nearest_even: | |
265 | if (N > 64 && frac_lsb == 0) { | |
266 | inc = ((p->frac_hi & 1) || | |
267 | (p->frac_lo & round_mask) != frac_lsbm1 | |
268 | ? frac_lsbm1 : 0); | |
269 | } else { | |
270 | inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 | |
271 | ? frac_lsbm1 : 0); | |
272 | } | |
273 | break; | |
274 | case float_round_to_odd: | |
275 | case float_round_to_odd_inf: | |
276 | if (N > 64 && frac_lsb == 0) { | |
277 | inc = p->frac_hi & 1 ? 0 : round_mask; | |
278 | } else { | |
279 | inc = p->frac_lo & frac_lsb ? 0 : round_mask; | |
280 | } | |
281 | break; | |
282 | default: | |
283 | break; | |
284 | } | |
285 | flags |= float_flag_inexact; | |
286 | frac_addi(p, p, inc); | |
287 | p->frac_lo &= ~round_mask; | |
288 | } | |
289 | ||
290 | exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0; | |
291 | frac_shr(p, frac_shift); | |
292 | ||
293 | if (is_tiny && (flags & float_flag_inexact)) { | |
294 | flags |= float_flag_underflow; | |
295 | } | |
296 | if (exp == 0 && frac_eqz(p)) { | |
297 | p->cls = float_class_zero; | |
298 | } | |
299 | } | |
300 | p->exp = exp; | |
301 | float_raise(flags, s); | |
302 | } | |
303 | ||
304 | static void partsN(uncanon)(FloatPartsN *p, float_status *s, | |
305 | const FloatFmt *fmt) | |
306 | { | |
307 | if (likely(p->cls == float_class_normal)) { | |
308 | parts_uncanon_normal(p, s, fmt); | |
309 | } else { | |
310 | switch (p->cls) { | |
311 | case float_class_zero: | |
312 | p->exp = 0; | |
313 | frac_clear(p); | |
314 | return; | |
315 | case float_class_inf: | |
316 | g_assert(!fmt->arm_althp); | |
317 | p->exp = fmt->exp_max; | |
318 | frac_clear(p); | |
319 | return; | |
320 | case float_class_qnan: | |
321 | case float_class_snan: | |
322 | g_assert(!fmt->arm_althp); | |
323 | p->exp = fmt->exp_max; | |
324 | frac_shr(p, fmt->frac_shift); | |
325 | return; | |
326 | default: | |
327 | break; | |
328 | } | |
329 | g_assert_not_reached(); | |
330 | } | |
331 | } | |
332 | ||
333 | /* | |
334 | * Returns the result of adding or subtracting the values of the | |
335 | * floating-point values `a' and `b'. The operation is performed | |
336 | * according to the IEC/IEEE Standard for Binary Floating-Point | |
337 | * Arithmetic. | |
338 | */ | |
339 | static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | |
340 | float_status *s, bool subtract) | |
341 | { | |
342 | bool b_sign = b->sign ^ subtract; | |
343 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
344 | ||
345 | if (a->sign != b_sign) { | |
346 | /* Subtraction */ | |
347 | if (likely(ab_mask == float_cmask_normal)) { | |
348 | if (parts_sub_normal(a, b)) { | |
349 | return a; | |
350 | } | |
351 | /* Subtract was exact, fall through to set sign. */ | |
352 | ab_mask = float_cmask_zero; | |
353 | } | |
354 | ||
355 | if (ab_mask == float_cmask_zero) { | |
356 | a->sign = s->float_rounding_mode == float_round_down; | |
357 | return a; | |
358 | } | |
359 | ||
360 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
361 | goto p_nan; | |
362 | } | |
363 | ||
364 | if (ab_mask & float_cmask_inf) { | |
365 | if (a->cls != float_class_inf) { | |
366 | /* N - Inf */ | |
367 | goto return_b; | |
368 | } | |
369 | if (b->cls != float_class_inf) { | |
370 | /* Inf - N */ | |
371 | return a; | |
372 | } | |
373 | /* Inf - Inf */ | |
374 | float_raise(float_flag_invalid | float_flag_invalid_isi, s); | |
375 | parts_default_nan(a, s); | |
376 | return a; | |
377 | } | |
378 | } else { | |
379 | /* Addition */ | |
380 | if (likely(ab_mask == float_cmask_normal)) { | |
381 | parts_add_normal(a, b); | |
382 | return a; | |
383 | } | |
384 | ||
385 | if (ab_mask == float_cmask_zero) { | |
386 | return a; | |
387 | } | |
388 | ||
389 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
390 | goto p_nan; | |
391 | } | |
392 | ||
393 | if (ab_mask & float_cmask_inf) { | |
394 | a->cls = float_class_inf; | |
395 | return a; | |
396 | } | |
397 | } | |
398 | ||
399 | if (b->cls == float_class_zero) { | |
400 | g_assert(a->cls == float_class_normal); | |
401 | return a; | |
402 | } | |
403 | ||
404 | g_assert(a->cls == float_class_zero); | |
405 | g_assert(b->cls == float_class_normal); | |
406 | return_b: | |
407 | b->sign = b_sign; | |
408 | return b; | |
409 | ||
410 | p_nan: | |
411 | return parts_pick_nan(a, b, s); | |
412 | } | |
413 | ||
414 | /* | |
415 | * Returns the result of multiplying the floating-point values `a' and | |
416 | * `b'. The operation is performed according to the IEC/IEEE Standard | |
417 | * for Binary Floating-Point Arithmetic. | |
418 | */ | |
419 | static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, | |
420 | float_status *s) | |
421 | { | |
422 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
423 | bool sign = a->sign ^ b->sign; | |
424 | ||
425 | if (likely(ab_mask == float_cmask_normal)) { | |
426 | FloatPartsW tmp; | |
427 | ||
428 | frac_mulw(&tmp, a, b); | |
429 | frac_truncjam(a, &tmp); | |
430 | ||
431 | a->exp += b->exp + 1; | |
432 | if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { | |
433 | frac_add(a, a, a); | |
434 | a->exp -= 1; | |
435 | } | |
436 | ||
437 | a->sign = sign; | |
438 | return a; | |
439 | } | |
440 | ||
441 | /* Inf * Zero == NaN */ | |
442 | if (unlikely(ab_mask == float_cmask_infzero)) { | |
443 | float_raise(float_flag_invalid | float_flag_invalid_imz, s); | |
444 | parts_default_nan(a, s); | |
445 | return a; | |
446 | } | |
447 | ||
448 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
449 | return parts_pick_nan(a, b, s); | |
450 | } | |
451 | ||
452 | /* Multiply by 0 or Inf */ | |
453 | if (ab_mask & float_cmask_inf) { | |
454 | a->cls = float_class_inf; | |
455 | a->sign = sign; | |
456 | return a; | |
457 | } | |
458 | ||
459 | g_assert(ab_mask & float_cmask_zero); | |
460 | a->cls = float_class_zero; | |
461 | a->sign = sign; | |
462 | return a; | |
463 | } | |
464 | ||
465 | /* | |
466 | * Returns the result of multiplying the floating-point values `a' and | |
467 | * `b' then adding 'c', with no intermediate rounding step after the | |
468 | * multiplication. The operation is performed according to the | |
469 | * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008. | |
470 | * The flags argument allows the caller to select negation of the | |
471 | * addend, the intermediate product, or the final result. (The | |
472 | * difference between this and having the caller do a separate | |
473 | * negation is that negating externally will flip the sign bit on NaNs.) | |
474 | * | |
475 | * Requires A and C extracted into a double-sized structure to provide the | |
476 | * extra space for the widening multiply. | |
477 | */ | |
478 | static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b, | |
479 | FloatPartsN *c, int flags, float_status *s) | |
480 | { | |
481 | int ab_mask, abc_mask; | |
482 | FloatPartsW p_widen, c_widen; | |
483 | ||
484 | ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
485 | abc_mask = float_cmask(c->cls) | ab_mask; | |
486 | ||
487 | /* | |
488 | * It is implementation-defined whether the cases of (0,inf,qnan) | |
489 | * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN | |
490 | * they return if they do), so we have to hand this information | |
491 | * off to the target-specific pick-a-NaN routine. | |
492 | */ | |
493 | if (unlikely(abc_mask & float_cmask_anynan)) { | |
494 | return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask); | |
495 | } | |
496 | ||
497 | if (flags & float_muladd_negate_c) { | |
498 | c->sign ^= 1; | |
499 | } | |
500 | ||
501 | /* Compute the sign of the product into A. */ | |
502 | a->sign ^= b->sign; | |
503 | if (flags & float_muladd_negate_product) { | |
504 | a->sign ^= 1; | |
505 | } | |
506 | ||
507 | if (unlikely(ab_mask != float_cmask_normal)) { | |
508 | if (unlikely(ab_mask == float_cmask_infzero)) { | |
509 | float_raise(float_flag_invalid | float_flag_invalid_imz, s); | |
510 | goto d_nan; | |
511 | } | |
512 | ||
513 | if (ab_mask & float_cmask_inf) { | |
514 | if (c->cls == float_class_inf && a->sign != c->sign) { | |
515 | float_raise(float_flag_invalid | float_flag_invalid_isi, s); | |
516 | goto d_nan; | |
517 | } | |
518 | goto return_inf; | |
519 | } | |
520 | ||
521 | g_assert(ab_mask & float_cmask_zero); | |
522 | if (c->cls == float_class_normal) { | |
523 | *a = *c; | |
524 | goto return_normal; | |
525 | } | |
526 | if (c->cls == float_class_zero) { | |
527 | if (a->sign != c->sign) { | |
528 | goto return_sub_zero; | |
529 | } | |
530 | goto return_zero; | |
531 | } | |
532 | g_assert(c->cls == float_class_inf); | |
533 | } | |
534 | ||
535 | if (unlikely(c->cls == float_class_inf)) { | |
536 | a->sign = c->sign; | |
537 | goto return_inf; | |
538 | } | |
539 | ||
540 | /* Perform the multiplication step. */ | |
541 | p_widen.sign = a->sign; | |
542 | p_widen.exp = a->exp + b->exp + 1; | |
543 | frac_mulw(&p_widen, a, b); | |
544 | if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) { | |
545 | frac_add(&p_widen, &p_widen, &p_widen); | |
546 | p_widen.exp -= 1; | |
547 | } | |
548 | ||
549 | /* Perform the addition step. */ | |
550 | if (c->cls != float_class_zero) { | |
551 | /* Zero-extend C to less significant bits. */ | |
552 | frac_widen(&c_widen, c); | |
553 | c_widen.exp = c->exp; | |
554 | ||
555 | if (a->sign == c->sign) { | |
556 | parts_add_normal(&p_widen, &c_widen); | |
557 | } else if (!parts_sub_normal(&p_widen, &c_widen)) { | |
558 | goto return_sub_zero; | |
559 | } | |
560 | } | |
561 | ||
562 | /* Narrow with sticky bit, for proper rounding later. */ | |
563 | frac_truncjam(a, &p_widen); | |
564 | a->sign = p_widen.sign; | |
565 | a->exp = p_widen.exp; | |
566 | ||
567 | return_normal: | |
568 | if (flags & float_muladd_halve_result) { | |
569 | a->exp -= 1; | |
570 | } | |
571 | finish_sign: | |
572 | if (flags & float_muladd_negate_result) { | |
573 | a->sign ^= 1; | |
574 | } | |
575 | return a; | |
576 | ||
577 | return_sub_zero: | |
578 | a->sign = s->float_rounding_mode == float_round_down; | |
579 | return_zero: | |
580 | a->cls = float_class_zero; | |
581 | goto finish_sign; | |
582 | ||
583 | return_inf: | |
584 | a->cls = float_class_inf; | |
585 | goto finish_sign; | |
586 | ||
587 | d_nan: | |
588 | parts_default_nan(a, s); | |
589 | return a; | |
590 | } | |
591 | ||
592 | /* | |
593 | * Returns the result of dividing the floating-point value `a' by the | |
594 | * corresponding value `b'. The operation is performed according to | |
595 | * the IEC/IEEE Standard for Binary Floating-Point Arithmetic. | |
596 | */ | |
597 | static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, | |
598 | float_status *s) | |
599 | { | |
600 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
601 | bool sign = a->sign ^ b->sign; | |
602 | ||
603 | if (likely(ab_mask == float_cmask_normal)) { | |
604 | a->sign = sign; | |
605 | a->exp -= b->exp + frac_div(a, b); | |
606 | return a; | |
607 | } | |
608 | ||
609 | /* 0/0 or Inf/Inf => NaN */ | |
610 | if (unlikely(ab_mask == float_cmask_zero)) { | |
611 | float_raise(float_flag_invalid | float_flag_invalid_zdz, s); | |
612 | goto d_nan; | |
613 | } | |
614 | if (unlikely(ab_mask == float_cmask_inf)) { | |
615 | float_raise(float_flag_invalid | float_flag_invalid_idi, s); | |
616 | goto d_nan; | |
617 | } | |
618 | ||
619 | /* All the NaN cases */ | |
620 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
621 | return parts_pick_nan(a, b, s); | |
622 | } | |
623 | ||
624 | a->sign = sign; | |
625 | ||
626 | /* Inf / X */ | |
627 | if (a->cls == float_class_inf) { | |
628 | return a; | |
629 | } | |
630 | ||
631 | /* 0 / X */ | |
632 | if (a->cls == float_class_zero) { | |
633 | return a; | |
634 | } | |
635 | ||
636 | /* X / Inf */ | |
637 | if (b->cls == float_class_inf) { | |
638 | a->cls = float_class_zero; | |
639 | return a; | |
640 | } | |
641 | ||
642 | /* X / 0 => Inf */ | |
643 | g_assert(b->cls == float_class_zero); | |
644 | float_raise(float_flag_divbyzero, s); | |
645 | a->cls = float_class_inf; | |
646 | return a; | |
647 | ||
648 | d_nan: | |
649 | parts_default_nan(a, s); | |
650 | return a; | |
651 | } | |
652 | ||
653 | /* | |
654 | * Floating point remainder, per IEC/IEEE, or modulus. | |
655 | */ | |
656 | static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, | |
657 | uint64_t *mod_quot, float_status *s) | |
658 | { | |
659 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
660 | ||
661 | if (likely(ab_mask == float_cmask_normal)) { | |
662 | frac_modrem(a, b, mod_quot); | |
663 | return a; | |
664 | } | |
665 | ||
666 | if (mod_quot) { | |
667 | *mod_quot = 0; | |
668 | } | |
669 | ||
670 | /* All the NaN cases */ | |
671 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
672 | return parts_pick_nan(a, b, s); | |
673 | } | |
674 | ||
675 | /* Inf % N; N % 0 */ | |
676 | if (a->cls == float_class_inf || b->cls == float_class_zero) { | |
677 | float_raise(float_flag_invalid, s); | |
678 | parts_default_nan(a, s); | |
679 | return a; | |
680 | } | |
681 | ||
682 | /* N % Inf; 0 % N */ | |
683 | g_assert(b->cls == float_class_inf || a->cls == float_class_zero); | |
684 | return a; | |
685 | } | |
686 | ||
687 | /* | |
688 | * Square Root | |
689 | * | |
690 | * The base algorithm is lifted from | |
691 | * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c | |
692 | * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c | |
693 | * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c | |
694 | * and is thus MIT licenced. | |
695 | */ | |
696 | static void partsN(sqrt)(FloatPartsN *a, float_status *status, | |
697 | const FloatFmt *fmt) | |
698 | { | |
699 | const uint32_t three32 = 3u << 30; | |
700 | const uint64_t three64 = 3ull << 62; | |
701 | uint32_t d32, m32, r32, s32, u32; /* 32-bit computation */ | |
702 | uint64_t d64, m64, r64, s64, u64; /* 64-bit computation */ | |
703 | uint64_t dh, dl, rh, rl, sh, sl, uh, ul; /* 128-bit computation */ | |
704 | uint64_t d0h, d0l, d1h, d1l, d2h, d2l; | |
705 | uint64_t discard; | |
706 | bool exp_odd; | |
707 | size_t index; | |
708 | ||
709 | if (unlikely(a->cls != float_class_normal)) { | |
710 | switch (a->cls) { | |
711 | case float_class_snan: | |
712 | case float_class_qnan: | |
713 | parts_return_nan(a, status); | |
714 | return; | |
715 | case float_class_zero: | |
716 | return; | |
717 | case float_class_inf: | |
718 | if (unlikely(a->sign)) { | |
719 | goto d_nan; | |
720 | } | |
721 | return; | |
722 | default: | |
723 | g_assert_not_reached(); | |
724 | } | |
725 | } | |
726 | ||
727 | if (unlikely(a->sign)) { | |
728 | goto d_nan; | |
729 | } | |
730 | ||
731 | /* | |
732 | * Argument reduction. | |
733 | * x = 4^e frac; with integer e, and frac in [1, 4) | |
734 | * m = frac fixed point at bit 62, since we're in base 4. | |
735 | * If base-2 exponent is odd, exchange that for multiply by 2, | |
736 | * which results in no shift. | |
737 | */ | |
738 | exp_odd = a->exp & 1; | |
739 | index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6); | |
740 | if (!exp_odd) { | |
741 | frac_shr(a, 1); | |
742 | } | |
743 | ||
744 | /* | |
745 | * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4). | |
746 | * | |
747 | * Initial estimate: | |
748 | * 7-bit lookup table (1-bit exponent and 6-bit significand). | |
749 | * | |
750 | * The relative error (e = r0*sqrt(m)-1) of a linear estimate | |
751 | * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best; | |
752 | * a table lookup is faster and needs one less iteration. | |
753 | * The 7-bit table gives |e| < 0x1.fdp-9. | |
754 | * | |
755 | * A Newton-Raphson iteration for r is | |
756 | * s = m*r | |
757 | * d = s*r | |
758 | * u = 3 - d | |
759 | * r = r*u/2 | |
760 | * | |
761 | * Fixed point representations: | |
762 | * m, s, d, u, three are all 2.30; r is 0.32 | |
763 | */ | |
764 | m64 = a->frac_hi; | |
765 | m32 = m64 >> 32; | |
766 | ||
767 | r32 = rsqrt_tab[index] << 16; | |
768 | /* |r*sqrt(m) - 1| < 0x1.FDp-9 */ | |
769 | ||
770 | s32 = ((uint64_t)m32 * r32) >> 32; | |
771 | d32 = ((uint64_t)s32 * r32) >> 32; | |
772 | u32 = three32 - d32; | |
773 | ||
774 | if (N == 64) { | |
775 | /* float64 or smaller */ | |
776 | ||
777 | r32 = ((uint64_t)r32 * u32) >> 31; | |
778 | /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */ | |
779 | ||
780 | s32 = ((uint64_t)m32 * r32) >> 32; | |
781 | d32 = ((uint64_t)s32 * r32) >> 32; | |
782 | u32 = three32 - d32; | |
783 | ||
784 | if (fmt->frac_size <= 23) { | |
785 | /* float32 or smaller */ | |
786 | ||
787 | s32 = ((uint64_t)s32 * u32) >> 32; /* 3.29 */ | |
788 | s32 = (s32 - 1) >> 6; /* 9.23 */ | |
789 | /* s < sqrt(m) < s + 0x1.08p-23 */ | |
790 | ||
791 | /* compute nearest rounded result to 2.23 bits */ | |
792 | uint32_t d0 = (m32 << 16) - s32 * s32; | |
793 | uint32_t d1 = s32 - d0; | |
794 | uint32_t d2 = d1 + s32 + 1; | |
795 | s32 += d1 >> 31; | |
796 | a->frac_hi = (uint64_t)s32 << (64 - 25); | |
797 | ||
798 | /* increment or decrement for inexact */ | |
799 | if (d2 != 0) { | |
800 | a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1); | |
801 | } | |
802 | goto done; | |
803 | } | |
804 | ||
805 | /* float64 */ | |
806 | ||
807 | r64 = (uint64_t)r32 * u32 * 2; | |
808 | /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */ | |
809 | mul64To128(m64, r64, &s64, &discard); | |
810 | mul64To128(s64, r64, &d64, &discard); | |
811 | u64 = three64 - d64; | |
812 | ||
813 | mul64To128(s64, u64, &s64, &discard); /* 3.61 */ | |
814 | s64 = (s64 - 2) >> 9; /* 12.52 */ | |
815 | ||
816 | /* Compute nearest rounded result */ | |
817 | uint64_t d0 = (m64 << 42) - s64 * s64; | |
818 | uint64_t d1 = s64 - d0; | |
819 | uint64_t d2 = d1 + s64 + 1; | |
820 | s64 += d1 >> 63; | |
821 | a->frac_hi = s64 << (64 - 54); | |
822 | ||
823 | /* increment or decrement for inexact */ | |
824 | if (d2 != 0) { | |
825 | a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1); | |
826 | } | |
827 | goto done; | |
828 | } | |
829 | ||
830 | r64 = (uint64_t)r32 * u32 * 2; | |
831 | /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */ | |
832 | ||
833 | mul64To128(m64, r64, &s64, &discard); | |
834 | mul64To128(s64, r64, &d64, &discard); | |
835 | u64 = three64 - d64; | |
836 | mul64To128(u64, r64, &r64, &discard); | |
837 | r64 <<= 1; | |
838 | /* |r*sqrt(m) - 1| < 0x1.a5p-31 */ | |
839 | ||
840 | mul64To128(m64, r64, &s64, &discard); | |
841 | mul64To128(s64, r64, &d64, &discard); | |
842 | u64 = three64 - d64; | |
843 | mul64To128(u64, r64, &rh, &rl); | |
844 | add128(rh, rl, rh, rl, &rh, &rl); | |
845 | /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */ | |
846 | ||
847 | mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard); | |
848 | mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard); | |
849 | sub128(three64, 0, dh, dl, &uh, &ul); | |
850 | mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard); /* 3.125 */ | |
851 | /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */ | |
852 | ||
853 | sub128(sh, sl, 0, 4, &sh, &sl); | |
854 | shift128Right(sh, sl, 13, &sh, &sl); /* 16.112 */ | |
855 | /* s < sqrt(m) < s + 1ulp */ | |
856 | ||
857 | /* Compute nearest rounded result */ | |
858 | mul64To128(sl, sl, &d0h, &d0l); | |
859 | d0h += 2 * sh * sl; | |
860 | sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l); | |
861 | sub128(sh, sl, d0h, d0l, &d1h, &d1l); | |
862 | add128(sh, sl, 0, 1, &d2h, &d2l); | |
863 | add128(d2h, d2l, d1h, d1l, &d2h, &d2l); | |
864 | add128(sh, sl, 0, d1h >> 63, &sh, &sl); | |
865 | shift128Left(sh, sl, 128 - 114, &sh, &sl); | |
866 | ||
867 | /* increment or decrement for inexact */ | |
868 | if (d2h | d2l) { | |
869 | if ((int64_t)(d1h ^ d2h) < 0) { | |
870 | sub128(sh, sl, 0, 1, &sh, &sl); | |
871 | } else { | |
872 | add128(sh, sl, 0, 1, &sh, &sl); | |
873 | } | |
874 | } | |
875 | a->frac_lo = sl; | |
876 | a->frac_hi = sh; | |
877 | ||
878 | done: | |
879 | /* Convert back from base 4 to base 2. */ | |
880 | a->exp >>= 1; | |
881 | if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { | |
882 | frac_add(a, a, a); | |
883 | } else { | |
884 | a->exp += 1; | |
885 | } | |
886 | return; | |
887 | ||
888 | d_nan: | |
889 | float_raise(float_flag_invalid | float_flag_invalid_sqrt, status); | |
890 | parts_default_nan(a, status); | |
891 | } | |
892 | ||
893 | /* | |
894 | * Rounds the floating-point value `a' to an integer, and returns the | |
895 | * result as a floating-point value. The operation is performed | |
896 | * according to the IEC/IEEE Standard for Binary Floating-Point | |
897 | * Arithmetic. | |
898 | * | |
899 | * parts_round_to_int_normal is an internal helper function for | |
900 | * normal numbers only, returning true for inexact but not directly | |
901 | * raising float_flag_inexact. | |
902 | */ | |
903 | static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode, | |
904 | int scale, int frac_size) | |
905 | { | |
906 | uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc; | |
907 | int shift_adj; | |
908 | ||
909 | scale = MIN(MAX(scale, -0x10000), 0x10000); | |
910 | a->exp += scale; | |
911 | ||
912 | if (a->exp < 0) { | |
913 | bool one; | |
914 | ||
915 | /* All fractional */ | |
916 | switch (rmode) { | |
917 | case float_round_nearest_even: | |
918 | one = false; | |
919 | if (a->exp == -1) { | |
920 | FloatPartsN tmp; | |
921 | /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */ | |
922 | frac_add(&tmp, a, a); | |
923 | /* Anything remaining means frac > 0.5. */ | |
924 | one = !frac_eqz(&tmp); | |
925 | } | |
926 | break; | |
927 | case float_round_ties_away: | |
928 | one = a->exp == -1; | |
929 | break; | |
930 | case float_round_to_zero: | |
931 | one = false; | |
932 | break; | |
933 | case float_round_up: | |
934 | one = !a->sign; | |
935 | break; | |
936 | case float_round_down: | |
937 | one = a->sign; | |
938 | break; | |
939 | case float_round_to_odd: | |
940 | one = true; | |
941 | break; | |
942 | default: | |
943 | g_assert_not_reached(); | |
944 | } | |
945 | ||
946 | frac_clear(a); | |
947 | a->exp = 0; | |
948 | if (one) { | |
949 | a->frac_hi = DECOMPOSED_IMPLICIT_BIT; | |
950 | } else { | |
951 | a->cls = float_class_zero; | |
952 | } | |
953 | return true; | |
954 | } | |
955 | ||
956 | if (a->exp >= frac_size) { | |
957 | /* All integral */ | |
958 | return false; | |
959 | } | |
960 | ||
961 | if (N > 64 && a->exp < N - 64) { | |
962 | /* | |
963 | * Rounding is not in the low word -- shift lsb to bit 2, | |
964 | * which leaves room for sticky and rounding bit. | |
965 | */ | |
966 | shift_adj = (N - 1) - (a->exp + 2); | |
967 | frac_shrjam(a, shift_adj); | |
968 | frac_lsb = 1 << 2; | |
969 | } else { | |
970 | shift_adj = 0; | |
971 | frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63); | |
972 | } | |
973 | ||
974 | frac_lsbm1 = frac_lsb >> 1; | |
975 | rnd_mask = frac_lsb - 1; | |
976 | rnd_even_mask = rnd_mask | frac_lsb; | |
977 | ||
978 | if (!(a->frac_lo & rnd_mask)) { | |
979 | /* Fractional bits already clear, undo the shift above. */ | |
980 | frac_shl(a, shift_adj); | |
981 | return false; | |
982 | } | |
983 | ||
984 | switch (rmode) { | |
985 | case float_round_nearest_even: | |
986 | inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0); | |
987 | break; | |
988 | case float_round_ties_away: | |
989 | inc = frac_lsbm1; | |
990 | break; | |
991 | case float_round_to_zero: | |
992 | inc = 0; | |
993 | break; | |
994 | case float_round_up: | |
995 | inc = a->sign ? 0 : rnd_mask; | |
996 | break; | |
997 | case float_round_down: | |
998 | inc = a->sign ? rnd_mask : 0; | |
999 | break; | |
1000 | case float_round_to_odd: | |
1001 | inc = a->frac_lo & frac_lsb ? 0 : rnd_mask; | |
1002 | break; | |
1003 | default: | |
1004 | g_assert_not_reached(); | |
1005 | } | |
1006 | ||
1007 | if (shift_adj == 0) { | |
1008 | if (frac_addi(a, a, inc)) { | |
1009 | frac_shr(a, 1); | |
1010 | a->frac_hi |= DECOMPOSED_IMPLICIT_BIT; | |
1011 | a->exp++; | |
1012 | } | |
1013 | a->frac_lo &= ~rnd_mask; | |
1014 | } else { | |
1015 | frac_addi(a, a, inc); | |
1016 | a->frac_lo &= ~rnd_mask; | |
1017 | /* Be careful shifting back, not to overflow */ | |
1018 | frac_shl(a, shift_adj - 1); | |
1019 | if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) { | |
1020 | a->exp++; | |
1021 | } else { | |
1022 | frac_add(a, a, a); | |
1023 | } | |
1024 | } | |
1025 | return true; | |
1026 | } | |
1027 | ||
1028 | static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode, | |
1029 | int scale, float_status *s, | |
1030 | const FloatFmt *fmt) | |
1031 | { | |
1032 | switch (a->cls) { | |
1033 | case float_class_qnan: | |
1034 | case float_class_snan: | |
1035 | parts_return_nan(a, s); | |
1036 | break; | |
1037 | case float_class_zero: | |
1038 | case float_class_inf: | |
1039 | break; | |
1040 | case float_class_normal: | |
1041 | if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) { | |
1042 | float_raise(float_flag_inexact, s); | |
1043 | } | |
1044 | break; | |
1045 | default: | |
1046 | g_assert_not_reached(); | |
1047 | } | |
1048 | } | |
1049 | ||
1050 | /* | |
1051 | * Returns the result of converting the floating-point value `a' to | |
1052 | * the two's complement integer format. The conversion is performed | |
1053 | * according to the IEC/IEEE Standard for Binary Floating-Point | |
1054 | * Arithmetic---which means in particular that the conversion is | |
1055 | * rounded according to the current rounding mode. If `a' is a NaN, | |
1056 | * the largest positive integer is returned. Otherwise, if the | |
1057 | * conversion overflows, the largest integer with the same sign as `a' | |
1058 | * is returned. | |
1059 | */ | |
1060 | static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode, | |
1061 | int scale, int64_t min, int64_t max, | |
1062 | float_status *s) | |
1063 | { | |
1064 | int flags = 0; | |
1065 | uint64_t r; | |
1066 | ||
1067 | switch (p->cls) { | |
1068 | case float_class_snan: | |
1069 | flags |= float_flag_invalid_snan; | |
1070 | /* fall through */ | |
1071 | case float_class_qnan: | |
1072 | flags |= float_flag_invalid; | |
1073 | r = max; | |
1074 | break; | |
1075 | ||
1076 | case float_class_inf: | |
1077 | flags = float_flag_invalid | float_flag_invalid_cvti; | |
1078 | r = p->sign ? min : max; | |
1079 | break; | |
1080 | ||
1081 | case float_class_zero: | |
1082 | return 0; | |
1083 | ||
1084 | case float_class_normal: | |
1085 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | |
1086 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | |
1087 | flags = float_flag_inexact; | |
1088 | } | |
1089 | ||
1090 | if (p->exp <= DECOMPOSED_BINARY_POINT) { | |
1091 | r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); | |
1092 | } else { | |
1093 | r = UINT64_MAX; | |
1094 | } | |
1095 | if (p->sign) { | |
1096 | if (r <= -(uint64_t)min) { | |
1097 | r = -r; | |
1098 | } else { | |
1099 | flags = float_flag_invalid | float_flag_invalid_cvti; | |
1100 | r = min; | |
1101 | } | |
1102 | } else if (r > max) { | |
1103 | flags = float_flag_invalid | float_flag_invalid_cvti; | |
1104 | r = max; | |
1105 | } | |
1106 | break; | |
1107 | ||
1108 | default: | |
1109 | g_assert_not_reached(); | |
1110 | } | |
1111 | ||
1112 | float_raise(flags, s); | |
1113 | return r; | |
1114 | } | |
1115 | ||
1116 | /* | |
1117 | * Returns the result of converting the floating-point value `a' to | |
1118 | * the unsigned integer format. The conversion is performed according | |
1119 | * to the IEC/IEEE Standard for Binary Floating-Point | |
1120 | * Arithmetic---which means in particular that the conversion is | |
1121 | * rounded according to the current rounding mode. If `a' is a NaN, | |
1122 | * the largest unsigned integer is returned. Otherwise, if the | |
1123 | * conversion overflows, the largest unsigned integer is returned. If | |
1124 | * the 'a' is negative, the result is rounded and zero is returned; | |
1125 | * values that do not round to zero will raise the inexact exception | |
1126 | * flag. | |
1127 | */ | |
1128 | static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode, | |
1129 | int scale, uint64_t max, float_status *s) | |
1130 | { | |
1131 | int flags = 0; | |
1132 | uint64_t r; | |
1133 | ||
1134 | switch (p->cls) { | |
1135 | case float_class_snan: | |
1136 | flags |= float_flag_invalid_snan; | |
1137 | /* fall through */ | |
1138 | case float_class_qnan: | |
1139 | flags |= float_flag_invalid; | |
1140 | r = max; | |
1141 | break; | |
1142 | ||
1143 | case float_class_inf: | |
1144 | flags = float_flag_invalid | float_flag_invalid_cvti; | |
1145 | r = p->sign ? 0 : max; | |
1146 | break; | |
1147 | ||
1148 | case float_class_zero: | |
1149 | return 0; | |
1150 | ||
1151 | case float_class_normal: | |
1152 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | |
1153 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | |
1154 | flags = float_flag_inexact; | |
1155 | if (p->cls == float_class_zero) { | |
1156 | r = 0; | |
1157 | break; | |
1158 | } | |
1159 | } | |
1160 | ||
1161 | if (p->sign) { | |
1162 | flags = float_flag_invalid | float_flag_invalid_cvti; | |
1163 | r = 0; | |
1164 | } else if (p->exp > DECOMPOSED_BINARY_POINT) { | |
1165 | flags = float_flag_invalid | float_flag_invalid_cvti; | |
1166 | r = max; | |
1167 | } else { | |
1168 | r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); | |
1169 | if (r > max) { | |
1170 | flags = float_flag_invalid | float_flag_invalid_cvti; | |
1171 | r = max; | |
1172 | } | |
1173 | } | |
1174 | break; | |
1175 | ||
1176 | default: | |
1177 | g_assert_not_reached(); | |
1178 | } | |
1179 | ||
1180 | float_raise(flags, s); | |
1181 | return r; | |
1182 | } | |
1183 | ||
1184 | /* | |
1185 | * Integer to float conversions | |
1186 | * | |
1187 | * Returns the result of converting the two's complement integer `a' | |
1188 | * to the floating-point format. The conversion is performed according | |
1189 | * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. | |
1190 | */ | |
1191 | static void partsN(sint_to_float)(FloatPartsN *p, int64_t a, | |
1192 | int scale, float_status *s) | |
1193 | { | |
1194 | uint64_t f = a; | |
1195 | int shift; | |
1196 | ||
1197 | memset(p, 0, sizeof(*p)); | |
1198 | ||
1199 | if (a == 0) { | |
1200 | p->cls = float_class_zero; | |
1201 | return; | |
1202 | } | |
1203 | ||
1204 | p->cls = float_class_normal; | |
1205 | if (a < 0) { | |
1206 | f = -f; | |
1207 | p->sign = true; | |
1208 | } | |
1209 | shift = clz64(f); | |
1210 | scale = MIN(MAX(scale, -0x10000), 0x10000); | |
1211 | ||
1212 | p->exp = DECOMPOSED_BINARY_POINT - shift + scale; | |
1213 | p->frac_hi = f << shift; | |
1214 | } | |
1215 | ||
1216 | /* | |
1217 | * Unsigned Integer to float conversions | |
1218 | * | |
1219 | * Returns the result of converting the unsigned integer `a' to the | |
1220 | * floating-point format. The conversion is performed according to the | |
1221 | * IEC/IEEE Standard for Binary Floating-Point Arithmetic. | |
1222 | */ | |
1223 | static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a, | |
1224 | int scale, float_status *status) | |
1225 | { | |
1226 | memset(p, 0, sizeof(*p)); | |
1227 | ||
1228 | if (a == 0) { | |
1229 | p->cls = float_class_zero; | |
1230 | } else { | |
1231 | int shift = clz64(a); | |
1232 | scale = MIN(MAX(scale, -0x10000), 0x10000); | |
1233 | p->cls = float_class_normal; | |
1234 | p->exp = DECOMPOSED_BINARY_POINT - shift + scale; | |
1235 | p->frac_hi = a << shift; | |
1236 | } | |
1237 | } | |
1238 | ||
1239 | /* | |
1240 | * Float min/max. | |
1241 | */ | |
1242 | static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | |
1243 | float_status *s, int flags) | |
1244 | { | |
1245 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
1246 | int a_exp, b_exp, cmp; | |
1247 | ||
1248 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
1249 | /* | |
1250 | * For minNum/maxNum (IEEE 754-2008) | |
1251 | * or minimumNumber/maximumNumber (IEEE 754-2019), | |
1252 | * if one operand is a QNaN, and the other | |
1253 | * operand is numerical, then return numerical argument. | |
1254 | */ | |
1255 | if ((flags & (minmax_isnum | minmax_isnumber)) | |
1256 | && !(ab_mask & float_cmask_snan) | |
1257 | && (ab_mask & ~float_cmask_qnan)) { | |
1258 | return is_nan(a->cls) ? b : a; | |
1259 | } | |
1260 | ||
1261 | /* | |
1262 | * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag | |
1263 | * are removed and replaced with minimum, minimumNumber, maximum | |
1264 | * and maximumNumber. | |
1265 | * minimumNumber/maximumNumber behavior for SNaN is changed to: | |
1266 | * If both operands are NaNs, a QNaN is returned. | |
1267 | * If either operand is a SNaN, | |
1268 | * an invalid operation exception is signaled, | |
1269 | * but unless both operands are NaNs, | |
1270 | * the SNaN is otherwise ignored and not converted to a QNaN. | |
1271 | */ | |
1272 | if ((flags & minmax_isnumber) | |
1273 | && (ab_mask & float_cmask_snan) | |
1274 | && (ab_mask & ~float_cmask_anynan)) { | |
1275 | float_raise(float_flag_invalid, s); | |
1276 | return is_nan(a->cls) ? b : a; | |
1277 | } | |
1278 | ||
1279 | return parts_pick_nan(a, b, s); | |
1280 | } | |
1281 | ||
1282 | a_exp = a->exp; | |
1283 | b_exp = b->exp; | |
1284 | ||
1285 | if (unlikely(ab_mask != float_cmask_normal)) { | |
1286 | switch (a->cls) { | |
1287 | case float_class_normal: | |
1288 | break; | |
1289 | case float_class_inf: | |
1290 | a_exp = INT16_MAX; | |
1291 | break; | |
1292 | case float_class_zero: | |
1293 | a_exp = INT16_MIN; | |
1294 | break; | |
1295 | default: | |
1296 | g_assert_not_reached(); | |
1297 | break; | |
1298 | } | |
1299 | switch (b->cls) { | |
1300 | case float_class_normal: | |
1301 | break; | |
1302 | case float_class_inf: | |
1303 | b_exp = INT16_MAX; | |
1304 | break; | |
1305 | case float_class_zero: | |
1306 | b_exp = INT16_MIN; | |
1307 | break; | |
1308 | default: | |
1309 | g_assert_not_reached(); | |
1310 | break; | |
1311 | } | |
1312 | } | |
1313 | ||
1314 | /* Compare magnitudes. */ | |
1315 | cmp = a_exp - b_exp; | |
1316 | if (cmp == 0) { | |
1317 | cmp = frac_cmp(a, b); | |
1318 | } | |
1319 | ||
1320 | /* | |
1321 | * Take the sign into account. | |
1322 | * For ismag, only do this if the magnitudes are equal. | |
1323 | */ | |
1324 | if (!(flags & minmax_ismag) || cmp == 0) { | |
1325 | if (a->sign != b->sign) { | |
1326 | /* For differing signs, the negative operand is less. */ | |
1327 | cmp = a->sign ? -1 : 1; | |
1328 | } else if (a->sign) { | |
1329 | /* For two negative operands, invert the magnitude comparison. */ | |
1330 | cmp = -cmp; | |
1331 | } | |
1332 | } | |
1333 | ||
1334 | if (flags & minmax_ismin) { | |
1335 | cmp = -cmp; | |
1336 | } | |
1337 | return cmp < 0 ? b : a; | |
1338 | } | |
1339 | ||
1340 | /* | |
1341 | * Floating point compare | |
1342 | */ | |
1343 | static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | |
1344 | float_status *s, bool is_quiet) | |
1345 | { | |
1346 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
1347 | ||
1348 | if (likely(ab_mask == float_cmask_normal)) { | |
1349 | FloatRelation cmp; | |
1350 | ||
1351 | if (a->sign != b->sign) { | |
1352 | goto a_sign; | |
1353 | } | |
1354 | if (a->exp == b->exp) { | |
1355 | cmp = frac_cmp(a, b); | |
1356 | } else if (a->exp < b->exp) { | |
1357 | cmp = float_relation_less; | |
1358 | } else { | |
1359 | cmp = float_relation_greater; | |
1360 | } | |
1361 | if (a->sign) { | |
1362 | cmp = -cmp; | |
1363 | } | |
1364 | return cmp; | |
1365 | } | |
1366 | ||
1367 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
1368 | if (ab_mask & float_cmask_snan) { | |
1369 | float_raise(float_flag_invalid | float_flag_invalid_snan, s); | |
1370 | } else if (!is_quiet) { | |
1371 | float_raise(float_flag_invalid, s); | |
1372 | } | |
1373 | return float_relation_unordered; | |
1374 | } | |
1375 | ||
1376 | if (ab_mask & float_cmask_zero) { | |
1377 | if (ab_mask == float_cmask_zero) { | |
1378 | return float_relation_equal; | |
1379 | } else if (a->cls == float_class_zero) { | |
1380 | goto b_sign; | |
1381 | } else { | |
1382 | goto a_sign; | |
1383 | } | |
1384 | } | |
1385 | ||
1386 | if (ab_mask == float_cmask_inf) { | |
1387 | if (a->sign == b->sign) { | |
1388 | return float_relation_equal; | |
1389 | } | |
1390 | } else if (b->cls == float_class_inf) { | |
1391 | goto b_sign; | |
1392 | } else { | |
1393 | g_assert(a->cls == float_class_inf); | |
1394 | } | |
1395 | ||
1396 | a_sign: | |
1397 | return a->sign ? float_relation_less : float_relation_greater; | |
1398 | b_sign: | |
1399 | return b->sign ? float_relation_greater : float_relation_less; | |
1400 | } | |
1401 | ||
1402 | /* | |
1403 | * Multiply A by 2 raised to the power N. | |
1404 | */ | |
1405 | static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) | |
1406 | { | |
1407 | switch (a->cls) { | |
1408 | case float_class_snan: | |
1409 | case float_class_qnan: | |
1410 | parts_return_nan(a, s); | |
1411 | break; | |
1412 | case float_class_zero: | |
1413 | case float_class_inf: | |
1414 | break; | |
1415 | case float_class_normal: | |
1416 | a->exp += MIN(MAX(n, -0x10000), 0x10000); | |
1417 | break; | |
1418 | default: | |
1419 | g_assert_not_reached(); | |
1420 | } | |
1421 | } | |
1422 | ||
1423 | /* | |
1424 | * Return log2(A) | |
1425 | */ | |
1426 | static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | |
1427 | { | |
1428 | uint64_t a0, a1, r, t, ign; | |
1429 | FloatPartsN f; | |
1430 | int i, n, a_exp, f_exp; | |
1431 | ||
1432 | if (unlikely(a->cls != float_class_normal)) { | |
1433 | switch (a->cls) { | |
1434 | case float_class_snan: | |
1435 | case float_class_qnan: | |
1436 | parts_return_nan(a, s); | |
1437 | return; | |
1438 | case float_class_zero: | |
1439 | float_raise(float_flag_divbyzero, s); | |
1440 | /* log2(0) = -inf */ | |
1441 | a->cls = float_class_inf; | |
1442 | a->sign = 1; | |
1443 | return; | |
1444 | case float_class_inf: | |
1445 | if (unlikely(a->sign)) { | |
1446 | goto d_nan; | |
1447 | } | |
1448 | return; | |
1449 | default: | |
1450 | break; | |
1451 | } | |
1452 | g_assert_not_reached(); | |
1453 | } | |
1454 | if (unlikely(a->sign)) { | |
1455 | goto d_nan; | |
1456 | } | |
1457 | ||
1458 | /* TODO: This algorithm looses bits too quickly for float128. */ | |
1459 | g_assert(N == 64); | |
1460 | ||
1461 | a_exp = a->exp; | |
1462 | f_exp = -1; | |
1463 | ||
1464 | r = 0; | |
1465 | t = DECOMPOSED_IMPLICIT_BIT; | |
1466 | a0 = a->frac_hi; | |
1467 | a1 = 0; | |
1468 | ||
1469 | n = fmt->frac_size + 2; | |
1470 | if (unlikely(a_exp == -1)) { | |
1471 | /* | |
1472 | * When a_exp == -1, we're computing the log2 of a value [0.5,1.0). | |
1473 | * When the value is very close to 1.0, there are lots of 1's in | |
1474 | * the msb parts of the fraction. At the end, when we subtract | |
1475 | * this value from -1.0, we can see a catastrophic loss of precision, | |
1476 | * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the | |
1477 | * bits of y in the final result. To minimize this, compute as many | |
1478 | * digits as we can. | |
1479 | * ??? This case needs another algorithm to avoid this. | |
1480 | */ | |
1481 | n = fmt->frac_size * 2 + 2; | |
1482 | /* Don't compute a value overlapping the sticky bit */ | |
1483 | n = MIN(n, 62); | |
1484 | } | |
1485 | ||
1486 | for (i = 0; i < n; i++) { | |
1487 | if (a1) { | |
1488 | mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign); | |
1489 | } else if (a0 & 0xffffffffull) { | |
1490 | mul64To128(a0, a0, &a0, &a1); | |
1491 | } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) { | |
1492 | a0 >>= 32; | |
1493 | a0 *= a0; | |
1494 | } else { | |
1495 | goto exact; | |
1496 | } | |
1497 | ||
1498 | if (a0 & DECOMPOSED_IMPLICIT_BIT) { | |
1499 | if (unlikely(a_exp == 0 && r == 0)) { | |
1500 | /* | |
1501 | * When a_exp == 0, we're computing the log2 of a value | |
1502 | * [1.0,2.0). When the value is very close to 1.0, there | |
1503 | * are lots of 0's in the msb parts of the fraction. | |
1504 | * We need to compute more digits to produce a correct | |
1505 | * result -- restart at the top of the fraction. | |
1506 | * ??? This is likely to lose precision quickly, as for | |
1507 | * float128; we may need another method. | |
1508 | */ | |
1509 | f_exp -= i; | |
1510 | t = r = DECOMPOSED_IMPLICIT_BIT; | |
1511 | i = 0; | |
1512 | } else { | |
1513 | r |= t; | |
1514 | } | |
1515 | } else { | |
1516 | add128(a0, a1, a0, a1, &a0, &a1); | |
1517 | } | |
1518 | t >>= 1; | |
1519 | } | |
1520 | ||
1521 | /* Set sticky for inexact. */ | |
1522 | r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT); | |
1523 | ||
1524 | exact: | |
1525 | parts_sint_to_float(a, a_exp, 0, s); | |
1526 | if (r == 0) { | |
1527 | return; | |
1528 | } | |
1529 | ||
1530 | memset(&f, 0, sizeof(f)); | |
1531 | f.cls = float_class_normal; | |
1532 | f.frac_hi = r; | |
1533 | f.exp = f_exp - frac_normalize(&f); | |
1534 | ||
1535 | if (a_exp < 0) { | |
1536 | parts_sub_normal(a, &f); | |
1537 | } else if (a_exp > 0) { | |
1538 | parts_add_normal(a, &f); | |
1539 | } else { | |
1540 | *a = f; | |
1541 | } | |
1542 | return; | |
1543 | ||
1544 | d_nan: | |
1545 | float_raise(float_flag_invalid, s); | |
1546 | parts_default_nan(a, s); | |
1547 | } |