]>
Commit | Line | Data |
---|---|---|
7c45bad8 RH |
1 | /* |
2 | * QEMU float support | |
3 | * | |
4 | * The code in this source file is derived from release 2a of the SoftFloat | |
5 | * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and | |
6 | * some later contributions) are provided under that license, as detailed below. | |
7 | * It has subsequently been modified by contributors to the QEMU Project, | |
8 | * so some portions are provided under: | |
9 | * the SoftFloat-2a license | |
10 | * the BSD license | |
11 | * GPL-v2-or-later | |
12 | * | |
13 | * Any future contributions to this file after December 1st 2014 will be | |
14 | * taken to be licensed under the Softfloat-2a license unless specifically | |
15 | * indicated otherwise. | |
16 | */ | |
17 | ||
18 | static void partsN(return_nan)(FloatPartsN *a, float_status *s) | |
19 | { | |
20 | switch (a->cls) { | |
21 | case float_class_snan: | |
22 | float_raise(float_flag_invalid, s); | |
23 | if (s->default_nan_mode) { | |
24 | parts_default_nan(a, s); | |
25 | } else { | |
26 | parts_silence_nan(a, s); | |
27 | } | |
28 | break; | |
29 | case float_class_qnan: | |
30 | if (s->default_nan_mode) { | |
31 | parts_default_nan(a, s); | |
32 | } | |
33 | break; | |
34 | default: | |
35 | g_assert_not_reached(); | |
36 | } | |
37 | } | |
22c355f4 RH |
38 | |
39 | static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b, | |
40 | float_status *s) | |
41 | { | |
42 | if (is_snan(a->cls) || is_snan(b->cls)) { | |
43 | float_raise(float_flag_invalid, s); | |
44 | } | |
45 | ||
46 | if (s->default_nan_mode) { | |
47 | parts_default_nan(a, s); | |
48 | } else { | |
49 | int cmp = frac_cmp(a, b); | |
50 | if (cmp == 0) { | |
51 | cmp = a->sign < b->sign; | |
52 | } | |
53 | ||
54 | if (pickNaN(a->cls, b->cls, cmp > 0, s)) { | |
55 | a = b; | |
56 | } | |
57 | if (is_snan(a->cls)) { | |
58 | parts_silence_nan(a, s); | |
59 | } | |
60 | } | |
61 | return a; | |
62 | } | |
979582d0 RH |
63 | |
64 | static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b, | |
65 | FloatPartsN *c, float_status *s, | |
66 | int ab_mask, int abc_mask) | |
67 | { | |
68 | int which; | |
69 | ||
70 | if (unlikely(abc_mask & float_cmask_snan)) { | |
71 | float_raise(float_flag_invalid, s); | |
72 | } | |
73 | ||
74 | which = pickNaNMulAdd(a->cls, b->cls, c->cls, | |
75 | ab_mask == float_cmask_infzero, s); | |
76 | ||
77 | if (s->default_nan_mode || which == 3) { | |
78 | /* | |
79 | * Note that this check is after pickNaNMulAdd so that function | |
80 | * has an opportunity to set the Invalid flag for infzero. | |
81 | */ | |
82 | parts_default_nan(a, s); | |
83 | return a; | |
84 | } | |
85 | ||
86 | switch (which) { | |
87 | case 0: | |
88 | break; | |
89 | case 1: | |
90 | a = b; | |
91 | break; | |
92 | case 2: | |
93 | a = c; | |
94 | break; | |
95 | default: | |
96 | g_assert_not_reached(); | |
97 | } | |
98 | if (is_snan(a->cls)) { | |
99 | parts_silence_nan(a, s); | |
100 | } | |
101 | return a; | |
102 | } | |
d46975bc RH |
103 | |
104 | /* | |
105 | * Canonicalize the FloatParts structure. Determine the class, | |
106 | * unbias the exponent, and normalize the fraction. | |
107 | */ | |
108 | static void partsN(canonicalize)(FloatPartsN *p, float_status *status, | |
109 | const FloatFmt *fmt) | |
110 | { | |
111 | if (unlikely(p->exp == 0)) { | |
112 | if (likely(frac_eqz(p))) { | |
113 | p->cls = float_class_zero; | |
114 | } else if (status->flush_inputs_to_zero) { | |
115 | float_raise(float_flag_input_denormal, status); | |
116 | p->cls = float_class_zero; | |
117 | frac_clear(p); | |
118 | } else { | |
119 | int shift = frac_normalize(p); | |
120 | p->cls = float_class_normal; | |
121 | p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1; | |
122 | } | |
123 | } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) { | |
124 | p->cls = float_class_normal; | |
125 | p->exp -= fmt->exp_bias; | |
126 | frac_shl(p, fmt->frac_shift); | |
127 | p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; | |
128 | } else if (likely(frac_eqz(p))) { | |
129 | p->cls = float_class_inf; | |
130 | } else { | |
131 | frac_shl(p, fmt->frac_shift); | |
132 | p->cls = (parts_is_snan_frac(p->frac_hi, status) | |
133 | ? float_class_snan : float_class_qnan); | |
134 | } | |
135 | } | |
ee6959f2 RH |
136 | |
137 | /* | |
138 | * Round and uncanonicalize a floating-point number by parts. There | |
139 | * are FRAC_SHIFT bits that may require rounding at the bottom of the | |
140 | * fraction; these bits will be removed. The exponent will be biased | |
141 | * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0]. | |
142 | */ | |
25fdedf0 RH |
143 | static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, |
144 | const FloatFmt *fmt) | |
ee6959f2 RH |
145 | { |
146 | const int exp_max = fmt->exp_max; | |
147 | const int frac_shift = fmt->frac_shift; | |
ee6959f2 | 148 | const uint64_t round_mask = fmt->round_mask; |
d6e1f0cd RH |
149 | const uint64_t frac_lsb = round_mask + 1; |
150 | const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1); | |
151 | const uint64_t roundeven_mask = round_mask | frac_lsb; | |
ee6959f2 | 152 | uint64_t inc; |
25fdedf0 | 153 | bool overflow_norm = false; |
ee6959f2 RH |
154 | int exp, flags = 0; |
155 | ||
ee6959f2 RH |
156 | switch (s->float_rounding_mode) { |
157 | case float_round_nearest_even: | |
98b3cff7 RH |
158 | if (N > 64 && frac_lsb == 0) { |
159 | inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1 | |
160 | ? frac_lsbm1 : 0); | |
161 | } else { | |
162 | inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 | |
163 | ? frac_lsbm1 : 0); | |
164 | } | |
ee6959f2 RH |
165 | break; |
166 | case float_round_ties_away: | |
ee6959f2 RH |
167 | inc = frac_lsbm1; |
168 | break; | |
169 | case float_round_to_zero: | |
170 | overflow_norm = true; | |
171 | inc = 0; | |
172 | break; | |
173 | case float_round_up: | |
174 | inc = p->sign ? 0 : round_mask; | |
175 | overflow_norm = p->sign; | |
176 | break; | |
177 | case float_round_down: | |
178 | inc = p->sign ? round_mask : 0; | |
179 | overflow_norm = !p->sign; | |
180 | break; | |
181 | case float_round_to_odd: | |
182 | overflow_norm = true; | |
60c8f726 RH |
183 | /* fall through */ |
184 | case float_round_to_odd_inf: | |
98b3cff7 RH |
185 | if (N > 64 && frac_lsb == 0) { |
186 | inc = p->frac_hi & 1 ? 0 : round_mask; | |
187 | } else { | |
188 | inc = p->frac_lo & frac_lsb ? 0 : round_mask; | |
189 | } | |
ee6959f2 RH |
190 | break; |
191 | default: | |
192 | g_assert_not_reached(); | |
193 | } | |
194 | ||
195 | exp = p->exp + fmt->exp_bias; | |
196 | if (likely(exp > 0)) { | |
197 | if (p->frac_lo & round_mask) { | |
198 | flags |= float_flag_inexact; | |
199 | if (frac_addi(p, p, inc)) { | |
200 | frac_shr(p, 1); | |
201 | p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; | |
202 | exp++; | |
203 | } | |
98b3cff7 | 204 | p->frac_lo &= ~round_mask; |
ee6959f2 | 205 | } |
ee6959f2 RH |
206 | |
207 | if (fmt->arm_althp) { | |
208 | /* ARM Alt HP eschews Inf and NaN for a wider exponent. */ | |
209 | if (unlikely(exp > exp_max)) { | |
210 | /* Overflow. Return the maximum normal. */ | |
211 | flags = float_flag_invalid; | |
212 | exp = exp_max; | |
213 | frac_allones(p); | |
98b3cff7 | 214 | p->frac_lo &= ~round_mask; |
ee6959f2 RH |
215 | } |
216 | } else if (unlikely(exp >= exp_max)) { | |
217 | flags |= float_flag_overflow | float_flag_inexact; | |
218 | if (overflow_norm) { | |
219 | exp = exp_max - 1; | |
220 | frac_allones(p); | |
98b3cff7 | 221 | p->frac_lo &= ~round_mask; |
ee6959f2 RH |
222 | } else { |
223 | p->cls = float_class_inf; | |
224 | exp = exp_max; | |
225 | frac_clear(p); | |
226 | } | |
227 | } | |
98b3cff7 | 228 | frac_shr(p, frac_shift); |
ee6959f2 RH |
229 | } else if (s->flush_to_zero) { |
230 | flags |= float_flag_output_denormal; | |
231 | p->cls = float_class_zero; | |
232 | exp = 0; | |
233 | frac_clear(p); | |
234 | } else { | |
235 | bool is_tiny = s->tininess_before_rounding || exp < 0; | |
236 | ||
237 | if (!is_tiny) { | |
238 | FloatPartsN discard; | |
239 | is_tiny = !frac_addi(&discard, p, inc); | |
240 | } | |
241 | ||
242 | frac_shrjam(p, 1 - exp); | |
243 | ||
244 | if (p->frac_lo & round_mask) { | |
245 | /* Need to recompute round-to-even/round-to-odd. */ | |
246 | switch (s->float_rounding_mode) { | |
247 | case float_round_nearest_even: | |
98b3cff7 RH |
248 | if (N > 64 && frac_lsb == 0) { |
249 | inc = ((p->frac_hi & 1) || | |
250 | (p->frac_lo & round_mask) != frac_lsbm1 | |
251 | ? frac_lsbm1 : 0); | |
252 | } else { | |
253 | inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 | |
254 | ? frac_lsbm1 : 0); | |
255 | } | |
ee6959f2 RH |
256 | break; |
257 | case float_round_to_odd: | |
60c8f726 | 258 | case float_round_to_odd_inf: |
98b3cff7 RH |
259 | if (N > 64 && frac_lsb == 0) { |
260 | inc = p->frac_hi & 1 ? 0 : round_mask; | |
261 | } else { | |
262 | inc = p->frac_lo & frac_lsb ? 0 : round_mask; | |
263 | } | |
ee6959f2 RH |
264 | break; |
265 | default: | |
266 | break; | |
267 | } | |
268 | flags |= float_flag_inexact; | |
269 | frac_addi(p, p, inc); | |
98b3cff7 | 270 | p->frac_lo &= ~round_mask; |
ee6959f2 RH |
271 | } |
272 | ||
273 | exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0; | |
274 | frac_shr(p, frac_shift); | |
275 | ||
276 | if (is_tiny && (flags & float_flag_inexact)) { | |
277 | flags |= float_flag_underflow; | |
278 | } | |
279 | if (exp == 0 && frac_eqz(p)) { | |
280 | p->cls = float_class_zero; | |
281 | } | |
282 | } | |
283 | p->exp = exp; | |
284 | float_raise(flags, s); | |
285 | } | |
da10a907 | 286 | |
25fdedf0 RH |
287 | static void partsN(uncanon)(FloatPartsN *p, float_status *s, |
288 | const FloatFmt *fmt) | |
289 | { | |
290 | if (likely(p->cls == float_class_normal)) { | |
291 | parts_uncanon_normal(p, s, fmt); | |
292 | } else { | |
293 | switch (p->cls) { | |
294 | case float_class_zero: | |
295 | p->exp = 0; | |
296 | frac_clear(p); | |
297 | return; | |
298 | case float_class_inf: | |
299 | g_assert(!fmt->arm_althp); | |
300 | p->exp = fmt->exp_max; | |
301 | frac_clear(p); | |
302 | return; | |
303 | case float_class_qnan: | |
304 | case float_class_snan: | |
305 | g_assert(!fmt->arm_althp); | |
306 | p->exp = fmt->exp_max; | |
307 | frac_shr(p, fmt->frac_shift); | |
308 | return; | |
309 | default: | |
310 | break; | |
311 | } | |
312 | g_assert_not_reached(); | |
313 | } | |
314 | } | |
315 | ||
da10a907 RH |
316 | /* |
317 | * Returns the result of adding or subtracting the values of the | |
318 | * floating-point values `a' and `b'. The operation is performed | |
319 | * according to the IEC/IEEE Standard for Binary Floating-Point | |
320 | * Arithmetic. | |
321 | */ | |
322 | static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | |
323 | float_status *s, bool subtract) | |
324 | { | |
325 | bool b_sign = b->sign ^ subtract; | |
326 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
327 | ||
328 | if (a->sign != b_sign) { | |
329 | /* Subtraction */ | |
330 | if (likely(ab_mask == float_cmask_normal)) { | |
331 | if (parts_sub_normal(a, b)) { | |
332 | return a; | |
333 | } | |
334 | /* Subtract was exact, fall through to set sign. */ | |
335 | ab_mask = float_cmask_zero; | |
336 | } | |
337 | ||
338 | if (ab_mask == float_cmask_zero) { | |
339 | a->sign = s->float_rounding_mode == float_round_down; | |
340 | return a; | |
341 | } | |
342 | ||
343 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
344 | goto p_nan; | |
345 | } | |
346 | ||
347 | if (ab_mask & float_cmask_inf) { | |
348 | if (a->cls != float_class_inf) { | |
349 | /* N - Inf */ | |
350 | goto return_b; | |
351 | } | |
352 | if (b->cls != float_class_inf) { | |
353 | /* Inf - N */ | |
354 | return a; | |
355 | } | |
356 | /* Inf - Inf */ | |
357 | float_raise(float_flag_invalid, s); | |
358 | parts_default_nan(a, s); | |
359 | return a; | |
360 | } | |
361 | } else { | |
362 | /* Addition */ | |
363 | if (likely(ab_mask == float_cmask_normal)) { | |
364 | parts_add_normal(a, b); | |
365 | return a; | |
366 | } | |
367 | ||
368 | if (ab_mask == float_cmask_zero) { | |
369 | return a; | |
370 | } | |
371 | ||
372 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
373 | goto p_nan; | |
374 | } | |
375 | ||
376 | if (ab_mask & float_cmask_inf) { | |
377 | a->cls = float_class_inf; | |
378 | return a; | |
379 | } | |
380 | } | |
381 | ||
382 | if (b->cls == float_class_zero) { | |
383 | g_assert(a->cls == float_class_normal); | |
384 | return a; | |
385 | } | |
386 | ||
387 | g_assert(a->cls == float_class_zero); | |
388 | g_assert(b->cls == float_class_normal); | |
389 | return_b: | |
390 | b->sign = b_sign; | |
391 | return b; | |
392 | ||
393 | p_nan: | |
394 | return parts_pick_nan(a, b, s); | |
395 | } | |
aca84527 RH |
396 | |
397 | /* | |
398 | * Returns the result of multiplying the floating-point values `a' and | |
399 | * `b'. The operation is performed according to the IEC/IEEE Standard | |
400 | * for Binary Floating-Point Arithmetic. | |
401 | */ | |
402 | static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, | |
403 | float_status *s) | |
404 | { | |
405 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
406 | bool sign = a->sign ^ b->sign; | |
407 | ||
408 | if (likely(ab_mask == float_cmask_normal)) { | |
409 | FloatPartsW tmp; | |
410 | ||
411 | frac_mulw(&tmp, a, b); | |
412 | frac_truncjam(a, &tmp); | |
413 | ||
414 | a->exp += b->exp + 1; | |
415 | if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { | |
416 | frac_add(a, a, a); | |
417 | a->exp -= 1; | |
418 | } | |
419 | ||
420 | a->sign = sign; | |
421 | return a; | |
422 | } | |
423 | ||
424 | /* Inf * Zero == NaN */ | |
425 | if (unlikely(ab_mask == float_cmask_infzero)) { | |
426 | float_raise(float_flag_invalid, s); | |
427 | parts_default_nan(a, s); | |
428 | return a; | |
429 | } | |
430 | ||
431 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
432 | return parts_pick_nan(a, b, s); | |
433 | } | |
434 | ||
435 | /* Multiply by 0 or Inf */ | |
436 | if (ab_mask & float_cmask_inf) { | |
437 | a->cls = float_class_inf; | |
438 | a->sign = sign; | |
439 | return a; | |
440 | } | |
441 | ||
442 | g_assert(ab_mask & float_cmask_zero); | |
443 | a->cls = float_class_zero; | |
444 | a->sign = sign; | |
445 | return a; | |
446 | } | |
dedd123c RH |
447 | |
448 | /* | |
449 | * Returns the result of multiplying the floating-point values `a' and | |
450 | * `b' then adding 'c', with no intermediate rounding step after the | |
451 | * multiplication. The operation is performed according to the | |
452 | * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008. | |
453 | * The flags argument allows the caller to select negation of the | |
454 | * addend, the intermediate product, or the final result. (The | |
455 | * difference between this and having the caller do a separate | |
456 | * negation is that negating externally will flip the sign bit on NaNs.) | |
457 | * | |
458 | * Requires A and C extracted into a double-sized structure to provide the | |
459 | * extra space for the widening multiply. | |
460 | */ | |
461 | static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b, | |
462 | FloatPartsN *c, int flags, float_status *s) | |
463 | { | |
464 | int ab_mask, abc_mask; | |
465 | FloatPartsW p_widen, c_widen; | |
466 | ||
467 | ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
468 | abc_mask = float_cmask(c->cls) | ab_mask; | |
469 | ||
470 | /* | |
471 | * It is implementation-defined whether the cases of (0,inf,qnan) | |
472 | * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN | |
473 | * they return if they do), so we have to hand this information | |
474 | * off to the target-specific pick-a-NaN routine. | |
475 | */ | |
476 | if (unlikely(abc_mask & float_cmask_anynan)) { | |
477 | return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask); | |
478 | } | |
479 | ||
480 | if (flags & float_muladd_negate_c) { | |
481 | c->sign ^= 1; | |
482 | } | |
483 | ||
484 | /* Compute the sign of the product into A. */ | |
485 | a->sign ^= b->sign; | |
486 | if (flags & float_muladd_negate_product) { | |
487 | a->sign ^= 1; | |
488 | } | |
489 | ||
490 | if (unlikely(ab_mask != float_cmask_normal)) { | |
491 | if (unlikely(ab_mask == float_cmask_infzero)) { | |
492 | goto d_nan; | |
493 | } | |
494 | ||
495 | if (ab_mask & float_cmask_inf) { | |
496 | if (c->cls == float_class_inf && a->sign != c->sign) { | |
497 | goto d_nan; | |
498 | } | |
499 | goto return_inf; | |
500 | } | |
501 | ||
502 | g_assert(ab_mask & float_cmask_zero); | |
503 | if (c->cls == float_class_normal) { | |
504 | *a = *c; | |
505 | goto return_normal; | |
506 | } | |
507 | if (c->cls == float_class_zero) { | |
508 | if (a->sign != c->sign) { | |
509 | goto return_sub_zero; | |
510 | } | |
511 | goto return_zero; | |
512 | } | |
513 | g_assert(c->cls == float_class_inf); | |
514 | } | |
515 | ||
516 | if (unlikely(c->cls == float_class_inf)) { | |
517 | a->sign = c->sign; | |
518 | goto return_inf; | |
519 | } | |
520 | ||
521 | /* Perform the multiplication step. */ | |
522 | p_widen.sign = a->sign; | |
523 | p_widen.exp = a->exp + b->exp + 1; | |
524 | frac_mulw(&p_widen, a, b); | |
525 | if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) { | |
526 | frac_add(&p_widen, &p_widen, &p_widen); | |
527 | p_widen.exp -= 1; | |
528 | } | |
529 | ||
530 | /* Perform the addition step. */ | |
531 | if (c->cls != float_class_zero) { | |
532 | /* Zero-extend C to less significant bits. */ | |
533 | frac_widen(&c_widen, c); | |
534 | c_widen.exp = c->exp; | |
535 | ||
536 | if (a->sign == c->sign) { | |
537 | parts_add_normal(&p_widen, &c_widen); | |
538 | } else if (!parts_sub_normal(&p_widen, &c_widen)) { | |
539 | goto return_sub_zero; | |
540 | } | |
541 | } | |
542 | ||
543 | /* Narrow with sticky bit, for proper rounding later. */ | |
544 | frac_truncjam(a, &p_widen); | |
545 | a->sign = p_widen.sign; | |
546 | a->exp = p_widen.exp; | |
547 | ||
548 | return_normal: | |
549 | if (flags & float_muladd_halve_result) { | |
550 | a->exp -= 1; | |
551 | } | |
552 | finish_sign: | |
553 | if (flags & float_muladd_negate_result) { | |
554 | a->sign ^= 1; | |
555 | } | |
556 | return a; | |
557 | ||
558 | return_sub_zero: | |
559 | a->sign = s->float_rounding_mode == float_round_down; | |
560 | return_zero: | |
561 | a->cls = float_class_zero; | |
562 | goto finish_sign; | |
563 | ||
564 | return_inf: | |
565 | a->cls = float_class_inf; | |
566 | goto finish_sign; | |
567 | ||
568 | d_nan: | |
569 | float_raise(float_flag_invalid, s); | |
570 | parts_default_nan(a, s); | |
571 | return a; | |
572 | } | |
ec961b81 RH |
573 | |
574 | /* | |
575 | * Returns the result of dividing the floating-point value `a' by the | |
576 | * corresponding value `b'. The operation is performed according to | |
577 | * the IEC/IEEE Standard for Binary Floating-Point Arithmetic. | |
578 | */ | |
579 | static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, | |
580 | float_status *s) | |
581 | { | |
582 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
583 | bool sign = a->sign ^ b->sign; | |
584 | ||
585 | if (likely(ab_mask == float_cmask_normal)) { | |
586 | a->sign = sign; | |
587 | a->exp -= b->exp + frac_div(a, b); | |
588 | return a; | |
589 | } | |
590 | ||
591 | /* 0/0 or Inf/Inf => NaN */ | |
592 | if (unlikely(ab_mask == float_cmask_zero) || | |
593 | unlikely(ab_mask == float_cmask_inf)) { | |
594 | float_raise(float_flag_invalid, s); | |
595 | parts_default_nan(a, s); | |
596 | return a; | |
597 | } | |
598 | ||
599 | /* All the NaN cases */ | |
600 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
601 | return parts_pick_nan(a, b, s); | |
602 | } | |
603 | ||
604 | a->sign = sign; | |
605 | ||
606 | /* Inf / X */ | |
607 | if (a->cls == float_class_inf) { | |
608 | return a; | |
609 | } | |
610 | ||
611 | /* 0 / X */ | |
612 | if (a->cls == float_class_zero) { | |
613 | return a; | |
614 | } | |
615 | ||
616 | /* X / Inf */ | |
617 | if (b->cls == float_class_inf) { | |
618 | a->cls = float_class_zero; | |
619 | return a; | |
620 | } | |
621 | ||
622 | /* X / 0 => Inf */ | |
623 | g_assert(b->cls == float_class_zero); | |
624 | float_raise(float_flag_divbyzero, s); | |
625 | a->cls = float_class_inf; | |
626 | return a; | |
627 | } | |
afc34931 | 628 | |
feaf2e9c RH |
629 | /* |
630 | * Floating point remainder, per IEC/IEEE, or modulus. | |
631 | */ | |
632 | static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, | |
633 | uint64_t *mod_quot, float_status *s) | |
634 | { | |
635 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
636 | ||
637 | if (likely(ab_mask == float_cmask_normal)) { | |
638 | frac_modrem(a, b, mod_quot); | |
639 | return a; | |
640 | } | |
641 | ||
642 | if (mod_quot) { | |
643 | *mod_quot = 0; | |
644 | } | |
645 | ||
646 | /* All the NaN cases */ | |
647 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
648 | return parts_pick_nan(a, b, s); | |
649 | } | |
650 | ||
651 | /* Inf % N; N % 0 */ | |
652 | if (a->cls == float_class_inf || b->cls == float_class_zero) { | |
653 | float_raise(float_flag_invalid, s); | |
654 | parts_default_nan(a, s); | |
655 | return a; | |
656 | } | |
657 | ||
658 | /* N % Inf; 0 % N */ | |
659 | g_assert(b->cls == float_class_inf || a->cls == float_class_zero); | |
660 | return a; | |
661 | } | |
662 | ||
9261b245 RH |
663 | /* |
664 | * Square Root | |
665 | * | |
666 | * The base algorithm is lifted from | |
667 | * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c | |
668 | * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c | |
669 | * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c | |
670 | * and is thus MIT licenced. | |
671 | */ | |
672 | static void partsN(sqrt)(FloatPartsN *a, float_status *status, | |
673 | const FloatFmt *fmt) | |
674 | { | |
675 | const uint32_t three32 = 3u << 30; | |
676 | const uint64_t three64 = 3ull << 62; | |
677 | uint32_t d32, m32, r32, s32, u32; /* 32-bit computation */ | |
678 | uint64_t d64, m64, r64, s64, u64; /* 64-bit computation */ | |
679 | uint64_t dh, dl, rh, rl, sh, sl, uh, ul; /* 128-bit computation */ | |
680 | uint64_t d0h, d0l, d1h, d1l, d2h, d2l; | |
681 | uint64_t discard; | |
682 | bool exp_odd; | |
683 | size_t index; | |
684 | ||
685 | if (unlikely(a->cls != float_class_normal)) { | |
686 | switch (a->cls) { | |
687 | case float_class_snan: | |
688 | case float_class_qnan: | |
689 | parts_return_nan(a, status); | |
690 | return; | |
691 | case float_class_zero: | |
692 | return; | |
693 | case float_class_inf: | |
694 | if (unlikely(a->sign)) { | |
695 | goto d_nan; | |
696 | } | |
697 | return; | |
698 | default: | |
699 | g_assert_not_reached(); | |
700 | } | |
701 | } | |
702 | ||
703 | if (unlikely(a->sign)) { | |
704 | goto d_nan; | |
705 | } | |
706 | ||
707 | /* | |
708 | * Argument reduction. | |
709 | * x = 4^e frac; with integer e, and frac in [1, 4) | |
710 | * m = frac fixed point at bit 62, since we're in base 4. | |
711 | * If base-2 exponent is odd, exchange that for multiply by 2, | |
712 | * which results in no shift. | |
713 | */ | |
714 | exp_odd = a->exp & 1; | |
715 | index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6); | |
716 | if (!exp_odd) { | |
717 | frac_shr(a, 1); | |
718 | } | |
719 | ||
720 | /* | |
721 | * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4). | |
722 | * | |
723 | * Initial estimate: | |
724 | * 7-bit lookup table (1-bit exponent and 6-bit significand). | |
725 | * | |
726 | * The relative error (e = r0*sqrt(m)-1) of a linear estimate | |
727 | * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best; | |
728 | * a table lookup is faster and needs one less iteration. | |
729 | * The 7-bit table gives |e| < 0x1.fdp-9. | |
730 | * | |
731 | * A Newton-Raphson iteration for r is | |
732 | * s = m*r | |
733 | * d = s*r | |
734 | * u = 3 - d | |
735 | * r = r*u/2 | |
736 | * | |
737 | * Fixed point representations: | |
738 | * m, s, d, u, three are all 2.30; r is 0.32 | |
739 | */ | |
740 | m64 = a->frac_hi; | |
741 | m32 = m64 >> 32; | |
742 | ||
743 | r32 = rsqrt_tab[index] << 16; | |
744 | /* |r*sqrt(m) - 1| < 0x1.FDp-9 */ | |
745 | ||
746 | s32 = ((uint64_t)m32 * r32) >> 32; | |
747 | d32 = ((uint64_t)s32 * r32) >> 32; | |
748 | u32 = three32 - d32; | |
749 | ||
750 | if (N == 64) { | |
751 | /* float64 or smaller */ | |
752 | ||
753 | r32 = ((uint64_t)r32 * u32) >> 31; | |
754 | /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */ | |
755 | ||
756 | s32 = ((uint64_t)m32 * r32) >> 32; | |
757 | d32 = ((uint64_t)s32 * r32) >> 32; | |
758 | u32 = three32 - d32; | |
759 | ||
760 | if (fmt->frac_size <= 23) { | |
761 | /* float32 or smaller */ | |
762 | ||
763 | s32 = ((uint64_t)s32 * u32) >> 32; /* 3.29 */ | |
764 | s32 = (s32 - 1) >> 6; /* 9.23 */ | |
765 | /* s < sqrt(m) < s + 0x1.08p-23 */ | |
766 | ||
767 | /* compute nearest rounded result to 2.23 bits */ | |
768 | uint32_t d0 = (m32 << 16) - s32 * s32; | |
769 | uint32_t d1 = s32 - d0; | |
770 | uint32_t d2 = d1 + s32 + 1; | |
771 | s32 += d1 >> 31; | |
772 | a->frac_hi = (uint64_t)s32 << (64 - 25); | |
773 | ||
774 | /* increment or decrement for inexact */ | |
775 | if (d2 != 0) { | |
776 | a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1); | |
777 | } | |
778 | goto done; | |
779 | } | |
780 | ||
781 | /* float64 */ | |
782 | ||
783 | r64 = (uint64_t)r32 * u32 * 2; | |
784 | /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */ | |
785 | mul64To128(m64, r64, &s64, &discard); | |
786 | mul64To128(s64, r64, &d64, &discard); | |
787 | u64 = three64 - d64; | |
788 | ||
789 | mul64To128(s64, u64, &s64, &discard); /* 3.61 */ | |
790 | s64 = (s64 - 2) >> 9; /* 12.52 */ | |
791 | ||
792 | /* Compute nearest rounded result */ | |
793 | uint64_t d0 = (m64 << 42) - s64 * s64; | |
794 | uint64_t d1 = s64 - d0; | |
795 | uint64_t d2 = d1 + s64 + 1; | |
796 | s64 += d1 >> 63; | |
797 | a->frac_hi = s64 << (64 - 54); | |
798 | ||
799 | /* increment or decrement for inexact */ | |
800 | if (d2 != 0) { | |
801 | a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1); | |
802 | } | |
803 | goto done; | |
804 | } | |
805 | ||
806 | r64 = (uint64_t)r32 * u32 * 2; | |
807 | /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */ | |
808 | ||
809 | mul64To128(m64, r64, &s64, &discard); | |
810 | mul64To128(s64, r64, &d64, &discard); | |
811 | u64 = three64 - d64; | |
812 | mul64To128(u64, r64, &r64, &discard); | |
813 | r64 <<= 1; | |
814 | /* |r*sqrt(m) - 1| < 0x1.a5p-31 */ | |
815 | ||
816 | mul64To128(m64, r64, &s64, &discard); | |
817 | mul64To128(s64, r64, &d64, &discard); | |
818 | u64 = three64 - d64; | |
819 | mul64To128(u64, r64, &rh, &rl); | |
820 | add128(rh, rl, rh, rl, &rh, &rl); | |
821 | /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */ | |
822 | ||
823 | mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard); | |
824 | mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard); | |
825 | sub128(three64, 0, dh, dl, &uh, &ul); | |
826 | mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard); /* 3.125 */ | |
827 | /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */ | |
828 | ||
829 | sub128(sh, sl, 0, 4, &sh, &sl); | |
830 | shift128Right(sh, sl, 13, &sh, &sl); /* 16.112 */ | |
831 | /* s < sqrt(m) < s + 1ulp */ | |
832 | ||
833 | /* Compute nearest rounded result */ | |
834 | mul64To128(sl, sl, &d0h, &d0l); | |
835 | d0h += 2 * sh * sl; | |
836 | sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l); | |
837 | sub128(sh, sl, d0h, d0l, &d1h, &d1l); | |
838 | add128(sh, sl, 0, 1, &d2h, &d2l); | |
839 | add128(d2h, d2l, d1h, d1l, &d2h, &d2l); | |
840 | add128(sh, sl, 0, d1h >> 63, &sh, &sl); | |
841 | shift128Left(sh, sl, 128 - 114, &sh, &sl); | |
842 | ||
843 | /* increment or decrement for inexact */ | |
844 | if (d2h | d2l) { | |
845 | if ((int64_t)(d1h ^ d2h) < 0) { | |
846 | sub128(sh, sl, 0, 1, &sh, &sl); | |
847 | } else { | |
848 | add128(sh, sl, 0, 1, &sh, &sl); | |
849 | } | |
850 | } | |
851 | a->frac_lo = sl; | |
852 | a->frac_hi = sh; | |
853 | ||
854 | done: | |
855 | /* Convert back from base 4 to base 2. */ | |
856 | a->exp >>= 1; | |
857 | if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { | |
858 | frac_add(a, a, a); | |
859 | } else { | |
860 | a->exp += 1; | |
861 | } | |
862 | return; | |
863 | ||
864 | d_nan: | |
865 | float_raise(float_flag_invalid, status); | |
866 | parts_default_nan(a, status); | |
867 | } | |
868 | ||
afc34931 RH |
869 | /* |
870 | * Rounds the floating-point value `a' to an integer, and returns the | |
871 | * result as a floating-point value. The operation is performed | |
872 | * according to the IEC/IEEE Standard for Binary Floating-Point | |
873 | * Arithmetic. | |
874 | * | |
875 | * parts_round_to_int_normal is an internal helper function for | |
876 | * normal numbers only, returning true for inexact but not directly | |
877 | * raising float_flag_inexact. | |
878 | */ | |
879 | static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode, | |
880 | int scale, int frac_size) | |
881 | { | |
882 | uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc; | |
883 | int shift_adj; | |
884 | ||
885 | scale = MIN(MAX(scale, -0x10000), 0x10000); | |
886 | a->exp += scale; | |
887 | ||
888 | if (a->exp < 0) { | |
889 | bool one; | |
890 | ||
891 | /* All fractional */ | |
892 | switch (rmode) { | |
893 | case float_round_nearest_even: | |
894 | one = false; | |
895 | if (a->exp == -1) { | |
896 | FloatPartsN tmp; | |
897 | /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */ | |
898 | frac_add(&tmp, a, a); | |
899 | /* Anything remaining means frac > 0.5. */ | |
900 | one = !frac_eqz(&tmp); | |
901 | } | |
902 | break; | |
903 | case float_round_ties_away: | |
904 | one = a->exp == -1; | |
905 | break; | |
906 | case float_round_to_zero: | |
907 | one = false; | |
908 | break; | |
909 | case float_round_up: | |
910 | one = !a->sign; | |
911 | break; | |
912 | case float_round_down: | |
913 | one = a->sign; | |
914 | break; | |
915 | case float_round_to_odd: | |
916 | one = true; | |
917 | break; | |
918 | default: | |
919 | g_assert_not_reached(); | |
920 | } | |
921 | ||
922 | frac_clear(a); | |
923 | a->exp = 0; | |
924 | if (one) { | |
925 | a->frac_hi = DECOMPOSED_IMPLICIT_BIT; | |
926 | } else { | |
927 | a->cls = float_class_zero; | |
928 | } | |
929 | return true; | |
930 | } | |
931 | ||
932 | if (a->exp >= frac_size) { | |
933 | /* All integral */ | |
934 | return false; | |
935 | } | |
936 | ||
937 | if (N > 64 && a->exp < N - 64) { | |
938 | /* | |
939 | * Rounding is not in the low word -- shift lsb to bit 2, | |
940 | * which leaves room for sticky and rounding bit. | |
941 | */ | |
942 | shift_adj = (N - 1) - (a->exp + 2); | |
943 | frac_shrjam(a, shift_adj); | |
944 | frac_lsb = 1 << 2; | |
945 | } else { | |
946 | shift_adj = 0; | |
947 | frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63); | |
948 | } | |
949 | ||
950 | frac_lsbm1 = frac_lsb >> 1; | |
951 | rnd_mask = frac_lsb - 1; | |
952 | rnd_even_mask = rnd_mask | frac_lsb; | |
953 | ||
954 | if (!(a->frac_lo & rnd_mask)) { | |
955 | /* Fractional bits already clear, undo the shift above. */ | |
956 | frac_shl(a, shift_adj); | |
957 | return false; | |
958 | } | |
959 | ||
960 | switch (rmode) { | |
961 | case float_round_nearest_even: | |
962 | inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0); | |
963 | break; | |
964 | case float_round_ties_away: | |
965 | inc = frac_lsbm1; | |
966 | break; | |
967 | case float_round_to_zero: | |
968 | inc = 0; | |
969 | break; | |
970 | case float_round_up: | |
971 | inc = a->sign ? 0 : rnd_mask; | |
972 | break; | |
973 | case float_round_down: | |
974 | inc = a->sign ? rnd_mask : 0; | |
975 | break; | |
976 | case float_round_to_odd: | |
977 | inc = a->frac_lo & frac_lsb ? 0 : rnd_mask; | |
978 | break; | |
979 | default: | |
980 | g_assert_not_reached(); | |
981 | } | |
982 | ||
983 | if (shift_adj == 0) { | |
984 | if (frac_addi(a, a, inc)) { | |
985 | frac_shr(a, 1); | |
986 | a->frac_hi |= DECOMPOSED_IMPLICIT_BIT; | |
987 | a->exp++; | |
988 | } | |
989 | a->frac_lo &= ~rnd_mask; | |
990 | } else { | |
991 | frac_addi(a, a, inc); | |
992 | a->frac_lo &= ~rnd_mask; | |
993 | /* Be careful shifting back, not to overflow */ | |
994 | frac_shl(a, shift_adj - 1); | |
995 | if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) { | |
996 | a->exp++; | |
997 | } else { | |
998 | frac_add(a, a, a); | |
999 | } | |
1000 | } | |
1001 | return true; | |
1002 | } | |
1003 | ||
1004 | static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode, | |
1005 | int scale, float_status *s, | |
1006 | const FloatFmt *fmt) | |
1007 | { | |
1008 | switch (a->cls) { | |
1009 | case float_class_qnan: | |
1010 | case float_class_snan: | |
1011 | parts_return_nan(a, s); | |
1012 | break; | |
1013 | case float_class_zero: | |
1014 | case float_class_inf: | |
1015 | break; | |
1016 | case float_class_normal: | |
1017 | if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) { | |
1018 | float_raise(float_flag_inexact, s); | |
1019 | } | |
1020 | break; | |
1021 | default: | |
1022 | g_assert_not_reached(); | |
1023 | } | |
1024 | } | |
463b3f0d RH |
1025 | |
1026 | /* | |
1027 | * Returns the result of converting the floating-point value `a' to | |
1028 | * the two's complement integer format. The conversion is performed | |
1029 | * according to the IEC/IEEE Standard for Binary Floating-Point | |
1030 | * Arithmetic---which means in particular that the conversion is | |
1031 | * rounded according to the current rounding mode. If `a' is a NaN, | |
1032 | * the largest positive integer is returned. Otherwise, if the | |
1033 | * conversion overflows, the largest integer with the same sign as `a' | |
1034 | * is returned. | |
4ab4aef0 | 1035 | */ |
463b3f0d RH |
1036 | static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode, |
1037 | int scale, int64_t min, int64_t max, | |
1038 | float_status *s) | |
1039 | { | |
1040 | int flags = 0; | |
1041 | uint64_t r; | |
1042 | ||
1043 | switch (p->cls) { | |
1044 | case float_class_snan: | |
1045 | case float_class_qnan: | |
1046 | flags = float_flag_invalid; | |
1047 | r = max; | |
1048 | break; | |
1049 | ||
1050 | case float_class_inf: | |
1051 | flags = float_flag_invalid; | |
1052 | r = p->sign ? min : max; | |
1053 | break; | |
1054 | ||
1055 | case float_class_zero: | |
1056 | return 0; | |
1057 | ||
1058 | case float_class_normal: | |
1059 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | |
1060 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | |
1061 | flags = float_flag_inexact; | |
1062 | } | |
1063 | ||
1064 | if (p->exp <= DECOMPOSED_BINARY_POINT) { | |
1065 | r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); | |
1066 | } else { | |
1067 | r = UINT64_MAX; | |
1068 | } | |
1069 | if (p->sign) { | |
1070 | if (r <= -(uint64_t)min) { | |
1071 | r = -r; | |
1072 | } else { | |
1073 | flags = float_flag_invalid; | |
1074 | r = min; | |
1075 | } | |
1076 | } else if (r > max) { | |
1077 | flags = float_flag_invalid; | |
1078 | r = max; | |
1079 | } | |
1080 | break; | |
1081 | ||
1082 | default: | |
1083 | g_assert_not_reached(); | |
1084 | } | |
1085 | ||
1086 | float_raise(flags, s); | |
1087 | return r; | |
1088 | } | |
4ab4aef0 RH |
1089 | |
1090 | /* | |
1091 | * Returns the result of converting the floating-point value `a' to | |
1092 | * the unsigned integer format. The conversion is performed according | |
1093 | * to the IEC/IEEE Standard for Binary Floating-Point | |
1094 | * Arithmetic---which means in particular that the conversion is | |
1095 | * rounded according to the current rounding mode. If `a' is a NaN, | |
1096 | * the largest unsigned integer is returned. Otherwise, if the | |
1097 | * conversion overflows, the largest unsigned integer is returned. If | |
1098 | * the 'a' is negative, the result is rounded and zero is returned; | |
1099 | * values that do not round to zero will raise the inexact exception | |
1100 | * flag. | |
1101 | */ | |
1102 | static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode, | |
1103 | int scale, uint64_t max, float_status *s) | |
1104 | { | |
1105 | int flags = 0; | |
1106 | uint64_t r; | |
1107 | ||
1108 | switch (p->cls) { | |
1109 | case float_class_snan: | |
1110 | case float_class_qnan: | |
1111 | flags = float_flag_invalid; | |
1112 | r = max; | |
1113 | break; | |
1114 | ||
1115 | case float_class_inf: | |
1116 | flags = float_flag_invalid; | |
1117 | r = p->sign ? 0 : max; | |
1118 | break; | |
1119 | ||
1120 | case float_class_zero: | |
1121 | return 0; | |
1122 | ||
1123 | case float_class_normal: | |
1124 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | |
1125 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | |
1126 | flags = float_flag_inexact; | |
1127 | if (p->cls == float_class_zero) { | |
1128 | r = 0; | |
1129 | break; | |
1130 | } | |
1131 | } | |
1132 | ||
1133 | if (p->sign) { | |
1134 | flags = float_flag_invalid; | |
1135 | r = 0; | |
1136 | } else if (p->exp > DECOMPOSED_BINARY_POINT) { | |
1137 | flags = float_flag_invalid; | |
1138 | r = max; | |
1139 | } else { | |
1140 | r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); | |
1141 | if (r > max) { | |
1142 | flags = float_flag_invalid; | |
1143 | r = max; | |
1144 | } | |
1145 | } | |
1146 | break; | |
1147 | ||
1148 | default: | |
1149 | g_assert_not_reached(); | |
1150 | } | |
1151 | ||
1152 | float_raise(flags, s); | |
1153 | return r; | |
1154 | } | |
e3689519 RH |
1155 | |
1156 | /* | |
1157 | * Integer to float conversions | |
1158 | * | |
1159 | * Returns the result of converting the two's complement integer `a' | |
1160 | * to the floating-point format. The conversion is performed according | |
1161 | * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. | |
1162 | */ | |
1163 | static void partsN(sint_to_float)(FloatPartsN *p, int64_t a, | |
1164 | int scale, float_status *s) | |
1165 | { | |
1166 | uint64_t f = a; | |
1167 | int shift; | |
1168 | ||
1169 | memset(p, 0, sizeof(*p)); | |
1170 | ||
1171 | if (a == 0) { | |
1172 | p->cls = float_class_zero; | |
1173 | return; | |
1174 | } | |
1175 | ||
1176 | p->cls = float_class_normal; | |
1177 | if (a < 0) { | |
1178 | f = -f; | |
1179 | p->sign = true; | |
1180 | } | |
1181 | shift = clz64(f); | |
1182 | scale = MIN(MAX(scale, -0x10000), 0x10000); | |
1183 | ||
1184 | p->exp = DECOMPOSED_BINARY_POINT - shift + scale; | |
1185 | p->frac_hi = f << shift; | |
1186 | } | |
37c954a1 RH |
1187 | |
1188 | /* | |
1189 | * Unsigned Integer to float conversions | |
1190 | * | |
1191 | * Returns the result of converting the unsigned integer `a' to the | |
1192 | * floating-point format. The conversion is performed according to the | |
1193 | * IEC/IEEE Standard for Binary Floating-Point Arithmetic. | |
1194 | */ | |
1195 | static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a, | |
1196 | int scale, float_status *status) | |
1197 | { | |
1198 | memset(p, 0, sizeof(*p)); | |
1199 | ||
1200 | if (a == 0) { | |
1201 | p->cls = float_class_zero; | |
1202 | } else { | |
1203 | int shift = clz64(a); | |
1204 | scale = MIN(MAX(scale, -0x10000), 0x10000); | |
1205 | p->cls = float_class_normal; | |
1206 | p->exp = DECOMPOSED_BINARY_POINT - shift + scale; | |
1207 | p->frac_hi = a << shift; | |
1208 | } | |
1209 | } | |
e1c4667a RH |
1210 | |
1211 | /* | |
1212 | * Float min/max. | |
1213 | */ | |
1214 | static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | |
1215 | float_status *s, int flags) | |
1216 | { | |
1217 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
1218 | int a_exp, b_exp, cmp; | |
1219 | ||
1220 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
1221 | /* | |
1222 | * For minnum/maxnum, if one operand is a QNaN, and the other | |
1223 | * operand is numerical, then return numerical argument. | |
1224 | */ | |
1225 | if ((flags & minmax_isnum) | |
1226 | && !(ab_mask & float_cmask_snan) | |
1227 | && (ab_mask & ~float_cmask_qnan)) { | |
1228 | return is_nan(a->cls) ? b : a; | |
1229 | } | |
1230 | return parts_pick_nan(a, b, s); | |
1231 | } | |
1232 | ||
1233 | a_exp = a->exp; | |
1234 | b_exp = b->exp; | |
1235 | ||
1236 | if (unlikely(ab_mask != float_cmask_normal)) { | |
1237 | switch (a->cls) { | |
1238 | case float_class_normal: | |
1239 | break; | |
1240 | case float_class_inf: | |
1241 | a_exp = INT16_MAX; | |
1242 | break; | |
1243 | case float_class_zero: | |
1244 | a_exp = INT16_MIN; | |
1245 | break; | |
1246 | default: | |
1247 | g_assert_not_reached(); | |
1248 | break; | |
1249 | } | |
1250 | switch (b->cls) { | |
1251 | case float_class_normal: | |
1252 | break; | |
1253 | case float_class_inf: | |
1254 | b_exp = INT16_MAX; | |
1255 | break; | |
1256 | case float_class_zero: | |
1257 | b_exp = INT16_MIN; | |
1258 | break; | |
1259 | default: | |
1260 | g_assert_not_reached(); | |
1261 | break; | |
1262 | } | |
1263 | } | |
1264 | ||
1265 | /* Compare magnitudes. */ | |
1266 | cmp = a_exp - b_exp; | |
1267 | if (cmp == 0) { | |
1268 | cmp = frac_cmp(a, b); | |
1269 | } | |
1270 | ||
1271 | /* | |
1272 | * Take the sign into account. | |
1273 | * For ismag, only do this if the magnitudes are equal. | |
1274 | */ | |
1275 | if (!(flags & minmax_ismag) || cmp == 0) { | |
1276 | if (a->sign != b->sign) { | |
1277 | /* For differing signs, the negative operand is less. */ | |
1278 | cmp = a->sign ? -1 : 1; | |
1279 | } else if (a->sign) { | |
1280 | /* For two negative operands, invert the magnitude comparison. */ | |
1281 | cmp = -cmp; | |
1282 | } | |
1283 | } | |
1284 | ||
1285 | if (flags & minmax_ismin) { | |
1286 | cmp = -cmp; | |
1287 | } | |
1288 | return cmp < 0 ? b : a; | |
1289 | } | |
6eb169b8 RH |
1290 | |
1291 | /* | |
1292 | * Floating point compare | |
1293 | */ | |
1294 | static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | |
1295 | float_status *s, bool is_quiet) | |
1296 | { | |
1297 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
1298 | int cmp; | |
1299 | ||
1300 | if (likely(ab_mask == float_cmask_normal)) { | |
1301 | if (a->sign != b->sign) { | |
1302 | goto a_sign; | |
1303 | } | |
1304 | if (a->exp != b->exp) { | |
1305 | cmp = a->exp < b->exp ? -1 : 1; | |
1306 | } else { | |
1307 | cmp = frac_cmp(a, b); | |
1308 | } | |
1309 | if (a->sign) { | |
1310 | cmp = -cmp; | |
1311 | } | |
1312 | return cmp; | |
1313 | } | |
1314 | ||
1315 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
1316 | if (!is_quiet || (ab_mask & float_cmask_snan)) { | |
1317 | float_raise(float_flag_invalid, s); | |
1318 | } | |
1319 | return float_relation_unordered; | |
1320 | } | |
1321 | ||
1322 | if (ab_mask & float_cmask_zero) { | |
1323 | if (ab_mask == float_cmask_zero) { | |
1324 | return float_relation_equal; | |
1325 | } else if (a->cls == float_class_zero) { | |
1326 | goto b_sign; | |
1327 | } else { | |
1328 | goto a_sign; | |
1329 | } | |
1330 | } | |
1331 | ||
1332 | if (ab_mask == float_cmask_inf) { | |
1333 | if (a->sign == b->sign) { | |
1334 | return float_relation_equal; | |
1335 | } | |
1336 | } else if (b->cls == float_class_inf) { | |
1337 | goto b_sign; | |
1338 | } else { | |
1339 | g_assert(a->cls == float_class_inf); | |
1340 | } | |
1341 | ||
1342 | a_sign: | |
1343 | return a->sign ? float_relation_less : float_relation_greater; | |
1344 | b_sign: | |
1345 | return b->sign ? float_relation_greater : float_relation_less; | |
1346 | } | |
39626b0c RH |
1347 | |
1348 | /* | |
1349 | * Multiply A by 2 raised to the power N. | |
1350 | */ | |
1351 | static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) | |
1352 | { | |
1353 | switch (a->cls) { | |
1354 | case float_class_snan: | |
1355 | case float_class_qnan: | |
1356 | parts_return_nan(a, s); | |
1357 | break; | |
1358 | case float_class_zero: | |
1359 | case float_class_inf: | |
1360 | break; | |
1361 | case float_class_normal: | |
1362 | a->exp += MIN(MAX(n, -0x10000), 0x10000); | |
1363 | break; | |
1364 | default: | |
1365 | g_assert_not_reached(); | |
1366 | } | |
1367 | } | |
2fa3546c RH |
1368 | |
1369 | /* | |
1370 | * Return log2(A) | |
1371 | */ | |
1372 | static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | |
1373 | { | |
1374 | uint64_t a0, a1, r, t, ign; | |
1375 | FloatPartsN f; | |
1376 | int i, n, a_exp, f_exp; | |
1377 | ||
1378 | if (unlikely(a->cls != float_class_normal)) { | |
1379 | switch (a->cls) { | |
1380 | case float_class_snan: | |
1381 | case float_class_qnan: | |
1382 | parts_return_nan(a, s); | |
1383 | return; | |
1384 | case float_class_zero: | |
1385 | /* log2(0) = -inf */ | |
1386 | a->cls = float_class_inf; | |
1387 | a->sign = 1; | |
1388 | return; | |
1389 | case float_class_inf: | |
1390 | if (unlikely(a->sign)) { | |
1391 | goto d_nan; | |
1392 | } | |
1393 | return; | |
1394 | default: | |
1395 | break; | |
1396 | } | |
1397 | g_assert_not_reached(); | |
1398 | } | |
1399 | if (unlikely(a->sign)) { | |
1400 | goto d_nan; | |
1401 | } | |
1402 | ||
1403 | /* TODO: This algorithm looses bits too quickly for float128. */ | |
1404 | g_assert(N == 64); | |
1405 | ||
1406 | a_exp = a->exp; | |
1407 | f_exp = -1; | |
1408 | ||
1409 | r = 0; | |
1410 | t = DECOMPOSED_IMPLICIT_BIT; | |
1411 | a0 = a->frac_hi; | |
1412 | a1 = 0; | |
1413 | ||
1414 | n = fmt->frac_size + 2; | |
1415 | if (unlikely(a_exp == -1)) { | |
1416 | /* | |
1417 | * When a_exp == -1, we're computing the log2 of a value [0.5,1.0). | |
1418 | * When the value is very close to 1.0, there are lots of 1's in | |
1419 | * the msb parts of the fraction. At the end, when we subtract | |
1420 | * this value from -1.0, we can see a catastrophic loss of precision, | |
1421 | * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the | |
1422 | * bits of y in the final result. To minimize this, compute as many | |
1423 | * digits as we can. | |
1424 | * ??? This case needs another algorithm to avoid this. | |
1425 | */ | |
1426 | n = fmt->frac_size * 2 + 2; | |
1427 | /* Don't compute a value overlapping the sticky bit */ | |
1428 | n = MIN(n, 62); | |
1429 | } | |
1430 | ||
1431 | for (i = 0; i < n; i++) { | |
1432 | if (a1) { | |
1433 | mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign); | |
1434 | } else if (a0 & 0xffffffffull) { | |
1435 | mul64To128(a0, a0, &a0, &a1); | |
1436 | } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) { | |
1437 | a0 >>= 32; | |
1438 | a0 *= a0; | |
1439 | } else { | |
1440 | goto exact; | |
1441 | } | |
1442 | ||
1443 | if (a0 & DECOMPOSED_IMPLICIT_BIT) { | |
1444 | if (unlikely(a_exp == 0 && r == 0)) { | |
1445 | /* | |
1446 | * When a_exp == 0, we're computing the log2 of a value | |
1447 | * [1.0,2.0). When the value is very close to 1.0, there | |
1448 | * are lots of 0's in the msb parts of the fraction. | |
1449 | * We need to compute more digits to produce a correct | |
1450 | * result -- restart at the top of the fraction. | |
1451 | * ??? This is likely to lose precision quickly, as for | |
1452 | * float128; we may need another method. | |
1453 | */ | |
1454 | f_exp -= i; | |
1455 | t = r = DECOMPOSED_IMPLICIT_BIT; | |
1456 | i = 0; | |
1457 | } else { | |
1458 | r |= t; | |
1459 | } | |
1460 | } else { | |
1461 | add128(a0, a1, a0, a1, &a0, &a1); | |
1462 | } | |
1463 | t >>= 1; | |
1464 | } | |
1465 | ||
1466 | /* Set sticky for inexact. */ | |
1467 | r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT); | |
1468 | ||
1469 | exact: | |
1470 | parts_sint_to_float(a, a_exp, 0, s); | |
1471 | if (r == 0) { | |
1472 | return; | |
1473 | } | |
1474 | ||
1475 | memset(&f, 0, sizeof(f)); | |
1476 | f.cls = float_class_normal; | |
1477 | f.frac_hi = r; | |
1478 | f.exp = f_exp - frac_normalize(&f); | |
1479 | ||
1480 | if (a_exp < 0) { | |
1481 | parts_sub_normal(a, &f); | |
1482 | } else if (a_exp > 0) { | |
1483 | parts_add_normal(a, &f); | |
1484 | } else { | |
1485 | *a = f; | |
1486 | } | |
1487 | return; | |
1488 | ||
1489 | d_nan: | |
1490 | float_raise(float_flag_invalid, s); | |
1491 | parts_default_nan(a, s); | |
1492 | } |