]>
Commit | Line | Data |
---|---|---|
7c45bad8 RH |
1 | /* |
2 | * QEMU float support | |
3 | * | |
4 | * The code in this source file is derived from release 2a of the SoftFloat | |
5 | * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and | |
6 | * some later contributions) are provided under that license, as detailed below. | |
7 | * It has subsequently been modified by contributors to the QEMU Project, | |
8 | * so some portions are provided under: | |
9 | * the SoftFloat-2a license | |
10 | * the BSD license | |
11 | * GPL-v2-or-later | |
12 | * | |
13 | * Any future contributions to this file after December 1st 2014 will be | |
14 | * taken to be licensed under the Softfloat-2a license unless specifically | |
15 | * indicated otherwise. | |
16 | */ | |
17 | ||
18 | static void partsN(return_nan)(FloatPartsN *a, float_status *s) | |
19 | { | |
20 | switch (a->cls) { | |
21 | case float_class_snan: | |
e706d445 | 22 | float_raise(float_flag_invalid | float_flag_invalid_snan, s); |
7c45bad8 RH |
23 | if (s->default_nan_mode) { |
24 | parts_default_nan(a, s); | |
25 | } else { | |
26 | parts_silence_nan(a, s); | |
27 | } | |
28 | break; | |
29 | case float_class_qnan: | |
30 | if (s->default_nan_mode) { | |
31 | parts_default_nan(a, s); | |
32 | } | |
33 | break; | |
34 | default: | |
35 | g_assert_not_reached(); | |
36 | } | |
37 | } | |
22c355f4 RH |
38 | |
39 | static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b, | |
40 | float_status *s) | |
41 | { | |
42 | if (is_snan(a->cls) || is_snan(b->cls)) { | |
e706d445 | 43 | float_raise(float_flag_invalid | float_flag_invalid_snan, s); |
22c355f4 RH |
44 | } |
45 | ||
46 | if (s->default_nan_mode) { | |
47 | parts_default_nan(a, s); | |
48 | } else { | |
49 | int cmp = frac_cmp(a, b); | |
50 | if (cmp == 0) { | |
51 | cmp = a->sign < b->sign; | |
52 | } | |
53 | ||
54 | if (pickNaN(a->cls, b->cls, cmp > 0, s)) { | |
55 | a = b; | |
56 | } | |
57 | if (is_snan(a->cls)) { | |
58 | parts_silence_nan(a, s); | |
59 | } | |
60 | } | |
61 | return a; | |
62 | } | |
979582d0 RH |
63 | |
64 | static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b, | |
65 | FloatPartsN *c, float_status *s, | |
66 | int ab_mask, int abc_mask) | |
67 | { | |
68 | int which; | |
69 | ||
70 | if (unlikely(abc_mask & float_cmask_snan)) { | |
e706d445 | 71 | float_raise(float_flag_invalid | float_flag_invalid_snan, s); |
979582d0 RH |
72 | } |
73 | ||
74 | which = pickNaNMulAdd(a->cls, b->cls, c->cls, | |
75 | ab_mask == float_cmask_infzero, s); | |
76 | ||
77 | if (s->default_nan_mode || which == 3) { | |
78 | /* | |
79 | * Note that this check is after pickNaNMulAdd so that function | |
80 | * has an opportunity to set the Invalid flag for infzero. | |
81 | */ | |
82 | parts_default_nan(a, s); | |
83 | return a; | |
84 | } | |
85 | ||
86 | switch (which) { | |
87 | case 0: | |
88 | break; | |
89 | case 1: | |
90 | a = b; | |
91 | break; | |
92 | case 2: | |
93 | a = c; | |
94 | break; | |
95 | default: | |
96 | g_assert_not_reached(); | |
97 | } | |
98 | if (is_snan(a->cls)) { | |
99 | parts_silence_nan(a, s); | |
100 | } | |
101 | return a; | |
102 | } | |
d46975bc RH |
103 | |
104 | /* | |
105 | * Canonicalize the FloatParts structure. Determine the class, | |
106 | * unbias the exponent, and normalize the fraction. | |
107 | */ | |
108 | static void partsN(canonicalize)(FloatPartsN *p, float_status *status, | |
109 | const FloatFmt *fmt) | |
110 | { | |
111 | if (unlikely(p->exp == 0)) { | |
112 | if (likely(frac_eqz(p))) { | |
113 | p->cls = float_class_zero; | |
114 | } else if (status->flush_inputs_to_zero) { | |
115 | float_raise(float_flag_input_denormal, status); | |
116 | p->cls = float_class_zero; | |
117 | frac_clear(p); | |
118 | } else { | |
119 | int shift = frac_normalize(p); | |
120 | p->cls = float_class_normal; | |
72246065 RH |
121 | p->exp = fmt->frac_shift - fmt->exp_bias |
122 | - shift + !fmt->m68k_denormal; | |
d46975bc RH |
123 | } |
124 | } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) { | |
125 | p->cls = float_class_normal; | |
126 | p->exp -= fmt->exp_bias; | |
127 | frac_shl(p, fmt->frac_shift); | |
128 | p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; | |
129 | } else if (likely(frac_eqz(p))) { | |
130 | p->cls = float_class_inf; | |
131 | } else { | |
132 | frac_shl(p, fmt->frac_shift); | |
133 | p->cls = (parts_is_snan_frac(p->frac_hi, status) | |
134 | ? float_class_snan : float_class_qnan); | |
135 | } | |
136 | } | |
ee6959f2 RH |
137 | |
138 | /* | |
139 | * Round and uncanonicalize a floating-point number by parts. There | |
140 | * are FRAC_SHIFT bits that may require rounding at the bottom of the | |
141 | * fraction; these bits will be removed. The exponent will be biased | |
142 | * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0]. | |
143 | */ | |
25fdedf0 RH |
144 | static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, |
145 | const FloatFmt *fmt) | |
ee6959f2 RH |
146 | { |
147 | const int exp_max = fmt->exp_max; | |
148 | const int frac_shift = fmt->frac_shift; | |
ee6959f2 | 149 | const uint64_t round_mask = fmt->round_mask; |
d6e1f0cd RH |
150 | const uint64_t frac_lsb = round_mask + 1; |
151 | const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1); | |
152 | const uint64_t roundeven_mask = round_mask | frac_lsb; | |
ee6959f2 | 153 | uint64_t inc; |
25fdedf0 | 154 | bool overflow_norm = false; |
ee6959f2 RH |
155 | int exp, flags = 0; |
156 | ||
ee6959f2 RH |
157 | switch (s->float_rounding_mode) { |
158 | case float_round_nearest_even: | |
98b3cff7 RH |
159 | if (N > 64 && frac_lsb == 0) { |
160 | inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1 | |
161 | ? frac_lsbm1 : 0); | |
162 | } else { | |
163 | inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 | |
164 | ? frac_lsbm1 : 0); | |
165 | } | |
ee6959f2 RH |
166 | break; |
167 | case float_round_ties_away: | |
ee6959f2 RH |
168 | inc = frac_lsbm1; |
169 | break; | |
170 | case float_round_to_zero: | |
171 | overflow_norm = true; | |
172 | inc = 0; | |
173 | break; | |
174 | case float_round_up: | |
175 | inc = p->sign ? 0 : round_mask; | |
176 | overflow_norm = p->sign; | |
177 | break; | |
178 | case float_round_down: | |
179 | inc = p->sign ? round_mask : 0; | |
180 | overflow_norm = !p->sign; | |
181 | break; | |
182 | case float_round_to_odd: | |
183 | overflow_norm = true; | |
60c8f726 RH |
184 | /* fall through */ |
185 | case float_round_to_odd_inf: | |
98b3cff7 RH |
186 | if (N > 64 && frac_lsb == 0) { |
187 | inc = p->frac_hi & 1 ? 0 : round_mask; | |
188 | } else { | |
189 | inc = p->frac_lo & frac_lsb ? 0 : round_mask; | |
190 | } | |
ee6959f2 RH |
191 | break; |
192 | default: | |
193 | g_assert_not_reached(); | |
194 | } | |
195 | ||
196 | exp = p->exp + fmt->exp_bias; | |
197 | if (likely(exp > 0)) { | |
198 | if (p->frac_lo & round_mask) { | |
199 | flags |= float_flag_inexact; | |
200 | if (frac_addi(p, p, inc)) { | |
201 | frac_shr(p, 1); | |
202 | p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; | |
203 | exp++; | |
204 | } | |
98b3cff7 | 205 | p->frac_lo &= ~round_mask; |
ee6959f2 | 206 | } |
ee6959f2 RH |
207 | |
208 | if (fmt->arm_althp) { | |
209 | /* ARM Alt HP eschews Inf and NaN for a wider exponent. */ | |
210 | if (unlikely(exp > exp_max)) { | |
211 | /* Overflow. Return the maximum normal. */ | |
212 | flags = float_flag_invalid; | |
213 | exp = exp_max; | |
214 | frac_allones(p); | |
98b3cff7 | 215 | p->frac_lo &= ~round_mask; |
ee6959f2 RH |
216 | } |
217 | } else if (unlikely(exp >= exp_max)) { | |
c40da5c6 LMC |
218 | flags |= float_flag_overflow; |
219 | if (s->rebias_overflow) { | |
220 | exp -= fmt->exp_re_bias; | |
221 | } else if (overflow_norm) { | |
222 | flags |= float_flag_inexact; | |
ee6959f2 RH |
223 | exp = exp_max - 1; |
224 | frac_allones(p); | |
98b3cff7 | 225 | p->frac_lo &= ~round_mask; |
ee6959f2 | 226 | } else { |
c40da5c6 | 227 | flags |= float_flag_inexact; |
ee6959f2 RH |
228 | p->cls = float_class_inf; |
229 | exp = exp_max; | |
230 | frac_clear(p); | |
231 | } | |
232 | } | |
98b3cff7 | 233 | frac_shr(p, frac_shift); |
c40da5c6 LMC |
234 | } else if (unlikely(s->rebias_underflow)) { |
235 | flags |= float_flag_underflow; | |
236 | exp += fmt->exp_re_bias; | |
237 | if (p->frac_lo & round_mask) { | |
238 | flags |= float_flag_inexact; | |
239 | if (frac_addi(p, p, inc)) { | |
240 | frac_shr(p, 1); | |
241 | p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; | |
242 | exp++; | |
243 | } | |
244 | p->frac_lo &= ~round_mask; | |
245 | } | |
246 | frac_shr(p, frac_shift); | |
ee6959f2 RH |
247 | } else if (s->flush_to_zero) { |
248 | flags |= float_flag_output_denormal; | |
249 | p->cls = float_class_zero; | |
250 | exp = 0; | |
251 | frac_clear(p); | |
252 | } else { | |
253 | bool is_tiny = s->tininess_before_rounding || exp < 0; | |
254 | ||
255 | if (!is_tiny) { | |
256 | FloatPartsN discard; | |
257 | is_tiny = !frac_addi(&discard, p, inc); | |
258 | } | |
259 | ||
72246065 | 260 | frac_shrjam(p, !fmt->m68k_denormal - exp); |
ee6959f2 RH |
261 | |
262 | if (p->frac_lo & round_mask) { | |
263 | /* Need to recompute round-to-even/round-to-odd. */ | |
264 | switch (s->float_rounding_mode) { | |
265 | case float_round_nearest_even: | |
98b3cff7 RH |
266 | if (N > 64 && frac_lsb == 0) { |
267 | inc = ((p->frac_hi & 1) || | |
268 | (p->frac_lo & round_mask) != frac_lsbm1 | |
269 | ? frac_lsbm1 : 0); | |
270 | } else { | |
271 | inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 | |
272 | ? frac_lsbm1 : 0); | |
273 | } | |
ee6959f2 RH |
274 | break; |
275 | case float_round_to_odd: | |
60c8f726 | 276 | case float_round_to_odd_inf: |
98b3cff7 RH |
277 | if (N > 64 && frac_lsb == 0) { |
278 | inc = p->frac_hi & 1 ? 0 : round_mask; | |
279 | } else { | |
280 | inc = p->frac_lo & frac_lsb ? 0 : round_mask; | |
281 | } | |
ee6959f2 RH |
282 | break; |
283 | default: | |
284 | break; | |
285 | } | |
286 | flags |= float_flag_inexact; | |
287 | frac_addi(p, p, inc); | |
98b3cff7 | 288 | p->frac_lo &= ~round_mask; |
ee6959f2 RH |
289 | } |
290 | ||
72246065 | 291 | exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal; |
ee6959f2 RH |
292 | frac_shr(p, frac_shift); |
293 | ||
294 | if (is_tiny && (flags & float_flag_inexact)) { | |
295 | flags |= float_flag_underflow; | |
296 | } | |
297 | if (exp == 0 && frac_eqz(p)) { | |
298 | p->cls = float_class_zero; | |
299 | } | |
300 | } | |
301 | p->exp = exp; | |
302 | float_raise(flags, s); | |
303 | } | |
da10a907 | 304 | |
25fdedf0 RH |
305 | static void partsN(uncanon)(FloatPartsN *p, float_status *s, |
306 | const FloatFmt *fmt) | |
307 | { | |
308 | if (likely(p->cls == float_class_normal)) { | |
309 | parts_uncanon_normal(p, s, fmt); | |
310 | } else { | |
311 | switch (p->cls) { | |
312 | case float_class_zero: | |
313 | p->exp = 0; | |
314 | frac_clear(p); | |
315 | return; | |
316 | case float_class_inf: | |
317 | g_assert(!fmt->arm_althp); | |
318 | p->exp = fmt->exp_max; | |
319 | frac_clear(p); | |
320 | return; | |
321 | case float_class_qnan: | |
322 | case float_class_snan: | |
323 | g_assert(!fmt->arm_althp); | |
324 | p->exp = fmt->exp_max; | |
325 | frac_shr(p, fmt->frac_shift); | |
326 | return; | |
327 | default: | |
328 | break; | |
329 | } | |
330 | g_assert_not_reached(); | |
331 | } | |
332 | } | |
333 | ||
da10a907 RH |
334 | /* |
335 | * Returns the result of adding or subtracting the values of the | |
336 | * floating-point values `a' and `b'. The operation is performed | |
337 | * according to the IEC/IEEE Standard for Binary Floating-Point | |
338 | * Arithmetic. | |
339 | */ | |
340 | static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | |
341 | float_status *s, bool subtract) | |
342 | { | |
343 | bool b_sign = b->sign ^ subtract; | |
344 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
345 | ||
346 | if (a->sign != b_sign) { | |
347 | /* Subtraction */ | |
348 | if (likely(ab_mask == float_cmask_normal)) { | |
349 | if (parts_sub_normal(a, b)) { | |
350 | return a; | |
351 | } | |
352 | /* Subtract was exact, fall through to set sign. */ | |
353 | ab_mask = float_cmask_zero; | |
354 | } | |
355 | ||
356 | if (ab_mask == float_cmask_zero) { | |
357 | a->sign = s->float_rounding_mode == float_round_down; | |
358 | return a; | |
359 | } | |
360 | ||
361 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
362 | goto p_nan; | |
363 | } | |
364 | ||
365 | if (ab_mask & float_cmask_inf) { | |
366 | if (a->cls != float_class_inf) { | |
367 | /* N - Inf */ | |
368 | goto return_b; | |
369 | } | |
370 | if (b->cls != float_class_inf) { | |
371 | /* Inf - N */ | |
372 | return a; | |
373 | } | |
374 | /* Inf - Inf */ | |
ba11446c | 375 | float_raise(float_flag_invalid | float_flag_invalid_isi, s); |
da10a907 RH |
376 | parts_default_nan(a, s); |
377 | return a; | |
378 | } | |
379 | } else { | |
380 | /* Addition */ | |
381 | if (likely(ab_mask == float_cmask_normal)) { | |
382 | parts_add_normal(a, b); | |
383 | return a; | |
384 | } | |
385 | ||
386 | if (ab_mask == float_cmask_zero) { | |
387 | return a; | |
388 | } | |
389 | ||
390 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
391 | goto p_nan; | |
392 | } | |
393 | ||
394 | if (ab_mask & float_cmask_inf) { | |
395 | a->cls = float_class_inf; | |
396 | return a; | |
397 | } | |
398 | } | |
399 | ||
400 | if (b->cls == float_class_zero) { | |
401 | g_assert(a->cls == float_class_normal); | |
402 | return a; | |
403 | } | |
404 | ||
405 | g_assert(a->cls == float_class_zero); | |
406 | g_assert(b->cls == float_class_normal); | |
407 | return_b: | |
408 | b->sign = b_sign; | |
409 | return b; | |
410 | ||
411 | p_nan: | |
412 | return parts_pick_nan(a, b, s); | |
413 | } | |
aca84527 RH |
414 | |
415 | /* | |
416 | * Returns the result of multiplying the floating-point values `a' and | |
417 | * `b'. The operation is performed according to the IEC/IEEE Standard | |
418 | * for Binary Floating-Point Arithmetic. | |
419 | */ | |
420 | static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, | |
421 | float_status *s) | |
422 | { | |
423 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
424 | bool sign = a->sign ^ b->sign; | |
425 | ||
426 | if (likely(ab_mask == float_cmask_normal)) { | |
427 | FloatPartsW tmp; | |
428 | ||
429 | frac_mulw(&tmp, a, b); | |
430 | frac_truncjam(a, &tmp); | |
431 | ||
432 | a->exp += b->exp + 1; | |
433 | if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { | |
434 | frac_add(a, a, a); | |
435 | a->exp -= 1; | |
436 | } | |
437 | ||
438 | a->sign = sign; | |
439 | return a; | |
440 | } | |
441 | ||
442 | /* Inf * Zero == NaN */ | |
443 | if (unlikely(ab_mask == float_cmask_infzero)) { | |
bead3c9b | 444 | float_raise(float_flag_invalid | float_flag_invalid_imz, s); |
aca84527 RH |
445 | parts_default_nan(a, s); |
446 | return a; | |
447 | } | |
448 | ||
449 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
450 | return parts_pick_nan(a, b, s); | |
451 | } | |
452 | ||
453 | /* Multiply by 0 or Inf */ | |
454 | if (ab_mask & float_cmask_inf) { | |
455 | a->cls = float_class_inf; | |
456 | a->sign = sign; | |
457 | return a; | |
458 | } | |
459 | ||
460 | g_assert(ab_mask & float_cmask_zero); | |
461 | a->cls = float_class_zero; | |
462 | a->sign = sign; | |
463 | return a; | |
464 | } | |
dedd123c RH |
465 | |
466 | /* | |
467 | * Returns the result of multiplying the floating-point values `a' and | |
468 | * `b' then adding 'c', with no intermediate rounding step after the | |
469 | * multiplication. The operation is performed according to the | |
470 | * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008. | |
471 | * The flags argument allows the caller to select negation of the | |
472 | * addend, the intermediate product, or the final result. (The | |
473 | * difference between this and having the caller do a separate | |
474 | * negation is that negating externally will flip the sign bit on NaNs.) | |
475 | * | |
476 | * Requires A and C extracted into a double-sized structure to provide the | |
477 | * extra space for the widening multiply. | |
478 | */ | |
479 | static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b, | |
480 | FloatPartsN *c, int flags, float_status *s) | |
481 | { | |
482 | int ab_mask, abc_mask; | |
483 | FloatPartsW p_widen, c_widen; | |
484 | ||
485 | ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
486 | abc_mask = float_cmask(c->cls) | ab_mask; | |
487 | ||
488 | /* | |
489 | * It is implementation-defined whether the cases of (0,inf,qnan) | |
490 | * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN | |
491 | * they return if they do), so we have to hand this information | |
492 | * off to the target-specific pick-a-NaN routine. | |
493 | */ | |
494 | if (unlikely(abc_mask & float_cmask_anynan)) { | |
495 | return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask); | |
496 | } | |
497 | ||
498 | if (flags & float_muladd_negate_c) { | |
499 | c->sign ^= 1; | |
500 | } | |
501 | ||
502 | /* Compute the sign of the product into A. */ | |
503 | a->sign ^= b->sign; | |
504 | if (flags & float_muladd_negate_product) { | |
505 | a->sign ^= 1; | |
506 | } | |
507 | ||
508 | if (unlikely(ab_mask != float_cmask_normal)) { | |
509 | if (unlikely(ab_mask == float_cmask_infzero)) { | |
bead3c9b | 510 | float_raise(float_flag_invalid | float_flag_invalid_imz, s); |
dedd123c RH |
511 | goto d_nan; |
512 | } | |
513 | ||
514 | if (ab_mask & float_cmask_inf) { | |
515 | if (c->cls == float_class_inf && a->sign != c->sign) { | |
ba11446c | 516 | float_raise(float_flag_invalid | float_flag_invalid_isi, s); |
dedd123c RH |
517 | goto d_nan; |
518 | } | |
519 | goto return_inf; | |
520 | } | |
521 | ||
522 | g_assert(ab_mask & float_cmask_zero); | |
523 | if (c->cls == float_class_normal) { | |
524 | *a = *c; | |
525 | goto return_normal; | |
526 | } | |
527 | if (c->cls == float_class_zero) { | |
528 | if (a->sign != c->sign) { | |
529 | goto return_sub_zero; | |
530 | } | |
531 | goto return_zero; | |
532 | } | |
533 | g_assert(c->cls == float_class_inf); | |
534 | } | |
535 | ||
536 | if (unlikely(c->cls == float_class_inf)) { | |
537 | a->sign = c->sign; | |
538 | goto return_inf; | |
539 | } | |
540 | ||
541 | /* Perform the multiplication step. */ | |
542 | p_widen.sign = a->sign; | |
543 | p_widen.exp = a->exp + b->exp + 1; | |
544 | frac_mulw(&p_widen, a, b); | |
545 | if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) { | |
546 | frac_add(&p_widen, &p_widen, &p_widen); | |
547 | p_widen.exp -= 1; | |
548 | } | |
549 | ||
550 | /* Perform the addition step. */ | |
551 | if (c->cls != float_class_zero) { | |
552 | /* Zero-extend C to less significant bits. */ | |
553 | frac_widen(&c_widen, c); | |
554 | c_widen.exp = c->exp; | |
555 | ||
556 | if (a->sign == c->sign) { | |
557 | parts_add_normal(&p_widen, &c_widen); | |
558 | } else if (!parts_sub_normal(&p_widen, &c_widen)) { | |
559 | goto return_sub_zero; | |
560 | } | |
561 | } | |
562 | ||
563 | /* Narrow with sticky bit, for proper rounding later. */ | |
564 | frac_truncjam(a, &p_widen); | |
565 | a->sign = p_widen.sign; | |
566 | a->exp = p_widen.exp; | |
567 | ||
568 | return_normal: | |
569 | if (flags & float_muladd_halve_result) { | |
570 | a->exp -= 1; | |
571 | } | |
572 | finish_sign: | |
573 | if (flags & float_muladd_negate_result) { | |
574 | a->sign ^= 1; | |
575 | } | |
576 | return a; | |
577 | ||
578 | return_sub_zero: | |
579 | a->sign = s->float_rounding_mode == float_round_down; | |
580 | return_zero: | |
581 | a->cls = float_class_zero; | |
582 | goto finish_sign; | |
583 | ||
584 | return_inf: | |
585 | a->cls = float_class_inf; | |
586 | goto finish_sign; | |
587 | ||
588 | d_nan: | |
dedd123c RH |
589 | parts_default_nan(a, s); |
590 | return a; | |
591 | } | |
ec961b81 RH |
592 | |
593 | /* | |
594 | * Returns the result of dividing the floating-point value `a' by the | |
595 | * corresponding value `b'. The operation is performed according to | |
596 | * the IEC/IEEE Standard for Binary Floating-Point Arithmetic. | |
597 | */ | |
598 | static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, | |
599 | float_status *s) | |
600 | { | |
601 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
602 | bool sign = a->sign ^ b->sign; | |
603 | ||
604 | if (likely(ab_mask == float_cmask_normal)) { | |
605 | a->sign = sign; | |
606 | a->exp -= b->exp + frac_div(a, b); | |
607 | return a; | |
608 | } | |
609 | ||
610 | /* 0/0 or Inf/Inf => NaN */ | |
10cc9640 RH |
611 | if (unlikely(ab_mask == float_cmask_zero)) { |
612 | float_raise(float_flag_invalid | float_flag_invalid_zdz, s); | |
613 | goto d_nan; | |
614 | } | |
615 | if (unlikely(ab_mask == float_cmask_inf)) { | |
616 | float_raise(float_flag_invalid | float_flag_invalid_idi, s); | |
617 | goto d_nan; | |
ec961b81 RH |
618 | } |
619 | ||
620 | /* All the NaN cases */ | |
621 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
622 | return parts_pick_nan(a, b, s); | |
623 | } | |
624 | ||
625 | a->sign = sign; | |
626 | ||
627 | /* Inf / X */ | |
628 | if (a->cls == float_class_inf) { | |
629 | return a; | |
630 | } | |
631 | ||
632 | /* 0 / X */ | |
633 | if (a->cls == float_class_zero) { | |
634 | return a; | |
635 | } | |
636 | ||
637 | /* X / Inf */ | |
638 | if (b->cls == float_class_inf) { | |
639 | a->cls = float_class_zero; | |
640 | return a; | |
641 | } | |
642 | ||
643 | /* X / 0 => Inf */ | |
644 | g_assert(b->cls == float_class_zero); | |
645 | float_raise(float_flag_divbyzero, s); | |
646 | a->cls = float_class_inf; | |
647 | return a; | |
10cc9640 RH |
648 | |
649 | d_nan: | |
650 | parts_default_nan(a, s); | |
651 | return a; | |
ec961b81 | 652 | } |
afc34931 | 653 | |
feaf2e9c RH |
654 | /* |
655 | * Floating point remainder, per IEC/IEEE, or modulus. | |
656 | */ | |
657 | static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, | |
658 | uint64_t *mod_quot, float_status *s) | |
659 | { | |
660 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
661 | ||
662 | if (likely(ab_mask == float_cmask_normal)) { | |
663 | frac_modrem(a, b, mod_quot); | |
664 | return a; | |
665 | } | |
666 | ||
667 | if (mod_quot) { | |
668 | *mod_quot = 0; | |
669 | } | |
670 | ||
671 | /* All the NaN cases */ | |
672 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
673 | return parts_pick_nan(a, b, s); | |
674 | } | |
675 | ||
676 | /* Inf % N; N % 0 */ | |
677 | if (a->cls == float_class_inf || b->cls == float_class_zero) { | |
678 | float_raise(float_flag_invalid, s); | |
679 | parts_default_nan(a, s); | |
680 | return a; | |
681 | } | |
682 | ||
683 | /* N % Inf; 0 % N */ | |
684 | g_assert(b->cls == float_class_inf || a->cls == float_class_zero); | |
685 | return a; | |
686 | } | |
687 | ||
9261b245 RH |
688 | /* |
689 | * Square Root | |
690 | * | |
691 | * The base algorithm is lifted from | |
692 | * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c | |
693 | * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c | |
694 | * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c | |
695 | * and is thus MIT licenced. | |
696 | */ | |
697 | static void partsN(sqrt)(FloatPartsN *a, float_status *status, | |
698 | const FloatFmt *fmt) | |
699 | { | |
700 | const uint32_t three32 = 3u << 30; | |
701 | const uint64_t three64 = 3ull << 62; | |
702 | uint32_t d32, m32, r32, s32, u32; /* 32-bit computation */ | |
703 | uint64_t d64, m64, r64, s64, u64; /* 64-bit computation */ | |
704 | uint64_t dh, dl, rh, rl, sh, sl, uh, ul; /* 128-bit computation */ | |
705 | uint64_t d0h, d0l, d1h, d1l, d2h, d2l; | |
706 | uint64_t discard; | |
707 | bool exp_odd; | |
708 | size_t index; | |
709 | ||
710 | if (unlikely(a->cls != float_class_normal)) { | |
711 | switch (a->cls) { | |
712 | case float_class_snan: | |
713 | case float_class_qnan: | |
714 | parts_return_nan(a, status); | |
715 | return; | |
716 | case float_class_zero: | |
717 | return; | |
718 | case float_class_inf: | |
719 | if (unlikely(a->sign)) { | |
720 | goto d_nan; | |
721 | } | |
722 | return; | |
723 | default: | |
724 | g_assert_not_reached(); | |
725 | } | |
726 | } | |
727 | ||
728 | if (unlikely(a->sign)) { | |
729 | goto d_nan; | |
730 | } | |
731 | ||
732 | /* | |
733 | * Argument reduction. | |
734 | * x = 4^e frac; with integer e, and frac in [1, 4) | |
735 | * m = frac fixed point at bit 62, since we're in base 4. | |
736 | * If base-2 exponent is odd, exchange that for multiply by 2, | |
737 | * which results in no shift. | |
738 | */ | |
739 | exp_odd = a->exp & 1; | |
740 | index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6); | |
741 | if (!exp_odd) { | |
742 | frac_shr(a, 1); | |
743 | } | |
744 | ||
745 | /* | |
746 | * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4). | |
747 | * | |
748 | * Initial estimate: | |
749 | * 7-bit lookup table (1-bit exponent and 6-bit significand). | |
750 | * | |
751 | * The relative error (e = r0*sqrt(m)-1) of a linear estimate | |
752 | * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best; | |
753 | * a table lookup is faster and needs one less iteration. | |
754 | * The 7-bit table gives |e| < 0x1.fdp-9. | |
755 | * | |
756 | * A Newton-Raphson iteration for r is | |
757 | * s = m*r | |
758 | * d = s*r | |
759 | * u = 3 - d | |
760 | * r = r*u/2 | |
761 | * | |
762 | * Fixed point representations: | |
763 | * m, s, d, u, three are all 2.30; r is 0.32 | |
764 | */ | |
765 | m64 = a->frac_hi; | |
766 | m32 = m64 >> 32; | |
767 | ||
768 | r32 = rsqrt_tab[index] << 16; | |
769 | /* |r*sqrt(m) - 1| < 0x1.FDp-9 */ | |
770 | ||
771 | s32 = ((uint64_t)m32 * r32) >> 32; | |
772 | d32 = ((uint64_t)s32 * r32) >> 32; | |
773 | u32 = three32 - d32; | |
774 | ||
775 | if (N == 64) { | |
776 | /* float64 or smaller */ | |
777 | ||
778 | r32 = ((uint64_t)r32 * u32) >> 31; | |
779 | /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */ | |
780 | ||
781 | s32 = ((uint64_t)m32 * r32) >> 32; | |
782 | d32 = ((uint64_t)s32 * r32) >> 32; | |
783 | u32 = three32 - d32; | |
784 | ||
785 | if (fmt->frac_size <= 23) { | |
786 | /* float32 or smaller */ | |
787 | ||
788 | s32 = ((uint64_t)s32 * u32) >> 32; /* 3.29 */ | |
789 | s32 = (s32 - 1) >> 6; /* 9.23 */ | |
790 | /* s < sqrt(m) < s + 0x1.08p-23 */ | |
791 | ||
792 | /* compute nearest rounded result to 2.23 bits */ | |
793 | uint32_t d0 = (m32 << 16) - s32 * s32; | |
794 | uint32_t d1 = s32 - d0; | |
795 | uint32_t d2 = d1 + s32 + 1; | |
796 | s32 += d1 >> 31; | |
797 | a->frac_hi = (uint64_t)s32 << (64 - 25); | |
798 | ||
799 | /* increment or decrement for inexact */ | |
800 | if (d2 != 0) { | |
801 | a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1); | |
802 | } | |
803 | goto done; | |
804 | } | |
805 | ||
806 | /* float64 */ | |
807 | ||
808 | r64 = (uint64_t)r32 * u32 * 2; | |
809 | /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */ | |
810 | mul64To128(m64, r64, &s64, &discard); | |
811 | mul64To128(s64, r64, &d64, &discard); | |
812 | u64 = three64 - d64; | |
813 | ||
814 | mul64To128(s64, u64, &s64, &discard); /* 3.61 */ | |
815 | s64 = (s64 - 2) >> 9; /* 12.52 */ | |
816 | ||
817 | /* Compute nearest rounded result */ | |
818 | uint64_t d0 = (m64 << 42) - s64 * s64; | |
819 | uint64_t d1 = s64 - d0; | |
820 | uint64_t d2 = d1 + s64 + 1; | |
821 | s64 += d1 >> 63; | |
822 | a->frac_hi = s64 << (64 - 54); | |
823 | ||
824 | /* increment or decrement for inexact */ | |
825 | if (d2 != 0) { | |
826 | a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1); | |
827 | } | |
828 | goto done; | |
829 | } | |
830 | ||
831 | r64 = (uint64_t)r32 * u32 * 2; | |
832 | /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */ | |
833 | ||
834 | mul64To128(m64, r64, &s64, &discard); | |
835 | mul64To128(s64, r64, &d64, &discard); | |
836 | u64 = three64 - d64; | |
837 | mul64To128(u64, r64, &r64, &discard); | |
838 | r64 <<= 1; | |
839 | /* |r*sqrt(m) - 1| < 0x1.a5p-31 */ | |
840 | ||
841 | mul64To128(m64, r64, &s64, &discard); | |
842 | mul64To128(s64, r64, &d64, &discard); | |
843 | u64 = three64 - d64; | |
844 | mul64To128(u64, r64, &rh, &rl); | |
845 | add128(rh, rl, rh, rl, &rh, &rl); | |
846 | /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */ | |
847 | ||
848 | mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard); | |
849 | mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard); | |
850 | sub128(three64, 0, dh, dl, &uh, &ul); | |
851 | mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard); /* 3.125 */ | |
852 | /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */ | |
853 | ||
854 | sub128(sh, sl, 0, 4, &sh, &sl); | |
855 | shift128Right(sh, sl, 13, &sh, &sl); /* 16.112 */ | |
856 | /* s < sqrt(m) < s + 1ulp */ | |
857 | ||
858 | /* Compute nearest rounded result */ | |
859 | mul64To128(sl, sl, &d0h, &d0l); | |
860 | d0h += 2 * sh * sl; | |
861 | sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l); | |
862 | sub128(sh, sl, d0h, d0l, &d1h, &d1l); | |
863 | add128(sh, sl, 0, 1, &d2h, &d2l); | |
864 | add128(d2h, d2l, d1h, d1l, &d2h, &d2l); | |
865 | add128(sh, sl, 0, d1h >> 63, &sh, &sl); | |
866 | shift128Left(sh, sl, 128 - 114, &sh, &sl); | |
867 | ||
868 | /* increment or decrement for inexact */ | |
869 | if (d2h | d2l) { | |
870 | if ((int64_t)(d1h ^ d2h) < 0) { | |
871 | sub128(sh, sl, 0, 1, &sh, &sl); | |
872 | } else { | |
873 | add128(sh, sl, 0, 1, &sh, &sl); | |
874 | } | |
875 | } | |
876 | a->frac_lo = sl; | |
877 | a->frac_hi = sh; | |
878 | ||
879 | done: | |
880 | /* Convert back from base 4 to base 2. */ | |
881 | a->exp >>= 1; | |
882 | if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { | |
883 | frac_add(a, a, a); | |
884 | } else { | |
885 | a->exp += 1; | |
886 | } | |
887 | return; | |
888 | ||
889 | d_nan: | |
f8718aab | 890 | float_raise(float_flag_invalid | float_flag_invalid_sqrt, status); |
9261b245 RH |
891 | parts_default_nan(a, status); |
892 | } | |
893 | ||
afc34931 RH |
894 | /* |
895 | * Rounds the floating-point value `a' to an integer, and returns the | |
896 | * result as a floating-point value. The operation is performed | |
897 | * according to the IEC/IEEE Standard for Binary Floating-Point | |
898 | * Arithmetic. | |
899 | * | |
900 | * parts_round_to_int_normal is an internal helper function for | |
901 | * normal numbers only, returning true for inexact but not directly | |
902 | * raising float_flag_inexact. | |
903 | */ | |
904 | static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode, | |
905 | int scale, int frac_size) | |
906 | { | |
907 | uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc; | |
908 | int shift_adj; | |
909 | ||
910 | scale = MIN(MAX(scale, -0x10000), 0x10000); | |
911 | a->exp += scale; | |
912 | ||
913 | if (a->exp < 0) { | |
914 | bool one; | |
915 | ||
916 | /* All fractional */ | |
917 | switch (rmode) { | |
918 | case float_round_nearest_even: | |
919 | one = false; | |
920 | if (a->exp == -1) { | |
921 | FloatPartsN tmp; | |
922 | /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */ | |
923 | frac_add(&tmp, a, a); | |
924 | /* Anything remaining means frac > 0.5. */ | |
925 | one = !frac_eqz(&tmp); | |
926 | } | |
927 | break; | |
928 | case float_round_ties_away: | |
929 | one = a->exp == -1; | |
930 | break; | |
931 | case float_round_to_zero: | |
932 | one = false; | |
933 | break; | |
934 | case float_round_up: | |
935 | one = !a->sign; | |
936 | break; | |
937 | case float_round_down: | |
938 | one = a->sign; | |
939 | break; | |
940 | case float_round_to_odd: | |
941 | one = true; | |
942 | break; | |
943 | default: | |
944 | g_assert_not_reached(); | |
945 | } | |
946 | ||
947 | frac_clear(a); | |
948 | a->exp = 0; | |
949 | if (one) { | |
950 | a->frac_hi = DECOMPOSED_IMPLICIT_BIT; | |
951 | } else { | |
952 | a->cls = float_class_zero; | |
953 | } | |
954 | return true; | |
955 | } | |
956 | ||
957 | if (a->exp >= frac_size) { | |
958 | /* All integral */ | |
959 | return false; | |
960 | } | |
961 | ||
962 | if (N > 64 && a->exp < N - 64) { | |
963 | /* | |
964 | * Rounding is not in the low word -- shift lsb to bit 2, | |
965 | * which leaves room for sticky and rounding bit. | |
966 | */ | |
967 | shift_adj = (N - 1) - (a->exp + 2); | |
968 | frac_shrjam(a, shift_adj); | |
969 | frac_lsb = 1 << 2; | |
970 | } else { | |
971 | shift_adj = 0; | |
972 | frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63); | |
973 | } | |
974 | ||
975 | frac_lsbm1 = frac_lsb >> 1; | |
976 | rnd_mask = frac_lsb - 1; | |
977 | rnd_even_mask = rnd_mask | frac_lsb; | |
978 | ||
979 | if (!(a->frac_lo & rnd_mask)) { | |
980 | /* Fractional bits already clear, undo the shift above. */ | |
981 | frac_shl(a, shift_adj); | |
982 | return false; | |
983 | } | |
984 | ||
985 | switch (rmode) { | |
986 | case float_round_nearest_even: | |
987 | inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0); | |
988 | break; | |
989 | case float_round_ties_away: | |
990 | inc = frac_lsbm1; | |
991 | break; | |
992 | case float_round_to_zero: | |
993 | inc = 0; | |
994 | break; | |
995 | case float_round_up: | |
996 | inc = a->sign ? 0 : rnd_mask; | |
997 | break; | |
998 | case float_round_down: | |
999 | inc = a->sign ? rnd_mask : 0; | |
1000 | break; | |
1001 | case float_round_to_odd: | |
1002 | inc = a->frac_lo & frac_lsb ? 0 : rnd_mask; | |
1003 | break; | |
1004 | default: | |
1005 | g_assert_not_reached(); | |
1006 | } | |
1007 | ||
1008 | if (shift_adj == 0) { | |
1009 | if (frac_addi(a, a, inc)) { | |
1010 | frac_shr(a, 1); | |
1011 | a->frac_hi |= DECOMPOSED_IMPLICIT_BIT; | |
1012 | a->exp++; | |
1013 | } | |
1014 | a->frac_lo &= ~rnd_mask; | |
1015 | } else { | |
1016 | frac_addi(a, a, inc); | |
1017 | a->frac_lo &= ~rnd_mask; | |
1018 | /* Be careful shifting back, not to overflow */ | |
1019 | frac_shl(a, shift_adj - 1); | |
1020 | if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) { | |
1021 | a->exp++; | |
1022 | } else { | |
1023 | frac_add(a, a, a); | |
1024 | } | |
1025 | } | |
1026 | return true; | |
1027 | } | |
1028 | ||
1029 | static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode, | |
1030 | int scale, float_status *s, | |
1031 | const FloatFmt *fmt) | |
1032 | { | |
1033 | switch (a->cls) { | |
1034 | case float_class_qnan: | |
1035 | case float_class_snan: | |
1036 | parts_return_nan(a, s); | |
1037 | break; | |
1038 | case float_class_zero: | |
1039 | case float_class_inf: | |
1040 | break; | |
1041 | case float_class_normal: | |
1042 | if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) { | |
1043 | float_raise(float_flag_inexact, s); | |
1044 | } | |
1045 | break; | |
1046 | default: | |
1047 | g_assert_not_reached(); | |
1048 | } | |
1049 | } | |
463b3f0d RH |
1050 | |
1051 | /* | |
1052 | * Returns the result of converting the floating-point value `a' to | |
1053 | * the two's complement integer format. The conversion is performed | |
1054 | * according to the IEC/IEEE Standard for Binary Floating-Point | |
1055 | * Arithmetic---which means in particular that the conversion is | |
1056 | * rounded according to the current rounding mode. If `a' is a NaN, | |
1057 | * the largest positive integer is returned. Otherwise, if the | |
1058 | * conversion overflows, the largest integer with the same sign as `a' | |
1059 | * is returned. | |
4ab4aef0 | 1060 | */ |
463b3f0d RH |
1061 | static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode, |
1062 | int scale, int64_t min, int64_t max, | |
1063 | float_status *s) | |
1064 | { | |
1065 | int flags = 0; | |
1066 | uint64_t r; | |
1067 | ||
1068 | switch (p->cls) { | |
1069 | case float_class_snan: | |
e706d445 RH |
1070 | flags |= float_flag_invalid_snan; |
1071 | /* fall through */ | |
463b3f0d | 1072 | case float_class_qnan: |
e706d445 | 1073 | flags |= float_flag_invalid; |
463b3f0d RH |
1074 | r = max; |
1075 | break; | |
1076 | ||
1077 | case float_class_inf: | |
81254b02 | 1078 | flags = float_flag_invalid | float_flag_invalid_cvti; |
463b3f0d RH |
1079 | r = p->sign ? min : max; |
1080 | break; | |
1081 | ||
1082 | case float_class_zero: | |
1083 | return 0; | |
1084 | ||
1085 | case float_class_normal: | |
1086 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | |
1087 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | |
1088 | flags = float_flag_inexact; | |
1089 | } | |
1090 | ||
1091 | if (p->exp <= DECOMPOSED_BINARY_POINT) { | |
1092 | r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); | |
1093 | } else { | |
1094 | r = UINT64_MAX; | |
1095 | } | |
1096 | if (p->sign) { | |
1097 | if (r <= -(uint64_t)min) { | |
1098 | r = -r; | |
1099 | } else { | |
81254b02 | 1100 | flags = float_flag_invalid | float_flag_invalid_cvti; |
463b3f0d RH |
1101 | r = min; |
1102 | } | |
1103 | } else if (r > max) { | |
81254b02 | 1104 | flags = float_flag_invalid | float_flag_invalid_cvti; |
463b3f0d RH |
1105 | r = max; |
1106 | } | |
1107 | break; | |
1108 | ||
1109 | default: | |
1110 | g_assert_not_reached(); | |
1111 | } | |
1112 | ||
1113 | float_raise(flags, s); | |
1114 | return r; | |
1115 | } | |
4ab4aef0 RH |
1116 | |
1117 | /* | |
1118 | * Returns the result of converting the floating-point value `a' to | |
1119 | * the unsigned integer format. The conversion is performed according | |
1120 | * to the IEC/IEEE Standard for Binary Floating-Point | |
1121 | * Arithmetic---which means in particular that the conversion is | |
1122 | * rounded according to the current rounding mode. If `a' is a NaN, | |
1123 | * the largest unsigned integer is returned. Otherwise, if the | |
1124 | * conversion overflows, the largest unsigned integer is returned. If | |
1125 | * the 'a' is negative, the result is rounded and zero is returned; | |
1126 | * values that do not round to zero will raise the inexact exception | |
1127 | * flag. | |
1128 | */ | |
1129 | static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode, | |
1130 | int scale, uint64_t max, float_status *s) | |
1131 | { | |
1132 | int flags = 0; | |
1133 | uint64_t r; | |
1134 | ||
1135 | switch (p->cls) { | |
1136 | case float_class_snan: | |
e706d445 RH |
1137 | flags |= float_flag_invalid_snan; |
1138 | /* fall through */ | |
4ab4aef0 | 1139 | case float_class_qnan: |
e706d445 | 1140 | flags |= float_flag_invalid; |
4ab4aef0 RH |
1141 | r = max; |
1142 | break; | |
1143 | ||
1144 | case float_class_inf: | |
81254b02 | 1145 | flags = float_flag_invalid | float_flag_invalid_cvti; |
4ab4aef0 RH |
1146 | r = p->sign ? 0 : max; |
1147 | break; | |
1148 | ||
1149 | case float_class_zero: | |
1150 | return 0; | |
1151 | ||
1152 | case float_class_normal: | |
1153 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | |
1154 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | |
1155 | flags = float_flag_inexact; | |
1156 | if (p->cls == float_class_zero) { | |
1157 | r = 0; | |
1158 | break; | |
1159 | } | |
1160 | } | |
1161 | ||
1162 | if (p->sign) { | |
81254b02 | 1163 | flags = float_flag_invalid | float_flag_invalid_cvti; |
4ab4aef0 RH |
1164 | r = 0; |
1165 | } else if (p->exp > DECOMPOSED_BINARY_POINT) { | |
81254b02 | 1166 | flags = float_flag_invalid | float_flag_invalid_cvti; |
4ab4aef0 RH |
1167 | r = max; |
1168 | } else { | |
1169 | r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); | |
1170 | if (r > max) { | |
81254b02 | 1171 | flags = float_flag_invalid | float_flag_invalid_cvti; |
4ab4aef0 RH |
1172 | r = max; |
1173 | } | |
1174 | } | |
1175 | break; | |
1176 | ||
1177 | default: | |
1178 | g_assert_not_reached(); | |
1179 | } | |
1180 | ||
1181 | float_raise(flags, s); | |
1182 | return r; | |
1183 | } | |
e3689519 | 1184 | |
e2041f4d RH |
1185 | /* |
1186 | * Like partsN(float_to_sint), except do not saturate the result. | |
1187 | * Instead, return the rounded unbounded precision two's compliment result, | |
1188 | * modulo 2**(bitsm1 + 1). | |
1189 | */ | |
1190 | static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p, | |
1191 | FloatRoundMode rmode, | |
1192 | int bitsm1, float_status *s) | |
1193 | { | |
1194 | int flags = 0; | |
1195 | uint64_t r; | |
1196 | bool overflow = false; | |
1197 | ||
1198 | switch (p->cls) { | |
1199 | case float_class_snan: | |
1200 | flags |= float_flag_invalid_snan; | |
1201 | /* fall through */ | |
1202 | case float_class_qnan: | |
1203 | flags |= float_flag_invalid; | |
1204 | r = 0; | |
1205 | break; | |
1206 | ||
1207 | case float_class_inf: | |
1208 | overflow = true; | |
1209 | r = 0; | |
1210 | break; | |
1211 | ||
1212 | case float_class_zero: | |
1213 | return 0; | |
1214 | ||
1215 | case float_class_normal: | |
1216 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | |
1217 | if (parts_round_to_int_normal(p, rmode, 0, N - 2)) { | |
1218 | flags = float_flag_inexact; | |
1219 | } | |
1220 | ||
1221 | if (p->exp <= DECOMPOSED_BINARY_POINT) { | |
1222 | /* | |
1223 | * Because we rounded to integral, and exp < 64, | |
1224 | * we know frac_low is zero. | |
1225 | */ | |
1226 | r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); | |
1227 | if (p->exp < bitsm1) { | |
1228 | /* Result in range. */ | |
1229 | } else if (p->exp == bitsm1) { | |
1230 | /* The only in-range value is INT_MIN. */ | |
1231 | overflow = !p->sign || p->frac_hi != DECOMPOSED_IMPLICIT_BIT; | |
1232 | } else { | |
1233 | overflow = true; | |
1234 | } | |
1235 | } else { | |
1236 | /* Overflow, but there might still be bits to return. */ | |
1237 | int shl = p->exp - DECOMPOSED_BINARY_POINT; | |
1238 | if (shl < N) { | |
1239 | frac_shl(p, shl); | |
1240 | r = p->frac_hi; | |
1241 | } else { | |
1242 | r = 0; | |
1243 | } | |
1244 | overflow = true; | |
1245 | } | |
1246 | ||
1247 | if (p->sign) { | |
1248 | r = -r; | |
1249 | } | |
1250 | break; | |
1251 | ||
1252 | default: | |
1253 | g_assert_not_reached(); | |
1254 | } | |
1255 | ||
1256 | if (overflow) { | |
1257 | flags = float_flag_invalid | float_flag_invalid_cvti; | |
1258 | } | |
1259 | float_raise(flags, s); | |
1260 | return r; | |
1261 | } | |
1262 | ||
e3689519 RH |
1263 | /* |
1264 | * Integer to float conversions | |
1265 | * | |
1266 | * Returns the result of converting the two's complement integer `a' | |
1267 | * to the floating-point format. The conversion is performed according | |
1268 | * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. | |
1269 | */ | |
1270 | static void partsN(sint_to_float)(FloatPartsN *p, int64_t a, | |
1271 | int scale, float_status *s) | |
1272 | { | |
1273 | uint64_t f = a; | |
1274 | int shift; | |
1275 | ||
1276 | memset(p, 0, sizeof(*p)); | |
1277 | ||
1278 | if (a == 0) { | |
1279 | p->cls = float_class_zero; | |
1280 | return; | |
1281 | } | |
1282 | ||
1283 | p->cls = float_class_normal; | |
1284 | if (a < 0) { | |
1285 | f = -f; | |
1286 | p->sign = true; | |
1287 | } | |
1288 | shift = clz64(f); | |
1289 | scale = MIN(MAX(scale, -0x10000), 0x10000); | |
1290 | ||
1291 | p->exp = DECOMPOSED_BINARY_POINT - shift + scale; | |
1292 | p->frac_hi = f << shift; | |
1293 | } | |
37c954a1 RH |
1294 | |
1295 | /* | |
1296 | * Unsigned Integer to float conversions | |
1297 | * | |
1298 | * Returns the result of converting the unsigned integer `a' to the | |
1299 | * floating-point format. The conversion is performed according to the | |
1300 | * IEC/IEEE Standard for Binary Floating-Point Arithmetic. | |
1301 | */ | |
1302 | static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a, | |
1303 | int scale, float_status *status) | |
1304 | { | |
1305 | memset(p, 0, sizeof(*p)); | |
1306 | ||
1307 | if (a == 0) { | |
1308 | p->cls = float_class_zero; | |
1309 | } else { | |
1310 | int shift = clz64(a); | |
1311 | scale = MIN(MAX(scale, -0x10000), 0x10000); | |
1312 | p->cls = float_class_normal; | |
1313 | p->exp = DECOMPOSED_BINARY_POINT - shift + scale; | |
1314 | p->frac_hi = a << shift; | |
1315 | } | |
1316 | } | |
e1c4667a RH |
1317 | |
1318 | /* | |
1319 | * Float min/max. | |
1320 | */ | |
1321 | static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, | |
1322 | float_status *s, int flags) | |
1323 | { | |
1324 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
1325 | int a_exp, b_exp, cmp; | |
1326 | ||
1327 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
1328 | /* | |
0e903037 CMC |
1329 | * For minNum/maxNum (IEEE 754-2008) |
1330 | * or minimumNumber/maximumNumber (IEEE 754-2019), | |
1331 | * if one operand is a QNaN, and the other | |
e1c4667a RH |
1332 | * operand is numerical, then return numerical argument. |
1333 | */ | |
0e903037 | 1334 | if ((flags & (minmax_isnum | minmax_isnumber)) |
e1c4667a RH |
1335 | && !(ab_mask & float_cmask_snan) |
1336 | && (ab_mask & ~float_cmask_qnan)) { | |
1337 | return is_nan(a->cls) ? b : a; | |
1338 | } | |
0e903037 CMC |
1339 | |
1340 | /* | |
1341 | * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag | |
1342 | * are removed and replaced with minimum, minimumNumber, maximum | |
1343 | * and maximumNumber. | |
1344 | * minimumNumber/maximumNumber behavior for SNaN is changed to: | |
1345 | * If both operands are NaNs, a QNaN is returned. | |
1346 | * If either operand is a SNaN, | |
1347 | * an invalid operation exception is signaled, | |
1348 | * but unless both operands are NaNs, | |
1349 | * the SNaN is otherwise ignored and not converted to a QNaN. | |
1350 | */ | |
1351 | if ((flags & minmax_isnumber) | |
1352 | && (ab_mask & float_cmask_snan) | |
1353 | && (ab_mask & ~float_cmask_anynan)) { | |
1354 | float_raise(float_flag_invalid, s); | |
1355 | return is_nan(a->cls) ? b : a; | |
1356 | } | |
1357 | ||
e1c4667a RH |
1358 | return parts_pick_nan(a, b, s); |
1359 | } | |
1360 | ||
1361 | a_exp = a->exp; | |
1362 | b_exp = b->exp; | |
1363 | ||
1364 | if (unlikely(ab_mask != float_cmask_normal)) { | |
1365 | switch (a->cls) { | |
1366 | case float_class_normal: | |
1367 | break; | |
1368 | case float_class_inf: | |
1369 | a_exp = INT16_MAX; | |
1370 | break; | |
1371 | case float_class_zero: | |
1372 | a_exp = INT16_MIN; | |
1373 | break; | |
1374 | default: | |
1375 | g_assert_not_reached(); | |
1376 | break; | |
1377 | } | |
1378 | switch (b->cls) { | |
1379 | case float_class_normal: | |
1380 | break; | |
1381 | case float_class_inf: | |
1382 | b_exp = INT16_MAX; | |
1383 | break; | |
1384 | case float_class_zero: | |
1385 | b_exp = INT16_MIN; | |
1386 | break; | |
1387 | default: | |
1388 | g_assert_not_reached(); | |
1389 | break; | |
1390 | } | |
1391 | } | |
1392 | ||
1393 | /* Compare magnitudes. */ | |
1394 | cmp = a_exp - b_exp; | |
1395 | if (cmp == 0) { | |
1396 | cmp = frac_cmp(a, b); | |
1397 | } | |
1398 | ||
1399 | /* | |
1400 | * Take the sign into account. | |
1401 | * For ismag, only do this if the magnitudes are equal. | |
1402 | */ | |
1403 | if (!(flags & minmax_ismag) || cmp == 0) { | |
1404 | if (a->sign != b->sign) { | |
1405 | /* For differing signs, the negative operand is less. */ | |
1406 | cmp = a->sign ? -1 : 1; | |
1407 | } else if (a->sign) { | |
1408 | /* For two negative operands, invert the magnitude comparison. */ | |
1409 | cmp = -cmp; | |
1410 | } | |
1411 | } | |
1412 | ||
1413 | if (flags & minmax_ismin) { | |
1414 | cmp = -cmp; | |
1415 | } | |
1416 | return cmp < 0 ? b : a; | |
1417 | } | |
6eb169b8 RH |
1418 | |
1419 | /* | |
1420 | * Floating point compare | |
1421 | */ | |
1422 | static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, | |
1423 | float_status *s, bool is_quiet) | |
1424 | { | |
1425 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
6eb169b8 RH |
1426 | |
1427 | if (likely(ab_mask == float_cmask_normal)) { | |
9343c884 RH |
1428 | FloatRelation cmp; |
1429 | ||
6eb169b8 RH |
1430 | if (a->sign != b->sign) { |
1431 | goto a_sign; | |
1432 | } | |
9343c884 | 1433 | if (a->exp == b->exp) { |
6eb169b8 | 1434 | cmp = frac_cmp(a, b); |
9343c884 RH |
1435 | } else if (a->exp < b->exp) { |
1436 | cmp = float_relation_less; | |
1437 | } else { | |
1438 | cmp = float_relation_greater; | |
6eb169b8 RH |
1439 | } |
1440 | if (a->sign) { | |
1441 | cmp = -cmp; | |
1442 | } | |
1443 | return cmp; | |
1444 | } | |
1445 | ||
1446 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
e706d445 RH |
1447 | if (ab_mask & float_cmask_snan) { |
1448 | float_raise(float_flag_invalid | float_flag_invalid_snan, s); | |
1449 | } else if (!is_quiet) { | |
6eb169b8 RH |
1450 | float_raise(float_flag_invalid, s); |
1451 | } | |
1452 | return float_relation_unordered; | |
1453 | } | |
1454 | ||
1455 | if (ab_mask & float_cmask_zero) { | |
1456 | if (ab_mask == float_cmask_zero) { | |
1457 | return float_relation_equal; | |
1458 | } else if (a->cls == float_class_zero) { | |
1459 | goto b_sign; | |
1460 | } else { | |
1461 | goto a_sign; | |
1462 | } | |
1463 | } | |
1464 | ||
1465 | if (ab_mask == float_cmask_inf) { | |
1466 | if (a->sign == b->sign) { | |
1467 | return float_relation_equal; | |
1468 | } | |
1469 | } else if (b->cls == float_class_inf) { | |
1470 | goto b_sign; | |
1471 | } else { | |
1472 | g_assert(a->cls == float_class_inf); | |
1473 | } | |
1474 | ||
1475 | a_sign: | |
1476 | return a->sign ? float_relation_less : float_relation_greater; | |
1477 | b_sign: | |
1478 | return b->sign ? float_relation_greater : float_relation_less; | |
1479 | } | |
39626b0c RH |
1480 | |
1481 | /* | |
1482 | * Multiply A by 2 raised to the power N. | |
1483 | */ | |
1484 | static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) | |
1485 | { | |
1486 | switch (a->cls) { | |
1487 | case float_class_snan: | |
1488 | case float_class_qnan: | |
1489 | parts_return_nan(a, s); | |
1490 | break; | |
1491 | case float_class_zero: | |
1492 | case float_class_inf: | |
1493 | break; | |
1494 | case float_class_normal: | |
1495 | a->exp += MIN(MAX(n, -0x10000), 0x10000); | |
1496 | break; | |
1497 | default: | |
1498 | g_assert_not_reached(); | |
1499 | } | |
1500 | } | |
2fa3546c RH |
1501 | |
1502 | /* | |
1503 | * Return log2(A) | |
1504 | */ | |
1505 | static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) | |
1506 | { | |
1507 | uint64_t a0, a1, r, t, ign; | |
1508 | FloatPartsN f; | |
1509 | int i, n, a_exp, f_exp; | |
1510 | ||
1511 | if (unlikely(a->cls != float_class_normal)) { | |
1512 | switch (a->cls) { | |
1513 | case float_class_snan: | |
1514 | case float_class_qnan: | |
1515 | parts_return_nan(a, s); | |
1516 | return; | |
1517 | case float_class_zero: | |
3cf71969 | 1518 | float_raise(float_flag_divbyzero, s); |
2fa3546c RH |
1519 | /* log2(0) = -inf */ |
1520 | a->cls = float_class_inf; | |
1521 | a->sign = 1; | |
1522 | return; | |
1523 | case float_class_inf: | |
1524 | if (unlikely(a->sign)) { | |
1525 | goto d_nan; | |
1526 | } | |
1527 | return; | |
1528 | default: | |
1529 | break; | |
1530 | } | |
1531 | g_assert_not_reached(); | |
1532 | } | |
1533 | if (unlikely(a->sign)) { | |
1534 | goto d_nan; | |
1535 | } | |
1536 | ||
1537 | /* TODO: This algorithm looses bits too quickly for float128. */ | |
1538 | g_assert(N == 64); | |
1539 | ||
1540 | a_exp = a->exp; | |
1541 | f_exp = -1; | |
1542 | ||
1543 | r = 0; | |
1544 | t = DECOMPOSED_IMPLICIT_BIT; | |
1545 | a0 = a->frac_hi; | |
1546 | a1 = 0; | |
1547 | ||
1548 | n = fmt->frac_size + 2; | |
1549 | if (unlikely(a_exp == -1)) { | |
1550 | /* | |
1551 | * When a_exp == -1, we're computing the log2 of a value [0.5,1.0). | |
1552 | * When the value is very close to 1.0, there are lots of 1's in | |
1553 | * the msb parts of the fraction. At the end, when we subtract | |
1554 | * this value from -1.0, we can see a catastrophic loss of precision, | |
1555 | * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the | |
1556 | * bits of y in the final result. To minimize this, compute as many | |
1557 | * digits as we can. | |
1558 | * ??? This case needs another algorithm to avoid this. | |
1559 | */ | |
1560 | n = fmt->frac_size * 2 + 2; | |
1561 | /* Don't compute a value overlapping the sticky bit */ | |
1562 | n = MIN(n, 62); | |
1563 | } | |
1564 | ||
1565 | for (i = 0; i < n; i++) { | |
1566 | if (a1) { | |
1567 | mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign); | |
1568 | } else if (a0 & 0xffffffffull) { | |
1569 | mul64To128(a0, a0, &a0, &a1); | |
1570 | } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) { | |
1571 | a0 >>= 32; | |
1572 | a0 *= a0; | |
1573 | } else { | |
1574 | goto exact; | |
1575 | } | |
1576 | ||
1577 | if (a0 & DECOMPOSED_IMPLICIT_BIT) { | |
1578 | if (unlikely(a_exp == 0 && r == 0)) { | |
1579 | /* | |
1580 | * When a_exp == 0, we're computing the log2 of a value | |
1581 | * [1.0,2.0). When the value is very close to 1.0, there | |
1582 | * are lots of 0's in the msb parts of the fraction. | |
1583 | * We need to compute more digits to produce a correct | |
1584 | * result -- restart at the top of the fraction. | |
1585 | * ??? This is likely to lose precision quickly, as for | |
1586 | * float128; we may need another method. | |
1587 | */ | |
1588 | f_exp -= i; | |
1589 | t = r = DECOMPOSED_IMPLICIT_BIT; | |
1590 | i = 0; | |
1591 | } else { | |
1592 | r |= t; | |
1593 | } | |
1594 | } else { | |
1595 | add128(a0, a1, a0, a1, &a0, &a1); | |
1596 | } | |
1597 | t >>= 1; | |
1598 | } | |
1599 | ||
1600 | /* Set sticky for inexact. */ | |
1601 | r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT); | |
1602 | ||
1603 | exact: | |
1604 | parts_sint_to_float(a, a_exp, 0, s); | |
1605 | if (r == 0) { | |
1606 | return; | |
1607 | } | |
1608 | ||
1609 | memset(&f, 0, sizeof(f)); | |
1610 | f.cls = float_class_normal; | |
1611 | f.frac_hi = r; | |
1612 | f.exp = f_exp - frac_normalize(&f); | |
1613 | ||
1614 | if (a_exp < 0) { | |
1615 | parts_sub_normal(a, &f); | |
1616 | } else if (a_exp > 0) { | |
1617 | parts_add_normal(a, &f); | |
1618 | } else { | |
1619 | *a = f; | |
1620 | } | |
1621 | return; | |
1622 | ||
1623 | d_nan: | |
1624 | float_raise(float_flag_invalid, s); | |
1625 | parts_default_nan(a, s); | |
1626 | } |