]>
Commit | Line | Data |
---|---|---|
7c45bad8 RH |
1 | /* |
2 | * QEMU float support | |
3 | * | |
4 | * The code in this source file is derived from release 2a of the SoftFloat | |
5 | * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and | |
6 | * some later contributions) are provided under that license, as detailed below. | |
7 | * It has subsequently been modified by contributors to the QEMU Project, | |
8 | * so some portions are provided under: | |
9 | * the SoftFloat-2a license | |
10 | * the BSD license | |
11 | * GPL-v2-or-later | |
12 | * | |
13 | * Any future contributions to this file after December 1st 2014 will be | |
14 | * taken to be licensed under the Softfloat-2a license unless specifically | |
15 | * indicated otherwise. | |
16 | */ | |
17 | ||
18 | static void partsN(return_nan)(FloatPartsN *a, float_status *s) | |
19 | { | |
20 | switch (a->cls) { | |
21 | case float_class_snan: | |
22 | float_raise(float_flag_invalid, s); | |
23 | if (s->default_nan_mode) { | |
24 | parts_default_nan(a, s); | |
25 | } else { | |
26 | parts_silence_nan(a, s); | |
27 | } | |
28 | break; | |
29 | case float_class_qnan: | |
30 | if (s->default_nan_mode) { | |
31 | parts_default_nan(a, s); | |
32 | } | |
33 | break; | |
34 | default: | |
35 | g_assert_not_reached(); | |
36 | } | |
37 | } | |
22c355f4 RH |
38 | |
39 | static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b, | |
40 | float_status *s) | |
41 | { | |
42 | if (is_snan(a->cls) || is_snan(b->cls)) { | |
43 | float_raise(float_flag_invalid, s); | |
44 | } | |
45 | ||
46 | if (s->default_nan_mode) { | |
47 | parts_default_nan(a, s); | |
48 | } else { | |
49 | int cmp = frac_cmp(a, b); | |
50 | if (cmp == 0) { | |
51 | cmp = a->sign < b->sign; | |
52 | } | |
53 | ||
54 | if (pickNaN(a->cls, b->cls, cmp > 0, s)) { | |
55 | a = b; | |
56 | } | |
57 | if (is_snan(a->cls)) { | |
58 | parts_silence_nan(a, s); | |
59 | } | |
60 | } | |
61 | return a; | |
62 | } | |
979582d0 RH |
63 | |
64 | static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b, | |
65 | FloatPartsN *c, float_status *s, | |
66 | int ab_mask, int abc_mask) | |
67 | { | |
68 | int which; | |
69 | ||
70 | if (unlikely(abc_mask & float_cmask_snan)) { | |
71 | float_raise(float_flag_invalid, s); | |
72 | } | |
73 | ||
74 | which = pickNaNMulAdd(a->cls, b->cls, c->cls, | |
75 | ab_mask == float_cmask_infzero, s); | |
76 | ||
77 | if (s->default_nan_mode || which == 3) { | |
78 | /* | |
79 | * Note that this check is after pickNaNMulAdd so that function | |
80 | * has an opportunity to set the Invalid flag for infzero. | |
81 | */ | |
82 | parts_default_nan(a, s); | |
83 | return a; | |
84 | } | |
85 | ||
86 | switch (which) { | |
87 | case 0: | |
88 | break; | |
89 | case 1: | |
90 | a = b; | |
91 | break; | |
92 | case 2: | |
93 | a = c; | |
94 | break; | |
95 | default: | |
96 | g_assert_not_reached(); | |
97 | } | |
98 | if (is_snan(a->cls)) { | |
99 | parts_silence_nan(a, s); | |
100 | } | |
101 | return a; | |
102 | } | |
d46975bc RH |
103 | |
104 | /* | |
105 | * Canonicalize the FloatParts structure. Determine the class, | |
106 | * unbias the exponent, and normalize the fraction. | |
107 | */ | |
108 | static void partsN(canonicalize)(FloatPartsN *p, float_status *status, | |
109 | const FloatFmt *fmt) | |
110 | { | |
111 | if (unlikely(p->exp == 0)) { | |
112 | if (likely(frac_eqz(p))) { | |
113 | p->cls = float_class_zero; | |
114 | } else if (status->flush_inputs_to_zero) { | |
115 | float_raise(float_flag_input_denormal, status); | |
116 | p->cls = float_class_zero; | |
117 | frac_clear(p); | |
118 | } else { | |
119 | int shift = frac_normalize(p); | |
120 | p->cls = float_class_normal; | |
121 | p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1; | |
122 | } | |
123 | } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) { | |
124 | p->cls = float_class_normal; | |
125 | p->exp -= fmt->exp_bias; | |
126 | frac_shl(p, fmt->frac_shift); | |
127 | p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; | |
128 | } else if (likely(frac_eqz(p))) { | |
129 | p->cls = float_class_inf; | |
130 | } else { | |
131 | frac_shl(p, fmt->frac_shift); | |
132 | p->cls = (parts_is_snan_frac(p->frac_hi, status) | |
133 | ? float_class_snan : float_class_qnan); | |
134 | } | |
135 | } | |
ee6959f2 RH |
136 | |
137 | /* | |
138 | * Round and uncanonicalize a floating-point number by parts. There | |
139 | * are FRAC_SHIFT bits that may require rounding at the bottom of the | |
140 | * fraction; these bits will be removed. The exponent will be biased | |
141 | * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0]. | |
142 | */ | |
143 | static void partsN(uncanon)(FloatPartsN *p, float_status *s, | |
144 | const FloatFmt *fmt) | |
145 | { | |
146 | const int exp_max = fmt->exp_max; | |
147 | const int frac_shift = fmt->frac_shift; | |
148 | const uint64_t frac_lsb = fmt->frac_lsb; | |
149 | const uint64_t frac_lsbm1 = fmt->frac_lsbm1; | |
150 | const uint64_t round_mask = fmt->round_mask; | |
151 | const uint64_t roundeven_mask = fmt->roundeven_mask; | |
152 | uint64_t inc; | |
153 | bool overflow_norm; | |
154 | int exp, flags = 0; | |
155 | ||
156 | if (unlikely(p->cls != float_class_normal)) { | |
157 | switch (p->cls) { | |
158 | case float_class_zero: | |
159 | p->exp = 0; | |
160 | frac_clear(p); | |
161 | return; | |
162 | case float_class_inf: | |
163 | g_assert(!fmt->arm_althp); | |
164 | p->exp = fmt->exp_max; | |
165 | frac_clear(p); | |
166 | return; | |
167 | case float_class_qnan: | |
168 | case float_class_snan: | |
169 | g_assert(!fmt->arm_althp); | |
170 | p->exp = fmt->exp_max; | |
171 | frac_shr(p, fmt->frac_shift); | |
172 | return; | |
173 | default: | |
174 | break; | |
175 | } | |
176 | g_assert_not_reached(); | |
177 | } | |
178 | ||
60c8f726 | 179 | overflow_norm = false; |
ee6959f2 RH |
180 | switch (s->float_rounding_mode) { |
181 | case float_round_nearest_even: | |
ee6959f2 RH |
182 | inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0); |
183 | break; | |
184 | case float_round_ties_away: | |
ee6959f2 RH |
185 | inc = frac_lsbm1; |
186 | break; | |
187 | case float_round_to_zero: | |
188 | overflow_norm = true; | |
189 | inc = 0; | |
190 | break; | |
191 | case float_round_up: | |
192 | inc = p->sign ? 0 : round_mask; | |
193 | overflow_norm = p->sign; | |
194 | break; | |
195 | case float_round_down: | |
196 | inc = p->sign ? round_mask : 0; | |
197 | overflow_norm = !p->sign; | |
198 | break; | |
199 | case float_round_to_odd: | |
200 | overflow_norm = true; | |
60c8f726 RH |
201 | /* fall through */ |
202 | case float_round_to_odd_inf: | |
ee6959f2 RH |
203 | inc = p->frac_lo & frac_lsb ? 0 : round_mask; |
204 | break; | |
205 | default: | |
206 | g_assert_not_reached(); | |
207 | } | |
208 | ||
209 | exp = p->exp + fmt->exp_bias; | |
210 | if (likely(exp > 0)) { | |
211 | if (p->frac_lo & round_mask) { | |
212 | flags |= float_flag_inexact; | |
213 | if (frac_addi(p, p, inc)) { | |
214 | frac_shr(p, 1); | |
215 | p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; | |
216 | exp++; | |
217 | } | |
218 | } | |
219 | frac_shr(p, frac_shift); | |
220 | ||
221 | if (fmt->arm_althp) { | |
222 | /* ARM Alt HP eschews Inf and NaN for a wider exponent. */ | |
223 | if (unlikely(exp > exp_max)) { | |
224 | /* Overflow. Return the maximum normal. */ | |
225 | flags = float_flag_invalid; | |
226 | exp = exp_max; | |
227 | frac_allones(p); | |
228 | } | |
229 | } else if (unlikely(exp >= exp_max)) { | |
230 | flags |= float_flag_overflow | float_flag_inexact; | |
231 | if (overflow_norm) { | |
232 | exp = exp_max - 1; | |
233 | frac_allones(p); | |
234 | } else { | |
235 | p->cls = float_class_inf; | |
236 | exp = exp_max; | |
237 | frac_clear(p); | |
238 | } | |
239 | } | |
240 | } else if (s->flush_to_zero) { | |
241 | flags |= float_flag_output_denormal; | |
242 | p->cls = float_class_zero; | |
243 | exp = 0; | |
244 | frac_clear(p); | |
245 | } else { | |
246 | bool is_tiny = s->tininess_before_rounding || exp < 0; | |
247 | ||
248 | if (!is_tiny) { | |
249 | FloatPartsN discard; | |
250 | is_tiny = !frac_addi(&discard, p, inc); | |
251 | } | |
252 | ||
253 | frac_shrjam(p, 1 - exp); | |
254 | ||
255 | if (p->frac_lo & round_mask) { | |
256 | /* Need to recompute round-to-even/round-to-odd. */ | |
257 | switch (s->float_rounding_mode) { | |
258 | case float_round_nearest_even: | |
259 | inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 | |
260 | ? frac_lsbm1 : 0); | |
261 | break; | |
262 | case float_round_to_odd: | |
60c8f726 | 263 | case float_round_to_odd_inf: |
ee6959f2 RH |
264 | inc = p->frac_lo & frac_lsb ? 0 : round_mask; |
265 | break; | |
266 | default: | |
267 | break; | |
268 | } | |
269 | flags |= float_flag_inexact; | |
270 | frac_addi(p, p, inc); | |
271 | } | |
272 | ||
273 | exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0; | |
274 | frac_shr(p, frac_shift); | |
275 | ||
276 | if (is_tiny && (flags & float_flag_inexact)) { | |
277 | flags |= float_flag_underflow; | |
278 | } | |
279 | if (exp == 0 && frac_eqz(p)) { | |
280 | p->cls = float_class_zero; | |
281 | } | |
282 | } | |
283 | p->exp = exp; | |
284 | float_raise(flags, s); | |
285 | } | |
da10a907 RH |
286 | |
287 | /* | |
288 | * Returns the result of adding or subtracting the values of the | |
289 | * floating-point values `a' and `b'. The operation is performed | |
290 | * according to the IEC/IEEE Standard for Binary Floating-Point | |
291 | * Arithmetic. | |
292 | */ | |
293 | static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, | |
294 | float_status *s, bool subtract) | |
295 | { | |
296 | bool b_sign = b->sign ^ subtract; | |
297 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
298 | ||
299 | if (a->sign != b_sign) { | |
300 | /* Subtraction */ | |
301 | if (likely(ab_mask == float_cmask_normal)) { | |
302 | if (parts_sub_normal(a, b)) { | |
303 | return a; | |
304 | } | |
305 | /* Subtract was exact, fall through to set sign. */ | |
306 | ab_mask = float_cmask_zero; | |
307 | } | |
308 | ||
309 | if (ab_mask == float_cmask_zero) { | |
310 | a->sign = s->float_rounding_mode == float_round_down; | |
311 | return a; | |
312 | } | |
313 | ||
314 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
315 | goto p_nan; | |
316 | } | |
317 | ||
318 | if (ab_mask & float_cmask_inf) { | |
319 | if (a->cls != float_class_inf) { | |
320 | /* N - Inf */ | |
321 | goto return_b; | |
322 | } | |
323 | if (b->cls != float_class_inf) { | |
324 | /* Inf - N */ | |
325 | return a; | |
326 | } | |
327 | /* Inf - Inf */ | |
328 | float_raise(float_flag_invalid, s); | |
329 | parts_default_nan(a, s); | |
330 | return a; | |
331 | } | |
332 | } else { | |
333 | /* Addition */ | |
334 | if (likely(ab_mask == float_cmask_normal)) { | |
335 | parts_add_normal(a, b); | |
336 | return a; | |
337 | } | |
338 | ||
339 | if (ab_mask == float_cmask_zero) { | |
340 | return a; | |
341 | } | |
342 | ||
343 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
344 | goto p_nan; | |
345 | } | |
346 | ||
347 | if (ab_mask & float_cmask_inf) { | |
348 | a->cls = float_class_inf; | |
349 | return a; | |
350 | } | |
351 | } | |
352 | ||
353 | if (b->cls == float_class_zero) { | |
354 | g_assert(a->cls == float_class_normal); | |
355 | return a; | |
356 | } | |
357 | ||
358 | g_assert(a->cls == float_class_zero); | |
359 | g_assert(b->cls == float_class_normal); | |
360 | return_b: | |
361 | b->sign = b_sign; | |
362 | return b; | |
363 | ||
364 | p_nan: | |
365 | return parts_pick_nan(a, b, s); | |
366 | } | |
aca84527 RH |
367 | |
368 | /* | |
369 | * Returns the result of multiplying the floating-point values `a' and | |
370 | * `b'. The operation is performed according to the IEC/IEEE Standard | |
371 | * for Binary Floating-Point Arithmetic. | |
372 | */ | |
373 | static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, | |
374 | float_status *s) | |
375 | { | |
376 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
377 | bool sign = a->sign ^ b->sign; | |
378 | ||
379 | if (likely(ab_mask == float_cmask_normal)) { | |
380 | FloatPartsW tmp; | |
381 | ||
382 | frac_mulw(&tmp, a, b); | |
383 | frac_truncjam(a, &tmp); | |
384 | ||
385 | a->exp += b->exp + 1; | |
386 | if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { | |
387 | frac_add(a, a, a); | |
388 | a->exp -= 1; | |
389 | } | |
390 | ||
391 | a->sign = sign; | |
392 | return a; | |
393 | } | |
394 | ||
395 | /* Inf * Zero == NaN */ | |
396 | if (unlikely(ab_mask == float_cmask_infzero)) { | |
397 | float_raise(float_flag_invalid, s); | |
398 | parts_default_nan(a, s); | |
399 | return a; | |
400 | } | |
401 | ||
402 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
403 | return parts_pick_nan(a, b, s); | |
404 | } | |
405 | ||
406 | /* Multiply by 0 or Inf */ | |
407 | if (ab_mask & float_cmask_inf) { | |
408 | a->cls = float_class_inf; | |
409 | a->sign = sign; | |
410 | return a; | |
411 | } | |
412 | ||
413 | g_assert(ab_mask & float_cmask_zero); | |
414 | a->cls = float_class_zero; | |
415 | a->sign = sign; | |
416 | return a; | |
417 | } | |
dedd123c RH |
418 | |
419 | /* | |
420 | * Returns the result of multiplying the floating-point values `a' and | |
421 | * `b' then adding 'c', with no intermediate rounding step after the | |
422 | * multiplication. The operation is performed according to the | |
423 | * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008. | |
424 | * The flags argument allows the caller to select negation of the | |
425 | * addend, the intermediate product, or the final result. (The | |
426 | * difference between this and having the caller do a separate | |
427 | * negation is that negating externally will flip the sign bit on NaNs.) | |
428 | * | |
429 | * Requires A and C extracted into a double-sized structure to provide the | |
430 | * extra space for the widening multiply. | |
431 | */ | |
432 | static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b, | |
433 | FloatPartsN *c, int flags, float_status *s) | |
434 | { | |
435 | int ab_mask, abc_mask; | |
436 | FloatPartsW p_widen, c_widen; | |
437 | ||
438 | ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
439 | abc_mask = float_cmask(c->cls) | ab_mask; | |
440 | ||
441 | /* | |
442 | * It is implementation-defined whether the cases of (0,inf,qnan) | |
443 | * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN | |
444 | * they return if they do), so we have to hand this information | |
445 | * off to the target-specific pick-a-NaN routine. | |
446 | */ | |
447 | if (unlikely(abc_mask & float_cmask_anynan)) { | |
448 | return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask); | |
449 | } | |
450 | ||
451 | if (flags & float_muladd_negate_c) { | |
452 | c->sign ^= 1; | |
453 | } | |
454 | ||
455 | /* Compute the sign of the product into A. */ | |
456 | a->sign ^= b->sign; | |
457 | if (flags & float_muladd_negate_product) { | |
458 | a->sign ^= 1; | |
459 | } | |
460 | ||
461 | if (unlikely(ab_mask != float_cmask_normal)) { | |
462 | if (unlikely(ab_mask == float_cmask_infzero)) { | |
463 | goto d_nan; | |
464 | } | |
465 | ||
466 | if (ab_mask & float_cmask_inf) { | |
467 | if (c->cls == float_class_inf && a->sign != c->sign) { | |
468 | goto d_nan; | |
469 | } | |
470 | goto return_inf; | |
471 | } | |
472 | ||
473 | g_assert(ab_mask & float_cmask_zero); | |
474 | if (c->cls == float_class_normal) { | |
475 | *a = *c; | |
476 | goto return_normal; | |
477 | } | |
478 | if (c->cls == float_class_zero) { | |
479 | if (a->sign != c->sign) { | |
480 | goto return_sub_zero; | |
481 | } | |
482 | goto return_zero; | |
483 | } | |
484 | g_assert(c->cls == float_class_inf); | |
485 | } | |
486 | ||
487 | if (unlikely(c->cls == float_class_inf)) { | |
488 | a->sign = c->sign; | |
489 | goto return_inf; | |
490 | } | |
491 | ||
492 | /* Perform the multiplication step. */ | |
493 | p_widen.sign = a->sign; | |
494 | p_widen.exp = a->exp + b->exp + 1; | |
495 | frac_mulw(&p_widen, a, b); | |
496 | if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) { | |
497 | frac_add(&p_widen, &p_widen, &p_widen); | |
498 | p_widen.exp -= 1; | |
499 | } | |
500 | ||
501 | /* Perform the addition step. */ | |
502 | if (c->cls != float_class_zero) { | |
503 | /* Zero-extend C to less significant bits. */ | |
504 | frac_widen(&c_widen, c); | |
505 | c_widen.exp = c->exp; | |
506 | ||
507 | if (a->sign == c->sign) { | |
508 | parts_add_normal(&p_widen, &c_widen); | |
509 | } else if (!parts_sub_normal(&p_widen, &c_widen)) { | |
510 | goto return_sub_zero; | |
511 | } | |
512 | } | |
513 | ||
514 | /* Narrow with sticky bit, for proper rounding later. */ | |
515 | frac_truncjam(a, &p_widen); | |
516 | a->sign = p_widen.sign; | |
517 | a->exp = p_widen.exp; | |
518 | ||
519 | return_normal: | |
520 | if (flags & float_muladd_halve_result) { | |
521 | a->exp -= 1; | |
522 | } | |
523 | finish_sign: | |
524 | if (flags & float_muladd_negate_result) { | |
525 | a->sign ^= 1; | |
526 | } | |
527 | return a; | |
528 | ||
529 | return_sub_zero: | |
530 | a->sign = s->float_rounding_mode == float_round_down; | |
531 | return_zero: | |
532 | a->cls = float_class_zero; | |
533 | goto finish_sign; | |
534 | ||
535 | return_inf: | |
536 | a->cls = float_class_inf; | |
537 | goto finish_sign; | |
538 | ||
539 | d_nan: | |
540 | float_raise(float_flag_invalid, s); | |
541 | parts_default_nan(a, s); | |
542 | return a; | |
543 | } | |
ec961b81 RH |
544 | |
545 | /* | |
546 | * Returns the result of dividing the floating-point value `a' by the | |
547 | * corresponding value `b'. The operation is performed according to | |
548 | * the IEC/IEEE Standard for Binary Floating-Point Arithmetic. | |
549 | */ | |
550 | static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, | |
551 | float_status *s) | |
552 | { | |
553 | int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); | |
554 | bool sign = a->sign ^ b->sign; | |
555 | ||
556 | if (likely(ab_mask == float_cmask_normal)) { | |
557 | a->sign = sign; | |
558 | a->exp -= b->exp + frac_div(a, b); | |
559 | return a; | |
560 | } | |
561 | ||
562 | /* 0/0 or Inf/Inf => NaN */ | |
563 | if (unlikely(ab_mask == float_cmask_zero) || | |
564 | unlikely(ab_mask == float_cmask_inf)) { | |
565 | float_raise(float_flag_invalid, s); | |
566 | parts_default_nan(a, s); | |
567 | return a; | |
568 | } | |
569 | ||
570 | /* All the NaN cases */ | |
571 | if (unlikely(ab_mask & float_cmask_anynan)) { | |
572 | return parts_pick_nan(a, b, s); | |
573 | } | |
574 | ||
575 | a->sign = sign; | |
576 | ||
577 | /* Inf / X */ | |
578 | if (a->cls == float_class_inf) { | |
579 | return a; | |
580 | } | |
581 | ||
582 | /* 0 / X */ | |
583 | if (a->cls == float_class_zero) { | |
584 | return a; | |
585 | } | |
586 | ||
587 | /* X / Inf */ | |
588 | if (b->cls == float_class_inf) { | |
589 | a->cls = float_class_zero; | |
590 | return a; | |
591 | } | |
592 | ||
593 | /* X / 0 => Inf */ | |
594 | g_assert(b->cls == float_class_zero); | |
595 | float_raise(float_flag_divbyzero, s); | |
596 | a->cls = float_class_inf; | |
597 | return a; | |
598 | } | |
afc34931 RH |
599 | |
600 | /* | |
601 | * Rounds the floating-point value `a' to an integer, and returns the | |
602 | * result as a floating-point value. The operation is performed | |
603 | * according to the IEC/IEEE Standard for Binary Floating-Point | |
604 | * Arithmetic. | |
605 | * | |
606 | * parts_round_to_int_normal is an internal helper function for | |
607 | * normal numbers only, returning true for inexact but not directly | |
608 | * raising float_flag_inexact. | |
609 | */ | |
610 | static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode, | |
611 | int scale, int frac_size) | |
612 | { | |
613 | uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc; | |
614 | int shift_adj; | |
615 | ||
616 | scale = MIN(MAX(scale, -0x10000), 0x10000); | |
617 | a->exp += scale; | |
618 | ||
619 | if (a->exp < 0) { | |
620 | bool one; | |
621 | ||
622 | /* All fractional */ | |
623 | switch (rmode) { | |
624 | case float_round_nearest_even: | |
625 | one = false; | |
626 | if (a->exp == -1) { | |
627 | FloatPartsN tmp; | |
628 | /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */ | |
629 | frac_add(&tmp, a, a); | |
630 | /* Anything remaining means frac > 0.5. */ | |
631 | one = !frac_eqz(&tmp); | |
632 | } | |
633 | break; | |
634 | case float_round_ties_away: | |
635 | one = a->exp == -1; | |
636 | break; | |
637 | case float_round_to_zero: | |
638 | one = false; | |
639 | break; | |
640 | case float_round_up: | |
641 | one = !a->sign; | |
642 | break; | |
643 | case float_round_down: | |
644 | one = a->sign; | |
645 | break; | |
646 | case float_round_to_odd: | |
647 | one = true; | |
648 | break; | |
649 | default: | |
650 | g_assert_not_reached(); | |
651 | } | |
652 | ||
653 | frac_clear(a); | |
654 | a->exp = 0; | |
655 | if (one) { | |
656 | a->frac_hi = DECOMPOSED_IMPLICIT_BIT; | |
657 | } else { | |
658 | a->cls = float_class_zero; | |
659 | } | |
660 | return true; | |
661 | } | |
662 | ||
663 | if (a->exp >= frac_size) { | |
664 | /* All integral */ | |
665 | return false; | |
666 | } | |
667 | ||
668 | if (N > 64 && a->exp < N - 64) { | |
669 | /* | |
670 | * Rounding is not in the low word -- shift lsb to bit 2, | |
671 | * which leaves room for sticky and rounding bit. | |
672 | */ | |
673 | shift_adj = (N - 1) - (a->exp + 2); | |
674 | frac_shrjam(a, shift_adj); | |
675 | frac_lsb = 1 << 2; | |
676 | } else { | |
677 | shift_adj = 0; | |
678 | frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63); | |
679 | } | |
680 | ||
681 | frac_lsbm1 = frac_lsb >> 1; | |
682 | rnd_mask = frac_lsb - 1; | |
683 | rnd_even_mask = rnd_mask | frac_lsb; | |
684 | ||
685 | if (!(a->frac_lo & rnd_mask)) { | |
686 | /* Fractional bits already clear, undo the shift above. */ | |
687 | frac_shl(a, shift_adj); | |
688 | return false; | |
689 | } | |
690 | ||
691 | switch (rmode) { | |
692 | case float_round_nearest_even: | |
693 | inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0); | |
694 | break; | |
695 | case float_round_ties_away: | |
696 | inc = frac_lsbm1; | |
697 | break; | |
698 | case float_round_to_zero: | |
699 | inc = 0; | |
700 | break; | |
701 | case float_round_up: | |
702 | inc = a->sign ? 0 : rnd_mask; | |
703 | break; | |
704 | case float_round_down: | |
705 | inc = a->sign ? rnd_mask : 0; | |
706 | break; | |
707 | case float_round_to_odd: | |
708 | inc = a->frac_lo & frac_lsb ? 0 : rnd_mask; | |
709 | break; | |
710 | default: | |
711 | g_assert_not_reached(); | |
712 | } | |
713 | ||
714 | if (shift_adj == 0) { | |
715 | if (frac_addi(a, a, inc)) { | |
716 | frac_shr(a, 1); | |
717 | a->frac_hi |= DECOMPOSED_IMPLICIT_BIT; | |
718 | a->exp++; | |
719 | } | |
720 | a->frac_lo &= ~rnd_mask; | |
721 | } else { | |
722 | frac_addi(a, a, inc); | |
723 | a->frac_lo &= ~rnd_mask; | |
724 | /* Be careful shifting back, not to overflow */ | |
725 | frac_shl(a, shift_adj - 1); | |
726 | if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) { | |
727 | a->exp++; | |
728 | } else { | |
729 | frac_add(a, a, a); | |
730 | } | |
731 | } | |
732 | return true; | |
733 | } | |
734 | ||
735 | static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode, | |
736 | int scale, float_status *s, | |
737 | const FloatFmt *fmt) | |
738 | { | |
739 | switch (a->cls) { | |
740 | case float_class_qnan: | |
741 | case float_class_snan: | |
742 | parts_return_nan(a, s); | |
743 | break; | |
744 | case float_class_zero: | |
745 | case float_class_inf: | |
746 | break; | |
747 | case float_class_normal: | |
748 | if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) { | |
749 | float_raise(float_flag_inexact, s); | |
750 | } | |
751 | break; | |
752 | default: | |
753 | g_assert_not_reached(); | |
754 | } | |
755 | } | |
463b3f0d RH |
756 | |
757 | /* | |
758 | * Returns the result of converting the floating-point value `a' to | |
759 | * the two's complement integer format. The conversion is performed | |
760 | * according to the IEC/IEEE Standard for Binary Floating-Point | |
761 | * Arithmetic---which means in particular that the conversion is | |
762 | * rounded according to the current rounding mode. If `a' is a NaN, | |
763 | * the largest positive integer is returned. Otherwise, if the | |
764 | * conversion overflows, the largest integer with the same sign as `a' | |
765 | * is returned. | |
4ab4aef0 | 766 | */ |
463b3f0d RH |
767 | static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode, |
768 | int scale, int64_t min, int64_t max, | |
769 | float_status *s) | |
770 | { | |
771 | int flags = 0; | |
772 | uint64_t r; | |
773 | ||
774 | switch (p->cls) { | |
775 | case float_class_snan: | |
776 | case float_class_qnan: | |
777 | flags = float_flag_invalid; | |
778 | r = max; | |
779 | break; | |
780 | ||
781 | case float_class_inf: | |
782 | flags = float_flag_invalid; | |
783 | r = p->sign ? min : max; | |
784 | break; | |
785 | ||
786 | case float_class_zero: | |
787 | return 0; | |
788 | ||
789 | case float_class_normal: | |
790 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | |
791 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | |
792 | flags = float_flag_inexact; | |
793 | } | |
794 | ||
795 | if (p->exp <= DECOMPOSED_BINARY_POINT) { | |
796 | r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); | |
797 | } else { | |
798 | r = UINT64_MAX; | |
799 | } | |
800 | if (p->sign) { | |
801 | if (r <= -(uint64_t)min) { | |
802 | r = -r; | |
803 | } else { | |
804 | flags = float_flag_invalid; | |
805 | r = min; | |
806 | } | |
807 | } else if (r > max) { | |
808 | flags = float_flag_invalid; | |
809 | r = max; | |
810 | } | |
811 | break; | |
812 | ||
813 | default: | |
814 | g_assert_not_reached(); | |
815 | } | |
816 | ||
817 | float_raise(flags, s); | |
818 | return r; | |
819 | } | |
4ab4aef0 RH |
820 | |
821 | /* | |
822 | * Returns the result of converting the floating-point value `a' to | |
823 | * the unsigned integer format. The conversion is performed according | |
824 | * to the IEC/IEEE Standard for Binary Floating-Point | |
825 | * Arithmetic---which means in particular that the conversion is | |
826 | * rounded according to the current rounding mode. If `a' is a NaN, | |
827 | * the largest unsigned integer is returned. Otherwise, if the | |
828 | * conversion overflows, the largest unsigned integer is returned. If | |
829 | * the 'a' is negative, the result is rounded and zero is returned; | |
830 | * values that do not round to zero will raise the inexact exception | |
831 | * flag. | |
832 | */ | |
833 | static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode, | |
834 | int scale, uint64_t max, float_status *s) | |
835 | { | |
836 | int flags = 0; | |
837 | uint64_t r; | |
838 | ||
839 | switch (p->cls) { | |
840 | case float_class_snan: | |
841 | case float_class_qnan: | |
842 | flags = float_flag_invalid; | |
843 | r = max; | |
844 | break; | |
845 | ||
846 | case float_class_inf: | |
847 | flags = float_flag_invalid; | |
848 | r = p->sign ? 0 : max; | |
849 | break; | |
850 | ||
851 | case float_class_zero: | |
852 | return 0; | |
853 | ||
854 | case float_class_normal: | |
855 | /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ | |
856 | if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { | |
857 | flags = float_flag_inexact; | |
858 | if (p->cls == float_class_zero) { | |
859 | r = 0; | |
860 | break; | |
861 | } | |
862 | } | |
863 | ||
864 | if (p->sign) { | |
865 | flags = float_flag_invalid; | |
866 | r = 0; | |
867 | } else if (p->exp > DECOMPOSED_BINARY_POINT) { | |
868 | flags = float_flag_invalid; | |
869 | r = max; | |
870 | } else { | |
871 | r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); | |
872 | if (r > max) { | |
873 | flags = float_flag_invalid; | |
874 | r = max; | |
875 | } | |
876 | } | |
877 | break; | |
878 | ||
879 | default: | |
880 | g_assert_not_reached(); | |
881 | } | |
882 | ||
883 | float_raise(flags, s); | |
884 | return r; | |
885 | } |