]> git.proxmox.com Git - rustc.git/blob - compiler/rustc_apfloat/src/ieee.rs
New upstream version 1.57.0+dfsg1
[rustc.git] / compiler / rustc_apfloat / src / ieee.rs
1 use crate::{Category, ExpInt, IEK_INF, IEK_NAN, IEK_ZERO};
2 use crate::{Float, FloatConvert, ParseError, Round, Status, StatusAnd};
3
4 use core::cmp::{self, Ordering};
5 use core::convert::TryFrom;
6 use core::fmt::{self, Write};
7 use core::marker::PhantomData;
8 use core::mem;
9 use core::ops::Neg;
10 use smallvec::{smallvec, SmallVec};
11
12 #[must_use]
13 pub struct IeeeFloat<S> {
14 /// Absolute significand value (including the integer bit).
15 sig: [Limb; 1],
16
17 /// The signed unbiased exponent of the value.
18 exp: ExpInt,
19
20 /// What kind of floating point number this is.
21 category: Category,
22
23 /// Sign bit of the number.
24 sign: bool,
25
26 marker: PhantomData<S>,
27 }
28
29 /// Fundamental unit of big integer arithmetic, but also
30 /// large to store the largest significands by itself.
31 type Limb = u128;
32 const LIMB_BITS: usize = 128;
33 fn limbs_for_bits(bits: usize) -> usize {
34 (bits + LIMB_BITS - 1) / LIMB_BITS
35 }
36
37 /// Enum that represents what fraction of the LSB truncated bits of an fp number
38 /// represent.
39 ///
40 /// This essentially combines the roles of guard and sticky bits.
41 #[must_use]
42 #[derive(Copy, Clone, PartialEq, Eq, Debug)]
43 enum Loss {
44 // Example of truncated bits:
45 ExactlyZero, // 000000
46 LessThanHalf, // 0xxxxx x's not all zero
47 ExactlyHalf, // 100000
48 MoreThanHalf, // 1xxxxx x's not all zero
49 }
50
51 /// Represents floating point arithmetic semantics.
52 pub trait Semantics: Sized {
53 /// Total number of bits in the in-memory format.
54 const BITS: usize;
55
56 /// Number of bits in the significand. This includes the integer bit.
57 const PRECISION: usize;
58
59 /// The largest E such that 2<sup>E</sup> is representable; this matches the
60 /// definition of IEEE 754.
61 const MAX_EXP: ExpInt;
62
63 /// The smallest E such that 2<sup>E</sup> is a normalized number; this
64 /// matches the definition of IEEE 754.
65 const MIN_EXP: ExpInt = -Self::MAX_EXP + 1;
66
67 /// The significand bit that marks NaN as quiet.
68 const QNAN_BIT: usize = Self::PRECISION - 2;
69
70 /// The significand bitpattern to mark a NaN as quiet.
71 /// NOTE: for X87DoubleExtended we need to set two bits instead of 2.
72 const QNAN_SIGNIFICAND: Limb = 1 << Self::QNAN_BIT;
73
74 fn from_bits(bits: u128) -> IeeeFloat<Self> {
75 assert!(Self::BITS > Self::PRECISION);
76
77 let sign = bits & (1 << (Self::BITS - 1));
78 let exponent = (bits & !sign) >> (Self::PRECISION - 1);
79 let mut r = IeeeFloat {
80 sig: [bits & ((1 << (Self::PRECISION - 1)) - 1)],
81 // Convert the exponent from its bias representation to a signed integer.
82 exp: (exponent as ExpInt) - Self::MAX_EXP,
83 category: Category::Zero,
84 sign: sign != 0,
85 marker: PhantomData,
86 };
87
88 if r.exp == Self::MIN_EXP - 1 && r.sig == [0] {
89 // Exponent, significand meaningless.
90 r.category = Category::Zero;
91 } else if r.exp == Self::MAX_EXP + 1 && r.sig == [0] {
92 // Exponent, significand meaningless.
93 r.category = Category::Infinity;
94 } else if r.exp == Self::MAX_EXP + 1 && r.sig != [0] {
95 // Sign, exponent, significand meaningless.
96 r.category = Category::NaN;
97 } else {
98 r.category = Category::Normal;
99 if r.exp == Self::MIN_EXP - 1 {
100 // Denormal.
101 r.exp = Self::MIN_EXP;
102 } else {
103 // Set integer bit.
104 sig::set_bit(&mut r.sig, Self::PRECISION - 1);
105 }
106 }
107
108 r
109 }
110
111 fn to_bits(x: IeeeFloat<Self>) -> u128 {
112 assert!(Self::BITS > Self::PRECISION);
113
114 // Split integer bit from significand.
115 let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1);
116 let mut significand = x.sig[0] & ((1 << (Self::PRECISION - 1)) - 1);
117 let exponent = match x.category {
118 Category::Normal => {
119 if x.exp == Self::MIN_EXP && !integer_bit {
120 // Denormal.
121 Self::MIN_EXP - 1
122 } else {
123 x.exp
124 }
125 }
126 Category::Zero => {
127 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
128 significand = 0;
129 Self::MIN_EXP - 1
130 }
131 Category::Infinity => {
132 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
133 significand = 0;
134 Self::MAX_EXP + 1
135 }
136 Category::NaN => Self::MAX_EXP + 1,
137 };
138
139 // Convert the exponent from a signed integer to its bias representation.
140 let exponent = (exponent + Self::MAX_EXP) as u128;
141
142 ((x.sign as u128) << (Self::BITS - 1)) | (exponent << (Self::PRECISION - 1)) | significand
143 }
144 }
145
146 impl<S> Copy for IeeeFloat<S> {}
147 impl<S> Clone for IeeeFloat<S> {
148 fn clone(&self) -> Self {
149 *self
150 }
151 }
152
153 macro_rules! ieee_semantics {
154 ($($name:ident = $sem:ident($bits:tt : $exp_bits:tt)),*) => {
155 $(pub struct $sem;)*
156 $(pub type $name = IeeeFloat<$sem>;)*
157 $(impl Semantics for $sem {
158 const BITS: usize = $bits;
159 const PRECISION: usize = ($bits - 1 - $exp_bits) + 1;
160 const MAX_EXP: ExpInt = (1 << ($exp_bits - 1)) - 1;
161 })*
162 }
163 }
164
165 ieee_semantics! {
166 Half = HalfS(16:5),
167 Single = SingleS(32:8),
168 Double = DoubleS(64:11),
169 Quad = QuadS(128:15)
170 }
171
172 pub struct X87DoubleExtendedS;
173 pub type X87DoubleExtended = IeeeFloat<X87DoubleExtendedS>;
174 impl Semantics for X87DoubleExtendedS {
175 const BITS: usize = 80;
176 const PRECISION: usize = 64;
177 const MAX_EXP: ExpInt = (1 << (15 - 1)) - 1;
178
179 /// For x87 extended precision, we want to make a NaN, not a
180 /// pseudo-NaN. Maybe we should expose the ability to make
181 /// pseudo-NaNs?
182 const QNAN_SIGNIFICAND: Limb = 0b11 << Self::QNAN_BIT;
183
184 /// Integer bit is explicit in this format. Intel hardware (387 and later)
185 /// does not support these bit patterns:
186 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
187 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
188 /// exponent = 0, integer bit 1 ("pseudodenormal")
189 /// exponent != 0 nor all 1's, integer bit 0 ("unnormal")
190 /// At the moment, the first two are treated as NaNs, the second two as Normal.
191 fn from_bits(bits: u128) -> IeeeFloat<Self> {
192 let sign = bits & (1 << (Self::BITS - 1));
193 let exponent = (bits & !sign) >> Self::PRECISION;
194 let mut r = IeeeFloat {
195 sig: [bits & ((1 << (Self::PRECISION - 1)) - 1)],
196 // Convert the exponent from its bias representation to a signed integer.
197 exp: (exponent as ExpInt) - Self::MAX_EXP,
198 category: Category::Zero,
199 sign: sign != 0,
200 marker: PhantomData,
201 };
202
203 if r.exp == Self::MIN_EXP - 1 && r.sig == [0] {
204 // Exponent, significand meaningless.
205 r.category = Category::Zero;
206 } else if r.exp == Self::MAX_EXP + 1 && r.sig == [1 << (Self::PRECISION - 1)] {
207 // Exponent, significand meaningless.
208 r.category = Category::Infinity;
209 } else if r.exp == Self::MAX_EXP + 1 && r.sig != [1 << (Self::PRECISION - 1)] {
210 // Sign, exponent, significand meaningless.
211 r.category = Category::NaN;
212 } else {
213 r.category = Category::Normal;
214 if r.exp == Self::MIN_EXP - 1 {
215 // Denormal.
216 r.exp = Self::MIN_EXP;
217 }
218 }
219
220 r
221 }
222
223 fn to_bits(x: IeeeFloat<Self>) -> u128 {
224 // Get integer bit from significand.
225 let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1);
226 let mut significand = x.sig[0] & ((1 << Self::PRECISION) - 1);
227 let exponent = match x.category {
228 Category::Normal => {
229 if x.exp == Self::MIN_EXP && !integer_bit {
230 // Denormal.
231 Self::MIN_EXP - 1
232 } else {
233 x.exp
234 }
235 }
236 Category::Zero => {
237 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
238 significand = 0;
239 Self::MIN_EXP - 1
240 }
241 Category::Infinity => {
242 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
243 significand = 1 << (Self::PRECISION - 1);
244 Self::MAX_EXP + 1
245 }
246 Category::NaN => Self::MAX_EXP + 1,
247 };
248
249 // Convert the exponent from a signed integer to its bias representation.
250 let exponent = (exponent + Self::MAX_EXP) as u128;
251
252 ((x.sign as u128) << (Self::BITS - 1)) | (exponent << Self::PRECISION) | significand
253 }
254 }
255
256 float_common_impls!(IeeeFloat<S>);
257
258 impl<S: Semantics> PartialEq for IeeeFloat<S> {
259 fn eq(&self, rhs: &Self) -> bool {
260 self.partial_cmp(rhs) == Some(Ordering::Equal)
261 }
262 }
263
264 impl<S: Semantics> PartialOrd for IeeeFloat<S> {
265 fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
266 match (self.category, rhs.category) {
267 (Category::NaN, _) | (_, Category::NaN) => None,
268
269 (Category::Infinity, Category::Infinity) => Some((!self.sign).cmp(&(!rhs.sign))),
270
271 (Category::Zero, Category::Zero) => Some(Ordering::Equal),
272
273 (Category::Infinity, _) | (Category::Normal, Category::Zero) => {
274 Some((!self.sign).cmp(&self.sign))
275 }
276
277 (_, Category::Infinity) | (Category::Zero, Category::Normal) => {
278 Some(rhs.sign.cmp(&(!rhs.sign)))
279 }
280
281 (Category::Normal, Category::Normal) => {
282 // Two normal numbers. Do they have the same sign?
283 Some((!self.sign).cmp(&(!rhs.sign)).then_with(|| {
284 // Compare absolute values; invert result if negative.
285 let result = self.cmp_abs_normal(*rhs);
286
287 if self.sign { result.reverse() } else { result }
288 }))
289 }
290 }
291 }
292 }
293
294 impl<S> Neg for IeeeFloat<S> {
295 type Output = Self;
296 fn neg(mut self) -> Self {
297 self.sign = !self.sign;
298 self
299 }
300 }
301
302 /// Prints this value as a decimal string.
303 ///
304 /// \param precision The maximum number of digits of
305 /// precision to output. If there are fewer digits available,
306 /// zero padding will not be used unless the value is
307 /// integral and small enough to be expressed in
308 /// precision digits. 0 means to use the natural
309 /// precision of the number.
310 /// \param width The maximum number of zeros to
311 /// consider inserting before falling back to scientific
312 /// notation. 0 means to always use scientific notation.
313 ///
314 /// \param alternate Indicate whether to remove the trailing zero in
315 /// fraction part or not. Also setting this parameter to true forces
316 /// producing of output more similar to default printf behavior.
317 /// Specifically the lower e is used as exponent delimiter and exponent
318 /// always contains no less than two digits.
319 ///
320 /// Number precision width Result
321 /// ------ --------- ----- ------
322 /// 1.01E+4 5 2 10100
323 /// 1.01E+4 4 2 1.01E+4
324 /// 1.01E+4 5 1 1.01E+4
325 /// 1.01E-2 5 2 0.0101
326 /// 1.01E-2 4 2 0.0101
327 /// 1.01E-2 4 1 1.01E-2
328 impl<S: Semantics> fmt::Display for IeeeFloat<S> {
329 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
330 let width = f.width().unwrap_or(3);
331 let alternate = f.alternate();
332
333 match self.category {
334 Category::Infinity => {
335 if self.sign {
336 return f.write_str("-Inf");
337 } else {
338 return f.write_str("+Inf");
339 }
340 }
341
342 Category::NaN => return f.write_str("NaN"),
343
344 Category::Zero => {
345 if self.sign {
346 f.write_char('-')?;
347 }
348
349 if width == 0 {
350 if alternate {
351 f.write_str("0.0")?;
352 if let Some(n) = f.precision() {
353 for _ in 1..n {
354 f.write_char('0')?;
355 }
356 }
357 f.write_str("e+00")?;
358 } else {
359 f.write_str("0.0E+0")?;
360 }
361 } else {
362 f.write_char('0')?;
363 }
364 return Ok(());
365 }
366
367 Category::Normal => {}
368 }
369
370 if self.sign {
371 f.write_char('-')?;
372 }
373
374 // We use enough digits so the number can be round-tripped back to an
375 // APFloat. The formula comes from "How to Print Floating-Point Numbers
376 // Accurately" by Steele and White.
377 // FIXME: Using a formula based purely on the precision is conservative;
378 // we can print fewer digits depending on the actual value being printed.
379
380 // precision = 2 + floor(S::PRECISION / lg_2(10))
381 let precision = f.precision().unwrap_or(2 + S::PRECISION * 59 / 196);
382
383 // Decompose the number into an APInt and an exponent.
384 let mut exp = self.exp - (S::PRECISION as ExpInt - 1);
385 let mut sig = vec![self.sig[0]];
386
387 // Ignore trailing binary zeros.
388 let trailing_zeros = sig[0].trailing_zeros();
389 let _: Loss = sig::shift_right(&mut sig, &mut exp, trailing_zeros as usize);
390
391 // Change the exponent from 2^e to 10^e.
392 #[allow(clippy::comparison_chain)]
393 if exp == 0 {
394 // Nothing to do.
395 } else if exp > 0 {
396 // Just shift left.
397 let shift = exp as usize;
398 sig.resize(limbs_for_bits(S::PRECISION + shift), 0);
399 sig::shift_left(&mut sig, &mut exp, shift);
400 } else {
401 // exp < 0
402 let mut texp = -exp as usize;
403
404 // We transform this using the identity:
405 // (N)(2^-e) == (N)(5^e)(10^-e)
406
407 // Multiply significand by 5^e.
408 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
409 let mut sig_scratch = vec![];
410 let mut p5 = vec![];
411 let mut p5_scratch = vec![];
412 while texp != 0 {
413 if p5.is_empty() {
414 p5.push(5);
415 } else {
416 p5_scratch.resize(p5.len() * 2, 0);
417 let _: Loss =
418 sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS);
419 while p5_scratch.last() == Some(&0) {
420 p5_scratch.pop();
421 }
422 mem::swap(&mut p5, &mut p5_scratch);
423 }
424 if texp & 1 != 0 {
425 sig_scratch.resize(sig.len() + p5.len(), 0);
426 let _: Loss = sig::mul(
427 &mut sig_scratch,
428 &mut 0,
429 &sig,
430 &p5,
431 (sig.len() + p5.len()) * LIMB_BITS,
432 );
433 while sig_scratch.last() == Some(&0) {
434 sig_scratch.pop();
435 }
436 mem::swap(&mut sig, &mut sig_scratch);
437 }
438 texp >>= 1;
439 }
440 }
441
442 // Fill the buffer.
443 let mut buffer = vec![];
444
445 // Ignore digits from the significand until it is no more
446 // precise than is required for the desired precision.
447 // 196/59 is a very slight overestimate of lg_2(10).
448 let required = (precision * 196 + 58) / 59;
449 let mut discard_digits = sig::omsb(&sig).saturating_sub(required) * 59 / 196;
450 let mut in_trail = true;
451 while !sig.is_empty() {
452 // Perform short division by 10 to extract the rightmost digit.
453 // rem <- sig % 10
454 // sig <- sig / 10
455 let mut rem = 0;
456
457 // Use 64-bit division and remainder, with 32-bit chunks from sig.
458 sig::each_chunk(&mut sig, 32, |chunk| {
459 let chunk = chunk as u32;
460 let combined = ((rem as u64) << 32) | (chunk as u64);
461 rem = (combined % 10) as u8;
462 (combined / 10) as u32 as Limb
463 });
464
465 // Reduce the sigificand to avoid wasting time dividing 0's.
466 while sig.last() == Some(&0) {
467 sig.pop();
468 }
469
470 let digit = rem;
471
472 // Ignore digits we don't need.
473 if discard_digits > 0 {
474 discard_digits -= 1;
475 exp += 1;
476 continue;
477 }
478
479 // Drop trailing zeros.
480 if in_trail && digit == 0 {
481 exp += 1;
482 } else {
483 in_trail = false;
484 buffer.push(b'0' + digit);
485 }
486 }
487
488 assert!(!buffer.is_empty(), "no characters in buffer!");
489
490 // Drop down to precision.
491 // FIXME: don't do more precise calculations above than are required.
492 if buffer.len() > precision {
493 // The most significant figures are the last ones in the buffer.
494 let mut first_sig = buffer.len() - precision;
495
496 // Round.
497 // FIXME: this probably shouldn't use 'round half up'.
498
499 // Rounding down is just a truncation, except we also want to drop
500 // trailing zeros from the new result.
501 if buffer[first_sig - 1] < b'5' {
502 while first_sig < buffer.len() && buffer[first_sig] == b'0' {
503 first_sig += 1;
504 }
505 } else {
506 // Rounding up requires a decimal add-with-carry. If we continue
507 // the carry, the newly-introduced zeros will just be truncated.
508 for x in &mut buffer[first_sig..] {
509 if *x == b'9' {
510 first_sig += 1;
511 } else {
512 *x += 1;
513 break;
514 }
515 }
516 }
517
518 exp += first_sig as ExpInt;
519 buffer.drain(..first_sig);
520
521 // If we carried through, we have exactly one digit of precision.
522 if buffer.is_empty() {
523 buffer.push(b'1');
524 }
525 }
526
527 let digits = buffer.len();
528
529 // Check whether we should use scientific notation.
530 let scientific = if width == 0 {
531 true
532 } else if exp >= 0 {
533 // 765e3 --> 765000
534 // ^^^
535 // But we shouldn't make the number look more precise than it is.
536 exp as usize > width || digits + exp as usize > precision
537 } else {
538 // Power of the most significant digit.
539 let msd = exp + (digits - 1) as ExpInt;
540 if msd >= 0 {
541 // 765e-2 == 7.65
542 false
543 } else {
544 // 765e-5 == 0.00765
545 // ^ ^^
546 -msd as usize > width
547 }
548 };
549
550 // Scientific formatting is pretty straightforward.
551 if scientific {
552 exp += digits as ExpInt - 1;
553
554 f.write_char(buffer[digits - 1] as char)?;
555 f.write_char('.')?;
556 let truncate_zero = !alternate;
557 if digits == 1 && truncate_zero {
558 f.write_char('0')?;
559 } else {
560 for &d in buffer[..digits - 1].iter().rev() {
561 f.write_char(d as char)?;
562 }
563 }
564 // Fill with zeros up to precision.
565 if !truncate_zero && precision > digits - 1 {
566 for _ in 0..=precision - digits {
567 f.write_char('0')?;
568 }
569 }
570 // For alternate we use lower 'e'.
571 f.write_char(if alternate { 'e' } else { 'E' })?;
572
573 // Exponent always at least two digits if we do not truncate zeros.
574 if truncate_zero {
575 write!(f, "{:+}", exp)?;
576 } else {
577 write!(f, "{:+03}", exp)?;
578 }
579
580 return Ok(());
581 }
582
583 // Non-scientific, positive exponents.
584 if exp >= 0 {
585 for &d in buffer.iter().rev() {
586 f.write_char(d as char)?;
587 }
588 for _ in 0..exp {
589 f.write_char('0')?;
590 }
591 return Ok(());
592 }
593
594 // Non-scientific, negative exponents.
595 let unit_place = -exp as usize;
596 if unit_place < digits {
597 for &d in buffer[unit_place..].iter().rev() {
598 f.write_char(d as char)?;
599 }
600 f.write_char('.')?;
601 for &d in buffer[..unit_place].iter().rev() {
602 f.write_char(d as char)?;
603 }
604 } else {
605 f.write_str("0.")?;
606 for _ in digits..unit_place {
607 f.write_char('0')?;
608 }
609 for &d in buffer.iter().rev() {
610 f.write_char(d as char)?;
611 }
612 }
613
614 Ok(())
615 }
616 }
617
618 impl<S: Semantics> fmt::Debug for IeeeFloat<S> {
619 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
620 write!(
621 f,
622 "{}({:?} | {}{:?} * 2^{})",
623 self,
624 self.category,
625 if self.sign { "-" } else { "+" },
626 self.sig,
627 self.exp
628 )
629 }
630 }
631
632 impl<S: Semantics> Float for IeeeFloat<S> {
633 const BITS: usize = S::BITS;
634 const PRECISION: usize = S::PRECISION;
635 const MAX_EXP: ExpInt = S::MAX_EXP;
636 const MIN_EXP: ExpInt = S::MIN_EXP;
637
638 const ZERO: Self = IeeeFloat {
639 sig: [0],
640 exp: S::MIN_EXP - 1,
641 category: Category::Zero,
642 sign: false,
643 marker: PhantomData,
644 };
645
646 const INFINITY: Self = IeeeFloat {
647 sig: [0],
648 exp: S::MAX_EXP + 1,
649 category: Category::Infinity,
650 sign: false,
651 marker: PhantomData,
652 };
653
654 // FIXME(eddyb) remove when qnan becomes const fn.
655 const NAN: Self = IeeeFloat {
656 sig: [S::QNAN_SIGNIFICAND],
657 exp: S::MAX_EXP + 1,
658 category: Category::NaN,
659 sign: false,
660 marker: PhantomData,
661 };
662
663 fn qnan(payload: Option<u128>) -> Self {
664 IeeeFloat {
665 sig: [S::QNAN_SIGNIFICAND
666 | payload.map_or(0, |payload| {
667 // Zero out the excess bits of the significand.
668 payload & ((1 << S::QNAN_BIT) - 1)
669 })],
670 exp: S::MAX_EXP + 1,
671 category: Category::NaN,
672 sign: false,
673 marker: PhantomData,
674 }
675 }
676
677 fn snan(payload: Option<u128>) -> Self {
678 let mut snan = Self::qnan(payload);
679
680 // We always have to clear the QNaN bit to make it an SNaN.
681 sig::clear_bit(&mut snan.sig, S::QNAN_BIT);
682
683 // If there are no bits set in the payload, we have to set
684 // *something* to make it a NaN instead of an infinity;
685 // conventionally, this is the next bit down from the QNaN bit.
686 if snan.sig[0] & !S::QNAN_SIGNIFICAND == 0 {
687 sig::set_bit(&mut snan.sig, S::QNAN_BIT - 1);
688 }
689
690 snan
691 }
692
693 fn largest() -> Self {
694 // We want (in interchange format):
695 // exponent = 1..10
696 // significand = 1..1
697 IeeeFloat {
698 sig: [(1 << S::PRECISION) - 1],
699 exp: S::MAX_EXP,
700 category: Category::Normal,
701 sign: false,
702 marker: PhantomData,
703 }
704 }
705
706 // We want (in interchange format):
707 // exponent = 0..0
708 // significand = 0..01
709 const SMALLEST: Self = IeeeFloat {
710 sig: [1],
711 exp: S::MIN_EXP,
712 category: Category::Normal,
713 sign: false,
714 marker: PhantomData,
715 };
716
717 fn smallest_normalized() -> Self {
718 // We want (in interchange format):
719 // exponent = 0..0
720 // significand = 10..0
721 IeeeFloat {
722 sig: [1 << (S::PRECISION - 1)],
723 exp: S::MIN_EXP,
724 category: Category::Normal,
725 sign: false,
726 marker: PhantomData,
727 }
728 }
729
730 fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
731 let status = match (self.category, rhs.category) {
732 (Category::Infinity, Category::Infinity) => {
733 // Differently signed infinities can only be validly
734 // subtracted.
735 if self.sign != rhs.sign {
736 self = Self::NAN;
737 Status::INVALID_OP
738 } else {
739 Status::OK
740 }
741 }
742
743 // Sign may depend on rounding mode; handled below.
744 (_, Category::Zero) | (Category::NaN, _) | (Category::Infinity, Category::Normal) => {
745 Status::OK
746 }
747
748 (Category::Zero, _) | (_, Category::NaN | Category::Infinity) => {
749 self = rhs;
750 Status::OK
751 }
752
753 // This return code means it was not a simple case.
754 (Category::Normal, Category::Normal) => {
755 let loss = sig::add_or_sub(
756 &mut self.sig,
757 &mut self.exp,
758 &mut self.sign,
759 &mut [rhs.sig[0]],
760 rhs.exp,
761 rhs.sign,
762 );
763 let status;
764 self = unpack!(status=, self.normalize(round, loss));
765
766 // Can only be zero if we lost no fraction.
767 assert!(self.category != Category::Zero || loss == Loss::ExactlyZero);
768
769 status
770 }
771 };
772
773 // If two numbers add (exactly) to zero, IEEE 754 decrees it is a
774 // positive zero unless rounding to minus infinity, except that
775 // adding two like-signed zeroes gives that zero.
776 if self.category == Category::Zero
777 && (rhs.category != Category::Zero || self.sign != rhs.sign)
778 {
779 self.sign = round == Round::TowardNegative;
780 }
781
782 status.and(self)
783 }
784
785 fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
786 self.sign ^= rhs.sign;
787
788 match (self.category, rhs.category) {
789 (Category::NaN, _) => {
790 self.sign = false;
791 Status::OK.and(self)
792 }
793
794 (_, Category::NaN) => {
795 self.sign = false;
796 self.category = Category::NaN;
797 self.sig = rhs.sig;
798 Status::OK.and(self)
799 }
800
801 (Category::Zero, Category::Infinity) | (Category::Infinity, Category::Zero) => {
802 Status::INVALID_OP.and(Self::NAN)
803 }
804
805 (_, Category::Infinity) | (Category::Infinity, _) => {
806 self.category = Category::Infinity;
807 Status::OK.and(self)
808 }
809
810 (Category::Zero, _) | (_, Category::Zero) => {
811 self.category = Category::Zero;
812 Status::OK.and(self)
813 }
814
815 (Category::Normal, Category::Normal) => {
816 self.exp += rhs.exp;
817 let mut wide_sig = [0; 2];
818 let loss =
819 sig::mul(&mut wide_sig, &mut self.exp, &self.sig, &rhs.sig, S::PRECISION);
820 self.sig = [wide_sig[0]];
821 let mut status;
822 self = unpack!(status=, self.normalize(round, loss));
823 if loss != Loss::ExactlyZero {
824 status |= Status::INEXACT;
825 }
826 status.and(self)
827 }
828 }
829 }
830
831 fn mul_add_r(mut self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self> {
832 // If and only if all arguments are normal do we need to do an
833 // extended-precision calculation.
834 if !self.is_finite_non_zero() || !multiplicand.is_finite_non_zero() || !addend.is_finite() {
835 let mut status;
836 self = unpack!(status=, self.mul_r(multiplicand, round));
837
838 // FS can only be Status::OK or Status::INVALID_OP. There is no more work
839 // to do in the latter case. The IEEE-754R standard says it is
840 // implementation-defined in this case whether, if ADDEND is a
841 // quiet NaN, we raise invalid op; this implementation does so.
842 //
843 // If we need to do the addition we can do so with normal
844 // precision.
845 if status == Status::OK {
846 self = unpack!(status=, self.add_r(addend, round));
847 }
848 return status.and(self);
849 }
850
851 // Post-multiplication sign, before addition.
852 self.sign ^= multiplicand.sign;
853
854 // Allocate space for twice as many bits as the original significand, plus one
855 // extra bit for the addition to overflow into.
856 assert!(limbs_for_bits(S::PRECISION * 2 + 1) <= 2);
857 let mut wide_sig = sig::widening_mul(self.sig[0], multiplicand.sig[0]);
858
859 let mut loss = Loss::ExactlyZero;
860 let mut omsb = sig::omsb(&wide_sig);
861 self.exp += multiplicand.exp;
862
863 // Assume the operands involved in the multiplication are single-precision
864 // FP, and the two multiplicants are:
865 // lhs = a23 . a22 ... a0 * 2^e1
866 // rhs = b23 . b22 ... b0 * 2^e2
867 // the result of multiplication is:
868 // lhs = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
869 // Note that there are three significant bits at the left-hand side of the
870 // radix point: two for the multiplication, and an overflow bit for the
871 // addition (that will always be zero at this point). Move the radix point
872 // toward left by two bits, and adjust exponent accordingly.
873 self.exp += 2;
874
875 if addend.is_non_zero() {
876 // Normalize our MSB to one below the top bit to allow for overflow.
877 let ext_precision = 2 * S::PRECISION + 1;
878 if omsb != ext_precision - 1 {
879 assert!(ext_precision > omsb);
880 sig::shift_left(&mut wide_sig, &mut self.exp, (ext_precision - 1) - omsb);
881 }
882
883 // The intermediate result of the multiplication has "2 * S::PRECISION"
884 // significant bit; adjust the addend to be consistent with mul result.
885 let mut ext_addend_sig = [addend.sig[0], 0];
886
887 // Extend the addend significand to ext_precision - 1. This guarantees
888 // that the high bit of the significand is zero (same as wide_sig),
889 // so the addition will overflow (if it does overflow at all) into the top bit.
890 sig::shift_left(&mut ext_addend_sig, &mut 0, ext_precision - 1 - S::PRECISION);
891 loss = sig::add_or_sub(
892 &mut wide_sig,
893 &mut self.exp,
894 &mut self.sign,
895 &mut ext_addend_sig,
896 addend.exp + 1,
897 addend.sign,
898 );
899
900 omsb = sig::omsb(&wide_sig);
901 }
902
903 // Convert the result having "2 * S::PRECISION" significant-bits back to the one
904 // having "S::PRECISION" significant-bits. First, move the radix point from
905 // position "2*S::PRECISION - 1" to "S::PRECISION - 1". The exponent need to be
906 // adjusted by "2*S::PRECISION - 1" - "S::PRECISION - 1" = "S::PRECISION".
907 self.exp -= S::PRECISION as ExpInt + 1;
908
909 // In case MSB resides at the left-hand side of radix point, shift the
910 // mantissa right by some amount to make sure the MSB reside right before
911 // the radix point (i.e., "MSB . rest-significant-bits").
912 if omsb > S::PRECISION {
913 let bits = omsb - S::PRECISION;
914 loss = sig::shift_right(&mut wide_sig, &mut self.exp, bits).combine(loss);
915 }
916
917 self.sig[0] = wide_sig[0];
918
919 let mut status;
920 self = unpack!(status=, self.normalize(round, loss));
921 if loss != Loss::ExactlyZero {
922 status |= Status::INEXACT;
923 }
924
925 // If two numbers add (exactly) to zero, IEEE 754 decrees it is a
926 // positive zero unless rounding to minus infinity, except that
927 // adding two like-signed zeroes gives that zero.
928 if self.category == Category::Zero
929 && !status.intersects(Status::UNDERFLOW)
930 && self.sign != addend.sign
931 {
932 self.sign = round == Round::TowardNegative;
933 }
934
935 status.and(self)
936 }
937
938 fn div_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
939 self.sign ^= rhs.sign;
940
941 match (self.category, rhs.category) {
942 (Category::NaN, _) => {
943 self.sign = false;
944 Status::OK.and(self)
945 }
946
947 (_, Category::NaN) => {
948 self.category = Category::NaN;
949 self.sig = rhs.sig;
950 self.sign = false;
951 Status::OK.and(self)
952 }
953
954 (Category::Infinity, Category::Infinity) | (Category::Zero, Category::Zero) => {
955 Status::INVALID_OP.and(Self::NAN)
956 }
957
958 (Category::Infinity | Category::Zero, _) => Status::OK.and(self),
959
960 (Category::Normal, Category::Infinity) => {
961 self.category = Category::Zero;
962 Status::OK.and(self)
963 }
964
965 (Category::Normal, Category::Zero) => {
966 self.category = Category::Infinity;
967 Status::DIV_BY_ZERO.and(self)
968 }
969
970 (Category::Normal, Category::Normal) => {
971 self.exp -= rhs.exp;
972 let dividend = self.sig[0];
973 let loss = sig::div(
974 &mut self.sig,
975 &mut self.exp,
976 &mut [dividend],
977 &mut [rhs.sig[0]],
978 S::PRECISION,
979 );
980 let mut status;
981 self = unpack!(status=, self.normalize(round, loss));
982 if loss != Loss::ExactlyZero {
983 status |= Status::INEXACT;
984 }
985 status.and(self)
986 }
987 }
988 }
989
990 fn c_fmod(mut self, rhs: Self) -> StatusAnd<Self> {
991 match (self.category, rhs.category) {
992 (Category::NaN, _)
993 | (Category::Zero, Category::Infinity | Category::Normal)
994 | (Category::Normal, Category::Infinity) => Status::OK.and(self),
995
996 (_, Category::NaN) => {
997 self.sign = false;
998 self.category = Category::NaN;
999 self.sig = rhs.sig;
1000 Status::OK.and(self)
1001 }
1002
1003 (Category::Infinity, _) | (_, Category::Zero) => Status::INVALID_OP.and(Self::NAN),
1004
1005 (Category::Normal, Category::Normal) => {
1006 while self.is_finite_non_zero()
1007 && rhs.is_finite_non_zero()
1008 && self.cmp_abs_normal(rhs) != Ordering::Less
1009 {
1010 let mut v = rhs.scalbn(self.ilogb() - rhs.ilogb());
1011 if self.cmp_abs_normal(v) == Ordering::Less {
1012 v = v.scalbn(-1);
1013 }
1014 v.sign = self.sign;
1015
1016 let status;
1017 self = unpack!(status=, self - v);
1018 assert_eq!(status, Status::OK);
1019 }
1020 Status::OK.and(self)
1021 }
1022 }
1023 }
1024
1025 fn round_to_integral(self, round: Round) -> StatusAnd<Self> {
1026 // If the exponent is large enough, we know that this value is already
1027 // integral, and the arithmetic below would potentially cause it to saturate
1028 // to +/-Inf. Bail out early instead.
1029 if self.is_finite_non_zero() && self.exp + 1 >= S::PRECISION as ExpInt {
1030 return Status::OK.and(self);
1031 }
1032
1033 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1034 // precision of our format, and then subtract it back off again. The choice
1035 // of rounding modes for the addition/subtraction determines the rounding mode
1036 // for our integral rounding as well.
1037 // NOTE: When the input value is negative, we do subtraction followed by
1038 // addition instead.
1039 assert!(S::PRECISION <= 128);
1040 let mut status;
1041 let magic_const = unpack!(status=, Self::from_u128(1 << (S::PRECISION - 1)));
1042 let magic_const = magic_const.copy_sign(self);
1043
1044 if status != Status::OK {
1045 return status.and(self);
1046 }
1047
1048 let mut r = self;
1049 r = unpack!(status=, r.add_r(magic_const, round));
1050 if status != Status::OK && status != Status::INEXACT {
1051 return status.and(self);
1052 }
1053
1054 // Restore the input sign to handle 0.0/-0.0 cases correctly.
1055 r.sub_r(magic_const, round).map(|r| r.copy_sign(self))
1056 }
1057
1058 fn next_up(mut self) -> StatusAnd<Self> {
1059 // Compute nextUp(x), handling each float category separately.
1060 match self.category {
1061 Category::Infinity => {
1062 if self.sign {
1063 // nextUp(-inf) = -largest
1064 Status::OK.and(-Self::largest())
1065 } else {
1066 // nextUp(+inf) = +inf
1067 Status::OK.and(self)
1068 }
1069 }
1070 Category::NaN => {
1071 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
1072 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
1073 // change the payload.
1074 if self.is_signaling() {
1075 // For consistency, propagate the sign of the sNaN to the qNaN.
1076 Status::INVALID_OP.and(Self::NAN.copy_sign(self))
1077 } else {
1078 Status::OK.and(self)
1079 }
1080 }
1081 Category::Zero => {
1082 // nextUp(pm 0) = +smallest
1083 Status::OK.and(Self::SMALLEST)
1084 }
1085 Category::Normal => {
1086 // nextUp(-smallest) = -0
1087 if self.is_smallest() && self.sign {
1088 return Status::OK.and(-Self::ZERO);
1089 }
1090
1091 // nextUp(largest) == INFINITY
1092 if self.is_largest() && !self.sign {
1093 return Status::OK.and(Self::INFINITY);
1094 }
1095
1096 // Excluding the integral bit. This allows us to test for binade boundaries.
1097 let sig_mask = (1 << (S::PRECISION - 1)) - 1;
1098
1099 // nextUp(normal) == normal + inc.
1100 if self.sign {
1101 // If we are negative, we need to decrement the significand.
1102
1103 // We only cross a binade boundary that requires adjusting the exponent
1104 // if:
1105 // 1. exponent != S::MIN_EXP. This implies we are not in the
1106 // smallest binade or are dealing with denormals.
1107 // 2. Our significand excluding the integral bit is all zeros.
1108 let crossing_binade_boundary =
1109 self.exp != S::MIN_EXP && self.sig[0] & sig_mask == 0;
1110
1111 // Decrement the significand.
1112 //
1113 // We always do this since:
1114 // 1. If we are dealing with a non-binade decrement, by definition we
1115 // just decrement the significand.
1116 // 2. If we are dealing with a normal -> normal binade decrement, since
1117 // we have an explicit integral bit the fact that all bits but the
1118 // integral bit are zero implies that subtracting one will yield a
1119 // significand with 0 integral bit and 1 in all other spots. Thus we
1120 // must just adjust the exponent and set the integral bit to 1.
1121 // 3. If we are dealing with a normal -> denormal binade decrement,
1122 // since we set the integral bit to 0 when we represent denormals, we
1123 // just decrement the significand.
1124 sig::decrement(&mut self.sig);
1125
1126 if crossing_binade_boundary {
1127 // Our result is a normal number. Do the following:
1128 // 1. Set the integral bit to 1.
1129 // 2. Decrement the exponent.
1130 sig::set_bit(&mut self.sig, S::PRECISION - 1);
1131 self.exp -= 1;
1132 }
1133 } else {
1134 // If we are positive, we need to increment the significand.
1135
1136 // We only cross a binade boundary that requires adjusting the exponent if
1137 // the input is not a denormal and all of said input's significand bits
1138 // are set. If all of said conditions are true: clear the significand, set
1139 // the integral bit to 1, and increment the exponent. If we have a
1140 // denormal always increment since moving denormals and the numbers in the
1141 // smallest normal binade have the same exponent in our representation.
1142 let crossing_binade_boundary =
1143 !self.is_denormal() && self.sig[0] & sig_mask == sig_mask;
1144
1145 if crossing_binade_boundary {
1146 self.sig = [0];
1147 sig::set_bit(&mut self.sig, S::PRECISION - 1);
1148 assert_ne!(
1149 self.exp,
1150 S::MAX_EXP,
1151 "We can not increment an exponent beyond the MAX_EXP \
1152 allowed by the given floating point semantics."
1153 );
1154 self.exp += 1;
1155 } else {
1156 sig::increment(&mut self.sig);
1157 }
1158 }
1159 Status::OK.and(self)
1160 }
1161 }
1162 }
1163
1164 fn from_bits(input: u128) -> Self {
1165 // Dispatch to semantics.
1166 S::from_bits(input)
1167 }
1168
1169 fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self> {
1170 IeeeFloat {
1171 sig: [input],
1172 exp: S::PRECISION as ExpInt - 1,
1173 category: Category::Normal,
1174 sign: false,
1175 marker: PhantomData,
1176 }
1177 .normalize(round, Loss::ExactlyZero)
1178 }
1179
1180 fn from_str_r(mut s: &str, mut round: Round) -> Result<StatusAnd<Self>, ParseError> {
1181 if s.is_empty() {
1182 return Err(ParseError("Invalid string length"));
1183 }
1184
1185 // Handle special cases.
1186 match s {
1187 "inf" | "INFINITY" => return Ok(Status::OK.and(Self::INFINITY)),
1188 "-inf" | "-INFINITY" => return Ok(Status::OK.and(-Self::INFINITY)),
1189 "nan" | "NaN" => return Ok(Status::OK.and(Self::NAN)),
1190 "-nan" | "-NaN" => return Ok(Status::OK.and(-Self::NAN)),
1191 _ => {}
1192 }
1193
1194 // Handle a leading minus sign.
1195 let minus = s.starts_with('-');
1196 if minus || s.starts_with('+') {
1197 s = &s[1..];
1198 if s.is_empty() {
1199 return Err(ParseError("String has no digits"));
1200 }
1201 }
1202
1203 // Adjust the rounding mode for the absolute value below.
1204 if minus {
1205 round = -round;
1206 }
1207
1208 let r = if s.starts_with("0x") || s.starts_with("0X") {
1209 s = &s[2..];
1210 if s.is_empty() {
1211 return Err(ParseError("Invalid string"));
1212 }
1213 Self::from_hexadecimal_string(s, round)?
1214 } else {
1215 Self::from_decimal_string(s, round)?
1216 };
1217
1218 Ok(r.map(|r| if minus { -r } else { r }))
1219 }
1220
1221 fn to_bits(self) -> u128 {
1222 // Dispatch to semantics.
1223 S::to_bits(self)
1224 }
1225
1226 fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128> {
1227 // The result of trying to convert a number too large.
1228 let overflow = if self.sign {
1229 // Negative numbers cannot be represented as unsigned.
1230 0
1231 } else {
1232 // Largest unsigned integer of the given width.
1233 !0 >> (128 - width)
1234 };
1235
1236 *is_exact = false;
1237
1238 match self.category {
1239 Category::NaN => Status::INVALID_OP.and(0),
1240
1241 Category::Infinity => Status::INVALID_OP.and(overflow),
1242
1243 Category::Zero => {
1244 // Negative zero can't be represented as an int.
1245 *is_exact = !self.sign;
1246 Status::OK.and(0)
1247 }
1248
1249 Category::Normal => {
1250 let mut r = 0;
1251
1252 // Step 1: place our absolute value, with any fraction truncated, in
1253 // the destination.
1254 let truncated_bits = if self.exp < 0 {
1255 // Our absolute value is less than one; truncate everything.
1256 // For exponent -1 the integer bit represents .5, look at that.
1257 // For smaller exponents leftmost truncated bit is 0.
1258 S::PRECISION - 1 + (-self.exp) as usize
1259 } else {
1260 // We want the most significant (exponent + 1) bits; the rest are
1261 // truncated.
1262 let bits = self.exp as usize + 1;
1263
1264 // Hopelessly large in magnitude?
1265 if bits > width {
1266 return Status::INVALID_OP.and(overflow);
1267 }
1268
1269 if bits < S::PRECISION {
1270 // We truncate (S::PRECISION - bits) bits.
1271 r = self.sig[0] >> (S::PRECISION - bits);
1272 S::PRECISION - bits
1273 } else {
1274 // We want at least as many bits as are available.
1275 r = self.sig[0] << (bits - S::PRECISION);
1276 0
1277 }
1278 };
1279
1280 // Step 2: work out any lost fraction, and increment the absolute
1281 // value if we would round away from zero.
1282 let mut loss = Loss::ExactlyZero;
1283 if truncated_bits > 0 {
1284 loss = Loss::through_truncation(&self.sig, truncated_bits);
1285 if loss != Loss::ExactlyZero
1286 && self.round_away_from_zero(round, loss, truncated_bits)
1287 {
1288 r = r.wrapping_add(1);
1289 if r == 0 {
1290 return Status::INVALID_OP.and(overflow); // Overflow.
1291 }
1292 }
1293 }
1294
1295 // Step 3: check if we fit in the destination.
1296 if r > overflow {
1297 return Status::INVALID_OP.and(overflow);
1298 }
1299
1300 if loss == Loss::ExactlyZero {
1301 *is_exact = true;
1302 Status::OK.and(r)
1303 } else {
1304 Status::INEXACT.and(r)
1305 }
1306 }
1307 }
1308 }
1309
1310 fn cmp_abs_normal(self, rhs: Self) -> Ordering {
1311 assert!(self.is_finite_non_zero());
1312 assert!(rhs.is_finite_non_zero());
1313
1314 // If exponents are equal, do an unsigned comparison of the significands.
1315 self.exp.cmp(&rhs.exp).then_with(|| sig::cmp(&self.sig, &rhs.sig))
1316 }
1317
1318 fn bitwise_eq(self, rhs: Self) -> bool {
1319 if self.category != rhs.category || self.sign != rhs.sign {
1320 return false;
1321 }
1322
1323 if self.category == Category::Zero || self.category == Category::Infinity {
1324 return true;
1325 }
1326
1327 if self.is_finite_non_zero() && self.exp != rhs.exp {
1328 return false;
1329 }
1330
1331 self.sig == rhs.sig
1332 }
1333
1334 fn is_negative(self) -> bool {
1335 self.sign
1336 }
1337
1338 fn is_denormal(self) -> bool {
1339 self.is_finite_non_zero()
1340 && self.exp == S::MIN_EXP
1341 && !sig::get_bit(&self.sig, S::PRECISION - 1)
1342 }
1343
1344 fn is_signaling(self) -> bool {
1345 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
1346 // first bit of the trailing significand being 0.
1347 self.is_nan() && !sig::get_bit(&self.sig, S::QNAN_BIT)
1348 }
1349
1350 fn category(self) -> Category {
1351 self.category
1352 }
1353
1354 fn get_exact_inverse(self) -> Option<Self> {
1355 // Special floats and denormals have no exact inverse.
1356 if !self.is_finite_non_zero() {
1357 return None;
1358 }
1359
1360 // Check that the number is a power of two by making sure that only the
1361 // integer bit is set in the significand.
1362 if self.sig != [1 << (S::PRECISION - 1)] {
1363 return None;
1364 }
1365
1366 // Get the inverse.
1367 let mut reciprocal = Self::from_u128(1).value;
1368 let status;
1369 reciprocal = unpack!(status=, reciprocal / self);
1370 if status != Status::OK {
1371 return None;
1372 }
1373
1374 // Avoid multiplication with a denormal, it is not safe on all platforms and
1375 // may be slower than a normal division.
1376 if reciprocal.is_denormal() {
1377 return None;
1378 }
1379
1380 assert!(reciprocal.is_finite_non_zero());
1381 assert_eq!(reciprocal.sig, [1 << (S::PRECISION - 1)]);
1382
1383 Some(reciprocal)
1384 }
1385
1386 fn ilogb(mut self) -> ExpInt {
1387 if self.is_nan() {
1388 return IEK_NAN;
1389 }
1390 if self.is_zero() {
1391 return IEK_ZERO;
1392 }
1393 if self.is_infinite() {
1394 return IEK_INF;
1395 }
1396 if !self.is_denormal() {
1397 return self.exp;
1398 }
1399
1400 let sig_bits = (S::PRECISION - 1) as ExpInt;
1401 self.exp += sig_bits;
1402 self = self.normalize(Round::NearestTiesToEven, Loss::ExactlyZero).value;
1403 self.exp - sig_bits
1404 }
1405
1406 fn scalbn_r(mut self, exp: ExpInt, round: Round) -> Self {
1407 // If exp is wildly out-of-scale, simply adding it to self.exp will
1408 // overflow; clamp it to a safe range before adding, but ensure that the range
1409 // is large enough that the clamp does not change the result. The range we
1410 // need to support is the difference between the largest possible exponent and
1411 // the normalized exponent of half the smallest denormal.
1412
1413 let sig_bits = (S::PRECISION - 1) as i32;
1414 let max_change = S::MAX_EXP as i32 - (S::MIN_EXP as i32 - sig_bits) + 1;
1415
1416 // Clamp to one past the range ends to let normalize handle overflow.
1417 let exp_change = cmp::min(cmp::max(exp as i32, -max_change - 1), max_change);
1418 self.exp = self.exp.saturating_add(exp_change as ExpInt);
1419 self = self.normalize(round, Loss::ExactlyZero).value;
1420 if self.is_nan() {
1421 sig::set_bit(&mut self.sig, S::QNAN_BIT);
1422 }
1423 self
1424 }
1425
1426 fn frexp_r(mut self, exp: &mut ExpInt, round: Round) -> Self {
1427 *exp = self.ilogb();
1428
1429 // Quiet signalling nans.
1430 if *exp == IEK_NAN {
1431 sig::set_bit(&mut self.sig, S::QNAN_BIT);
1432 return self;
1433 }
1434
1435 if *exp == IEK_INF {
1436 return self;
1437 }
1438
1439 // 1 is added because frexp is defined to return a normalized fraction in
1440 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
1441 if *exp == IEK_ZERO {
1442 *exp = 0;
1443 } else {
1444 *exp += 1;
1445 }
1446 self.scalbn_r(-*exp, round)
1447 }
1448 }
1449
1450 impl<S: Semantics, T: Semantics> FloatConvert<IeeeFloat<T>> for IeeeFloat<S> {
1451 fn convert_r(self, round: Round, loses_info: &mut bool) -> StatusAnd<IeeeFloat<T>> {
1452 let mut r = IeeeFloat {
1453 sig: self.sig,
1454 exp: self.exp,
1455 category: self.category,
1456 sign: self.sign,
1457 marker: PhantomData,
1458 };
1459
1460 // x86 has some unusual NaNs which cannot be represented in any other
1461 // format; note them here.
1462 fn is_x87_double_extended<S: Semantics>() -> bool {
1463 S::QNAN_SIGNIFICAND == X87DoubleExtendedS::QNAN_SIGNIFICAND
1464 }
1465 let x87_special_nan = is_x87_double_extended::<S>()
1466 && !is_x87_double_extended::<T>()
1467 && r.category == Category::NaN
1468 && (r.sig[0] & S::QNAN_SIGNIFICAND) != S::QNAN_SIGNIFICAND;
1469
1470 // If this is a truncation of a denormal number, and the target semantics
1471 // has larger exponent range than the source semantics (this can happen
1472 // when truncating from PowerPC double-double to double format), the
1473 // right shift could lose result mantissa bits. Adjust exponent instead
1474 // of performing excessive shift.
1475 let mut shift = T::PRECISION as ExpInt - S::PRECISION as ExpInt;
1476 if shift < 0 && r.is_finite_non_zero() {
1477 let mut exp_change = sig::omsb(&r.sig) as ExpInt - S::PRECISION as ExpInt;
1478 if r.exp + exp_change < T::MIN_EXP {
1479 exp_change = T::MIN_EXP - r.exp;
1480 }
1481 if exp_change < shift {
1482 exp_change = shift;
1483 }
1484 if exp_change < 0 {
1485 shift -= exp_change;
1486 r.exp += exp_change;
1487 }
1488 }
1489
1490 // If this is a truncation, perform the shift.
1491 let loss = if shift < 0 && (r.is_finite_non_zero() || r.category == Category::NaN) {
1492 sig::shift_right(&mut r.sig, &mut 0, -shift as usize)
1493 } else {
1494 Loss::ExactlyZero
1495 };
1496
1497 // If this is an extension, perform the shift.
1498 if shift > 0 && (r.is_finite_non_zero() || r.category == Category::NaN) {
1499 sig::shift_left(&mut r.sig, &mut 0, shift as usize);
1500 }
1501
1502 let status;
1503 if r.is_finite_non_zero() {
1504 r = unpack!(status=, r.normalize(round, loss));
1505 *loses_info = status != Status::OK;
1506 } else if r.category == Category::NaN {
1507 *loses_info = loss != Loss::ExactlyZero || x87_special_nan;
1508
1509 // For x87 extended precision, we want to make a NaN, not a special NaN if
1510 // the input wasn't special either.
1511 if !x87_special_nan && is_x87_double_extended::<T>() {
1512 sig::set_bit(&mut r.sig, T::PRECISION - 1);
1513 }
1514
1515 // Convert of sNaN creates qNaN and raises an exception (invalid op).
1516 // This also guarantees that a sNaN does not become Inf on a truncation
1517 // that loses all payload bits.
1518 if self.is_signaling() {
1519 // Quiet signaling NaN.
1520 sig::set_bit(&mut r.sig, T::QNAN_BIT);
1521 status = Status::INVALID_OP;
1522 } else {
1523 status = Status::OK;
1524 }
1525 } else {
1526 *loses_info = false;
1527 status = Status::OK;
1528 }
1529
1530 status.and(r)
1531 }
1532 }
1533
1534 impl<S: Semantics> IeeeFloat<S> {
1535 /// Handle positive overflow. We either return infinity or
1536 /// the largest finite number. For negative overflow,
1537 /// negate the `round` argument before calling.
1538 fn overflow_result(round: Round) -> StatusAnd<Self> {
1539 match round {
1540 // Infinity?
1541 Round::NearestTiesToEven | Round::NearestTiesToAway | Round::TowardPositive => {
1542 (Status::OVERFLOW | Status::INEXACT).and(Self::INFINITY)
1543 }
1544 // Otherwise we become the largest finite number.
1545 Round::TowardNegative | Round::TowardZero => Status::INEXACT.and(Self::largest()),
1546 }
1547 }
1548
1549 /// Returns `true` if, when truncating the current number, with `bit` the
1550 /// new LSB, with the given lost fraction and rounding mode, the result
1551 /// would need to be rounded away from zero (i.e., by increasing the
1552 /// signficand). This routine must work for `Category::Zero` of both signs, and
1553 /// `Category::Normal` numbers.
1554 fn round_away_from_zero(&self, round: Round, loss: Loss, bit: usize) -> bool {
1555 // NaNs and infinities should not have lost fractions.
1556 assert!(self.is_finite_non_zero() || self.is_zero());
1557
1558 // Current callers never pass this so we don't handle it.
1559 assert_ne!(loss, Loss::ExactlyZero);
1560
1561 match round {
1562 Round::NearestTiesToAway => loss == Loss::ExactlyHalf || loss == Loss::MoreThanHalf,
1563 Round::NearestTiesToEven => {
1564 if loss == Loss::MoreThanHalf {
1565 return true;
1566 }
1567
1568 // Our zeros don't have a significand to test.
1569 if loss == Loss::ExactlyHalf && self.category != Category::Zero {
1570 return sig::get_bit(&self.sig, bit);
1571 }
1572
1573 false
1574 }
1575 Round::TowardZero => false,
1576 Round::TowardPositive => !self.sign,
1577 Round::TowardNegative => self.sign,
1578 }
1579 }
1580
1581 fn normalize(mut self, round: Round, mut loss: Loss) -> StatusAnd<Self> {
1582 if !self.is_finite_non_zero() {
1583 return Status::OK.and(self);
1584 }
1585
1586 // Before rounding normalize the exponent of Category::Normal numbers.
1587 let mut omsb = sig::omsb(&self.sig);
1588
1589 if omsb > 0 {
1590 // OMSB is numbered from 1. We want to place it in the integer
1591 // bit numbered PRECISION if possible, with a compensating change in
1592 // the exponent.
1593 let mut final_exp = self.exp.saturating_add(omsb as ExpInt - S::PRECISION as ExpInt);
1594
1595 // If the resulting exponent is too high, overflow according to
1596 // the rounding mode.
1597 if final_exp > S::MAX_EXP {
1598 let round = if self.sign { -round } else { round };
1599 return Self::overflow_result(round).map(|r| r.copy_sign(self));
1600 }
1601
1602 // Subnormal numbers have exponent MIN_EXP, and their MSB
1603 // is forced based on that.
1604 if final_exp < S::MIN_EXP {
1605 final_exp = S::MIN_EXP;
1606 }
1607
1608 // Shifting left is easy as we don't lose precision.
1609 if final_exp < self.exp {
1610 assert_eq!(loss, Loss::ExactlyZero);
1611
1612 let exp_change = (self.exp - final_exp) as usize;
1613 sig::shift_left(&mut self.sig, &mut self.exp, exp_change);
1614
1615 return Status::OK.and(self);
1616 }
1617
1618 // Shift right and capture any new lost fraction.
1619 if final_exp > self.exp {
1620 let exp_change = (final_exp - self.exp) as usize;
1621 loss = sig::shift_right(&mut self.sig, &mut self.exp, exp_change).combine(loss);
1622
1623 // Keep OMSB up-to-date.
1624 omsb = omsb.saturating_sub(exp_change);
1625 }
1626 }
1627
1628 // Now round the number according to round given the lost
1629 // fraction.
1630
1631 // As specified in IEEE 754, since we do not trap we do not report
1632 // underflow for exact results.
1633 if loss == Loss::ExactlyZero {
1634 // Canonicalize zeros.
1635 if omsb == 0 {
1636 self.category = Category::Zero;
1637 }
1638
1639 return Status::OK.and(self);
1640 }
1641
1642 // Increment the significand if we're rounding away from zero.
1643 if self.round_away_from_zero(round, loss, 0) {
1644 if omsb == 0 {
1645 self.exp = S::MIN_EXP;
1646 }
1647
1648 // We should never overflow.
1649 assert_eq!(sig::increment(&mut self.sig), 0);
1650 omsb = sig::omsb(&self.sig);
1651
1652 // Did the significand increment overflow?
1653 if omsb == S::PRECISION + 1 {
1654 // Renormalize by incrementing the exponent and shifting our
1655 // significand right one. However if we already have the
1656 // maximum exponent we overflow to infinity.
1657 if self.exp == S::MAX_EXP {
1658 self.category = Category::Infinity;
1659
1660 return (Status::OVERFLOW | Status::INEXACT).and(self);
1661 }
1662
1663 let _: Loss = sig::shift_right(&mut self.sig, &mut self.exp, 1);
1664
1665 return Status::INEXACT.and(self);
1666 }
1667 }
1668
1669 // The normal case - we were and are not denormal, and any
1670 // significand increment above didn't overflow.
1671 if omsb == S::PRECISION {
1672 return Status::INEXACT.and(self);
1673 }
1674
1675 // We have a non-zero denormal.
1676 assert!(omsb < S::PRECISION);
1677
1678 // Canonicalize zeros.
1679 if omsb == 0 {
1680 self.category = Category::Zero;
1681 }
1682
1683 // The Category::Zero case is a denormal that underflowed to zero.
1684 (Status::UNDERFLOW | Status::INEXACT).and(self)
1685 }
1686
1687 fn from_hexadecimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> {
1688 let mut r = IeeeFloat {
1689 sig: [0],
1690 exp: 0,
1691 category: Category::Normal,
1692 sign: false,
1693 marker: PhantomData,
1694 };
1695
1696 let mut any_digits = false;
1697 let mut has_exp = false;
1698 let mut bit_pos = LIMB_BITS as isize;
1699 let mut loss = None;
1700
1701 // Without leading or trailing zeros, irrespective of the dot.
1702 let mut first_sig_digit = None;
1703 let mut dot = s.len();
1704
1705 for (p, c) in s.char_indices() {
1706 // Skip leading zeros and any (hexa)decimal point.
1707 if c == '.' {
1708 if dot != s.len() {
1709 return Err(ParseError("String contains multiple dots"));
1710 }
1711 dot = p;
1712 } else if let Some(hex_value) = c.to_digit(16) {
1713 any_digits = true;
1714
1715 if first_sig_digit.is_none() {
1716 if hex_value == 0 {
1717 continue;
1718 }
1719 first_sig_digit = Some(p);
1720 }
1721
1722 // Store the number while we have space.
1723 bit_pos -= 4;
1724 if bit_pos >= 0 {
1725 r.sig[0] |= (hex_value as Limb) << bit_pos;
1726 // If zero or one-half (the hexadecimal digit 8) are followed
1727 // by non-zero, they're a little more than zero or one-half.
1728 } else if let Some(ref mut loss) = loss {
1729 if hex_value != 0 {
1730 if *loss == Loss::ExactlyZero {
1731 *loss = Loss::LessThanHalf;
1732 }
1733 if *loss == Loss::ExactlyHalf {
1734 *loss = Loss::MoreThanHalf;
1735 }
1736 }
1737 } else {
1738 loss = Some(match hex_value {
1739 0 => Loss::ExactlyZero,
1740 1..=7 => Loss::LessThanHalf,
1741 8 => Loss::ExactlyHalf,
1742 9..=15 => Loss::MoreThanHalf,
1743 _ => unreachable!(),
1744 });
1745 }
1746 } else if c == 'p' || c == 'P' {
1747 if !any_digits {
1748 return Err(ParseError("Significand has no digits"));
1749 }
1750
1751 if dot == s.len() {
1752 dot = p;
1753 }
1754
1755 let mut chars = s[p + 1..].chars().peekable();
1756
1757 // Adjust for the given exponent.
1758 let exp_minus = chars.peek() == Some(&'-');
1759 if exp_minus || chars.peek() == Some(&'+') {
1760 chars.next();
1761 }
1762
1763 for c in chars {
1764 if let Some(value) = c.to_digit(10) {
1765 has_exp = true;
1766 r.exp = r.exp.saturating_mul(10).saturating_add(value as ExpInt);
1767 } else {
1768 return Err(ParseError("Invalid character in exponent"));
1769 }
1770 }
1771 if !has_exp {
1772 return Err(ParseError("Exponent has no digits"));
1773 }
1774
1775 if exp_minus {
1776 r.exp = -r.exp;
1777 }
1778
1779 break;
1780 } else {
1781 return Err(ParseError("Invalid character in significand"));
1782 }
1783 }
1784 if !any_digits {
1785 return Err(ParseError("Significand has no digits"));
1786 }
1787
1788 // Hex floats require an exponent but not a hexadecimal point.
1789 if !has_exp {
1790 return Err(ParseError("Hex strings require an exponent"));
1791 }
1792
1793 // Ignore the exponent if we are zero.
1794 let first_sig_digit = match first_sig_digit {
1795 Some(p) => p,
1796 None => return Ok(Status::OK.and(Self::ZERO)),
1797 };
1798
1799 // Calculate the exponent adjustment implicit in the number of
1800 // significant digits and adjust for writing the significand starting
1801 // at the most significant nibble.
1802 let exp_adjustment = if dot > first_sig_digit {
1803 ExpInt::try_from(dot - first_sig_digit).unwrap()
1804 } else {
1805 -ExpInt::try_from(first_sig_digit - dot - 1).unwrap()
1806 };
1807 let exp_adjustment = exp_adjustment
1808 .saturating_mul(4)
1809 .saturating_sub(1)
1810 .saturating_add(S::PRECISION as ExpInt)
1811 .saturating_sub(LIMB_BITS as ExpInt);
1812 r.exp = r.exp.saturating_add(exp_adjustment);
1813
1814 Ok(r.normalize(round, loss.unwrap_or(Loss::ExactlyZero)))
1815 }
1816
1817 fn from_decimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> {
1818 // Given a normal decimal floating point number of the form
1819 //
1820 // dddd.dddd[eE][+-]ddd
1821 //
1822 // where the decimal point and exponent are optional, fill out the
1823 // variables below. Exponent is appropriate if the significand is
1824 // treated as an integer, and normalized_exp if the significand
1825 // is taken to have the decimal point after a single leading
1826 // non-zero digit.
1827 //
1828 // If the value is zero, first_sig_digit is None.
1829
1830 let mut any_digits = false;
1831 let mut dec_exp = 0i32;
1832
1833 // Without leading or trailing zeros, irrespective of the dot.
1834 let mut first_sig_digit = None;
1835 let mut last_sig_digit = 0;
1836 let mut dot = s.len();
1837
1838 for (p, c) in s.char_indices() {
1839 if c == '.' {
1840 if dot != s.len() {
1841 return Err(ParseError("String contains multiple dots"));
1842 }
1843 dot = p;
1844 } else if let Some(dec_value) = c.to_digit(10) {
1845 any_digits = true;
1846
1847 if dec_value != 0 {
1848 if first_sig_digit.is_none() {
1849 first_sig_digit = Some(p);
1850 }
1851 last_sig_digit = p;
1852 }
1853 } else if c == 'e' || c == 'E' {
1854 if !any_digits {
1855 return Err(ParseError("Significand has no digits"));
1856 }
1857
1858 if dot == s.len() {
1859 dot = p;
1860 }
1861
1862 let mut chars = s[p + 1..].chars().peekable();
1863
1864 // Adjust for the given exponent.
1865 let exp_minus = chars.peek() == Some(&'-');
1866 if exp_minus || chars.peek() == Some(&'+') {
1867 chars.next();
1868 }
1869
1870 any_digits = false;
1871 for c in chars {
1872 if let Some(value) = c.to_digit(10) {
1873 any_digits = true;
1874 dec_exp = dec_exp.saturating_mul(10).saturating_add(value as i32);
1875 } else {
1876 return Err(ParseError("Invalid character in exponent"));
1877 }
1878 }
1879 if !any_digits {
1880 return Err(ParseError("Exponent has no digits"));
1881 }
1882
1883 if exp_minus {
1884 dec_exp = -dec_exp;
1885 }
1886
1887 break;
1888 } else {
1889 return Err(ParseError("Invalid character in significand"));
1890 }
1891 }
1892 if !any_digits {
1893 return Err(ParseError("Significand has no digits"));
1894 }
1895
1896 // Test if we have a zero number allowing for non-zero exponents.
1897 let first_sig_digit = match first_sig_digit {
1898 Some(p) => p,
1899 None => return Ok(Status::OK.and(Self::ZERO)),
1900 };
1901
1902 // Adjust the exponents for any decimal point.
1903 if dot > last_sig_digit {
1904 dec_exp = dec_exp.saturating_add((dot - last_sig_digit - 1) as i32);
1905 } else {
1906 dec_exp = dec_exp.saturating_sub((last_sig_digit - dot) as i32);
1907 }
1908 let significand_digits = last_sig_digit - first_sig_digit + 1
1909 - (dot > first_sig_digit && dot < last_sig_digit) as usize;
1910 let normalized_exp = dec_exp.saturating_add(significand_digits as i32 - 1);
1911
1912 // Handle the cases where exponents are obviously too large or too
1913 // small. Writing L for log 10 / log 2, a number d.ddddd*10^dec_exp
1914 // definitely overflows if
1915 //
1916 // (dec_exp - 1) * L >= MAX_EXP
1917 //
1918 // and definitely underflows to zero where
1919 //
1920 // (dec_exp + 1) * L <= MIN_EXP - PRECISION
1921 //
1922 // With integer arithmetic the tightest bounds for L are
1923 //
1924 // 93/28 < L < 196/59 [ numerator <= 256 ]
1925 // 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
1926
1927 // Check for MAX_EXP.
1928 if normalized_exp.saturating_sub(1).saturating_mul(42039) >= 12655 * S::MAX_EXP as i32 {
1929 // Overflow and round.
1930 return Ok(Self::overflow_result(round));
1931 }
1932
1933 // Check for MIN_EXP.
1934 if normalized_exp.saturating_add(1).saturating_mul(28738)
1935 <= 8651 * (S::MIN_EXP as i32 - S::PRECISION as i32)
1936 {
1937 // Underflow to zero and round.
1938 let r =
1939 if round == Round::TowardPositive { IeeeFloat::SMALLEST } else { IeeeFloat::ZERO };
1940 return Ok((Status::UNDERFLOW | Status::INEXACT).and(r));
1941 }
1942
1943 // A tight upper bound on number of bits required to hold an
1944 // N-digit decimal integer is N * 196 / 59. Allocate enough space
1945 // to hold the full significand, and an extra limb required by
1946 // tcMultiplyPart.
1947 let max_limbs = limbs_for_bits(1 + 196 * significand_digits / 59);
1948 let mut dec_sig: SmallVec<[Limb; 1]> = SmallVec::with_capacity(max_limbs);
1949
1950 // Convert to binary efficiently - we do almost all multiplication
1951 // in a Limb. When this would overflow do we do a single
1952 // bignum multiplication, and then revert again to multiplication
1953 // in a Limb.
1954 let mut chars = s[first_sig_digit..=last_sig_digit].chars();
1955 loop {
1956 let mut val = 0;
1957 let mut multiplier = 1;
1958
1959 loop {
1960 let dec_value = match chars.next() {
1961 Some('.') => continue,
1962 Some(c) => c.to_digit(10).unwrap(),
1963 None => break,
1964 };
1965
1966 multiplier *= 10;
1967 val = val * 10 + dec_value as Limb;
1968
1969 // The maximum number that can be multiplied by ten with any
1970 // digit added without overflowing a Limb.
1971 if multiplier > (!0 - 9) / 10 {
1972 break;
1973 }
1974 }
1975
1976 // If we've consumed no digits, we're done.
1977 if multiplier == 1 {
1978 break;
1979 }
1980
1981 // Multiply out the current limb.
1982 let mut carry = val;
1983 for x in &mut dec_sig {
1984 let [low, mut high] = sig::widening_mul(*x, multiplier);
1985
1986 // Now add carry.
1987 let (low, overflow) = low.overflowing_add(carry);
1988 high += overflow as Limb;
1989
1990 *x = low;
1991 carry = high;
1992 }
1993
1994 // If we had carry, we need another limb (likely but not guaranteed).
1995 if carry > 0 {
1996 dec_sig.push(carry);
1997 }
1998 }
1999
2000 // Calculate pow(5, abs(dec_exp)) into `pow5_full`.
2001 // The *_calc Vec's are reused scratch space, as an optimization.
2002 let (pow5_full, mut pow5_calc, mut sig_calc, mut sig_scratch_calc) = {
2003 let mut power = dec_exp.abs() as usize;
2004
2005 const FIRST_EIGHT_POWERS: [Limb; 8] = [1, 5, 25, 125, 625, 3125, 15625, 78125];
2006
2007 let mut p5_scratch = smallvec![];
2008 let mut p5: SmallVec<[Limb; 1]> = smallvec![FIRST_EIGHT_POWERS[4]];
2009
2010 let mut r_scratch = smallvec![];
2011 let mut r: SmallVec<[Limb; 1]> = smallvec![FIRST_EIGHT_POWERS[power & 7]];
2012 power >>= 3;
2013
2014 while power > 0 {
2015 // Calculate pow(5,pow(2,n+3)).
2016 p5_scratch.resize(p5.len() * 2, 0);
2017 let _: Loss = sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS);
2018 while p5_scratch.last() == Some(&0) {
2019 p5_scratch.pop();
2020 }
2021 mem::swap(&mut p5, &mut p5_scratch);
2022
2023 if power & 1 != 0 {
2024 r_scratch.resize(r.len() + p5.len(), 0);
2025 let _: Loss =
2026 sig::mul(&mut r_scratch, &mut 0, &r, &p5, (r.len() + p5.len()) * LIMB_BITS);
2027 while r_scratch.last() == Some(&0) {
2028 r_scratch.pop();
2029 }
2030 mem::swap(&mut r, &mut r_scratch);
2031 }
2032
2033 power >>= 1;
2034 }
2035
2036 (r, r_scratch, p5, p5_scratch)
2037 };
2038
2039 // Attempt dec_sig * 10^dec_exp with increasing precision.
2040 let mut attempt = 0;
2041 loop {
2042 let calc_precision = (LIMB_BITS << attempt) - 1;
2043 attempt += 1;
2044
2045 let calc_normal_from_limbs = |sig: &mut SmallVec<[Limb; 1]>,
2046 limbs: &[Limb]|
2047 -> StatusAnd<ExpInt> {
2048 sig.resize(limbs_for_bits(calc_precision), 0);
2049 let (mut loss, mut exp) = sig::from_limbs(sig, limbs, calc_precision);
2050
2051 // Before rounding normalize the exponent of Category::Normal numbers.
2052 let mut omsb = sig::omsb(sig);
2053
2054 assert_ne!(omsb, 0);
2055
2056 // OMSB is numbered from 1. We want to place it in the integer
2057 // bit numbered PRECISION if possible, with a compensating change in
2058 // the exponent.
2059 let final_exp = exp.saturating_add(omsb as ExpInt - calc_precision as ExpInt);
2060
2061 // Shifting left is easy as we don't lose precision.
2062 if final_exp < exp {
2063 assert_eq!(loss, Loss::ExactlyZero);
2064
2065 let exp_change = (exp - final_exp) as usize;
2066 sig::shift_left(sig, &mut exp, exp_change);
2067
2068 return Status::OK.and(exp);
2069 }
2070
2071 // Shift right and capture any new lost fraction.
2072 if final_exp > exp {
2073 let exp_change = (final_exp - exp) as usize;
2074 loss = sig::shift_right(sig, &mut exp, exp_change).combine(loss);
2075
2076 // Keep OMSB up-to-date.
2077 omsb = omsb.saturating_sub(exp_change);
2078 }
2079
2080 assert_eq!(omsb, calc_precision);
2081
2082 // Now round the number according to round given the lost
2083 // fraction.
2084
2085 // As specified in IEEE 754, since we do not trap we do not report
2086 // underflow for exact results.
2087 if loss == Loss::ExactlyZero {
2088 return Status::OK.and(exp);
2089 }
2090
2091 // Increment the significand if we're rounding away from zero.
2092 if loss == Loss::MoreThanHalf || loss == Loss::ExactlyHalf && sig::get_bit(sig, 0) {
2093 // We should never overflow.
2094 assert_eq!(sig::increment(sig), 0);
2095 omsb = sig::omsb(sig);
2096
2097 // Did the significand increment overflow?
2098 if omsb == calc_precision + 1 {
2099 let _: Loss = sig::shift_right(sig, &mut exp, 1);
2100
2101 return Status::INEXACT.and(exp);
2102 }
2103 }
2104
2105 // The normal case - we were and are not denormal, and any
2106 // significand increment above didn't overflow.
2107 Status::INEXACT.and(exp)
2108 };
2109
2110 let status;
2111 let mut exp = unpack!(status=,
2112 calc_normal_from_limbs(&mut sig_calc, &dec_sig));
2113 let pow5_status;
2114 let pow5_exp = unpack!(pow5_status=,
2115 calc_normal_from_limbs(&mut pow5_calc, &pow5_full));
2116
2117 // Add dec_exp, as 10^n = 5^n * 2^n.
2118 exp += dec_exp as ExpInt;
2119
2120 let mut used_bits = S::PRECISION;
2121 let mut truncated_bits = calc_precision - used_bits;
2122
2123 let half_ulp_err1 = (status != Status::OK) as Limb;
2124 let (calc_loss, half_ulp_err2);
2125 if dec_exp >= 0 {
2126 exp += pow5_exp;
2127
2128 sig_scratch_calc.resize(sig_calc.len() + pow5_calc.len(), 0);
2129 calc_loss = sig::mul(
2130 &mut sig_scratch_calc,
2131 &mut exp,
2132 &sig_calc,
2133 &pow5_calc,
2134 calc_precision,
2135 );
2136 mem::swap(&mut sig_calc, &mut sig_scratch_calc);
2137
2138 half_ulp_err2 = (pow5_status != Status::OK) as Limb;
2139 } else {
2140 exp -= pow5_exp;
2141
2142 sig_scratch_calc.resize(sig_calc.len(), 0);
2143 calc_loss = sig::div(
2144 &mut sig_scratch_calc,
2145 &mut exp,
2146 &mut sig_calc,
2147 &mut pow5_calc,
2148 calc_precision,
2149 );
2150 mem::swap(&mut sig_calc, &mut sig_scratch_calc);
2151
2152 // Denormal numbers have less precision.
2153 if exp < S::MIN_EXP {
2154 truncated_bits += (S::MIN_EXP - exp) as usize;
2155 used_bits = calc_precision.saturating_sub(truncated_bits);
2156 }
2157 // Extra half-ulp lost in reciprocal of exponent.
2158 half_ulp_err2 =
2159 2 * (pow5_status != Status::OK || calc_loss != Loss::ExactlyZero) as Limb;
2160 }
2161
2162 // Both sig::mul and sig::div return the
2163 // result with the integer bit set.
2164 assert!(sig::get_bit(&sig_calc, calc_precision - 1));
2165
2166 // The error from the true value, in half-ulps, on multiplying two
2167 // floating point numbers, which differ from the value they
2168 // approximate by at most half_ulp_err1 and half_ulp_err2 half-ulps, is strictly less
2169 // than the returned value.
2170 //
2171 // See "How to Read Floating Point Numbers Accurately" by William D Clinger.
2172 assert!(half_ulp_err1 < 2 || half_ulp_err2 < 2 || (half_ulp_err1 + half_ulp_err2 < 8));
2173
2174 let inexact = (calc_loss != Loss::ExactlyZero) as Limb;
2175 let half_ulp_err = if half_ulp_err1 + half_ulp_err2 == 0 {
2176 inexact * 2 // <= inexact half-ulps.
2177 } else {
2178 inexact + 2 * (half_ulp_err1 + half_ulp_err2)
2179 };
2180
2181 let ulps_from_boundary = {
2182 let bits = calc_precision - used_bits - 1;
2183
2184 let i = bits / LIMB_BITS;
2185 let limb = sig_calc[i] & (!0 >> (LIMB_BITS - 1 - bits % LIMB_BITS));
2186 let boundary = match round {
2187 Round::NearestTiesToEven | Round::NearestTiesToAway => 1 << (bits % LIMB_BITS),
2188 _ => 0,
2189 };
2190 if i == 0 {
2191 let delta = limb.wrapping_sub(boundary);
2192 cmp::min(delta, delta.wrapping_neg())
2193 } else if limb == boundary {
2194 if !sig::is_all_zeros(&sig_calc[1..i]) {
2195 !0 // A lot.
2196 } else {
2197 sig_calc[0]
2198 }
2199 } else if limb == boundary.wrapping_sub(1) {
2200 if sig_calc[1..i].iter().any(|&x| x.wrapping_neg() != 1) {
2201 !0 // A lot.
2202 } else {
2203 sig_calc[0].wrapping_neg()
2204 }
2205 } else {
2206 !0 // A lot.
2207 }
2208 };
2209
2210 // Are we guaranteed to round correctly if we truncate?
2211 if ulps_from_boundary.saturating_mul(2) >= half_ulp_err {
2212 let mut r = IeeeFloat {
2213 sig: [0],
2214 exp,
2215 category: Category::Normal,
2216 sign: false,
2217 marker: PhantomData,
2218 };
2219 sig::extract(&mut r.sig, &sig_calc, used_bits, calc_precision - used_bits);
2220 // If we extracted less bits above we must adjust our exponent
2221 // to compensate for the implicit right shift.
2222 r.exp += (S::PRECISION - used_bits) as ExpInt;
2223 let loss = Loss::through_truncation(&sig_calc, truncated_bits);
2224 return Ok(r.normalize(round, loss));
2225 }
2226 }
2227 }
2228 }
2229
2230 impl Loss {
2231 /// Combine the effect of two lost fractions.
2232 fn combine(self, less_significant: Loss) -> Loss {
2233 let mut more_significant = self;
2234 if less_significant != Loss::ExactlyZero {
2235 if more_significant == Loss::ExactlyZero {
2236 more_significant = Loss::LessThanHalf;
2237 } else if more_significant == Loss::ExactlyHalf {
2238 more_significant = Loss::MoreThanHalf;
2239 }
2240 }
2241
2242 more_significant
2243 }
2244
2245 /// Returns the fraction lost were a bignum truncated losing the least
2246 /// significant `bits` bits.
2247 fn through_truncation(limbs: &[Limb], bits: usize) -> Loss {
2248 if bits == 0 {
2249 return Loss::ExactlyZero;
2250 }
2251
2252 let half_bit = bits - 1;
2253 let half_limb = half_bit / LIMB_BITS;
2254 let (half_limb, rest) = if half_limb < limbs.len() {
2255 (limbs[half_limb], &limbs[..half_limb])
2256 } else {
2257 (0, limbs)
2258 };
2259 let half = 1 << (half_bit % LIMB_BITS);
2260 let has_half = half_limb & half != 0;
2261 let has_rest = half_limb & (half - 1) != 0 || !sig::is_all_zeros(rest);
2262
2263 match (has_half, has_rest) {
2264 (false, false) => Loss::ExactlyZero,
2265 (false, true) => Loss::LessThanHalf,
2266 (true, false) => Loss::ExactlyHalf,
2267 (true, true) => Loss::MoreThanHalf,
2268 }
2269 }
2270 }
2271
2272 /// Implementation details of IeeeFloat significands, such as big integer arithmetic.
2273 /// As a rule of thumb, no functions in this module should dynamically allocate.
2274 mod sig {
2275 use super::{limbs_for_bits, ExpInt, Limb, Loss, LIMB_BITS};
2276 use core::cmp::Ordering;
2277 use core::iter;
2278 use core::mem;
2279
2280 pub(super) fn is_all_zeros(limbs: &[Limb]) -> bool {
2281 limbs.iter().all(|&l| l == 0)
2282 }
2283
2284 /// One, not zero, based LSB. That is, returns 0 for a zeroed significand.
2285 pub(super) fn olsb(limbs: &[Limb]) -> usize {
2286 limbs
2287 .iter()
2288 .enumerate()
2289 .find(|(_, &limb)| limb != 0)
2290 .map_or(0, |(i, limb)| i * LIMB_BITS + limb.trailing_zeros() as usize + 1)
2291 }
2292
2293 /// One, not zero, based MSB. That is, returns 0 for a zeroed significand.
2294 pub(super) fn omsb(limbs: &[Limb]) -> usize {
2295 limbs
2296 .iter()
2297 .enumerate()
2298 .rfind(|(_, &limb)| limb != 0)
2299 .map_or(0, |(i, limb)| (i + 1) * LIMB_BITS - limb.leading_zeros() as usize)
2300 }
2301
2302 /// Comparison (unsigned) of two significands.
2303 pub(super) fn cmp(a: &[Limb], b: &[Limb]) -> Ordering {
2304 assert_eq!(a.len(), b.len());
2305 for (a, b) in a.iter().zip(b).rev() {
2306 match a.cmp(b) {
2307 Ordering::Equal => {}
2308 o => return o,
2309 }
2310 }
2311
2312 Ordering::Equal
2313 }
2314
2315 /// Extracts the given bit.
2316 pub(super) fn get_bit(limbs: &[Limb], bit: usize) -> bool {
2317 limbs[bit / LIMB_BITS] & (1 << (bit % LIMB_BITS)) != 0
2318 }
2319
2320 /// Sets the given bit.
2321 pub(super) fn set_bit(limbs: &mut [Limb], bit: usize) {
2322 limbs[bit / LIMB_BITS] |= 1 << (bit % LIMB_BITS);
2323 }
2324
2325 /// Clear the given bit.
2326 pub(super) fn clear_bit(limbs: &mut [Limb], bit: usize) {
2327 limbs[bit / LIMB_BITS] &= !(1 << (bit % LIMB_BITS));
2328 }
2329
2330 /// Shifts `dst` left `bits` bits, subtract `bits` from its exponent.
2331 pub(super) fn shift_left(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) {
2332 if bits > 0 {
2333 // Our exponent should not underflow.
2334 *exp = exp.checked_sub(bits as ExpInt).unwrap();
2335
2336 // Jump is the inter-limb jump; shift is the intra-limb shift.
2337 let jump = bits / LIMB_BITS;
2338 let shift = bits % LIMB_BITS;
2339
2340 for i in (0..dst.len()).rev() {
2341 let mut limb;
2342
2343 if i < jump {
2344 limb = 0;
2345 } else {
2346 // dst[i] comes from the two limbs src[i - jump] and, if we have
2347 // an intra-limb shift, src[i - jump - 1].
2348 limb = dst[i - jump];
2349 if shift > 0 {
2350 limb <<= shift;
2351 if i > jump {
2352 limb |= dst[i - jump - 1] >> (LIMB_BITS - shift);
2353 }
2354 }
2355 }
2356
2357 dst[i] = limb;
2358 }
2359 }
2360 }
2361
2362 /// Shifts `dst` right `bits` bits noting lost fraction.
2363 pub(super) fn shift_right(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) -> Loss {
2364 let loss = Loss::through_truncation(dst, bits);
2365
2366 if bits > 0 {
2367 // Our exponent should not overflow.
2368 *exp = exp.checked_add(bits as ExpInt).unwrap();
2369
2370 // Jump is the inter-limb jump; shift is the intra-limb shift.
2371 let jump = bits / LIMB_BITS;
2372 let shift = bits % LIMB_BITS;
2373
2374 // Perform the shift. This leaves the most significant `bits` bits
2375 // of the result at zero.
2376 for i in 0..dst.len() {
2377 let mut limb;
2378
2379 if i + jump >= dst.len() {
2380 limb = 0;
2381 } else {
2382 limb = dst[i + jump];
2383 if shift > 0 {
2384 limb >>= shift;
2385 if i + jump + 1 < dst.len() {
2386 limb |= dst[i + jump + 1] << (LIMB_BITS - shift);
2387 }
2388 }
2389 }
2390
2391 dst[i] = limb;
2392 }
2393 }
2394
2395 loss
2396 }
2397
2398 /// Copies the bit vector of width `src_bits` from `src`, starting at bit SRC_LSB,
2399 /// to `dst`, such that the bit SRC_LSB becomes the least significant bit of `dst`.
2400 /// All high bits above `src_bits` in `dst` are zero-filled.
2401 pub(super) fn extract(dst: &mut [Limb], src: &[Limb], src_bits: usize, src_lsb: usize) {
2402 if src_bits == 0 {
2403 return;
2404 }
2405
2406 let dst_limbs = limbs_for_bits(src_bits);
2407 assert!(dst_limbs <= dst.len());
2408
2409 let src = &src[src_lsb / LIMB_BITS..];
2410 dst[..dst_limbs].copy_from_slice(&src[..dst_limbs]);
2411
2412 let shift = src_lsb % LIMB_BITS;
2413 let _: Loss = shift_right(&mut dst[..dst_limbs], &mut 0, shift);
2414
2415 // We now have (dst_limbs * LIMB_BITS - shift) bits from `src`
2416 // in `dst`. If this is less that src_bits, append the rest, else
2417 // clear the high bits.
2418 let n = dst_limbs * LIMB_BITS - shift;
2419 if n < src_bits {
2420 let mask = (1 << (src_bits - n)) - 1;
2421 dst[dst_limbs - 1] |= (src[dst_limbs] & mask) << (n % LIMB_BITS);
2422 } else if n > src_bits && src_bits % LIMB_BITS > 0 {
2423 dst[dst_limbs - 1] &= (1 << (src_bits % LIMB_BITS)) - 1;
2424 }
2425
2426 // Clear high limbs.
2427 for x in &mut dst[dst_limbs..] {
2428 *x = 0;
2429 }
2430 }
2431
2432 /// We want the most significant PRECISION bits of `src`. There may not
2433 /// be that many; extract what we can.
2434 pub(super) fn from_limbs(dst: &mut [Limb], src: &[Limb], precision: usize) -> (Loss, ExpInt) {
2435 let omsb = omsb(src);
2436
2437 if precision <= omsb {
2438 extract(dst, src, precision, omsb - precision);
2439 (Loss::through_truncation(src, omsb - precision), omsb as ExpInt - 1)
2440 } else {
2441 extract(dst, src, omsb, 0);
2442 (Loss::ExactlyZero, precision as ExpInt - 1)
2443 }
2444 }
2445
2446 /// For every consecutive chunk of `bits` bits from `limbs`,
2447 /// going from most significant to the least significant bits,
2448 /// call `f` to transform those bits and store the result back.
2449 pub(super) fn each_chunk<F: FnMut(Limb) -> Limb>(limbs: &mut [Limb], bits: usize, mut f: F) {
2450 assert_eq!(LIMB_BITS % bits, 0);
2451 for limb in limbs.iter_mut().rev() {
2452 let mut r = 0;
2453 for i in (0..LIMB_BITS / bits).rev() {
2454 r |= f((*limb >> (i * bits)) & ((1 << bits) - 1)) << (i * bits);
2455 }
2456 *limb = r;
2457 }
2458 }
2459
2460 /// Increment in-place, return the carry flag.
2461 pub(super) fn increment(dst: &mut [Limb]) -> Limb {
2462 for x in dst {
2463 *x = x.wrapping_add(1);
2464 if *x != 0 {
2465 return 0;
2466 }
2467 }
2468
2469 1
2470 }
2471
2472 /// Decrement in-place, return the borrow flag.
2473 pub(super) fn decrement(dst: &mut [Limb]) -> Limb {
2474 for x in dst {
2475 *x = x.wrapping_sub(1);
2476 if *x != !0 {
2477 return 0;
2478 }
2479 }
2480
2481 1
2482 }
2483
2484 /// `a += b + c` where `c` is zero or one. Returns the carry flag.
2485 pub(super) fn add(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb {
2486 assert!(c <= 1);
2487
2488 for (a, &b) in iter::zip(a, b) {
2489 let (r, overflow) = a.overflowing_add(b);
2490 let (r, overflow2) = r.overflowing_add(c);
2491 *a = r;
2492 c = (overflow | overflow2) as Limb;
2493 }
2494
2495 c
2496 }
2497
2498 /// `a -= b + c` where `c` is zero or one. Returns the borrow flag.
2499 pub(super) fn sub(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb {
2500 assert!(c <= 1);
2501
2502 for (a, &b) in iter::zip(a, b) {
2503 let (r, overflow) = a.overflowing_sub(b);
2504 let (r, overflow2) = r.overflowing_sub(c);
2505 *a = r;
2506 c = (overflow | overflow2) as Limb;
2507 }
2508
2509 c
2510 }
2511
2512 /// `a += b` or `a -= b`. Does not preserve `b`.
2513 pub(super) fn add_or_sub(
2514 a_sig: &mut [Limb],
2515 a_exp: &mut ExpInt,
2516 a_sign: &mut bool,
2517 b_sig: &mut [Limb],
2518 b_exp: ExpInt,
2519 b_sign: bool,
2520 ) -> Loss {
2521 // Are we bigger exponent-wise than the RHS?
2522 let bits = *a_exp - b_exp;
2523
2524 // Determine if the operation on the absolute values is effectively
2525 // an addition or subtraction.
2526 // Subtraction is more subtle than one might naively expect.
2527 if *a_sign ^ b_sign {
2528 let (reverse, loss);
2529
2530 #[allow(clippy::comparison_chain)]
2531 if bits == 0 {
2532 reverse = cmp(a_sig, b_sig) == Ordering::Less;
2533 loss = Loss::ExactlyZero;
2534 } else if bits > 0 {
2535 loss = shift_right(b_sig, &mut 0, (bits - 1) as usize);
2536 shift_left(a_sig, a_exp, 1);
2537 reverse = false;
2538 } else {
2539 loss = shift_right(a_sig, a_exp, (-bits - 1) as usize);
2540 shift_left(b_sig, &mut 0, 1);
2541 reverse = true;
2542 }
2543
2544 let borrow = (loss != Loss::ExactlyZero) as Limb;
2545 if reverse {
2546 // The code above is intended to ensure that no borrow is necessary.
2547 assert_eq!(sub(b_sig, a_sig, borrow), 0);
2548 a_sig.copy_from_slice(b_sig);
2549 *a_sign = !*a_sign;
2550 } else {
2551 // The code above is intended to ensure that no borrow is necessary.
2552 assert_eq!(sub(a_sig, b_sig, borrow), 0);
2553 }
2554
2555 // Invert the lost fraction - it was on the RHS and subtracted.
2556 match loss {
2557 Loss::LessThanHalf => Loss::MoreThanHalf,
2558 Loss::MoreThanHalf => Loss::LessThanHalf,
2559 _ => loss,
2560 }
2561 } else {
2562 let loss = if bits > 0 {
2563 shift_right(b_sig, &mut 0, bits as usize)
2564 } else {
2565 shift_right(a_sig, a_exp, -bits as usize)
2566 };
2567 // We have a guard bit; generating a carry cannot happen.
2568 assert_eq!(add(a_sig, b_sig, 0), 0);
2569 loss
2570 }
2571 }
2572
2573 /// `[low, high] = a * b`.
2574 ///
2575 /// This cannot overflow, because
2576 ///
2577 /// `(n - 1) * (n - 1) + 2 * (n - 1) == (n - 1) * (n + 1)`
2578 ///
2579 /// which is less than n<sup>2</sup>.
2580 pub(super) fn widening_mul(a: Limb, b: Limb) -> [Limb; 2] {
2581 let mut wide = [0, 0];
2582
2583 if a == 0 || b == 0 {
2584 return wide;
2585 }
2586
2587 const HALF_BITS: usize = LIMB_BITS / 2;
2588
2589 let select = |limb, i| (limb >> (i * HALF_BITS)) & ((1 << HALF_BITS) - 1);
2590 for i in 0..2 {
2591 for j in 0..2 {
2592 let mut x = [select(a, i) * select(b, j), 0];
2593 shift_left(&mut x, &mut 0, (i + j) * HALF_BITS);
2594 assert_eq!(add(&mut wide, &x, 0), 0);
2595 }
2596 }
2597
2598 wide
2599 }
2600
2601 /// `dst = a * b` (for normal `a` and `b`). Returns the lost fraction.
2602 pub(super) fn mul<'a>(
2603 dst: &mut [Limb],
2604 exp: &mut ExpInt,
2605 mut a: &'a [Limb],
2606 mut b: &'a [Limb],
2607 precision: usize,
2608 ) -> Loss {
2609 // Put the narrower number on the `a` for less loops below.
2610 if a.len() > b.len() {
2611 mem::swap(&mut a, &mut b);
2612 }
2613
2614 for x in &mut dst[..b.len()] {
2615 *x = 0;
2616 }
2617
2618 for i in 0..a.len() {
2619 let mut carry = 0;
2620 for j in 0..b.len() {
2621 let [low, mut high] = widening_mul(a[i], b[j]);
2622
2623 // Now add carry.
2624 let (low, overflow) = low.overflowing_add(carry);
2625 high += overflow as Limb;
2626
2627 // And now `dst[i + j]`, and store the new low part there.
2628 let (low, overflow) = low.overflowing_add(dst[i + j]);
2629 high += overflow as Limb;
2630
2631 dst[i + j] = low;
2632 carry = high;
2633 }
2634 dst[i + b.len()] = carry;
2635 }
2636
2637 // Assume the operands involved in the multiplication are single-precision
2638 // FP, and the two multiplicants are:
2639 // a = a23 . a22 ... a0 * 2^e1
2640 // b = b23 . b22 ... b0 * 2^e2
2641 // the result of multiplication is:
2642 // dst = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
2643 // Note that there are three significant bits at the left-hand side of the
2644 // radix point: two for the multiplication, and an overflow bit for the
2645 // addition (that will always be zero at this point). Move the radix point
2646 // toward left by two bits, and adjust exponent accordingly.
2647 *exp += 2;
2648
2649 // Convert the result having "2 * precision" significant-bits back to the one
2650 // having "precision" significant-bits. First, move the radix point from
2651 // poision "2*precision - 1" to "precision - 1". The exponent need to be
2652 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
2653 *exp -= precision as ExpInt + 1;
2654
2655 // In case MSB resides at the left-hand side of radix point, shift the
2656 // mantissa right by some amount to make sure the MSB reside right before
2657 // the radix point (i.e., "MSB . rest-significant-bits").
2658 //
2659 // Note that the result is not normalized when "omsb < precision". So, the
2660 // caller needs to call IeeeFloat::normalize() if normalized value is
2661 // expected.
2662 let omsb = omsb(dst);
2663 if omsb <= precision { Loss::ExactlyZero } else { shift_right(dst, exp, omsb - precision) }
2664 }
2665
2666 /// `quotient = dividend / divisor`. Returns the lost fraction.
2667 /// Does not preserve `dividend` or `divisor`.
2668 pub(super) fn div(
2669 quotient: &mut [Limb],
2670 exp: &mut ExpInt,
2671 dividend: &mut [Limb],
2672 divisor: &mut [Limb],
2673 precision: usize,
2674 ) -> Loss {
2675 // Normalize the divisor.
2676 let bits = precision - omsb(divisor);
2677 shift_left(divisor, &mut 0, bits);
2678 *exp += bits as ExpInt;
2679
2680 // Normalize the dividend.
2681 let bits = precision - omsb(dividend);
2682 shift_left(dividend, exp, bits);
2683
2684 // Division by 1.
2685 let olsb_divisor = olsb(divisor);
2686 if olsb_divisor == precision {
2687 quotient.copy_from_slice(dividend);
2688 return Loss::ExactlyZero;
2689 }
2690
2691 // Ensure the dividend >= divisor initially for the loop below.
2692 // Incidentally, this means that the division loop below is
2693 // guaranteed to set the integer bit to one.
2694 if cmp(dividend, divisor) == Ordering::Less {
2695 shift_left(dividend, exp, 1);
2696 assert_ne!(cmp(dividend, divisor), Ordering::Less)
2697 }
2698
2699 // Helper for figuring out the lost fraction.
2700 let lost_fraction = |dividend: &[Limb], divisor: &[Limb]| match cmp(dividend, divisor) {
2701 Ordering::Greater => Loss::MoreThanHalf,
2702 Ordering::Equal => Loss::ExactlyHalf,
2703 Ordering::Less => {
2704 if is_all_zeros(dividend) {
2705 Loss::ExactlyZero
2706 } else {
2707 Loss::LessThanHalf
2708 }
2709 }
2710 };
2711
2712 // Try to perform a (much faster) short division for small divisors.
2713 let divisor_bits = precision - (olsb_divisor - 1);
2714 macro_rules! try_short_div {
2715 ($W:ty, $H:ty, $half:expr) => {
2716 if divisor_bits * 2 <= $half {
2717 // Extract the small divisor.
2718 let _: Loss = shift_right(divisor, &mut 0, olsb_divisor - 1);
2719 let divisor = divisor[0] as $H as $W;
2720
2721 // Shift the dividend to produce a quotient with the unit bit set.
2722 let top_limb = *dividend.last().unwrap();
2723 let mut rem = (top_limb >> (LIMB_BITS - (divisor_bits - 1))) as $H;
2724 shift_left(dividend, &mut 0, divisor_bits - 1);
2725
2726 // Apply short division in place on $H (of $half bits) chunks.
2727 each_chunk(dividend, $half, |chunk| {
2728 let chunk = chunk as $H;
2729 let combined = ((rem as $W) << $half) | (chunk as $W);
2730 rem = (combined % divisor) as $H;
2731 (combined / divisor) as $H as Limb
2732 });
2733 quotient.copy_from_slice(dividend);
2734
2735 return lost_fraction(&[(rem as Limb) << 1], &[divisor as Limb]);
2736 }
2737 };
2738 }
2739
2740 try_short_div!(u32, u16, 16);
2741 try_short_div!(u64, u32, 32);
2742 try_short_div!(u128, u64, 64);
2743
2744 // Zero the quotient before setting bits in it.
2745 for x in &mut quotient[..limbs_for_bits(precision)] {
2746 *x = 0;
2747 }
2748
2749 // Long division.
2750 for bit in (0..precision).rev() {
2751 if cmp(dividend, divisor) != Ordering::Less {
2752 sub(dividend, divisor, 0);
2753 set_bit(quotient, bit);
2754 }
2755 shift_left(dividend, &mut 0, 1);
2756 }
2757
2758 lost_fraction(dividend, divisor)
2759 }
2760 }