]> git.proxmox.com Git - rustc.git/blame - compiler/rustc_apfloat/src/ieee.rs
New upstream version 1.53.0+dfsg1
[rustc.git] / compiler / rustc_apfloat / src / ieee.rs
CommitLineData
9fa01778
XL
1use crate::{Category, ExpInt, IEK_INF, IEK_NAN, IEK_ZERO};
2use crate::{Float, FloatConvert, ParseError, Round, Status, StatusAnd};
3b2f2976 3
e1599b0c
XL
4use core::cmp::{self, Ordering};
5use core::convert::TryFrom;
6use core::fmt::{self, Write};
7use core::marker::PhantomData;
8use core::mem;
9use core::ops::Neg;
dfeec247 10use smallvec::{smallvec, SmallVec};
3b2f2976
XL
11
12#[must_use]
13pub struct IeeeFloat<S> {
14 /// Absolute significand value (including the integer bit).
15 sig: [Limb; 1],
16
17 /// The signed unbiased exponent of the value.
18 exp: ExpInt,
19
20 /// What kind of floating point number this is.
21 category: Category,
22
23 /// Sign bit of the number.
24 sign: bool,
25
26 marker: PhantomData<S>,
27}
28
29/// Fundamental unit of big integer arithmetic, but also
30/// large to store the largest significands by itself.
31type Limb = u128;
32const LIMB_BITS: usize = 128;
33fn limbs_for_bits(bits: usize) -> usize {
34 (bits + LIMB_BITS - 1) / LIMB_BITS
35}
36
37/// Enum that represents what fraction of the LSB truncated bits of an fp number
38/// represent.
39///
40/// This essentially combines the roles of guard and sticky bits.
41#[must_use]
42#[derive(Copy, Clone, PartialEq, Eq, Debug)]
43enum Loss {
44 // Example of truncated bits:
dfeec247 45 ExactlyZero, // 000000
3b2f2976 46 LessThanHalf, // 0xxxxx x's not all zero
dfeec247 47 ExactlyHalf, // 100000
3b2f2976
XL
48 MoreThanHalf, // 1xxxxx x's not all zero
49}
50
51/// Represents floating point arithmetic semantics.
52pub trait Semantics: Sized {
53 /// Total number of bits in the in-memory format.
54 const BITS: usize;
55
56 /// Number of bits in the significand. This includes the integer bit.
57 const PRECISION: usize;
58
ff7c6d11 59 /// The largest E such that 2<sup>E</sup> is representable; this matches the
3b2f2976
XL
60 /// definition of IEEE 754.
61 const MAX_EXP: ExpInt;
62
ff7c6d11 63 /// The smallest E such that 2<sup>E</sup> is a normalized number; this
3b2f2976
XL
64 /// matches the definition of IEEE 754.
65 const MIN_EXP: ExpInt = -Self::MAX_EXP + 1;
66
67 /// The significand bit that marks NaN as quiet.
68 const QNAN_BIT: usize = Self::PRECISION - 2;
69
70 /// The significand bitpattern to mark a NaN as quiet.
71 /// NOTE: for X87DoubleExtended we need to set two bits instead of 2.
72 const QNAN_SIGNIFICAND: Limb = 1 << Self::QNAN_BIT;
73
74 fn from_bits(bits: u128) -> IeeeFloat<Self> {
75 assert!(Self::BITS > Self::PRECISION);
76
77 let sign = bits & (1 << (Self::BITS - 1));
78 let exponent = (bits & !sign) >> (Self::PRECISION - 1);
79 let mut r = IeeeFloat {
80 sig: [bits & ((1 << (Self::PRECISION - 1)) - 1)],
81 // Convert the exponent from its bias representation to a signed integer.
82 exp: (exponent as ExpInt) - Self::MAX_EXP,
83 category: Category::Zero,
84 sign: sign != 0,
85 marker: PhantomData,
86 };
87
88 if r.exp == Self::MIN_EXP - 1 && r.sig == [0] {
89 // Exponent, significand meaningless.
90 r.category = Category::Zero;
91 } else if r.exp == Self::MAX_EXP + 1 && r.sig == [0] {
92 // Exponent, significand meaningless.
93 r.category = Category::Infinity;
94 } else if r.exp == Self::MAX_EXP + 1 && r.sig != [0] {
95 // Sign, exponent, significand meaningless.
96 r.category = Category::NaN;
97 } else {
98 r.category = Category::Normal;
99 if r.exp == Self::MIN_EXP - 1 {
100 // Denormal.
101 r.exp = Self::MIN_EXP;
102 } else {
103 // Set integer bit.
104 sig::set_bit(&mut r.sig, Self::PRECISION - 1);
105 }
106 }
107
108 r
109 }
110
111 fn to_bits(x: IeeeFloat<Self>) -> u128 {
112 assert!(Self::BITS > Self::PRECISION);
113
114 // Split integer bit from significand.
115 let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1);
116 let mut significand = x.sig[0] & ((1 << (Self::PRECISION - 1)) - 1);
117 let exponent = match x.category {
118 Category::Normal => {
119 if x.exp == Self::MIN_EXP && !integer_bit {
120 // Denormal.
121 Self::MIN_EXP - 1
122 } else {
123 x.exp
124 }
125 }
126 Category::Zero => {
127 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
128 significand = 0;
129 Self::MIN_EXP - 1
130 }
131 Category::Infinity => {
132 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
133 significand = 0;
134 Self::MAX_EXP + 1
135 }
136 Category::NaN => Self::MAX_EXP + 1,
137 };
138
139 // Convert the exponent from a signed integer to its bias representation.
140 let exponent = (exponent + Self::MAX_EXP) as u128;
141
142 ((x.sign as u128) << (Self::BITS - 1)) | (exponent << (Self::PRECISION - 1)) | significand
143 }
144}
145
146impl<S> Copy for IeeeFloat<S> {}
147impl<S> Clone for IeeeFloat<S> {
148 fn clone(&self) -> Self {
149 *self
150 }
151}
152
153macro_rules! ieee_semantics {
154 ($($name:ident = $sem:ident($bits:tt : $exp_bits:tt)),*) => {
155 $(pub struct $sem;)*
156 $(pub type $name = IeeeFloat<$sem>;)*
157 $(impl Semantics for $sem {
158 const BITS: usize = $bits;
159 const PRECISION: usize = ($bits - 1 - $exp_bits) + 1;
160 const MAX_EXP: ExpInt = (1 << ($exp_bits - 1)) - 1;
161 })*
162 }
163}
164
165ieee_semantics! {
166 Half = HalfS(16:5),
167 Single = SingleS(32:8),
168 Double = DoubleS(64:11),
169 Quad = QuadS(128:15)
170}
171
172pub struct X87DoubleExtendedS;
173pub type X87DoubleExtended = IeeeFloat<X87DoubleExtendedS>;
174impl Semantics for X87DoubleExtendedS {
175 const BITS: usize = 80;
176 const PRECISION: usize = 64;
177 const MAX_EXP: ExpInt = (1 << (15 - 1)) - 1;
178
179 /// For x87 extended precision, we want to make a NaN, not a
180 /// pseudo-NaN. Maybe we should expose the ability to make
181 /// pseudo-NaNs?
182 const QNAN_SIGNIFICAND: Limb = 0b11 << Self::QNAN_BIT;
183
184 /// Integer bit is explicit in this format. Intel hardware (387 and later)
185 /// does not support these bit patterns:
186 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
187 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
188 /// exponent = 0, integer bit 1 ("pseudodenormal")
9fa01778 189 /// exponent != 0 nor all 1's, integer bit 0 ("unnormal")
3b2f2976
XL
190 /// At the moment, the first two are treated as NaNs, the second two as Normal.
191 fn from_bits(bits: u128) -> IeeeFloat<Self> {
192 let sign = bits & (1 << (Self::BITS - 1));
193 let exponent = (bits & !sign) >> Self::PRECISION;
194 let mut r = IeeeFloat {
195 sig: [bits & ((1 << (Self::PRECISION - 1)) - 1)],
196 // Convert the exponent from its bias representation to a signed integer.
197 exp: (exponent as ExpInt) - Self::MAX_EXP,
198 category: Category::Zero,
199 sign: sign != 0,
200 marker: PhantomData,
201 };
202
203 if r.exp == Self::MIN_EXP - 1 && r.sig == [0] {
204 // Exponent, significand meaningless.
205 r.category = Category::Zero;
206 } else if r.exp == Self::MAX_EXP + 1 && r.sig == [1 << (Self::PRECISION - 1)] {
207 // Exponent, significand meaningless.
208 r.category = Category::Infinity;
209 } else if r.exp == Self::MAX_EXP + 1 && r.sig != [1 << (Self::PRECISION - 1)] {
210 // Sign, exponent, significand meaningless.
211 r.category = Category::NaN;
212 } else {
213 r.category = Category::Normal;
214 if r.exp == Self::MIN_EXP - 1 {
215 // Denormal.
216 r.exp = Self::MIN_EXP;
217 }
218 }
219
220 r
221 }
222
223 fn to_bits(x: IeeeFloat<Self>) -> u128 {
224 // Get integer bit from significand.
225 let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1);
226 let mut significand = x.sig[0] & ((1 << Self::PRECISION) - 1);
227 let exponent = match x.category {
228 Category::Normal => {
229 if x.exp == Self::MIN_EXP && !integer_bit {
230 // Denormal.
231 Self::MIN_EXP - 1
232 } else {
233 x.exp
234 }
235 }
236 Category::Zero => {
237 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
238 significand = 0;
239 Self::MIN_EXP - 1
240 }
241 Category::Infinity => {
242 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
243 significand = 1 << (Self::PRECISION - 1);
244 Self::MAX_EXP + 1
245 }
246 Category::NaN => Self::MAX_EXP + 1,
247 };
248
249 // Convert the exponent from a signed integer to its bias representation.
250 let exponent = (exponent + Self::MAX_EXP) as u128;
251
252 ((x.sign as u128) << (Self::BITS - 1)) | (exponent << Self::PRECISION) | significand
253 }
254}
255
256float_common_impls!(IeeeFloat<S>);
257
258impl<S: Semantics> PartialEq for IeeeFloat<S> {
259 fn eq(&self, rhs: &Self) -> bool {
260 self.partial_cmp(rhs) == Some(Ordering::Equal)
261 }
262}
263
264impl<S: Semantics> PartialOrd for IeeeFloat<S> {
265 fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
266 match (self.category, rhs.category) {
dfeec247 267 (Category::NaN, _) | (_, Category::NaN) => None,
3b2f2976
XL
268
269 (Category::Infinity, Category::Infinity) => Some((!self.sign).cmp(&(!rhs.sign))),
270
271 (Category::Zero, Category::Zero) => Some(Ordering::Equal),
272
dfeec247
XL
273 (Category::Infinity, _) | (Category::Normal, Category::Zero) => {
274 Some((!self.sign).cmp(&self.sign))
275 }
3b2f2976 276
dfeec247
XL
277 (_, Category::Infinity) | (Category::Zero, Category::Normal) => {
278 Some(rhs.sign.cmp(&(!rhs.sign)))
279 }
3b2f2976
XL
280
281 (Category::Normal, Category::Normal) => {
282 // Two normal numbers. Do they have the same sign?
283 Some((!self.sign).cmp(&(!rhs.sign)).then_with(|| {
284 // Compare absolute values; invert result if negative.
285 let result = self.cmp_abs_normal(*rhs);
286
287 if self.sign { result.reverse() } else { result }
288 }))
289 }
290 }
291 }
292}
293
294impl<S> Neg for IeeeFloat<S> {
295 type Output = Self;
296 fn neg(mut self) -> Self {
297 self.sign = !self.sign;
298 self
299 }
300}
301
302/// Prints this value as a decimal string.
303///
304/// \param precision The maximum number of digits of
305/// precision to output. If there are fewer digits available,
306/// zero padding will not be used unless the value is
307/// integral and small enough to be expressed in
308/// precision digits. 0 means to use the natural
309/// precision of the number.
310/// \param width The maximum number of zeros to
311/// consider inserting before falling back to scientific
312/// notation. 0 means to always use scientific notation.
313///
314/// \param alternate Indicate whether to remove the trailing zero in
315/// fraction part or not. Also setting this parameter to true forces
316/// producing of output more similar to default printf behavior.
317/// Specifically the lower e is used as exponent delimiter and exponent
318/// always contains no less than two digits.
319///
320/// Number precision width Result
321/// ------ --------- ----- ------
322/// 1.01E+4 5 2 10100
323/// 1.01E+4 4 2 1.01E+4
324/// 1.01E+4 5 1 1.01E+4
325/// 1.01E-2 5 2 0.0101
326/// 1.01E-2 4 2 0.0101
327/// 1.01E-2 4 1 1.01E-2
328impl<S: Semantics> fmt::Display for IeeeFloat<S> {
9fa01778 329 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3b2f2976
XL
330 let width = f.width().unwrap_or(3);
331 let alternate = f.alternate();
332
333 match self.category {
334 Category::Infinity => {
335 if self.sign {
336 return f.write_str("-Inf");
337 } else {
338 return f.write_str("+Inf");
339 }
340 }
341
342 Category::NaN => return f.write_str("NaN"),
343
344 Category::Zero => {
345 if self.sign {
346 f.write_char('-')?;
347 }
348
349 if width == 0 {
350 if alternate {
351 f.write_str("0.0")?;
352 if let Some(n) = f.precision() {
353 for _ in 1..n {
354 f.write_char('0')?;
355 }
356 }
357 f.write_str("e+00")?;
358 } else {
359 f.write_str("0.0E+0")?;
360 }
361 } else {
362 f.write_char('0')?;
363 }
364 return Ok(());
365 }
366
367 Category::Normal => {}
368 }
369
370 if self.sign {
371 f.write_char('-')?;
372 }
373
374 // We use enough digits so the number can be round-tripped back to an
375 // APFloat. The formula comes from "How to Print Floating-Point Numbers
376 // Accurately" by Steele and White.
377 // FIXME: Using a formula based purely on the precision is conservative;
378 // we can print fewer digits depending on the actual value being printed.
379
380 // precision = 2 + floor(S::PRECISION / lg_2(10))
381 let precision = f.precision().unwrap_or(2 + S::PRECISION * 59 / 196);
382
383 // Decompose the number into an APInt and an exponent.
384 let mut exp = self.exp - (S::PRECISION as ExpInt - 1);
385 let mut sig = vec![self.sig[0]];
386
387 // Ignore trailing binary zeros.
388 let trailing_zeros = sig[0].trailing_zeros();
389 let _: Loss = sig::shift_right(&mut sig, &mut exp, trailing_zeros as usize);
390
391 // Change the exponent from 2^e to 10^e.
392 if exp == 0 {
393 // Nothing to do.
394 } else if exp > 0 {
395 // Just shift left.
396 let shift = exp as usize;
397 sig.resize(limbs_for_bits(S::PRECISION + shift), 0);
398 sig::shift_left(&mut sig, &mut exp, shift);
399 } else {
400 // exp < 0
401 let mut texp = -exp as usize;
402
403 // We transform this using the identity:
404 // (N)(2^-e) == (N)(5^e)(10^-e)
405
406 // Multiply significand by 5^e.
407 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
408 let mut sig_scratch = vec![];
409 let mut p5 = vec![];
410 let mut p5_scratch = vec![];
411 while texp != 0 {
412 if p5.is_empty() {
413 p5.push(5);
414 } else {
415 p5_scratch.resize(p5.len() * 2, 0);
416 let _: Loss =
417 sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS);
418 while p5_scratch.last() == Some(&0) {
419 p5_scratch.pop();
420 }
421 mem::swap(&mut p5, &mut p5_scratch);
422 }
423 if texp & 1 != 0 {
424 sig_scratch.resize(sig.len() + p5.len(), 0);
425 let _: Loss = sig::mul(
426 &mut sig_scratch,
427 &mut 0,
428 &sig,
429 &p5,
430 (sig.len() + p5.len()) * LIMB_BITS,
431 );
432 while sig_scratch.last() == Some(&0) {
433 sig_scratch.pop();
434 }
435 mem::swap(&mut sig, &mut sig_scratch);
436 }
437 texp >>= 1;
438 }
439 }
440
441 // Fill the buffer.
442 let mut buffer = vec![];
443
444 // Ignore digits from the significand until it is no more
445 // precise than is required for the desired precision.
446 // 196/59 is a very slight overestimate of lg_2(10).
447 let required = (precision * 196 + 58) / 59;
448 let mut discard_digits = sig::omsb(&sig).saturating_sub(required) * 59 / 196;
449 let mut in_trail = true;
450 while !sig.is_empty() {
451 // Perform short division by 10 to extract the rightmost digit.
452 // rem <- sig % 10
453 // sig <- sig / 10
454 let mut rem = 0;
455
456 // Use 64-bit division and remainder, with 32-bit chunks from sig.
457 sig::each_chunk(&mut sig, 32, |chunk| {
458 let chunk = chunk as u32;
459 let combined = ((rem as u64) << 32) | (chunk as u64);
460 rem = (combined % 10) as u8;
461 (combined / 10) as u32 as Limb
462 });
463
464 // Reduce the sigificand to avoid wasting time dividing 0's.
465 while sig.last() == Some(&0) {
466 sig.pop();
467 }
468
469 let digit = rem;
470
471 // Ignore digits we don't need.
472 if discard_digits > 0 {
473 discard_digits -= 1;
474 exp += 1;
475 continue;
476 }
477
478 // Drop trailing zeros.
479 if in_trail && digit == 0 {
480 exp += 1;
481 } else {
482 in_trail = false;
483 buffer.push(b'0' + digit);
484 }
485 }
486
487 assert!(!buffer.is_empty(), "no characters in buffer!");
488
489 // Drop down to precision.
490 // FIXME: don't do more precise calculations above than are required.
491 if buffer.len() > precision {
492 // The most significant figures are the last ones in the buffer.
493 let mut first_sig = buffer.len() - precision;
494
495 // Round.
496 // FIXME: this probably shouldn't use 'round half up'.
497
498 // Rounding down is just a truncation, except we also want to drop
499 // trailing zeros from the new result.
500 if buffer[first_sig - 1] < b'5' {
501 while first_sig < buffer.len() && buffer[first_sig] == b'0' {
502 first_sig += 1;
503 }
504 } else {
505 // Rounding up requires a decimal add-with-carry. If we continue
506 // the carry, the newly-introduced zeros will just be truncated.
507 for x in &mut buffer[first_sig..] {
508 if *x == b'9' {
509 first_sig += 1;
510 } else {
511 *x += 1;
512 break;
513 }
514 }
515 }
516
517 exp += first_sig as ExpInt;
518 buffer.drain(..first_sig);
519
520 // If we carried through, we have exactly one digit of precision.
521 if buffer.is_empty() {
522 buffer.push(b'1');
523 }
524 }
525
526 let digits = buffer.len();
527
528 // Check whether we should use scientific notation.
529 let scientific = if width == 0 {
530 true
b7449926
XL
531 } else if exp >= 0 {
532 // 765e3 --> 765000
533 // ^^^
534 // But we shouldn't make the number look more precise than it is.
535 exp as usize > width || digits + exp as usize > precision
3b2f2976 536 } else {
b7449926
XL
537 // Power of the most significant digit.
538 let msd = exp + (digits - 1) as ExpInt;
539 if msd >= 0 {
540 // 765e-2 == 7.65
541 false
3b2f2976 542 } else {
b7449926
XL
543 // 765e-5 == 0.00765
544 // ^ ^^
545 -msd as usize > width
3b2f2976
XL
546 }
547 };
548
549 // Scientific formatting is pretty straightforward.
550 if scientific {
551 exp += digits as ExpInt - 1;
552
553 f.write_char(buffer[digits - 1] as char)?;
554 f.write_char('.')?;
555 let truncate_zero = !alternate;
556 if digits == 1 && truncate_zero {
557 f.write_char('0')?;
558 } else {
559 for &d in buffer[..digits - 1].iter().rev() {
560 f.write_char(d as char)?;
561 }
562 }
563 // Fill with zeros up to precision.
564 if !truncate_zero && precision > digits - 1 {
0731742a 565 for _ in 0..=precision - digits {
3b2f2976
XL
566 f.write_char('0')?;
567 }
568 }
569 // For alternate we use lower 'e'.
570 f.write_char(if alternate { 'e' } else { 'E' })?;
571
572 // Exponent always at least two digits if we do not truncate zeros.
573 if truncate_zero {
574 write!(f, "{:+}", exp)?;
575 } else {
576 write!(f, "{:+03}", exp)?;
577 }
578
579 return Ok(());
580 }
581
582 // Non-scientific, positive exponents.
583 if exp >= 0 {
584 for &d in buffer.iter().rev() {
585 f.write_char(d as char)?;
586 }
587 for _ in 0..exp {
588 f.write_char('0')?;
589 }
590 return Ok(());
591 }
592
593 // Non-scientific, negative exponents.
594 let unit_place = -exp as usize;
595 if unit_place < digits {
596 for &d in buffer[unit_place..].iter().rev() {
597 f.write_char(d as char)?;
598 }
599 f.write_char('.')?;
600 for &d in buffer[..unit_place].iter().rev() {
601 f.write_char(d as char)?;
602 }
603 } else {
604 f.write_str("0.")?;
605 for _ in digits..unit_place {
606 f.write_char('0')?;
607 }
608 for &d in buffer.iter().rev() {
609 f.write_char(d as char)?;
610 }
611 }
612
613 Ok(())
614 }
615}
616
617impl<S: Semantics> fmt::Debug for IeeeFloat<S> {
9fa01778 618 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
dfeec247
XL
619 write!(
620 f,
621 "{}({:?} | {}{:?} * 2^{})",
622 self,
623 self.category,
624 if self.sign { "-" } else { "+" },
625 self.sig,
626 self.exp
627 )
3b2f2976
XL
628 }
629}
630
631impl<S: Semantics> Float for IeeeFloat<S> {
632 const BITS: usize = S::BITS;
633 const PRECISION: usize = S::PRECISION;
634 const MAX_EXP: ExpInt = S::MAX_EXP;
635 const MIN_EXP: ExpInt = S::MIN_EXP;
636
637 const ZERO: Self = IeeeFloat {
638 sig: [0],
639 exp: S::MIN_EXP - 1,
640 category: Category::Zero,
641 sign: false,
642 marker: PhantomData,
643 };
644
645 const INFINITY: Self = IeeeFloat {
646 sig: [0],
647 exp: S::MAX_EXP + 1,
648 category: Category::Infinity,
649 sign: false,
650 marker: PhantomData,
651 };
652
653 // FIXME(eddyb) remove when qnan becomes const fn.
654 const NAN: Self = IeeeFloat {
655 sig: [S::QNAN_SIGNIFICAND],
656 exp: S::MAX_EXP + 1,
657 category: Category::NaN,
658 sign: false,
659 marker: PhantomData,
660 };
661
662 fn qnan(payload: Option<u128>) -> Self {
663 IeeeFloat {
dfeec247
XL
664 sig: [S::QNAN_SIGNIFICAND
665 | payload.map_or(0, |payload| {
666 // Zero out the excess bits of the significand.
667 payload & ((1 << S::QNAN_BIT) - 1)
668 })],
3b2f2976
XL
669 exp: S::MAX_EXP + 1,
670 category: Category::NaN,
671 sign: false,
672 marker: PhantomData,
673 }
674 }
675
676 fn snan(payload: Option<u128>) -> Self {
677 let mut snan = Self::qnan(payload);
678
679 // We always have to clear the QNaN bit to make it an SNaN.
680 sig::clear_bit(&mut snan.sig, S::QNAN_BIT);
681
682 // If there are no bits set in the payload, we have to set
683 // *something* to make it a NaN instead of an infinity;
684 // conventionally, this is the next bit down from the QNaN bit.
685 if snan.sig[0] & !S::QNAN_SIGNIFICAND == 0 {
686 sig::set_bit(&mut snan.sig, S::QNAN_BIT - 1);
687 }
688
689 snan
690 }
691
692 fn largest() -> Self {
693 // We want (in interchange format):
694 // exponent = 1..10
695 // significand = 1..1
696 IeeeFloat {
b7449926 697 sig: [(1 << S::PRECISION) - 1],
3b2f2976
XL
698 exp: S::MAX_EXP,
699 category: Category::Normal,
700 sign: false,
701 marker: PhantomData,
702 }
703 }
704
705 // We want (in interchange format):
706 // exponent = 0..0
707 // significand = 0..01
708 const SMALLEST: Self = IeeeFloat {
709 sig: [1],
710 exp: S::MIN_EXP,
711 category: Category::Normal,
712 sign: false,
713 marker: PhantomData,
714 };
715
716 fn smallest_normalized() -> Self {
717 // We want (in interchange format):
718 // exponent = 0..0
719 // significand = 10..0
720 IeeeFloat {
721 sig: [1 << (S::PRECISION - 1)],
722 exp: S::MIN_EXP,
723 category: Category::Normal,
724 sign: false,
725 marker: PhantomData,
726 }
727 }
728
729 fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
730 let status = match (self.category, rhs.category) {
731 (Category::Infinity, Category::Infinity) => {
732 // Differently signed infinities can only be validly
733 // subtracted.
734 if self.sign != rhs.sign {
735 self = Self::NAN;
736 Status::INVALID_OP
737 } else {
738 Status::OK
739 }
740 }
741
742 // Sign may depend on rounding mode; handled below.
dfeec247
XL
743 (_, Category::Zero) | (Category::NaN, _) | (Category::Infinity, Category::Normal) => {
744 Status::OK
745 }
3b2f2976 746
ba9703b0 747 (Category::Zero, _) | (_, Category::NaN | Category::Infinity) => {
3b2f2976
XL
748 self = rhs;
749 Status::OK
750 }
751
752 // This return code means it was not a simple case.
753 (Category::Normal, Category::Normal) => {
754 let loss = sig::add_or_sub(
755 &mut self.sig,
756 &mut self.exp,
757 &mut self.sign,
758 &mut [rhs.sig[0]],
759 rhs.exp,
760 rhs.sign,
761 );
762 let status;
763 self = unpack!(status=, self.normalize(round, loss));
764
765 // Can only be zero if we lost no fraction.
766 assert!(self.category != Category::Zero || loss == Loss::ExactlyZero);
767
768 status
769 }
770 };
771
772 // If two numbers add (exactly) to zero, IEEE 754 decrees it is a
773 // positive zero unless rounding to minus infinity, except that
774 // adding two like-signed zeroes gives that zero.
dfeec247
XL
775 if self.category == Category::Zero
776 && (rhs.category != Category::Zero || self.sign != rhs.sign)
3b2f2976
XL
777 {
778 self.sign = round == Round::TowardNegative;
779 }
780
781 status.and(self)
782 }
783
784 fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
785 self.sign ^= rhs.sign;
786
787 match (self.category, rhs.category) {
788 (Category::NaN, _) => {
789 self.sign = false;
790 Status::OK.and(self)
791 }
792
793 (_, Category::NaN) => {
794 self.sign = false;
795 self.category = Category::NaN;
796 self.sig = rhs.sig;
797 Status::OK.and(self)
798 }
799
dfeec247
XL
800 (Category::Zero, Category::Infinity) | (Category::Infinity, Category::Zero) => {
801 Status::INVALID_OP.and(Self::NAN)
802 }
3b2f2976 803
dfeec247 804 (_, Category::Infinity) | (Category::Infinity, _) => {
3b2f2976
XL
805 self.category = Category::Infinity;
806 Status::OK.and(self)
807 }
808
dfeec247 809 (Category::Zero, _) | (_, Category::Zero) => {
3b2f2976
XL
810 self.category = Category::Zero;
811 Status::OK.and(self)
812 }
813
814 (Category::Normal, Category::Normal) => {
815 self.exp += rhs.exp;
816 let mut wide_sig = [0; 2];
dfeec247
XL
817 let loss =
818 sig::mul(&mut wide_sig, &mut self.exp, &self.sig, &rhs.sig, S::PRECISION);
3b2f2976
XL
819 self.sig = [wide_sig[0]];
820 let mut status;
821 self = unpack!(status=, self.normalize(round, loss));
822 if loss != Loss::ExactlyZero {
823 status |= Status::INEXACT;
824 }
825 status.and(self)
826 }
827 }
828 }
829
830 fn mul_add_r(mut self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self> {
831 // If and only if all arguments are normal do we need to do an
832 // extended-precision calculation.
833 if !self.is_finite_non_zero() || !multiplicand.is_finite_non_zero() || !addend.is_finite() {
834 let mut status;
835 self = unpack!(status=, self.mul_r(multiplicand, round));
836
837 // FS can only be Status::OK or Status::INVALID_OP. There is no more work
838 // to do in the latter case. The IEEE-754R standard says it is
839 // implementation-defined in this case whether, if ADDEND is a
840 // quiet NaN, we raise invalid op; this implementation does so.
841 //
842 // If we need to do the addition we can do so with normal
843 // precision.
844 if status == Status::OK {
845 self = unpack!(status=, self.add_r(addend, round));
846 }
847 return status.and(self);
848 }
849
850 // Post-multiplication sign, before addition.
851 self.sign ^= multiplicand.sign;
852
853 // Allocate space for twice as many bits as the original significand, plus one
854 // extra bit for the addition to overflow into.
855 assert!(limbs_for_bits(S::PRECISION * 2 + 1) <= 2);
856 let mut wide_sig = sig::widening_mul(self.sig[0], multiplicand.sig[0]);
857
858 let mut loss = Loss::ExactlyZero;
859 let mut omsb = sig::omsb(&wide_sig);
860 self.exp += multiplicand.exp;
861
862 // Assume the operands involved in the multiplication are single-precision
863 // FP, and the two multiplicants are:
864 // lhs = a23 . a22 ... a0 * 2^e1
865 // rhs = b23 . b22 ... b0 * 2^e2
866 // the result of multiplication is:
867 // lhs = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
868 // Note that there are three significant bits at the left-hand side of the
869 // radix point: two for the multiplication, and an overflow bit for the
870 // addition (that will always be zero at this point). Move the radix point
871 // toward left by two bits, and adjust exponent accordingly.
872 self.exp += 2;
873
874 if addend.is_non_zero() {
875 // Normalize our MSB to one below the top bit to allow for overflow.
876 let ext_precision = 2 * S::PRECISION + 1;
877 if omsb != ext_precision - 1 {
878 assert!(ext_precision > omsb);
879 sig::shift_left(&mut wide_sig, &mut self.exp, (ext_precision - 1) - omsb);
880 }
881
882 // The intermediate result of the multiplication has "2 * S::PRECISION"
a1dfa0c6 883 // significant bit; adjust the addend to be consistent with mul result.
3b2f2976
XL
884 let mut ext_addend_sig = [addend.sig[0], 0];
885
886 // Extend the addend significand to ext_precision - 1. This guarantees
887 // that the high bit of the significand is zero (same as wide_sig),
888 // so the addition will overflow (if it does overflow at all) into the top bit.
dfeec247 889 sig::shift_left(&mut ext_addend_sig, &mut 0, ext_precision - 1 - S::PRECISION);
3b2f2976
XL
890 loss = sig::add_or_sub(
891 &mut wide_sig,
892 &mut self.exp,
893 &mut self.sign,
894 &mut ext_addend_sig,
895 addend.exp + 1,
896 addend.sign,
897 );
898
899 omsb = sig::omsb(&wide_sig);
900 }
901
902 // Convert the result having "2 * S::PRECISION" significant-bits back to the one
903 // having "S::PRECISION" significant-bits. First, move the radix point from
a1dfa0c6 904 // position "2*S::PRECISION - 1" to "S::PRECISION - 1". The exponent need to be
3b2f2976
XL
905 // adjusted by "2*S::PRECISION - 1" - "S::PRECISION - 1" = "S::PRECISION".
906 self.exp -= S::PRECISION as ExpInt + 1;
907
908 // In case MSB resides at the left-hand side of radix point, shift the
909 // mantissa right by some amount to make sure the MSB reside right before
0731742a 910 // the radix point (i.e., "MSB . rest-significant-bits").
3b2f2976
XL
911 if omsb > S::PRECISION {
912 let bits = omsb - S::PRECISION;
913 loss = sig::shift_right(&mut wide_sig, &mut self.exp, bits).combine(loss);
914 }
915
916 self.sig[0] = wide_sig[0];
917
918 let mut status;
919 self = unpack!(status=, self.normalize(round, loss));
920 if loss != Loss::ExactlyZero {
921 status |= Status::INEXACT;
922 }
923
924 // If two numbers add (exactly) to zero, IEEE 754 decrees it is a
925 // positive zero unless rounding to minus infinity, except that
926 // adding two like-signed zeroes gives that zero.
dfeec247
XL
927 if self.category == Category::Zero
928 && !status.intersects(Status::UNDERFLOW)
929 && self.sign != addend.sign
3b2f2976
XL
930 {
931 self.sign = round == Round::TowardNegative;
932 }
933
934 status.and(self)
935 }
936
937 fn div_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
938 self.sign ^= rhs.sign;
939
940 match (self.category, rhs.category) {
941 (Category::NaN, _) => {
942 self.sign = false;
943 Status::OK.and(self)
944 }
945
946 (_, Category::NaN) => {
947 self.category = Category::NaN;
948 self.sig = rhs.sig;
949 self.sign = false;
950 Status::OK.and(self)
951 }
952
dfeec247
XL
953 (Category::Infinity, Category::Infinity) | (Category::Zero, Category::Zero) => {
954 Status::INVALID_OP.and(Self::NAN)
955 }
3b2f2976 956
ba9703b0 957 (Category::Infinity | Category::Zero, _) => Status::OK.and(self),
3b2f2976
XL
958
959 (Category::Normal, Category::Infinity) => {
960 self.category = Category::Zero;
961 Status::OK.and(self)
962 }
963
964 (Category::Normal, Category::Zero) => {
965 self.category = Category::Infinity;
966 Status::DIV_BY_ZERO.and(self)
967 }
968
969 (Category::Normal, Category::Normal) => {
970 self.exp -= rhs.exp;
971 let dividend = self.sig[0];
972 let loss = sig::div(
973 &mut self.sig,
974 &mut self.exp,
975 &mut [dividend],
976 &mut [rhs.sig[0]],
977 S::PRECISION,
978 );
979 let mut status;
980 self = unpack!(status=, self.normalize(round, loss));
981 if loss != Loss::ExactlyZero {
982 status |= Status::INEXACT;
983 }
984 status.and(self)
985 }
986 }
987 }
988
989 fn c_fmod(mut self, rhs: Self) -> StatusAnd<Self> {
990 match (self.category, rhs.category) {
dfeec247 991 (Category::NaN, _)
ba9703b0 992 | (Category::Zero, Category::Infinity | Category::Normal)
dfeec247 993 | (Category::Normal, Category::Infinity) => Status::OK.and(self),
3b2f2976
XL
994
995 (_, Category::NaN) => {
996 self.sign = false;
997 self.category = Category::NaN;
998 self.sig = rhs.sig;
999 Status::OK.and(self)
1000 }
1001
dfeec247 1002 (Category::Infinity, _) | (_, Category::Zero) => Status::INVALID_OP.and(Self::NAN),
3b2f2976
XL
1003
1004 (Category::Normal, Category::Normal) => {
dfeec247
XL
1005 while self.is_finite_non_zero()
1006 && rhs.is_finite_non_zero()
1007 && self.cmp_abs_normal(rhs) != Ordering::Less
3b2f2976
XL
1008 {
1009 let mut v = rhs.scalbn(self.ilogb() - rhs.ilogb());
1010 if self.cmp_abs_normal(v) == Ordering::Less {
1011 v = v.scalbn(-1);
1012 }
1013 v.sign = self.sign;
1014
1015 let status;
1016 self = unpack!(status=, self - v);
1017 assert_eq!(status, Status::OK);
1018 }
1019 Status::OK.and(self)
1020 }
1021 }
1022 }
1023
1024 fn round_to_integral(self, round: Round) -> StatusAnd<Self> {
1025 // If the exponent is large enough, we know that this value is already
1026 // integral, and the arithmetic below would potentially cause it to saturate
1027 // to +/-Inf. Bail out early instead.
1028 if self.is_finite_non_zero() && self.exp + 1 >= S::PRECISION as ExpInt {
1029 return Status::OK.and(self);
1030 }
1031
1032 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1033 // precision of our format, and then subtract it back off again. The choice
1034 // of rounding modes for the addition/subtraction determines the rounding mode
1035 // for our integral rounding as well.
1036 // NOTE: When the input value is negative, we do subtraction followed by
1037 // addition instead.
1038 assert!(S::PRECISION <= 128);
1039 let mut status;
1040 let magic_const = unpack!(status=, Self::from_u128(1 << (S::PRECISION - 1)));
1041 let magic_const = magic_const.copy_sign(self);
1042
1043 if status != Status::OK {
1044 return status.and(self);
1045 }
1046
1047 let mut r = self;
1048 r = unpack!(status=, r.add_r(magic_const, round));
1049 if status != Status::OK && status != Status::INEXACT {
1050 return status.and(self);
1051 }
1052
1053 // Restore the input sign to handle 0.0/-0.0 cases correctly.
1054 r.sub_r(magic_const, round).map(|r| r.copy_sign(self))
1055 }
1056
1057 fn next_up(mut self) -> StatusAnd<Self> {
1058 // Compute nextUp(x), handling each float category separately.
1059 match self.category {
1060 Category::Infinity => {
1061 if self.sign {
1062 // nextUp(-inf) = -largest
1063 Status::OK.and(-Self::largest())
1064 } else {
1065 // nextUp(+inf) = +inf
1066 Status::OK.and(self)
1067 }
1068 }
1069 Category::NaN => {
1070 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
1071 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
1072 // change the payload.
1073 if self.is_signaling() {
1074 // For consistency, propagate the sign of the sNaN to the qNaN.
1075 Status::INVALID_OP.and(Self::NAN.copy_sign(self))
1076 } else {
1077 Status::OK.and(self)
1078 }
1079 }
1080 Category::Zero => {
1081 // nextUp(pm 0) = +smallest
1082 Status::OK.and(Self::SMALLEST)
1083 }
1084 Category::Normal => {
1085 // nextUp(-smallest) = -0
1086 if self.is_smallest() && self.sign {
1087 return Status::OK.and(-Self::ZERO);
1088 }
1089
1090 // nextUp(largest) == INFINITY
1091 if self.is_largest() && !self.sign {
1092 return Status::OK.and(Self::INFINITY);
1093 }
1094
1095 // Excluding the integral bit. This allows us to test for binade boundaries.
1096 let sig_mask = (1 << (S::PRECISION - 1)) - 1;
1097
1098 // nextUp(normal) == normal + inc.
1099 if self.sign {
1100 // If we are negative, we need to decrement the significand.
1101
1102 // We only cross a binade boundary that requires adjusting the exponent
1103 // if:
1104 // 1. exponent != S::MIN_EXP. This implies we are not in the
1105 // smallest binade or are dealing with denormals.
1106 // 2. Our significand excluding the integral bit is all zeros.
dfeec247
XL
1107 let crossing_binade_boundary =
1108 self.exp != S::MIN_EXP && self.sig[0] & sig_mask == 0;
3b2f2976
XL
1109
1110 // Decrement the significand.
1111 //
1112 // We always do this since:
1113 // 1. If we are dealing with a non-binade decrement, by definition we
1114 // just decrement the significand.
1115 // 2. If we are dealing with a normal -> normal binade decrement, since
1116 // we have an explicit integral bit the fact that all bits but the
1117 // integral bit are zero implies that subtracting one will yield a
1118 // significand with 0 integral bit and 1 in all other spots. Thus we
1119 // must just adjust the exponent and set the integral bit to 1.
1120 // 3. If we are dealing with a normal -> denormal binade decrement,
1121 // since we set the integral bit to 0 when we represent denormals, we
1122 // just decrement the significand.
1123 sig::decrement(&mut self.sig);
1124
1125 if crossing_binade_boundary {
1126 // Our result is a normal number. Do the following:
1127 // 1. Set the integral bit to 1.
1128 // 2. Decrement the exponent.
1129 sig::set_bit(&mut self.sig, S::PRECISION - 1);
1130 self.exp -= 1;
1131 }
1132 } else {
1133 // If we are positive, we need to increment the significand.
1134
1135 // We only cross a binade boundary that requires adjusting the exponent if
1136 // the input is not a denormal and all of said input's significand bits
1137 // are set. If all of said conditions are true: clear the significand, set
1138 // the integral bit to 1, and increment the exponent. If we have a
1139 // denormal always increment since moving denormals and the numbers in the
1140 // smallest normal binade have the same exponent in our representation.
dfeec247
XL
1141 let crossing_binade_boundary =
1142 !self.is_denormal() && self.sig[0] & sig_mask == sig_mask;
3b2f2976
XL
1143
1144 if crossing_binade_boundary {
1145 self.sig = [0];
1146 sig::set_bit(&mut self.sig, S::PRECISION - 1);
1147 assert_ne!(
1148 self.exp,
1149 S::MAX_EXP,
1150 "We can not increment an exponent beyond the MAX_EXP \
1151 allowed by the given floating point semantics."
1152 );
1153 self.exp += 1;
1154 } else {
1155 sig::increment(&mut self.sig);
1156 }
1157 }
1158 Status::OK.and(self)
1159 }
1160 }
1161 }
1162
1163 fn from_bits(input: u128) -> Self {
1164 // Dispatch to semantics.
1165 S::from_bits(input)
1166 }
1167
1168 fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self> {
1169 IeeeFloat {
1170 sig: [input],
1171 exp: S::PRECISION as ExpInt - 1,
1172 category: Category::Normal,
1173 sign: false,
1174 marker: PhantomData,
dfeec247
XL
1175 }
1176 .normalize(round, Loss::ExactlyZero)
3b2f2976
XL
1177 }
1178
1179 fn from_str_r(mut s: &str, mut round: Round) -> Result<StatusAnd<Self>, ParseError> {
1180 if s.is_empty() {
1181 return Err(ParseError("Invalid string length"));
1182 }
1183
1184 // Handle special cases.
1185 match s {
1186 "inf" | "INFINITY" => return Ok(Status::OK.and(Self::INFINITY)),
1187 "-inf" | "-INFINITY" => return Ok(Status::OK.and(-Self::INFINITY)),
1188 "nan" | "NaN" => return Ok(Status::OK.and(Self::NAN)),
1189 "-nan" | "-NaN" => return Ok(Status::OK.and(-Self::NAN)),
1190 _ => {}
1191 }
1192
1193 // Handle a leading minus sign.
e74abb32
XL
1194 let minus = s.starts_with('-');
1195 if minus || s.starts_with('+') {
3b2f2976
XL
1196 s = &s[1..];
1197 if s.is_empty() {
1198 return Err(ParseError("String has no digits"));
1199 }
1200 }
1201
1202 // Adjust the rounding mode for the absolute value below.
1203 if minus {
1204 round = -round;
1205 }
1206
1207 let r = if s.starts_with("0x") || s.starts_with("0X") {
1208 s = &s[2..];
1209 if s.is_empty() {
1210 return Err(ParseError("Invalid string"));
1211 }
1212 Self::from_hexadecimal_string(s, round)?
1213 } else {
1214 Self::from_decimal_string(s, round)?
1215 };
1216
1217 Ok(r.map(|r| if minus { -r } else { r }))
1218 }
1219
1220 fn to_bits(self) -> u128 {
1221 // Dispatch to semantics.
1222 S::to_bits(self)
1223 }
1224
1225 fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128> {
1226 // The result of trying to convert a number too large.
1227 let overflow = if self.sign {
1228 // Negative numbers cannot be represented as unsigned.
1229 0
1230 } else {
1231 // Largest unsigned integer of the given width.
1232 !0 >> (128 - width)
1233 };
1234
1235 *is_exact = false;
1236
1237 match self.category {
1238 Category::NaN => Status::INVALID_OP.and(0),
1239
1240 Category::Infinity => Status::INVALID_OP.and(overflow),
1241
1242 Category::Zero => {
1243 // Negative zero can't be represented as an int.
1244 *is_exact = !self.sign;
1245 Status::OK.and(0)
1246 }
1247
1248 Category::Normal => {
1249 let mut r = 0;
1250
1251 // Step 1: place our absolute value, with any fraction truncated, in
1252 // the destination.
1253 let truncated_bits = if self.exp < 0 {
1254 // Our absolute value is less than one; truncate everything.
1255 // For exponent -1 the integer bit represents .5, look at that.
1256 // For smaller exponents leftmost truncated bit is 0.
1257 S::PRECISION - 1 + (-self.exp) as usize
1258 } else {
1259 // We want the most significant (exponent + 1) bits; the rest are
1260 // truncated.
1261 let bits = self.exp as usize + 1;
1262
1263 // Hopelessly large in magnitude?
1264 if bits > width {
1265 return Status::INVALID_OP.and(overflow);
1266 }
1267
1268 if bits < S::PRECISION {
1269 // We truncate (S::PRECISION - bits) bits.
1270 r = self.sig[0] >> (S::PRECISION - bits);
1271 S::PRECISION - bits
1272 } else {
1273 // We want at least as many bits as are available.
1274 r = self.sig[0] << (bits - S::PRECISION);
1275 0
1276 }
1277 };
1278
1279 // Step 2: work out any lost fraction, and increment the absolute
1280 // value if we would round away from zero.
1281 let mut loss = Loss::ExactlyZero;
1282 if truncated_bits > 0 {
1283 loss = Loss::through_truncation(&self.sig, truncated_bits);
dfeec247
XL
1284 if loss != Loss::ExactlyZero
1285 && self.round_away_from_zero(round, loss, truncated_bits)
3b2f2976
XL
1286 {
1287 r = r.wrapping_add(1);
1288 if r == 0 {
1289 return Status::INVALID_OP.and(overflow); // Overflow.
1290 }
1291 }
1292 }
1293
1294 // Step 3: check if we fit in the destination.
1295 if r > overflow {
1296 return Status::INVALID_OP.and(overflow);
1297 }
1298
1299 if loss == Loss::ExactlyZero {
1300 *is_exact = true;
1301 Status::OK.and(r)
1302 } else {
1303 Status::INEXACT.and(r)
1304 }
1305 }
1306 }
1307 }
1308
1309 fn cmp_abs_normal(self, rhs: Self) -> Ordering {
1310 assert!(self.is_finite_non_zero());
1311 assert!(rhs.is_finite_non_zero());
1312
1313 // If exponents are equal, do an unsigned comparison of the significands.
dfeec247 1314 self.exp.cmp(&rhs.exp).then_with(|| sig::cmp(&self.sig, &rhs.sig))
3b2f2976
XL
1315 }
1316
1317 fn bitwise_eq(self, rhs: Self) -> bool {
1318 if self.category != rhs.category || self.sign != rhs.sign {
1319 return false;
1320 }
1321
1322 if self.category == Category::Zero || self.category == Category::Infinity {
1323 return true;
1324 }
1325
1326 if self.is_finite_non_zero() && self.exp != rhs.exp {
1327 return false;
1328 }
1329
1330 self.sig == rhs.sig
1331 }
1332
1333 fn is_negative(self) -> bool {
1334 self.sign
1335 }
1336
1337 fn is_denormal(self) -> bool {
dfeec247
XL
1338 self.is_finite_non_zero()
1339 && self.exp == S::MIN_EXP
1340 && !sig::get_bit(&self.sig, S::PRECISION - 1)
3b2f2976
XL
1341 }
1342
1343 fn is_signaling(self) -> bool {
1344 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
1345 // first bit of the trailing significand being 0.
1346 self.is_nan() && !sig::get_bit(&self.sig, S::QNAN_BIT)
1347 }
1348
1349 fn category(self) -> Category {
1350 self.category
1351 }
1352
1353 fn get_exact_inverse(self) -> Option<Self> {
1354 // Special floats and denormals have no exact inverse.
1355 if !self.is_finite_non_zero() {
1356 return None;
1357 }
1358
1359 // Check that the number is a power of two by making sure that only the
1360 // integer bit is set in the significand.
1361 if self.sig != [1 << (S::PRECISION - 1)] {
1362 return None;
1363 }
1364
1365 // Get the inverse.
1366 let mut reciprocal = Self::from_u128(1).value;
1367 let status;
1368 reciprocal = unpack!(status=, reciprocal / self);
1369 if status != Status::OK {
1370 return None;
1371 }
1372
1373 // Avoid multiplication with a denormal, it is not safe on all platforms and
1374 // may be slower than a normal division.
1375 if reciprocal.is_denormal() {
1376 return None;
1377 }
1378
1379 assert!(reciprocal.is_finite_non_zero());
1380 assert_eq!(reciprocal.sig, [1 << (S::PRECISION - 1)]);
1381
1382 Some(reciprocal)
1383 }
1384
1385 fn ilogb(mut self) -> ExpInt {
1386 if self.is_nan() {
1387 return IEK_NAN;
1388 }
1389 if self.is_zero() {
1390 return IEK_ZERO;
1391 }
1392 if self.is_infinite() {
1393 return IEK_INF;
1394 }
1395 if !self.is_denormal() {
1396 return self.exp;
1397 }
1398
1399 let sig_bits = (S::PRECISION - 1) as ExpInt;
1400 self.exp += sig_bits;
dfeec247 1401 self = self.normalize(Round::NearestTiesToEven, Loss::ExactlyZero).value;
3b2f2976
XL
1402 self.exp - sig_bits
1403 }
1404
1405 fn scalbn_r(mut self, exp: ExpInt, round: Round) -> Self {
1406 // If exp is wildly out-of-scale, simply adding it to self.exp will
1407 // overflow; clamp it to a safe range before adding, but ensure that the range
1408 // is large enough that the clamp does not change the result. The range we
1409 // need to support is the difference between the largest possible exponent and
1410 // the normalized exponent of half the smallest denormal.
1411
1412 let sig_bits = (S::PRECISION - 1) as i32;
1413 let max_change = S::MAX_EXP as i32 - (S::MIN_EXP as i32 - sig_bits) + 1;
1414
1415 // Clamp to one past the range ends to let normalize handle overflow.
2c00a5a8 1416 let exp_change = cmp::min(cmp::max(exp as i32, -max_change - 1), max_change);
3b2f2976
XL
1417 self.exp = self.exp.saturating_add(exp_change as ExpInt);
1418 self = self.normalize(round, Loss::ExactlyZero).value;
1419 if self.is_nan() {
1420 sig::set_bit(&mut self.sig, S::QNAN_BIT);
1421 }
1422 self
1423 }
1424
1425 fn frexp_r(mut self, exp: &mut ExpInt, round: Round) -> Self {
1426 *exp = self.ilogb();
1427
1428 // Quiet signalling nans.
1429 if *exp == IEK_NAN {
1430 sig::set_bit(&mut self.sig, S::QNAN_BIT);
1431 return self;
1432 }
1433
1434 if *exp == IEK_INF {
1435 return self;
1436 }
1437
1438 // 1 is added because frexp is defined to return a normalized fraction in
1439 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
1440 if *exp == IEK_ZERO {
1441 *exp = 0;
1442 } else {
1443 *exp += 1;
1444 }
1445 self.scalbn_r(-*exp, round)
1446 }
1447}
1448
1449impl<S: Semantics, T: Semantics> FloatConvert<IeeeFloat<T>> for IeeeFloat<S> {
1450 fn convert_r(self, round: Round, loses_info: &mut bool) -> StatusAnd<IeeeFloat<T>> {
1451 let mut r = IeeeFloat {
1452 sig: self.sig,
1453 exp: self.exp,
1454 category: self.category,
1455 sign: self.sign,
1456 marker: PhantomData,
1457 };
1458
1459 // x86 has some unusual NaNs which cannot be represented in any other
1460 // format; note them here.
1461 fn is_x87_double_extended<S: Semantics>() -> bool {
1462 S::QNAN_SIGNIFICAND == X87DoubleExtendedS::QNAN_SIGNIFICAND
1463 }
dfeec247
XL
1464 let x87_special_nan = is_x87_double_extended::<S>()
1465 && !is_x87_double_extended::<T>()
1466 && r.category == Category::NaN
1467 && (r.sig[0] & S::QNAN_SIGNIFICAND) != S::QNAN_SIGNIFICAND;
3b2f2976
XL
1468
1469 // If this is a truncation of a denormal number, and the target semantics
1470 // has larger exponent range than the source semantics (this can happen
1471 // when truncating from PowerPC double-double to double format), the
1472 // right shift could lose result mantissa bits. Adjust exponent instead
1473 // of performing excessive shift.
1474 let mut shift = T::PRECISION as ExpInt - S::PRECISION as ExpInt;
1475 if shift < 0 && r.is_finite_non_zero() {
1476 let mut exp_change = sig::omsb(&r.sig) as ExpInt - S::PRECISION as ExpInt;
1477 if r.exp + exp_change < T::MIN_EXP {
1478 exp_change = T::MIN_EXP - r.exp;
1479 }
1480 if exp_change < shift {
1481 exp_change = shift;
1482 }
1483 if exp_change < 0 {
1484 shift -= exp_change;
1485 r.exp += exp_change;
1486 }
1487 }
1488
1489 // If this is a truncation, perform the shift.
b7449926
XL
1490 let loss = if shift < 0 && (r.is_finite_non_zero() || r.category == Category::NaN) {
1491 sig::shift_right(&mut r.sig, &mut 0, -shift as usize)
1492 } else {
1493 Loss::ExactlyZero
1494 };
3b2f2976
XL
1495
1496 // If this is an extension, perform the shift.
1497 if shift > 0 && (r.is_finite_non_zero() || r.category == Category::NaN) {
1498 sig::shift_left(&mut r.sig, &mut 0, shift as usize);
1499 }
1500
1501 let status;
1502 if r.is_finite_non_zero() {
1503 r = unpack!(status=, r.normalize(round, loss));
1504 *loses_info = status != Status::OK;
1505 } else if r.category == Category::NaN {
1506 *loses_info = loss != Loss::ExactlyZero || x87_special_nan;
1507
1508 // For x87 extended precision, we want to make a NaN, not a special NaN if
1509 // the input wasn't special either.
1510 if !x87_special_nan && is_x87_double_extended::<T>() {
1511 sig::set_bit(&mut r.sig, T::PRECISION - 1);
1512 }
1513
29967ef6
XL
1514 // Convert of sNaN creates qNaN and raises an exception (invalid op).
1515 // This also guarantees that a sNaN does not become Inf on a truncation
1516 // that loses all payload bits.
1517 if self.is_signaling() {
1518 // Quiet signaling NaN.
1519 sig::set_bit(&mut r.sig, T::QNAN_BIT);
1520 status = Status::INVALID_OP;
1521 } else {
1522 status = Status::OK;
1523 }
3b2f2976
XL
1524 } else {
1525 *loses_info = false;
1526 status = Status::OK;
1527 }
1528
1529 status.and(r)
1530 }
1531}
1532
1533impl<S: Semantics> IeeeFloat<S> {
1534 /// Handle positive overflow. We either return infinity or
1535 /// the largest finite number. For negative overflow,
1536 /// negate the `round` argument before calling.
1537 fn overflow_result(round: Round) -> StatusAnd<Self> {
1538 match round {
1539 // Infinity?
1540 Round::NearestTiesToEven | Round::NearestTiesToAway | Round::TowardPositive => {
1541 (Status::OVERFLOW | Status::INEXACT).and(Self::INFINITY)
1542 }
1543 // Otherwise we become the largest finite number.
1544 Round::TowardNegative | Round::TowardZero => Status::INEXACT.and(Self::largest()),
1545 }
1546 }
1547
9fa01778 1548 /// Returns `true` if, when truncating the current number, with `bit` the
3b2f2976
XL
1549 /// new LSB, with the given lost fraction and rounding mode, the result
1550 /// would need to be rounded away from zero (i.e., by increasing the
9fa01778
XL
1551 /// signficand). This routine must work for `Category::Zero` of both signs, and
1552 /// `Category::Normal` numbers.
3b2f2976
XL
1553 fn round_away_from_zero(&self, round: Round, loss: Loss, bit: usize) -> bool {
1554 // NaNs and infinities should not have lost fractions.
1555 assert!(self.is_finite_non_zero() || self.is_zero());
1556
1557 // Current callers never pass this so we don't handle it.
1558 assert_ne!(loss, Loss::ExactlyZero);
1559
1560 match round {
1561 Round::NearestTiesToAway => loss == Loss::ExactlyHalf || loss == Loss::MoreThanHalf,
1562 Round::NearestTiesToEven => {
1563 if loss == Loss::MoreThanHalf {
1564 return true;
1565 }
1566
1567 // Our zeros don't have a significand to test.
1568 if loss == Loss::ExactlyHalf && self.category != Category::Zero {
1569 return sig::get_bit(&self.sig, bit);
1570 }
1571
1572 false
1573 }
1574 Round::TowardZero => false,
1575 Round::TowardPositive => !self.sign,
1576 Round::TowardNegative => self.sign,
1577 }
1578 }
1579
1580 fn normalize(mut self, round: Round, mut loss: Loss) -> StatusAnd<Self> {
1581 if !self.is_finite_non_zero() {
1582 return Status::OK.and(self);
1583 }
1584
1585 // Before rounding normalize the exponent of Category::Normal numbers.
1586 let mut omsb = sig::omsb(&self.sig);
1587
1588 if omsb > 0 {
1589 // OMSB is numbered from 1. We want to place it in the integer
1590 // bit numbered PRECISION if possible, with a compensating change in
1591 // the exponent.
dfeec247 1592 let mut final_exp = self.exp.saturating_add(omsb as ExpInt - S::PRECISION as ExpInt);
3b2f2976
XL
1593
1594 // If the resulting exponent is too high, overflow according to
1595 // the rounding mode.
1596 if final_exp > S::MAX_EXP {
1597 let round = if self.sign { -round } else { round };
1598 return Self::overflow_result(round).map(|r| r.copy_sign(self));
1599 }
1600
1601 // Subnormal numbers have exponent MIN_EXP, and their MSB
1602 // is forced based on that.
1603 if final_exp < S::MIN_EXP {
1604 final_exp = S::MIN_EXP;
1605 }
1606
1607 // Shifting left is easy as we don't lose precision.
1608 if final_exp < self.exp {
1609 assert_eq!(loss, Loss::ExactlyZero);
1610
1611 let exp_change = (self.exp - final_exp) as usize;
1612 sig::shift_left(&mut self.sig, &mut self.exp, exp_change);
1613
1614 return Status::OK.and(self);
1615 }
1616
1617 // Shift right and capture any new lost fraction.
1618 if final_exp > self.exp {
1619 let exp_change = (final_exp - self.exp) as usize;
1620 loss = sig::shift_right(&mut self.sig, &mut self.exp, exp_change).combine(loss);
1621
1622 // Keep OMSB up-to-date.
1623 omsb = omsb.saturating_sub(exp_change);
1624 }
1625 }
1626
1627 // Now round the number according to round given the lost
1628 // fraction.
1629
1630 // As specified in IEEE 754, since we do not trap we do not report
1631 // underflow for exact results.
1632 if loss == Loss::ExactlyZero {
1633 // Canonicalize zeros.
1634 if omsb == 0 {
1635 self.category = Category::Zero;
1636 }
1637
1638 return Status::OK.and(self);
1639 }
1640
1641 // Increment the significand if we're rounding away from zero.
1642 if self.round_away_from_zero(round, loss, 0) {
1643 if omsb == 0 {
1644 self.exp = S::MIN_EXP;
1645 }
1646
1647 // We should never overflow.
1648 assert_eq!(sig::increment(&mut self.sig), 0);
1649 omsb = sig::omsb(&self.sig);
1650
1651 // Did the significand increment overflow?
1652 if omsb == S::PRECISION + 1 {
1653 // Renormalize by incrementing the exponent and shifting our
1654 // significand right one. However if we already have the
1655 // maximum exponent we overflow to infinity.
1656 if self.exp == S::MAX_EXP {
1657 self.category = Category::Infinity;
1658
1659 return (Status::OVERFLOW | Status::INEXACT).and(self);
1660 }
1661
1662 let _: Loss = sig::shift_right(&mut self.sig, &mut self.exp, 1);
1663
1664 return Status::INEXACT.and(self);
1665 }
1666 }
1667
1668 // The normal case - we were and are not denormal, and any
1669 // significand increment above didn't overflow.
1670 if omsb == S::PRECISION {
1671 return Status::INEXACT.and(self);
1672 }
1673
1674 // We have a non-zero denormal.
1675 assert!(omsb < S::PRECISION);
1676
1677 // Canonicalize zeros.
1678 if omsb == 0 {
1679 self.category = Category::Zero;
1680 }
1681
1682 // The Category::Zero case is a denormal that underflowed to zero.
1683 (Status::UNDERFLOW | Status::INEXACT).and(self)
1684 }
1685
1686 fn from_hexadecimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> {
1687 let mut r = IeeeFloat {
1688 sig: [0],
1689 exp: 0,
1690 category: Category::Normal,
1691 sign: false,
1692 marker: PhantomData,
1693 };
1694
1695 let mut any_digits = false;
1696 let mut has_exp = false;
1697 let mut bit_pos = LIMB_BITS as isize;
1698 let mut loss = None;
1699
1700 // Without leading or trailing zeros, irrespective of the dot.
1701 let mut first_sig_digit = None;
1702 let mut dot = s.len();
1703
1704 for (p, c) in s.char_indices() {
1705 // Skip leading zeros and any (hexa)decimal point.
1706 if c == '.' {
1707 if dot != s.len() {
1708 return Err(ParseError("String contains multiple dots"));
1709 }
1710 dot = p;
1711 } else if let Some(hex_value) = c.to_digit(16) {
1712 any_digits = true;
1713
1714 if first_sig_digit.is_none() {
1715 if hex_value == 0 {
1716 continue;
1717 }
1718 first_sig_digit = Some(p);
1719 }
1720
1721 // Store the number while we have space.
1722 bit_pos -= 4;
1723 if bit_pos >= 0 {
1724 r.sig[0] |= (hex_value as Limb) << bit_pos;
b7449926
XL
1725 // If zero or one-half (the hexadecimal digit 8) are followed
1726 // by non-zero, they're a little more than zero or one-half.
1727 } else if let Some(ref mut loss) = loss {
1728 if hex_value != 0 {
1729 if *loss == Loss::ExactlyZero {
1730 *loss = Loss::LessThanHalf;
1731 }
1732 if *loss == Loss::ExactlyHalf {
1733 *loss = Loss::MoreThanHalf;
3b2f2976 1734 }
3b2f2976 1735 }
b7449926
XL
1736 } else {
1737 loss = Some(match hex_value {
1738 0 => Loss::ExactlyZero,
1739 1..=7 => Loss::LessThanHalf,
1740 8 => Loss::ExactlyHalf,
1741 9..=15 => Loss::MoreThanHalf,
1742 _ => unreachable!(),
1743 });
3b2f2976
XL
1744 }
1745 } else if c == 'p' || c == 'P' {
1746 if !any_digits {
1747 return Err(ParseError("Significand has no digits"));
1748 }
1749
1750 if dot == s.len() {
1751 dot = p;
1752 }
1753
1754 let mut chars = s[p + 1..].chars().peekable();
1755
1756 // Adjust for the given exponent.
1757 let exp_minus = chars.peek() == Some(&'-');
1758 if exp_minus || chars.peek() == Some(&'+') {
1759 chars.next();
1760 }
1761
1762 for c in chars {
1763 if let Some(value) = c.to_digit(10) {
1764 has_exp = true;
1765 r.exp = r.exp.saturating_mul(10).saturating_add(value as ExpInt);
1766 } else {
1767 return Err(ParseError("Invalid character in exponent"));
1768 }
1769 }
1770 if !has_exp {
1771 return Err(ParseError("Exponent has no digits"));
1772 }
1773
1774 if exp_minus {
1775 r.exp = -r.exp;
1776 }
1777
1778 break;
1779 } else {
1780 return Err(ParseError("Invalid character in significand"));
1781 }
1782 }
1783 if !any_digits {
1784 return Err(ParseError("Significand has no digits"));
1785 }
1786
1787 // Hex floats require an exponent but not a hexadecimal point.
1788 if !has_exp {
1789 return Err(ParseError("Hex strings require an exponent"));
1790 }
1791
1792 // Ignore the exponent if we are zero.
1793 let first_sig_digit = match first_sig_digit {
1794 Some(p) => p,
1795 None => return Ok(Status::OK.and(Self::ZERO)),
1796 };
1797
1798 // Calculate the exponent adjustment implicit in the number of
1799 // significant digits and adjust for writing the significand starting
1800 // at the most significant nibble.
1801 let exp_adjustment = if dot > first_sig_digit {
1802 ExpInt::try_from(dot - first_sig_digit).unwrap()
1803 } else {
1804 -ExpInt::try_from(first_sig_digit - dot - 1).unwrap()
1805 };
1806 let exp_adjustment = exp_adjustment
1807 .saturating_mul(4)
1808 .saturating_sub(1)
1809 .saturating_add(S::PRECISION as ExpInt)
1810 .saturating_sub(LIMB_BITS as ExpInt);
1811 r.exp = r.exp.saturating_add(exp_adjustment);
1812
1813 Ok(r.normalize(round, loss.unwrap_or(Loss::ExactlyZero)))
1814 }
1815
1816 fn from_decimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> {
1817 // Given a normal decimal floating point number of the form
1818 //
1819 // dddd.dddd[eE][+-]ddd
1820 //
1821 // where the decimal point and exponent are optional, fill out the
1822 // variables below. Exponent is appropriate if the significand is
1823 // treated as an integer, and normalized_exp if the significand
1824 // is taken to have the decimal point after a single leading
1825 // non-zero digit.
1826 //
1827 // If the value is zero, first_sig_digit is None.
1828
1829 let mut any_digits = false;
1830 let mut dec_exp = 0i32;
1831
1832 // Without leading or trailing zeros, irrespective of the dot.
1833 let mut first_sig_digit = None;
1834 let mut last_sig_digit = 0;
1835 let mut dot = s.len();
1836
1837 for (p, c) in s.char_indices() {
1838 if c == '.' {
1839 if dot != s.len() {
1840 return Err(ParseError("String contains multiple dots"));
1841 }
1842 dot = p;
1843 } else if let Some(dec_value) = c.to_digit(10) {
1844 any_digits = true;
1845
1846 if dec_value != 0 {
1847 if first_sig_digit.is_none() {
1848 first_sig_digit = Some(p);
1849 }
1850 last_sig_digit = p;
1851 }
1852 } else if c == 'e' || c == 'E' {
1853 if !any_digits {
1854 return Err(ParseError("Significand has no digits"));
1855 }
1856
1857 if dot == s.len() {
1858 dot = p;
1859 }
1860
1861 let mut chars = s[p + 1..].chars().peekable();
1862
1863 // Adjust for the given exponent.
1864 let exp_minus = chars.peek() == Some(&'-');
1865 if exp_minus || chars.peek() == Some(&'+') {
1866 chars.next();
1867 }
1868
1869 any_digits = false;
1870 for c in chars {
1871 if let Some(value) = c.to_digit(10) {
1872 any_digits = true;
1873 dec_exp = dec_exp.saturating_mul(10).saturating_add(value as i32);
1874 } else {
1875 return Err(ParseError("Invalid character in exponent"));
1876 }
1877 }
1878 if !any_digits {
1879 return Err(ParseError("Exponent has no digits"));
1880 }
1881
1882 if exp_minus {
1883 dec_exp = -dec_exp;
1884 }
1885
1886 break;
1887 } else {
1888 return Err(ParseError("Invalid character in significand"));
1889 }
1890 }
1891 if !any_digits {
1892 return Err(ParseError("Significand has no digits"));
1893 }
1894
1895 // Test if we have a zero number allowing for non-zero exponents.
1896 let first_sig_digit = match first_sig_digit {
1897 Some(p) => p,
1898 None => return Ok(Status::OK.and(Self::ZERO)),
1899 };
1900
1901 // Adjust the exponents for any decimal point.
1902 if dot > last_sig_digit {
1903 dec_exp = dec_exp.saturating_add((dot - last_sig_digit - 1) as i32);
1904 } else {
1905 dec_exp = dec_exp.saturating_sub((last_sig_digit - dot) as i32);
1906 }
dfeec247
XL
1907 let significand_digits = last_sig_digit - first_sig_digit + 1
1908 - (dot > first_sig_digit && dot < last_sig_digit) as usize;
3b2f2976
XL
1909 let normalized_exp = dec_exp.saturating_add(significand_digits as i32 - 1);
1910
1911 // Handle the cases where exponents are obviously too large or too
1912 // small. Writing L for log 10 / log 2, a number d.ddddd*10^dec_exp
1913 // definitely overflows if
1914 //
1915 // (dec_exp - 1) * L >= MAX_EXP
1916 //
1917 // and definitely underflows to zero where
1918 //
1919 // (dec_exp + 1) * L <= MIN_EXP - PRECISION
1920 //
1921 // With integer arithmetic the tightest bounds for L are
1922 //
1923 // 93/28 < L < 196/59 [ numerator <= 256 ]
1924 // 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
1925
1926 // Check for MAX_EXP.
1927 if normalized_exp.saturating_sub(1).saturating_mul(42039) >= 12655 * S::MAX_EXP as i32 {
1928 // Overflow and round.
1929 return Ok(Self::overflow_result(round));
1930 }
1931
1932 // Check for MIN_EXP.
dfeec247
XL
1933 if normalized_exp.saturating_add(1).saturating_mul(28738)
1934 <= 8651 * (S::MIN_EXP as i32 - S::PRECISION as i32)
3b2f2976
XL
1935 {
1936 // Underflow to zero and round.
dfeec247
XL
1937 let r =
1938 if round == Round::TowardPositive { IeeeFloat::SMALLEST } else { IeeeFloat::ZERO };
3b2f2976
XL
1939 return Ok((Status::UNDERFLOW | Status::INEXACT).and(r));
1940 }
1941
1942 // A tight upper bound on number of bits required to hold an
1943 // N-digit decimal integer is N * 196 / 59. Allocate enough space
1944 // to hold the full significand, and an extra limb required by
1945 // tcMultiplyPart.
1946 let max_limbs = limbs_for_bits(1 + 196 * significand_digits / 59);
a1dfa0c6 1947 let mut dec_sig: SmallVec<[Limb; 1]> = SmallVec::with_capacity(max_limbs);
3b2f2976
XL
1948
1949 // Convert to binary efficiently - we do almost all multiplication
1950 // in a Limb. When this would overflow do we do a single
1951 // bignum multiplication, and then revert again to multiplication
1952 // in a Limb.
0731742a 1953 let mut chars = s[first_sig_digit..=last_sig_digit].chars();
3b2f2976
XL
1954 loop {
1955 let mut val = 0;
1956 let mut multiplier = 1;
1957
1958 loop {
1959 let dec_value = match chars.next() {
1960 Some('.') => continue,
1961 Some(c) => c.to_digit(10).unwrap(),
1962 None => break,
1963 };
1964
1965 multiplier *= 10;
1966 val = val * 10 + dec_value as Limb;
1967
1968 // The maximum number that can be multiplied by ten with any
1969 // digit added without overflowing a Limb.
1970 if multiplier > (!0 - 9) / 10 {
1971 break;
1972 }
1973 }
1974
1975 // If we've consumed no digits, we're done.
1976 if multiplier == 1 {
1977 break;
1978 }
1979
1980 // Multiply out the current limb.
1981 let mut carry = val;
1982 for x in &mut dec_sig {
1983 let [low, mut high] = sig::widening_mul(*x, multiplier);
1984
1985 // Now add carry.
1986 let (low, overflow) = low.overflowing_add(carry);
1987 high += overflow as Limb;
1988
1989 *x = low;
1990 carry = high;
1991 }
1992
1993 // If we had carry, we need another limb (likely but not guaranteed).
1994 if carry > 0 {
1995 dec_sig.push(carry);
1996 }
1997 }
1998
1999 // Calculate pow(5, abs(dec_exp)) into `pow5_full`.
2000 // The *_calc Vec's are reused scratch space, as an optimization.
2001 let (pow5_full, mut pow5_calc, mut sig_calc, mut sig_scratch_calc) = {
2002 let mut power = dec_exp.abs() as usize;
2003
2004 const FIRST_EIGHT_POWERS: [Limb; 8] = [1, 5, 25, 125, 625, 3125, 15625, 78125];
2005
a1dfa0c6
XL
2006 let mut p5_scratch = smallvec![];
2007 let mut p5: SmallVec<[Limb; 1]> = smallvec![FIRST_EIGHT_POWERS[4]];
3b2f2976 2008
a1dfa0c6
XL
2009 let mut r_scratch = smallvec![];
2010 let mut r: SmallVec<[Limb; 1]> = smallvec![FIRST_EIGHT_POWERS[power & 7]];
3b2f2976
XL
2011 power >>= 3;
2012
2013 while power > 0 {
2014 // Calculate pow(5,pow(2,n+3)).
2015 p5_scratch.resize(p5.len() * 2, 0);
2016 let _: Loss = sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS);
2017 while p5_scratch.last() == Some(&0) {
2018 p5_scratch.pop();
2019 }
2020 mem::swap(&mut p5, &mut p5_scratch);
2021
2022 if power & 1 != 0 {
2023 r_scratch.resize(r.len() + p5.len(), 0);
dfeec247
XL
2024 let _: Loss =
2025 sig::mul(&mut r_scratch, &mut 0, &r, &p5, (r.len() + p5.len()) * LIMB_BITS);
3b2f2976
XL
2026 while r_scratch.last() == Some(&0) {
2027 r_scratch.pop();
2028 }
2029 mem::swap(&mut r, &mut r_scratch);
2030 }
2031
2032 power >>= 1;
2033 }
2034
2035 (r, r_scratch, p5, p5_scratch)
2036 };
2037
2038 // Attempt dec_sig * 10^dec_exp with increasing precision.
2039 let mut attempt = 0;
2040 loop {
2041 let calc_precision = (LIMB_BITS << attempt) - 1;
2042 attempt += 1;
2043
a1dfa0c6 2044 let calc_normal_from_limbs = |sig: &mut SmallVec<[Limb; 1]>,
3b2f2976
XL
2045 limbs: &[Limb]|
2046 -> StatusAnd<ExpInt> {
2047 sig.resize(limbs_for_bits(calc_precision), 0);
2048 let (mut loss, mut exp) = sig::from_limbs(sig, limbs, calc_precision);
2049
2050 // Before rounding normalize the exponent of Category::Normal numbers.
2051 let mut omsb = sig::omsb(sig);
2052
2053 assert_ne!(omsb, 0);
2054
2055 // OMSB is numbered from 1. We want to place it in the integer
2056 // bit numbered PRECISION if possible, with a compensating change in
2057 // the exponent.
2058 let final_exp = exp.saturating_add(omsb as ExpInt - calc_precision as ExpInt);
2059
2060 // Shifting left is easy as we don't lose precision.
2061 if final_exp < exp {
2062 assert_eq!(loss, Loss::ExactlyZero);
2063
2064 let exp_change = (exp - final_exp) as usize;
2065 sig::shift_left(sig, &mut exp, exp_change);
2066
2067 return Status::OK.and(exp);
2068 }
2069
2070 // Shift right and capture any new lost fraction.
2071 if final_exp > exp {
2072 let exp_change = (final_exp - exp) as usize;
2073 loss = sig::shift_right(sig, &mut exp, exp_change).combine(loss);
2074
2075 // Keep OMSB up-to-date.
2076 omsb = omsb.saturating_sub(exp_change);
2077 }
2078
2079 assert_eq!(omsb, calc_precision);
2080
2081 // Now round the number according to round given the lost
2082 // fraction.
2083
2084 // As specified in IEEE 754, since we do not trap we do not report
2085 // underflow for exact results.
2086 if loss == Loss::ExactlyZero {
2087 return Status::OK.and(exp);
2088 }
2089
2090 // Increment the significand if we're rounding away from zero.
2091 if loss == Loss::MoreThanHalf || loss == Loss::ExactlyHalf && sig::get_bit(sig, 0) {
2092 // We should never overflow.
2093 assert_eq!(sig::increment(sig), 0);
2094 omsb = sig::omsb(sig);
2095
2096 // Did the significand increment overflow?
2097 if omsb == calc_precision + 1 {
2098 let _: Loss = sig::shift_right(sig, &mut exp, 1);
2099
2100 return Status::INEXACT.and(exp);
2101 }
2102 }
2103
2104 // The normal case - we were and are not denormal, and any
2105 // significand increment above didn't overflow.
2106 Status::INEXACT.and(exp)
2107 };
2108
2109 let status;
2110 let mut exp = unpack!(status=,
2111 calc_normal_from_limbs(&mut sig_calc, &dec_sig));
2112 let pow5_status;
2113 let pow5_exp = unpack!(pow5_status=,
2114 calc_normal_from_limbs(&mut pow5_calc, &pow5_full));
2115
2116 // Add dec_exp, as 10^n = 5^n * 2^n.
2117 exp += dec_exp as ExpInt;
2118
2119 let mut used_bits = S::PRECISION;
2120 let mut truncated_bits = calc_precision - used_bits;
2121
2122 let half_ulp_err1 = (status != Status::OK) as Limb;
2123 let (calc_loss, half_ulp_err2);
2124 if dec_exp >= 0 {
2125 exp += pow5_exp;
2126
2127 sig_scratch_calc.resize(sig_calc.len() + pow5_calc.len(), 0);
2128 calc_loss = sig::mul(
2129 &mut sig_scratch_calc,
2130 &mut exp,
2131 &sig_calc,
2132 &pow5_calc,
2133 calc_precision,
2134 );
2135 mem::swap(&mut sig_calc, &mut sig_scratch_calc);
2136
2137 half_ulp_err2 = (pow5_status != Status::OK) as Limb;
2138 } else {
2139 exp -= pow5_exp;
2140
2141 sig_scratch_calc.resize(sig_calc.len(), 0);
2142 calc_loss = sig::div(
2143 &mut sig_scratch_calc,
2144 &mut exp,
2145 &mut sig_calc,
2146 &mut pow5_calc,
2147 calc_precision,
2148 );
2149 mem::swap(&mut sig_calc, &mut sig_scratch_calc);
2150
2151 // Denormal numbers have less precision.
2152 if exp < S::MIN_EXP {
2153 truncated_bits += (S::MIN_EXP - exp) as usize;
2154 used_bits = calc_precision.saturating_sub(truncated_bits);
2155 }
2156 // Extra half-ulp lost in reciprocal of exponent.
dfeec247
XL
2157 half_ulp_err2 =
2158 2 * (pow5_status != Status::OK || calc_loss != Loss::ExactlyZero) as Limb;
3b2f2976
XL
2159 }
2160
2161 // Both sig::mul and sig::div return the
2162 // result with the integer bit set.
2163 assert!(sig::get_bit(&sig_calc, calc_precision - 1));
2164
2165 // The error from the true value, in half-ulps, on multiplying two
2166 // floating point numbers, which differ from the value they
2167 // approximate by at most half_ulp_err1 and half_ulp_err2 half-ulps, is strictly less
2168 // than the returned value.
2169 //
2170 // See "How to Read Floating Point Numbers Accurately" by William D Clinger.
dfeec247 2171 assert!(half_ulp_err1 < 2 || half_ulp_err2 < 2 || (half_ulp_err1 + half_ulp_err2 < 8));
3b2f2976
XL
2172
2173 let inexact = (calc_loss != Loss::ExactlyZero) as Limb;
2174 let half_ulp_err = if half_ulp_err1 + half_ulp_err2 == 0 {
2175 inexact * 2 // <= inexact half-ulps.
2176 } else {
2177 inexact + 2 * (half_ulp_err1 + half_ulp_err2)
2178 };
2179
2180 let ulps_from_boundary = {
2181 let bits = calc_precision - used_bits - 1;
2182
2183 let i = bits / LIMB_BITS;
2184 let limb = sig_calc[i] & (!0 >> (LIMB_BITS - 1 - bits % LIMB_BITS));
2185 let boundary = match round {
2186 Round::NearestTiesToEven | Round::NearestTiesToAway => 1 << (bits % LIMB_BITS),
2187 _ => 0,
2188 };
2189 if i == 0 {
2190 let delta = limb.wrapping_sub(boundary);
2191 cmp::min(delta, delta.wrapping_neg())
2192 } else if limb == boundary {
2193 if !sig::is_all_zeros(&sig_calc[1..i]) {
2194 !0 // A lot.
2195 } else {
2196 sig_calc[0]
2197 }
2198 } else if limb == boundary.wrapping_sub(1) {
2199 if sig_calc[1..i].iter().any(|&x| x.wrapping_neg() != 1) {
2200 !0 // A lot.
2201 } else {
2202 sig_calc[0].wrapping_neg()
2203 }
2204 } else {
2205 !0 // A lot.
2206 }
2207 };
2208
2209 // Are we guaranteed to round correctly if we truncate?
2210 if ulps_from_boundary.saturating_mul(2) >= half_ulp_err {
2211 let mut r = IeeeFloat {
2212 sig: [0],
2213 exp,
2214 category: Category::Normal,
2215 sign: false,
2216 marker: PhantomData,
2217 };
2218 sig::extract(&mut r.sig, &sig_calc, used_bits, calc_precision - used_bits);
2219 // If we extracted less bits above we must adjust our exponent
2220 // to compensate for the implicit right shift.
2221 r.exp += (S::PRECISION - used_bits) as ExpInt;
2222 let loss = Loss::through_truncation(&sig_calc, truncated_bits);
2223 return Ok(r.normalize(round, loss));
2224 }
2225 }
2226 }
2227}
2228
2229impl Loss {
2230 /// Combine the effect of two lost fractions.
2231 fn combine(self, less_significant: Loss) -> Loss {
2232 let mut more_significant = self;
2233 if less_significant != Loss::ExactlyZero {
2234 if more_significant == Loss::ExactlyZero {
2235 more_significant = Loss::LessThanHalf;
2236 } else if more_significant == Loss::ExactlyHalf {
2237 more_significant = Loss::MoreThanHalf;
2238 }
2239 }
2240
2241 more_significant
2242 }
2243
9fa01778 2244 /// Returns the fraction lost were a bignum truncated losing the least
3b2f2976
XL
2245 /// significant `bits` bits.
2246 fn through_truncation(limbs: &[Limb], bits: usize) -> Loss {
2247 if bits == 0 {
2248 return Loss::ExactlyZero;
2249 }
2250
2251 let half_bit = bits - 1;
2252 let half_limb = half_bit / LIMB_BITS;
2253 let (half_limb, rest) = if half_limb < limbs.len() {
2254 (limbs[half_limb], &limbs[..half_limb])
2255 } else {
2256 (0, limbs)
2257 };
2258 let half = 1 << (half_bit % LIMB_BITS);
2259 let has_half = half_limb & half != 0;
2260 let has_rest = half_limb & (half - 1) != 0 || !sig::is_all_zeros(rest);
2261
2262 match (has_half, has_rest) {
2263 (false, false) => Loss::ExactlyZero,
2264 (false, true) => Loss::LessThanHalf,
2265 (true, false) => Loss::ExactlyHalf,
2266 (true, true) => Loss::MoreThanHalf,
2267 }
2268 }
2269}
2270
2271/// Implementation details of IeeeFloat significands, such as big integer arithmetic.
2272/// As a rule of thumb, no functions in this module should dynamically allocate.
2273mod sig {
dfeec247 2274 use super::{limbs_for_bits, ExpInt, Limb, Loss, LIMB_BITS};
e1599b0c 2275 use core::cmp::Ordering;
cdc7bbd5 2276 use core::iter;
e1599b0c 2277 use core::mem;
3b2f2976
XL
2278
2279 pub(super) fn is_all_zeros(limbs: &[Limb]) -> bool {
2280 limbs.iter().all(|&l| l == 0)
2281 }
2282
2283 /// One, not zero, based LSB. That is, returns 0 for a zeroed significand.
2284 pub(super) fn olsb(limbs: &[Limb]) -> usize {
dfeec247
XL
2285 limbs
2286 .iter()
2287 .enumerate()
2288 .find(|(_, &limb)| limb != 0)
2289 .map_or(0, |(i, limb)| i * LIMB_BITS + limb.trailing_zeros() as usize + 1)
3b2f2976
XL
2290 }
2291
2292 /// One, not zero, based MSB. That is, returns 0 for a zeroed significand.
2293 pub(super) fn omsb(limbs: &[Limb]) -> usize {
dfeec247
XL
2294 limbs
2295 .iter()
2296 .enumerate()
2297 .rfind(|(_, &limb)| limb != 0)
2298 .map_or(0, |(i, limb)| (i + 1) * LIMB_BITS - limb.leading_zeros() as usize)
3b2f2976
XL
2299 }
2300
2301 /// Comparison (unsigned) of two significands.
2302 pub(super) fn cmp(a: &[Limb], b: &[Limb]) -> Ordering {
2303 assert_eq!(a.len(), b.len());
2304 for (a, b) in a.iter().zip(b).rev() {
2305 match a.cmp(b) {
2306 Ordering::Equal => {}
2307 o => return o,
2308 }
2309 }
2310
2311 Ordering::Equal
2312 }
2313
9fa01778 2314 /// Extracts the given bit.
3b2f2976
XL
2315 pub(super) fn get_bit(limbs: &[Limb], bit: usize) -> bool {
2316 limbs[bit / LIMB_BITS] & (1 << (bit % LIMB_BITS)) != 0
2317 }
2318
9fa01778 2319 /// Sets the given bit.
3b2f2976
XL
2320 pub(super) fn set_bit(limbs: &mut [Limb], bit: usize) {
2321 limbs[bit / LIMB_BITS] |= 1 << (bit % LIMB_BITS);
2322 }
2323
2324 /// Clear the given bit.
2325 pub(super) fn clear_bit(limbs: &mut [Limb], bit: usize) {
2326 limbs[bit / LIMB_BITS] &= !(1 << (bit % LIMB_BITS));
2327 }
2328
9fa01778 2329 /// Shifts `dst` left `bits` bits, subtract `bits` from its exponent.
3b2f2976
XL
2330 pub(super) fn shift_left(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) {
2331 if bits > 0 {
2332 // Our exponent should not underflow.
2333 *exp = exp.checked_sub(bits as ExpInt).unwrap();
2334
0731742a 2335 // Jump is the inter-limb jump; shift is the intra-limb shift.
3b2f2976
XL
2336 let jump = bits / LIMB_BITS;
2337 let shift = bits % LIMB_BITS;
2338
2339 for i in (0..dst.len()).rev() {
2340 let mut limb;
2341
2342 if i < jump {
2343 limb = 0;
2344 } else {
2345 // dst[i] comes from the two limbs src[i - jump] and, if we have
2346 // an intra-limb shift, src[i - jump - 1].
2347 limb = dst[i - jump];
2348 if shift > 0 {
2349 limb <<= shift;
b7449926 2350 if i > jump {
3b2f2976
XL
2351 limb |= dst[i - jump - 1] >> (LIMB_BITS - shift);
2352 }
2353 }
2354 }
2355
2356 dst[i] = limb;
2357 }
2358 }
2359 }
2360
9fa01778 2361 /// Shifts `dst` right `bits` bits noting lost fraction.
3b2f2976
XL
2362 pub(super) fn shift_right(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) -> Loss {
2363 let loss = Loss::through_truncation(dst, bits);
2364
2365 if bits > 0 {
2366 // Our exponent should not overflow.
2367 *exp = exp.checked_add(bits as ExpInt).unwrap();
2368
0731742a 2369 // Jump is the inter-limb jump; shift is the intra-limb shift.
3b2f2976
XL
2370 let jump = bits / LIMB_BITS;
2371 let shift = bits % LIMB_BITS;
2372
2373 // Perform the shift. This leaves the most significant `bits` bits
2374 // of the result at zero.
2375 for i in 0..dst.len() {
2376 let mut limb;
2377
2378 if i + jump >= dst.len() {
2379 limb = 0;
2380 } else {
2381 limb = dst[i + jump];
2382 if shift > 0 {
2383 limb >>= shift;
2384 if i + jump + 1 < dst.len() {
2385 limb |= dst[i + jump + 1] << (LIMB_BITS - shift);
2386 }
2387 }
2388 }
2389
2390 dst[i] = limb;
2391 }
2392 }
2393
2394 loss
2395 }
2396
9fa01778 2397 /// Copies the bit vector of width `src_bits` from `src`, starting at bit SRC_LSB,
3b2f2976
XL
2398 /// to `dst`, such that the bit SRC_LSB becomes the least significant bit of `dst`.
2399 /// All high bits above `src_bits` in `dst` are zero-filled.
2400 pub(super) fn extract(dst: &mut [Limb], src: &[Limb], src_bits: usize, src_lsb: usize) {
2401 if src_bits == 0 {
2402 return;
2403 }
2404
2405 let dst_limbs = limbs_for_bits(src_bits);
2406 assert!(dst_limbs <= dst.len());
2407
2408 let src = &src[src_lsb / LIMB_BITS..];
2409 dst[..dst_limbs].copy_from_slice(&src[..dst_limbs]);
2410
2411 let shift = src_lsb % LIMB_BITS;
2412 let _: Loss = shift_right(&mut dst[..dst_limbs], &mut 0, shift);
2413
2414 // We now have (dst_limbs * LIMB_BITS - shift) bits from `src`
2415 // in `dst`. If this is less that src_bits, append the rest, else
2416 // clear the high bits.
2417 let n = dst_limbs * LIMB_BITS - shift;
2418 if n < src_bits {
2419 let mask = (1 << (src_bits - n)) - 1;
b7449926 2420 dst[dst_limbs - 1] |= (src[dst_limbs] & mask) << (n % LIMB_BITS);
3b2f2976
XL
2421 } else if n > src_bits && src_bits % LIMB_BITS > 0 {
2422 dst[dst_limbs - 1] &= (1 << (src_bits % LIMB_BITS)) - 1;
2423 }
2424
2425 // Clear high limbs.
2426 for x in &mut dst[dst_limbs..] {
2427 *x = 0;
2428 }
2429 }
2430
2431 /// We want the most significant PRECISION bits of `src`. There may not
2432 /// be that many; extract what we can.
2433 pub(super) fn from_limbs(dst: &mut [Limb], src: &[Limb], precision: usize) -> (Loss, ExpInt) {
2434 let omsb = omsb(src);
2435
2436 if precision <= omsb {
2437 extract(dst, src, precision, omsb - precision);
dfeec247 2438 (Loss::through_truncation(src, omsb - precision), omsb as ExpInt - 1)
3b2f2976
XL
2439 } else {
2440 extract(dst, src, omsb, 0);
2441 (Loss::ExactlyZero, precision as ExpInt - 1)
2442 }
2443 }
2444
2445 /// For every consecutive chunk of `bits` bits from `limbs`,
2446 /// going from most significant to the least significant bits,
2447 /// call `f` to transform those bits and store the result back.
2448 pub(super) fn each_chunk<F: FnMut(Limb) -> Limb>(limbs: &mut [Limb], bits: usize, mut f: F) {
2449 assert_eq!(LIMB_BITS % bits, 0);
2450 for limb in limbs.iter_mut().rev() {
2451 let mut r = 0;
2452 for i in (0..LIMB_BITS / bits).rev() {
2453 r |= f((*limb >> (i * bits)) & ((1 << bits) - 1)) << (i * bits);
2454 }
2455 *limb = r;
2456 }
2457 }
2458
2459 /// Increment in-place, return the carry flag.
2460 pub(super) fn increment(dst: &mut [Limb]) -> Limb {
2461 for x in dst {
2462 *x = x.wrapping_add(1);
2463 if *x != 0 {
2464 return 0;
2465 }
2466 }
2467
2468 1
2469 }
2470
2471 /// Decrement in-place, return the borrow flag.
2472 pub(super) fn decrement(dst: &mut [Limb]) -> Limb {
2473 for x in dst {
2474 *x = x.wrapping_sub(1);
2475 if *x != !0 {
2476 return 0;
2477 }
2478 }
2479
2480 1
2481 }
2482
2483 /// `a += b + c` where `c` is zero or one. Returns the carry flag.
2484 pub(super) fn add(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb {
2485 assert!(c <= 1);
2486
cdc7bbd5 2487 for (a, &b) in iter::zip(a, b) {
3b2f2976
XL
2488 let (r, overflow) = a.overflowing_add(b);
2489 let (r, overflow2) = r.overflowing_add(c);
2490 *a = r;
2491 c = (overflow | overflow2) as Limb;
2492 }
2493
2494 c
2495 }
2496
2497 /// `a -= b + c` where `c` is zero or one. Returns the borrow flag.
2498 pub(super) fn sub(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb {
2499 assert!(c <= 1);
2500
cdc7bbd5 2501 for (a, &b) in iter::zip(a, b) {
3b2f2976
XL
2502 let (r, overflow) = a.overflowing_sub(b);
2503 let (r, overflow2) = r.overflowing_sub(c);
2504 *a = r;
2505 c = (overflow | overflow2) as Limb;
2506 }
2507
2508 c
2509 }
2510
2511 /// `a += b` or `a -= b`. Does not preserve `b`.
2512 pub(super) fn add_or_sub(
2513 a_sig: &mut [Limb],
2514 a_exp: &mut ExpInt,
2515 a_sign: &mut bool,
2516 b_sig: &mut [Limb],
2517 b_exp: ExpInt,
2518 b_sign: bool,
2519 ) -> Loss {
2520 // Are we bigger exponent-wise than the RHS?
2521 let bits = *a_exp - b_exp;
2522
2523 // Determine if the operation on the absolute values is effectively
2524 // an addition or subtraction.
2525 // Subtraction is more subtle than one might naively expect.
2526 if *a_sign ^ b_sign {
2527 let (reverse, loss);
2528
2529 if bits == 0 {
2530 reverse = cmp(a_sig, b_sig) == Ordering::Less;
2531 loss = Loss::ExactlyZero;
2532 } else if bits > 0 {
2533 loss = shift_right(b_sig, &mut 0, (bits - 1) as usize);
2534 shift_left(a_sig, a_exp, 1);
2535 reverse = false;
2536 } else {
2537 loss = shift_right(a_sig, a_exp, (-bits - 1) as usize);
2538 shift_left(b_sig, &mut 0, 1);
2539 reverse = true;
2540 }
2541
2542 let borrow = (loss != Loss::ExactlyZero) as Limb;
2543 if reverse {
2544 // The code above is intended to ensure that no borrow is necessary.
2545 assert_eq!(sub(b_sig, a_sig, borrow), 0);
2546 a_sig.copy_from_slice(b_sig);
2547 *a_sign = !*a_sign;
2548 } else {
2549 // The code above is intended to ensure that no borrow is necessary.
2550 assert_eq!(sub(a_sig, b_sig, borrow), 0);
2551 }
2552
2553 // Invert the lost fraction - it was on the RHS and subtracted.
2554 match loss {
2555 Loss::LessThanHalf => Loss::MoreThanHalf,
2556 Loss::MoreThanHalf => Loss::LessThanHalf,
2557 _ => loss,
2558 }
2559 } else {
2560 let loss = if bits > 0 {
2561 shift_right(b_sig, &mut 0, bits as usize)
2562 } else {
2563 shift_right(a_sig, a_exp, -bits as usize)
2564 };
2565 // We have a guard bit; generating a carry cannot happen.
2566 assert_eq!(add(a_sig, b_sig, 0), 0);
2567 loss
2568 }
2569 }
2570
2571 /// `[low, high] = a * b`.
2572 ///
2573 /// This cannot overflow, because
2574 ///
2575 /// `(n - 1) * (n - 1) + 2 * (n - 1) == (n - 1) * (n + 1)`
2576 ///
ff7c6d11 2577 /// which is less than n<sup>2</sup>.
3b2f2976
XL
2578 pub(super) fn widening_mul(a: Limb, b: Limb) -> [Limb; 2] {
2579 let mut wide = [0, 0];
2580
2581 if a == 0 || b == 0 {
2582 return wide;
2583 }
2584
2585 const HALF_BITS: usize = LIMB_BITS / 2;
2586
2587 let select = |limb, i| (limb >> (i * HALF_BITS)) & ((1 << HALF_BITS) - 1);
2588 for i in 0..2 {
2589 for j in 0..2 {
2590 let mut x = [select(a, i) * select(b, j), 0];
2591 shift_left(&mut x, &mut 0, (i + j) * HALF_BITS);
2592 assert_eq!(add(&mut wide, &x, 0), 0);
2593 }
2594 }
2595
2596 wide
2597 }
2598
2599 /// `dst = a * b` (for normal `a` and `b`). Returns the lost fraction.
2600 pub(super) fn mul<'a>(
2601 dst: &mut [Limb],
2602 exp: &mut ExpInt,
2603 mut a: &'a [Limb],
2604 mut b: &'a [Limb],
2605 precision: usize,
2606 ) -> Loss {
2607 // Put the narrower number on the `a` for less loops below.
2608 if a.len() > b.len() {
2609 mem::swap(&mut a, &mut b);
2610 }
2611
2612 for x in &mut dst[..b.len()] {
2613 *x = 0;
2614 }
2615
2616 for i in 0..a.len() {
2617 let mut carry = 0;
2618 for j in 0..b.len() {
2619 let [low, mut high] = widening_mul(a[i], b[j]);
2620
2621 // Now add carry.
2622 let (low, overflow) = low.overflowing_add(carry);
2623 high += overflow as Limb;
2624
2625 // And now `dst[i + j]`, and store the new low part there.
2626 let (low, overflow) = low.overflowing_add(dst[i + j]);
2627 high += overflow as Limb;
2628
2629 dst[i + j] = low;
2630 carry = high;
2631 }
2632 dst[i + b.len()] = carry;
2633 }
2634
2635 // Assume the operands involved in the multiplication are single-precision
2636 // FP, and the two multiplicants are:
2637 // a = a23 . a22 ... a0 * 2^e1
2638 // b = b23 . b22 ... b0 * 2^e2
2639 // the result of multiplication is:
2640 // dst = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
2641 // Note that there are three significant bits at the left-hand side of the
2642 // radix point: two for the multiplication, and an overflow bit for the
2643 // addition (that will always be zero at this point). Move the radix point
2644 // toward left by two bits, and adjust exponent accordingly.
2645 *exp += 2;
2646
2647 // Convert the result having "2 * precision" significant-bits back to the one
2648 // having "precision" significant-bits. First, move the radix point from
2649 // poision "2*precision - 1" to "precision - 1". The exponent need to be
2650 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
2651 *exp -= precision as ExpInt + 1;
2652
2653 // In case MSB resides at the left-hand side of radix point, shift the
2654 // mantissa right by some amount to make sure the MSB reside right before
0731742a 2655 // the radix point (i.e., "MSB . rest-significant-bits").
3b2f2976
XL
2656 //
2657 // Note that the result is not normalized when "omsb < precision". So, the
2658 // caller needs to call IeeeFloat::normalize() if normalized value is
2659 // expected.
2660 let omsb = omsb(dst);
dfeec247 2661 if omsb <= precision { Loss::ExactlyZero } else { shift_right(dst, exp, omsb - precision) }
3b2f2976
XL
2662 }
2663
2664 /// `quotient = dividend / divisor`. Returns the lost fraction.
2665 /// Does not preserve `dividend` or `divisor`.
2666 pub(super) fn div(
2667 quotient: &mut [Limb],
2668 exp: &mut ExpInt,
2669 dividend: &mut [Limb],
2670 divisor: &mut [Limb],
2671 precision: usize,
2672 ) -> Loss {
3b2f2976
XL
2673 // Normalize the divisor.
2674 let bits = precision - omsb(divisor);
2675 shift_left(divisor, &mut 0, bits);
2676 *exp += bits as ExpInt;
2677
2678 // Normalize the dividend.
2679 let bits = precision - omsb(dividend);
2680 shift_left(dividend, exp, bits);
2681
2682 // Division by 1.
2683 let olsb_divisor = olsb(divisor);
2684 if olsb_divisor == precision {
2685 quotient.copy_from_slice(dividend);
2686 return Loss::ExactlyZero;
2687 }
2688
2689 // Ensure the dividend >= divisor initially for the loop below.
2690 // Incidentally, this means that the division loop below is
2691 // guaranteed to set the integer bit to one.
2692 if cmp(dividend, divisor) == Ordering::Less {
2693 shift_left(dividend, exp, 1);
2694 assert_ne!(cmp(dividend, divisor), Ordering::Less)
2695 }
2696
2697 // Helper for figuring out the lost fraction.
dfeec247
XL
2698 let lost_fraction = |dividend: &[Limb], divisor: &[Limb]| match cmp(dividend, divisor) {
2699 Ordering::Greater => Loss::MoreThanHalf,
2700 Ordering::Equal => Loss::ExactlyHalf,
2701 Ordering::Less => {
2702 if is_all_zeros(dividend) {
2703 Loss::ExactlyZero
2704 } else {
2705 Loss::LessThanHalf
3b2f2976
XL
2706 }
2707 }
2708 };
2709
2710 // Try to perform a (much faster) short division for small divisors.
2711 let divisor_bits = precision - (olsb_divisor - 1);
2712 macro_rules! try_short_div {
2713 ($W:ty, $H:ty, $half:expr) => {
2714 if divisor_bits * 2 <= $half {
2715 // Extract the small divisor.
2716 let _: Loss = shift_right(divisor, &mut 0, olsb_divisor - 1);
2717 let divisor = divisor[0] as $H as $W;
2718
2719 // Shift the dividend to produce a quotient with the unit bit set.
2720 let top_limb = *dividend.last().unwrap();
2721 let mut rem = (top_limb >> (LIMB_BITS - (divisor_bits - 1))) as $H;
2722 shift_left(dividend, &mut 0, divisor_bits - 1);
2723
2724 // Apply short division in place on $H (of $half bits) chunks.
2725 each_chunk(dividend, $half, |chunk| {
2726 let chunk = chunk as $H;
2727 let combined = ((rem as $W) << $half) | (chunk as $W);
2728 rem = (combined % divisor) as $H;
2729 (combined / divisor) as $H as Limb
2730 });
2731 quotient.copy_from_slice(dividend);
2732
2733 return lost_fraction(&[(rem as Limb) << 1], &[divisor as Limb]);
2734 }
dfeec247 2735 };
3b2f2976
XL
2736 }
2737
2738 try_short_div!(u32, u16, 16);
2739 try_short_div!(u64, u32, 32);
2740 try_short_div!(u128, u64, 64);
2741
2742 // Zero the quotient before setting bits in it.
2743 for x in &mut quotient[..limbs_for_bits(precision)] {
2744 *x = 0;
2745 }
2746
2747 // Long division.
2748 for bit in (0..precision).rev() {
2749 if cmp(dividend, divisor) != Ordering::Less {
2750 sub(dividend, divisor, 0);
2751 set_bit(quotient, bit);
2752 }
2753 shift_left(dividend, &mut 0, 1);
2754 }
2755
2756 lost_fraction(dividend, divisor)
2757 }
2758}