1 use crate::{Category, ExpInt, IEK_INF, IEK_NAN, IEK_ZERO}
;
2 use crate::{Float, FloatConvert, ParseError, Round, Status, StatusAnd}
;
4 use core
::cmp
::{self, Ordering}
;
5 use core
::convert
::TryFrom
;
6 use core
::fmt
::{self, Write}
;
7 use core
::marker
::PhantomData
;
10 use smallvec
::{smallvec, SmallVec}
;
13 pub struct IeeeFloat
<S
> {
14 /// Absolute significand value (including the integer bit).
17 /// The signed unbiased exponent of the value.
20 /// What kind of floating point number this is.
23 /// Sign bit of the number.
26 marker
: PhantomData
<S
>,
29 /// Fundamental unit of big integer arithmetic, but also
30 /// large to store the largest significands by itself.
/// Number of bits in a `Limb`, the machine word used for big-integer
/// significand arithmetic. At 128 bits it is wide enough to hold the
/// largest supported significand in a single limb (see the doc comment
/// on the `Limb` type above).
const LIMB_BITS: usize = 128;
33 fn limbs_for_bits(bits
: usize) -> usize {
34 (bits
+ LIMB_BITS
- 1) / LIMB_BITS
37 /// Enum that represents what fraction of the LSB truncated bits of an fp number
40 /// This essentially combines the roles of guard and sticky bits.
42 #[derive(Copy, Clone, PartialEq, Eq, Debug)]
44 // Example of truncated bits:
45 ExactlyZero
, // 000000
46 LessThanHalf
, // 0xxxxx x's not all zero
47 ExactlyHalf
, // 100000
48 MoreThanHalf
, // 1xxxxx x's not all zero
51 /// Represents floating point arithmetic semantics.
52 pub trait Semantics
: Sized
{
53 /// Total number of bits in the in-memory format.
56 /// Number of bits in the significand. This includes the integer bit.
57 const PRECISION
: usize;
59 /// The largest E such that 2<sup>E</sup> is representable; this matches the
60 /// definition of IEEE 754.
61 const MAX_EXP
: ExpInt
;
63 /// The smallest E such that 2<sup>E</sup> is a normalized number; this
64 /// matches the definition of IEEE 754.
65 const MIN_EXP
: ExpInt
= -Self::MAX_EXP
+ 1;
67 /// The significand bit that marks NaN as quiet.
68 const QNAN_BIT
: usize = Self::PRECISION
- 2;
70 /// The significand bitpattern to mark a NaN as quiet.
71 /// NOTE: for X87DoubleExtended we need to set two bits instead of 2.
72 const QNAN_SIGNIFICAND
: Limb
= 1 << Self::QNAN_BIT
;
74 fn from_bits(bits
: u128
) -> IeeeFloat
<Self> {
75 assert
!(Self::BITS
> Self::PRECISION
);
77 let sign
= bits
& (1 << (Self::BITS
- 1));
78 let exponent
= (bits
& !sign
) >> (Self::PRECISION
- 1);
79 let mut r
= IeeeFloat
{
80 sig
: [bits
& ((1 << (Self::PRECISION
- 1)) - 1)],
81 // Convert the exponent from its bias representation to a signed integer.
82 exp
: (exponent
as ExpInt
) - Self::MAX_EXP
,
83 category
: Category
::Zero
,
88 if r
.exp
== Self::MIN_EXP
- 1 && r
.sig
== [0] {
89 // Exponent, significand meaningless.
90 r
.category
= Category
::Zero
;
91 } else if r
.exp
== Self::MAX_EXP
+ 1 && r
.sig
== [0] {
92 // Exponent, significand meaningless.
93 r
.category
= Category
::Infinity
;
94 } else if r
.exp
== Self::MAX_EXP
+ 1 && r
.sig
!= [0] {
95 // Sign, exponent, significand meaningless.
96 r
.category
= Category
::NaN
;
98 r
.category
= Category
::Normal
;
99 if r
.exp
== Self::MIN_EXP
- 1 {
101 r
.exp
= Self::MIN_EXP
;
104 sig
::set_bit(&mut r
.sig
, Self::PRECISION
- 1);
111 fn to_bits(x
: IeeeFloat
<Self>) -> u128
{
112 assert
!(Self::BITS
> Self::PRECISION
);
114 // Split integer bit from significand.
115 let integer_bit
= sig
::get_bit(&x
.sig
, Self::PRECISION
- 1);
116 let mut significand
= x
.sig
[0] & ((1 << (Self::PRECISION
- 1)) - 1);
117 let exponent
= match x
.category
{
118 Category
::Normal
=> {
119 if x
.exp
== Self::MIN_EXP
&& !integer_bit
{
127 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
131 Category
::Infinity
=> {
132 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
136 Category
::NaN
=> Self::MAX_EXP
+ 1,
139 // Convert the exponent from a signed integer to its bias representation.
140 let exponent
= (exponent
+ Self::MAX_EXP
) as u128
;
142 ((x
.sign
as u128
) << (Self::BITS
- 1)) | (exponent
<< (Self::PRECISION
- 1)) | significand
// Implemented manually rather than with `#[derive(Copy)]`: a derive would
// add an `S: Copy` bound on the type parameter, whereas `IeeeFloat<S>`
// should be `Copy` for every semantics tag `S` (the tag is only carried
// in `PhantomData`).
impl<S> Copy for IeeeFloat<S> {}
147 impl<S
> Clone
for IeeeFloat
<S
> {
148 fn clone(&self) -> Self {
153 macro_rules
! ieee_semantics
{
154 ($
($name
:ident
= $sem
:ident($bits
:tt
: $exp_bits
:tt
)),*) => {
156 $
(pub type $name
= IeeeFloat
<$sem
>;)*
157 $
(impl Semantics
for $sem
{
158 const BITS
: usize = $bits
;
159 const PRECISION
: usize = ($bits
- 1 - $exp_bits
) + 1;
160 const MAX_EXP
: ExpInt
= (1 << ($exp_bits
- 1)) - 1;
167 Single
= SingleS(32:8),
168 Double
= DoubleS(64:11),
/// Semantics tag for the x87 80-bit double-extended precision format.
/// Unlike the interchange formats, this format stores the integer bit of
/// the significand explicitly (see the `Semantics` impl for this type
/// below, where `BITS = 80` and `PRECISION = 64`).
pub struct X87DoubleExtendedS;

/// An x87 80-bit double-extended precision floating point value.
pub type X87DoubleExtended = IeeeFloat<X87DoubleExtendedS>;
174 impl Semantics
for X87DoubleExtendedS
{
175 const BITS
: usize = 80;
176 const PRECISION
: usize = 64;
177 const MAX_EXP
: ExpInt
= (1 << (15 - 1)) - 1;
179 /// For x87 extended precision, we want to make a NaN, not a
180 /// pseudo-NaN. Maybe we should expose the ability to make
182 const QNAN_SIGNIFICAND
: Limb
= 0b11 << Self::QNAN_BIT
;
184 /// Integer bit is explicit in this format. Intel hardware (387 and later)
185 /// does not support these bit patterns:
186 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
187 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
188 /// exponent = 0, integer bit 1 ("pseudodenormal")
189 /// exponent != 0 nor all 1's, integer bit 0 ("unnormal")
190 /// At the moment, the first two are treated as NaNs, the second two as Normal.
191 fn from_bits(bits
: u128
) -> IeeeFloat
<Self> {
192 let sign
= bits
& (1 << (Self::BITS
- 1));
193 let exponent
= (bits
& !sign
) >> Self::PRECISION
;
194 let mut r
= IeeeFloat
{
195 sig
: [bits
& ((1 << (Self::PRECISION
- 1)) - 1)],
196 // Convert the exponent from its bias representation to a signed integer.
197 exp
: (exponent
as ExpInt
) - Self::MAX_EXP
,
198 category
: Category
::Zero
,
203 if r
.exp
== Self::MIN_EXP
- 1 && r
.sig
== [0] {
204 // Exponent, significand meaningless.
205 r
.category
= Category
::Zero
;
206 } else if r
.exp
== Self::MAX_EXP
+ 1 && r
.sig
== [1 << (Self::PRECISION
- 1)] {
207 // Exponent, significand meaningless.
208 r
.category
= Category
::Infinity
;
209 } else if r
.exp
== Self::MAX_EXP
+ 1 && r
.sig
!= [1 << (Self::PRECISION
- 1)] {
210 // Sign, exponent, significand meaningless.
211 r
.category
= Category
::NaN
;
213 r
.category
= Category
::Normal
;
214 if r
.exp
== Self::MIN_EXP
- 1 {
216 r
.exp
= Self::MIN_EXP
;
223 fn to_bits(x
: IeeeFloat
<Self>) -> u128
{
224 // Get integer bit from significand.
225 let integer_bit
= sig
::get_bit(&x
.sig
, Self::PRECISION
- 1);
226 let mut significand
= x
.sig
[0] & ((1 << Self::PRECISION
) - 1);
227 let exponent
= match x
.category
{
228 Category
::Normal
=> {
229 if x
.exp
== Self::MIN_EXP
&& !integer_bit
{
237 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
241 Category
::Infinity
=> {
242 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
243 significand
= 1 << (Self::PRECISION
- 1);
246 Category
::NaN
=> Self::MAX_EXP
+ 1,
249 // Convert the exponent from a signed integer to its bias representation.
250 let exponent
= (exponent
+ Self::MAX_EXP
) as u128
;
252 ((x
.sign
as u128
) << (Self::BITS
- 1)) | (exponent
<< Self::PRECISION
) | significand
// Generate the shared boilerplate impls for `IeeeFloat<S>` — presumably the
// operator/conversion traits common to all float types (the macro is defined
// elsewhere in this crate; TODO confirm exactly which traits it expands to).
float_common_impls!(IeeeFloat<S>);
258 impl<S
: Semantics
> PartialEq
for IeeeFloat
<S
> {
259 fn eq(&self, rhs
: &Self) -> bool
{
260 self.partial_cmp(rhs
) == Some(Ordering
::Equal
)
264 impl<S
: Semantics
> PartialOrd
for IeeeFloat
<S
> {
265 fn partial_cmp(&self, rhs
: &Self) -> Option
<Ordering
> {
266 match (self.category
, rhs
.category
) {
267 (Category
::NaN
, _
) | (_
, Category
::NaN
) => None
,
269 (Category
::Infinity
, Category
::Infinity
) => Some((!self.sign
).cmp(&(!rhs
.sign
))),
271 (Category
::Zero
, Category
::Zero
) => Some(Ordering
::Equal
),
273 (Category
::Infinity
, _
) | (Category
::Normal
, Category
::Zero
) => {
274 Some((!self.sign
).cmp(&self.sign
))
277 (_
, Category
::Infinity
) | (Category
::Zero
, Category
::Normal
) => {
278 Some(rhs
.sign
.cmp(&(!rhs
.sign
)))
281 (Category
::Normal
, Category
::Normal
) => {
282 // Two normal numbers. Do they have the same sign?
283 Some((!self.sign
).cmp(&(!rhs
.sign
)).then_with(|| {
284 // Compare absolute values; invert result if negative.
285 let result
= self.cmp_abs_normal(*rhs
);
287 if self.sign { result.reverse() }
else { result }
294 impl<S
> Neg
for IeeeFloat
<S
> {
296 fn neg(mut self) -> Self {
297 self.sign
= !self.sign
;
302 /// Prints this value as a decimal string.
304 /// \param precision The maximum number of digits of
305 /// precision to output. If there are fewer digits available,
306 /// zero padding will not be used unless the value is
307 /// integral and small enough to be expressed in
308 /// precision digits. 0 means to use the natural
309 /// precision of the number.
310 /// \param width The maximum number of zeros to
311 /// consider inserting before falling back to scientific
312 /// notation. 0 means to always use scientific notation.
314 /// \param alternate Indicate whether to remove the trailing zero in
315 /// fraction part or not. Also setting this parameter to true forces
316 /// producing of output more similar to default printf behavior.
317 /// Specifically the lower e is used as exponent delimiter and exponent
318 /// always contains no less than two digits.
320 /// Number precision width Result
321 /// ------ --------- ----- ------
322 /// 1.01E+4 5 2 10100
323 /// 1.01E+4 4 2 1.01E+4
324 /// 1.01E+4 5 1 1.01E+4
325 /// 1.01E-2 5 2 0.0101
326 /// 1.01E-2 4 2 0.0101
327 /// 1.01E-2 4 1 1.01E-2
328 impl<S
: Semantics
> fmt
::Display
for IeeeFloat
<S
> {
329 fn fmt(&self, f
: &mut fmt
::Formatter
<'_
>) -> fmt
::Result
{
330 let width
= f
.width().unwrap_or(3);
331 let alternate
= f
.alternate();
333 match self.category
{
334 Category
::Infinity
=> {
336 return f
.write_str("-Inf");
338 return f
.write_str("+Inf");
342 Category
::NaN
=> return f
.write_str("NaN"),
352 if let Some(n
) = f
.precision() {
357 f
.write_str("e+00")?
;
359 f
.write_str("0.0E+0")?
;
367 Category
::Normal
=> {}
374 // We use enough digits so the number can be round-tripped back to an
375 // APFloat. The formula comes from "How to Print Floating-Point Numbers
376 // Accurately" by Steele and White.
377 // FIXME: Using a formula based purely on the precision is conservative;
378 // we can print fewer digits depending on the actual value being printed.
380 // precision = 2 + floor(S::PRECISION / lg_2(10))
381 let precision
= f
.precision().unwrap_or(2 + S
::PRECISION
* 59 / 196);
383 // Decompose the number into an APInt and an exponent.
384 let mut exp
= self.exp
- (S
::PRECISION
as ExpInt
- 1);
385 let mut sig
= vec
![self.sig
[0]];
387 // Ignore trailing binary zeros.
388 let trailing_zeros
= sig
[0].trailing_zeros();
389 let _
: Loss
= sig
::shift_right(&mut sig
, &mut exp
, trailing_zeros
as usize);
391 // Change the exponent from 2^e to 10^e.
396 let shift
= exp
as usize;
397 sig
.resize(limbs_for_bits(S
::PRECISION
+ shift
), 0);
398 sig
::shift_left(&mut sig
, &mut exp
, shift
);
401 let mut texp
= -exp
as usize;
403 // We transform this using the identity:
404 // (N)(2^-e) == (N)(5^e)(10^-e)
406 // Multiply significand by 5^e.
407 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
408 let mut sig_scratch
= vec
![];
410 let mut p5_scratch
= vec
![];
415 p5_scratch
.resize(p5
.len() * 2, 0);
417 sig
::mul(&mut p5_scratch
, &mut 0, &p5
, &p5
, p5
.len() * 2 * LIMB_BITS
);
418 while p5_scratch
.last() == Some(&0) {
421 mem
::swap(&mut p5
, &mut p5_scratch
);
424 sig_scratch
.resize(sig
.len() + p5
.len(), 0);
425 let _
: Loss
= sig
::mul(
430 (sig
.len() + p5
.len()) * LIMB_BITS
,
432 while sig_scratch
.last() == Some(&0) {
435 mem
::swap(&mut sig
, &mut sig_scratch
);
442 let mut buffer
= vec
![];
444 // Ignore digits from the significand until it is no more
445 // precise than is required for the desired precision.
446 // 196/59 is a very slight overestimate of lg_2(10).
447 let required
= (precision
* 196 + 58) / 59;
448 let mut discard_digits
= sig
::omsb(&sig
).saturating_sub(required
) * 59 / 196;
449 let mut in_trail
= true;
450 while !sig
.is_empty() {
451 // Perform short division by 10 to extract the rightmost digit.
456 // Use 64-bit division and remainder, with 32-bit chunks from sig.
457 sig
::each_chunk(&mut sig
, 32, |chunk
| {
458 let chunk
= chunk
as u32;
459 let combined
= ((rem
as u64) << 32) | (chunk
as u64);
460 rem
= (combined
% 10) as u8;
461 (combined
/ 10) as u32 as Limb
464 // Reduce the sigificand to avoid wasting time dividing 0's.
465 while sig
.last() == Some(&0) {
471 // Ignore digits we don't need.
472 if discard_digits
> 0 {
478 // Drop trailing zeros.
479 if in_trail
&& digit
== 0 {
483 buffer
.push(b'
0'
+ digit
);
487 assert
!(!buffer
.is_empty(), "no characters in buffer!");
489 // Drop down to precision.
490 // FIXME: don't do more precise calculations above than are required.
491 if buffer
.len() > precision
{
492 // The most significant figures are the last ones in the buffer.
493 let mut first_sig
= buffer
.len() - precision
;
496 // FIXME: this probably shouldn't use 'round half up'.
498 // Rounding down is just a truncation, except we also want to drop
499 // trailing zeros from the new result.
500 if buffer
[first_sig
- 1] < b'
5'
{
501 while first_sig
< buffer
.len() && buffer
[first_sig
] == b'
0'
{
505 // Rounding up requires a decimal add-with-carry. If we continue
506 // the carry, the newly-introduced zeros will just be truncated.
507 for x
in &mut buffer
[first_sig
..] {
517 exp
+= first_sig
as ExpInt
;
518 buffer
.drain(..first_sig
);
520 // If we carried through, we have exactly one digit of precision.
521 if buffer
.is_empty() {
526 let digits
= buffer
.len();
528 // Check whether we should use scientific notation.
529 let scientific
= if width
== 0 {
534 // But we shouldn't make the number look more precise than it is.
535 exp
as usize > width
|| digits
+ exp
as usize > precision
537 // Power of the most significant digit.
538 let msd
= exp
+ (digits
- 1) as ExpInt
;
545 -msd
as usize > width
549 // Scientific formatting is pretty straightforward.
551 exp
+= digits
as ExpInt
- 1;
553 f
.write_char(buffer
[digits
- 1] as char)?
;
555 let truncate_zero
= !alternate
;
556 if digits
== 1 && truncate_zero
{
559 for &d
in buffer
[..digits
- 1].iter().rev() {
560 f
.write_char(d
as char)?
;
563 // Fill with zeros up to precision.
564 if !truncate_zero
&& precision
> digits
- 1 {
565 for _
in 0..=precision
- digits
{
569 // For alternate we use lower 'e'.
570 f
.write_char(if alternate { 'e' }
else { 'E' }
)?
;
572 // Exponent always at least two digits if we do not truncate zeros.
574 write
!(f
, "{:+}", exp
)?
;
576 write
!(f
, "{:+03}", exp
)?
;
582 // Non-scientific, positive exponents.
584 for &d
in buffer
.iter().rev() {
585 f
.write_char(d
as char)?
;
593 // Non-scientific, negative exponents.
594 let unit_place
= -exp
as usize;
595 if unit_place
< digits
{
596 for &d
in buffer
[unit_place
..].iter().rev() {
597 f
.write_char(d
as char)?
;
600 for &d
in buffer
[..unit_place
].iter().rev() {
601 f
.write_char(d
as char)?
;
605 for _
in digits
..unit_place
{
608 for &d
in buffer
.iter().rev() {
609 f
.write_char(d
as char)?
;
617 impl<S
: Semantics
> fmt
::Debug
for IeeeFloat
<S
> {
618 fn fmt(&self, f
: &mut fmt
::Formatter
<'_
>) -> fmt
::Result
{
621 "{}({:?} | {}{:?} * 2^{})",
624 if self.sign { "-" }
else { "+" }
,
631 impl<S
: Semantics
> Float
for IeeeFloat
<S
> {
632 const BITS
: usize = S
::BITS
;
633 const PRECISION
: usize = S
::PRECISION
;
634 const MAX_EXP
: ExpInt
= S
::MAX_EXP
;
635 const MIN_EXP
: ExpInt
= S
::MIN_EXP
;
637 const ZERO
: Self = IeeeFloat
{
640 category
: Category
::Zero
,
645 const INFINITY
: Self = IeeeFloat
{
648 category
: Category
::Infinity
,
653 // FIXME(eddyb) remove when qnan becomes const fn.
654 const NAN
: Self = IeeeFloat
{
655 sig
: [S
::QNAN_SIGNIFICAND
],
657 category
: Category
::NaN
,
662 fn qnan(payload
: Option
<u128
>) -> Self {
664 sig
: [S
::QNAN_SIGNIFICAND
665 | payload
.map_or(0, |payload
| {
666 // Zero out the excess bits of the significand.
667 payload
& ((1 << S
::QNAN_BIT
) - 1)
670 category
: Category
::NaN
,
676 fn snan(payload
: Option
<u128
>) -> Self {
677 let mut snan
= Self::qnan(payload
);
679 // We always have to clear the QNaN bit to make it an SNaN.
680 sig
::clear_bit(&mut snan
.sig
, S
::QNAN_BIT
);
682 // If there are no bits set in the payload, we have to set
683 // *something* to make it a NaN instead of an infinity;
684 // conventionally, this is the next bit down from the QNaN bit.
685 if snan
.sig
[0] & !S
::QNAN_SIGNIFICAND
== 0 {
686 sig
::set_bit(&mut snan
.sig
, S
::QNAN_BIT
- 1);
692 fn largest() -> Self {
693 // We want (in interchange format):
695 // significand = 1..1
697 sig
: [(1 << S
::PRECISION
) - 1],
699 category
: Category
::Normal
,
705 // We want (in interchange format):
707 // significand = 0..01
708 const SMALLEST
: Self = IeeeFloat
{
711 category
: Category
::Normal
,
716 fn smallest_normalized() -> Self {
717 // We want (in interchange format):
719 // significand = 10..0
721 sig
: [1 << (S
::PRECISION
- 1)],
723 category
: Category
::Normal
,
729 fn add_r(mut self, rhs
: Self, round
: Round
) -> StatusAnd
<Self> {
730 let status
= match (self.category
, rhs
.category
) {
731 (Category
::Infinity
, Category
::Infinity
) => {
732 // Differently signed infinities can only be validly
734 if self.sign
!= rhs
.sign
{
742 // Sign may depend on rounding mode; handled below.
743 (_
, Category
::Zero
) | (Category
::NaN
, _
) | (Category
::Infinity
, Category
::Normal
) => {
747 (Category
::Zero
, _
) | (_
, Category
::NaN
| Category
::Infinity
) => {
752 // This return code means it was not a simple case.
753 (Category
::Normal
, Category
::Normal
) => {
754 let loss
= sig
::add_or_sub(
763 self = unpack
!(status
=, self.normalize(round
, loss
));
765 // Can only be zero if we lost no fraction.
766 assert
!(self.category
!= Category
::Zero
|| loss
== Loss
::ExactlyZero
);
772 // If two numbers add (exactly) to zero, IEEE 754 decrees it is a
773 // positive zero unless rounding to minus infinity, except that
774 // adding two like-signed zeroes gives that zero.
775 if self.category
== Category
::Zero
776 && (rhs
.category
!= Category
::Zero
|| self.sign
!= rhs
.sign
)
778 self.sign
= round
== Round
::TowardNegative
;
784 fn mul_r(mut self, rhs
: Self, round
: Round
) -> StatusAnd
<Self> {
785 self.sign ^
= rhs
.sign
;
787 match (self.category
, rhs
.category
) {
788 (Category
::NaN
, _
) => {
793 (_
, Category
::NaN
) => {
795 self.category
= Category
::NaN
;
800 (Category
::Zero
, Category
::Infinity
) | (Category
::Infinity
, Category
::Zero
) => {
801 Status
::INVALID_OP
.and(Self::NAN
)
804 (_
, Category
::Infinity
) | (Category
::Infinity
, _
) => {
805 self.category
= Category
::Infinity
;
809 (Category
::Zero
, _
) | (_
, Category
::Zero
) => {
810 self.category
= Category
::Zero
;
814 (Category
::Normal
, Category
::Normal
) => {
816 let mut wide_sig
= [0; 2];
818 sig
::mul(&mut wide_sig
, &mut self.exp
, &self.sig
, &rhs
.sig
, S
::PRECISION
);
819 self.sig
= [wide_sig
[0]];
821 self = unpack
!(status
=, self.normalize(round
, loss
));
822 if loss
!= Loss
::ExactlyZero
{
823 status
|= Status
::INEXACT
;
830 fn mul_add_r(mut self, multiplicand
: Self, addend
: Self, round
: Round
) -> StatusAnd
<Self> {
831 // If and only if all arguments are normal do we need to do an
832 // extended-precision calculation.
833 if !self.is_finite_non_zero() || !multiplicand
.is_finite_non_zero() || !addend
.is_finite() {
835 self = unpack
!(status
=, self.mul_r(multiplicand
, round
));
837 // FS can only be Status::OK or Status::INVALID_OP. There is no more work
838 // to do in the latter case. The IEEE-754R standard says it is
839 // implementation-defined in this case whether, if ADDEND is a
840 // quiet NaN, we raise invalid op; this implementation does so.
842 // If we need to do the addition we can do so with normal
844 if status
== Status
::OK
{
845 self = unpack
!(status
=, self.add_r(addend
, round
));
847 return status
.and(self);
850 // Post-multiplication sign, before addition.
851 self.sign ^
= multiplicand
.sign
;
853 // Allocate space for twice as many bits as the original significand, plus one
854 // extra bit for the addition to overflow into.
855 assert
!(limbs_for_bits(S
::PRECISION
* 2 + 1) <= 2);
856 let mut wide_sig
= sig
::widening_mul(self.sig
[0], multiplicand
.sig
[0]);
858 let mut loss
= Loss
::ExactlyZero
;
859 let mut omsb
= sig
::omsb(&wide_sig
);
860 self.exp
+= multiplicand
.exp
;
862 // Assume the operands involved in the multiplication are single-precision
863 // FP, and the two multiplicants are:
864 // lhs = a23 . a22 ... a0 * 2^e1
865 // rhs = b23 . b22 ... b0 * 2^e2
866 // the result of multiplication is:
867 // lhs = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
868 // Note that there are three significant bits at the left-hand side of the
869 // radix point: two for the multiplication, and an overflow bit for the
870 // addition (that will always be zero at this point). Move the radix point
871 // toward left by two bits, and adjust exponent accordingly.
874 if addend
.is_non_zero() {
875 // Normalize our MSB to one below the top bit to allow for overflow.
876 let ext_precision
= 2 * S
::PRECISION
+ 1;
877 if omsb
!= ext_precision
- 1 {
878 assert
!(ext_precision
> omsb
);
879 sig
::shift_left(&mut wide_sig
, &mut self.exp
, (ext_precision
- 1) - omsb
);
882 // The intermediate result of the multiplication has "2 * S::PRECISION"
883 // significant bit; adjust the addend to be consistent with mul result.
884 let mut ext_addend_sig
= [addend
.sig
[0], 0];
886 // Extend the addend significand to ext_precision - 1. This guarantees
887 // that the high bit of the significand is zero (same as wide_sig),
888 // so the addition will overflow (if it does overflow at all) into the top bit.
889 sig
::shift_left(&mut ext_addend_sig
, &mut 0, ext_precision
- 1 - S
::PRECISION
);
890 loss
= sig
::add_or_sub(
899 omsb
= sig
::omsb(&wide_sig
);
902 // Convert the result having "2 * S::PRECISION" significant-bits back to the one
903 // having "S::PRECISION" significant-bits. First, move the radix point from
904 // position "2*S::PRECISION - 1" to "S::PRECISION - 1". The exponent need to be
905 // adjusted by "2*S::PRECISION - 1" - "S::PRECISION - 1" = "S::PRECISION".
906 self.exp
-= S
::PRECISION
as ExpInt
+ 1;
908 // In case MSB resides at the left-hand side of radix point, shift the
909 // mantissa right by some amount to make sure the MSB reside right before
910 // the radix point (i.e., "MSB . rest-significant-bits").
911 if omsb
> S
::PRECISION
{
912 let bits
= omsb
- S
::PRECISION
;
913 loss
= sig
::shift_right(&mut wide_sig
, &mut self.exp
, bits
).combine(loss
);
916 self.sig
[0] = wide_sig
[0];
919 self = unpack
!(status
=, self.normalize(round
, loss
));
920 if loss
!= Loss
::ExactlyZero
{
921 status
|= Status
::INEXACT
;
924 // If two numbers add (exactly) to zero, IEEE 754 decrees it is a
925 // positive zero unless rounding to minus infinity, except that
926 // adding two like-signed zeroes gives that zero.
927 if self.category
== Category
::Zero
928 && !status
.intersects(Status
::UNDERFLOW
)
929 && self.sign
!= addend
.sign
931 self.sign
= round
== Round
::TowardNegative
;
937 fn div_r(mut self, rhs
: Self, round
: Round
) -> StatusAnd
<Self> {
938 self.sign ^
= rhs
.sign
;
940 match (self.category
, rhs
.category
) {
941 (Category
::NaN
, _
) => {
946 (_
, Category
::NaN
) => {
947 self.category
= Category
::NaN
;
953 (Category
::Infinity
, Category
::Infinity
) | (Category
::Zero
, Category
::Zero
) => {
954 Status
::INVALID_OP
.and(Self::NAN
)
957 (Category
::Infinity
| Category
::Zero
, _
) => Status
::OK
.and(self),
959 (Category
::Normal
, Category
::Infinity
) => {
960 self.category
= Category
::Zero
;
964 (Category
::Normal
, Category
::Zero
) => {
965 self.category
= Category
::Infinity
;
966 Status
::DIV_BY_ZERO
.and(self)
969 (Category
::Normal
, Category
::Normal
) => {
971 let dividend
= self.sig
[0];
980 self = unpack
!(status
=, self.normalize(round
, loss
));
981 if loss
!= Loss
::ExactlyZero
{
982 status
|= Status
::INEXACT
;
989 fn c_fmod(mut self, rhs
: Self) -> StatusAnd
<Self> {
990 match (self.category
, rhs
.category
) {
992 | (Category
::Zero
, Category
::Infinity
| Category
::Normal
)
993 | (Category
::Normal
, Category
::Infinity
) => Status
::OK
.and(self),
995 (_
, Category
::NaN
) => {
997 self.category
= Category
::NaN
;
1002 (Category
::Infinity
, _
) | (_
, Category
::Zero
) => Status
::INVALID_OP
.and(Self::NAN
),
1004 (Category
::Normal
, Category
::Normal
) => {
1005 while self.is_finite_non_zero()
1006 && rhs
.is_finite_non_zero()
1007 && self.cmp_abs_normal(rhs
) != Ordering
::Less
1009 let mut v
= rhs
.scalbn(self.ilogb() - rhs
.ilogb());
1010 if self.cmp_abs_normal(v
) == Ordering
::Less
{
1016 self = unpack
!(status
=, self - v
);
1017 assert_eq
!(status
, Status
::OK
);
1019 Status
::OK
.and(self)
1024 fn round_to_integral(self, round
: Round
) -> StatusAnd
<Self> {
1025 // If the exponent is large enough, we know that this value is already
1026 // integral, and the arithmetic below would potentially cause it to saturate
1027 // to +/-Inf. Bail out early instead.
1028 if self.is_finite_non_zero() && self.exp
+ 1 >= S
::PRECISION
as ExpInt
{
1029 return Status
::OK
.and(self);
1032 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1033 // precision of our format, and then subtract it back off again. The choice
1034 // of rounding modes for the addition/subtraction determines the rounding mode
1035 // for our integral rounding as well.
1036 // NOTE: When the input value is negative, we do subtraction followed by
1037 // addition instead.
1038 assert
!(S
::PRECISION
<= 128);
1040 let magic_const
= unpack
!(status
=, Self::from_u128(1 << (S
::PRECISION
- 1)));
1041 let magic_const
= magic_const
.copy_sign(self);
1043 if status
!= Status
::OK
{
1044 return status
.and(self);
1048 r
= unpack
!(status
=, r
.add_r(magic_const
, round
));
1049 if status
!= Status
::OK
&& status
!= Status
::INEXACT
{
1050 return status
.and(self);
1053 // Restore the input sign to handle 0.0/-0.0 cases correctly.
1054 r
.sub_r(magic_const
, round
).map(|r
| r
.copy_sign(self))
1057 fn next_up(mut self) -> StatusAnd
<Self> {
1058 // Compute nextUp(x), handling each float category separately.
1059 match self.category
{
1060 Category
::Infinity
=> {
1062 // nextUp(-inf) = -largest
1063 Status
::OK
.and(-Self::largest())
1065 // nextUp(+inf) = +inf
1066 Status
::OK
.and(self)
1070 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
1071 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
1072 // change the payload.
1073 if self.is_signaling() {
1074 // For consistency, propagate the sign of the sNaN to the qNaN.
1075 Status
::INVALID_OP
.and(Self::NAN
.copy_sign(self))
1077 Status
::OK
.and(self)
1081 // nextUp(pm 0) = +smallest
1082 Status
::OK
.and(Self::SMALLEST
)
1084 Category
::Normal
=> {
1085 // nextUp(-smallest) = -0
1086 if self.is_smallest() && self.sign
{
1087 return Status
::OK
.and(-Self::ZERO
);
1090 // nextUp(largest) == INFINITY
1091 if self.is_largest() && !self.sign
{
1092 return Status
::OK
.and(Self::INFINITY
);
1095 // Excluding the integral bit. This allows us to test for binade boundaries.
1096 let sig_mask
= (1 << (S
::PRECISION
- 1)) - 1;
1098 // nextUp(normal) == normal + inc.
1100 // If we are negative, we need to decrement the significand.
1102 // We only cross a binade boundary that requires adjusting the exponent
1104 // 1. exponent != S::MIN_EXP. This implies we are not in the
1105 // smallest binade or are dealing with denormals.
1106 // 2. Our significand excluding the integral bit is all zeros.
1107 let crossing_binade_boundary
=
1108 self.exp
!= S
::MIN_EXP
&& self.sig
[0] & sig_mask
== 0;
1110 // Decrement the significand.
1112 // We always do this since:
1113 // 1. If we are dealing with a non-binade decrement, by definition we
1114 // just decrement the significand.
1115 // 2. If we are dealing with a normal -> normal binade decrement, since
1116 // we have an explicit integral bit the fact that all bits but the
1117 // integral bit are zero implies that subtracting one will yield a
1118 // significand with 0 integral bit and 1 in all other spots. Thus we
1119 // must just adjust the exponent and set the integral bit to 1.
1120 // 3. If we are dealing with a normal -> denormal binade decrement,
1121 // since we set the integral bit to 0 when we represent denormals, we
1122 // just decrement the significand.
1123 sig
::decrement(&mut self.sig
);
1125 if crossing_binade_boundary
{
1126 // Our result is a normal number. Do the following:
1127 // 1. Set the integral bit to 1.
1128 // 2. Decrement the exponent.
1129 sig
::set_bit(&mut self.sig
, S
::PRECISION
- 1);
1133 // If we are positive, we need to increment the significand.
1135 // We only cross a binade boundary that requires adjusting the exponent if
1136 // the input is not a denormal and all of said input's significand bits
1137 // are set. If all of said conditions are true: clear the significand, set
1138 // the integral bit to 1, and increment the exponent. If we have a
1139 // denormal always increment since moving denormals and the numbers in the
1140 // smallest normal binade have the same exponent in our representation.
1141 let crossing_binade_boundary
=
1142 !self.is_denormal() && self.sig
[0] & sig_mask
== sig_mask
;
1144 if crossing_binade_boundary
{
1146 sig
::set_bit(&mut self.sig
, S
::PRECISION
- 1);
1150 "We can not increment an exponent beyond the MAX_EXP \
1151 allowed by the given floating point semantics."
1155 sig
::increment(&mut self.sig
);
1158 Status
::OK
.and(self)
1163 fn from_bits(input
: u128
) -> Self {
1164 // Dispatch to semantics.
1168 fn from_u128_r(input
: u128
, round
: Round
) -> StatusAnd
<Self> {
1171 exp
: S
::PRECISION
as ExpInt
- 1,
1172 category
: Category
::Normal
,
1174 marker
: PhantomData
,
1176 .normalize(round
, Loss
::ExactlyZero
)
1179 fn from_str_r(mut s
: &str, mut round
: Round
) -> Result
<StatusAnd
<Self>, ParseError
> {
1181 return Err(ParseError("Invalid string length"));
1184 // Handle special cases.
1186 "inf" | "INFINITY" => return Ok(Status
::OK
.and(Self::INFINITY
)),
1187 "-inf" | "-INFINITY" => return Ok(Status
::OK
.and(-Self::INFINITY
)),
1188 "nan" | "NaN" => return Ok(Status
::OK
.and(Self::NAN
)),
1189 "-nan" | "-NaN" => return Ok(Status
::OK
.and(-Self::NAN
)),
1193 // Handle a leading minus sign.
1194 let minus
= s
.starts_with('
-'
);
1195 if minus
|| s
.starts_with('
+'
) {
1198 return Err(ParseError("String has no digits"));
1202 // Adjust the rounding mode for the absolute value below.
1207 let r
= if s
.starts_with("0x") || s
.starts_with("0X") {
1210 return Err(ParseError("Invalid string"));
1212 Self::from_hexadecimal_string(s
, round
)?
1214 Self::from_decimal_string(s
, round
)?
1217 Ok(r
.map(|r
| if minus { -r }
else { r }
))
1220 fn to_bits(self) -> u128
{
1221 // Dispatch to semantics.
1225 fn to_u128_r(self, width
: usize, round
: Round
, is_exact
: &mut bool
) -> StatusAnd
<u128
> {
1226 // The result of trying to convert a number too large.
1227 let overflow
= if self.sign
{
1228 // Negative numbers cannot be represented as unsigned.
1231 // Largest unsigned integer of the given width.
1237 match self.category
{
1238 Category
::NaN
=> Status
::INVALID_OP
.and(0),
1240 Category
::Infinity
=> Status
::INVALID_OP
.and(overflow
),
1243 // Negative zero can't be represented as an int.
1244 *is_exact
= !self.sign
;
1248 Category
::Normal
=> {
1251 // Step 1: place our absolute value, with any fraction truncated, in
1253 let truncated_bits
= if self.exp
< 0 {
1254 // Our absolute value is less than one; truncate everything.
1255 // For exponent -1 the integer bit represents .5, look at that.
1256 // For smaller exponents leftmost truncated bit is 0.
1257 S
::PRECISION
- 1 + (-self.exp
) as usize
1259 // We want the most significant (exponent + 1) bits; the rest are
1261 let bits
= self.exp
as usize + 1;
1263 // Hopelessly large in magnitude?
1265 return Status
::INVALID_OP
.and(overflow
);
1268 if bits
< S
::PRECISION
{
1269 // We truncate (S::PRECISION - bits) bits.
1270 r
= self.sig
[0] >> (S
::PRECISION
- bits
);
1273 // We want at least as many bits as are available.
1274 r
= self.sig
[0] << (bits
- S
::PRECISION
);
1279 // Step 2: work out any lost fraction, and increment the absolute
1280 // value if we would round away from zero.
1281 let mut loss
= Loss
::ExactlyZero
;
1282 if truncated_bits
> 0 {
1283 loss
= Loss
::through_truncation(&self.sig
, truncated_bits
);
1284 if loss
!= Loss
::ExactlyZero
1285 && self.round_away_from_zero(round
, loss
, truncated_bits
)
1287 r
= r
.wrapping_add(1);
1289 return Status
::INVALID_OP
.and(overflow
); // Overflow.
1294 // Step 3: check if we fit in the destination.
1296 return Status
::INVALID_OP
.and(overflow
);
1299 if loss
== Loss
::ExactlyZero
{
1303 Status
::INEXACT
.and(r
)
1309 fn cmp_abs_normal(self, rhs
: Self) -> Ordering
{
1310 assert
!(self.is_finite_non_zero());
1311 assert
!(rhs
.is_finite_non_zero());
1313 // If exponents are equal, do an unsigned comparison of the significands.
1314 self.exp
.cmp(&rhs
.exp
).then_with(|| sig
::cmp(&self.sig
, &rhs
.sig
))
1317 fn bitwise_eq(self, rhs
: Self) -> bool
{
1318 if self.category
!= rhs
.category
|| self.sign
!= rhs
.sign
{
1322 if self.category
== Category
::Zero
|| self.category
== Category
::Infinity
{
1326 if self.is_finite_non_zero() && self.exp
!= rhs
.exp
{
1333 fn is_negative(self) -> bool
{
1337 fn is_denormal(self) -> bool
{
1338 self.is_finite_non_zero()
1339 && self.exp
== S
::MIN_EXP
1340 && !sig
::get_bit(&self.sig
, S
::PRECISION
- 1)
1343 fn is_signaling(self) -> bool
{
1344 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
1345 // first bit of the trailing significand being 0.
1346 self.is_nan() && !sig
::get_bit(&self.sig
, S
::QNAN_BIT
)
1349 fn category(self) -> Category
{
1353 fn get_exact_inverse(self) -> Option
<Self> {
1354 // Special floats and denormals have no exact inverse.
1355 if !self.is_finite_non_zero() {
1359 // Check that the number is a power of two by making sure that only the
1360 // integer bit is set in the significand.
1361 if self.sig
!= [1 << (S
::PRECISION
- 1)] {
1366 let mut reciprocal
= Self::from_u128(1).value
;
1368 reciprocal
= unpack
!(status
=, reciprocal
/ self);
1369 if status
!= Status
::OK
{
1373 // Avoid multiplication with a denormal, it is not safe on all platforms and
1374 // may be slower than a normal division.
1375 if reciprocal
.is_denormal() {
1379 assert
!(reciprocal
.is_finite_non_zero());
1380 assert_eq
!(reciprocal
.sig
, [1 << (S
::PRECISION
- 1)]);
1385 fn ilogb(mut self) -> ExpInt
{
1392 if self.is_infinite() {
1395 if !self.is_denormal() {
1399 let sig_bits
= (S
::PRECISION
- 1) as ExpInt
;
1400 self.exp
+= sig_bits
;
1401 self = self.normalize(Round
::NearestTiesToEven
, Loss
::ExactlyZero
).value
;
1405 fn scalbn_r(mut self, exp
: ExpInt
, round
: Round
) -> Self {
1406 // If exp is wildly out-of-scale, simply adding it to self.exp will
1407 // overflow; clamp it to a safe range before adding, but ensure that the range
1408 // is large enough that the clamp does not change the result. The range we
1409 // need to support is the difference between the largest possible exponent and
1410 // the normalized exponent of half the smallest denormal.
1412 let sig_bits
= (S
::PRECISION
- 1) as i32;
1413 let max_change
= S
::MAX_EXP
as i32 - (S
::MIN_EXP
as i32 - sig_bits
) + 1;
1415 // Clamp to one past the range ends to let normalize handle overflow.
1416 let exp_change
= cmp
::min(cmp
::max(exp
as i32, -max_change
- 1), max_change
);
1417 self.exp
= self.exp
.saturating_add(exp_change
as ExpInt
);
1418 self = self.normalize(round
, Loss
::ExactlyZero
).value
;
1420 sig
::set_bit(&mut self.sig
, S
::QNAN_BIT
);
1425 fn frexp_r(mut self, exp
: &mut ExpInt
, round
: Round
) -> Self {
1426 *exp
= self.ilogb();
1428 // Quiet signalling nans.
1429 if *exp
== IEK_NAN
{
1430 sig
::set_bit(&mut self.sig
, S
::QNAN_BIT
);
1434 if *exp
== IEK_INF
{
1438 // 1 is added because frexp is defined to return a normalized fraction in
1439 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
1440 if *exp
== IEK_ZERO
{
1445 self.scalbn_r(-*exp
, round
)
1449 impl<S
: Semantics
, T
: Semantics
> FloatConvert
<IeeeFloat
<T
>> for IeeeFloat
<S
> {
1450 fn convert_r(self, round
: Round
, loses_info
: &mut bool
) -> StatusAnd
<IeeeFloat
<T
>> {
1451 let mut r
= IeeeFloat
{
1454 category
: self.category
,
1456 marker
: PhantomData
,
1459 // x86 has some unusual NaNs which cannot be represented in any other
1460 // format; note them here.
1461 fn is_x87_double_extended
<S
: Semantics
>() -> bool
{
1462 S
::QNAN_SIGNIFICAND
== X87DoubleExtendedS
::QNAN_SIGNIFICAND
1464 let x87_special_nan
= is_x87_double_extended
::<S
>()
1465 && !is_x87_double_extended
::<T
>()
1466 && r
.category
== Category
::NaN
1467 && (r
.sig
[0] & S
::QNAN_SIGNIFICAND
) != S
::QNAN_SIGNIFICAND
;
1469 // If this is a truncation of a denormal number, and the target semantics
1470 // has larger exponent range than the source semantics (this can happen
1471 // when truncating from PowerPC double-double to double format), the
1472 // right shift could lose result mantissa bits. Adjust exponent instead
1473 // of performing excessive shift.
1474 let mut shift
= T
::PRECISION
as ExpInt
- S
::PRECISION
as ExpInt
;
1475 if shift
< 0 && r
.is_finite_non_zero() {
1476 let mut exp_change
= sig
::omsb(&r
.sig
) as ExpInt
- S
::PRECISION
as ExpInt
;
1477 if r
.exp
+ exp_change
< T
::MIN_EXP
{
1478 exp_change
= T
::MIN_EXP
- r
.exp
;
1480 if exp_change
< shift
{
1484 shift
-= exp_change
;
1485 r
.exp
+= exp_change
;
1489 // If this is a truncation, perform the shift.
1490 let loss
= if shift
< 0 && (r
.is_finite_non_zero() || r
.category
== Category
::NaN
) {
1491 sig
::shift_right(&mut r
.sig
, &mut 0, -shift
as usize)
1496 // If this is an extension, perform the shift.
1497 if shift
> 0 && (r
.is_finite_non_zero() || r
.category
== Category
::NaN
) {
1498 sig
::shift_left(&mut r
.sig
, &mut 0, shift
as usize);
1502 if r
.is_finite_non_zero() {
1503 r
= unpack
!(status
=, r
.normalize(round
, loss
));
1504 *loses_info
= status
!= Status
::OK
;
1505 } else if r
.category
== Category
::NaN
{
1506 *loses_info
= loss
!= Loss
::ExactlyZero
|| x87_special_nan
;
1508 // For x87 extended precision, we want to make a NaN, not a special NaN if
1509 // the input wasn't special either.
1510 if !x87_special_nan
&& is_x87_double_extended
::<T
>() {
1511 sig
::set_bit(&mut r
.sig
, T
::PRECISION
- 1);
1514 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1515 // does not give you back the same bits. This is dubious, and we
1516 // don't currently do it. You're really supposed to get
1517 // an invalid operation signal at runtime, but nobody does that.
1518 status
= Status
::OK
;
1520 *loses_info
= false;
1521 status
= Status
::OK
;
1528 impl<S
: Semantics
> IeeeFloat
<S
> {
1529 /// Handle positive overflow. We either return infinity or
1530 /// the largest finite number. For negative overflow,
1531 /// negate the `round` argument before calling.
1532 fn overflow_result(round
: Round
) -> StatusAnd
<Self> {
1535 Round
::NearestTiesToEven
| Round
::NearestTiesToAway
| Round
::TowardPositive
=> {
1536 (Status
::OVERFLOW
| Status
::INEXACT
).and(Self::INFINITY
)
1538 // Otherwise we become the largest finite number.
1539 Round
::TowardNegative
| Round
::TowardZero
=> Status
::INEXACT
.and(Self::largest()),
1543 /// Returns `true` if, when truncating the current number, with `bit` the
1544 /// new LSB, with the given lost fraction and rounding mode, the result
1545 /// would need to be rounded away from zero (i.e., by increasing the
1546 /// signficand). This routine must work for `Category::Zero` of both signs, and
1547 /// `Category::Normal` numbers.
1548 fn round_away_from_zero(&self, round
: Round
, loss
: Loss
, bit
: usize) -> bool
{
1549 // NaNs and infinities should not have lost fractions.
1550 assert
!(self.is_finite_non_zero() || self.is_zero());
1552 // Current callers never pass this so we don't handle it.
1553 assert_ne
!(loss
, Loss
::ExactlyZero
);
1556 Round
::NearestTiesToAway
=> loss
== Loss
::ExactlyHalf
|| loss
== Loss
::MoreThanHalf
,
1557 Round
::NearestTiesToEven
=> {
1558 if loss
== Loss
::MoreThanHalf
{
1562 // Our zeros don't have a significand to test.
1563 if loss
== Loss
::ExactlyHalf
&& self.category
!= Category
::Zero
{
1564 return sig
::get_bit(&self.sig
, bit
);
1569 Round
::TowardZero
=> false,
1570 Round
::TowardPositive
=> !self.sign
,
1571 Round
::TowardNegative
=> self.sign
,
1575 fn normalize(mut self, round
: Round
, mut loss
: Loss
) -> StatusAnd
<Self> {
1576 if !self.is_finite_non_zero() {
1577 return Status
::OK
.and(self);
1580 // Before rounding normalize the exponent of Category::Normal numbers.
1581 let mut omsb
= sig
::omsb(&self.sig
);
1584 // OMSB is numbered from 1. We want to place it in the integer
1585 // bit numbered PRECISION if possible, with a compensating change in
1587 let mut final_exp
= self.exp
.saturating_add(omsb
as ExpInt
- S
::PRECISION
as ExpInt
);
1589 // If the resulting exponent is too high, overflow according to
1590 // the rounding mode.
1591 if final_exp
> S
::MAX_EXP
{
1592 let round
= if self.sign { -round }
else { round }
;
1593 return Self::overflow_result(round
).map(|r
| r
.copy_sign(self));
1596 // Subnormal numbers have exponent MIN_EXP, and their MSB
1597 // is forced based on that.
1598 if final_exp
< S
::MIN_EXP
{
1599 final_exp
= S
::MIN_EXP
;
1602 // Shifting left is easy as we don't lose precision.
1603 if final_exp
< self.exp
{
1604 assert_eq
!(loss
, Loss
::ExactlyZero
);
1606 let exp_change
= (self.exp
- final_exp
) as usize;
1607 sig
::shift_left(&mut self.sig
, &mut self.exp
, exp_change
);
1609 return Status
::OK
.and(self);
1612 // Shift right and capture any new lost fraction.
1613 if final_exp
> self.exp
{
1614 let exp_change
= (final_exp
- self.exp
) as usize;
1615 loss
= sig
::shift_right(&mut self.sig
, &mut self.exp
, exp_change
).combine(loss
);
1617 // Keep OMSB up-to-date.
1618 omsb
= omsb
.saturating_sub(exp_change
);
1622 // Now round the number according to round given the lost
1625 // As specified in IEEE 754, since we do not trap we do not report
1626 // underflow for exact results.
1627 if loss
== Loss
::ExactlyZero
{
1628 // Canonicalize zeros.
1630 self.category
= Category
::Zero
;
1633 return Status
::OK
.and(self);
1636 // Increment the significand if we're rounding away from zero.
1637 if self.round_away_from_zero(round
, loss
, 0) {
1639 self.exp
= S
::MIN_EXP
;
1642 // We should never overflow.
1643 assert_eq
!(sig
::increment(&mut self.sig
), 0);
1644 omsb
= sig
::omsb(&self.sig
);
1646 // Did the significand increment overflow?
1647 if omsb
== S
::PRECISION
+ 1 {
1648 // Renormalize by incrementing the exponent and shifting our
1649 // significand right one. However if we already have the
1650 // maximum exponent we overflow to infinity.
1651 if self.exp
== S
::MAX_EXP
{
1652 self.category
= Category
::Infinity
;
1654 return (Status
::OVERFLOW
| Status
::INEXACT
).and(self);
1657 let _
: Loss
= sig
::shift_right(&mut self.sig
, &mut self.exp
, 1);
1659 return Status
::INEXACT
.and(self);
1663 // The normal case - we were and are not denormal, and any
1664 // significand increment above didn't overflow.
1665 if omsb
== S
::PRECISION
{
1666 return Status
::INEXACT
.and(self);
1669 // We have a non-zero denormal.
1670 assert
!(omsb
< S
::PRECISION
);
1672 // Canonicalize zeros.
1674 self.category
= Category
::Zero
;
1677 // The Category::Zero case is a denormal that underflowed to zero.
1678 (Status
::UNDERFLOW
| Status
::INEXACT
).and(self)
1681 fn from_hexadecimal_string(s
: &str, round
: Round
) -> Result
<StatusAnd
<Self>, ParseError
> {
1682 let mut r
= IeeeFloat
{
1685 category
: Category
::Normal
,
1687 marker
: PhantomData
,
1690 let mut any_digits
= false;
1691 let mut has_exp
= false;
1692 let mut bit_pos
= LIMB_BITS
as isize;
1693 let mut loss
= None
;
1695 // Without leading or trailing zeros, irrespective of the dot.
1696 let mut first_sig_digit
= None
;
1697 let mut dot
= s
.len();
1699 for (p
, c
) in s
.char_indices() {
1700 // Skip leading zeros and any (hexa)decimal point.
1703 return Err(ParseError("String contains multiple dots"));
1706 } else if let Some(hex_value
) = c
.to_digit(16) {
1709 if first_sig_digit
.is_none() {
1713 first_sig_digit
= Some(p
);
1716 // Store the number while we have space.
1719 r
.sig
[0] |= (hex_value
as Limb
) << bit_pos
;
1720 // If zero or one-half (the hexadecimal digit 8) are followed
1721 // by non-zero, they're a little more than zero or one-half.
1722 } else if let Some(ref mut loss
) = loss
{
1724 if *loss
== Loss
::ExactlyZero
{
1725 *loss
= Loss
::LessThanHalf
;
1727 if *loss
== Loss
::ExactlyHalf
{
1728 *loss
= Loss
::MoreThanHalf
;
1732 loss
= Some(match hex_value
{
1733 0 => Loss
::ExactlyZero
,
1734 1..=7 => Loss
::LessThanHalf
,
1735 8 => Loss
::ExactlyHalf
,
1736 9..=15 => Loss
::MoreThanHalf
,
1737 _
=> unreachable
!(),
1740 } else if c
== 'p'
|| c
== 'P'
{
1742 return Err(ParseError("Significand has no digits"));
1749 let mut chars
= s
[p
+ 1..].chars().peekable();
1751 // Adjust for the given exponent.
1752 let exp_minus
= chars
.peek() == Some(&'
-'
);
1753 if exp_minus
|| chars
.peek() == Some(&'
+'
) {
1758 if let Some(value
) = c
.to_digit(10) {
1760 r
.exp
= r
.exp
.saturating_mul(10).saturating_add(value
as ExpInt
);
1762 return Err(ParseError("Invalid character in exponent"));
1766 return Err(ParseError("Exponent has no digits"));
1775 return Err(ParseError("Invalid character in significand"));
1779 return Err(ParseError("Significand has no digits"));
1782 // Hex floats require an exponent but not a hexadecimal point.
1784 return Err(ParseError("Hex strings require an exponent"));
1787 // Ignore the exponent if we are zero.
1788 let first_sig_digit
= match first_sig_digit
{
1790 None
=> return Ok(Status
::OK
.and(Self::ZERO
)),
1793 // Calculate the exponent adjustment implicit in the number of
1794 // significant digits and adjust for writing the significand starting
1795 // at the most significant nibble.
1796 let exp_adjustment
= if dot
> first_sig_digit
{
1797 ExpInt
::try_from(dot
- first_sig_digit
).unwrap()
1799 -ExpInt
::try_from(first_sig_digit
- dot
- 1).unwrap()
1801 let exp_adjustment
= exp_adjustment
1804 .saturating_add(S
::PRECISION
as ExpInt
)
1805 .saturating_sub(LIMB_BITS
as ExpInt
);
1806 r
.exp
= r
.exp
.saturating_add(exp_adjustment
);
1808 Ok(r
.normalize(round
, loss
.unwrap_or(Loss
::ExactlyZero
)))
1811 fn from_decimal_string(s
: &str, round
: Round
) -> Result
<StatusAnd
<Self>, ParseError
> {
1812 // Given a normal decimal floating point number of the form
1814 // dddd.dddd[eE][+-]ddd
1816 // where the decimal point and exponent are optional, fill out the
1817 // variables below. Exponent is appropriate if the significand is
1818 // treated as an integer, and normalized_exp if the significand
1819 // is taken to have the decimal point after a single leading
1822 // If the value is zero, first_sig_digit is None.
1824 let mut any_digits
= false;
1825 let mut dec_exp
= 0i32;
1827 // Without leading or trailing zeros, irrespective of the dot.
1828 let mut first_sig_digit
= None
;
1829 let mut last_sig_digit
= 0;
1830 let mut dot
= s
.len();
1832 for (p
, c
) in s
.char_indices() {
1835 return Err(ParseError("String contains multiple dots"));
1838 } else if let Some(dec_value
) = c
.to_digit(10) {
1842 if first_sig_digit
.is_none() {
1843 first_sig_digit
= Some(p
);
1847 } else if c
== 'e'
|| c
== 'E'
{
1849 return Err(ParseError("Significand has no digits"));
1856 let mut chars
= s
[p
+ 1..].chars().peekable();
1858 // Adjust for the given exponent.
1859 let exp_minus
= chars
.peek() == Some(&'
-'
);
1860 if exp_minus
|| chars
.peek() == Some(&'
+'
) {
1866 if let Some(value
) = c
.to_digit(10) {
1868 dec_exp
= dec_exp
.saturating_mul(10).saturating_add(value
as i32);
1870 return Err(ParseError("Invalid character in exponent"));
1874 return Err(ParseError("Exponent has no digits"));
1883 return Err(ParseError("Invalid character in significand"));
1887 return Err(ParseError("Significand has no digits"));
1890 // Test if we have a zero number allowing for non-zero exponents.
1891 let first_sig_digit
= match first_sig_digit
{
1893 None
=> return Ok(Status
::OK
.and(Self::ZERO
)),
1896 // Adjust the exponents for any decimal point.
1897 if dot
> last_sig_digit
{
1898 dec_exp
= dec_exp
.saturating_add((dot
- last_sig_digit
- 1) as i32);
1900 dec_exp
= dec_exp
.saturating_sub((last_sig_digit
- dot
) as i32);
1902 let significand_digits
= last_sig_digit
- first_sig_digit
+ 1
1903 - (dot
> first_sig_digit
&& dot
< last_sig_digit
) as usize;
1904 let normalized_exp
= dec_exp
.saturating_add(significand_digits
as i32 - 1);
1906 // Handle the cases where exponents are obviously too large or too
1907 // small. Writing L for log 10 / log 2, a number d.ddddd*10^dec_exp
1908 // definitely overflows if
1910 // (dec_exp - 1) * L >= MAX_EXP
1912 // and definitely underflows to zero where
1914 // (dec_exp + 1) * L <= MIN_EXP - PRECISION
1916 // With integer arithmetic the tightest bounds for L are
1918 // 93/28 < L < 196/59 [ numerator <= 256 ]
1919 // 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
1921 // Check for MAX_EXP.
1922 if normalized_exp
.saturating_sub(1).saturating_mul(42039) >= 12655 * S
::MAX_EXP
as i32 {
1923 // Overflow and round.
1924 return Ok(Self::overflow_result(round
));
1927 // Check for MIN_EXP.
1928 if normalized_exp
.saturating_add(1).saturating_mul(28738)
1929 <= 8651 * (S
::MIN_EXP
as i32 - S
::PRECISION
as i32)
1931 // Underflow to zero and round.
1933 if round
== Round
::TowardPositive { IeeeFloat::SMALLEST }
else { IeeeFloat::ZERO }
;
1934 return Ok((Status
::UNDERFLOW
| Status
::INEXACT
).and(r
));
1937 // A tight upper bound on number of bits required to hold an
1938 // N-digit decimal integer is N * 196 / 59. Allocate enough space
1939 // to hold the full significand, and an extra limb required by
1941 let max_limbs
= limbs_for_bits(1 + 196 * significand_digits
/ 59);
1942 let mut dec_sig
: SmallVec
<[Limb
; 1]> = SmallVec
::with_capacity(max_limbs
);
1944 // Convert to binary efficiently - we do almost all multiplication
1945 // in a Limb. When this would overflow do we do a single
1946 // bignum multiplication, and then revert again to multiplication
1948 let mut chars
= s
[first_sig_digit
..=last_sig_digit
].chars();
1951 let mut multiplier
= 1;
1954 let dec_value
= match chars
.next() {
1955 Some('
.'
) => continue,
1956 Some(c
) => c
.to_digit(10).unwrap(),
1961 val
= val
* 10 + dec_value
as Limb
;
1963 // The maximum number that can be multiplied by ten with any
1964 // digit added without overflowing a Limb.
1965 if multiplier
> (!0 - 9) / 10 {
1970 // If we've consumed no digits, we're done.
1971 if multiplier
== 1 {
1975 // Multiply out the current limb.
1976 let mut carry
= val
;
1977 for x
in &mut dec_sig
{
1978 let [low
, mut high
] = sig
::widening_mul(*x
, multiplier
);
1981 let (low
, overflow
) = low
.overflowing_add(carry
);
1982 high
+= overflow
as Limb
;
1988 // If we had carry, we need another limb (likely but not guaranteed).
1990 dec_sig
.push(carry
);
1994 // Calculate pow(5, abs(dec_exp)) into `pow5_full`.
1995 // The *_calc Vec's are reused scratch space, as an optimization.
1996 let (pow5_full
, mut pow5_calc
, mut sig_calc
, mut sig_scratch_calc
) = {
1997 let mut power
= dec_exp
.abs() as usize;
1999 const FIRST_EIGHT_POWERS
: [Limb
; 8] = [1, 5, 25, 125, 625, 3125, 15625, 78125];
2001 let mut p5_scratch
= smallvec
![];
2002 let mut p5
: SmallVec
<[Limb
; 1]> = smallvec
![FIRST_EIGHT_POWERS
[4]];
2004 let mut r_scratch
= smallvec
![];
2005 let mut r
: SmallVec
<[Limb
; 1]> = smallvec
![FIRST_EIGHT_POWERS
[power
& 7]];
2009 // Calculate pow(5,pow(2,n+3)).
2010 p5_scratch
.resize(p5
.len() * 2, 0);
2011 let _
: Loss
= sig
::mul(&mut p5_scratch
, &mut 0, &p5
, &p5
, p5
.len() * 2 * LIMB_BITS
);
2012 while p5_scratch
.last() == Some(&0) {
2015 mem
::swap(&mut p5
, &mut p5_scratch
);
2018 r_scratch
.resize(r
.len() + p5
.len(), 0);
2020 sig
::mul(&mut r_scratch
, &mut 0, &r
, &p5
, (r
.len() + p5
.len()) * LIMB_BITS
);
2021 while r_scratch
.last() == Some(&0) {
2024 mem
::swap(&mut r
, &mut r_scratch
);
2030 (r
, r_scratch
, p5
, p5_scratch
)
2033 // Attempt dec_sig * 10^dec_exp with increasing precision.
2034 let mut attempt
= 0;
2036 let calc_precision
= (LIMB_BITS
<< attempt
) - 1;
2039 let calc_normal_from_limbs
= |sig
: &mut SmallVec
<[Limb
; 1]>,
2041 -> StatusAnd
<ExpInt
> {
2042 sig
.resize(limbs_for_bits(calc_precision
), 0);
2043 let (mut loss
, mut exp
) = sig
::from_limbs(sig
, limbs
, calc_precision
);
2045 // Before rounding normalize the exponent of Category::Normal numbers.
2046 let mut omsb
= sig
::omsb(sig
);
2048 assert_ne
!(omsb
, 0);
2050 // OMSB is numbered from 1. We want to place it in the integer
2051 // bit numbered PRECISION if possible, with a compensating change in
2053 let final_exp
= exp
.saturating_add(omsb
as ExpInt
- calc_precision
as ExpInt
);
2055 // Shifting left is easy as we don't lose precision.
2056 if final_exp
< exp
{
2057 assert_eq
!(loss
, Loss
::ExactlyZero
);
2059 let exp_change
= (exp
- final_exp
) as usize;
2060 sig
::shift_left(sig
, &mut exp
, exp_change
);
2062 return Status
::OK
.and(exp
);
2065 // Shift right and capture any new lost fraction.
2066 if final_exp
> exp
{
2067 let exp_change
= (final_exp
- exp
) as usize;
2068 loss
= sig
::shift_right(sig
, &mut exp
, exp_change
).combine(loss
);
2070 // Keep OMSB up-to-date.
2071 omsb
= omsb
.saturating_sub(exp_change
);
2074 assert_eq
!(omsb
, calc_precision
);
2076 // Now round the number according to round given the lost
2079 // As specified in IEEE 754, since we do not trap we do not report
2080 // underflow for exact results.
2081 if loss
== Loss
::ExactlyZero
{
2082 return Status
::OK
.and(exp
);
2085 // Increment the significand if we're rounding away from zero.
2086 if loss
== Loss
::MoreThanHalf
|| loss
== Loss
::ExactlyHalf
&& sig
::get_bit(sig
, 0) {
2087 // We should never overflow.
2088 assert_eq
!(sig
::increment(sig
), 0);
2089 omsb
= sig
::omsb(sig
);
2091 // Did the significand increment overflow?
2092 if omsb
== calc_precision
+ 1 {
2093 let _
: Loss
= sig
::shift_right(sig
, &mut exp
, 1);
2095 return Status
::INEXACT
.and(exp
);
2099 // The normal case - we were and are not denormal, and any
2100 // significand increment above didn't overflow.
2101 Status
::INEXACT
.and(exp
)
2105 let mut exp
= unpack
!(status
=,
2106 calc_normal_from_limbs(&mut sig_calc
, &dec_sig
));
2108 let pow5_exp
= unpack
!(pow5_status
=,
2109 calc_normal_from_limbs(&mut pow5_calc
, &pow5_full
));
2111 // Add dec_exp, as 10^n = 5^n * 2^n.
2112 exp
+= dec_exp
as ExpInt
;
2114 let mut used_bits
= S
::PRECISION
;
2115 let mut truncated_bits
= calc_precision
- used_bits
;
2117 let half_ulp_err1
= (status
!= Status
::OK
) as Limb
;
2118 let (calc_loss
, half_ulp_err2
);
2122 sig_scratch_calc
.resize(sig_calc
.len() + pow5_calc
.len(), 0);
2123 calc_loss
= sig
::mul(
2124 &mut sig_scratch_calc
,
2130 mem
::swap(&mut sig_calc
, &mut sig_scratch_calc
);
2132 half_ulp_err2
= (pow5_status
!= Status
::OK
) as Limb
;
2136 sig_scratch_calc
.resize(sig_calc
.len(), 0);
2137 calc_loss
= sig
::div(
2138 &mut sig_scratch_calc
,
2144 mem
::swap(&mut sig_calc
, &mut sig_scratch_calc
);
2146 // Denormal numbers have less precision.
2147 if exp
< S
::MIN_EXP
{
2148 truncated_bits
+= (S
::MIN_EXP
- exp
) as usize;
2149 used_bits
= calc_precision
.saturating_sub(truncated_bits
);
2151 // Extra half-ulp lost in reciprocal of exponent.
2153 2 * (pow5_status
!= Status
::OK
|| calc_loss
!= Loss
::ExactlyZero
) as Limb
;
2156 // Both sig::mul and sig::div return the
2157 // result with the integer bit set.
2158 assert
!(sig
::get_bit(&sig_calc
, calc_precision
- 1));
2160 // The error from the true value, in half-ulps, on multiplying two
2161 // floating point numbers, which differ from the value they
2162 // approximate by at most half_ulp_err1 and half_ulp_err2 half-ulps, is strictly less
2163 // than the returned value.
2165 // See "How to Read Floating Point Numbers Accurately" by William D Clinger.
2166 assert
!(half_ulp_err1
< 2 || half_ulp_err2
< 2 || (half_ulp_err1
+ half_ulp_err2
< 8));
2168 let inexact
= (calc_loss
!= Loss
::ExactlyZero
) as Limb
;
2169 let half_ulp_err
= if half_ulp_err1
+ half_ulp_err2
== 0 {
2170 inexact
* 2 // <= inexact half-ulps.
2172 inexact
+ 2 * (half_ulp_err1
+ half_ulp_err2
)
2175 let ulps_from_boundary
= {
2176 let bits
= calc_precision
- used_bits
- 1;
2178 let i
= bits
/ LIMB_BITS
;
2179 let limb
= sig_calc
[i
] & (!0 >> (LIMB_BITS
- 1 - bits
% LIMB_BITS
));
2180 let boundary
= match round
{
2181 Round
::NearestTiesToEven
| Round
::NearestTiesToAway
=> 1 << (bits
% LIMB_BITS
),
2185 let delta
= limb
.wrapping_sub(boundary
);
2186 cmp
::min(delta
, delta
.wrapping_neg())
2187 } else if limb
== boundary
{
2188 if !sig
::is_all_zeros(&sig_calc
[1..i
]) {
2193 } else if limb
== boundary
.wrapping_sub(1) {
2194 if sig_calc
[1..i
].iter().any(|&x
| x
.wrapping_neg() != 1) {
2197 sig_calc
[0].wrapping_neg()
2204 // Are we guaranteed to round correctly if we truncate?
2205 if ulps_from_boundary
.saturating_mul(2) >= half_ulp_err
{
2206 let mut r
= IeeeFloat
{
2209 category
: Category
::Normal
,
2211 marker
: PhantomData
,
2213 sig
::extract(&mut r
.sig
, &sig_calc
, used_bits
, calc_precision
- used_bits
);
2214 // If we extracted less bits above we must adjust our exponent
2215 // to compensate for the implicit right shift.
2216 r
.exp
+= (S
::PRECISION
- used_bits
) as ExpInt
;
2217 let loss
= Loss
::through_truncation(&sig_calc
, truncated_bits
);
2218 return Ok(r
.normalize(round
, loss
));
2225 /// Combine the effect of two lost fractions.
2226 fn combine(self, less_significant
: Loss
) -> Loss
{
2227 let mut more_significant
= self;
2228 if less_significant
!= Loss
::ExactlyZero
{
2229 if more_significant
== Loss
::ExactlyZero
{
2230 more_significant
= Loss
::LessThanHalf
;
2231 } else if more_significant
== Loss
::ExactlyHalf
{
2232 more_significant
= Loss
::MoreThanHalf
;
2239 /// Returns the fraction lost were a bignum truncated losing the least
2240 /// significant `bits` bits.
2241 fn through_truncation(limbs
: &[Limb
], bits
: usize) -> Loss
{
2243 return Loss
::ExactlyZero
;
2246 let half_bit
= bits
- 1;
2247 let half_limb
= half_bit
/ LIMB_BITS
;
2248 let (half_limb
, rest
) = if half_limb
< limbs
.len() {
2249 (limbs
[half_limb
], &limbs
[..half_limb
])
2253 let half
= 1 << (half_bit
% LIMB_BITS
);
2254 let has_half
= half_limb
& half
!= 0;
2255 let has_rest
= half_limb
& (half
- 1) != 0 || !sig
::is_all_zeros(rest
);
2257 match (has_half
, has_rest
) {
2258 (false, false) => Loss
::ExactlyZero
,
2259 (false, true) => Loss
::LessThanHalf
,
2260 (true, false) => Loss
::ExactlyHalf
,
2261 (true, true) => Loss
::MoreThanHalf
,
2266 /// Implementation details of IeeeFloat significands, such as big integer arithmetic.
2267 /// As a rule of thumb, no functions in this module should dynamically allocate.
2269 use super::{limbs_for_bits, ExpInt, Limb, Loss, LIMB_BITS}
;
2270 use core
::cmp
::Ordering
;
2273 pub(super) fn is_all_zeros(limbs
: &[Limb
]) -> bool
{
2274 limbs
.iter().all(|&l
| l
== 0)
2277 /// One, not zero, based LSB. That is, returns 0 for a zeroed significand.
2278 pub(super) fn olsb(limbs
: &[Limb
]) -> usize {
2282 .find(|(_
, &limb
)| limb
!= 0)
2283 .map_or(0, |(i
, limb
)| i
* LIMB_BITS
+ limb
.trailing_zeros() as usize + 1)
2286 /// One, not zero, based MSB. That is, returns 0 for a zeroed significand.
2287 pub(super) fn omsb(limbs
: &[Limb
]) -> usize {
2291 .rfind(|(_
, &limb
)| limb
!= 0)
2292 .map_or(0, |(i
, limb
)| (i
+ 1) * LIMB_BITS
- limb
.leading_zeros() as usize)
2295 /// Comparison (unsigned) of two significands.
2296 pub(super) fn cmp(a
: &[Limb
], b
: &[Limb
]) -> Ordering
{
2297 assert_eq
!(a
.len(), b
.len());
2298 for (a
, b
) in a
.iter().zip(b
).rev() {
2300 Ordering
::Equal
=> {}
2308 /// Extracts the given bit.
2309 pub(super) fn get_bit(limbs
: &[Limb
], bit
: usize) -> bool
{
2310 limbs
[bit
/ LIMB_BITS
] & (1 << (bit
% LIMB_BITS
)) != 0
2313 /// Sets the given bit.
2314 pub(super) fn set_bit(limbs
: &mut [Limb
], bit
: usize) {
2315 limbs
[bit
/ LIMB_BITS
] |= 1 << (bit
% LIMB_BITS
);
2318 /// Clear the given bit.
2319 pub(super) fn clear_bit(limbs
: &mut [Limb
], bit
: usize) {
2320 limbs
[bit
/ LIMB_BITS
] &= !(1 << (bit
% LIMB_BITS
));
2323 /// Shifts `dst` left `bits` bits, subtract `bits` from its exponent.
2324 pub(super) fn shift_left(dst
: &mut [Limb
], exp
: &mut ExpInt
, bits
: usize) {
2326 // Our exponent should not underflow.
2327 *exp
= exp
.checked_sub(bits
as ExpInt
).unwrap();
2329 // Jump is the inter-limb jump; shift is the intra-limb shift.
2330 let jump
= bits
/ LIMB_BITS
;
2331 let shift
= bits
% LIMB_BITS
;
2333 for i
in (0..dst
.len()).rev() {
2339 // dst[i] comes from the two limbs src[i - jump] and, if we have
2340 // an intra-limb shift, src[i - jump - 1].
2341 limb
= dst
[i
- jump
];
2345 limb
|= dst
[i
- jump
- 1] >> (LIMB_BITS
- shift
);
2355 /// Shifts `dst` right `bits` bits noting lost fraction.
2356 pub(super) fn shift_right(dst
: &mut [Limb
], exp
: &mut ExpInt
, bits
: usize) -> Loss
{
2357 let loss
= Loss
::through_truncation(dst
, bits
);
2360 // Our exponent should not overflow.
2361 *exp
= exp
.checked_add(bits
as ExpInt
).unwrap();
2363 // Jump is the inter-limb jump; shift is the intra-limb shift.
2364 let jump
= bits
/ LIMB_BITS
;
2365 let shift
= bits
% LIMB_BITS
;
2367 // Perform the shift. This leaves the most significant `bits` bits
2368 // of the result at zero.
2369 for i
in 0..dst
.len() {
2372 if i
+ jump
>= dst
.len() {
2375 limb
= dst
[i
+ jump
];
2378 if i
+ jump
+ 1 < dst
.len() {
2379 limb
|= dst
[i
+ jump
+ 1] << (LIMB_BITS
- shift
);
2391 /// Copies the bit vector of width `src_bits` from `src`, starting at bit SRC_LSB,
2392 /// to `dst`, such that the bit SRC_LSB becomes the least significant bit of `dst`.
2393 /// All high bits above `src_bits` in `dst` are zero-filled.
2394 pub(super) fn extract(dst
: &mut [Limb
], src
: &[Limb
], src_bits
: usize, src_lsb
: usize) {
2399 let dst_limbs
= limbs_for_bits(src_bits
);
2400 assert
!(dst_limbs
<= dst
.len());
2402 let src
= &src
[src_lsb
/ LIMB_BITS
..];
2403 dst
[..dst_limbs
].copy_from_slice(&src
[..dst_limbs
]);
2405 let shift
= src_lsb
% LIMB_BITS
;
2406 let _
: Loss
= shift_right(&mut dst
[..dst_limbs
], &mut 0, shift
);
2408 // We now have (dst_limbs * LIMB_BITS - shift) bits from `src`
2409 // in `dst`. If this is less that src_bits, append the rest, else
2410 // clear the high bits.
2411 let n
= dst_limbs
* LIMB_BITS
- shift
;
2413 let mask
= (1 << (src_bits
- n
)) - 1;
2414 dst
[dst_limbs
- 1] |= (src
[dst_limbs
] & mask
) << (n
% LIMB_BITS
);
2415 } else if n
> src_bits
&& src_bits
% LIMB_BITS
> 0 {
2416 dst
[dst_limbs
- 1] &= (1 << (src_bits
% LIMB_BITS
)) - 1;
2419 // Clear high limbs.
2420 for x
in &mut dst
[dst_limbs
..] {
2425 /// We want the most significant PRECISION bits of `src`. There may not
2426 /// be that many; extract what we can.
2427 pub(super) fn from_limbs(dst
: &mut [Limb
], src
: &[Limb
], precision
: usize) -> (Loss
, ExpInt
) {
2428 let omsb
= omsb(src
);
2430 if precision
<= omsb
{
2431 extract(dst
, src
, precision
, omsb
- precision
);
2432 (Loss
::through_truncation(src
, omsb
- precision
), omsb
as ExpInt
- 1)
2434 extract(dst
, src
, omsb
, 0);
2435 (Loss
::ExactlyZero
, precision
as ExpInt
- 1)
2439 /// For every consecutive chunk of `bits` bits from `limbs`,
2440 /// going from most significant to the least significant bits,
2441 /// call `f` to transform those bits and store the result back.
2442 pub(super) fn each_chunk
<F
: FnMut(Limb
) -> Limb
>(limbs
: &mut [Limb
], bits
: usize, mut f
: F
) {
2443 assert_eq
!(LIMB_BITS
% bits
, 0);
2444 for limb
in limbs
.iter_mut().rev() {
2446 for i
in (0..LIMB_BITS
/ bits
).rev() {
2447 r
|= f((*limb
>> (i
* bits
)) & ((1 << bits
) - 1)) << (i
* bits
);
2453 /// Increment in-place, return the carry flag.
2454 pub(super) fn increment(dst
: &mut [Limb
]) -> Limb
{
2456 *x
= x
.wrapping_add(1);
2465 /// Decrement in-place, return the borrow flag.
2466 pub(super) fn decrement(dst
: &mut [Limb
]) -> Limb
{
2468 *x
= x
.wrapping_sub(1);
2477 /// `a += b + c` where `c` is zero or one. Returns the carry flag.
2478 pub(super) fn add(a
: &mut [Limb
], b
: &[Limb
], mut c
: Limb
) -> Limb
{
2481 for (a
, &b
) in a
.iter_mut().zip(b
) {
2482 let (r
, overflow
) = a
.overflowing_add(b
);
2483 let (r
, overflow2
) = r
.overflowing_add(c
);
2485 c
= (overflow
| overflow2
) as Limb
;
2491 /// `a -= b + c` where `c` is zero or one. Returns the borrow flag.
2492 pub(super) fn sub(a
: &mut [Limb
], b
: &[Limb
], mut c
: Limb
) -> Limb
{
2495 for (a
, &b
) in a
.iter_mut().zip(b
) {
2496 let (r
, overflow
) = a
.overflowing_sub(b
);
2497 let (r
, overflow2
) = r
.overflowing_sub(c
);
2499 c
= (overflow
| overflow2
) as Limb
;
2505 /// `a += b` or `a -= b`. Does not preserve `b`.
2506 pub(super) fn add_or_sub(
2514 // Are we bigger exponent-wise than the RHS?
2515 let bits
= *a_exp
- b_exp
;
2517 // Determine if the operation on the absolute values is effectively
2518 // an addition or subtraction.
2519 // Subtraction is more subtle than one might naively expect.
2520 if *a_sign ^ b_sign
{
2521 let (reverse
, loss
);
2524 reverse
= cmp(a_sig
, b_sig
) == Ordering
::Less
;
2525 loss
= Loss
::ExactlyZero
;
2526 } else if bits
> 0 {
2527 loss
= shift_right(b_sig
, &mut 0, (bits
- 1) as usize);
2528 shift_left(a_sig
, a_exp
, 1);
2531 loss
= shift_right(a_sig
, a_exp
, (-bits
- 1) as usize);
2532 shift_left(b_sig
, &mut 0, 1);
2536 let borrow
= (loss
!= Loss
::ExactlyZero
) as Limb
;
2538 // The code above is intended to ensure that no borrow is necessary.
2539 assert_eq
!(sub(b_sig
, a_sig
, borrow
), 0);
2540 a_sig
.copy_from_slice(b_sig
);
2543 // The code above is intended to ensure that no borrow is necessary.
2544 assert_eq
!(sub(a_sig
, b_sig
, borrow
), 0);
2547 // Invert the lost fraction - it was on the RHS and subtracted.
2549 Loss
::LessThanHalf
=> Loss
::MoreThanHalf
,
2550 Loss
::MoreThanHalf
=> Loss
::LessThanHalf
,
2554 let loss
= if bits
> 0 {
2555 shift_right(b_sig
, &mut 0, bits
as usize)
2557 shift_right(a_sig
, a_exp
, -bits
as usize)
2559 // We have a guard bit; generating a carry cannot happen.
2560 assert_eq
!(add(a_sig
, b_sig
, 0), 0);
2565 /// `[low, high] = a * b`.
2567 /// This cannot overflow, because
2569 /// `(n - 1) * (n - 1) + 2 * (n - 1) == (n - 1) * (n + 1)`
2571 /// which is less than n<sup>2</sup>.
2572 pub(super) fn widening_mul(a
: Limb
, b
: Limb
) -> [Limb
; 2] {
2573 let mut wide
= [0, 0];
2575 if a
== 0 || b
== 0 {
2579 const HALF_BITS
: usize = LIMB_BITS
/ 2;
2581 let select
= |limb
, i
| (limb
>> (i
* HALF_BITS
)) & ((1 << HALF_BITS
) - 1);
2584 let mut x
= [select(a
, i
) * select(b
, j
), 0];
2585 shift_left(&mut x
, &mut 0, (i
+ j
) * HALF_BITS
);
2586 assert_eq
!(add(&mut wide
, &x
, 0), 0);
2593 /// `dst = a * b` (for normal `a` and `b`). Returns the lost fraction.
2594 pub(super) fn mul
<'a
>(
2601 // Put the narrower number on the `a` for less loops below.
2602 if a
.len() > b
.len() {
2603 mem
::swap(&mut a
, &mut b
);
2606 for x
in &mut dst
[..b
.len()] {
2610 for i
in 0..a
.len() {
2612 for j
in 0..b
.len() {
2613 let [low
, mut high
] = widening_mul(a
[i
], b
[j
]);
2616 let (low
, overflow
) = low
.overflowing_add(carry
);
2617 high
+= overflow
as Limb
;
2619 // And now `dst[i + j]`, and store the new low part there.
2620 let (low
, overflow
) = low
.overflowing_add(dst
[i
+ j
]);
2621 high
+= overflow
as Limb
;
2626 dst
[i
+ b
.len()] = carry
;
2629 // Assume the operands involved in the multiplication are single-precision
2630 // FP, and the two multiplicants are:
2631 // a = a23 . a22 ... a0 * 2^e1
2632 // b = b23 . b22 ... b0 * 2^e2
2633 // the result of multiplication is:
2634 // dst = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
2635 // Note that there are three significant bits at the left-hand side of the
2636 // radix point: two for the multiplication, and an overflow bit for the
2637 // addition (that will always be zero at this point). Move the radix point
2638 // toward left by two bits, and adjust exponent accordingly.
2641 // Convert the result having "2 * precision" significant-bits back to the one
2642 // having "precision" significant-bits. First, move the radix point from
2643 // poision "2*precision - 1" to "precision - 1". The exponent need to be
2644 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
2645 *exp
-= precision
as ExpInt
+ 1;
2647 // In case MSB resides at the left-hand side of radix point, shift the
2648 // mantissa right by some amount to make sure the MSB reside right before
2649 // the radix point (i.e., "MSB . rest-significant-bits").
2651 // Note that the result is not normalized when "omsb < precision". So, the
2652 // caller needs to call IeeeFloat::normalize() if normalized value is
2654 let omsb
= omsb(dst
);
2655 if omsb
<= precision { Loss::ExactlyZero }
else { shift_right(dst, exp, omsb - precision) }
2658 /// `quotient = dividend / divisor`. Returns the lost fraction.
2659 /// Does not preserve `dividend` or `divisor`.
2661 quotient
: &mut [Limb
],
2663 dividend
: &mut [Limb
],
2664 divisor
: &mut [Limb
],
2667 // Normalize the divisor.
2668 let bits
= precision
- omsb(divisor
);
2669 shift_left(divisor
, &mut 0, bits
);
2670 *exp
+= bits
as ExpInt
;
2672 // Normalize the dividend.
2673 let bits
= precision
- omsb(dividend
);
2674 shift_left(dividend
, exp
, bits
);
2677 let olsb_divisor
= olsb(divisor
);
2678 if olsb_divisor
== precision
{
2679 quotient
.copy_from_slice(dividend
);
2680 return Loss
::ExactlyZero
;
2683 // Ensure the dividend >= divisor initially for the loop below.
2684 // Incidentally, this means that the division loop below is
2685 // guaranteed to set the integer bit to one.
2686 if cmp(dividend
, divisor
) == Ordering
::Less
{
2687 shift_left(dividend
, exp
, 1);
2688 assert_ne
!(cmp(dividend
, divisor
), Ordering
::Less
)
2691 // Helper for figuring out the lost fraction.
2692 let lost_fraction
= |dividend
: &[Limb
], divisor
: &[Limb
]| match cmp(dividend
, divisor
) {
2693 Ordering
::Greater
=> Loss
::MoreThanHalf
,
2694 Ordering
::Equal
=> Loss
::ExactlyHalf
,
2696 if is_all_zeros(dividend
) {
2704 // Try to perform a (much faster) short division for small divisors.
2705 let divisor_bits
= precision
- (olsb_divisor
- 1);
2706 macro_rules
! try_short_div
{
2707 ($W
:ty
, $H
:ty
, $half
:expr
) => {
2708 if divisor_bits
* 2 <= $half
{
2709 // Extract the small divisor.
2710 let _
: Loss
= shift_right(divisor
, &mut 0, olsb_divisor
- 1);
2711 let divisor
= divisor
[0] as $H
as $W
;
2713 // Shift the dividend to produce a quotient with the unit bit set.
2714 let top_limb
= *dividend
.last().unwrap();
2715 let mut rem
= (top_limb
>> (LIMB_BITS
- (divisor_bits
- 1))) as $H
;
2716 shift_left(dividend
, &mut 0, divisor_bits
- 1);
2718 // Apply short division in place on $H (of $half bits) chunks.
2719 each_chunk(dividend
, $half
, |chunk
| {
2720 let chunk
= chunk
as $H
;
2721 let combined
= ((rem
as $W
) << $half
) | (chunk
as $W
);
2722 rem
= (combined
% divisor
) as $H
;
2723 (combined
/ divisor
) as $H
as Limb
2725 quotient
.copy_from_slice(dividend
);
2727 return lost_fraction(&[(rem
as Limb
) << 1], &[divisor
as Limb
]);
2732 try_short_div
!(u32, u16, 16);
2733 try_short_div
!(u64, u32, 32);
2734 try_short_div
!(u128
, u64, 64);
2736 // Zero the quotient before setting bits in it.
2737 for x
in &mut quotient
[..limbs_for_bits(precision
)] {
2742 for bit
in (0..precision
).rev() {
2743 if cmp(dividend
, divisor
) != Ordering
::Less
{
2744 sub(dividend
, divisor
, 0);
2745 set_bit(quotient
, bit
);
2747 shift_left(dividend
, &mut 0, 1);
2750 lost_fraction(dividend
, divisor
)