1 use crate::{Category, ExpInt, IEK_INF, IEK_NAN, IEK_ZERO}
;
2 use crate::{Float, FloatConvert, ParseError, Round, Status, StatusAnd}
;
4 use core
::cmp
::{self, Ordering}
;
5 use core
::convert
::TryFrom
;
6 use core
::fmt
::{self, Write}
;
7 use core
::marker
::PhantomData
;
10 use smallvec
::{smallvec, SmallVec}
;
13 pub struct IeeeFloat
<S
> {
14 /// Absolute significand value (including the integer bit).
17 /// The signed unbiased exponent of the value.
20 /// What kind of floating point number this is.
23 /// Sign bit of the number.
26 marker
: PhantomData
<S
>,
29 /// Fundamental unit of big integer arithmetic, but also
30 /// large to store the largest significands by itself.
32 const LIMB_BITS
: usize = 128;
33 fn limbs_for_bits(bits
: usize) -> usize {
34 (bits
+ LIMB_BITS
- 1) / LIMB_BITS
37 /// Enum that represents what fraction of the LSB truncated bits of an fp number
40 /// This essentially combines the roles of guard and sticky bits.
42 #[derive(Copy, Clone, PartialEq, Eq, Debug)]
44 // Example of truncated bits:
45 ExactlyZero
, // 000000
46 LessThanHalf
, // 0xxxxx x's not all zero
47 ExactlyHalf
, // 100000
48 MoreThanHalf
, // 1xxxxx x's not all zero
51 /// Represents floating point arithmetic semantics.
52 pub trait Semantics
: Sized
{
53 /// Total number of bits in the in-memory format.
56 /// Number of bits in the significand. This includes the integer bit.
57 const PRECISION
: usize;
59 /// The largest E such that 2<sup>E</sup> is representable; this matches the
60 /// definition of IEEE 754.
61 const MAX_EXP
: ExpInt
;
63 /// The smallest E such that 2<sup>E</sup> is a normalized number; this
64 /// matches the definition of IEEE 754.
65 const MIN_EXP
: ExpInt
= -Self::MAX_EXP
+ 1;
67 /// The significand bit that marks NaN as quiet.
68 const QNAN_BIT
: usize = Self::PRECISION
- 2;
70 /// The significand bitpattern to mark a NaN as quiet.
71 /// NOTE: for X87DoubleExtended we need to set two bits instead of 2.
72 const QNAN_SIGNIFICAND
: Limb
= 1 << Self::QNAN_BIT
;
74 fn from_bits(bits
: u128
) -> IeeeFloat
<Self> {
75 assert
!(Self::BITS
> Self::PRECISION
);
77 let sign
= bits
& (1 << (Self::BITS
- 1));
78 let exponent
= (bits
& !sign
) >> (Self::PRECISION
- 1);
79 let mut r
= IeeeFloat
{
80 sig
: [bits
& ((1 << (Self::PRECISION
- 1)) - 1)],
81 // Convert the exponent from its bias representation to a signed integer.
82 exp
: (exponent
as ExpInt
) - Self::MAX_EXP
,
83 category
: Category
::Zero
,
88 if r
.exp
== Self::MIN_EXP
- 1 && r
.sig
== [0] {
89 // Exponent, significand meaningless.
90 r
.category
= Category
::Zero
;
91 } else if r
.exp
== Self::MAX_EXP
+ 1 && r
.sig
== [0] {
92 // Exponent, significand meaningless.
93 r
.category
= Category
::Infinity
;
94 } else if r
.exp
== Self::MAX_EXP
+ 1 && r
.sig
!= [0] {
95 // Sign, exponent, significand meaningless.
96 r
.category
= Category
::NaN
;
98 r
.category
= Category
::Normal
;
99 if r
.exp
== Self::MIN_EXP
- 1 {
101 r
.exp
= Self::MIN_EXP
;
104 sig
::set_bit(&mut r
.sig
, Self::PRECISION
- 1);
111 fn to_bits(x
: IeeeFloat
<Self>) -> u128
{
112 assert
!(Self::BITS
> Self::PRECISION
);
114 // Split integer bit from significand.
115 let integer_bit
= sig
::get_bit(&x
.sig
, Self::PRECISION
- 1);
116 let mut significand
= x
.sig
[0] & ((1 << (Self::PRECISION
- 1)) - 1);
117 let exponent
= match x
.category
{
118 Category
::Normal
=> {
119 if x
.exp
== Self::MIN_EXP
&& !integer_bit
{
127 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
131 Category
::Infinity
=> {
132 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
136 Category
::NaN
=> Self::MAX_EXP
+ 1,
139 // Convert the exponent from a signed integer to its bias representation.
140 let exponent
= (exponent
+ Self::MAX_EXP
) as u128
;
142 ((x
.sign
as u128
) << (Self::BITS
- 1)) | (exponent
<< (Self::PRECISION
- 1)) | significand
146 impl<S
> Copy
for IeeeFloat
<S
> {}
147 impl<S
> Clone
for IeeeFloat
<S
> {
148 fn clone(&self) -> Self {
153 macro_rules
! ieee_semantics
{
154 ($
($name
:ident
= $sem
:ident($bits
:tt
: $exp_bits
:tt
)),*) => {
156 $
(pub type $name
= IeeeFloat
<$sem
>;)*
157 $
(impl Semantics
for $sem
{
158 const BITS
: usize = $bits
;
159 const PRECISION
: usize = ($bits
- 1 - $exp_bits
) + 1;
160 const MAX_EXP
: ExpInt
= (1 << ($exp_bits
- 1)) - 1;
167 Single
= SingleS(32:8),
168 Double
= DoubleS(64:11),
172 pub struct X87DoubleExtendedS
;
173 pub type X87DoubleExtended
= IeeeFloat
<X87DoubleExtendedS
>;
174 impl Semantics
for X87DoubleExtendedS
{
175 const BITS
: usize = 80;
176 const PRECISION
: usize = 64;
177 const MAX_EXP
: ExpInt
= (1 << (15 - 1)) - 1;
179 /// For x87 extended precision, we want to make a NaN, not a
180 /// pseudo-NaN. Maybe we should expose the ability to make
182 const QNAN_SIGNIFICAND
: Limb
= 0b11 << Self::QNAN_BIT
;
184 /// Integer bit is explicit in this format. Intel hardware (387 and later)
185 /// does not support these bit patterns:
186 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
187 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
188 /// exponent = 0, integer bit 1 ("pseudodenormal")
189 /// exponent != 0 nor all 1's, integer bit 0 ("unnormal")
190 /// At the moment, the first two are treated as NaNs, the second two as Normal.
191 fn from_bits(bits
: u128
) -> IeeeFloat
<Self> {
192 let sign
= bits
& (1 << (Self::BITS
- 1));
193 let exponent
= (bits
& !sign
) >> Self::PRECISION
;
194 let mut r
= IeeeFloat
{
195 sig
: [bits
& ((1 << (Self::PRECISION
- 1)) - 1)],
196 // Convert the exponent from its bias representation to a signed integer.
197 exp
: (exponent
as ExpInt
) - Self::MAX_EXP
,
198 category
: Category
::Zero
,
203 if r
.exp
== Self::MIN_EXP
- 1 && r
.sig
== [0] {
204 // Exponent, significand meaningless.
205 r
.category
= Category
::Zero
;
206 } else if r
.exp
== Self::MAX_EXP
+ 1 && r
.sig
== [1 << (Self::PRECISION
- 1)] {
207 // Exponent, significand meaningless.
208 r
.category
= Category
::Infinity
;
209 } else if r
.exp
== Self::MAX_EXP
+ 1 && r
.sig
!= [1 << (Self::PRECISION
- 1)] {
210 // Sign, exponent, significand meaningless.
211 r
.category
= Category
::NaN
;
213 r
.category
= Category
::Normal
;
214 if r
.exp
== Self::MIN_EXP
- 1 {
216 r
.exp
= Self::MIN_EXP
;
223 fn to_bits(x
: IeeeFloat
<Self>) -> u128
{
224 // Get integer bit from significand.
225 let integer_bit
= sig
::get_bit(&x
.sig
, Self::PRECISION
- 1);
226 let mut significand
= x
.sig
[0] & ((1 << Self::PRECISION
) - 1);
227 let exponent
= match x
.category
{
228 Category
::Normal
=> {
229 if x
.exp
== Self::MIN_EXP
&& !integer_bit
{
237 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
241 Category
::Infinity
=> {
242 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
243 significand
= 1 << (Self::PRECISION
- 1);
246 Category
::NaN
=> Self::MAX_EXP
+ 1,
249 // Convert the exponent from a signed integer to its bias representation.
250 let exponent
= (exponent
+ Self::MAX_EXP
) as u128
;
252 ((x
.sign
as u128
) << (Self::BITS
- 1)) | (exponent
<< Self::PRECISION
) | significand
256 float_common_impls
!(IeeeFloat
<S
>);
258 impl<S
: Semantics
> PartialEq
for IeeeFloat
<S
> {
259 fn eq(&self, rhs
: &Self) -> bool
{
260 self.partial_cmp(rhs
) == Some(Ordering
::Equal
)
264 impl<S
: Semantics
> PartialOrd
for IeeeFloat
<S
> {
265 fn partial_cmp(&self, rhs
: &Self) -> Option
<Ordering
> {
266 match (self.category
, rhs
.category
) {
267 (Category
::NaN
, _
) | (_
, Category
::NaN
) => None
,
269 (Category
::Infinity
, Category
::Infinity
) => Some((!self.sign
).cmp(&(!rhs
.sign
))),
271 (Category
::Zero
, Category
::Zero
) => Some(Ordering
::Equal
),
273 (Category
::Infinity
, _
) | (Category
::Normal
, Category
::Zero
) => {
274 Some((!self.sign
).cmp(&self.sign
))
277 (_
, Category
::Infinity
) | (Category
::Zero
, Category
::Normal
) => {
278 Some(rhs
.sign
.cmp(&(!rhs
.sign
)))
281 (Category
::Normal
, Category
::Normal
) => {
282 // Two normal numbers. Do they have the same sign?
283 Some((!self.sign
).cmp(&(!rhs
.sign
)).then_with(|| {
284 // Compare absolute values; invert result if negative.
285 let result
= self.cmp_abs_normal(*rhs
);
287 if self.sign { result.reverse() }
else { result }
294 impl<S
> Neg
for IeeeFloat
<S
> {
296 fn neg(mut self) -> Self {
297 self.sign
= !self.sign
;
302 /// Prints this value as a decimal string.
304 /// \param precision The maximum number of digits of
305 /// precision to output. If there are fewer digits available,
306 /// zero padding will not be used unless the value is
307 /// integral and small enough to be expressed in
308 /// precision digits. 0 means to use the natural
309 /// precision of the number.
310 /// \param width The maximum number of zeros to
311 /// consider inserting before falling back to scientific
312 /// notation. 0 means to always use scientific notation.
314 /// \param alternate Indicate whether to remove the trailing zero in
315 /// fraction part or not. Also setting this parameter to true forces
316 /// producing of output more similar to default printf behavior.
317 /// Specifically the lower e is used as exponent delimiter and exponent
318 /// always contains no less than two digits.
320 /// Number precision width Result
321 /// ------ --------- ----- ------
322 /// 1.01E+4 5 2 10100
323 /// 1.01E+4 4 2 1.01E+4
324 /// 1.01E+4 5 1 1.01E+4
325 /// 1.01E-2 5 2 0.0101
326 /// 1.01E-2 4 2 0.0101
327 /// 1.01E-2 4 1 1.01E-2
328 impl<S
: Semantics
> fmt
::Display
for IeeeFloat
<S
> {
329 fn fmt(&self, f
: &mut fmt
::Formatter
<'_
>) -> fmt
::Result
{
330 let width
= f
.width().unwrap_or(3);
331 let alternate
= f
.alternate();
333 match self.category
{
334 Category
::Infinity
=> {
336 return f
.write_str("-Inf");
338 return f
.write_str("+Inf");
342 Category
::NaN
=> return f
.write_str("NaN"),
352 if let Some(n
) = f
.precision() {
357 f
.write_str("e+00")?
;
359 f
.write_str("0.0E+0")?
;
367 Category
::Normal
=> {}
374 // We use enough digits so the number can be round-tripped back to an
375 // APFloat. The formula comes from "How to Print Floating-Point Numbers
376 // Accurately" by Steele and White.
377 // FIXME: Using a formula based purely on the precision is conservative;
378 // we can print fewer digits depending on the actual value being printed.
380 // precision = 2 + floor(S::PRECISION / lg_2(10))
381 let precision
= f
.precision().unwrap_or(2 + S
::PRECISION
* 59 / 196);
383 // Decompose the number into an APInt and an exponent.
384 let mut exp
= self.exp
- (S
::PRECISION
as ExpInt
- 1);
385 let mut sig
= vec
![self.sig
[0]];
387 // Ignore trailing binary zeros.
388 let trailing_zeros
= sig
[0].trailing_zeros();
389 let _
: Loss
= sig
::shift_right(&mut sig
, &mut exp
, trailing_zeros
as usize);
391 // Change the exponent from 2^e to 10^e.
392 #[allow(clippy::comparison_chain)]
397 let shift
= exp
as usize;
398 sig
.resize(limbs_for_bits(S
::PRECISION
+ shift
), 0);
399 sig
::shift_left(&mut sig
, &mut exp
, shift
);
402 let mut texp
= -exp
as usize;
404 // We transform this using the identity:
405 // (N)(2^-e) == (N)(5^e)(10^-e)
407 // Multiply significand by 5^e.
408 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
409 let mut sig_scratch
= vec
![];
411 let mut p5_scratch
= vec
![];
416 p5_scratch
.resize(p5
.len() * 2, 0);
418 sig
::mul(&mut p5_scratch
, &mut 0, &p5
, &p5
, p5
.len() * 2 * LIMB_BITS
);
419 while p5_scratch
.last() == Some(&0) {
422 mem
::swap(&mut p5
, &mut p5_scratch
);
425 sig_scratch
.resize(sig
.len() + p5
.len(), 0);
426 let _
: Loss
= sig
::mul(
431 (sig
.len() + p5
.len()) * LIMB_BITS
,
433 while sig_scratch
.last() == Some(&0) {
436 mem
::swap(&mut sig
, &mut sig_scratch
);
443 let mut buffer
= vec
![];
445 // Ignore digits from the significand until it is no more
446 // precise than is required for the desired precision.
447 // 196/59 is a very slight overestimate of lg_2(10).
448 let required
= (precision
* 196 + 58) / 59;
449 let mut discard_digits
= sig
::omsb(&sig
).saturating_sub(required
) * 59 / 196;
450 let mut in_trail
= true;
451 while !sig
.is_empty() {
452 // Perform short division by 10 to extract the rightmost digit.
457 // Use 64-bit division and remainder, with 32-bit chunks from sig.
458 sig
::each_chunk(&mut sig
, 32, |chunk
| {
459 let chunk
= chunk
as u32;
460 let combined
= ((rem
as u64) << 32) | (chunk
as u64);
461 rem
= (combined
% 10) as u8;
462 (combined
/ 10) as u32 as Limb
465 // Reduce the sigificand to avoid wasting time dividing 0's.
466 while sig
.last() == Some(&0) {
472 // Ignore digits we don't need.
473 if discard_digits
> 0 {
479 // Drop trailing zeros.
480 if in_trail
&& digit
== 0 {
484 buffer
.push(b'
0'
+ digit
);
488 assert
!(!buffer
.is_empty(), "no characters in buffer!");
490 // Drop down to precision.
491 // FIXME: don't do more precise calculations above than are required.
492 if buffer
.len() > precision
{
493 // The most significant figures are the last ones in the buffer.
494 let mut first_sig
= buffer
.len() - precision
;
497 // FIXME: this probably shouldn't use 'round half up'.
499 // Rounding down is just a truncation, except we also want to drop
500 // trailing zeros from the new result.
501 if buffer
[first_sig
- 1] < b'
5'
{
502 while first_sig
< buffer
.len() && buffer
[first_sig
] == b'
0'
{
506 // Rounding up requires a decimal add-with-carry. If we continue
507 // the carry, the newly-introduced zeros will just be truncated.
508 for x
in &mut buffer
[first_sig
..] {
518 exp
+= first_sig
as ExpInt
;
519 buffer
.drain(..first_sig
);
521 // If we carried through, we have exactly one digit of precision.
522 if buffer
.is_empty() {
527 let digits
= buffer
.len();
529 // Check whether we should use scientific notation.
530 let scientific
= if width
== 0 {
535 // But we shouldn't make the number look more precise than it is.
536 exp
as usize > width
|| digits
+ exp
as usize > precision
538 // Power of the most significant digit.
539 let msd
= exp
+ (digits
- 1) as ExpInt
;
546 -msd
as usize > width
550 // Scientific formatting is pretty straightforward.
552 exp
+= digits
as ExpInt
- 1;
554 f
.write_char(buffer
[digits
- 1] as char)?
;
556 let truncate_zero
= !alternate
;
557 if digits
== 1 && truncate_zero
{
560 for &d
in buffer
[..digits
- 1].iter().rev() {
561 f
.write_char(d
as char)?
;
564 // Fill with zeros up to precision.
565 if !truncate_zero
&& precision
> digits
- 1 {
566 for _
in 0..=precision
- digits
{
570 // For alternate we use lower 'e'.
571 f
.write_char(if alternate { 'e' }
else { 'E' }
)?
;
573 // Exponent always at least two digits if we do not truncate zeros.
575 write
!(f
, "{:+}", exp
)?
;
577 write
!(f
, "{:+03}", exp
)?
;
583 // Non-scientific, positive exponents.
585 for &d
in buffer
.iter().rev() {
586 f
.write_char(d
as char)?
;
594 // Non-scientific, negative exponents.
595 let unit_place
= -exp
as usize;
596 if unit_place
< digits
{
597 for &d
in buffer
[unit_place
..].iter().rev() {
598 f
.write_char(d
as char)?
;
601 for &d
in buffer
[..unit_place
].iter().rev() {
602 f
.write_char(d
as char)?
;
606 for _
in digits
..unit_place
{
609 for &d
in buffer
.iter().rev() {
610 f
.write_char(d
as char)?
;
618 impl<S
: Semantics
> fmt
::Debug
for IeeeFloat
<S
> {
619 fn fmt(&self, f
: &mut fmt
::Formatter
<'_
>) -> fmt
::Result
{
622 "{}({:?} | {}{:?} * 2^{})",
625 if self.sign { "-" }
else { "+" }
,
632 impl<S
: Semantics
> Float
for IeeeFloat
<S
> {
633 const BITS
: usize = S
::BITS
;
634 const PRECISION
: usize = S
::PRECISION
;
635 const MAX_EXP
: ExpInt
= S
::MAX_EXP
;
636 const MIN_EXP
: ExpInt
= S
::MIN_EXP
;
638 const ZERO
: Self = IeeeFloat
{
641 category
: Category
::Zero
,
646 const INFINITY
: Self = IeeeFloat
{
649 category
: Category
::Infinity
,
654 // FIXME(eddyb) remove when qnan becomes const fn.
655 const NAN
: Self = IeeeFloat
{
656 sig
: [S
::QNAN_SIGNIFICAND
],
658 category
: Category
::NaN
,
663 fn qnan(payload
: Option
<u128
>) -> Self {
665 sig
: [S
::QNAN_SIGNIFICAND
666 | payload
.map_or(0, |payload
| {
667 // Zero out the excess bits of the significand.
668 payload
& ((1 << S
::QNAN_BIT
) - 1)
671 category
: Category
::NaN
,
677 fn snan(payload
: Option
<u128
>) -> Self {
678 let mut snan
= Self::qnan(payload
);
680 // We always have to clear the QNaN bit to make it an SNaN.
681 sig
::clear_bit(&mut snan
.sig
, S
::QNAN_BIT
);
683 // If there are no bits set in the payload, we have to set
684 // *something* to make it a NaN instead of an infinity;
685 // conventionally, this is the next bit down from the QNaN bit.
686 if snan
.sig
[0] & !S
::QNAN_SIGNIFICAND
== 0 {
687 sig
::set_bit(&mut snan
.sig
, S
::QNAN_BIT
- 1);
693 fn largest() -> Self {
694 // We want (in interchange format):
696 // significand = 1..1
698 sig
: [(1 << S
::PRECISION
) - 1],
700 category
: Category
::Normal
,
706 // We want (in interchange format):
708 // significand = 0..01
709 const SMALLEST
: Self = IeeeFloat
{
712 category
: Category
::Normal
,
717 fn smallest_normalized() -> Self {
718 // We want (in interchange format):
720 // significand = 10..0
722 sig
: [1 << (S
::PRECISION
- 1)],
724 category
: Category
::Normal
,
730 fn add_r(mut self, rhs
: Self, round
: Round
) -> StatusAnd
<Self> {
731 let status
= match (self.category
, rhs
.category
) {
732 (Category
::Infinity
, Category
::Infinity
) => {
733 // Differently signed infinities can only be validly
735 if self.sign
!= rhs
.sign
{
743 // Sign may depend on rounding mode; handled below.
744 (_
, Category
::Zero
) | (Category
::NaN
, _
) | (Category
::Infinity
, Category
::Normal
) => {
748 (Category
::Zero
, _
) | (_
, Category
::NaN
| Category
::Infinity
) => {
753 // This return code means it was not a simple case.
754 (Category
::Normal
, Category
::Normal
) => {
755 let loss
= sig
::add_or_sub(
764 self = unpack
!(status
=, self.normalize(round
, loss
));
766 // Can only be zero if we lost no fraction.
767 assert
!(self.category
!= Category
::Zero
|| loss
== Loss
::ExactlyZero
);
773 // If two numbers add (exactly) to zero, IEEE 754 decrees it is a
774 // positive zero unless rounding to minus infinity, except that
775 // adding two like-signed zeroes gives that zero.
776 if self.category
== Category
::Zero
777 && (rhs
.category
!= Category
::Zero
|| self.sign
!= rhs
.sign
)
779 self.sign
= round
== Round
::TowardNegative
;
785 fn mul_r(mut self, rhs
: Self, round
: Round
) -> StatusAnd
<Self> {
786 self.sign ^
= rhs
.sign
;
788 match (self.category
, rhs
.category
) {
789 (Category
::NaN
, _
) => {
794 (_
, Category
::NaN
) => {
796 self.category
= Category
::NaN
;
801 (Category
::Zero
, Category
::Infinity
) | (Category
::Infinity
, Category
::Zero
) => {
802 Status
::INVALID_OP
.and(Self::NAN
)
805 (_
, Category
::Infinity
) | (Category
::Infinity
, _
) => {
806 self.category
= Category
::Infinity
;
810 (Category
::Zero
, _
) | (_
, Category
::Zero
) => {
811 self.category
= Category
::Zero
;
815 (Category
::Normal
, Category
::Normal
) => {
817 let mut wide_sig
= [0; 2];
819 sig
::mul(&mut wide_sig
, &mut self.exp
, &self.sig
, &rhs
.sig
, S
::PRECISION
);
820 self.sig
= [wide_sig
[0]];
822 self = unpack
!(status
=, self.normalize(round
, loss
));
823 if loss
!= Loss
::ExactlyZero
{
824 status
|= Status
::INEXACT
;
831 fn mul_add_r(mut self, multiplicand
: Self, addend
: Self, round
: Round
) -> StatusAnd
<Self> {
832 // If and only if all arguments are normal do we need to do an
833 // extended-precision calculation.
834 if !self.is_finite_non_zero() || !multiplicand
.is_finite_non_zero() || !addend
.is_finite() {
836 self = unpack
!(status
=, self.mul_r(multiplicand
, round
));
838 // FS can only be Status::OK or Status::INVALID_OP. There is no more work
839 // to do in the latter case. The IEEE-754R standard says it is
840 // implementation-defined in this case whether, if ADDEND is a
841 // quiet NaN, we raise invalid op; this implementation does so.
843 // If we need to do the addition we can do so with normal
845 if status
== Status
::OK
{
846 self = unpack
!(status
=, self.add_r(addend
, round
));
848 return status
.and(self);
851 // Post-multiplication sign, before addition.
852 self.sign ^
= multiplicand
.sign
;
854 // Allocate space for twice as many bits as the original significand, plus one
855 // extra bit for the addition to overflow into.
856 assert
!(limbs_for_bits(S
::PRECISION
* 2 + 1) <= 2);
857 let mut wide_sig
= sig
::widening_mul(self.sig
[0], multiplicand
.sig
[0]);
859 let mut loss
= Loss
::ExactlyZero
;
860 let mut omsb
= sig
::omsb(&wide_sig
);
861 self.exp
+= multiplicand
.exp
;
863 // Assume the operands involved in the multiplication are single-precision
864 // FP, and the two multiplicants are:
865 // lhs = a23 . a22 ... a0 * 2^e1
866 // rhs = b23 . b22 ... b0 * 2^e2
867 // the result of multiplication is:
868 // lhs = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
869 // Note that there are three significant bits at the left-hand side of the
870 // radix point: two for the multiplication, and an overflow bit for the
871 // addition (that will always be zero at this point). Move the radix point
872 // toward left by two bits, and adjust exponent accordingly.
875 if addend
.is_non_zero() {
876 // Normalize our MSB to one below the top bit to allow for overflow.
877 let ext_precision
= 2 * S
::PRECISION
+ 1;
878 if omsb
!= ext_precision
- 1 {
879 assert
!(ext_precision
> omsb
);
880 sig
::shift_left(&mut wide_sig
, &mut self.exp
, (ext_precision
- 1) - omsb
);
883 // The intermediate result of the multiplication has "2 * S::PRECISION"
884 // significant bit; adjust the addend to be consistent with mul result.
885 let mut ext_addend_sig
= [addend
.sig
[0], 0];
887 // Extend the addend significand to ext_precision - 1. This guarantees
888 // that the high bit of the significand is zero (same as wide_sig),
889 // so the addition will overflow (if it does overflow at all) into the top bit.
890 sig
::shift_left(&mut ext_addend_sig
, &mut 0, ext_precision
- 1 - S
::PRECISION
);
891 loss
= sig
::add_or_sub(
900 omsb
= sig
::omsb(&wide_sig
);
903 // Convert the result having "2 * S::PRECISION" significant-bits back to the one
904 // having "S::PRECISION" significant-bits. First, move the radix point from
905 // position "2*S::PRECISION - 1" to "S::PRECISION - 1". The exponent need to be
906 // adjusted by "2*S::PRECISION - 1" - "S::PRECISION - 1" = "S::PRECISION".
907 self.exp
-= S
::PRECISION
as ExpInt
+ 1;
909 // In case MSB resides at the left-hand side of radix point, shift the
910 // mantissa right by some amount to make sure the MSB reside right before
911 // the radix point (i.e., "MSB . rest-significant-bits").
912 if omsb
> S
::PRECISION
{
913 let bits
= omsb
- S
::PRECISION
;
914 loss
= sig
::shift_right(&mut wide_sig
, &mut self.exp
, bits
).combine(loss
);
917 self.sig
[0] = wide_sig
[0];
920 self = unpack
!(status
=, self.normalize(round
, loss
));
921 if loss
!= Loss
::ExactlyZero
{
922 status
|= Status
::INEXACT
;
925 // If two numbers add (exactly) to zero, IEEE 754 decrees it is a
926 // positive zero unless rounding to minus infinity, except that
927 // adding two like-signed zeroes gives that zero.
928 if self.category
== Category
::Zero
929 && !status
.intersects(Status
::UNDERFLOW
)
930 && self.sign
!= addend
.sign
932 self.sign
= round
== Round
::TowardNegative
;
938 fn div_r(mut self, rhs
: Self, round
: Round
) -> StatusAnd
<Self> {
939 self.sign ^
= rhs
.sign
;
941 match (self.category
, rhs
.category
) {
942 (Category
::NaN
, _
) => {
947 (_
, Category
::NaN
) => {
948 self.category
= Category
::NaN
;
954 (Category
::Infinity
, Category
::Infinity
) | (Category
::Zero
, Category
::Zero
) => {
955 Status
::INVALID_OP
.and(Self::NAN
)
958 (Category
::Infinity
| Category
::Zero
, _
) => Status
::OK
.and(self),
960 (Category
::Normal
, Category
::Infinity
) => {
961 self.category
= Category
::Zero
;
965 (Category
::Normal
, Category
::Zero
) => {
966 self.category
= Category
::Infinity
;
967 Status
::DIV_BY_ZERO
.and(self)
970 (Category
::Normal
, Category
::Normal
) => {
972 let dividend
= self.sig
[0];
981 self = unpack
!(status
=, self.normalize(round
, loss
));
982 if loss
!= Loss
::ExactlyZero
{
983 status
|= Status
::INEXACT
;
990 fn c_fmod(mut self, rhs
: Self) -> StatusAnd
<Self> {
991 match (self.category
, rhs
.category
) {
993 | (Category
::Zero
, Category
::Infinity
| Category
::Normal
)
994 | (Category
::Normal
, Category
::Infinity
) => Status
::OK
.and(self),
996 (_
, Category
::NaN
) => {
998 self.category
= Category
::NaN
;
1000 Status
::OK
.and(self)
1003 (Category
::Infinity
, _
) | (_
, Category
::Zero
) => Status
::INVALID_OP
.and(Self::NAN
),
1005 (Category
::Normal
, Category
::Normal
) => {
1006 while self.is_finite_non_zero()
1007 && rhs
.is_finite_non_zero()
1008 && self.cmp_abs_normal(rhs
) != Ordering
::Less
1010 let mut v
= rhs
.scalbn(self.ilogb() - rhs
.ilogb());
1011 if self.cmp_abs_normal(v
) == Ordering
::Less
{
1017 self = unpack
!(status
=, self - v
);
1018 assert_eq
!(status
, Status
::OK
);
1020 Status
::OK
.and(self)
1025 fn round_to_integral(self, round
: Round
) -> StatusAnd
<Self> {
1026 // If the exponent is large enough, we know that this value is already
1027 // integral, and the arithmetic below would potentially cause it to saturate
1028 // to +/-Inf. Bail out early instead.
1029 if self.is_finite_non_zero() && self.exp
+ 1 >= S
::PRECISION
as ExpInt
{
1030 return Status
::OK
.and(self);
1033 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1034 // precision of our format, and then subtract it back off again. The choice
1035 // of rounding modes for the addition/subtraction determines the rounding mode
1036 // for our integral rounding as well.
1037 // NOTE: When the input value is negative, we do subtraction followed by
1038 // addition instead.
1039 assert
!(S
::PRECISION
<= 128);
1041 let magic_const
= unpack
!(status
=, Self::from_u128(1 << (S
::PRECISION
- 1)));
1042 let magic_const
= magic_const
.copy_sign(self);
1044 if status
!= Status
::OK
{
1045 return status
.and(self);
1049 r
= unpack
!(status
=, r
.add_r(magic_const
, round
));
1050 if status
!= Status
::OK
&& status
!= Status
::INEXACT
{
1051 return status
.and(self);
1054 // Restore the input sign to handle 0.0/-0.0 cases correctly.
1055 r
.sub_r(magic_const
, round
).map(|r
| r
.copy_sign(self))
1058 fn next_up(mut self) -> StatusAnd
<Self> {
1059 // Compute nextUp(x), handling each float category separately.
1060 match self.category
{
1061 Category
::Infinity
=> {
1063 // nextUp(-inf) = -largest
1064 Status
::OK
.and(-Self::largest())
1066 // nextUp(+inf) = +inf
1067 Status
::OK
.and(self)
1071 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
1072 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
1073 // change the payload.
1074 if self.is_signaling() {
1075 // For consistency, propagate the sign of the sNaN to the qNaN.
1076 Status
::INVALID_OP
.and(Self::NAN
.copy_sign(self))
1078 Status
::OK
.and(self)
1082 // nextUp(pm 0) = +smallest
1083 Status
::OK
.and(Self::SMALLEST
)
1085 Category
::Normal
=> {
1086 // nextUp(-smallest) = -0
1087 if self.is_smallest() && self.sign
{
1088 return Status
::OK
.and(-Self::ZERO
);
1091 // nextUp(largest) == INFINITY
1092 if self.is_largest() && !self.sign
{
1093 return Status
::OK
.and(Self::INFINITY
);
1096 // Excluding the integral bit. This allows us to test for binade boundaries.
1097 let sig_mask
= (1 << (S
::PRECISION
- 1)) - 1;
1099 // nextUp(normal) == normal + inc.
1101 // If we are negative, we need to decrement the significand.
1103 // We only cross a binade boundary that requires adjusting the exponent
1105 // 1. exponent != S::MIN_EXP. This implies we are not in the
1106 // smallest binade or are dealing with denormals.
1107 // 2. Our significand excluding the integral bit is all zeros.
1108 let crossing_binade_boundary
=
1109 self.exp
!= S
::MIN_EXP
&& self.sig
[0] & sig_mask
== 0;
1111 // Decrement the significand.
1113 // We always do this since:
1114 // 1. If we are dealing with a non-binade decrement, by definition we
1115 // just decrement the significand.
1116 // 2. If we are dealing with a normal -> normal binade decrement, since
1117 // we have an explicit integral bit the fact that all bits but the
1118 // integral bit are zero implies that subtracting one will yield a
1119 // significand with 0 integral bit and 1 in all other spots. Thus we
1120 // must just adjust the exponent and set the integral bit to 1.
1121 // 3. If we are dealing with a normal -> denormal binade decrement,
1122 // since we set the integral bit to 0 when we represent denormals, we
1123 // just decrement the significand.
1124 sig
::decrement(&mut self.sig
);
1126 if crossing_binade_boundary
{
1127 // Our result is a normal number. Do the following:
1128 // 1. Set the integral bit to 1.
1129 // 2. Decrement the exponent.
1130 sig
::set_bit(&mut self.sig
, S
::PRECISION
- 1);
1134 // If we are positive, we need to increment the significand.
1136 // We only cross a binade boundary that requires adjusting the exponent if
1137 // the input is not a denormal and all of said input's significand bits
1138 // are set. If all of said conditions are true: clear the significand, set
1139 // the integral bit to 1, and increment the exponent. If we have a
1140 // denormal always increment since moving denormals and the numbers in the
1141 // smallest normal binade have the same exponent in our representation.
1142 let crossing_binade_boundary
=
1143 !self.is_denormal() && self.sig
[0] & sig_mask
== sig_mask
;
1145 if crossing_binade_boundary
{
1147 sig
::set_bit(&mut self.sig
, S
::PRECISION
- 1);
1151 "We can not increment an exponent beyond the MAX_EXP \
1152 allowed by the given floating point semantics."
1156 sig
::increment(&mut self.sig
);
1159 Status
::OK
.and(self)
1164 fn from_bits(input
: u128
) -> Self {
1165 // Dispatch to semantics.
1169 fn from_u128_r(input
: u128
, round
: Round
) -> StatusAnd
<Self> {
1172 exp
: S
::PRECISION
as ExpInt
- 1,
1173 category
: Category
::Normal
,
1175 marker
: PhantomData
,
1177 .normalize(round
, Loss
::ExactlyZero
)
1180 fn from_str_r(mut s
: &str, mut round
: Round
) -> Result
<StatusAnd
<Self>, ParseError
> {
1182 return Err(ParseError("Invalid string length"));
1185 // Handle special cases.
1187 "inf" | "INFINITY" => return Ok(Status
::OK
.and(Self::INFINITY
)),
1188 "-inf" | "-INFINITY" => return Ok(Status
::OK
.and(-Self::INFINITY
)),
1189 "nan" | "NaN" => return Ok(Status
::OK
.and(Self::NAN
)),
1190 "-nan" | "-NaN" => return Ok(Status
::OK
.and(-Self::NAN
)),
1194 // Handle a leading minus sign.
1195 let minus
= s
.starts_with('
-'
);
1196 if minus
|| s
.starts_with('
+'
) {
1199 return Err(ParseError("String has no digits"));
1203 // Adjust the rounding mode for the absolute value below.
1208 let r
= if s
.starts_with("0x") || s
.starts_with("0X") {
1211 return Err(ParseError("Invalid string"));
1213 Self::from_hexadecimal_string(s
, round
)?
1215 Self::from_decimal_string(s
, round
)?
1218 Ok(r
.map(|r
| if minus { -r }
else { r }
))
1221 fn to_bits(self) -> u128
{
1222 // Dispatch to semantics.
1226 fn to_u128_r(self, width
: usize, round
: Round
, is_exact
: &mut bool
) -> StatusAnd
<u128
> {
1227 // The result of trying to convert a number too large.
1228 let overflow
= if self.sign
{
1229 // Negative numbers cannot be represented as unsigned.
1232 // Largest unsigned integer of the given width.
1238 match self.category
{
1239 Category
::NaN
=> Status
::INVALID_OP
.and(0),
1241 Category
::Infinity
=> Status
::INVALID_OP
.and(overflow
),
1244 // Negative zero can't be represented as an int.
1245 *is_exact
= !self.sign
;
1249 Category
::Normal
=> {
1252 // Step 1: place our absolute value, with any fraction truncated, in
1254 let truncated_bits
= if self.exp
< 0 {
1255 // Our absolute value is less than one; truncate everything.
1256 // For exponent -1 the integer bit represents .5, look at that.
1257 // For smaller exponents leftmost truncated bit is 0.
1258 S
::PRECISION
- 1 + (-self.exp
) as usize
1260 // We want the most significant (exponent + 1) bits; the rest are
1262 let bits
= self.exp
as usize + 1;
1264 // Hopelessly large in magnitude?
1266 return Status
::INVALID_OP
.and(overflow
);
1269 if bits
< S
::PRECISION
{
1270 // We truncate (S::PRECISION - bits) bits.
1271 r
= self.sig
[0] >> (S
::PRECISION
- bits
);
1274 // We want at least as many bits as are available.
1275 r
= self.sig
[0] << (bits
- S
::PRECISION
);
1280 // Step 2: work out any lost fraction, and increment the absolute
1281 // value if we would round away from zero.
1282 let mut loss
= Loss
::ExactlyZero
;
1283 if truncated_bits
> 0 {
1284 loss
= Loss
::through_truncation(&self.sig
, truncated_bits
);
1285 if loss
!= Loss
::ExactlyZero
1286 && self.round_away_from_zero(round
, loss
, truncated_bits
)
1288 r
= r
.wrapping_add(1);
1290 return Status
::INVALID_OP
.and(overflow
); // Overflow.
1295 // Step 3: check if we fit in the destination.
1297 return Status
::INVALID_OP
.and(overflow
);
1300 if loss
== Loss
::ExactlyZero
{
1304 Status
::INEXACT
.and(r
)
1310 fn cmp_abs_normal(self, rhs
: Self) -> Ordering
{
1311 assert
!(self.is_finite_non_zero());
1312 assert
!(rhs
.is_finite_non_zero());
1314 // If exponents are equal, do an unsigned comparison of the significands.
1315 self.exp
.cmp(&rhs
.exp
).then_with(|| sig
::cmp(&self.sig
, &rhs
.sig
))
1318 fn bitwise_eq(self, rhs
: Self) -> bool
{
1319 if self.category
!= rhs
.category
|| self.sign
!= rhs
.sign
{
1323 if self.category
== Category
::Zero
|| self.category
== Category
::Infinity
{
1327 if self.is_finite_non_zero() && self.exp
!= rhs
.exp
{
1334 fn is_negative(self) -> bool
{
1338 fn is_denormal(self) -> bool
{
1339 self.is_finite_non_zero()
1340 && self.exp
== S
::MIN_EXP
1341 && !sig
::get_bit(&self.sig
, S
::PRECISION
- 1)
1344 fn is_signaling(self) -> bool
{
1345 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
1346 // first bit of the trailing significand being 0.
1347 self.is_nan() && !sig
::get_bit(&self.sig
, S
::QNAN_BIT
)
1350 fn category(self) -> Category
{
1354 fn get_exact_inverse(self) -> Option
<Self> {
1355 // Special floats and denormals have no exact inverse.
1356 if !self.is_finite_non_zero() {
1360 // Check that the number is a power of two by making sure that only the
1361 // integer bit is set in the significand.
1362 if self.sig
!= [1 << (S
::PRECISION
- 1)] {
1367 let mut reciprocal
= Self::from_u128(1).value
;
1369 reciprocal
= unpack
!(status
=, reciprocal
/ self);
1370 if status
!= Status
::OK
{
1374 // Avoid multiplication with a denormal, it is not safe on all platforms and
1375 // may be slower than a normal division.
1376 if reciprocal
.is_denormal() {
1380 assert
!(reciprocal
.is_finite_non_zero());
1381 assert_eq
!(reciprocal
.sig
, [1 << (S
::PRECISION
- 1)]);
1386 fn ilogb(mut self) -> ExpInt
{
1393 if self.is_infinite() {
1396 if !self.is_denormal() {
1400 let sig_bits
= (S
::PRECISION
- 1) as ExpInt
;
1401 self.exp
+= sig_bits
;
1402 self = self.normalize(Round
::NearestTiesToEven
, Loss
::ExactlyZero
).value
;
1406 fn scalbn_r(mut self, exp
: ExpInt
, round
: Round
) -> Self {
1407 // If exp is wildly out-of-scale, simply adding it to self.exp will
1408 // overflow; clamp it to a safe range before adding, but ensure that the range
1409 // is large enough that the clamp does not change the result. The range we
1410 // need to support is the difference between the largest possible exponent and
1411 // the normalized exponent of half the smallest denormal.
1413 let sig_bits
= (S
::PRECISION
- 1) as i32;
1414 let max_change
= S
::MAX_EXP
as i32 - (S
::MIN_EXP
as i32 - sig_bits
) + 1;
1416 // Clamp to one past the range ends to let normalize handle overflow.
1417 let exp_change
= cmp
::min(cmp
::max(exp
as i32, -max_change
- 1), max_change
);
1418 self.exp
= self.exp
.saturating_add(exp_change
as ExpInt
);
1419 self = self.normalize(round
, Loss
::ExactlyZero
).value
;
1421 sig
::set_bit(&mut self.sig
, S
::QNAN_BIT
);
1426 fn frexp_r(mut self, exp
: &mut ExpInt
, round
: Round
) -> Self {
1427 *exp
= self.ilogb();
1429 // Quiet signalling nans.
1430 if *exp
== IEK_NAN
{
1431 sig
::set_bit(&mut self.sig
, S
::QNAN_BIT
);
1435 if *exp
== IEK_INF
{
1439 // 1 is added because frexp is defined to return a normalized fraction in
1440 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
1441 if *exp
== IEK_ZERO
{
1446 self.scalbn_r(-*exp
, round
)
1450 impl<S
: Semantics
, T
: Semantics
> FloatConvert
<IeeeFloat
<T
>> for IeeeFloat
<S
> {
1451 fn convert_r(self, round
: Round
, loses_info
: &mut bool
) -> StatusAnd
<IeeeFloat
<T
>> {
1452 let mut r
= IeeeFloat
{
1455 category
: self.category
,
1457 marker
: PhantomData
,
1460 // x86 has some unusual NaNs which cannot be represented in any other
1461 // format; note them here.
1462 fn is_x87_double_extended
<S
: Semantics
>() -> bool
{
1463 S
::QNAN_SIGNIFICAND
== X87DoubleExtendedS
::QNAN_SIGNIFICAND
1465 let x87_special_nan
= is_x87_double_extended
::<S
>()
1466 && !is_x87_double_extended
::<T
>()
1467 && r
.category
== Category
::NaN
1468 && (r
.sig
[0] & S
::QNAN_SIGNIFICAND
) != S
::QNAN_SIGNIFICAND
;
1470 // If this is a truncation of a denormal number, and the target semantics
1471 // has larger exponent range than the source semantics (this can happen
1472 // when truncating from PowerPC double-double to double format), the
1473 // right shift could lose result mantissa bits. Adjust exponent instead
1474 // of performing excessive shift.
1475 let mut shift
= T
::PRECISION
as ExpInt
- S
::PRECISION
as ExpInt
;
1476 if shift
< 0 && r
.is_finite_non_zero() {
1477 let mut exp_change
= sig
::omsb(&r
.sig
) as ExpInt
- S
::PRECISION
as ExpInt
;
1478 if r
.exp
+ exp_change
< T
::MIN_EXP
{
1479 exp_change
= T
::MIN_EXP
- r
.exp
;
1481 if exp_change
< shift
{
1485 shift
-= exp_change
;
1486 r
.exp
+= exp_change
;
1490 // If this is a truncation, perform the shift.
1491 let loss
= if shift
< 0 && (r
.is_finite_non_zero() || r
.category
== Category
::NaN
) {
1492 sig
::shift_right(&mut r
.sig
, &mut 0, -shift
as usize)
1497 // If this is an extension, perform the shift.
1498 if shift
> 0 && (r
.is_finite_non_zero() || r
.category
== Category
::NaN
) {
1499 sig
::shift_left(&mut r
.sig
, &mut 0, shift
as usize);
1503 if r
.is_finite_non_zero() {
1504 r
= unpack
!(status
=, r
.normalize(round
, loss
));
1505 *loses_info
= status
!= Status
::OK
;
1506 } else if r
.category
== Category
::NaN
{
1507 *loses_info
= loss
!= Loss
::ExactlyZero
|| x87_special_nan
;
1509 // For x87 extended precision, we want to make a NaN, not a special NaN if
1510 // the input wasn't special either.
1511 if !x87_special_nan
&& is_x87_double_extended
::<T
>() {
1512 sig
::set_bit(&mut r
.sig
, T
::PRECISION
- 1);
1515 // Convert of sNaN creates qNaN and raises an exception (invalid op).
1516 // This also guarantees that a sNaN does not become Inf on a truncation
1517 // that loses all payload bits.
1518 if self.is_signaling() {
1519 // Quiet signaling NaN.
1520 sig
::set_bit(&mut r
.sig
, T
::QNAN_BIT
);
1521 status
= Status
::INVALID_OP
;
1523 status
= Status
::OK
;
1526 *loses_info
= false;
1527 status
= Status
::OK
;
1534 impl<S
: Semantics
> IeeeFloat
<S
> {
1535 /// Handle positive overflow. We either return infinity or
1536 /// the largest finite number. For negative overflow,
1537 /// negate the `round` argument before calling.
1538 fn overflow_result(round
: Round
) -> StatusAnd
<Self> {
1541 Round
::NearestTiesToEven
| Round
::NearestTiesToAway
| Round
::TowardPositive
=> {
1542 (Status
::OVERFLOW
| Status
::INEXACT
).and(Self::INFINITY
)
1544 // Otherwise we become the largest finite number.
1545 Round
::TowardNegative
| Round
::TowardZero
=> Status
::INEXACT
.and(Self::largest()),
1549 /// Returns `true` if, when truncating the current number, with `bit` the
1550 /// new LSB, with the given lost fraction and rounding mode, the result
1551 /// would need to be rounded away from zero (i.e., by increasing the
1552 /// signficand). This routine must work for `Category::Zero` of both signs, and
1553 /// `Category::Normal` numbers.
1554 fn round_away_from_zero(&self, round
: Round
, loss
: Loss
, bit
: usize) -> bool
{
1555 // NaNs and infinities should not have lost fractions.
1556 assert
!(self.is_finite_non_zero() || self.is_zero());
1558 // Current callers never pass this so we don't handle it.
1559 assert_ne
!(loss
, Loss
::ExactlyZero
);
1562 Round
::NearestTiesToAway
=> loss
== Loss
::ExactlyHalf
|| loss
== Loss
::MoreThanHalf
,
1563 Round
::NearestTiesToEven
=> {
1564 if loss
== Loss
::MoreThanHalf
{
1568 // Our zeros don't have a significand to test.
1569 if loss
== Loss
::ExactlyHalf
&& self.category
!= Category
::Zero
{
1570 return sig
::get_bit(&self.sig
, bit
);
1575 Round
::TowardZero
=> false,
1576 Round
::TowardPositive
=> !self.sign
,
1577 Round
::TowardNegative
=> self.sign
,
1581 fn normalize(mut self, round
: Round
, mut loss
: Loss
) -> StatusAnd
<Self> {
1582 if !self.is_finite_non_zero() {
1583 return Status
::OK
.and(self);
1586 // Before rounding normalize the exponent of Category::Normal numbers.
1587 let mut omsb
= sig
::omsb(&self.sig
);
1590 // OMSB is numbered from 1. We want to place it in the integer
1591 // bit numbered PRECISION if possible, with a compensating change in
1593 let mut final_exp
= self.exp
.saturating_add(omsb
as ExpInt
- S
::PRECISION
as ExpInt
);
1595 // If the resulting exponent is too high, overflow according to
1596 // the rounding mode.
1597 if final_exp
> S
::MAX_EXP
{
1598 let round
= if self.sign { -round }
else { round }
;
1599 return Self::overflow_result(round
).map(|r
| r
.copy_sign(self));
1602 // Subnormal numbers have exponent MIN_EXP, and their MSB
1603 // is forced based on that.
1604 if final_exp
< S
::MIN_EXP
{
1605 final_exp
= S
::MIN_EXP
;
1608 // Shifting left is easy as we don't lose precision.
1609 if final_exp
< self.exp
{
1610 assert_eq
!(loss
, Loss
::ExactlyZero
);
1612 let exp_change
= (self.exp
- final_exp
) as usize;
1613 sig
::shift_left(&mut self.sig
, &mut self.exp
, exp_change
);
1615 return Status
::OK
.and(self);
1618 // Shift right and capture any new lost fraction.
1619 if final_exp
> self.exp
{
1620 let exp_change
= (final_exp
- self.exp
) as usize;
1621 loss
= sig
::shift_right(&mut self.sig
, &mut self.exp
, exp_change
).combine(loss
);
1623 // Keep OMSB up-to-date.
1624 omsb
= omsb
.saturating_sub(exp_change
);
1628 // Now round the number according to round given the lost
1631 // As specified in IEEE 754, since we do not trap we do not report
1632 // underflow for exact results.
1633 if loss
== Loss
::ExactlyZero
{
1634 // Canonicalize zeros.
1636 self.category
= Category
::Zero
;
1639 return Status
::OK
.and(self);
1642 // Increment the significand if we're rounding away from zero.
1643 if self.round_away_from_zero(round
, loss
, 0) {
1645 self.exp
= S
::MIN_EXP
;
1648 // We should never overflow.
1649 assert_eq
!(sig
::increment(&mut self.sig
), 0);
1650 omsb
= sig
::omsb(&self.sig
);
1652 // Did the significand increment overflow?
1653 if omsb
== S
::PRECISION
+ 1 {
1654 // Renormalize by incrementing the exponent and shifting our
1655 // significand right one. However if we already have the
1656 // maximum exponent we overflow to infinity.
1657 if self.exp
== S
::MAX_EXP
{
1658 self.category
= Category
::Infinity
;
1660 return (Status
::OVERFLOW
| Status
::INEXACT
).and(self);
1663 let _
: Loss
= sig
::shift_right(&mut self.sig
, &mut self.exp
, 1);
1665 return Status
::INEXACT
.and(self);
1669 // The normal case - we were and are not denormal, and any
1670 // significand increment above didn't overflow.
1671 if omsb
== S
::PRECISION
{
1672 return Status
::INEXACT
.and(self);
1675 // We have a non-zero denormal.
1676 assert
!(omsb
< S
::PRECISION
);
1678 // Canonicalize zeros.
1680 self.category
= Category
::Zero
;
1683 // The Category::Zero case is a denormal that underflowed to zero.
1684 (Status
::UNDERFLOW
| Status
::INEXACT
).and(self)
1687 fn from_hexadecimal_string(s
: &str, round
: Round
) -> Result
<StatusAnd
<Self>, ParseError
> {
1688 let mut r
= IeeeFloat
{
1691 category
: Category
::Normal
,
1693 marker
: PhantomData
,
1696 let mut any_digits
= false;
1697 let mut has_exp
= false;
1698 let mut bit_pos
= LIMB_BITS
as isize;
1699 let mut loss
= None
;
1701 // Without leading or trailing zeros, irrespective of the dot.
1702 let mut first_sig_digit
= None
;
1703 let mut dot
= s
.len();
1705 for (p
, c
) in s
.char_indices() {
1706 // Skip leading zeros and any (hexa)decimal point.
1709 return Err(ParseError("String contains multiple dots"));
1712 } else if let Some(hex_value
) = c
.to_digit(16) {
1715 if first_sig_digit
.is_none() {
1719 first_sig_digit
= Some(p
);
1722 // Store the number while we have space.
1725 r
.sig
[0] |= (hex_value
as Limb
) << bit_pos
;
1726 // If zero or one-half (the hexadecimal digit 8) are followed
1727 // by non-zero, they're a little more than zero or one-half.
1728 } else if let Some(ref mut loss
) = loss
{
1730 if *loss
== Loss
::ExactlyZero
{
1731 *loss
= Loss
::LessThanHalf
;
1733 if *loss
== Loss
::ExactlyHalf
{
1734 *loss
= Loss
::MoreThanHalf
;
1738 loss
= Some(match hex_value
{
1739 0 => Loss
::ExactlyZero
,
1740 1..=7 => Loss
::LessThanHalf
,
1741 8 => Loss
::ExactlyHalf
,
1742 9..=15 => Loss
::MoreThanHalf
,
1743 _
=> unreachable
!(),
1746 } else if c
== 'p'
|| c
== 'P'
{
1748 return Err(ParseError("Significand has no digits"));
1755 let mut chars
= s
[p
+ 1..].chars().peekable();
1757 // Adjust for the given exponent.
1758 let exp_minus
= chars
.peek() == Some(&'
-'
);
1759 if exp_minus
|| chars
.peek() == Some(&'
+'
) {
1764 if let Some(value
) = c
.to_digit(10) {
1766 r
.exp
= r
.exp
.saturating_mul(10).saturating_add(value
as ExpInt
);
1768 return Err(ParseError("Invalid character in exponent"));
1772 return Err(ParseError("Exponent has no digits"));
1781 return Err(ParseError("Invalid character in significand"));
1785 return Err(ParseError("Significand has no digits"));
1788 // Hex floats require an exponent but not a hexadecimal point.
1790 return Err(ParseError("Hex strings require an exponent"));
1793 // Ignore the exponent if we are zero.
1794 let first_sig_digit
= match first_sig_digit
{
1796 None
=> return Ok(Status
::OK
.and(Self::ZERO
)),
1799 // Calculate the exponent adjustment implicit in the number of
1800 // significant digits and adjust for writing the significand starting
1801 // at the most significant nibble.
1802 let exp_adjustment
= if dot
> first_sig_digit
{
1803 ExpInt
::try_from(dot
- first_sig_digit
).unwrap()
1805 -ExpInt
::try_from(first_sig_digit
- dot
- 1).unwrap()
1807 let exp_adjustment
= exp_adjustment
1810 .saturating_add(S
::PRECISION
as ExpInt
)
1811 .saturating_sub(LIMB_BITS
as ExpInt
);
1812 r
.exp
= r
.exp
.saturating_add(exp_adjustment
);
1814 Ok(r
.normalize(round
, loss
.unwrap_or(Loss
::ExactlyZero
)))
1817 fn from_decimal_string(s
: &str, round
: Round
) -> Result
<StatusAnd
<Self>, ParseError
> {
1818 // Given a normal decimal floating point number of the form
1820 // dddd.dddd[eE][+-]ddd
1822 // where the decimal point and exponent are optional, fill out the
1823 // variables below. Exponent is appropriate if the significand is
1824 // treated as an integer, and normalized_exp if the significand
1825 // is taken to have the decimal point after a single leading
1828 // If the value is zero, first_sig_digit is None.
1830 let mut any_digits
= false;
1831 let mut dec_exp
= 0i32;
1833 // Without leading or trailing zeros, irrespective of the dot.
1834 let mut first_sig_digit
= None
;
1835 let mut last_sig_digit
= 0;
1836 let mut dot
= s
.len();
1838 for (p
, c
) in s
.char_indices() {
1841 return Err(ParseError("String contains multiple dots"));
1844 } else if let Some(dec_value
) = c
.to_digit(10) {
1848 if first_sig_digit
.is_none() {
1849 first_sig_digit
= Some(p
);
1853 } else if c
== 'e'
|| c
== 'E'
{
1855 return Err(ParseError("Significand has no digits"));
1862 let mut chars
= s
[p
+ 1..].chars().peekable();
1864 // Adjust for the given exponent.
1865 let exp_minus
= chars
.peek() == Some(&'
-'
);
1866 if exp_minus
|| chars
.peek() == Some(&'
+'
) {
1872 if let Some(value
) = c
.to_digit(10) {
1874 dec_exp
= dec_exp
.saturating_mul(10).saturating_add(value
as i32);
1876 return Err(ParseError("Invalid character in exponent"));
1880 return Err(ParseError("Exponent has no digits"));
1889 return Err(ParseError("Invalid character in significand"));
1893 return Err(ParseError("Significand has no digits"));
1896 // Test if we have a zero number allowing for non-zero exponents.
1897 let first_sig_digit
= match first_sig_digit
{
1899 None
=> return Ok(Status
::OK
.and(Self::ZERO
)),
1902 // Adjust the exponents for any decimal point.
1903 if dot
> last_sig_digit
{
1904 dec_exp
= dec_exp
.saturating_add((dot
- last_sig_digit
- 1) as i32);
1906 dec_exp
= dec_exp
.saturating_sub((last_sig_digit
- dot
) as i32);
1908 let significand_digits
= last_sig_digit
- first_sig_digit
+ 1
1909 - (dot
> first_sig_digit
&& dot
< last_sig_digit
) as usize;
1910 let normalized_exp
= dec_exp
.saturating_add(significand_digits
as i32 - 1);
1912 // Handle the cases where exponents are obviously too large or too
1913 // small. Writing L for log 10 / log 2, a number d.ddddd*10^dec_exp
1914 // definitely overflows if
1916 // (dec_exp - 1) * L >= MAX_EXP
1918 // and definitely underflows to zero where
1920 // (dec_exp + 1) * L <= MIN_EXP - PRECISION
1922 // With integer arithmetic the tightest bounds for L are
1924 // 93/28 < L < 196/59 [ numerator <= 256 ]
1925 // 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
1927 // Check for MAX_EXP.
1928 if normalized_exp
.saturating_sub(1).saturating_mul(42039) >= 12655 * S
::MAX_EXP
as i32 {
1929 // Overflow and round.
1930 return Ok(Self::overflow_result(round
));
1933 // Check for MIN_EXP.
1934 if normalized_exp
.saturating_add(1).saturating_mul(28738)
1935 <= 8651 * (S
::MIN_EXP
as i32 - S
::PRECISION
as i32)
1937 // Underflow to zero and round.
1939 if round
== Round
::TowardPositive { IeeeFloat::SMALLEST }
else { IeeeFloat::ZERO }
;
1940 return Ok((Status
::UNDERFLOW
| Status
::INEXACT
).and(r
));
1943 // A tight upper bound on number of bits required to hold an
1944 // N-digit decimal integer is N * 196 / 59. Allocate enough space
1945 // to hold the full significand, and an extra limb required by
1947 let max_limbs
= limbs_for_bits(1 + 196 * significand_digits
/ 59);
1948 let mut dec_sig
: SmallVec
<[Limb
; 1]> = SmallVec
::with_capacity(max_limbs
);
1950 // Convert to binary efficiently - we do almost all multiplication
1951 // in a Limb. When this would overflow do we do a single
1952 // bignum multiplication, and then revert again to multiplication
1954 let mut chars
= s
[first_sig_digit
..=last_sig_digit
].chars();
1957 let mut multiplier
= 1;
1960 let dec_value
= match chars
.next() {
1961 Some('
.'
) => continue,
1962 Some(c
) => c
.to_digit(10).unwrap(),
1967 val
= val
* 10 + dec_value
as Limb
;
1969 // The maximum number that can be multiplied by ten with any
1970 // digit added without overflowing a Limb.
1971 if multiplier
> (!0 - 9) / 10 {
1976 // If we've consumed no digits, we're done.
1977 if multiplier
== 1 {
1981 // Multiply out the current limb.
1982 let mut carry
= val
;
1983 for x
in &mut dec_sig
{
1984 let [low
, mut high
] = sig
::widening_mul(*x
, multiplier
);
1987 let (low
, overflow
) = low
.overflowing_add(carry
);
1988 high
+= overflow
as Limb
;
1994 // If we had carry, we need another limb (likely but not guaranteed).
1996 dec_sig
.push(carry
);
2000 // Calculate pow(5, abs(dec_exp)) into `pow5_full`.
2001 // The *_calc Vec's are reused scratch space, as an optimization.
2002 let (pow5_full
, mut pow5_calc
, mut sig_calc
, mut sig_scratch_calc
) = {
2003 let mut power
= dec_exp
.abs() as usize;
2005 const FIRST_EIGHT_POWERS
: [Limb
; 8] = [1, 5, 25, 125, 625, 3125, 15625, 78125];
2007 let mut p5_scratch
= smallvec
![];
2008 let mut p5
: SmallVec
<[Limb
; 1]> = smallvec
![FIRST_EIGHT_POWERS
[4]];
2010 let mut r_scratch
= smallvec
![];
2011 let mut r
: SmallVec
<[Limb
; 1]> = smallvec
![FIRST_EIGHT_POWERS
[power
& 7]];
2015 // Calculate pow(5,pow(2,n+3)).
2016 p5_scratch
.resize(p5
.len() * 2, 0);
2017 let _
: Loss
= sig
::mul(&mut p5_scratch
, &mut 0, &p5
, &p5
, p5
.len() * 2 * LIMB_BITS
);
2018 while p5_scratch
.last() == Some(&0) {
2021 mem
::swap(&mut p5
, &mut p5_scratch
);
2024 r_scratch
.resize(r
.len() + p5
.len(), 0);
2026 sig
::mul(&mut r_scratch
, &mut 0, &r
, &p5
, (r
.len() + p5
.len()) * LIMB_BITS
);
2027 while r_scratch
.last() == Some(&0) {
2030 mem
::swap(&mut r
, &mut r_scratch
);
2036 (r
, r_scratch
, p5
, p5_scratch
)
2039 // Attempt dec_sig * 10^dec_exp with increasing precision.
2040 let mut attempt
= 0;
2042 let calc_precision
= (LIMB_BITS
<< attempt
) - 1;
2045 let calc_normal_from_limbs
= |sig
: &mut SmallVec
<[Limb
; 1]>,
2047 -> StatusAnd
<ExpInt
> {
2048 sig
.resize(limbs_for_bits(calc_precision
), 0);
2049 let (mut loss
, mut exp
) = sig
::from_limbs(sig
, limbs
, calc_precision
);
2051 // Before rounding normalize the exponent of Category::Normal numbers.
2052 let mut omsb
= sig
::omsb(sig
);
2054 assert_ne
!(omsb
, 0);
2056 // OMSB is numbered from 1. We want to place it in the integer
2057 // bit numbered PRECISION if possible, with a compensating change in
2059 let final_exp
= exp
.saturating_add(omsb
as ExpInt
- calc_precision
as ExpInt
);
2061 // Shifting left is easy as we don't lose precision.
2062 if final_exp
< exp
{
2063 assert_eq
!(loss
, Loss
::ExactlyZero
);
2065 let exp_change
= (exp
- final_exp
) as usize;
2066 sig
::shift_left(sig
, &mut exp
, exp_change
);
2068 return Status
::OK
.and(exp
);
2071 // Shift right and capture any new lost fraction.
2072 if final_exp
> exp
{
2073 let exp_change
= (final_exp
- exp
) as usize;
2074 loss
= sig
::shift_right(sig
, &mut exp
, exp_change
).combine(loss
);
2076 // Keep OMSB up-to-date.
2077 omsb
= omsb
.saturating_sub(exp_change
);
2080 assert_eq
!(omsb
, calc_precision
);
2082 // Now round the number according to round given the lost
2085 // As specified in IEEE 754, since we do not trap we do not report
2086 // underflow for exact results.
2087 if loss
== Loss
::ExactlyZero
{
2088 return Status
::OK
.and(exp
);
2091 // Increment the significand if we're rounding away from zero.
2092 if loss
== Loss
::MoreThanHalf
|| loss
== Loss
::ExactlyHalf
&& sig
::get_bit(sig
, 0) {
2093 // We should never overflow.
2094 assert_eq
!(sig
::increment(sig
), 0);
2095 omsb
= sig
::omsb(sig
);
2097 // Did the significand increment overflow?
2098 if omsb
== calc_precision
+ 1 {
2099 let _
: Loss
= sig
::shift_right(sig
, &mut exp
, 1);
2101 return Status
::INEXACT
.and(exp
);
2105 // The normal case - we were and are not denormal, and any
2106 // significand increment above didn't overflow.
2107 Status
::INEXACT
.and(exp
)
2111 let mut exp
= unpack
!(status
=,
2112 calc_normal_from_limbs(&mut sig_calc
, &dec_sig
));
2114 let pow5_exp
= unpack
!(pow5_status
=,
2115 calc_normal_from_limbs(&mut pow5_calc
, &pow5_full
));
2117 // Add dec_exp, as 10^n = 5^n * 2^n.
2118 exp
+= dec_exp
as ExpInt
;
2120 let mut used_bits
= S
::PRECISION
;
2121 let mut truncated_bits
= calc_precision
- used_bits
;
2123 let half_ulp_err1
= (status
!= Status
::OK
) as Limb
;
2124 let (calc_loss
, half_ulp_err2
);
2128 sig_scratch_calc
.resize(sig_calc
.len() + pow5_calc
.len(), 0);
2129 calc_loss
= sig
::mul(
2130 &mut sig_scratch_calc
,
2136 mem
::swap(&mut sig_calc
, &mut sig_scratch_calc
);
2138 half_ulp_err2
= (pow5_status
!= Status
::OK
) as Limb
;
2142 sig_scratch_calc
.resize(sig_calc
.len(), 0);
2143 calc_loss
= sig
::div(
2144 &mut sig_scratch_calc
,
2150 mem
::swap(&mut sig_calc
, &mut sig_scratch_calc
);
2152 // Denormal numbers have less precision.
2153 if exp
< S
::MIN_EXP
{
2154 truncated_bits
+= (S
::MIN_EXP
- exp
) as usize;
2155 used_bits
= calc_precision
.saturating_sub(truncated_bits
);
2157 // Extra half-ulp lost in reciprocal of exponent.
2159 2 * (pow5_status
!= Status
::OK
|| calc_loss
!= Loss
::ExactlyZero
) as Limb
;
2162 // Both sig::mul and sig::div return the
2163 // result with the integer bit set.
2164 assert
!(sig
::get_bit(&sig_calc
, calc_precision
- 1));
2166 // The error from the true value, in half-ulps, on multiplying two
2167 // floating point numbers, which differ from the value they
2168 // approximate by at most half_ulp_err1 and half_ulp_err2 half-ulps, is strictly less
2169 // than the returned value.
2171 // See "How to Read Floating Point Numbers Accurately" by William D Clinger.
2172 assert
!(half_ulp_err1
< 2 || half_ulp_err2
< 2 || (half_ulp_err1
+ half_ulp_err2
< 8));
2174 let inexact
= (calc_loss
!= Loss
::ExactlyZero
) as Limb
;
2175 let half_ulp_err
= if half_ulp_err1
+ half_ulp_err2
== 0 {
2176 inexact
* 2 // <= inexact half-ulps.
2178 inexact
+ 2 * (half_ulp_err1
+ half_ulp_err2
)
2181 let ulps_from_boundary
= {
2182 let bits
= calc_precision
- used_bits
- 1;
2184 let i
= bits
/ LIMB_BITS
;
2185 let limb
= sig_calc
[i
] & (!0 >> (LIMB_BITS
- 1 - bits
% LIMB_BITS
));
2186 let boundary
= match round
{
2187 Round
::NearestTiesToEven
| Round
::NearestTiesToAway
=> 1 << (bits
% LIMB_BITS
),
2191 let delta
= limb
.wrapping_sub(boundary
);
2192 cmp
::min(delta
, delta
.wrapping_neg())
2193 } else if limb
== boundary
{
2194 if !sig
::is_all_zeros(&sig_calc
[1..i
]) {
2199 } else if limb
== boundary
.wrapping_sub(1) {
2200 if sig_calc
[1..i
].iter().any(|&x
| x
.wrapping_neg() != 1) {
2203 sig_calc
[0].wrapping_neg()
2210 // Are we guaranteed to round correctly if we truncate?
2211 if ulps_from_boundary
.saturating_mul(2) >= half_ulp_err
{
2212 let mut r
= IeeeFloat
{
2215 category
: Category
::Normal
,
2217 marker
: PhantomData
,
2219 sig
::extract(&mut r
.sig
, &sig_calc
, used_bits
, calc_precision
- used_bits
);
2220 // If we extracted less bits above we must adjust our exponent
2221 // to compensate for the implicit right shift.
2222 r
.exp
+= (S
::PRECISION
- used_bits
) as ExpInt
;
2223 let loss
= Loss
::through_truncation(&sig_calc
, truncated_bits
);
2224 return Ok(r
.normalize(round
, loss
));
2231 /// Combine the effect of two lost fractions.
2232 fn combine(self, less_significant
: Loss
) -> Loss
{
2233 let mut more_significant
= self;
2234 if less_significant
!= Loss
::ExactlyZero
{
2235 if more_significant
== Loss
::ExactlyZero
{
2236 more_significant
= Loss
::LessThanHalf
;
2237 } else if more_significant
== Loss
::ExactlyHalf
{
2238 more_significant
= Loss
::MoreThanHalf
;
2245 /// Returns the fraction lost were a bignum truncated losing the least
2246 /// significant `bits` bits.
2247 fn through_truncation(limbs
: &[Limb
], bits
: usize) -> Loss
{
2249 return Loss
::ExactlyZero
;
2252 let half_bit
= bits
- 1;
2253 let half_limb
= half_bit
/ LIMB_BITS
;
2254 let (half_limb
, rest
) = if half_limb
< limbs
.len() {
2255 (limbs
[half_limb
], &limbs
[..half_limb
])
2259 let half
= 1 << (half_bit
% LIMB_BITS
);
2260 let has_half
= half_limb
& half
!= 0;
2261 let has_rest
= half_limb
& (half
- 1) != 0 || !sig
::is_all_zeros(rest
);
2263 match (has_half
, has_rest
) {
2264 (false, false) => Loss
::ExactlyZero
,
2265 (false, true) => Loss
::LessThanHalf
,
2266 (true, false) => Loss
::ExactlyHalf
,
2267 (true, true) => Loss
::MoreThanHalf
,
2272 /// Implementation details of IeeeFloat significands, such as big integer arithmetic.
2273 /// As a rule of thumb, no functions in this module should dynamically allocate.
2275 use super::{limbs_for_bits, ExpInt, Limb, Loss, LIMB_BITS}
;
2276 use core
::cmp
::Ordering
;
2280 pub(super) fn is_all_zeros(limbs
: &[Limb
]) -> bool
{
2281 limbs
.iter().all(|&l
| l
== 0)
2284 /// One, not zero, based LSB. That is, returns 0 for a zeroed significand.
2285 pub(super) fn olsb(limbs
: &[Limb
]) -> usize {
2289 .find(|(_
, &limb
)| limb
!= 0)
2290 .map_or(0, |(i
, limb
)| i
* LIMB_BITS
+ limb
.trailing_zeros() as usize + 1)
2293 /// One, not zero, based MSB. That is, returns 0 for a zeroed significand.
2294 pub(super) fn omsb(limbs
: &[Limb
]) -> usize {
2298 .rfind(|(_
, &limb
)| limb
!= 0)
2299 .map_or(0, |(i
, limb
)| (i
+ 1) * LIMB_BITS
- limb
.leading_zeros() as usize)
2302 /// Comparison (unsigned) of two significands.
2303 pub(super) fn cmp(a
: &[Limb
], b
: &[Limb
]) -> Ordering
{
2304 assert_eq
!(a
.len(), b
.len());
2305 for (a
, b
) in a
.iter().zip(b
).rev() {
2307 Ordering
::Equal
=> {}
2315 /// Extracts the given bit.
2316 pub(super) fn get_bit(limbs
: &[Limb
], bit
: usize) -> bool
{
2317 limbs
[bit
/ LIMB_BITS
] & (1 << (bit
% LIMB_BITS
)) != 0
2320 /// Sets the given bit.
2321 pub(super) fn set_bit(limbs
: &mut [Limb
], bit
: usize) {
2322 limbs
[bit
/ LIMB_BITS
] |= 1 << (bit
% LIMB_BITS
);
2325 /// Clear the given bit.
2326 pub(super) fn clear_bit(limbs
: &mut [Limb
], bit
: usize) {
2327 limbs
[bit
/ LIMB_BITS
] &= !(1 << (bit
% LIMB_BITS
));
2330 /// Shifts `dst` left `bits` bits, subtract `bits` from its exponent.
2331 pub(super) fn shift_left(dst
: &mut [Limb
], exp
: &mut ExpInt
, bits
: usize) {
2333 // Our exponent should not underflow.
2334 *exp
= exp
.checked_sub(bits
as ExpInt
).unwrap();
2336 // Jump is the inter-limb jump; shift is the intra-limb shift.
2337 let jump
= bits
/ LIMB_BITS
;
2338 let shift
= bits
% LIMB_BITS
;
2340 for i
in (0..dst
.len()).rev() {
2346 // dst[i] comes from the two limbs src[i - jump] and, if we have
2347 // an intra-limb shift, src[i - jump - 1].
2348 limb
= dst
[i
- jump
];
2352 limb
|= dst
[i
- jump
- 1] >> (LIMB_BITS
- shift
);
2362 /// Shifts `dst` right `bits` bits noting lost fraction.
2363 pub(super) fn shift_right(dst
: &mut [Limb
], exp
: &mut ExpInt
, bits
: usize) -> Loss
{
2364 let loss
= Loss
::through_truncation(dst
, bits
);
2367 // Our exponent should not overflow.
2368 *exp
= exp
.checked_add(bits
as ExpInt
).unwrap();
2370 // Jump is the inter-limb jump; shift is the intra-limb shift.
2371 let jump
= bits
/ LIMB_BITS
;
2372 let shift
= bits
% LIMB_BITS
;
2374 // Perform the shift. This leaves the most significant `bits` bits
2375 // of the result at zero.
2376 for i
in 0..dst
.len() {
2379 if i
+ jump
>= dst
.len() {
2382 limb
= dst
[i
+ jump
];
2385 if i
+ jump
+ 1 < dst
.len() {
2386 limb
|= dst
[i
+ jump
+ 1] << (LIMB_BITS
- shift
);
2398 /// Copies the bit vector of width `src_bits` from `src`, starting at bit SRC_LSB,
2399 /// to `dst`, such that the bit SRC_LSB becomes the least significant bit of `dst`.
2400 /// All high bits above `src_bits` in `dst` are zero-filled.
2401 pub(super) fn extract(dst
: &mut [Limb
], src
: &[Limb
], src_bits
: usize, src_lsb
: usize) {
2406 let dst_limbs
= limbs_for_bits(src_bits
);
2407 assert
!(dst_limbs
<= dst
.len());
2409 let src
= &src
[src_lsb
/ LIMB_BITS
..];
2410 dst
[..dst_limbs
].copy_from_slice(&src
[..dst_limbs
]);
2412 let shift
= src_lsb
% LIMB_BITS
;
2413 let _
: Loss
= shift_right(&mut dst
[..dst_limbs
], &mut 0, shift
);
2415 // We now have (dst_limbs * LIMB_BITS - shift) bits from `src`
2416 // in `dst`. If this is less that src_bits, append the rest, else
2417 // clear the high bits.
2418 let n
= dst_limbs
* LIMB_BITS
- shift
;
2420 let mask
= (1 << (src_bits
- n
)) - 1;
2421 dst
[dst_limbs
- 1] |= (src
[dst_limbs
] & mask
) << (n
% LIMB_BITS
);
2422 } else if n
> src_bits
&& src_bits
% LIMB_BITS
> 0 {
2423 dst
[dst_limbs
- 1] &= (1 << (src_bits
% LIMB_BITS
)) - 1;
2426 // Clear high limbs.
2427 for x
in &mut dst
[dst_limbs
..] {
2432 /// We want the most significant PRECISION bits of `src`. There may not
2433 /// be that many; extract what we can.
2434 pub(super) fn from_limbs(dst
: &mut [Limb
], src
: &[Limb
], precision
: usize) -> (Loss
, ExpInt
) {
2435 let omsb
= omsb(src
);
2437 if precision
<= omsb
{
2438 extract(dst
, src
, precision
, omsb
- precision
);
2439 (Loss
::through_truncation(src
, omsb
- precision
), omsb
as ExpInt
- 1)
2441 extract(dst
, src
, omsb
, 0);
2442 (Loss
::ExactlyZero
, precision
as ExpInt
- 1)
2446 /// For every consecutive chunk of `bits` bits from `limbs`,
2447 /// going from most significant to the least significant bits,
2448 /// call `f` to transform those bits and store the result back.
2449 pub(super) fn each_chunk
<F
: FnMut(Limb
) -> Limb
>(limbs
: &mut [Limb
], bits
: usize, mut f
: F
) {
2450 assert_eq
!(LIMB_BITS
% bits
, 0);
2451 for limb
in limbs
.iter_mut().rev() {
2453 for i
in (0..LIMB_BITS
/ bits
).rev() {
2454 r
|= f((*limb
>> (i
* bits
)) & ((1 << bits
) - 1)) << (i
* bits
);
2460 /// Increment in-place, return the carry flag.
2461 pub(super) fn increment(dst
: &mut [Limb
]) -> Limb
{
2463 *x
= x
.wrapping_add(1);
2472 /// Decrement in-place, return the borrow flag.
2473 pub(super) fn decrement(dst
: &mut [Limb
]) -> Limb
{
2475 *x
= x
.wrapping_sub(1);
2484 /// `a += b + c` where `c` is zero or one. Returns the carry flag.
2485 pub(super) fn add(a
: &mut [Limb
], b
: &[Limb
], mut c
: Limb
) -> Limb
{
2488 for (a
, &b
) in iter
::zip(a
, b
) {
2489 let (r
, overflow
) = a
.overflowing_add(b
);
2490 let (r
, overflow2
) = r
.overflowing_add(c
);
2492 c
= (overflow
| overflow2
) as Limb
;
2498 /// `a -= b + c` where `c` is zero or one. Returns the borrow flag.
2499 pub(super) fn sub(a
: &mut [Limb
], b
: &[Limb
], mut c
: Limb
) -> Limb
{
2502 for (a
, &b
) in iter
::zip(a
, b
) {
2503 let (r
, overflow
) = a
.overflowing_sub(b
);
2504 let (r
, overflow2
) = r
.overflowing_sub(c
);
2506 c
= (overflow
| overflow2
) as Limb
;
2512 /// `a += b` or `a -= b`. Does not preserve `b`.
2513 pub(super) fn add_or_sub(
2521 // Are we bigger exponent-wise than the RHS?
2522 let bits
= *a_exp
- b_exp
;
2524 // Determine if the operation on the absolute values is effectively
2525 // an addition or subtraction.
2526 // Subtraction is more subtle than one might naively expect.
2527 if *a_sign ^ b_sign
{
2528 let (reverse
, loss
);
2530 #[allow(clippy::comparison_chain)]
2532 reverse
= cmp(a_sig
, b_sig
) == Ordering
::Less
;
2533 loss
= Loss
::ExactlyZero
;
2534 } else if bits
> 0 {
2535 loss
= shift_right(b_sig
, &mut 0, (bits
- 1) as usize);
2536 shift_left(a_sig
, a_exp
, 1);
2539 loss
= shift_right(a_sig
, a_exp
, (-bits
- 1) as usize);
2540 shift_left(b_sig
, &mut 0, 1);
2544 let borrow
= (loss
!= Loss
::ExactlyZero
) as Limb
;
2546 // The code above is intended to ensure that no borrow is necessary.
2547 assert_eq
!(sub(b_sig
, a_sig
, borrow
), 0);
2548 a_sig
.copy_from_slice(b_sig
);
2551 // The code above is intended to ensure that no borrow is necessary.
2552 assert_eq
!(sub(a_sig
, b_sig
, borrow
), 0);
2555 // Invert the lost fraction - it was on the RHS and subtracted.
2557 Loss
::LessThanHalf
=> Loss
::MoreThanHalf
,
2558 Loss
::MoreThanHalf
=> Loss
::LessThanHalf
,
2562 let loss
= if bits
> 0 {
2563 shift_right(b_sig
, &mut 0, bits
as usize)
2565 shift_right(a_sig
, a_exp
, -bits
as usize)
2567 // We have a guard bit; generating a carry cannot happen.
2568 assert_eq
!(add(a_sig
, b_sig
, 0), 0);
2573 /// `[low, high] = a * b`.
2575 /// This cannot overflow, because
2577 /// `(n - 1) * (n - 1) + 2 * (n - 1) == (n - 1) * (n + 1)`
2579 /// which is less than n<sup>2</sup>.
2580 pub(super) fn widening_mul(a
: Limb
, b
: Limb
) -> [Limb
; 2] {
2581 let mut wide
= [0, 0];
2583 if a
== 0 || b
== 0 {
2587 const HALF_BITS
: usize = LIMB_BITS
/ 2;
2589 let select
= |limb
, i
| (limb
>> (i
* HALF_BITS
)) & ((1 << HALF_BITS
) - 1);
2592 let mut x
= [select(a
, i
) * select(b
, j
), 0];
2593 shift_left(&mut x
, &mut 0, (i
+ j
) * HALF_BITS
);
2594 assert_eq
!(add(&mut wide
, &x
, 0), 0);
2601 /// `dst = a * b` (for normal `a` and `b`). Returns the lost fraction.
2602 pub(super) fn mul
<'a
>(
2609 // Put the narrower number on the `a` for less loops below.
2610 if a
.len() > b
.len() {
2611 mem
::swap(&mut a
, &mut b
);
2614 for x
in &mut dst
[..b
.len()] {
2618 for i
in 0..a
.len() {
2620 for j
in 0..b
.len() {
2621 let [low
, mut high
] = widening_mul(a
[i
], b
[j
]);
2624 let (low
, overflow
) = low
.overflowing_add(carry
);
2625 high
+= overflow
as Limb
;
2627 // And now `dst[i + j]`, and store the new low part there.
2628 let (low
, overflow
) = low
.overflowing_add(dst
[i
+ j
]);
2629 high
+= overflow
as Limb
;
2634 dst
[i
+ b
.len()] = carry
;
2637 // Assume the operands involved in the multiplication are single-precision
2638 // FP, and the two multiplicants are:
2639 // a = a23 . a22 ... a0 * 2^e1
2640 // b = b23 . b22 ... b0 * 2^e2
2641 // the result of multiplication is:
2642 // dst = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
2643 // Note that there are three significant bits at the left-hand side of the
2644 // radix point: two for the multiplication, and an overflow bit for the
2645 // addition (that will always be zero at this point). Move the radix point
2646 // toward left by two bits, and adjust exponent accordingly.
2649 // Convert the result having "2 * precision" significant-bits back to the one
2650 // having "precision" significant-bits. First, move the radix point from
2651 // poision "2*precision - 1" to "precision - 1". The exponent need to be
2652 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
2653 *exp
-= precision
as ExpInt
+ 1;
2655 // In case MSB resides at the left-hand side of radix point, shift the
2656 // mantissa right by some amount to make sure the MSB reside right before
2657 // the radix point (i.e., "MSB . rest-significant-bits").
2659 // Note that the result is not normalized when "omsb < precision". So, the
2660 // caller needs to call IeeeFloat::normalize() if normalized value is
2662 let omsb
= omsb(dst
);
2663 if omsb
<= precision { Loss::ExactlyZero }
else { shift_right(dst, exp, omsb - precision) }
2666 /// `quotient = dividend / divisor`. Returns the lost fraction.
2667 /// Does not preserve `dividend` or `divisor`.
2669 quotient
: &mut [Limb
],
2671 dividend
: &mut [Limb
],
2672 divisor
: &mut [Limb
],
2675 // Normalize the divisor.
2676 let bits
= precision
- omsb(divisor
);
2677 shift_left(divisor
, &mut 0, bits
);
2678 *exp
+= bits
as ExpInt
;
2680 // Normalize the dividend.
2681 let bits
= precision
- omsb(dividend
);
2682 shift_left(dividend
, exp
, bits
);
2685 let olsb_divisor
= olsb(divisor
);
2686 if olsb_divisor
== precision
{
2687 quotient
.copy_from_slice(dividend
);
2688 return Loss
::ExactlyZero
;
2691 // Ensure the dividend >= divisor initially for the loop below.
2692 // Incidentally, this means that the division loop below is
2693 // guaranteed to set the integer bit to one.
2694 if cmp(dividend
, divisor
) == Ordering
::Less
{
2695 shift_left(dividend
, exp
, 1);
2696 assert_ne
!(cmp(dividend
, divisor
), Ordering
::Less
)
2699 // Helper for figuring out the lost fraction.
2700 let lost_fraction
= |dividend
: &[Limb
], divisor
: &[Limb
]| match cmp(dividend
, divisor
) {
2701 Ordering
::Greater
=> Loss
::MoreThanHalf
,
2702 Ordering
::Equal
=> Loss
::ExactlyHalf
,
2704 if is_all_zeros(dividend
) {
2712 // Try to perform a (much faster) short division for small divisors.
2713 let divisor_bits
= precision
- (olsb_divisor
- 1);
2714 macro_rules
! try_short_div
{
2715 ($W
:ty
, $H
:ty
, $half
:expr
) => {
2716 if divisor_bits
* 2 <= $half
{
2717 // Extract the small divisor.
2718 let _
: Loss
= shift_right(divisor
, &mut 0, olsb_divisor
- 1);
2719 let divisor
= divisor
[0] as $H
as $W
;
2721 // Shift the dividend to produce a quotient with the unit bit set.
2722 let top_limb
= *dividend
.last().unwrap();
2723 let mut rem
= (top_limb
>> (LIMB_BITS
- (divisor_bits
- 1))) as $H
;
2724 shift_left(dividend
, &mut 0, divisor_bits
- 1);
2726 // Apply short division in place on $H (of $half bits) chunks.
2727 each_chunk(dividend
, $half
, |chunk
| {
2728 let chunk
= chunk
as $H
;
2729 let combined
= ((rem
as $W
) << $half
) | (chunk
as $W
);
2730 rem
= (combined
% divisor
) as $H
;
2731 (combined
/ divisor
) as $H
as Limb
2733 quotient
.copy_from_slice(dividend
);
2735 return lost_fraction(&[(rem
as Limb
) << 1], &[divisor
as Limb
]);
2740 try_short_div
!(u32, u16, 16);
2741 try_short_div
!(u64, u32, 32);
2742 try_short_div
!(u128
, u64, 64);
2744 // Zero the quotient before setting bits in it.
2745 for x
in &mut quotient
[..limbs_for_bits(precision
)] {
2750 for bit
in (0..precision
).rev() {
2751 if cmp(dividend
, divisor
) != Ordering
::Less
{
2752 sub(dividend
, divisor
, 0);
2753 set_bit(quotient
, bit
);
2755 shift_left(dividend
, &mut 0, 1);
2758 lost_fraction(dividend
, divisor
)