[rustc.git] / src / libcore / num / dec2flt / rawfp.rs

// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! Bit fiddling on positive IEEE 754 floats. Negative numbers aren't and needn't be handled.
//! Normal floating point numbers have a canonical representation as (frac, exp) such that the
//! value is 2^exp * (1 + sum(frac[N-i] / 2^i)) where N is the number of bits. Subnormals are
//! slightly different and weird, but the same principle applies.
//!
//! Here, however, we represent them as (sig, k) with f positive, such that the value is f * 2^e.
//! Besides making the "hidden bit" explicit, this changes the exponent by the so-called
//! mantissa shift.
//!
//! Put another way, normally floats are written as (1) but here they are written as (2):
//!
//! 1. `1.101100...11 * 2^m`
//! 2. `1101100...11 * 2^n`
//!
//! We call (1) the **fractional representation** and (2) the **integral representation**.
//!
//! Many functions in this module only handle normal numbers. The dec2flt routines conservatively
//! take the universally-correct slow path (Algorithm M) for very small and very large numbers.
//! That algorithm needs only next_float() which does handle subnormals and zeros.
use u32;
use cmp::Ordering::{Less, Equal, Greater};
use ops::{Mul, Div, Neg};
use fmt::{Debug, LowerExp};
use mem::transmute;
use num::diy_float::Fp;
use num::FpCategory::{Infinite, Zero, Subnormal, Normal, Nan};
use num::Float;
use num::dec2flt::num::{self, Big};
use num::dec2flt::table;

#[derive(Copy, Clone, Debug)]
pub struct Unpacked {
    pub sig: u64,
    pub k: i16,
}

impl Unpacked {
    pub fn new(sig: u64, k: i16) -> Self {
        Unpacked { sig: sig, k: k }
    }
}

/// A helper trait to avoid duplicating basically all the conversion code for `f32` and `f64`.
///
/// See the parent module's doc comment for why this is necessary.
///
/// Should **never ever** be implemented for other types or be used outside the dec2flt module.
/// Inherits from `Float` because there is some overlap, but all the reused methods are trivial.
/// The "methods" (pseudo-constants) with default implementation should not be overriden.
pub trait RawFloat : Float + Copy + Debug + LowerExp
                    + Mul<Output=Self> + Div<Output=Self> + Neg<Output=Self>
{
    // suffix of "2" because Float::infinity is deprecated
    #[allow(deprecated)]
    fn infinity2() -> Self {
        Float::infinity()
    }

    // suffix of "2" because Float::nan is deprecated
    #[allow(deprecated)]
    fn nan2() -> Self {
        Float::nan()
    }

    // suffix of "2" because Float::zero is deprecated
    fn zero2() -> Self;

    // suffix of "2" because Float::integer_decode is deprecated
    #[allow(deprecated)]
    fn integer_decode2(self) -> (u64, i16, i8) {
        Float::integer_decode(self)
    }

    /// Get the raw binary representation of the float.
    fn transmute(self) -> u64;

    /// Transmute the raw binary representation into a float.
    fn from_bits(bits: u64) -> Self;

    /// Decode the float.
    fn unpack(self) -> Unpacked;

    /// Cast from a small integer that can be represented exactly.  Panic if the integer can't be
    /// represented, the other code in this module makes sure to never let that happen.
    fn from_int(x: u64) -> Self;

    /// Get the value 10^e from a pre-computed table. Panics for e >= ceil_log5_of_max_sig().
    fn short_fast_pow10(e: usize) -> Self;

    // FIXME Everything that follows should be associated constants, but taking the value of an
    // associated constant from a type parameter does not work (yet?)
    // A possible workaround is having a `FloatInfo` struct for all the constants, but so far
    // the methods aren't painful enough to rewrite.

    /// What the name says. It's easier to hard code than juggling intrinsics and
    /// hoping LLVM constant folds it.
    fn ceil_log5_of_max_sig() -> i16;

    // A conservative bound on the decimal digits of inputs that can't produce overflow or zero or
    /// subnormals. Probably the decimal exponent of the maximum normal value, hence the name.
    fn max_normal_digits() -> usize;

    /// When the most significant decimal digit has a place value greater than this, the number
    /// is certainly rounded to infinity.
    fn inf_cutoff() -> i64;

    /// When the most significant decimal digit has a place value less than this, the number
    /// is certainly rounded to zero.
    fn zero_cutoff() -> i64;

    /// The number of bits in the exponent.
    fn exp_bits() -> u8;

    /// The number of bits in the singificand, *including* the hidden bit.
    fn sig_bits() -> u8;

    /// The number of bits in the singificand, *excluding* the hidden bit.
    fn explicit_sig_bits() -> u8 {
        Self::sig_bits() - 1
    }

    /// The maximum legal exponent in fractional representation.
    fn max_exp() -> i16 {
        (1 << (Self::exp_bits() - 1)) - 1
    }

    /// The minimum legal exponent in fractional representation, excluding subnormals.
    fn min_exp() -> i16 {
        -Self::max_exp() + 1
    }

    /// `MAX_EXP` for integral representation, i.e., with the shift applied.
    fn max_exp_int() -> i16 {
        Self::max_exp() - (Self::sig_bits() as i16 - 1)
    }

    /// `MAX_EXP` encoded (i.e., with offset bias)
    fn max_encoded_exp() -> i16 {
        (1 << Self::exp_bits()) - 1
    }

    /// `MIN_EXP` for integral representation, i.e., with the shift applied.
    fn min_exp_int() -> i16 {
        Self::min_exp() - (Self::sig_bits() as i16 - 1)
    }

    /// The maximum normalized singificand in integral representation.
    fn max_sig() -> u64 {
        (1 << Self::sig_bits()) - 1
    }

    /// The minimal normalized significand in integral representation.
    fn min_sig() -> u64 {
        1 << (Self::sig_bits() - 1)
    }
}

impl RawFloat for f32 {
    fn zero2() -> Self {
        0.0
    }

    fn sig_bits() -> u8 {
        24
    }

    fn exp_bits() -> u8 {
        8
    }

    fn ceil_log5_of_max_sig() -> i16 {
        11
    }

    fn transmute(self) -> u64 {
        let bits: u32 = unsafe { transmute(self) };
        bits as u64
    }

    fn from_bits(bits: u64) -> f32 {
        assert!(bits < u32::MAX as u64, "f32::from_bits: too many bits");
        unsafe { transmute(bits as u32) }
    }

    fn unpack(self) -> Unpacked {
        let (sig, exp, _sig) = self.integer_decode2();
        Unpacked::new(sig, exp)
    }

    fn from_int(x: u64) -> f32 {
        // rkruppe is uncertain whether `as` rounds correctly on all platforms.
        debug_assert!(x as f32 == fp_to_float(Fp { f: x, e: 0 }));
        x as f32
    }

    fn short_fast_pow10(e: usize) -> Self {
        table::F32_SHORT_POWERS[e]
    }

    fn max_normal_digits() -> usize {
        35
    }

    fn inf_cutoff() -> i64 {
        40
    }

    fn zero_cutoff() -> i64 {
        -48
    }
}


impl RawFloat for f64 {
    fn zero2() -> Self {
        0.0
    }

    fn sig_bits() -> u8 {
        53
    }

    fn exp_bits() -> u8 {
        11
    }

    fn ceil_log5_of_max_sig() -> i16 {
        23
    }

    fn transmute(self) -> u64 {
        let bits: u64 = unsafe { transmute(self) };
        bits
    }

    fn from_bits(bits: u64) -> f64 {
        unsafe { transmute(bits) }
    }

    fn unpack(self) -> Unpacked {
        let (sig, exp, _sig) = self.integer_decode2();
        Unpacked::new(sig, exp)
    }

    fn from_int(x: u64) -> f64 {
        // rkruppe is uncertain whether `as` rounds correctly on all platforms.
        debug_assert!(x as f64 == fp_to_float(Fp { f: x, e: 0 }));
        x as f64
    }

    fn short_fast_pow10(e: usize) -> Self {
        table::F64_SHORT_POWERS[e]
    }

    fn max_normal_digits() -> usize {
        305
    }

    fn inf_cutoff() -> i64 {
        310
    }

    fn zero_cutoff() -> i64 {
        -326
    }

}

/// Convert an Fp to the closest f64. Only handles number that fit into a normalized f64.
pub fn fp_to_float<T: RawFloat>(x: Fp) -> T {
    let x = x.normalize();
    // x.f is 64 bit, so x.e has a mantissa shift of 63
    let e = x.e + 63;
    if e > T::max_exp() {
        panic!("fp_to_float: exponent {} too large", e)
    }  else if e > T::min_exp() {
        encode_normal(round_normal::<T>(x))
    } else {
        panic!("fp_to_float: exponent {} too small", e)
    }
}

/// Round the 64-bit significand to 53 bit with half-to-even. Does not handle exponent overflow.
pub fn round_normal<T: RawFloat>(x: Fp) -> Unpacked {
    let excess = 64 - T::sig_bits() as i16;
    let half: u64 = 1 << (excess - 1);
    let (q, rem) = (x.f >> excess, x.f & ((1 << excess) - 1));
    assert_eq!(q << excess | rem, x.f);
    // Adjust mantissa shift
    let k = x.e + excess;
    if rem < half {
        Unpacked::new(q, k)
    } else if rem == half && (q % 2) == 0 {
        Unpacked::new(q, k)
    } else if q == T::max_sig() {
        Unpacked::new(T::min_sig(), k + 1)
    } else {
        Unpacked::new(q + 1, k)
    }
}

/// Inverse of `RawFloat::unpack()` for normalized numbers.
/// Panics if the significand or exponent are not valid for normalized numbers.
pub fn encode_normal<T: RawFloat>(x: Unpacked) -> T {
    debug_assert!(T::min_sig() <= x.sig && x.sig <= T::max_sig(),
        "encode_normal: significand not normalized");
    // Remove the hidden bit
    let sig_enc = x.sig & !(1 << T::explicit_sig_bits());
    // Adjust the exponent for exponent bias and mantissa shift
    let k_enc = x.k + T::max_exp() + T::explicit_sig_bits() as i16;
    debug_assert!(k_enc != 0 && k_enc < T::max_encoded_exp(),
        "encode_normal: exponent out of range");
    // Leave sign bit at 0 ("+"), our numbers are all positive
    let bits = (k_enc as u64) << T::explicit_sig_bits() | sig_enc;
    T::from_bits(bits)
}

/// Construct the subnormal. A mantissa of 0 is allowed and constructs zero.
pub fn encode_subnormal<T: RawFloat>(significand: u64) -> T {
    assert!(significand < T::min_sig(), "encode_subnormal: not actually subnormal");
    // Encoded exponent is 0, the sign bit is 0, so we just have to reinterpret the bits.
    T::from_bits(significand)
}

/// Approximate a bignum with an Fp. Rounds within 0.5 ULP with half-to-even.
pub fn big_to_fp(f: &Big) -> Fp {
    let end = f.bit_length();
    assert!(end != 0, "big_to_fp: unexpectedly, input is zero");
    let start = end.saturating_sub(64);
    let leading = num::get_bits(f, start, end);
    // We cut off all bits prior to the index `start`, i.e., we effectively right-shift by
    // an amount of `start`, so this is also the exponent we need.
    let e = start as i16;
    let rounded_down = Fp { f: leading, e: e }.normalize();
    // Round (half-to-even) depending on the truncated bits.
    match num::compare_with_half_ulp(f, start) {
        Less => rounded_down,
        Equal if leading % 2 == 0 => rounded_down,
        Equal | Greater => match leading.checked_add(1) {
            Some(f) => Fp { f: f, e: e }.normalize(),
            None => Fp { f: 1 << 63, e: e + 1 },
        }
    }
}

/// Find the largest floating point number strictly smaller than the argument.
/// Does not handle subnormals, zero, or exponent underflow.
pub fn prev_float<T: RawFloat>(x: T) -> T {
    match x.classify() {
        Infinite => panic!("prev_float: argument is infinite"),
        Nan => panic!("prev_float: argument is NaN"),
        Subnormal => panic!("prev_float: argument is subnormal"),
        Zero => panic!("prev_float: argument is zero"),
        Normal => {
            let Unpacked { sig, k } = x.unpack();
            if sig == T::min_sig() {
                encode_normal(Unpacked::new(T::max_sig(), k - 1))
            } else {
                encode_normal(Unpacked::new(sig - 1, k))
            }
        }
    }
}

// Find the smallest floating point number strictly larger than the argument.
// This operation is saturating, i.e. next_float(inf) == inf.
// Unlike most code in this module, this function does handle zero, subnormals, and infinities.
// However, like all other code here, it does not deal with NaN and negative numbers.
pub fn next_float<T: RawFloat>(x: T) -> T {
    match x.classify() {
        Nan => panic!("next_float: argument is NaN"),
        Infinite => T::infinity2(),
        // This seems too good to be true, but it works.
        // 0.0 is encoded as the all-zero word. Subnormals are 0x000m...m where m is the mantissa.
        // In particular, the smallest subnormal is 0x0...01 and the largest is 0x000F...F.
        // The smallest normal number is 0x0010...0, so this corner case works as well.
        // If the increment overflows the mantissa, the carry bit increments the exponent as we
        // want, and the mantissa bits become zero. Because of the hidden bit convention, this
        // too is exactly what we want!
        // Finally, f64::MAX + 1 = 7eff...f + 1 = 7ff0...0 = f64::INFINITY.
        Zero | Subnormal | Normal => {
            let bits: u64 = x.transmute();
            T::from_bits(bits + 1)
        }
    }
}
Commit	Line	Data
e9174d1e SL	1	// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
	2	// file at the top-level directory of this distribution and at
	3	// http://rust-lang.org/COPYRIGHT.
	4	//
	5	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
	6	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
	7	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
	8	// option. This file may not be copied, modified, or distributed
	9	// except according to those terms.
	10
	11	//! Bit fiddling on positive IEEE 754 floats. Negative numbers aren't and needn't be handled.
	12	//! Normal floating point numbers have a canonical representation as (frac, exp) such that the
	13	//! value is 2^exp * (1 + sum(frac[N-i] / 2^i)) where N is the number of bits. Subnormals are
	14	//! slightly different and weird, but the same principle applies.
	15	//!
	16	//! Here, however, we represent them as (sig, k) with f positive, such that the value is f * 2^e.
	17	//! Besides making the "hidden bit" explicit, this changes the exponent by the so-called
	18	//! mantissa shift.
	19	//!
	20	//! Put another way, normally floats are written as (1) but here they are written as (2):
	21	//!
	22	//! 1. `1.101100...11 * 2^m`
	23	//! 2. `1101100...11 * 2^n`
	24	//!
	25	//! We call (1) the fractional representation and (2) the integral representation.
	26	//!
	27	//! Many functions in this module only handle normal numbers. The dec2flt routines conservatively
	28	//! take the universally-correct slow path (Algorithm M) for very small and very large numbers.
	29	//! That algorithm needs only next_float() which does handle subnormals and zeros.
e9174d1e SL	30	use u32;
	31	use cmp::Ordering::{Less, Equal, Greater};
	32	use ops::{Mul, Div, Neg};
	33	use fmt::{Debug, LowerExp};
	34	use mem::transmute;
b039eaaf	35	use num::diy_float::Fp;
e9174d1e SL	36	use num::FpCategory::{Infinite, Zero, Subnormal, Normal, Nan};
e9174d1e SL	37	use num::Float;
b039eaaf	38	use num::dec2flt::num::{self, Big};
9cc50fc6	39	use num::dec2flt::table;
e9174d1e SL	40
	41	#[derive(Copy, Clone, Debug)]
	42	pub struct Unpacked {
	43	pub sig: u64,
	44	pub k: i16,
	45	}
	46
	47	impl Unpacked {
	48	pub fn new(sig: u64, k: i16) -> Self {
	49	Unpacked { sig: sig, k: k }
	50	}
	51	}
	52
	53	/// A helper trait to avoid duplicating basically all the conversion code for `f32` and `f64`.
	54	///
	55	/// See the parent module's doc comment for why this is necessary.
	56	///
	57	/// Should never ever be implemented for other types or be used outside the dec2flt module.
	58	/// Inherits from `Float` because there is some overlap, but all the reused methods are trivial.
	59	/// The "methods" (pseudo-constants) with default implementation should not be overriden.
	60	pub trait RawFloat : Float + Copy + Debug + LowerExp
	61	+ Mul<Output=Self> + Div<Output=Self> + Neg<Output=Self>
	62	{
3157f602 XL	63	// suffix of "2" because Float::infinity is deprecated
	64	#[allow(deprecated)]
	65	fn infinity2() -> Self {
	66	Float::infinity()
	67	}
	68
	69	// suffix of "2" because Float::nan is deprecated
	70	#[allow(deprecated)]
	71	fn nan2() -> Self {
	72	Float::nan()
	73	}
	74
	75	// suffix of "2" because Float::zero is deprecated
	76	fn zero2() -> Self;
	77
	78	// suffix of "2" because Float::integer_decode is deprecated
	79	#[allow(deprecated)]
	80	fn integer_decode2(self) -> (u64, i16, i8) {
	81	Float::integer_decode(self)
	82	}
	83
e9174d1e SL	84	/// Get the raw binary representation of the float.
	85	fn transmute(self) -> u64;
	86
	87	/// Transmute the raw binary representation into a float.
	88	fn from_bits(bits: u64) -> Self;
	89
	90	/// Decode the float.
	91	fn unpack(self) -> Unpacked;
	92
	93	/// Cast from a small integer that can be represented exactly. Panic if the integer can't be
	94	/// represented, the other code in this module makes sure to never let that happen.
	95	fn from_int(x: u64) -> Self;
	96
9cc50fc6 SL	97	/// Get the value 10^e from a pre-computed table. Panics for e >= ceil_log5_of_max_sig().
	98	fn short_fast_pow10(e: usize) -> Self;
	99
e9174d1e SL	100	// FIXME Everything that follows should be associated constants, but taking the value of an
	101	// associated constant from a type parameter does not work (yet?)
	102	// A possible workaround is having a `FloatInfo` struct for all the constants, but so far
	103	// the methods aren't painful enough to rewrite.
	104
	105	/// What the name says. It's easier to hard code than juggling intrinsics and
	106	/// hoping LLVM constant folds it.
	107	fn ceil_log5_of_max_sig() -> i16;
	108
	109	// A conservative bound on the decimal digits of inputs that can't produce overflow or zero or
	110	/// subnormals. Probably the decimal exponent of the maximum normal value, hence the name.
	111	fn max_normal_digits() -> usize;
	112
	113	/// When the most significant decimal digit has a place value greater than this, the number
	114	/// is certainly rounded to infinity.
	115	fn inf_cutoff() -> i64;
	116
	117	/// When the most significant decimal digit has a place value less than this, the number
	118	/// is certainly rounded to zero.
	119	fn zero_cutoff() -> i64;
	120
	121	/// The number of bits in the exponent.
	122	fn exp_bits() -> u8;
	123
	124	/// The number of bits in the singificand, including the hidden bit.
	125	fn sig_bits() -> u8;
	126
	127	/// The number of bits in the singificand, excluding the hidden bit.
	128	fn explicit_sig_bits() -> u8 {
	129	Self::sig_bits() - 1
	130	}
	131
	132	/// The maximum legal exponent in fractional representation.
	133	fn max_exp() -> i16 {
	134	(1 << (Self::exp_bits() - 1)) - 1
	135	}
	136
	137	/// The minimum legal exponent in fractional representation, excluding subnormals.
	138	fn min_exp() -> i16 {
	139	-Self::max_exp() + 1
	140	}
	141
	142	/// `MAX_EXP` for integral representation, i.e., with the shift applied.
	143	fn max_exp_int() -> i16 {
	144	Self::max_exp() - (Self::sig_bits() as i16 - 1)
	145	}
	146
	147	/// `MAX_EXP` encoded (i.e., with offset bias)
	148	fn max_encoded_exp() -> i16 {
	149	(1 << Self::exp_bits()) - 1
	150	}
	151
	152	/// `MIN_EXP` for integral representation, i.e., with the shift applied.
	153	fn min_exp_int() -> i16 {
	154	Self::min_exp() - (Self::sig_bits() as i16 - 1)
	155	}
	156
	157	/// The maximum normalized singificand in integral representation.
	158	fn max_sig() -> u64 {
	159	(1 << Self::sig_bits()) - 1
	160	}
	161
	162	/// The minimal normalized significand in integral representation.
	163	fn min_sig() -> u64 {
164	1 << (Self::sig_bits() - 1)
165	}
166	}
167
168	impl RawFloat for f32 {
3157f602 XL	169	fn zero2() -> Self {
	170	0.0
	171	}
	172
e9174d1e SL	173	fn sig_bits() -> u8 {
	174	24
	175	}
	176
	177	fn exp_bits() -> u8 {
	178	8
	179	}
	180
	181	fn ceil_log5_of_max_sig() -> i16 {
	182	11
	183	}
	184
	185	fn transmute(self) -> u64 {
	186	let bits: u32 = unsafe { transmute(self) };
	187	bits as u64
	188	}
	189
	190	fn from_bits(bits: u64) -> f32 {
	191	assert!(bits < u32::MAX as u64, "f32::from_bits: too many bits");
	192	unsafe { transmute(bits as u32) }
	193	}
	194
	195	fn unpack(self) -> Unpacked {
3157f602	196	let (sig, exp, _sig) = self.integer_decode2();
e9174d1e SL	197	Unpacked::new(sig, exp)
	198	}
	199
	200	fn from_int(x: u64) -> f32 {
	201	// rkruppe is uncertain whether `as` rounds correctly on all platforms.
	202	debug_assert!(x as f32 == fp_to_float(Fp { f: x, e: 0 }));
	203	x as f32
	204	}
	205
9cc50fc6 SL	206	fn short_fast_pow10(e: usize) -> Self {
	207	table::F32_SHORT_POWERS[e]
	208	}
	209
e9174d1e SL	210	fn max_normal_digits() -> usize {
	211	35
	212	}
	213
	214	fn inf_cutoff() -> i64 {
	215	40
	216	}
	217
	218	fn zero_cutoff() -> i64 {
	219	-48
	220	}
	221	}
	222
	223
	224	impl RawFloat for f64 {
3157f602 XL	225	fn zero2() -> Self {
	226	0.0
	227	}
	228
e9174d1e SL	229	fn sig_bits() -> u8 {
	230	53
	231	}
	232
	233	fn exp_bits() -> u8 {
	234	11
	235	}
	236
	237	fn ceil_log5_of_max_sig() -> i16 {
	238	23
	239	}
	240
	241	fn transmute(self) -> u64 {
	242	let bits: u64 = unsafe { transmute(self) };
	243	bits
	244	}
	245
	246	fn from_bits(bits: u64) -> f64 {
	247	unsafe { transmute(bits) }
	248	}
	249
	250	fn unpack(self) -> Unpacked {
3157f602	251	let (sig, exp, _sig) = self.integer_decode2();
e9174d1e SL	252	Unpacked::new(sig, exp)
	253	}
	254
	255	fn from_int(x: u64) -> f64 {
	256	// rkruppe is uncertain whether `as` rounds correctly on all platforms.
	257	debug_assert!(x as f64 == fp_to_float(Fp { f: x, e: 0 }));
	258	x as f64
	259	}
	260
9cc50fc6 SL	261	fn short_fast_pow10(e: usize) -> Self {
	262	table::F64_SHORT_POWERS[e]
	263	}
	264
e9174d1e SL	265	fn max_normal_digits() -> usize {
	266	305
	267	}
	268
	269	fn inf_cutoff() -> i64 {
	270	310
	271	}
	272
	273	fn zero_cutoff() -> i64 {
	274	-326
	275	}
	276
	277	}
	278
	279	/// Convert an Fp to the closest f64. Only handles number that fit into a normalized f64.
	280	pub fn fp_to_float<T: RawFloat>(x: Fp) -> T {
	281	let x = x.normalize();
	282	// x.f is 64 bit, so x.e has a mantissa shift of 63
	283	let e = x.e + 63;
	284	if e > T::max_exp() {
	285	panic!("fp_to_float: exponent {} too large", e)
	286	} else if e > T::min_exp() {
	287	encode_normal(round_normal::<T>(x))
	288	} else {
	289	panic!("fp_to_float: exponent {} too small", e)
	290	}
	291	}
	292
	293	/// Round the 64-bit significand to 53 bit with half-to-even. Does not handle exponent overflow.
	294	pub fn round_normal<T: RawFloat>(x: Fp) -> Unpacked {
	295	let excess = 64 - T::sig_bits() as i16;
	296	let half: u64 = 1 << (excess - 1);
	297	let (q, rem) = (x.f >> excess, x.f & ((1 << excess) - 1));
	298	assert_eq!(q << excess \| rem, x.f);
	299	// Adjust mantissa shift
	300	let k = x.e + excess;
	301	if rem < half {
	302	Unpacked::new(q, k)
	303	} else if rem == half && (q % 2) == 0 {
	304	Unpacked::new(q, k)
	305	} else if q == T::max_sig() {
	306	Unpacked::new(T::min_sig(), k + 1)
	307	} else {
	308	Unpacked::new(q + 1, k)
	309	}
	310	}
	311
	312	/// Inverse of `RawFloat::unpack()` for normalized numbers.
	313	/// Panics if the significand or exponent are not valid for normalized numbers.
	314	pub fn encode_normal<T: RawFloat>(x: Unpacked) -> T {
	315	debug_assert!(T::min_sig() <= x.sig && x.sig <= T::max_sig(),
	316	"encode_normal: significand not normalized");
	317	// Remove the hidden bit
	318	let sig_enc = x.sig & !(1 << T::explicit_sig_bits());
	319	// Adjust the exponent for exponent bias and mantissa shift
	320	let k_enc = x.k + T::max_exp() + T::explicit_sig_bits() as i16;
	321	debug_assert!(k_enc != 0 && k_enc < T::max_encoded_exp(),
	322	"encode_normal: exponent out of range");
	323	// Leave sign bit at 0 ("+"), our numbers are all positive
	324	let bits = (k_enc as u64) << T::explicit_sig_bits() \| sig_enc;
	325	T::from_bits(bits)
	326	}
	327
	328	/// Construct the subnormal. A mantissa of 0 is allowed and constructs zero.
329	pub fn encode_subnormal<T: RawFloat>(significand: u64) -> T {
330	assert!(significand < T::min_sig(), "encode_subnormal: not actually subnormal");
9cc50fc6	331	// Encoded exponent is 0, the sign bit is 0, so we just have to reinterpret the bits.
e9174d1e SL	332	T::from_bits(significand)
	333	}
	334
	335	/// Approximate a bignum with an Fp. Rounds within 0.5 ULP with half-to-even.
	336	pub fn big_to_fp(f: &Big) -> Fp {
	337	let end = f.bit_length();
	338	assert!(end != 0, "big_to_fp: unexpectedly, input is zero");
	339	let start = end.saturating_sub(64);
	340	let leading = num::get_bits(f, start, end);
	341	// We cut off all bits prior to the index `start`, i.e., we effectively right-shift by
	342	// an amount of `start`, so this is also the exponent we need.
	343	let e = start as i16;
	344	let rounded_down = Fp { f: leading, e: e }.normalize();
	345	// Round (half-to-even) depending on the truncated bits.
	346	match num::compare_with_half_ulp(f, start) {
	347	Less => rounded_down,
	348	Equal if leading % 2 == 0 => rounded_down,
	349	Equal \| Greater => match leading.checked_add(1) {
	350	Some(f) => Fp { f: f, e: e }.normalize(),
	351	None => Fp { f: 1 << 63, e: e + 1 },
	352	}
	353	}
	354	}
	355
	356	/// Find the largest floating point number strictly smaller than the argument.
	357	/// Does not handle subnormals, zero, or exponent underflow.
	358	pub fn prev_float<T: RawFloat>(x: T) -> T {
	359	match x.classify() {
	360	Infinite => panic!("prev_float: argument is infinite"),
	361	Nan => panic!("prev_float: argument is NaN"),
	362	Subnormal => panic!("prev_float: argument is subnormal"),
	363	Zero => panic!("prev_float: argument is zero"),
	364	Normal => {
	365	let Unpacked { sig, k } = x.unpack();
	366	if sig == T::min_sig() {
	367	encode_normal(Unpacked::new(T::max_sig(), k - 1))
	368	} else {
	369	encode_normal(Unpacked::new(sig - 1, k))
	370	}
	371	}
	372	}
	373	}
	374
	375	// Find the smallest floating point number strictly larger than the argument.
	376	// This operation is saturating, i.e. next_float(inf) == inf.
	377	// Unlike most code in this module, this function does handle zero, subnormals, and infinities.
	378	// However, like all other code here, it does not deal with NaN and negative numbers.
	379	pub fn next_float<T: RawFloat>(x: T) -> T {
	380	match x.classify() {
	381	Nan => panic!("next_float: argument is NaN"),
3157f602	382	Infinite => T::infinity2(),
e9174d1e SL	383	// This seems too good to be true, but it works.
	384	// 0.0 is encoded as the all-zero word. Subnormals are 0x000m...m where m is the mantissa.
	385	// In particular, the smallest subnormal is 0x0...01 and the largest is 0x000F...F.
	386	// The smallest normal number is 0x0010...0, so this corner case works as well.
	387	// If the increment overflows the mantissa, the carry bit increments the exponent as we
	388	// want, and the mantissa bits become zero. Because of the hidden bit convention, this
	389	// too is exactly what we want!
	390	// Finally, f64::MAX + 1 = 7eff...f + 1 = 7ff0...0 = f64::INFINITY.
	391	Zero \| Subnormal \| Normal => {
	392	let bits: u64 = x.transmute();
	393	T::from_bits(bits + 1)
	394	}
	395	}
	396	}