[rustc.git] / vendor / ryu / src / d2s.rs

// Translated from C to Rust. The original C code can be found at
// https://github.com/ulfjack/ryu and carries the following license:
//
// Copyright 2018 Ulf Adams
//
// The contents of this file may be used under the terms of the Apache License,
// Version 2.0.
//
//    (See accompanying file LICENSE-Apache or copy at
//     http://www.apache.org/licenses/LICENSE-2.0)
//
// Alternatively, the contents of this file may be used under the terms of
// the Boost Software License, Version 1.0.
//    (See accompanying file LICENSE-Boost or copy at
//     https://www.boost.org/LICENSE_1_0.txt)
//
// Unless required by applicable law or agreed to in writing, this software
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.

use core::{mem, ptr};

use common::*;
#[cfg(not(feature = "small"))]
use d2s_full_table::*;
#[cfg(feature = "small")]
use d2s_small_table::*;
use digit_table::*;
use d2s_intrinsics::*;

#[cfg(feature = "no-panic")]
use no_panic::no_panic;

pub const DOUBLE_MANTISSA_BITS: u32 = 52;
pub const DOUBLE_EXPONENT_BITS: u32 = 11;

const DOUBLE_POW5_INV_BITCOUNT: i32 = 122;
const DOUBLE_POW5_BITCOUNT: i32 = 121;

#[cfg_attr(feature = "no-panic", inline)]
fn pow5_factor(mut value: u64) -> u32 {
    let mut count = 0u32;
    loop {
        debug_assert!(value != 0);
        let q = div5(value);
        let r = (value - 5 * q) as u32;
        if r != 0 {
            break;
        }
        value = q;
        count += 1;
    }
    count
}

// Returns true if value is divisible by 5^p.
#[cfg_attr(feature = "no-panic", inline)]
fn multiple_of_power_of_5(value: u64, p: u32) -> bool {
    // I tried a case distinction on p, but there was no performance difference.
    pow5_factor(value) >= p
}

// Returns true if value is divisible by 2^p.
#[cfg_attr(feature = "no-panic", inline)]
fn multiple_of_power_of_2(value: u64, p: u32) -> bool {
    // return __builtin_ctzll(value) >= p;
    (value & ((1u64 << p) - 1)) == 0
}

#[cfg(integer128)]
#[cfg_attr(feature = "no-panic", inline)]
fn mul_shift(m: u64, mul: &(u64, u64), j: u32) -> u64 {
    let b0 = m as u128 * mul.0 as u128;
    let b2 = m as u128 * mul.1 as u128;
    (((b0 >> 64) + b2) >> (j - 64)) as u64
}

#[cfg(integer128)]
#[cfg_attr(feature = "no-panic", inline)]
fn mul_shift_all(
    m: u64,
    mul: &(u64, u64),
    j: u32,
    vp: &mut u64,
    vm: &mut u64,
    mm_shift: u32,
) -> u64 {
    *vp = mul_shift(4 * m + 2, mul, j);
    *vm = mul_shift(4 * m - 1 - mm_shift as u64, mul, j);
    mul_shift(4 * m, mul, j)
}

#[cfg(not(integer128))]
#[cfg_attr(feature = "no-panic", inline)]
fn mul_shift_all(
    mut m: u64,
    mul: &(u64, u64),
    j: u32,
    vp: &mut u64,
    vm: &mut u64,
    mm_shift: u32,
) -> u64 {
    m <<= 1;
    // m is maximum 55 bits
    let (lo, tmp) = umul128(m, mul.0);
    let (mut mid, mut hi) = umul128(m, mul.1);
    mid = mid.wrapping_add(tmp);
    hi = hi.wrapping_add((mid < tmp) as u64); // overflow into hi

    let lo2 = lo.wrapping_add(mul.0);
    let mid2 = mid.wrapping_add(mul.1).wrapping_add((lo2 < lo) as u64);
    let hi2 = hi.wrapping_add((mid2 < mid) as u64);
    *vp = shiftright128(mid2, hi2, j - 64 - 1);

    if mm_shift == 1 {
        let lo3 = lo.wrapping_sub(mul.0);
        let mid3 = mid.wrapping_sub(mul.1).wrapping_sub((lo3 > lo) as u64);
        let hi3 = hi.wrapping_sub((mid3 > mid) as u64);
        *vm = shiftright128(mid3, hi3, j - 64 - 1);
    } else {
        let lo3 = lo + lo;
        let mid3 = mid.wrapping_add(mid).wrapping_add((lo3 < lo) as u64);
        let hi3 = hi.wrapping_add(hi).wrapping_add((mid3 < mid) as u64);
        let lo4 = lo3.wrapping_sub(mul.0);
        let mid4 = mid3.wrapping_sub(mul.1).wrapping_sub((lo4 > lo3) as u64);
        let hi4 = hi3.wrapping_sub((mid4 > mid3) as u64);
        *vm = shiftright128(mid4, hi4, j - 64);
    }

    shiftright128(mid, hi, j - 64 - 1)
}

#[cfg_attr(feature = "no-panic", inline)]
pub fn decimal_length(v: u64) -> u32 {
    // This is slightly faster than a loop.
    // The average output length is 16.38 digits, so we check high-to-low.
    // Function precondition: v is not an 18, 19, or 20-digit number.
    // (17 digits are sufficient for round-tripping.)
    debug_assert!(v < 100000000000000000);

    if v >= 10000000000000000 {
        17
    } else if v >= 1000000000000000 {
        16
    } else if v >= 100000000000000 {
        15
    } else if v >= 10000000000000 {
        14
    } else if v >= 1000000000000 {
        13
    } else if v >= 100000000000 {
        12
    } else if v >= 10000000000 {
        11
    } else if v >= 1000000000 {
        10
    } else if v >= 100000000 {
        9
    } else if v >= 10000000 {
        8
    } else if v >= 1000000 {
        7
    } else if v >= 100000 {
        6
    } else if v >= 10000 {
        5
    } else if v >= 1000 {
        4
    } else if v >= 100 {
        3
    } else if v >= 10 {
        2
    } else {
        1
    }
}

// A floating decimal representing m * 10^e.
pub struct FloatingDecimal64 {
    pub mantissa: u64,
    pub exponent: i32,
}

#[cfg_attr(feature = "no-panic", inline)]
pub fn d2d(ieee_mantissa: u64, ieee_exponent: u32) -> FloatingDecimal64 {
    let bias = (1u32 << (DOUBLE_EXPONENT_BITS - 1)) - 1;

    let (e2, m2) = if ieee_exponent == 0 {
        (
            // We subtract 2 so that the bounds computation has 2 additional bits.
            1 - bias as i32 - DOUBLE_MANTISSA_BITS as i32 - 2,
            ieee_mantissa,
        )
    } else {
        (
            ieee_exponent as i32 - bias as i32 - DOUBLE_MANTISSA_BITS as i32 - 2,
            (1u64 << DOUBLE_MANTISSA_BITS) | ieee_mantissa,
        )
    };
    let even = (m2 & 1) == 0;
    let accept_bounds = even;

    // Step 2: Determine the interval of legal decimal representations.
    let mv = 4 * m2;
    // Implicit bool -> int conversion. True is 1, false is 0.
    let mm_shift = (ieee_mantissa != 0 || ieee_exponent <= 1) as u32;
    // We would compute mp and mm like this:
    // uint64_t mp = 4 * m2 + 2;
    // uint64_t mm = mv - 1 - mm_shift;

    // Step 3: Convert to a decimal power base using 128-bit arithmetic.
    let mut vr: u64;
    let mut vp: u64 = unsafe { mem::uninitialized() };
    let mut vm: u64 = unsafe { mem::uninitialized() };
    let e10: i32;
    let mut vm_is_trailing_zeros = false;
    let mut vr_is_trailing_zeros = false;
    if e2 >= 0 {
        // I tried special-casing q == 0, but there was no effect on performance.
        // This expression is slightly faster than max(0, log10_pow2(e2) - 1).
        let q = (log10_pow2(e2) - (e2 > 3) as i32) as u32;
        e10 = q as i32;
        let k = DOUBLE_POW5_INV_BITCOUNT + pow5bits(q as i32) as i32 - 1;
        let i = -e2 + q as i32 + k;
        vr = mul_shift_all(
            m2,
            #[cfg(feature = "small")]
            unsafe {
                &compute_inv_pow5(q)
            },
            #[cfg(not(feature = "small"))]
            unsafe {
                debug_assert!(q < DOUBLE_POW5_INV_SPLIT.len() as u32);
                DOUBLE_POW5_INV_SPLIT.get_unchecked(q as usize)
            },
            i as u32,
            &mut vp,
            &mut vm,
            mm_shift,
        );
        if q <= 21 {
            // This should use q <= 22, but I think 21 is also safe. Smaller values
            // may still be safe, but it's more difficult to reason about them.
            // Only one of mp, mv, and mm can be a multiple of 5, if any.
            let mv_mod5 = (mv - 5 * div5(mv)) as u32;
            if mv_mod5 == 0 {
                vr_is_trailing_zeros = multiple_of_power_of_5(mv, q);
            } else if accept_bounds {
                // Same as min(e2 + (~mm & 1), pow5_factor(mm)) >= q
                // <=> e2 + (~mm & 1) >= q && pow5_factor(mm) >= q
                // <=> true && pow5_factor(mm) >= q, since e2 >= q.
                vm_is_trailing_zeros = multiple_of_power_of_5(mv - 1 - mm_shift as u64, q);
            } else {
                // Same as min(e2 + 1, pow5_factor(mp)) >= q.
                vp -= multiple_of_power_of_5(mv + 2, q) as u64;
            }
        }
    } else {
        // This expression is slightly faster than max(0, log10_pow5(-e2) - 1).
        let q = (log10_pow5(-e2) - (-e2 > 1) as i32) as u32;
        e10 = q as i32 + e2;
        let i = -e2 - q as i32;
        let k = pow5bits(i) as i32 - DOUBLE_POW5_BITCOUNT;
        let j = q as i32 - k;
        vr = mul_shift_all(
            m2,
            #[cfg(feature = "small")]
            unsafe {
                &compute_pow5(i as u32)
            },
            #[cfg(not(feature = "small"))]
            unsafe {
                debug_assert!(i < DOUBLE_POW5_SPLIT.len() as i32);
                DOUBLE_POW5_SPLIT.get_unchecked(i as usize)
            },
            j as u32,
            &mut vp,
            &mut vm,
            mm_shift,
        );
        if q <= 1 {
            // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
            // mv = 4 * m2, so it always has at least two trailing 0 bits.
            vr_is_trailing_zeros = true;
            if accept_bounds {
                // mm = mv - 1 - mm_shift, so it has 1 trailing 0 bit iff mm_shift == 1.
                vm_is_trailing_zeros = mm_shift == 1;
            } else {
                // mp = mv + 2, so it always has at least one trailing 0 bit.
                vp -= 1;
            }
        } else if q < 63 {
            // TODO(ulfjack): Use a tighter bound here.
            // We need to compute min(ntz(mv), pow5_factor(mv) - e2) >= q - 1
            // <=> ntz(mv) >= q - 1  &&  pow5_factor(mv) - e2 >= q - 1
            // <=> ntz(mv) >= q - 1    (e2 is negative and -e2 >= q)
            // <=> (mv & ((1 << (q - 1)) - 1)) == 0
            // We also need to make sure that the left shift does not overflow.
            vr_is_trailing_zeros = multiple_of_power_of_2(mv, q - 1);
        }
    }

    // Step 4: Find the shortest decimal representation in the interval of legal representations.
    let mut removed = 0u32;
    let mut last_removed_digit = 0u8;
    // On average, we remove ~2 digits.
    let output = if vm_is_trailing_zeros || vr_is_trailing_zeros {
        // General case, which happens rarely (~0.7%).
        loop {
            let vp_div10 = div10(vp);
            let vm_div10 = div10(vm);
            if vp_div10 <= vm_div10 {
                break;
            }
            let vm_mod10 = (vm - 10 * vm_div10) as u32;
            let vr_div10 = div10(vr);
            let vr_mod10 = (vr - 10 * vr_div10) as u32;
            vm_is_trailing_zeros &= vm_mod10 == 0;
            vr_is_trailing_zeros &= last_removed_digit == 0;
            last_removed_digit = vr_mod10 as u8;
            vr = vr_div10;
            vp = vp_div10;
            vm = vm_div10;
            removed += 1;
        }
        if vm_is_trailing_zeros {
            loop {
                let vm_div10 = div10(vm);
                let vm_mod10 = (vm - 10 * vm_div10) as u32;
                if vm_mod10 != 0 {
                    break;
                }
                let vp_div10 = div10(vp);
                let vr_div10 = div10(vr);
                let vr_mod10 = (vr - 10 * vr_div10) as u32;
                vr_is_trailing_zeros &= last_removed_digit == 0;
                last_removed_digit = vr_mod10 as u8;
                vr = vr_div10;
                vp = vp_div10;
                vm = vm_div10;
                removed += 1;
            }
        }
        if vr_is_trailing_zeros && last_removed_digit == 5 && vr % 2 == 0 {
            // Round even if the exact number is .....50..0.
            last_removed_digit = 4;
        }
        // We need to take vr + 1 if vr is outside bounds or we need to round up.
        vr + ((vr == vm && (!accept_bounds || !vm_is_trailing_zeros)) || last_removed_digit >= 5)
            as u64
    } else {
        // Specialized for the common case (~99.3%). Percentages below are relative to this.
        let mut round_up = false;
        let vp_div100 = div100(vp);
        let vm_div100 = div100(vm);
        // Optimization: remove two digits at a time (~86.2%).
        if vp_div100 > vm_div100 {
            let vr_div100 = div100(vr);
            let vr_mod100 = (vr - 100 * vr_div100) as u32;
            round_up = vr_mod100 >= 50;
            vr = vr_div100;
            vp = vp_div100;
            vm = vm_div100;
            removed += 2;
        }
        // Loop iterations below (approximately), without optimization above:
        // 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
        // Loop iterations below (approximately), with optimization above:
        // 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
        loop {
            let vp_div10 = div10(vp);
            let vm_div10 = div10(vm);
            if vp_div10 <= vm_div10 {
                break;
            }
            let vr_div10 = div10(vr);
            let vr_mod10 = (vr - 10 * vr_div10) as u32;
            round_up = vr_mod10 >= 5;
            vr = vr_div10;
            vp = vp_div10;
            vm = vm_div10;
            removed += 1;
        }
        // We need to take vr + 1 if vr is outside bounds or we need to round up.
        vr + (vr == vm || round_up) as u64
    };
    let exp = e10 + removed as i32;

    FloatingDecimal64 {
        exponent: exp,
        mantissa: output,
    }
}

#[cfg_attr(feature = "no-panic", inline)]
unsafe fn to_chars(v: FloatingDecimal64, sign: bool, result: *mut u8) -> usize {
    // Step 5: Print the decimal representation.
    let mut index = 0isize;
    if sign {
        *result.offset(index) = b'-';
        index += 1;
    }

    let mut output = v.mantissa;
    let olength = decimal_length(output);

    // Print the decimal digits.
    // The following code is equivalent to:
    // for (uint32_t i = 0; i < olength - 1; ++i) {
    //   const uint32_t c = output % 10; output /= 10;
    //   result[index + olength - i] = (char) ('0' + c);
    // }
    // result[index] = '0' + output % 10;

    let mut i = 0isize;
    // We prefer 32-bit operations, even on 64-bit platforms.
    // We have at most 17 digits, and uint32_t can store 9 digits.
    // If output doesn't fit into uint32_t, we cut off 8 digits,
    // so the rest will fit into uint32_t.
    if (output >> 32) != 0 {
        // Expensive 64-bit division.
        let q = div100_000_000(output);
        let mut output2 = (output - 100_000_000 * q) as u32;
        output = q;

        let c = output2 % 10000;
        output2 /= 10000;
        let d = output2 % 10000;
        let c0 = (c % 100) << 1;
        let c1 = (c / 100) << 1;
        let d0 = (d % 100) << 1;
        let d1 = (d / 100) << 1;
        ptr::copy_nonoverlapping(
            DIGIT_TABLE.get_unchecked(c0 as usize),
            result.offset(index + olength as isize - i - 1),
            2,
        );
        ptr::copy_nonoverlapping(
            DIGIT_TABLE.get_unchecked(c1 as usize),
            result.offset(index + olength as isize - i - 3),
            2,
        );
        ptr::copy_nonoverlapping(
            DIGIT_TABLE.get_unchecked(d0 as usize),
            result.offset(index + olength as isize - i - 5),
            2,
        );
        ptr::copy_nonoverlapping(
            DIGIT_TABLE.get_unchecked(d1 as usize),
            result.offset(index + olength as isize - i - 7),
            2,
        );
        i += 8;
    }
    let mut output2 = output as u32;
    while output2 >= 10000 {
        let c = (output2 - 10000 * (output2 / 10000)) as u32;
        output2 /= 10000;
        let c0 = (c % 100) << 1;
        let c1 = (c / 100) << 1;
        ptr::copy_nonoverlapping(
            DIGIT_TABLE.get_unchecked(c0 as usize),
            result.offset(index + olength as isize - i - 1),
            2,
        );
        ptr::copy_nonoverlapping(
            DIGIT_TABLE.get_unchecked(c1 as usize),
            result.offset(index + olength as isize - i - 3),
            2,
        );
        i += 4;
    }
    if output2 >= 100 {
        let c = ((output2 % 100) << 1) as u32;
        output2 /= 100;
        ptr::copy_nonoverlapping(
            DIGIT_TABLE.get_unchecked(c as usize),
            result.offset(index + olength as isize - i - 1),
            2,
        );
        i += 2;
    }
    if output2 >= 10 {
        let c = (output2 << 1) as u32;
        // We can't use memcpy here: the decimal dot goes between these two digits.
        *result.offset(index + olength as isize - i) = *DIGIT_TABLE.get_unchecked(c as usize + 1);
        *result.offset(index) = *DIGIT_TABLE.get_unchecked(c as usize);
    } else {
        *result.offset(index) = b'0' + output2 as u8;
    }

    // Print decimal point if needed.
    if olength > 1 {
        *result.offset(index + 1) = b'.';
        index += olength as isize + 1;
    } else {
        index += 1;
    }

    // Print the exponent.
    *result.offset(index) = b'E';
    index += 1;
    let mut exp = v.exponent as i32 + olength as i32 - 1;
    if exp < 0 {
        *result.offset(index) = b'-';
        index += 1;
        exp = -exp;
    }

    if exp >= 100 {
        let c = exp % 10;
        ptr::copy_nonoverlapping(
            DIGIT_TABLE.get_unchecked((2 * (exp / 10)) as usize),
            result.offset(index),
            2,
        );
        *result.offset(index + 2) = b'0' + c as u8;
        index += 3;
    } else if exp >= 10 {
        ptr::copy_nonoverlapping(
            DIGIT_TABLE.get_unchecked((2 * exp) as usize),
            result.offset(index),
            2,
        );
        index += 2;
    } else {
        *result.offset(index) = b'0' + exp as u8;
        index += 1;
    }

    debug_assert!(index <= 24);
    index as usize
}

/// Print f64 to the given buffer and return number of bytes written. Ryū's
/// original formatting.
///
/// At most 24 bytes will be written.
///
/// ## Special cases
///
/// This function represents any NaN as `NaN`, positive infinity as `Infinity`,
/// and negative infinity as `-Infinity`.
///
/// ## Safety
///
/// The `result` pointer argument must point to sufficiently many writable bytes
/// to hold Ryū's representation of `f`.
///
/// ## Example
///
/// ```rust
/// let f = 1.234f64;
///
/// unsafe {
///     let mut buffer: [u8; 24] = std::mem::uninitialized();
///     let n = ryu::raw::d2s_buffered_n(f, &mut buffer[0]);
///     let s = std::str::from_utf8_unchecked(&buffer[..n]);
///     assert_eq!(s, "1.234E0");
/// }
/// ```
#[cfg_attr(must_use_return, must_use)]
#[cfg_attr(feature = "no-panic", no_panic)]
pub unsafe fn d2s_buffered_n(f: f64, result: *mut u8) -> usize {
    // Step 1: Decode the floating-point number, and unify normalized and subnormal cases.
    let bits = mem::transmute::<f64, u64>(f);

    // Decode bits into sign, mantissa, and exponent.
    let ieee_sign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0;
    let ieee_mantissa = bits & ((1u64 << DOUBLE_MANTISSA_BITS) - 1);
    let ieee_exponent =
        (bits >> DOUBLE_MANTISSA_BITS) as u32 & ((1u32 << DOUBLE_EXPONENT_BITS) - 1);
    // Case distinction; exit early for the easy cases.
    if ieee_exponent == ((1u32 << DOUBLE_EXPONENT_BITS) - 1)
        || (ieee_exponent == 0 && ieee_mantissa == 0)
    {
        return copy_special_str(result, ieee_sign, ieee_exponent != 0, ieee_mantissa != 0);
    }

    let v = d2d(ieee_mantissa, ieee_exponent);
    to_chars(v, ieee_sign, result)
}
Commit	Line	Data
b7449926 XL	1	// Translated from C to Rust. The original C code can be found at
	2	// https://github.com/ulfjack/ryu and carries the following license:
	3	//
	4	// Copyright 2018 Ulf Adams
	5	//
	6	// The contents of this file may be used under the terms of the Apache License,
	7	// Version 2.0.
	8	//
	9	// (See accompanying file LICENSE-Apache or copy at
	10	// http://www.apache.org/licenses/LICENSE-2.0)
	11	//
	12	// Alternatively, the contents of this file may be used under the terms of
	13	// the Boost Software License, Version 1.0.
	14	// (See accompanying file LICENSE-Boost or copy at
	15	// https://www.boost.org/LICENSE_1_0.txt)
	16	//
	17	// Unless required by applicable law or agreed to in writing, this software
	18	// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	19	// KIND, either express or implied.
	20
	21	use core::{mem, ptr};
	22
	23	use common::*;
	24	#[cfg(not(feature = "small"))]
	25	use d2s_full_table::*;
	26	#[cfg(feature = "small")]
	27	use d2s_small_table::*;
	28	use digit_table::*;
	29	use d2s_intrinsics::*;
	30
	31	#[cfg(feature = "no-panic")]
	32	use no_panic::no_panic;
	33
	34	pub const DOUBLE_MANTISSA_BITS: u32 = 52;
	35	pub const DOUBLE_EXPONENT_BITS: u32 = 11;
	36
	37	const DOUBLE_POW5_INV_BITCOUNT: i32 = 122;
	38	const DOUBLE_POW5_BITCOUNT: i32 = 121;
	39
	40	#[cfg_attr(feature = "no-panic", inline)]
	41	fn pow5_factor(mut value: u64) -> u32 {
	42	let mut count = 0u32;
	43	loop {
	44	debug_assert!(value != 0);
	45	let q = div5(value);
	46	let r = (value - 5 * q) as u32;
	47	if r != 0 {
	48	break;
	49	}
	50	value = q;
	51	count += 1;
	52	}
	53	count
	54	}
	55
	56	// Returns true if value is divisible by 5^p.
	57	#[cfg_attr(feature = "no-panic", inline)]
	58	fn multiple_of_power_of_5(value: u64, p: u32) -> bool {
	59	// I tried a case distinction on p, but there was no performance difference.
	60	pow5_factor(value) >= p
	61	}
	62
	63	// Returns true if value is divisible by 2^p.
	64	#[cfg_attr(feature = "no-panic", inline)]
65	fn multiple_of_power_of_2(value: u64, p: u32) -> bool {
66	// return __builtin_ctzll(value) >= p;
67	(value & ((1u64 << p) - 1)) == 0
68	}
69
70	#[cfg(integer128)]
71	#[cfg_attr(feature = "no-panic", inline)]
72	fn mul_shift(m: u64, mul: &(u64, u64), j: u32) -> u64 {
73	let b0 = m as u128 * mul.0 as u128;
74	let b2 = m as u128 * mul.1 as u128;
75	(((b0 >> 64) + b2) >> (j - 64)) as u64
76	}
77
78	#[cfg(integer128)]
79	#[cfg_attr(feature = "no-panic", inline)]
80	fn mul_shift_all(
81	m: u64,
82	mul: &(u64, u64),
83	j: u32,
84	vp: &mut u64,
85	vm: &mut u64,
86	mm_shift: u32,
87	) -> u64 {
88	vp = mul_shift(4 m + 2, mul, j);
89	vm = mul_shift(4 m - 1 - mm_shift as u64, mul, j);
90	mul_shift(4 * m, mul, j)
91	}
92
93	#[cfg(not(integer128))]
94	#[cfg_attr(feature = "no-panic", inline)]
95	fn mul_shift_all(
96	mut m: u64,
97	mul: &(u64, u64),
98	j: u32,
99	vp: &mut u64,
100	vm: &mut u64,
101	mm_shift: u32,
102	) -> u64 {
103	m <<= 1;
104	// m is maximum 55 bits
105	let (lo, tmp) = umul128(m, mul.0);
106	let (mut mid, mut hi) = umul128(m, mul.1);
107	mid = mid.wrapping_add(tmp);
108	hi = hi.wrapping_add((mid < tmp) as u64); // overflow into hi
109
110	let lo2 = lo.wrapping_add(mul.0);
111	let mid2 = mid.wrapping_add(mul.1).wrapping_add((lo2 < lo) as u64);
112	let hi2 = hi.wrapping_add((mid2 < mid) as u64);
113	*vp = shiftright128(mid2, hi2, j - 64 - 1);
114
115	if mm_shift == 1 {
116	let lo3 = lo.wrapping_sub(mul.0);
117	let mid3 = mid.wrapping_sub(mul.1).wrapping_sub((lo3 > lo) as u64);
118	let hi3 = hi.wrapping_sub((mid3 > mid) as u64);
119	*vm = shiftright128(mid3, hi3, j - 64 - 1);
120	} else {
121	let lo3 = lo + lo;
122	let mid3 = mid.wrapping_add(mid).wrapping_add((lo3 < lo) as u64);
123	let hi3 = hi.wrapping_add(hi).wrapping_add((mid3 < mid) as u64);
124	let lo4 = lo3.wrapping_sub(mul.0);
125	let mid4 = mid3.wrapping_sub(mul.1).wrapping_sub((lo4 > lo3) as u64);
126	let hi4 = hi3.wrapping_sub((mid4 > mid3) as u64);
127	*vm = shiftright128(mid4, hi4, j - 64);
128	}
129
130	shiftright128(mid, hi, j - 64 - 1)
131	}
132
133	#[cfg_attr(feature = "no-panic", inline)]
134	pub fn decimal_length(v: u64) -> u32 {
135	// This is slightly faster than a loop.
136	// The average output length is 16.38 digits, so we check high-to-low.
137	// Function precondition: v is not an 18, 19, or 20-digit number.
138	// (17 digits are sufficient for round-tripping.)
139	debug_assert!(v < 100000000000000000);
140
141	if v >= 10000000000000000 {
142	17
143	} else if v >= 1000000000000000 {
144	16
145	} else if v >= 100000000000000 {
146	15
147	} else if v >= 10000000000000 {
148	14
149	} else if v >= 1000000000000 {
150	13
151	} else if v >= 100000000000 {
152	12
153	} else if v >= 10000000000 {
154	11
155	} else if v >= 1000000000 {
156	10
157	} else if v >= 100000000 {
158	9
159	} else if v >= 10000000 {
160	8
161	} else if v >= 1000000 {
162	7
163	} else if v >= 100000 {
164	6
165	} else if v >= 10000 {
166	5
167	} else if v >= 1000 {
168	4
169	} else if v >= 100 {
170	3
171	} else if v >= 10 {
172	2
173	} else {
174	1
175	}
176	}
177
178	// A floating decimal representing m * 10^e.
179	pub struct FloatingDecimal64 {
180	pub mantissa: u64,
181	pub exponent: i32,
182	}
183
184	#[cfg_attr(feature = "no-panic", inline)]
185	pub fn d2d(ieee_mantissa: u64, ieee_exponent: u32) -> FloatingDecimal64 {
186	let bias = (1u32 << (DOUBLE_EXPONENT_BITS - 1)) - 1;
187
188	let (e2, m2) = if ieee_exponent == 0 {
189	(
190	// We subtract 2 so that the bounds computation has 2 additional bits.
191	1 - bias as i32 - DOUBLE_MANTISSA_BITS as i32 - 2,
192	ieee_mantissa,
193	)
194	} else {
195	(
196	ieee_exponent as i32 - bias as i32 - DOUBLE_MANTISSA_BITS as i32 - 2,
197	(1u64 << DOUBLE_MANTISSA_BITS) \| ieee_mantissa,
198	)
199	};
200	let even = (m2 & 1) == 0;
201	let accept_bounds = even;
202
203	// Step 2: Determine the interval of legal decimal representations.
204	let mv = 4 * m2;
205	// Implicit bool -> int conversion. True is 1, false is 0.
206	let mm_shift = (ieee_mantissa != 0 \|\| ieee_exponent <= 1) as u32;
207	// We would compute mp and mm like this:
208	// uint64_t mp = 4 * m2 + 2;
209	// uint64_t mm = mv - 1 - mm_shift;
210
211	// Step 3: Convert to a decimal power base using 128-bit arithmetic.
212	let mut vr: u64;
213	let mut vp: u64 = unsafe { mem::uninitialized() };
214	let mut vm: u64 = unsafe { mem::uninitialized() };
215	let e10: i32;
216	let mut vm_is_trailing_zeros = false;
217	let mut vr_is_trailing_zeros = false;
218	if e2 >= 0 {
219	// I tried special-casing q == 0, but there was no effect on performance.
220	// This expression is slightly faster than max(0, log10_pow2(e2) - 1).
221	let q = (log10_pow2(e2) - (e2 > 3) as i32) as u32;
222	e10 = q as i32;
223	let k = DOUBLE_POW5_INV_BITCOUNT + pow5bits(q as i32) as i32 - 1;
224	let i = -e2 + q as i32 + k;
225	vr = mul_shift_all(
226	m2,
227	#[cfg(feature = "small")]
228	unsafe {
229	&compute_inv_pow5(q)
230	},
231	#[cfg(not(feature = "small"))]
232	unsafe {
233	debug_assert!(q < DOUBLE_POW5_INV_SPLIT.len() as u32);
234	DOUBLE_POW5_INV_SPLIT.get_unchecked(q as usize)
235	},
236	i as u32,
237	&mut vp,
238	&mut vm,
239	mm_shift,
240	);
241	if q <= 21 {
242	// This should use q <= 22, but I think 21 is also safe. Smaller values
243	// may still be safe, but it's more difficult to reason about them.
244	// Only one of mp, mv, and mm can be a multiple of 5, if any.
245	let mv_mod5 = (mv - 5 * div5(mv)) as u32;
246	if mv_mod5 == 0 {
247	vr_is_trailing_zeros = multiple_of_power_of_5(mv, q);
248	} else if accept_bounds {
249	// Same as min(e2 + (~mm & 1), pow5_factor(mm)) >= q
250	// <=> e2 + (~mm & 1) >= q && pow5_factor(mm) >= q
251	// <=> true && pow5_factor(mm) >= q, since e2 >= q.
252	vm_is_trailing_zeros = multiple_of_power_of_5(mv - 1 - mm_shift as u64, q);
253	} else {
254	// Same as min(e2 + 1, pow5_factor(mp)) >= q.
255	vp -= multiple_of_power_of_5(mv + 2, q) as u64;
256	}
257	}
258	} else {
259	// This expression is slightly faster than max(0, log10_pow5(-e2) - 1).
260	let q = (log10_pow5(-e2) - (-e2 > 1) as i32) as u32;
261	e10 = q as i32 + e2;
262	let i = -e2 - q as i32;
263	let k = pow5bits(i) as i32 - DOUBLE_POW5_BITCOUNT;
264	let j = q as i32 - k;
265	vr = mul_shift_all(
266	m2,
267	#[cfg(feature = "small")]
268	unsafe {
269	&compute_pow5(i as u32)
270	},
271	#[cfg(not(feature = "small"))]
272	unsafe {
273	debug_assert!(i < DOUBLE_POW5_SPLIT.len() as i32);
274	DOUBLE_POW5_SPLIT.get_unchecked(i as usize)
275	},
276	j as u32,
277	&mut vp,
278	&mut vm,
279	mm_shift,
280	);
281	if q <= 1 {
282	// {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
283	// mv = 4 * m2, so it always has at least two trailing 0 bits.
284	vr_is_trailing_zeros = true;
285	if accept_bounds {
286	// mm = mv - 1 - mm_shift, so it has 1 trailing 0 bit iff mm_shift == 1.
287	vm_is_trailing_zeros = mm_shift == 1;
288	} else {
289	// mp = mv + 2, so it always has at least one trailing 0 bit.
290	vp -= 1;
291	}
292	} else if q < 63 {
293	// TODO(ulfjack): Use a tighter bound here.
294	// We need to compute min(ntz(mv), pow5_factor(mv) - e2) >= q - 1
295	// <=> ntz(mv) >= q - 1 && pow5_factor(mv) - e2 >= q - 1
296	// <=> ntz(mv) >= q - 1 (e2 is negative and -e2 >= q)
297	// <=> (mv & ((1 << (q - 1)) - 1)) == 0
298	// We also need to make sure that the left shift does not overflow.
299	vr_is_trailing_zeros = multiple_of_power_of_2(mv, q - 1);
300	}
301	}
302
303	// Step 4: Find the shortest decimal representation in the interval of legal representations.
304	let mut removed = 0u32;
305	let mut last_removed_digit = 0u8;
306	// On average, we remove ~2 digits.
307	let output = if vm_is_trailing_zeros \|\| vr_is_trailing_zeros {
308	// General case, which happens rarely (~0.7%).
309	loop {
310	let vp_div10 = div10(vp);
311	let vm_div10 = div10(vm);
312	if vp_div10 <= vm_div10 {
313	break;
314	}
315	let vm_mod10 = (vm - 10 * vm_div10) as u32;
316	let vr_div10 = div10(vr);
317	let vr_mod10 = (vr - 10 * vr_div10) as u32;
318	vm_is_trailing_zeros &= vm_mod10 == 0;
319	vr_is_trailing_zeros &= last_removed_digit == 0;
320	last_removed_digit = vr_mod10 as u8;
321	vr = vr_div10;
322	vp = vp_div10;
323	vm = vm_div10;
324	removed += 1;
325	}
326	if vm_is_trailing_zeros {
327	loop {
328	let vm_div10 = div10(vm);
329	let vm_mod10 = (vm - 10 * vm_div10) as u32;
330	if vm_mod10 != 0 {
331	break;
332	}
333	let vp_div10 = div10(vp);
334	let vr_div10 = div10(vr);
335	let vr_mod10 = (vr - 10 * vr_div10) as u32;
336	vr_is_trailing_zeros &= last_removed_digit == 0;
337	last_removed_digit = vr_mod10 as u8;
338	vr = vr_div10;
339	vp = vp_div10;
340	vm = vm_div10;
341	removed += 1;
342	}
343	}
344	if vr_is_trailing_zeros && last_removed_digit == 5 && vr % 2 == 0 {
345	// Round even if the exact number is .....50..0.
346	last_removed_digit = 4;
347	}
348	// We need to take vr + 1 if vr is outside bounds or we need to round up.
349	vr + ((vr == vm && (!accept_bounds \|\| !vm_is_trailing_zeros)) \|\| last_removed_digit >= 5)
350	as u64
351	} else {
352	// Specialized for the common case (~99.3%). Percentages below are relative to this.
353	let mut round_up = false;
354	let vp_div100 = div100(vp);
355	let vm_div100 = div100(vm);
356	// Optimization: remove two digits at a time (~86.2%).
357	if vp_div100 > vm_div100 {
358	let vr_div100 = div100(vr);
359	let vr_mod100 = (vr - 100 * vr_div100) as u32;
360	round_up = vr_mod100 >= 50;
361	vr = vr_div100;
362	vp = vp_div100;
363	vm = vm_div100;
364	removed += 2;
365	}
366	// Loop iterations below (approximately), without optimization above:
367	// 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
368	// Loop iterations below (approximately), with optimization above:
369	// 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
370	loop {
371	let vp_div10 = div10(vp);
372	let vm_div10 = div10(vm);
373	if vp_div10 <= vm_div10 {
374	break;
375	}
376	let vr_div10 = div10(vr);
377	let vr_mod10 = (vr - 10 * vr_div10) as u32;
378	round_up = vr_mod10 >= 5;
379	vr = vr_div10;
380	vp = vp_div10;
381	vm = vm_div10;
382	removed += 1;
383	}
384	// We need to take vr + 1 if vr is outside bounds or we need to round up.
385	vr + (vr == vm \|\| round_up) as u64
386	};
387	let exp = e10 + removed as i32;
388
389	FloatingDecimal64 {
390	exponent: exp,
391	mantissa: output,
392	}
393	}
394
395	#[cfg_attr(feature = "no-panic", inline)]
396	unsafe fn to_chars(v: FloatingDecimal64, sign: bool, result: *mut u8) -> usize {
397	// Step 5: Print the decimal representation.
398	let mut index = 0isize;
399	if sign {
400	*result.offset(index) = b'-';
401	index += 1;
402	}
403
404	let mut output = v.mantissa;
405	let olength = decimal_length(output);
406
407	// Print the decimal digits.
408	// The following code is equivalent to:
409	// for (uint32_t i = 0; i < olength - 1; ++i) {
410	// const uint32_t c = output % 10; output /= 10;
411	// result[index + olength - i] = (char) ('0' + c);
412	// }
413	// result[index] = '0' + output % 10;
414
415	let mut i = 0isize;
416	// We prefer 32-bit operations, even on 64-bit platforms.
417	// We have at most 17 digits, and uint32_t can store 9 digits.
418	// If output doesn't fit into uint32_t, we cut off 8 digits,
419	// so the rest will fit into uint32_t.
420	if (output >> 32) != 0 {
421	// Expensive 64-bit division.
422	let q = div100_000_000(output);
423	let mut output2 = (output - 100_000_000 * q) as u32;
424	output = q;
425
426	let c = output2 % 10000;
427	output2 /= 10000;
428	let d = output2 % 10000;
429	let c0 = (c % 100) << 1;
430	let c1 = (c / 100) << 1;
431	let d0 = (d % 100) << 1;
432	let d1 = (d / 100) << 1;
433	ptr::copy_nonoverlapping(
434	DIGIT_TABLE.get_unchecked(c0 as usize),
435	result.offset(index + olength as isize - i - 1),
436	2,
437	);
438	ptr::copy_nonoverlapping(
439	DIGIT_TABLE.get_unchecked(c1 as usize),
440	result.offset(index + olength as isize - i - 3),
441	2,
442	);
443	ptr::copy_nonoverlapping(
444	DIGIT_TABLE.get_unchecked(d0 as usize),
445	result.offset(index + olength as isize - i - 5),
446	2,
447	);
448	ptr::copy_nonoverlapping(
449	DIGIT_TABLE.get_unchecked(d1 as usize),
450	result.offset(index + olength as isize - i - 7),
451	2,
452	);
453	i += 8;
454	}
455	let mut output2 = output as u32;
456	while output2 >= 10000 {
457	let c = (output2 - 10000 * (output2 / 10000)) as u32;
458	output2 /= 10000;
459	let c0 = (c % 100) << 1;
460	let c1 = (c / 100) << 1;
461	ptr::copy_nonoverlapping(
462	DIGIT_TABLE.get_unchecked(c0 as usize),
463	result.offset(index + olength as isize - i - 1),
464	2,
465	);
466	ptr::copy_nonoverlapping(
467	DIGIT_TABLE.get_unchecked(c1 as usize),
468	result.offset(index + olength as isize - i - 3),
469	2,
470	);
471	i += 4;
472	}
473	if output2 >= 100 {
474	let c = ((output2 % 100) << 1) as u32;
475	output2 /= 100;
476	ptr::copy_nonoverlapping(
477	DIGIT_TABLE.get_unchecked(c as usize),
478	result.offset(index + olength as isize - i - 1),
479	2,
480	);
481	i += 2;
482	}
483	if output2 >= 10 {
484	let c = (output2 << 1) as u32;
485	// We can't use memcpy here: the decimal dot goes between these two digits.
486	result.offset(index + olength as isize - i) = DIGIT_TABLE.get_unchecked(c as usize + 1);
487	result.offset(index) = DIGIT_TABLE.get_unchecked(c as usize);
488	} else {
489	*result.offset(index) = b'0' + output2 as u8;
490	}
491
492	// Print decimal point if needed.
493	if olength > 1 {
494	*result.offset(index + 1) = b'.';
495	index += olength as isize + 1;
496	} else {
497	index += 1;
498	}
499
500	// Print the exponent.
501	*result.offset(index) = b'E';
502	index += 1;
503	let mut exp = v.exponent as i32 + olength as i32 - 1;
504	if exp < 0 {
505	*result.offset(index) = b'-';
506	index += 1;
507	exp = -exp;
508	}
509
510	if exp >= 100 {
511	let c = exp % 10;
512	ptr::copy_nonoverlapping(
513	DIGIT_TABLE.get_unchecked((2 * (exp / 10)) as usize),
514	result.offset(index),
515	2,
516	);
517	*result.offset(index + 2) = b'0' + c as u8;
518	index += 3;
519	} else if exp >= 10 {
520	ptr::copy_nonoverlapping(
521	DIGIT_TABLE.get_unchecked((2 * exp) as usize),
522	result.offset(index),
523	2,
524	);
525	index += 2;
526	} else {
527	*result.offset(index) = b'0' + exp as u8;
528	index += 1;
529	}
530
531	debug_assert!(index <= 24);
532	index as usize
533	}
534
0731742a XL	535	/// Print f64 to the given buffer and return number of bytes written. Ryū's
0731742a XL	536	/// original formatting.
b7449926 XL	537	///
	538	/// At most 24 bytes will be written.
	539	///
	540	/// ## Special cases
	541	///
	542	/// This function represents any NaN as `NaN`, positive infinity as `Infinity`,
	543	/// and negative infinity as `-Infinity`.
	544	///
	545	/// ## Safety
	546	///
	547	/// The `result` pointer argument must point to sufficiently many writable bytes
	548	/// to hold Ryū's representation of `f`.
	549	///
	550	/// ## Example
	551	///
	552	/// ```rust
	553	/// let f = 1.234f64;
	554	///
	555	/// unsafe {
	556	/// let mut buffer: [u8; 24] = std::mem::uninitialized();
	557	/// let n = ryu::raw::d2s_buffered_n(f, &mut buffer[0]);
	558	/// let s = std::str::from_utf8_unchecked(&buffer[..n]);
	559	/// assert_eq!(s, "1.234E0");
	560	/// }
	561	/// ```
	562	#[cfg_attr(must_use_return, must_use)]
	563	#[cfg_attr(feature = "no-panic", no_panic)]
	564	pub unsafe fn d2s_buffered_n(f: f64, result: *mut u8) -> usize {
	565	// Step 1: Decode the floating-point number, and unify normalized and subnormal cases.
	566	let bits = mem::transmute::<f64, u64>(f);
	567
	568	// Decode bits into sign, mantissa, and exponent.
	569	let ieee_sign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0;
	570	let ieee_mantissa = bits & ((1u64 << DOUBLE_MANTISSA_BITS) - 1);
	571	let ieee_exponent =
	572	(bits >> DOUBLE_MANTISSA_BITS) as u32 & ((1u32 << DOUBLE_EXPONENT_BITS) - 1);
	573	// Case distinction; exit early for the easy cases.
	574	if ieee_exponent == ((1u32 << DOUBLE_EXPONENT_BITS) - 1)
	575	\|\| (ieee_exponent == 0 && ieee_mantissa == 0)
	576	{
	577	return copy_special_str(result, ieee_sign, ieee_exponent != 0, ieee_mantissa != 0);
	578	}
	579
	580	let v = d2d(ieee_mantissa, ieee_exponent);
	581	to_chars(v, ieee_sign, result)
	582	}