library/core/src/num/dec2flt/parse.rs

   1 //! Functions to parse floating-point numbers.
   2
   3 use crate::num::dec2flt::common::{is_8digits, AsciiStr, ByteSlice};
   4 use crate::num::dec2flt::float::RawFloat;
   5 use crate::num::dec2flt::number::Number;
   6
   7 const MIN_19DIGIT_INT: u64 = 100_0000_0000_0000_0000;
   8
   9 /// Parse 8 digits, loaded as bytes in little-endian order.
  10 ///
  11 /// This uses the trick where every digit is in [0x030, 0x39],
  12 /// and therefore can be parsed in 3 multiplications, much
  13 /// faster than the normal 8.
  14 ///
  15 /// This is based off the algorithm described in "Fast numeric string to
  16 /// int", available here: <https://johnnylee-sde.github.io/Fast-numeric-string-to-int/>.
  17 fn parse_8digits(mut v: u64) -> u64 {
  18     const MASK: u64 = 0x0000_00FF_0000_00FF;
  19     const MUL1: u64 = 0x000F_4240_0000_0064;
  20     const MUL2: u64 = 0x0000_2710_0000_0001;
  21     v -= 0x3030_3030_3030_3030;
  22     v = (v * 10) + (v >> 8); // will not overflow, fits in 63 bits
  23     let v1 = (v & MASK).wrapping_mul(MUL1);
  24     let v2 = ((v >> 16) & MASK).wrapping_mul(MUL2);
  25     ((v1.wrapping_add(v2) >> 32) as u32) as u64
  26 }
  27
  28 /// Parse digits until a non-digit character is found.
  29 fn try_parse_digits(s: &mut AsciiStr<'_>, x: &mut u64) {
  30     // may cause overflows, to be handled later
  31     s.parse_digits(|digit| {
  32         *x = x.wrapping_mul(10).wrapping_add(digit as _);
  33     });
  34 }
  35
  36 /// Parse up to 19 digits (the max that can be stored in a 64-bit integer).
  37 fn try_parse_19digits(s: &mut AsciiStr<'_>, x: &mut u64) {
  38     while *x < MIN_19DIGIT_INT {
  39         if let Some(&c) = s.as_ref().first() {
  40             let digit = c.wrapping_sub(b'0');
  41             if digit < 10 {
  42                 *x = (*x * 10) + digit as u64; // no overflows here
  43                 // SAFETY: cannot be empty
  44                 unsafe {
  45                     s.step();
  46                 }
  47             } else {
  48                 break;
  49             }
  50         } else {
  51             break;
  52         }
  53     }
  54 }
  55
  56 /// Try to parse 8 digits at a time, using an optimized algorithm.
  57 fn try_parse_8digits(s: &mut AsciiStr<'_>, x: &mut u64) {
  58     // may cause overflows, to be handled later
  59     if let Some(v) = s.read_u64() {
  60         if is_8digits(v) {
  61             *x = x.wrapping_mul(1_0000_0000).wrapping_add(parse_8digits(v));
  62             // SAFETY: already ensured the buffer was >= 8 bytes in read_u64.
  63             unsafe {
  64                 s.step_by(8);
  65             }
  66             if let Some(v) = s.read_u64() {
  67                 if is_8digits(v) {
  68                     *x = x.wrapping_mul(1_0000_0000).wrapping_add(parse_8digits(v));
  69                     // SAFETY: already ensured the buffer was >= 8 bytes in try_read_u64.
  70                     unsafe {
  71                         s.step_by(8);
  72                     }
  73                 }
  74             }
  75         }
  76     }
  77 }
  78
  79 /// Parse the scientific notation component of a float.
  80 fn parse_scientific(s: &mut AsciiStr<'_>) -> Option<i64> {
  81     let mut exponent = 0_i64;
  82     let mut negative = false;
  83     if let Some(&c) = s.as_ref().get(0) {
  84         negative = c == b'-';
  85         if c == b'-' || c == b'+' {
  86             // SAFETY: s cannot be empty
  87             unsafe {
  88                 s.step();
  89             }
  90         }
  91     }
  92     if s.first_isdigit() {
  93         s.parse_digits(|digit| {
  94             // no overflows here, saturate well before overflow
  95             if exponent < 0x10000 {
  96                 exponent = 10 * exponent + digit as i64;
  97             }
  98         });
  99         if negative { Some(-exponent) } else { Some(exponent) }
 100     } else {
 101         None
 102     }
 103 }
 104
 105 /// Parse a partial, non-special floating point number.
 106 ///
 107 /// This creates a representation of the float as the
 108 /// significant digits and the decimal exponent.
 109 fn parse_partial_number(s: &[u8], negative: bool) -> Option<(Number, usize)> {
 110     let mut s = AsciiStr::new(s);
 111     let start = s;
 112     debug_assert!(!s.is_empty());
 113
 114     // parse initial digits before dot
 115     let mut mantissa = 0_u64;
 116     let digits_start = s;
 117     try_parse_digits(&mut s, &mut mantissa);
 118     let mut n_digits = s.offset_from(&digits_start);
 119
 120     // handle dot with the following digits
 121     let mut n_after_dot = 0;
 122     let mut exponent = 0_i64;
 123     let int_end = s;
 124     if s.first_is(b'.') {
 125         // SAFETY: s cannot be empty due to first_is
 126         unsafe { s.step() };
 127         let before = s;
 128         try_parse_8digits(&mut s, &mut mantissa);
 129         try_parse_digits(&mut s, &mut mantissa);
 130         n_after_dot = s.offset_from(&before);
 131         exponent = -n_after_dot as i64;
 132     }
 133
 134     n_digits += n_after_dot;
 135     if n_digits == 0 {
 136         return None;
 137     }
 138
 139     // handle scientific format
 140     let mut exp_number = 0_i64;
 141     if s.first_is2(b'e', b'E') {
 142         // SAFETY: s cannot be empty
 143         unsafe {
 144             s.step();
 145         }
 146         // If None, we have no trailing digits after exponent, or an invalid float.
 147         exp_number = parse_scientific(&mut s)?;
 148         exponent += exp_number;
 149     }
 150
 151     let len = s.offset_from(&start) as _;
 152
 153     // handle uncommon case with many digits
 154     if n_digits <= 19 {
 155         return Some((Number { exponent, mantissa, negative, many_digits: false }, len));
 156     }
 157
 158     n_digits -= 19;
 159     let mut many_digits = false;
 160     let mut p = digits_start;
 161     while p.first_is2(b'0', b'.') {
 162         // SAFETY: p cannot be empty due to first_is2
 163         unsafe {
 164             // '0' = b'.' + 2
 165             n_digits -= p.first_unchecked().saturating_sub(b'0' - 1) as isize;
 166             p.step();
 167         }
 168     }
 169     if n_digits > 0 {
 170         // at this point we have more than 19 significant digits, let's try again
 171         many_digits = true;
 172         mantissa = 0;
 173         let mut s = digits_start;
 174         try_parse_19digits(&mut s, &mut mantissa);
 175         exponent = if mantissa >= MIN_19DIGIT_INT {
 176             // big int
 177             int_end.offset_from(&s)
 178         } else {
 179             // SAFETY: the next byte must be present and be '.'
 180             // We know this is true because we had more than 19
 181             // digits previously, so we overflowed a 64-bit integer,
 182             // but parsing only the integral digits produced less
 183             // than 19 digits. That means we must have a decimal
 184             // point, and at least 1 fractional digit.
 185             unsafe { s.step() };
 186             let before = s;
 187             try_parse_19digits(&mut s, &mut mantissa);
 188             -s.offset_from(&before)
 189         } as i64;
 190         // add back the explicit part
 191         exponent += exp_number;
 192     }
 193
 194     Some((Number { exponent, mantissa, negative, many_digits }, len))
 195 }
 196
 197 /// Try to parse a non-special floating point number.
 198 pub fn parse_number(s: &[u8], negative: bool) -> Option<Number> {
 199     if let Some((float, rest)) = parse_partial_number(s, negative) {
 200         if rest == s.len() {
 201             return Some(float);
 202         }
 203     }
 204     None
 205 }
 206
 207 /// Parse a partial representation of a special, non-finite float.
 208 fn parse_partial_inf_nan<F: RawFloat>(s: &[u8]) -> Option<(F, usize)> {
 209     fn parse_inf_rest(s: &[u8]) -> usize {
 210         if s.len() >= 8 && s[3..].as_ref().eq_ignore_case(b"inity") { 8 } else { 3 }
 211     }
 212     if s.len() >= 3 {
 213         if s.eq_ignore_case(b"nan") {
 214             return Some((F::NAN, 3));
 215         } else if s.eq_ignore_case(b"inf") {
 216             return Some((F::INFINITY, parse_inf_rest(s)));
 217         }
 218     }
 219     None
 220 }
 221
 222 /// Try to parse a special, non-finite float.
 223 pub fn parse_inf_nan<F: RawFloat>(s: &[u8], negative: bool) -> Option<F> {
 224     if let Some((mut float, rest)) = parse_partial_inf_nan::<F>(s) {
 225         if rest == s.len() {
 226             if negative {
 227                 float = -float;
 228             }
 229             return Some(float);
 230         }
 231     }
 232     None
 233 }