]> git.proxmox.com Git - rustc.git/blob - library/core/src/num/dec2flt/parse.rs
New upstream version 1.55.0+dfsg1
[rustc.git] / library / core / src / num / dec2flt / parse.rs
1 //! Functions to parse floating-point numbers.
2
3 use crate::num::dec2flt::common::{is_8digits, AsciiStr, ByteSlice};
4 use crate::num::dec2flt::float::RawFloat;
5 use crate::num::dec2flt::number::Number;
6
/// Smallest integer that needs 19 decimal digits (10^18).
///
/// Used as the stopping threshold when accumulating a mantissa digit by
/// digit: while the accumulator is below this value, one more decimal digit
/// can be appended without overflowing a `u64`.
const MIN_19DIGIT_INT: u64 = 1_000_000_000_000_000_000;
8
/// Convert eight ASCII digits, packed into a `u64` in little-endian byte
/// order, into the integer they spell.
///
/// The caller must ensure every byte of `v` is an ASCII digit, i.e. lies in
/// `[0x30, 0x39]`. Under that precondition the value can be assembled with
/// three multiplications instead of the usual eight.
///
/// This is based off the algorithm described in "Fast numeric string to
/// int", available here: <https://johnnylee-sde.github.io/Fast-numeric-string-to-int/>.
fn parse_8digits(mut v: u64) -> u64 {
    // b'0' replicated into every byte lane.
    const ASCII_ZEROS: u64 = 0x3030_3030_3030_3030;
    // Selects byte 0 and byte 4 (one two-digit pair from each 32-bit half).
    const PAIR_MASK: u64 = 0x0000_00FF_0000_00FF;
    // 1_000_000 in the upper 32 bits, 100 in the lower 32 bits.
    const SCALE_OUTER: u64 = 0x000F_4240_0000_0064;
    // 10_000 in the upper 32 bits, 1 in the lower 32 bits.
    const SCALE_INNER: u64 = 0x0000_2710_0000_0001;

    // Turn each ASCII byte into its digit value 0..=9.
    v -= ASCII_ZEROS;
    // Fold neighbouring digits: byte i becomes 10 * d[i] + d[i + 1].
    // Will not overflow: the result fits in 63 bits.
    v = v * 10 + (v >> 8);
    // Weight the two-digit pairs; the wanted sum lands in the upper 32 bits,
    // anything that wraps past bit 63 is irrelevant.
    let outer_pairs = (v & PAIR_MASK).wrapping_mul(SCALE_OUTER);
    let inner_pairs = ((v >> 16) & PAIR_MASK).wrapping_mul(SCALE_INNER);
    u64::from((outer_pairs.wrapping_add(inner_pairs) >> 32) as u32)
}
27
28 /// Parse digits until a non-digit character is found.
29 fn try_parse_digits(s: &mut AsciiStr<'_>, x: &mut u64) {
30 // may cause overflows, to be handled later
31 s.parse_digits(|digit| {
32 *x = x.wrapping_mul(10).wrapping_add(digit as _);
33 });
34 }
35
36 /// Parse up to 19 digits (the max that can be stored in a 64-bit integer).
37 fn try_parse_19digits(s: &mut AsciiStr<'_>, x: &mut u64) {
38 while *x < MIN_19DIGIT_INT {
39 if let Some(&c) = s.as_ref().first() {
40 let digit = c.wrapping_sub(b'0');
41 if digit < 10 {
42 *x = (*x * 10) + digit as u64; // no overflows here
43 // SAFETY: cannot be empty
44 unsafe {
45 s.step();
46 }
47 } else {
48 break;
49 }
50 } else {
51 break;
52 }
53 }
54 }
55
56 /// Try to parse 8 digits at a time, using an optimized algorithm.
57 fn try_parse_8digits(s: &mut AsciiStr<'_>, x: &mut u64) {
58 // may cause overflows, to be handled later
59 if let Some(v) = s.read_u64() {
60 if is_8digits(v) {
61 *x = x.wrapping_mul(1_0000_0000).wrapping_add(parse_8digits(v));
62 // SAFETY: already ensured the buffer was >= 8 bytes in read_u64.
63 unsafe {
64 s.step_by(8);
65 }
66 if let Some(v) = s.read_u64() {
67 if is_8digits(v) {
68 *x = x.wrapping_mul(1_0000_0000).wrapping_add(parse_8digits(v));
69 // SAFETY: already ensured the buffer was >= 8 bytes in try_read_u64.
70 unsafe {
71 s.step_by(8);
72 }
73 }
74 }
75 }
76 }
77 }
78
79 /// Parse the scientific notation component of a float.
80 fn parse_scientific(s: &mut AsciiStr<'_>) -> Option<i64> {
81 let mut exponent = 0_i64;
82 let mut negative = false;
83 if let Some(&c) = s.as_ref().get(0) {
84 negative = c == b'-';
85 if c == b'-' || c == b'+' {
86 // SAFETY: s cannot be empty
87 unsafe {
88 s.step();
89 }
90 }
91 }
92 if s.first_isdigit() {
93 s.parse_digits(|digit| {
94 // no overflows here, saturate well before overflow
95 if exponent < 0x10000 {
96 exponent = 10 * exponent + digit as i64;
97 }
98 });
99 if negative { Some(-exponent) } else { Some(exponent) }
100 } else {
101 None
102 }
103 }
104
105 /// Parse a partial, non-special floating point number.
106 ///
107 /// This creates a representation of the float as the
108 /// significant digits and the decimal exponent.
109 fn parse_partial_number(s: &[u8], negative: bool) -> Option<(Number, usize)> {
110 let mut s = AsciiStr::new(s);
111 let start = s;
112 debug_assert!(!s.is_empty());
113
114 // parse initial digits before dot
115 let mut mantissa = 0_u64;
116 let digits_start = s;
117 try_parse_digits(&mut s, &mut mantissa);
118 let mut n_digits = s.offset_from(&digits_start);
119
120 // handle dot with the following digits
121 let mut n_after_dot = 0;
122 let mut exponent = 0_i64;
123 let int_end = s;
124 if s.first_is(b'.') {
125 // SAFETY: s cannot be empty due to first_is
126 unsafe { s.step() };
127 let before = s;
128 try_parse_8digits(&mut s, &mut mantissa);
129 try_parse_digits(&mut s, &mut mantissa);
130 n_after_dot = s.offset_from(&before);
131 exponent = -n_after_dot as i64;
132 }
133
134 n_digits += n_after_dot;
135 if n_digits == 0 {
136 return None;
137 }
138
139 // handle scientific format
140 let mut exp_number = 0_i64;
141 if s.first_is2(b'e', b'E') {
142 // SAFETY: s cannot be empty
143 unsafe {
144 s.step();
145 }
146 // If None, we have no trailing digits after exponent, or an invalid float.
147 exp_number = parse_scientific(&mut s)?;
148 exponent += exp_number;
149 }
150
151 let len = s.offset_from(&start) as _;
152
153 // handle uncommon case with many digits
154 if n_digits <= 19 {
155 return Some((Number { exponent, mantissa, negative, many_digits: false }, len));
156 }
157
158 n_digits -= 19;
159 let mut many_digits = false;
160 let mut p = digits_start;
161 while p.first_is2(b'0', b'.') {
162 // SAFETY: p cannot be empty due to first_is2
163 unsafe {
164 // '0' = b'.' + 2
165 n_digits -= p.first_unchecked().saturating_sub(b'0' - 1) as isize;
166 p.step();
167 }
168 }
169 if n_digits > 0 {
170 // at this point we have more than 19 significant digits, let's try again
171 many_digits = true;
172 mantissa = 0;
173 let mut s = digits_start;
174 try_parse_19digits(&mut s, &mut mantissa);
175 exponent = if mantissa >= MIN_19DIGIT_INT {
176 // big int
177 int_end.offset_from(&s)
178 } else {
179 // SAFETY: the next byte must be present and be '.'
180 // We know this is true because we had more than 19
181 // digits previously, so we overflowed a 64-bit integer,
182 // but parsing only the integral digits produced less
183 // than 19 digits. That means we must have a decimal
184 // point, and at least 1 fractional digit.
185 unsafe { s.step() };
186 let before = s;
187 try_parse_19digits(&mut s, &mut mantissa);
188 -s.offset_from(&before)
189 } as i64;
190 // add back the explicit part
191 exponent += exp_number;
192 }
193
194 Some((Number { exponent, mantissa, negative, many_digits }, len))
195 }
196
197 /// Try to parse a non-special floating point number.
198 pub fn parse_number(s: &[u8], negative: bool) -> Option<Number> {
199 if let Some((float, rest)) = parse_partial_number(s, negative) {
200 if rest == s.len() {
201 return Some(float);
202 }
203 }
204 None
205 }
206
207 /// Parse a partial representation of a special, non-finite float.
208 fn parse_partial_inf_nan<F: RawFloat>(s: &[u8]) -> Option<(F, usize)> {
209 fn parse_inf_rest(s: &[u8]) -> usize {
210 if s.len() >= 8 && s[3..].as_ref().eq_ignore_case(b"inity") { 8 } else { 3 }
211 }
212 if s.len() >= 3 {
213 if s.eq_ignore_case(b"nan") {
214 return Some((F::NAN, 3));
215 } else if s.eq_ignore_case(b"inf") {
216 return Some((F::INFINITY, parse_inf_rest(s)));
217 }
218 }
219 None
220 }
221
222 /// Try to parse a special, non-finite float.
223 pub fn parse_inf_nan<F: RawFloat>(s: &[u8], negative: bool) -> Option<F> {
224 if let Some((mut float, rest)) = parse_partial_inf_nan::<F>(s) {
225 if rest == s.len() {
226 if negative {
227 float = -float;
228 }
229 return Some(float);
230 }
231 }
232 None
233 }