]>
Commit | Line | Data |
---|---|---|
2c00a5a8 XL |
1 | // This is a part of Chrono. |
2 | // See README.md and LICENSE.txt for details. | |
3 | ||
4 | /*! | |
5 | * Various scanning routines for the parser. | |
6 | */ | |
7 | ||
8 | use Weekday; | |
9 | use super::{ParseResult, TOO_SHORT, INVALID, OUT_OF_RANGE}; | |
10 | ||
/// Checks whether `s` matches `pattern`, ignoring ASCII case in `s`.
///
/// Only `s` is case-folded: `pattern` has to be given in lower case already,
/// so a pattern containing an upper-case ASCII letter never matches.
fn equals(s: &str, pattern: &str) -> bool {
    // fold `A`--`Z` onto `a`--`z` and leave every other byte untouched
    let folded = s.bytes().map(|b| if b'A' <= b && b <= b'Z' { b + 32 } else { b });

    // `Iterator::eq` holds only if both sides have the same length and the same items
    folded.eq(pattern.bytes())
}
25 | ||
26 | /// Tries to parse the non-negative number from `min` to `max` digits. | |
27 | /// | |
28 | /// The absence of digits at all is an unconditional error. | |
29 | /// More than `max` digits are consumed up to the first `max` digits. | |
30 | /// Any number that does not fit in `i64` is an error. | |
31 | pub fn number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64)> { | |
32 | assert!(min <= max); | |
33 | ||
34 | // limit `s` to given number of digits | |
35 | let mut window = s.as_bytes(); | |
36 | if window.len() > max { window = &window[..max]; } | |
37 | ||
38 | // scan digits | |
83c7162d XL |
39 | let upto = window.iter().position(|&c| c < b'0' || b'9' < c) |
40 | .unwrap_or_else(|| window.len()); | |
2c00a5a8 XL |
41 | if upto < min { |
42 | return Err(if window.is_empty() {TOO_SHORT} else {INVALID}); | |
43 | } | |
44 | ||
45 | // we can overflow here, which is the only possible cause of error from `parse`. | |
46 | let v: i64 = try!(s[..upto].parse().map_err(|_| OUT_OF_RANGE)); | |
47 | Ok((&s[upto..], v)) | |
48 | } | |
49 | ||
50 | /// Tries to consume at least one digits as a fractional second. | |
51 | /// Returns the number of whole nanoseconds (0--999,999,999). | |
52 | pub fn nanosecond(s: &str) -> ParseResult<(&str, i64)> { | |
53 | // record the number of digits consumed for later scaling. | |
54 | let origlen = s.len(); | |
55 | let (s, v) = try!(number(s, 1, 9)); | |
56 | let consumed = origlen - s.len(); | |
57 | ||
58 | // scale the number accordingly. | |
59 | static SCALE: [i64; 10] = [0, 100_000_000, 10_000_000, 1_000_000, 100_000, 10_000, | |
60 | 1_000, 100, 10, 1]; | |
61 | let v = try!(v.checked_mul(SCALE[consumed]).ok_or(OUT_OF_RANGE)); | |
62 | ||
63 | // if there are more than 9 digits, skip next digits. | |
64 | let s = s.trim_left_matches(|c: char| '0' <= c && c <= '9'); | |
65 | ||
66 | Ok((s, v)) | |
67 | } | |
68 | ||
8faf50e0 XL |
69 | /// Tries to consume a fixed number of digits as a fractional second. |
70 | /// Returns the number of whole nanoseconds (0--999,999,999). | |
71 | pub fn nanosecond_fixed(s: &str, digits: usize) -> ParseResult<(&str, i64)> { | |
72 | // record the number of digits consumed for later scaling. | |
73 | let (s, v) = try!(number(s, digits, digits)); | |
74 | ||
75 | // scale the number accordingly. | |
76 | static SCALE: [i64; 10] = [0, 100_000_000, 10_000_000, 1_000_000, 100_000, 10_000, | |
77 | 1_000, 100, 10, 1]; | |
78 | let v = try!(v.checked_mul(SCALE[digits]).ok_or(OUT_OF_RANGE)); | |
79 | ||
80 | Ok((s, v)) | |
81 | } | |
82 | ||
2c00a5a8 XL |
83 | /// Tries to parse the month index (0 through 11) with the first three ASCII letters. |
84 | pub fn short_month0(s: &str) -> ParseResult<(&str, u8)> { | |
85 | if s.len() < 3 { return Err(TOO_SHORT); } | |
86 | let buf = s.as_bytes(); | |
87 | let month0 = match (buf[0] | 32, buf[1] | 32, buf[2] | 32) { | |
88 | (b'j',b'a',b'n') => 0, | |
89 | (b'f',b'e',b'b') => 1, | |
90 | (b'm',b'a',b'r') => 2, | |
91 | (b'a',b'p',b'r') => 3, | |
92 | (b'm',b'a',b'y') => 4, | |
93 | (b'j',b'u',b'n') => 5, | |
94 | (b'j',b'u',b'l') => 6, | |
95 | (b'a',b'u',b'g') => 7, | |
96 | (b's',b'e',b'p') => 8, | |
97 | (b'o',b'c',b't') => 9, | |
98 | (b'n',b'o',b'v') => 10, | |
99 | (b'd',b'e',b'c') => 11, | |
100 | _ => return Err(INVALID) | |
101 | }; | |
102 | Ok((&s[3..], month0)) | |
103 | } | |
104 | ||
105 | /// Tries to parse the weekday with the first three ASCII letters. | |
106 | pub fn short_weekday(s: &str) -> ParseResult<(&str, Weekday)> { | |
107 | if s.len() < 3 { return Err(TOO_SHORT); } | |
108 | let buf = s.as_bytes(); | |
109 | let weekday = match (buf[0] | 32, buf[1] | 32, buf[2] | 32) { | |
110 | (b'm',b'o',b'n') => Weekday::Mon, | |
111 | (b't',b'u',b'e') => Weekday::Tue, | |
112 | (b'w',b'e',b'd') => Weekday::Wed, | |
113 | (b't',b'h',b'u') => Weekday::Thu, | |
114 | (b'f',b'r',b'i') => Weekday::Fri, | |
115 | (b's',b'a',b't') => Weekday::Sat, | |
116 | (b's',b'u',b'n') => Weekday::Sun, | |
117 | _ => return Err(INVALID) | |
118 | }; | |
119 | Ok((&s[3..], weekday)) | |
120 | } | |
121 | ||
122 | /// Tries to parse the month index (0 through 11) with short or long month names. | |
123 | /// It prefers long month names to short month names when both are possible. | |
124 | pub fn short_or_long_month0(s: &str) -> ParseResult<(&str, u8)> { | |
125 | // lowercased month names, minus first three chars | |
126 | static LONG_MONTH_SUFFIXES: [&'static str; 12] = | |
127 | ["uary", "ruary", "ch", "il", "", "e", "y", "ust", "tember", "ober", "ember", "ember"]; | |
128 | ||
129 | let (mut s, month0) = try!(short_month0(s)); | |
130 | ||
131 | // tries to consume the suffix if possible | |
132 | let suffix = LONG_MONTH_SUFFIXES[month0 as usize]; | |
133 | if s.len() >= suffix.len() && equals(&s[..suffix.len()], suffix) { | |
134 | s = &s[suffix.len()..]; | |
135 | } | |
136 | ||
137 | Ok((s, month0)) | |
138 | } | |
139 | ||
140 | /// Tries to parse the weekday with short or long weekday names. | |
141 | /// It prefers long weekday names to short weekday names when both are possible. | |
142 | pub fn short_or_long_weekday(s: &str) -> ParseResult<(&str, Weekday)> { | |
143 | // lowercased weekday names, minus first three chars | |
144 | static LONG_WEEKDAY_SUFFIXES: [&'static str; 7] = | |
145 | ["day", "sday", "nesday", "rsday", "day", "urday", "day"]; | |
146 | ||
147 | let (mut s, weekday) = try!(short_weekday(s)); | |
148 | ||
149 | // tries to consume the suffix if possible | |
150 | let suffix = LONG_WEEKDAY_SUFFIXES[weekday.num_days_from_monday() as usize]; | |
151 | if s.len() >= suffix.len() && equals(&s[..suffix.len()], suffix) { | |
152 | s = &s[suffix.len()..]; | |
153 | } | |
154 | ||
155 | Ok((s, weekday)) | |
156 | } | |
157 | ||
158 | /// Tries to consume exactly one given character. | |
159 | pub fn char(s: &str, c1: u8) -> ParseResult<&str> { | |
160 | match s.as_bytes().first() { | |
161 | Some(&c) if c == c1 => Ok(&s[1..]), | |
162 | Some(_) => Err(INVALID), | |
163 | None => Err(TOO_SHORT), | |
164 | } | |
165 | } | |
166 | ||
167 | /// Tries to consume one or more whitespace. | |
168 | pub fn space(s: &str) -> ParseResult<&str> { | |
169 | let s_ = s.trim_left(); | |
170 | if s_.len() < s.len() { | |
171 | Ok(s_) | |
172 | } else if s.is_empty() { | |
173 | Err(TOO_SHORT) | |
174 | } else { | |
175 | Err(INVALID) | |
176 | } | |
177 | } | |
178 | ||
179 | /// Consumes any number (including zero) of colon or spaces. | |
180 | pub fn colon_or_space(s: &str) -> ParseResult<&str> { | |
181 | Ok(s.trim_left_matches(|c: char| c == ':' || c.is_whitespace())) | |
182 | } | |
183 | ||
184 | /// Tries to parse `[-+]\d\d` continued by `\d\d`. Return an offset in seconds if possible. | |
185 | /// | |
186 | /// The additional `colon` may be used to parse a mandatory or optional `:` | |
187 | /// between hours and minutes, and should return either a new suffix or `Err` when parsing fails. | |
8faf50e0 | 188 | pub fn timezone_offset<F>(s: &str, consume_colon: F) -> ParseResult<(&str, i32)> |
2c00a5a8 | 189 | where F: FnMut(&str) -> ParseResult<&str> { |
8faf50e0 XL |
190 | timezone_offset_internal(s, consume_colon, false) |
191 | } | |
192 | ||
193 | fn timezone_offset_internal<F>(mut s: &str, mut consume_colon: F, allow_missing_minutes: bool) | |
194 | -> ParseResult<(&str, i32)> | |
195 | where F: FnMut(&str) -> ParseResult<&str> | |
196 | { | |
2c00a5a8 XL |
197 | fn digits(s: &str) -> ParseResult<(u8, u8)> { |
198 | let b = s.as_bytes(); | |
199 | if b.len() < 2 { | |
200 | Err(TOO_SHORT) | |
201 | } else { | |
202 | Ok((b[0], b[1])) | |
203 | } | |
204 | } | |
205 | let negative = match s.as_bytes().first() { | |
206 | Some(&b'+') => false, | |
207 | Some(&b'-') => true, | |
208 | Some(_) => return Err(INVALID), | |
209 | None => return Err(TOO_SHORT), | |
210 | }; | |
211 | s = &s[1..]; | |
212 | ||
213 | // hours (00--99) | |
214 | let hours = match try!(digits(s)) { | |
83c7162d | 215 | (h1 @ b'0'...b'9', h2 @ b'0'...b'9') => i32::from((h1 - b'0') * 10 + (h2 - b'0')), |
2c00a5a8 XL |
216 | _ => return Err(INVALID), |
217 | }; | |
218 | s = &s[2..]; | |
219 | ||
220 | // colons (and possibly other separators) | |
8faf50e0 | 221 | s = try!(consume_colon(s)); |
2c00a5a8 XL |
222 | |
223 | // minutes (00--59) | |
8faf50e0 XL |
224 | // if the next two items are digits then we have to add minutes |
225 | let minutes = if let Ok(ds) = digits(s) { | |
226 | match ds { | |
227 | (m1 @ b'0'...b'5', m2 @ b'0'...b'9') => i32::from((m1 - b'0') * 10 + (m2 - b'0')), | |
228 | (b'6'...b'9', b'0'...b'9') => return Err(OUT_OF_RANGE), | |
229 | _ => return Err(INVALID), | |
230 | } | |
231 | } else if allow_missing_minutes { | |
232 | 0 | |
233 | } else { | |
234 | return Err(TOO_SHORT); | |
235 | }; | |
236 | s = match s.len() { | |
237 | len if len >= 2 => &s[2..], | |
238 | len if len == 0 => s, | |
239 | _ => return Err(TOO_SHORT), | |
2c00a5a8 | 240 | }; |
2c00a5a8 XL |
241 | |
242 | let seconds = hours * 3600 + minutes * 60; | |
243 | Ok((s, if negative {-seconds} else {seconds})) | |
244 | } | |
245 | ||
246 | /// Same to `timezone_offset` but also allows for `z`/`Z` which is same to `+00:00`. | |
8faf50e0 XL |
247 | pub fn timezone_offset_zulu<F>(s: &str, colon: F) |
248 | -> ParseResult<(&str, i32)> | |
249 | where F: FnMut(&str) -> ParseResult<&str> | |
250 | { | |
2c00a5a8 XL |
251 | match s.as_bytes().first() { |
252 | Some(&b'z') | Some(&b'Z') => Ok((&s[1..], 0)), | |
253 | _ => timezone_offset(s, colon), | |
254 | } | |
255 | } | |
256 | ||
8faf50e0 XL |
257 | /// Same to `timezone_offset` but also allows for `z`/`Z` which is same to |
258 | /// `+00:00`, and allows missing minutes entirely. | |
259 | pub fn timezone_offset_permissive<F>(s: &str, colon: F) | |
260 | -> ParseResult<(&str, i32)> | |
261 | where F: FnMut(&str) -> ParseResult<&str> | |
262 | { | |
263 | match s.as_bytes().first() { | |
264 | Some(&b'z') | Some(&b'Z') => Ok((&s[1..], 0)), | |
265 | _ => timezone_offset_internal(s, colon, true), | |
266 | } | |
267 | } | |
268 | ||
2c00a5a8 XL |
269 | /// Same to `timezone_offset` but also allows for RFC 2822 legacy timezones. |
270 | /// May return `None` which indicates an insufficient offset data (i.e. `-0000`). | |
271 | pub fn timezone_offset_2822(s: &str) -> ParseResult<(&str, Option<i32>)> { | |
272 | // tries to parse legacy time zone names | |
273 | let upto = s.as_bytes().iter().position(|&c| match c { b'a'...b'z' | b'A'...b'Z' => false, | |
83c7162d XL |
274 | _ => true }) |
275 | .unwrap_or_else(|| s.len()); | |
2c00a5a8 XL |
276 | if upto > 0 { |
277 | let name = &s[..upto]; | |
278 | let s = &s[upto..]; | |
83c7162d | 279 | let offset_hours = |o| Ok((s, Some(o * 3600))); |
2c00a5a8 | 280 | if equals(name, "gmt") || equals(name, "ut") { |
83c7162d | 281 | offset_hours(0) |
2c00a5a8 | 282 | } else if equals(name, "edt") { |
83c7162d XL |
283 | offset_hours(-4) |
284 | } else if equals(name, "est") || equals(name, "cdt") { | |
285 | offset_hours(-5) | |
286 | } else if equals(name, "cst") || equals(name, "mdt") { | |
287 | offset_hours(-6) | |
288 | } else if equals(name, "mst") || equals(name, "pdt") { | |
289 | offset_hours(-7) | |
2c00a5a8 | 290 | } else if equals(name, "pst") { |
83c7162d | 291 | offset_hours(-8) |
2c00a5a8 XL |
292 | } else { |
293 | Ok((s, None)) // recommended by RFC 2822: consume but treat it as -0000 | |
294 | } | |
295 | } else { | |
296 | let (s_, offset) = try!(timezone_offset(s, |s| Ok(s))); | |
297 | if offset == 0 && s.starts_with('-') { // -0000 is not same to +0000 | |
298 | Ok((s_, None)) | |
299 | } else { | |
300 | Ok((s_, Some(offset))) | |
301 | } | |
302 | } | |
303 | } | |
304 |