// src/vendor/toml-0.3.2/src/tokens.rs
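//! A hand-written tokenizer for TOML documents.
//!
//! `Tokenizer` walks a borrowed `&str` and yields `Token`s (whitespace,
//! comments, punctuation, bare keys, and strings), reporting lexical
//! problems through the `Error` enum. It only allocates when a string
//! contains escapes or CRLF line endings that must be normalized.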
use std::borrow::Cow;
use std::char;
use std::str;
use std::string;

use self::Token::*;

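/// A single lexical token, borrowing its text from the input where possible.
///
/// `String` carries both the raw source slice (`src`, including the quotes)
/// and the unescaped value (`val`); `val` is only an owned `Cow::Owned`
/// when unescaping actually changed the text.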
#[derive(Eq, PartialEq, Debug)]
pub enum Token<'a> {
    Whitespace(&'a str),
    Newline,
    Comment(&'a str),

    Equals,
    Period,
    Comma,
    Colon,
    Plus,
    LeftBrace,
    RightBrace,
    LeftBracket,
    RightBracket,

    Keylike(&'a str),
    String { src: &'a str, val: Cow<'a, str> },
}

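/// A lexical error, tagged with the byte offset in the input at which it
/// was detected.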
#[derive(Eq, PartialEq, Debug)]
pub enum Error {
    InvalidCharInString(usize, char),
    InvalidEscape(usize, char),
    InvalidHexEscape(usize, char),
    InvalidEscapeValue(usize, u32),
    NewlineInString(usize),
    Unexpected(usize, char),
    UnterminatedString(usize),
    NewlineInTableKey(usize),
    EmptyTableKey(usize),
    Wanted { at: usize, expected: &'static str, found: &'static str },
}

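/// A cloneable cursor over the input that produces `Token`s on demand.
///
/// Cloning the tokenizer is cheap (it only copies a `CharIndices` iterator),
/// which is how `peek` and `eatc` implement lookahead.
///
/// A minimal usage sketch (marked `ignore` because this module is internal
/// to the crate and not reachable from outside it):
///
/// ```ignore
/// let mut t = Tokenizer::new("key = 1 # comment");
/// while let Some(token) = t.next().unwrap() {
///     println!("{:?}", token);
/// }
/// ```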
#[derive(Clone)]
pub struct Tokenizer<'a> {
    input: &'a str,
    chars: CrlfFold<'a>,
}

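/// Wrapper around `CharIndices` that folds a `\r\n` pair into a single
/// `'\n'` (reported at the index of the `\r`), so the rest of the tokenizer
/// only ever sees `'\n'` newlines.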
#[derive(Clone)]
struct CrlfFold<'a> {
    chars: str::CharIndices<'a>,
}

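/// Copy-on-write accumulator for string values: stays a borrowed range of
/// the input (`NotEscaped` holds the start offset) until an escape or a CRLF
/// newline forces the text so far to be copied into an owned `String`.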
#[derive(Debug)]
enum MaybeString {
    NotEscaped(usize),
    Owned(string::String),
}

impl<'a> Tokenizer<'a> {
    pub fn new(input: &'a str) -> Tokenizer<'a> {
        let mut t = Tokenizer {
            input: input,
            chars: CrlfFold {
                chars: input.char_indices(),
            },
        };
        // Eat utf-8 BOM
        t.eatc('\u{feff}');
        return t
    }

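    /// Lexes and returns the next token, or `Ok(None)` at end of input.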
    pub fn next(&mut self) -> Result<Option<Token<'a>>, Error> {
        let token = match self.chars.next() {
            Some((_, '\n')) => Newline,
            Some((start, ' ')) => self.whitespace_token(start),
            Some((start, '\t')) => self.whitespace_token(start),
            Some((start, '#')) => self.comment_token(start),
            Some((_, '=')) => Equals,
            Some((_, '.')) => Period,
            Some((_, ',')) => Comma,
            Some((_, ':')) => Colon,
            Some((_, '+')) => Plus,
            Some((_, '{')) => LeftBrace,
            Some((_, '}')) => RightBrace,
            Some((_, '[')) => LeftBracket,
            Some((_, ']')) => RightBracket,
            Some((start, '\'')) => return self.literal_string(start).map(Some),
            Some((start, '"')) => return self.basic_string(start).map(Some),
            Some((start, ch)) if is_keylike(ch) => self.keylike(start),

            Some((start, ch)) => return Err(Error::Unexpected(start, ch)),
            None => return Ok(None),
        };
        Ok(Some(token))
    }

    pub fn peek(&mut self) -> Result<Option<Token<'a>>, Error> {
        self.clone().next()
    }

    pub fn eat(&mut self, expected: Token<'a>) -> Result<bool, Error> {
        match self.peek()? {
            Some(ref found) if expected == *found => {}
            Some(_) => return Ok(false),
            None => return Ok(false),
        }
        drop(self.next());
        Ok(true)
    }

    pub fn expect(&mut self, expected: Token<'a>) -> Result<(), Error> {
        let current = self.current();
        match self.next()? {
            Some(found) => {
                if expected == found {
                    Ok(())
                } else {
                    Err(Error::Wanted {
                        at: current,
                        expected: expected.describe(),
                        found: found.describe(),
                    })
                }
            }
            None => {
                Err(Error::Wanted {
                    at: self.input.len(),
                    expected: expected.describe(),
                    found: "eof",
                })
            }
        }
    }

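    /// Reads one table key: either a bare `Keylike` token or a quoted
    /// string, rejecting empty quoted keys and keys containing newlines.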
    pub fn table_key(&mut self) -> Result<Cow<'a, str>, Error> {
        let current = self.current();
        match self.next()? {
            Some(Token::Keylike(k)) => Ok(k.into()),
            Some(Token::String { src, val }) => {
                let offset = self.substr_offset(src);
                if val == "" {
                    return Err(Error::EmptyTableKey(offset))
                }
                match src.find("\n") {
                    None => Ok(val),
                    Some(i) => Err(Error::NewlineInTableKey(offset + i)),
                }
            }
            Some(other) => {
                Err(Error::Wanted {
                    at: current,
                    expected: "a table key",
                    found: other.describe(),
                })
            }
            None => {
                Err(Error::Wanted {
                    at: self.input.len(),
                    expected: "a table key",
                    found: "eof",
                })
            }
        }
    }

    pub fn eat_whitespace(&mut self) -> Result<(), Error> {
        while self.eatc(' ') || self.eatc('\t') {
            // ...
        }
        Ok(())
    }

    pub fn eat_comment(&mut self) -> Result<bool, Error> {
        if !self.eatc('#') {
            return Ok(false)
        }
        drop(self.comment_token(0));
        self.eat_newline_or_eof().map(|()| true)
    }

    pub fn eat_newline_or_eof(&mut self) -> Result<(), Error> {
        let current = self.current();
        match self.next()? {
            None |
            Some(Token::Newline) => Ok(()),
            Some(other) => {
                Err(Error::Wanted {
                    at: current,
                    expected: "newline",
                    found: other.describe(),
                })
            }
        }
    }

    pub fn skip_to_newline(&mut self) {
        loop {
            match self.chars.next() {
                Some((_, '\n')) |
                None => break,
                _ => {}
            }
        }
    }

    fn eatc(&mut self, ch: char) -> bool {
        match self.chars.clone().next() {
            Some((_, ch2)) if ch == ch2 => {
                self.chars.next();
                true
            }
            _ => false,
        }
    }

    pub fn current(&mut self) -> usize {
        self.chars.clone().next().map(|i| i.0).unwrap_or(self.input.len())
    }

    pub fn input(&self) -> &'a str {
        self.input
    }

    fn whitespace_token(&mut self, start: usize) -> Token<'a> {
        while self.eatc(' ') || self.eatc('\t') {
            // ...
        }
        Whitespace(&self.input[start..self.current()])
    }

    fn comment_token(&mut self, start: usize) -> Token<'a> {
        while let Some((_, ch)) = self.chars.clone().next() {
            if ch != '\t' && (ch < '\u{20}' || ch > '\u{10ffff}') {
                break
            }
            self.chars.next();
        }
        Comment(&self.input[start..self.current()])
    }

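    /// Common driver for both string flavors. `delim` is `'` or `"`,
    /// `start` is the offset of the opening quote (already consumed), and
    /// `new_ch` is called for every ordinary character so the caller can
    /// validate it or handle escapes. Detects the multiline (triple-quote)
    /// form and trims a newline that immediately follows the opening
    /// delimiter.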
    fn read_string(&mut self,
                   delim: char,
                   start: usize,
                   new_ch: &mut FnMut(&mut Tokenizer, &mut MaybeString,
                                      bool, usize, char)
                                      -> Result<(), Error>)
                   -> Result<Token<'a>, Error> {
        let mut multiline = false;
        if self.eatc(delim) {
            if self.eatc(delim) {
                multiline = true;
            } else {
                return Ok(String {
                    src: &self.input[start..start+2],
                    val: Cow::Borrowed(""),
                })
            }
        }
        let mut val = MaybeString::NotEscaped(self.current());
        let mut n = 0;
        'outer: loop {
            n += 1;
            match self.chars.next() {
                Some((i, '\n')) => {
                    if multiline {
                        if self.input.as_bytes()[i] == b'\r' {
                            val.to_owned(&self.input[..i]);
                        }
                        if n == 1 {
                            val = MaybeString::NotEscaped(self.current());
                        } else {
                            val.push('\n');
                        }
                        continue
                    } else {
                        return Err(Error::NewlineInString(i))
                    }
                }
                Some((i, ch)) if ch == delim => {
                    if multiline {
                        for _ in 0..2 {
                            if !self.eatc(delim) {
                                val.push(delim);
                                continue 'outer
                            }
                        }
                    }
                    return Ok(String {
                        src: &self.input[start..self.current()],
                        val: val.into_cow(&self.input[..i]),
                    })
                }
                Some((i, c)) => try!(new_ch(self, &mut val, multiline, i, c)),
                None => return Err(Error::UnterminatedString(start))
            }
        }
    }

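    /// Lexes a `'...'` or `'''...'''` literal string: no escapes are
    /// processed, and only control characters (other than tab) are rejected.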
    fn literal_string(&mut self, start: usize) -> Result<Token<'a>, Error> {
        self.read_string('\'', start, &mut |_me, val, _multi, i, ch| {
            if ch == '\u{09}' || ('\u{20}' <= ch && ch <= '\u{10ffff}') {
                val.push(ch);
                Ok(())
            } else {
                Err(Error::InvalidCharInString(i, ch))
            }
        })
    }

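    /// Lexes a `"..."` or `"""..."""` basic string, handling backslash
    /// escapes (including `\u`/`\U` and line-ending backslashes in the
    /// multiline form).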
    fn basic_string(&mut self, start: usize) -> Result<Token<'a>, Error> {
        self.read_string('"', start, &mut |me, val, multi, i, ch| {
            match ch {
                '\\' => {
                    val.to_owned(&me.input[..i]);
                    match me.chars.next() {
                        Some((_, '"')) => val.push('"'),
                        Some((_, '\\')) => val.push('\\'),
                        Some((_, 'b')) => val.push('\u{8}'),
                        Some((_, 'f')) => val.push('\u{c}'),
                        Some((_, 'n')) => val.push('\n'),
                        Some((_, 'r')) => val.push('\r'),
                        Some((_, 't')) => val.push('\t'),
                        Some((i, c @ 'u')) |
                        Some((i, c @ 'U')) => {
                            let len = if c == 'u' {4} else {8};
                            val.push(try!(me.hex(start, i, len)));
                        }
                        Some((_, '\n')) if multi => {
                            while let Some((_, ch)) = me.chars.clone().next() {
                                match ch {
                                    ' ' | '\t' | '\n' => {
                                        me.chars.next();
                                    }
                                    _ => break,
                                }
                            }
                        }
                        Some((i, c)) => return Err(Error::InvalidEscape(i, c)),
                        None => return Err(Error::UnterminatedString(start)),
                    }
                    Ok(())
                }
                ch if '\u{20}' <= ch && ch <= '\u{10ffff}' => {
                    val.push(ch);
                    Ok(())
                }
                _ => Err(Error::InvalidCharInString(i, ch))
            }
        })
    }

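    /// Parses `len` uppercase hex digits of a `\u`/`\U` escape and converts
    /// them to a `char`, reporting invalid digits and out-of-range values.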
    fn hex(&mut self, start: usize, i: usize, len: usize) -> Result<char, Error> {
        let mut val = 0;
        for _ in 0..len {
            match self.chars.next() {
                Some((_, ch)) if '0' <= ch && ch <= '9' => {
                    val = val * 16 + (ch as u32 - '0' as u32);
                }
                Some((_, ch)) if 'A' <= ch && ch <= 'F' => {
                    val = val * 16 + (ch as u32 - 'A' as u32) + 10;
                }
                Some((i, ch)) => return Err(Error::InvalidHexEscape(i, ch)),
                None => return Err(Error::UnterminatedString(start)),
            }
        }
        match char::from_u32(val) {
            Some(ch) => Ok(ch),
            None => Err(Error::InvalidEscapeValue(i, val)),
        }
    }

    fn keylike(&mut self, start: usize) -> Token<'a> {
        while let Some((_, ch)) = self.chars.clone().next() {
            if !is_keylike(ch) {
                break
            }
            self.chars.next();
        }
        Keylike(&self.input[start..self.current()])
    }

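    /// Byte offset of the subslice `s` within the original input; `s` must
    /// have been borrowed from `self.input`, which the pointer asserts check.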
    pub fn substr_offset(&self, s: &'a str) -> usize {
        assert!(s.len() <= self.input.len());
        let a = self.input.as_ptr() as usize;
        let b = s.as_ptr() as usize;
        assert!(a <= b);
        b - a
    }
}

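/// Yields `(byte_index, char)` pairs like `CharIndices`, except that a
/// `"\r\n"` sequence is collapsed into a single `'\n'` item.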
impl<'a> Iterator for CrlfFold<'a> {
    type Item = (usize, char);

    fn next(&mut self) -> Option<(usize, char)> {
        self.chars.next().map(|(i, c)| {
            if c == '\r' {
                let mut attempt = self.chars.clone();
                if let Some((_, '\n')) = attempt.next() {
                    self.chars = attempt;
                    return (i, '\n')
                }
            }
            (i, c)
        })
    }
}

impl MaybeString {
    fn push(&mut self, ch: char) {
        match *self {
            MaybeString::NotEscaped(..) => {}
            MaybeString::Owned(ref mut s) => s.push(ch),
        }
    }

    fn to_owned(&mut self, input: &str) {
        match *self {
            MaybeString::NotEscaped(start) => {
                *self = MaybeString::Owned(input[start..].to_owned());
            }
            MaybeString::Owned(..) => {}
        }
    }

    fn into_cow<'a>(self, input: &'a str) -> Cow<'a, str> {
        match self {
            MaybeString::NotEscaped(start) => Cow::Borrowed(&input[start..]),
            MaybeString::Owned(s) => Cow::Owned(s),
        }
    }
}

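/// Returns true for characters allowed in a bare (unquoted) TOML key:
/// ASCII letters, digits, `-`, and `_`.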
fn is_keylike(ch: char) -> bool {
    ('A' <= ch && ch <= 'Z') ||
    ('a' <= ch && ch <= 'z') ||
    ('0' <= ch && ch <= '9') ||
    ch == '-' ||
    ch == '_'
}

impl<'a> Token<'a> {
    pub fn describe(&self) -> &'static str {
        match *self {
            Token::Keylike(_) => "an identifier",
            Token::Equals => "an equals",
            Token::Period => "a period",
            Token::Comment(_) => "a comment",
            Token::Newline => "a newline",
            Token::Whitespace(_) => "whitespace",
            Token::Comma => "a comma",
            Token::RightBrace => "a right brace",
            Token::LeftBrace => "a left brace",
            Token::RightBracket => "a right bracket",
            Token::LeftBracket => "a left bracket",
            Token::String { .. } => "a string",
            Token::Colon => "a colon",
            Token::Plus => "a plus",
        }
    }
}

#[cfg(test)]
mod tests {
    use std::borrow::Cow;
    use super::{Tokenizer, Token, Error};

    fn err(input: &str, err: Error) {
        let mut t = Tokenizer::new(input);
        let token = t.next().unwrap_err();
        assert_eq!(token, err);
        assert!(t.next().unwrap().is_none());
    }

    #[test]
    fn literal_strings() {
        fn t(input: &str, val: &str) {
            let mut t = Tokenizer::new(input);
            let token = t.next().unwrap().unwrap();
            assert_eq!(token, Token::String {
                src: input,
                val: Cow::Borrowed(val),
            });
            assert!(t.next().unwrap().is_none());
        }

        t("''", "");
        t("''''''", "");
        t("'''\n'''", "");
        t("'a'", "a");
        t("'\"a'", "\"a");
        t("''''a'''", "'a");
        t("'''\n'a\n'''", "'a\n");
        t("'''a\n'a\r\n'''", "a\n'a\n");
    }

    #[test]
    fn basic_strings() {
        fn t(input: &str, val: &str) {
            let mut t = Tokenizer::new(input);
            let token = t.next().unwrap().unwrap();
            assert_eq!(token, Token::String {
                src: input,
                val: Cow::Borrowed(val),
            });
            assert!(t.next().unwrap().is_none());
        }

        t(r#""""#, "");
        t(r#""""""""#, "");
        t(r#""a""#, "a");
        t(r#""""a""""#, "a");
        t(r#""\t""#, "\t");
        t(r#""\u0000""#, "\0");
        t(r#""\U00000000""#, "\0");
        t(r#""\U000A0000""#, "\u{A0000}");
        t(r#""\\t""#, "\\t");
        t("\"\"\"\\\n\"\"\"", "");
        t("\"\"\"\\\n \t \t \\\r\n \t \n \t \r\n\"\"\"", "");
        t(r#""\r""#, "\r");
        t(r#""\n""#, "\n");
        t(r#""\b""#, "\u{8}");
        t(r#""a\fa""#, "a\u{c}a");
        t(r#""\"a""#, "\"a");
        t("\"\"\"\na\"\"\"", "a");
        t("\"\"\"\n\"\"\"", "");
        err(r#""\a"#, Error::InvalidEscape(2, 'a'));
        err("\"\\\n", Error::InvalidEscape(2, '\n'));
        err("\"\\\r\n", Error::InvalidEscape(2, '\n'));
        err("\"\\", Error::UnterminatedString(0));
        err("\"\u{0}", Error::InvalidCharInString(1, '\u{0}'));
        err(r#""\U00""#, Error::InvalidHexEscape(5, '"'));
        err(r#""\U00"#, Error::UnterminatedString(0));
        err(r#""\uD800"#, Error::InvalidEscapeValue(2, 0xd800));
        err(r#""\UFFFFFFFF"#, Error::InvalidEscapeValue(2, 0xffffffff));
    }

    #[test]
    fn keylike() {
        fn t(input: &str) {
            let mut t = Tokenizer::new(input);
            let token = t.next().unwrap().unwrap();
            assert_eq!(token, Token::Keylike(input));
            assert!(t.next().unwrap().is_none());
        }
        t("foo");
        t("0bar");
        t("bar0");
        t("1234");
        t("a-b");
        t("a_B");
        t("-_-");
        t("___");
    }

    #[test]
    fn all() {
        fn t(input: &str, expected: &[Token]) {
            let mut tokens = Tokenizer::new(input);
            let mut actual = Vec::new();
            while let Some(token) = tokens.next().unwrap() {
                actual.push(token);
            }
            for (a, b) in actual.iter().zip(expected) {
                assert_eq!(a, b);
            }
            assert_eq!(actual.len(), expected.len());
        }

        t(" a ", &[
            Token::Whitespace(" "),
            Token::Keylike("a"),
            Token::Whitespace(" "),
        ]);

        t(" a\t [[]] \t [] {} , . =\n# foo \r\n#foo \n ", &[
            Token::Whitespace(" "),
            Token::Keylike("a"),
            Token::Whitespace("\t "),
            Token::LeftBracket,
            Token::LeftBracket,
            Token::RightBracket,
            Token::RightBracket,
            Token::Whitespace(" \t "),
            Token::LeftBracket,
            Token::RightBracket,
            Token::Whitespace(" "),
            Token::LeftBrace,
            Token::RightBrace,
            Token::Whitespace(" "),
            Token::Comma,
            Token::Whitespace(" "),
            Token::Period,
            Token::Whitespace(" "),
            Token::Equals,
            Token::Newline,
            Token::Comment("# foo "),
            Token::Newline,
            Token::Comment("#foo "),
            Token::Newline,
            Token::Whitespace(" "),
        ]);
    }

    #[test]
    fn bare_cr_bad() {
        err("\r", Error::Unexpected(0, '\r'));
        err("'\n", Error::NewlineInString(1));
        err("'\u{0}", Error::InvalidCharInString(1, '\u{0}'));
        err("'", Error::UnterminatedString(0));
        err("\u{0}", Error::Unexpected(0, '\u{0}'));
    }

    #[test]
    fn bad_comment() {
        let mut t = Tokenizer::new("#\u{0}");
        t.next().unwrap().unwrap();
        assert_eq!(t.next(), Err(Error::Unexpected(1, '\u{0}')));
        assert!(t.next().unwrap().is_none());
    }
}
620}