8 #[derive(Eq, PartialEq, Debug)]
25 String { src: &'a str, val: Cow<'a, str> }
,
/// Errors the tokenizer can report.
///
/// Every `usize` payload is a byte offset into the tokenizer's input.
#[derive(Eq, PartialEq, Debug)]
pub enum Error {
    /// A character that is not permitted inside a string, and its offset.
    InvalidCharInString(usize, char),
    /// An unrecognized character following a backslash, and its offset.
    InvalidEscape(usize, char),
    /// A non-hexadecimal character inside a `\u`/`\U` escape, and its offset.
    InvalidHexEscape(usize, char),
    /// A `\u`/`\U` escape whose numeric value is not a valid `char`.
    InvalidEscapeValue(usize, u32),
    /// A newline encountered inside a string, at the given offset.
    NewlineInString(usize),
    /// A character that cannot start any token, and its offset.
    Unexpected(usize, char),
    /// A string that was still open when the input ended; the offset is
    /// where the string started.
    UnterminatedString(usize),
    /// A newline found inside a quoted table key, at the given offset.
    NewlineInTableKey(usize),
    /// A table key that is empty, at the given offset.
    EmptyTableKey(usize),
    /// A token other than the expected one was found (or the input ended).
    Wanted {
        /// Offset at which the mismatch was detected.
        at: usize,
        /// Human-readable description of what was expected.
        expected: &'static str,
        /// Human-readable description of what was found instead.
        found: &'static str,
    },
}
43 pub struct Tokenizer
<'a
> {
50 chars
: str::CharIndices
<'a
>,
56 Owned(string
::String
),
59 impl<'a
> Tokenizer
<'a
> {
60 pub fn new(input
: &'a
str) -> Tokenizer
<'a
> {
61 let mut t
= Tokenizer
{
64 chars
: input
.char_indices(),
72 pub fn next(&mut self) -> Result
<Option
<Token
<'a
>>, Error
> {
73 let token
= match self.chars
.next() {
74 Some((_
, '
\n'
)) => Newline
,
75 Some((start
, ' '
)) => self.whitespace_token(start
),
76 Some((start
, '
\t'
)) => self.whitespace_token(start
),
77 Some((start
, '
#')) => self.comment_token(start),
78 Some((_
, '
='
)) => Equals
,
79 Some((_
, '
.'
)) => Period
,
80 Some((_
, '
,'
)) => Comma
,
81 Some((_
, '
:'
)) => Colon
,
82 Some((_
, '
+'
)) => Plus
,
83 Some((_
, '
{'
)) => LeftBrace
,
84 Some((_
, '
}'
)) => RightBrace
,
85 Some((_
, '
['
)) => LeftBracket
,
86 Some((_
, '
]'
)) => RightBracket
,
87 Some((start
, '
\''
)) => return self.literal_string(start
).map(Some
),
88 Some((start
, '
"')) => return self.basic_string(start).map(Some),
89 Some((start, ch)) if is_keylike(ch) => self.keylike(start),
91 Some((start, ch)) => return Err(Error::Unexpected(start, ch)),
92 None => return Ok(None),
97 pub fn peek(&mut self) -> Result<Option<Token<'a>>, Error> {
101 pub fn eat(&mut self, expected: Token<'a>) -> Result<bool, Error> {
103 Some(ref found) if expected == *found => {}
104 Some(_) => return Ok(false),
105 None => return Ok(false),
111 pub fn expect(&mut self, expected: Token<'a>) -> Result<(), Error> {
112 let current = self.current();
115 if expected == found {
120 expected: expected.describe(),
121 found: found.describe(),
127 at: self.input.len(),
128 expected: expected.describe(),
135 pub fn table_key(&mut self) -> Result<Cow<'a, str>, Error> {
136 let current = self.current();
138 Some(Token::Keylike(k)) => Ok(k.into()),
139 Some(Token::String { src, val }) => {
140 let offset = self.substr_offset(src);
142 return Err(Error::EmptyTableKey(offset))
144 match src.find("\n") {
146 Some(i) => Err(Error::NewlineInTableKey(offset + i)),
152 expected: "a table key
",
153 found: other.describe(),
158 at: self.input.len(),
159 expected: "a table key
",
166 pub fn eat_whitespace(&mut self) -> Result<(), Error> {
167 while self.eatc(' ') || self.eatc('\t') {
173 pub fn eat_comment(&mut self) -> Result<bool, Error> {
177 drop(self.comment_token(0));
178 self.eat_newline_or_eof().map(|()| true)
181 pub fn eat_newline_or_eof(&mut self) -> Result<(), Error> {
182 let current = self.current();
185 Some(Token::Newline) => Ok(()),
190 found: other.describe(),
196 pub fn skip_to_newline(&mut self) {
198 match self.chars.next() {
206 fn eatc(&mut self, ch: char) -> bool {
207 match self.chars.clone().next() {
208 Some((_, ch2)) if ch == ch2 => {
216 pub fn current(&mut self) -> usize {
217 self.chars.clone().next().map(|i| i.0).unwrap_or(self.input.len())
220 pub fn input(&self) -> &'a str {
224 fn whitespace_token(&mut self, start: usize) -> Token<'a> {
225 while self.eatc(' ') || self.eatc('\t') {
228 Whitespace(&self.input[start..self.current()])
231 fn comment_token(&mut self, start: usize) -> Token<'a> {
232 while let Some((_, ch)) = self.chars.clone().next() {
233 if ch != '\t' && (ch < '\u{20}' || ch > '\u{10ffff}') {
238 Comment(&self.input[start..self.current()])
241 fn read_string(&mut self,
244 new_ch: &mut FnMut(&mut Tokenizer, &mut MaybeString,
246 -> Result<(), Error>)
247 -> Result<Token<'a>, Error> {
248 let mut multiline = false;
249 if self.eatc(delim) {
250 if self.eatc(delim) {
254 src: &self.input[start..start+2],
255 val: Cow::Borrowed(""),
259 let mut val = MaybeString::NotEscaped(self.current());
263 match self.chars.next() {
266 if self.input.as_bytes()[i] == b'\r' {
267 val.to_owned(&self.input[..i]);
270 val = MaybeString::NotEscaped(self.current());
276 return Err(Error::NewlineInString(i))
279 Some((i, ch)) if ch == delim => {
282 if !self.eatc(delim) {
289 src: &self.input[start..self.current()],
290 val: val.into_cow(&self.input[..i]),
293 Some((i, c)) => try!(new_ch(self, &mut val, multiline, i, c)),
294 None => return Err(Error::UnterminatedString(start))
299 fn literal_string(&mut self, start: usize) -> Result<Token<'a>, Error> {
300 self.read_string('\'', start, &mut |_me, val, _multi, i, ch| {
301 if ch == '\u{09}' || ('\u{20}' <= ch && ch <= '\u{10ffff}') {
305 Err(Error::InvalidCharInString(i, ch))
310 fn basic_string(&mut self, start: usize) -> Result<Token<'a>, Error> {
311 self.read_string('"'
, start
, &mut |me
, val
, multi
, i
, ch
| {
314 val
.to_owned(&me
.input
[..i
]);
315 match me
.chars
.next() {
316 Some((_
, '
"')) => val.push('"'
),
317 Some((_
, '
\\'
)) => val
.push('
\\'
),
318 Some((_
, 'b'
)) => val
.push('
\u{8}'
),
319 Some((_
, 'f'
)) => val
.push('
\u{c}'
),
320 Some((_
, 'n'
)) => val
.push('
\n'
),
321 Some((_
, 'r'
)) => val
.push('
\r'
),
322 Some((_
, 't'
)) => val
.push('
\t'
),
324 Some((i
, c @ 'U'
)) => {
325 let len
= if c
== 'u' {4}
else {8}
;
326 val
.push(try
!(me
.hex(start
, i
, len
)));
328 Some((_
, '
\n'
)) if multi
=> {
329 while let Some((_
, ch
)) = me
.chars
.clone().next() {
331 ' '
| '
\t'
| '
\n'
=> {
338 Some((i
, c
)) => return Err(Error
::InvalidEscape(i
, c
)),
339 None
=> return Err(Error
::UnterminatedString(start
)),
343 ch
if '
\u{20}'
<= ch
&& ch
<= '
\u{10ffff}'
=> {
347 _
=> Err(Error
::InvalidCharInString(i
, ch
))
352 fn hex(&mut self, start
: usize, i
: usize, len
: usize) -> Result
<char, Error
> {
355 match self.chars
.next() {
356 Some((_
, ch
)) if '
0'
<= ch
&& ch
<= '
9'
=> {
357 val
= val
* 16 + (ch
as u32 - '
0'
as u32);
359 Some((_
, ch
)) if 'A'
<= ch
&& ch
<= 'F'
=> {
360 val
= val
* 16 + (ch
as u32 - 'A'
as u32) + 10;
362 Some((i
, ch
)) => return Err(Error
::InvalidHexEscape(i
, ch
)),
363 None
=> return Err(Error
::UnterminatedString(start
)),
366 match char::from_u32(val
) {
368 None
=> Err(Error
::InvalidEscapeValue(i
, val
)),
372 fn keylike(&mut self, start
: usize) -> Token
<'a
> {
373 while let Some((_
, ch
)) = self.chars
.clone().next() {
379 Keylike(&self.input
[start
..self.current()])
382 pub fn substr_offset(&self, s
: &'a
str) -> usize {
383 assert
!(s
.len() <= self.input
.len());
384 let a
= self.input
.as_ptr() as usize;
385 let b
= s
.as_ptr() as usize;
391 impl<'a
> Iterator
for CrlfFold
<'a
> {
392 type Item
= (usize, char);
394 fn next(&mut self) -> Option
<(usize, char)> {
395 self.chars
.next().map(|(i
, c
)| {
397 let mut attempt
= self.chars
.clone();
398 if let Some((_
, '
\n'
)) = attempt
.next() {
399 self.chars
= attempt
;
409 fn push(&mut self, ch
: char) {
411 MaybeString
::NotEscaped(..) => {}
412 MaybeString
::Owned(ref mut s
) => s
.push(ch
),
416 fn to_owned(&mut self, input
: &str) {
418 MaybeString
::NotEscaped(start
) => {
419 *self = MaybeString
::Owned(input
[start
..].to_owned());
421 MaybeString
::Owned(..) => {}
425 fn into_cow
<'a
>(self, input
: &'a
str) -> Cow
<'a
, str> {
427 MaybeString
::NotEscaped(start
) => Cow
::Borrowed(&input
[start
..]),
428 MaybeString
::Owned(s
) => Cow
::Owned(s
),
433 fn is_keylike(ch
: char) -> bool
{
434 ('A'
<= ch
&& ch
<= 'Z'
) ||
435 ('a'
<= ch
&& ch
<= 'z'
) ||
436 ('
0'
<= ch
&& ch
<= '
9'
) ||
442 pub fn describe(&self) -> &'
static str {
444 Token
::Keylike(_
) => "an identifier",
445 Token
::Equals
=> "an equals",
446 Token
::Period
=> "a period",
447 Token
::Comment(_
) => "a comment",
448 Token
::Newline
=> "a newline",
449 Token
::Whitespace(_
) => "whitespace",
450 Token
::Comma
=> "a comma",
451 Token
::RightBrace
=> "a right brace",
452 Token
::LeftBrace
=> "a left brace",
453 Token
::RightBracket
=> "a right bracket",
454 Token
::LeftBracket
=> "a left bracket",
455 Token
::String { .. }
=> "a string",
456 Token
::Colon
=> "a colon",
457 Token
::Plus
=> "a plus",
464 use std
::borrow
::Cow
;
465 use super::{Tokenizer, Token, Error}
;
467 fn err(input
: &str, err
: Error
) {
468 let mut t
= Tokenizer
::new(input
);
469 let token
= t
.next().unwrap_err();
470 assert_eq
!(token
, err
);
471 assert
!(t
.next().unwrap().is_none());
475 fn literal_strings() {
476 fn t(input
: &str, val
: &str) {
477 let mut t
= Tokenizer
::new(input
);
478 let token
= t
.next().unwrap().unwrap();
479 assert_eq
!(token
, Token
::String
{
481 val
: Cow
::Borrowed(val
),
483 assert
!(t
.next().unwrap().is_none());
492 t("'''\n'a\n'''", "'a\n");
493 t("'''a\n'a\r\n'''", "a\n'a\n");
498 fn t(input
: &str, val
: &str) {
499 let mut t
= Tokenizer
::new(input
);
500 let token
= t
.next().unwrap().unwrap();
501 assert_eq
!(token
, Token
::String
{
503 val
: Cow
::Borrowed(val
),
505 assert
!(t
.next().unwrap().is_none());
511 t(r
#""""a""""#, "a");
513 t(r
#""\u0000""#, "\0");
514 t(r
#""\U00000000""#, "\0");
515 t(r
#""\U000A0000""#, "\u{A0000}");
516 t(r
#""\\t""#, "\\t");
517 t("\"\"\"\\\n\"\"\"", "");
518 t("\"\"\"\\\n \t \t \\\r\n \t \n \t \r\n\"\"\"", "");
521 t(r
#""\b""#, "\u{8}");
522 t(r
#""a\fa""#, "a\u{c}a");
523 t(r
#""\"a""#, "\"a");
524 t("\"\"\"\na\"\"\"", "a");
525 t("\"\"\"\n\"\"\"", "");
526 err(r
#""\a"#, Error::InvalidEscape(2, 'a'));
527 err("\"\\\n", Error::InvalidEscape(2, '\n'));
528 err("\"\\\r\n", Error::InvalidEscape(2, '\n'));
529 err("\"\\", Error::UnterminatedString(0));
530 err("\"\u{0}", Error::InvalidCharInString(1, '\u{0}'));
531 err(r#""\U00""#, Error::InvalidHexEscape(5, '"'));
532 err(r
#""\U00"#, Error::UnterminatedString(0));
533 err(r#""\uD800"#, Error::InvalidEscapeValue(2, 0xd800));
534 err(r
#""\UFFFFFFFF"#, Error::InvalidEscapeValue(2, 0xffffffff));
540 let mut t = Tokenizer::new(input);
541 let token = t.next().unwrap().unwrap();
542 assert_eq!(token, Token::Keylike(input));
543 assert!(t.next().unwrap().is_none());
557 fn t(input: &str, expected: &[Token]) {
558 let mut tokens = Tokenizer::new(input);
559 let mut actual = Vec::new();
560 while let Some(token) = tokens.next().unwrap() {
563 for (a, b) in actual.iter().zip(expected) {
566 assert_eq!(actual.len(), expected.len());
570 Token::Whitespace(" "),
572 Token::Whitespace(" "),
575 t(" a\t [[]] \t [] {} , . =\n# foo \r\n#foo \n ", &[
576 Token::Whitespace(" "),
578 Token::Whitespace("\t "),
583 Token::Whitespace(" \t "),
586 Token::Whitespace(" "),
589 Token::Whitespace(" "),
591 Token::Whitespace(" "),
593 Token::Whitespace(" "),
596 Token::Comment("# foo "),
598 Token::Comment("#foo "),
600 Token::Whitespace(" "),
606 err("\r", Error::Unexpected(0, '\r'));
607 err("'\n", Error::NewlineInString(1));
608 err("'\u{0}", Error::InvalidCharInString(1, '\u{0}'));
609 err("'", Error::UnterminatedString(0));
610 err("\u{0}", Error::Unexpected(0, '\u{0}'));
615 let mut t = Tokenizer::new("#\u{0}");
616 t.next().unwrap().unwrap();
617 assert_eq!(t.next(), Err(Error::Unexpected(1, '\u{0}')));
618 assert!(t.next().unwrap().is_none());