// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use codemap::{BytePos, CharPos, CodeMap, Pos, Span};
use errors::{FatalError, Handler, DiagnosticBuilder};
use ext::tt::transcribe::tt_next_token;
use parse::token::str_to_ident;
use rustc_unicode::property::Pattern_White_Space;
use std::mem::replace;

pub use ext::tt::transcribe::{TtReader, new_tt_reader, new_tt_reader_with_doc_flag};
    fn is_eof(&self) -> bool;
    fn next_token(&mut self) -> TokenAndSpan;
    /// Report a fatal error with the current span.
    fn fatal(&self, &str) -> FatalError;
    /// Report a non-fatal error with the current span.
    fn peek(&self) -> TokenAndSpan;
    /// Get a token the parser cares about.
    fn real_token(&mut self) -> TokenAndSpan {
        let mut t = self.next_token();
                token::Whitespace | token::Comment | token::Shebang(_) => {
                    t = self.next_token();
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct TokenAndSpan {
    pub tok: token::Token,

pub struct StringReader<'a> {
    pub span_diagnostic: &'a Handler,
    /// The absolute offset within the codemap of the next character to read
    /// The absolute offset within the codemap of the last character read (curr)
    pub last_pos: BytePos,
    /// The column of the next character to read
    /// The last character to be read
    pub curr: Option<char>,
    pub filemap: Rc<codemap::FileMap>,
    pub peek_tok: token::Token,
    // cache a direct reference to the source text, so that we don't have to
    // retrieve it via `self.filemap.src.as_ref().unwrap()` all the time.
    source_text: Rc<String>,
impl<'a> Reader for StringReader<'a> {
    fn is_eof(&self) -> bool {
    /// Return the next token. EFFECT: advances the string_reader.
    fn next_token(&mut self) -> TokenAndSpan {
        let ret_val = TokenAndSpan {
            tok: replace(&mut self.peek_tok, token::Underscore),
    fn fatal(&self, m: &str) -> FatalError {
        self.fatal_span(self.peek_span, m)
    fn err(&self, m: &str) {
        self.err_span(self.peek_span, m)
    fn peek(&self) -> TokenAndSpan {
        // FIXME(pcwalton): Bad copy!
            tok: self.peek_tok.clone(),
impl<'a> Reader for TtReader<'a> {
    fn is_eof(&self) -> bool {
        self.cur_tok == token::Eof
    fn next_token(&mut self) -> TokenAndSpan {
        let r = tt_next_token(self);
        debug!("TtReader: r={:?}", r);
    fn fatal(&self, m: &str) -> FatalError {
        self.sp_diag.span_fatal(self.cur_span, m)
    fn err(&self, m: &str) {
        self.sp_diag.span_err(self.cur_span, m);
    fn peek(&self) -> TokenAndSpan {
            tok: self.cur_tok.clone(),
impl<'a> StringReader<'a> {
    /// For comments.rs, which hackily pokes into pos and curr
    pub fn new_raw<'b>(span_diagnostic: &'b Handler,
                       filemap: Rc<codemap::FileMap>)
                       -> StringReader<'b> {
        if filemap.src.is_none() {
            span_diagnostic.bug(&format!("Cannot lex filemap \
        let source_text = (*filemap.src.as_ref().unwrap()).clone();

        let mut sr = StringReader {
            span_diagnostic: span_diagnostic,
            pos: filemap.start_pos,
            last_pos: filemap.start_pos,
            // dummy values; not read
            peek_tok: token::Eof,
            peek_span: codemap::DUMMY_SP,
            source_text: source_text,

    pub fn new<'b>(span_diagnostic: &'b Handler,
                   filemap: Rc<codemap::FileMap>)
                   -> StringReader<'b> {
        let mut sr = StringReader::new_raw(span_diagnostic, filemap);
    pub fn curr_is(&self, c: char) -> bool {

    /// Report a fatal lexical error with a given span.
    pub fn fatal_span(&self, sp: Span, m: &str) -> FatalError {
        self.span_diagnostic.span_fatal(sp, m)

    /// Report a lexical error with a given span.
    pub fn err_span(&self, sp: Span, m: &str) {
        self.span_diagnostic.span_err(sp, m)

    /// Report a fatal error spanning [`from_pos`, `to_pos`).
    fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> FatalError {
        self.fatal_span(codemap::mk_sp(from_pos, to_pos), m)

    /// Report a lexical error spanning [`from_pos`, `to_pos`).
    fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
        self.err_span(codemap::mk_sp(from_pos, to_pos), m)
    /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
    /// escaped character to the error message
    fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> FatalError {
        let mut m = m.to_string();
        for c in c.escape_default() {
        self.fatal_span_(from_pos, to_pos, &m[..])

    fn struct_fatal_span_char(&self,
                              -> DiagnosticBuilder<'a> {
        let mut m = m.to_string();
        for c in c.escape_default() {
        self.span_diagnostic.struct_span_fatal(codemap::mk_sp(from_pos, to_pos), &m[..])

    /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
    /// escaped character to the error message
    fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) {
        let mut m = m.to_string();
        for c in c.escape_default() {
        self.err_span_(from_pos, to_pos, &m[..]);

    fn struct_err_span_char(&self,
                            -> DiagnosticBuilder<'a> {
        let mut m = m.to_string();
        for c in c.escape_default() {
        self.span_diagnostic.struct_span_err(codemap::mk_sp(from_pos, to_pos), &m[..])

    /// Report a lexical error spanning [`from_pos`, `to_pos`), appending the
    /// offending string to the error message
    fn fatal_span_verbose(&self, from_pos: BytePos, to_pos: BytePos, mut m: String) -> FatalError {
        let from = self.byte_offset(from_pos).to_usize();
        let to = self.byte_offset(to_pos).to_usize();
        m.push_str(&self.source_text[from..to]);
        self.fatal_span_(from_pos, to_pos, &m[..])
    /// Advance peek_tok and peek_span to refer to the next token, and
    /// possibly update the interner.
    fn advance_token(&mut self) {
        match self.scan_whitespace_or_comment() {
                self.peek_span = comment.sp;
                self.peek_tok = comment.tok;
                    self.peek_tok = token::Eof;
                    self.peek_span = codemap::mk_sp(self.filemap.end_pos, self.filemap.end_pos);
                    let start_bytepos = self.last_pos;
                    self.peek_tok = self.next_token_inner();
                    self.peek_span = codemap::mk_sp(start_bytepos, self.last_pos);

    fn byte_offset(&self, pos: BytePos) -> BytePos {
        (pos - self.filemap.start_pos)
    /// Calls `f` with a string slice of the source text spanning from `start`
    /// up to but excluding `self.last_pos`, meaning the slice does not include
    /// the character `self.curr`.
    pub fn with_str_from<T, F>(&self, start: BytePos, f: F) -> T
        where F: FnOnce(&str) -> T
        self.with_str_from_to(start, self.last_pos, f)

    /// Create a Name from a given offset to the current offset, each
    /// adjusted 1 towards each other (assumes that on either side there is a
    /// single-byte delimiter).
    pub fn name_from(&self, start: BytePos) -> ast::Name {
        debug!("taking an ident from {:?} to {:?}", start, self.last_pos);
        self.with_str_from(start, token::intern)

    /// As name_from, with an explicit endpoint.
    pub fn name_from_to(&self, start: BytePos, end: BytePos) -> ast::Name {
        debug!("taking an ident from {:?} to {:?}", start, end);
        self.with_str_from_to(start, end, token::intern)
    /// Calls `f` with a string slice of the source text spanning from `start`
    /// up to but excluding `end`.
    fn with_str_from_to<T, F>(&self, start: BytePos, end: BytePos, f: F) -> T
        where F: FnOnce(&str) -> T
        f(&self.source_text[self.byte_offset(start).to_usize()..self.byte_offset(end).to_usize()])

    /// Converts CRLF to LF in the given string, raising an error on bare CR.
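    /// For example, "foo\r\nbar" comes back as "foo\nbar" (a string with no
    /// CRLF is returned borrowed, hence the `Cow` return type), while a CR
    /// that is not followed by LF is reported via `err_span_` using `errmsg`.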
    fn translate_crlf<'b>(&self, start: BytePos, s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
            let ch = char_at(s, i);
            let next = i + ch.len_utf8();
                if next < s.len() && char_at(s, next) == '\n' {
                    return translate_crlf_(self, start, s, errmsg, i).into();
                let pos = start + BytePos(i as u32);
                let end_pos = start + BytePos(next as u32);
                self.err_span_(pos, end_pos, errmsg);

        fn translate_crlf_(rdr: &StringReader,
            let mut buf = String::with_capacity(s.len());
                let ch = char_at(s, i);
                let next = i + ch.len_utf8();
                    buf.push_str(&s[j..i]);
                    if next >= s.len() || char_at(s, next) != '\n' {
                        let pos = start + BytePos(i as u32);
                        let end_pos = start + BytePos(next as u32);
                        rdr.err_span_(pos, end_pos, errmsg);
            buf.push_str(&s[j..]);
    /// Advance the StringReader by one character. If a newline is
    /// discovered, add it to the FileMap's list of line start offsets.
    pub fn bump(&mut self) {
        self.last_pos = self.pos;
        let current_byte_offset = self.byte_offset(self.pos).to_usize();
        if current_byte_offset < self.source_text.len() {
            assert!(self.curr.is_some());
            let last_char = self.curr.unwrap();
            let ch = char_at(&self.source_text, current_byte_offset);
            let next = current_byte_offset + ch.len_utf8();
            let byte_offset_diff = next - current_byte_offset;
            self.pos = self.pos + Pos::from_usize(byte_offset_diff);
            self.curr = Some(ch);
            self.col = self.col + CharPos(1);
            if last_char == '\n' {
                self.filemap.next_line(self.last_pos);
                self.col = CharPos(0);
            if byte_offset_diff > 1 {
                self.filemap.record_multibyte_char(self.last_pos, byte_offset_diff);
    pub fn nextch(&self) -> Option<char> {
        let offset = self.byte_offset(self.pos).to_usize();
        if offset < self.source_text.len() {
            Some(char_at(&self.source_text, offset))

    pub fn nextch_is(&self, c: char) -> bool {
        self.nextch() == Some(c)

    pub fn nextnextch(&self) -> Option<char> {
        let offset = self.byte_offset(self.pos).to_usize();
        let s = &self.source_text[..];
        if offset >= s.len() {
        let next = offset + char_at(s, offset).len_utf8();
            Some(char_at(s, next))

    pub fn nextnextch_is(&self, c: char) -> bool {
        self.nextnextch() == Some(c)

    /// Eats <XID_start><XID_continue>*, if possible.
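    /// Callers use this to pick up literal suffixes, e.g. the `us` in `2us`
    /// (see the `let suffix = self.scan_optional_raw_name();` calls below).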
    fn scan_optional_raw_name(&mut self) -> Option<ast::Name> {
        if !ident_start(self.curr) {
        let start = self.last_pos;
        while ident_continue(self.curr) {
        self.with_str_from(start, |string| {
                Some(token::intern(string))
    /// PRECONDITION: self.curr is not whitespace
    /// Eats any kind of comment.
    fn scan_comment(&mut self) -> Option<TokenAndSpan> {
            if c.is_whitespace() {
                self.span_diagnostic.span_err(codemap::mk_sp(self.last_pos, self.last_pos),
                                              "called consume_any_line_comment, but there \

        if self.curr_is('/') {
            match self.nextch() {
                    // line comments starting with "///" or "//!" are doc-comments
                    let doc_comment = self.curr_is('/') || self.curr_is('!');
                    let start_bpos = if doc_comment {
                        self.pos - BytePos(3)
                        self.last_pos - BytePos(2)

                    while !self.is_eof() {
                        match self.curr.unwrap() {
                                if self.nextch_is('\n') {
                                } else if doc_comment {
                                    self.err_span_(self.last_pos,
                                                   "bare CR not allowed in doc-comment");

                    return if doc_comment {
                        self.with_str_from(start_bpos, |string| {
                            // comments with only more "/"s are not doc comments
                            let tok = if is_doc_comment(string) {
                                token::DocComment(token::intern(string))
                                sp: codemap::mk_sp(start_bpos, self.last_pos),
                            sp: codemap::mk_sp(start_bpos, self.last_pos),
                    self.scan_block_comment()
        } else if self.curr_is('#') {
            if self.nextch_is('!') {
                // Parse an inner attribute.
                if self.nextnextch_is('[') {

                // I guess this is the only way to figure out if
                // we're at the beginning of the file...
                let cmap = CodeMap::new();
                cmap.files.borrow_mut().push(self.filemap.clone());
                let loc = cmap.lookup_char_pos_adj(self.last_pos);
                debug!("Skipping a shebang");
                if loc.line == 1 && loc.col == CharPos(0) {
                    // FIXME: Add shebang "token", return it
                    let start = self.last_pos;
                    while !self.curr_is('\n') && !self.is_eof() {
                    return Some(TokenAndSpan {
                        tok: token::Shebang(self.name_from(start)),
                        sp: codemap::mk_sp(start, self.last_pos),
    /// If there is whitespace, shebang, or a comment, scan it. Otherwise,
    fn scan_whitespace_or_comment(&mut self) -> Option<TokenAndSpan> {
        match self.curr.unwrap_or('\0') {
            // # to handle shebang at start of file -- this is the entry point
            // for skipping over all "junk"
                let c = self.scan_comment();
                debug!("scanning a comment {:?}", c);
            c if is_pattern_whitespace(Some(c)) => {
                let start_bpos = self.last_pos;
                while is_pattern_whitespace(self.curr) {
                let c = Some(TokenAndSpan {
                    tok: token::Whitespace,
                    sp: codemap::mk_sp(start_bpos, self.last_pos),
                debug!("scanning whitespace: {:?}", c);
    /// Might return a sugared-doc-attr
    fn scan_block_comment(&mut self) -> Option<TokenAndSpan> {
        // block comments starting with "/**" or "/*!" are doc-comments
        let is_doc_comment = self.curr_is('*') || self.curr_is('!');
        let start_bpos = self.last_pos - BytePos(2);

        let mut level: isize = 1;
        let mut has_cr = false;
                let msg = if is_doc_comment {
                    "unterminated block doc-comment"
                    "unterminated block comment"
                let last_bpos = self.last_pos;
                panic!(self.fatal_span_(start_bpos, last_bpos, msg));
            let n = self.curr.unwrap();
                '/' if self.nextch_is('*') => {
                '*' if self.nextch_is('/') => {

            self.with_str_from(start_bpos, |string| {
                // but comments with only "*"s between two "/"s are not
                let tok = if is_block_doc_comment(string) {
                    let string = if has_cr {
                        self.translate_crlf(start_bpos,
                                            "bare CR not allowed in block doc-comment")
                    token::DocComment(token::intern(&string[..]))
                    sp: codemap::mk_sp(start_bpos, self.last_pos),
    /// Scan through any digits (base `scan_radix`) or underscores,
    /// and return how many digits there were.
    /// `real_radix` represents the true radix of the number we're
    /// interested in, and errors will be emitted for any digits
    /// between `real_radix` and `scan_radix`.
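    /// For example, binary literals are scanned with `real_radix = 2` but
    /// `scan_radix = 10`, so in `0b102` all three digits are consumed and the
    /// `2` is then reported as an invalid digit for a base 2 literal.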
    fn scan_digits(&mut self, real_radix: u32, scan_radix: u32) -> usize {
        assert!(real_radix <= scan_radix);
                    debug!("skipping a _");
            match c.and_then(|cc| cc.to_digit(scan_radix)) {
                    debug!("{:?} in scan_digits", c);
                    // check that the hypothetical digit is actually
                    // in range for the true radix
                    if c.unwrap().to_digit(real_radix).is_none() {
                        self.err_span_(self.last_pos,
                                       &format!("invalid digit for a base {} literal", real_radix));
    /// Lex a LIT_INTEGER or a LIT_FLOAT
    fn scan_number(&mut self, c: char) -> token::Lit {
        let start_bpos = self.last_pos;
            match self.curr.unwrap_or('\0') {
                    num_digits = self.scan_digits(2, 10);
                    num_digits = self.scan_digits(8, 10);
                    num_digits = self.scan_digits(16, 16);
                '0'...'9' | '_' | '.' => {
                    num_digits = self.scan_digits(10, 10) + 1;
                    return token::Integer(self.name_from(start_bpos));
        } else if c.is_digit(10) {
            num_digits = self.scan_digits(10, 10) + 1;

            self.err_span_(start_bpos,
                           "no valid digits found for number");
            return token::Integer(token::intern("0"));

        // might be a float, but don't be greedy if this is actually an
        // integer literal followed by field/method access or a range pattern
        // (`0..2` and `12.foo()`)
        if self.curr_is('.') && !self.nextch_is('.') &&
            // might have stuff after the ., and if it does, it needs to start
            if self.curr.unwrap_or('\0').is_digit(10) {
                self.scan_digits(10, 10);
                self.scan_float_exponent();
            let last_pos = self.last_pos;
            self.check_float_base(start_bpos, last_pos, base);
            return token::Float(self.name_from(start_bpos));
            // it might be a float if it has an exponent
            if self.curr_is('e') || self.curr_is('E') {
                self.scan_float_exponent();
                let last_pos = self.last_pos;
                self.check_float_base(start_bpos, last_pos, base);
                return token::Float(self.name_from(start_bpos));
            // but we certainly have an integer!
            return token::Integer(self.name_from(start_bpos));
    /// Scan over `n_digits` hex digits, stopping at `delim`, reporting an
    /// error if too many or too few digits are encountered.
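    /// For example, a `\x7f` escape scans exactly two hex digits; when
    /// `below_0x7f_only` is set (as it is for char and string literals via
    /// `scan_byte_escape`), values of `0x80` and above are reported as errors.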
    fn scan_hex_digits(&mut self, n_digits: usize, delim: char, below_0x7f_only: bool) -> bool {
        debug!("scanning {} digits until {:?}", n_digits, delim);
        let start_bpos = self.last_pos;
        let mut accum_int = 0;

        let mut valid = true;
        for _ in 0..n_digits {
                let last_bpos = self.last_pos;
                panic!(self.fatal_span_(start_bpos,
                                        "unterminated numeric character escape"));
            if self.curr_is(delim) {
                let last_bpos = self.last_pos;
                self.err_span_(start_bpos,
                               "numeric character escape is too short");
            let c = self.curr.unwrap_or('\x00');
            accum_int += c.to_digit(16).unwrap_or_else(|| {
                self.err_span_char(self.last_pos,
                                   "invalid character in numeric character escape",

        if below_0x7f_only && accum_int >= 0x80 {
            self.err_span_(start_bpos,
                           "this form of character escape may only be used with characters in \
                            the range [\\x00-\\x7f]");

        match char::from_u32(accum_int) {
                let last_bpos = self.last_pos;
                self.err_span_(start_bpos, last_bpos, "invalid numeric character escape");
    /// Scan for a single (possibly escaped) byte or char
    /// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
    /// `start` is the position of `first_source_char`, which is already consumed.
    /// Returns true if there was a valid char/byte, false otherwise.
    fn scan_char_or_byte(&mut self,
                         first_source_char: char,
        match first_source_char {
                // '\X' for some X must be a character constant:
                let escaped = self.curr;
                let escaped_pos = self.last_pos;
                    None => {} // EOF here is an error that will be checked later.
                            'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
                            'x' => self.scan_byte_escape(delim, !ascii_only),
                                let valid = if self.curr_is('{') {
                                    self.scan_unicode_escape(delim) && !ascii_only
                                    let span = codemap::mk_sp(start, self.last_pos);
                                        .struct_span_err(span, "incorrect unicode escape sequence")
                                                   "format of unicode escape sequences is \
                                    self.err_span_(start,
                                                   "unicode escape sequences cannot be used as a \
                                                    byte or in a byte string");
                            '\n' if delim == '"' => {
                                self.consume_whitespace();
                            '\r' if delim == '"' && self.curr_is('\n') => {
                                self.consume_whitespace();
                                let last_pos = self.last_pos;
                                let mut err = self.struct_err_span_char(escaped_pos,
                                                                        "unknown byte escape"
                                    err.span_help(codemap::mk_sp(escaped_pos, last_pos),
                                                  "this is an isolated carriage return; consider \
                                                   checking your editor and version control \
                                if (e == '{' || e == '}') && !ascii_only {
                                    err.span_help(codemap::mk_sp(escaped_pos, last_pos),
                                                  "if used in a formatting string, curly braces \
                                                   are escaped with `{{` and `}}`");
            '\t' | '\n' | '\r' | '\'' if delim == '\'' => {
                let last_pos = self.last_pos;
                self.err_span_char(start,
                                       "byte constant must be escaped"
                                       "character constant must be escaped"
                if self.curr_is('\n') {
                    self.err_span_(start,
                                   "bare CR not allowed in string, use \\r instead");
        if ascii_only && first_source_char > '\x7F' {
            let last_pos = self.last_pos;
            self.err_span_char(start,
                               "byte constant must be ASCII. Use a \\xHH escape for a \
    /// Scan over a \u{...} escape
    /// At this point, we have already seen the \ and the u, the { is the current character. We
    /// will read at least one digit, and up to 6, and pass over the }.
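    /// For example, for `'\u{2603}'` this is entered with `{` as the current
    /// character, reads the hex digits `2603`, checks that they form a valid
    /// `char`, and finishes just past the closing `}`.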
    fn scan_unicode_escape(&mut self, delim: char) -> bool {
        self.bump(); // past the {
        let start_bpos = self.last_pos;
        let mut accum_int = 0;
        let mut valid = true;

        while !self.curr_is('}') && count <= 6 {
            let c = match self.curr {
                    panic!(self.fatal_span_(start_bpos,
                                            "unterminated unicode escape (found EOF)"));
            accum_int += c.to_digit(16).unwrap_or_else(|| {
                    panic!(self.fatal_span_(self.last_pos,
                                            "unterminated unicode escape (needed a `}`)"));
                    self.err_span_char(self.last_pos,
                                       "invalid character in unicode escape",
            self.err_span_(start_bpos,
                           "overlong unicode escape (can have at most 6 hex digits)");

        if valid && (char::from_u32(accum_int).is_none() || count == 0) {
            self.err_span_(start_bpos,
                           "invalid unicode character escape");

        self.bump(); // past the ending }
    /// Scan over a float exponent.
    fn scan_float_exponent(&mut self) {
        if self.curr_is('e') || self.curr_is('E') {
            if self.curr_is('-') || self.curr_is('+') {
            if self.scan_digits(10, 10) == 0 {
                self.err_span_(self.last_pos,
                               "expected at least one digit in exponent")

    /// Check that a base is valid for a floating literal, emitting a nice
    /// error if it isn't.
    fn check_float_base(&mut self, start_bpos: BytePos, last_bpos: BytePos, base: usize) {
                self.err_span_(start_bpos,
                               "hexadecimal float literal is not supported")
                self.err_span_(start_bpos,
                               "octal float literal is not supported")
                self.err_span_(start_bpos,
                               "binary float literal is not supported")
    fn binop(&mut self, op: token::BinOpToken) -> token::Token {
        if self.curr_is('=') {
            return token::BinOpEq(op);
            return token::BinOp(op);
    /// Return the next token from the string, advances the input past that
    /// token, and updates the interner
    fn next_token_inner(&mut self) -> token::Token {
        if ident_start(c) &&
           match (c.unwrap(), self.nextch(), self.nextnextch()) {
               // Note: r as in r" or r#" is part of a raw string literal,
               // b as in b' is part of a byte literal.
               // They are not identifiers, and are handled further down.
               ('r', Some('"'), _) |
               ('r', Some('#'), _) |
               ('b', Some('"'), _) |
               ('b', Some('\''), _) |
               ('b', Some('r'), Some('"')) |
               ('b', Some('r'), Some('#')) => false,
            let start = self.last_pos;
            while ident_continue(self.curr) {

            return self.with_str_from(start, |string| {
                    // FIXME: perform NFKC normalization here. (Issue #2253)
                    if self.curr_is(':') && self.nextch_is(':') {
                        token::Ident(str_to_ident(string), token::ModName)
                        token::Ident(str_to_ident(string), token::Plain)

        if is_dec_digit(c) {
            let num = self.scan_number(c.unwrap());
            let suffix = self.scan_optional_raw_name();
            debug!("next_token_inner: scanned number {:?}, {:?}", num, suffix);
            return token::Literal(num, suffix);

        match c.expect("next_token_inner called at EOF") {
                return token::Comma;
                return if self.curr_is('.') {
                    if self.curr_is('.') {
                return token::OpenDelim(token::Paren);
                return token::CloseDelim(token::Paren);
                return token::OpenDelim(token::Brace);
                return token::CloseDelim(token::Brace);
                return token::OpenDelim(token::Bracket);
                return token::CloseDelim(token::Bracket);
                return token::Pound;
                return token::Tilde;
                return token::Question;
                if self.curr_is(':') {
                    return token::ModSep;
                    return token::Colon;
                return token::Dollar;

            // Multi-byte tokens.
                if self.curr_is('=') {
                } else if self.curr_is('>') {
                    return token::FatArrow;
                if self.curr_is('=') {
                match self.curr.unwrap_or('\x00') {
                        return self.binop(token::Shl);
                        match self.curr.unwrap_or('\x00') {
                                return token::LArrow;
                match self.curr.unwrap_or('\x00') {
                        return self.binop(token::Shr);
                // Either a character constant 'a' OR a lifetime name 'abc
                let start_with_quote = self.last_pos;
                let start = self.last_pos;

                // the eof will be picked up by the final `'` check below
                let c2 = self.curr.unwrap_or('\x00');

                // If the character is an ident start not followed by another single
                // quote, then this is a lifetime name:
                if ident_start(Some(c2)) && !self.curr_is('\'') {
                    while ident_continue(self.curr) {
                    // lifetimes shouldn't end with a single quote
                    // if we find one, then this is an invalid character literal
                    if self.curr_is('\'') {
                        panic!(self.fatal_span_verbose(
                               start_with_quote, self.pos,
                               String::from("character literal may only contain one codepoint")));

                    // Include the leading `'` in the real identifier, for macro
                    // expansion purposes. See #12512 for the gory details of why
                    // this is necessary.
                    let ident = self.with_str_from(start, |lifetime_name| {
                        str_to_ident(&format!("'{}", lifetime_name))

                    // Conjure up a "keyword checking ident" to make sure that
                    // the lifetime name is not a keyword.
                    let keyword_checking_ident = self.with_str_from(start, |lifetime_name| {
                        str_to_ident(lifetime_name)
                    let keyword_checking_token = &token::Ident(keyword_checking_ident,
                    let last_bpos = self.last_pos;
                    if keyword_checking_token.is_keyword(token::keywords::SelfValue) {
                        self.err_span_(start,
                                       "invalid lifetime name: 'self is no longer a special \
                    } else if keyword_checking_token.is_any_keyword() &&
                              !keyword_checking_token.is_keyword(token::keywords::Static) {
                        self.err_span_(start, last_bpos, "invalid lifetime name");
                    return token::Lifetime(ident);

                let valid = self.scan_char_or_byte(start,
                if !self.curr_is('\'') {
                    panic!(self.fatal_span_verbose(
                           start_with_quote, self.last_pos,
                           String::from("character literal may only contain one codepoint")));
                    self.name_from(start)
                self.bump(); // advance curr past token
                let suffix = self.scan_optional_raw_name();
                return token::Literal(token::Char(id), suffix);
                let lit = match self.curr {
                    Some('\'') => self.scan_byte(),
                    Some('"') => self.scan_byte_string(),
                    Some('r') => self.scan_raw_byte_string(),
                    _ => unreachable!(), // Should have been a token::Ident above.
                let suffix = self.scan_optional_raw_name();
                return token::Literal(lit, suffix);

                let start_bpos = self.last_pos;
                let mut valid = true;
                while !self.curr_is('"') {
                        let last_bpos = self.last_pos;
                        panic!(self.fatal_span_(start_bpos,
                                                "unterminated double quote string"));
                    let ch_start = self.last_pos;
                    let ch = self.curr.unwrap();
                    valid &= self.scan_char_or_byte(ch_start,
                    // adjust for the ASCII " at the start of the literal
                    self.name_from(start_bpos + BytePos(1))
                let suffix = self.scan_optional_raw_name();
                return token::Literal(token::Str_(id), suffix);
                let start_bpos = self.last_pos;
                let mut hash_count = 0;
                while self.curr_is('#') {
                    let last_bpos = self.last_pos;
                    panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string"));
                } else if !self.curr_is('"') {
                    let last_bpos = self.last_pos;
                    let curr_char = self.curr.unwrap();
                    panic!(self.fatal_span_char(start_bpos,
                                                "found invalid character; only `#` is allowed \
                                                 in raw string delimitation",

                let content_start_bpos = self.last_pos;
                let mut content_end_bpos;
                let mut valid = true;
                        let last_bpos = self.last_pos;
                        panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string"));
                    // if self.curr_is('"') {
                    //     content_end_bpos = self.last_pos;
                    //     for _ in 0..hash_count {
                    //         if !self.curr_is('#') {
                    let c = self.curr.unwrap();
                            content_end_bpos = self.last_pos;
                            for _ in 0..hash_count {
                                if !self.curr_is('#') {
                            if !self.nextch_is('\n') {
                                let last_bpos = self.last_pos;
                                self.err_span_(start_bpos,
                                               "bare CR not allowed in raw string, use \\r \
                    self.name_from_to(content_start_bpos, content_end_bpos)
                let suffix = self.scan_optional_raw_name();
                return token::Literal(token::StrRaw(id, hash_count), suffix);
                if self.nextch_is('>') {
                    return token::RArrow;
                    return self.binop(token::Minus);
                if self.nextch_is('&') {
                    return token::AndAnd;
                    return self.binop(token::And);
                match self.nextch() {
                    return self.binop(token::Or);
                return self.binop(token::Plus);
                return self.binop(token::Star);
                return self.binop(token::Slash);
                return self.binop(token::Caret);
                return self.binop(token::Percent);
                let last_bpos = self.last_pos;
                let bpos = self.pos;
                let mut err = self.struct_fatal_span_char(last_bpos,
                                                          "unknown start of token",
                unicode_chars::check_for_substitution(&self, c, &mut err);
    fn consume_whitespace(&mut self) {
        while is_pattern_whitespace(self.curr) && !self.is_eof() {

    fn read_to_eol(&mut self) -> String {
        let mut val = String::new();
        while !self.curr_is('\n') && !self.is_eof() {
            val.push(self.curr.unwrap());
        if self.curr_is('\n') {

    fn read_one_line_comment(&mut self) -> String {
        let val = self.read_to_eol();
        assert!((val.as_bytes()[0] == b'/' && val.as_bytes()[1] == b'/') ||
                (val.as_bytes()[0] == b'#' && val.as_bytes()[1] == b'!'));

    fn consume_non_eol_whitespace(&mut self) {
        while is_pattern_whitespace(self.curr) && !self.curr_is('\n') && !self.is_eof() {

    fn peeking_at_comment(&self) -> bool {
        (self.curr_is('/') && self.nextch_is('/')) || (self.curr_is('/') && self.nextch_is('*')) ||
        // consider shebangs comments, but not inner attributes
        (self.curr_is('#') && self.nextch_is('!') && !self.nextnextch_is('['))
    fn scan_byte(&mut self) -> token::Lit {
        let start = self.last_pos;

        // the eof will be picked up by the final `'` check below
        let c2 = self.curr.unwrap_or('\x00');
        let valid = self.scan_char_or_byte(start,
        if !self.curr_is('\'') {
            // Byte offsetting here is okay because the
            // characters before position `start` are an
            // ascii single quote and ascii 'b'.
            let last_pos = self.last_pos;
            panic!(self.fatal_span_verbose(start - BytePos(2),
                                           "unterminated byte constant".to_string()));
            self.name_from(start)
        self.bump(); // advance curr past token
        return token::Byte(id);

    fn scan_byte_escape(&mut self, delim: char, below_0x7f_only: bool) -> bool {
        self.scan_hex_digits(2, delim, below_0x7f_only)

    fn scan_byte_string(&mut self) -> token::Lit {
        let start = self.last_pos;
        let mut valid = true;
        while !self.curr_is('"') {
                let last_pos = self.last_pos;
                panic!(self.fatal_span_(start, last_pos, "unterminated double quote byte string"));
            let ch_start = self.last_pos;
            let ch = self.curr.unwrap();
            valid &= self.scan_char_or_byte(ch_start,
            self.name_from(start)
        return token::ByteStr(id);
    fn scan_raw_byte_string(&mut self) -> token::Lit {
        let start_bpos = self.last_pos;
        let mut hash_count = 0;
        while self.curr_is('#') {
            let last_pos = self.last_pos;
            panic!(self.fatal_span_(start_bpos, last_pos, "unterminated raw string"));
        } else if !self.curr_is('"') {
            let last_pos = self.last_pos;
            let ch = self.curr.unwrap();
            panic!(self.fatal_span_char(start_bpos,
                                        "found invalid character; only `#` is allowed in raw \
                                         string delimitation",

        let content_start_bpos = self.last_pos;
        let mut content_end_bpos;
                let last_pos = self.last_pos;
                panic!(self.fatal_span_(start_bpos, last_pos, "unterminated raw string"))
                    content_end_bpos = self.last_pos;
                    for _ in 0..hash_count {
                        if !self.curr_is('#') {
                    let last_pos = self.last_pos;
                    self.err_span_char(last_pos, last_pos, "raw byte string must be ASCII", c);
        return token::ByteStrRaw(self.name_from_to(content_start_bpos, content_end_bpos),
// This tests the character for the unicode property 'PATTERN_WHITE_SPACE' which
// is guaranteed to be forward compatible. http://unicode.org/reports/tr31/#R3
pub fn is_pattern_whitespace(c: Option<char>) -> bool {
    c.map_or(false, Pattern_White_Space)

fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
        Some(c) => lo <= c && c <= hi,

fn is_dec_digit(c: Option<char>) -> bool {
    return in_range(c, '0', '9');

pub fn is_doc_comment(s: &str) -> bool {
    let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') ||
              s.starts_with("//!");
    debug!("is {:?} a doc comment? {}", s, res);

pub fn is_block_doc_comment(s: &str) -> bool {
    // Prevent `/**/` from being parsed as a doc comment
    let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') ||
               s.starts_with("/*!")) && s.len() >= 5;
    debug!("is {:?} a doc comment? {}", s, res);

fn ident_start(c: Option<char>) -> bool {
        None => return false,
    (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c > '\x7f' && c.is_xid_start())

fn ident_continue(c: Option<char>) -> bool {
        None => return false,
    (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' ||
    (c > '\x7f' && c.is_xid_continue())
    use codemap::{BytePos, CodeMap, Span, NO_EXPANSION};
    use parse::token::str_to_ident;

    fn mk_sh(cm: Rc<CodeMap>) -> errors::Handler {
        // FIXME (#22405): Replace `Box::new` with `box` here when/if possible.
        let emitter = errors::emitter::EmitterWriter::new(Box::new(io::sink()), None, cm);
        errors::Handler::with_emitter(true, false, Box::new(emitter))

    // open a string reader for the given string
    fn setup<'a>(cm: &CodeMap,
                 span_handler: &'a errors::Handler,
                 -> StringReader<'a> {
        let fm = cm.new_filemap("zebra.rs".to_string(), teststr);
        StringReader::new(span_handler, fm)
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        let mut string_reader = setup(&cm,
                                      "/* my source file */ fn main() { println!(\"zebra\"); }\n"
        let id = str_to_ident("fn");
        assert_eq!(string_reader.next_token().tok, token::Comment);
        assert_eq!(string_reader.next_token().tok, token::Whitespace);
        let tok1 = string_reader.next_token();
        let tok2 = TokenAndSpan {
            tok: token::Ident(id, token::Plain),
                expn_id: NO_EXPANSION,
        assert_eq!(tok1, tok2);
        assert_eq!(string_reader.next_token().tok, token::Whitespace);
        // the 'main' id is already read:
        assert_eq!(string_reader.last_pos.clone(), BytePos(28));
        // read another token:
        let tok3 = string_reader.next_token();
        let tok4 = TokenAndSpan {
            tok: token::Ident(str_to_ident("main"), token::Plain),
                expn_id: NO_EXPANSION,
        assert_eq!(tok3, tok4);
        // the lparen is already read:
        assert_eq!(string_reader.last_pos.clone(), BytePos(29))
    // check that the given reader produces the desired stream
    // of tokens (stop checking after exhausting the expected vec)
    fn check_tokenization(mut string_reader: StringReader, expected: Vec<token::Token>) {
        for expected_tok in &expected {
            assert_eq!(&string_reader.next_token().tok, expected_tok);

    // make the identifier by looking up the string in the interner
    fn mk_ident(id: &str, style: token::IdentStyle) -> token::Token {
        token::Ident(str_to_ident(id), style)
    fn doublecolonparsing() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        check_tokenization(setup(&cm, &sh, "a b".to_string()),
                           vec![mk_ident("a", token::Plain),
                                mk_ident("b", token::Plain)]);

        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        check_tokenization(setup(&cm, &sh, "a::b".to_string()),
                           vec![mk_ident("a", token::ModName),
                                mk_ident("b", token::Plain)]);

        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        check_tokenization(setup(&cm, &sh, "a ::b".to_string()),
                           vec![mk_ident("a", token::Plain),
                                mk_ident("b", token::Plain)]);

        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        check_tokenization(setup(&cm, &sh, "a:: b".to_string()),
                           vec![mk_ident("a", token::ModName),
                                mk_ident("b", token::Plain)]);

        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        assert_eq!(setup(&cm, &sh, "'a'".to_string()).next_token().tok,
                   token::Literal(token::Char(token::intern("a")), None));
    fn character_space() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        assert_eq!(setup(&cm, &sh, "' '".to_string()).next_token().tok,
                   token::Literal(token::Char(token::intern(" ")), None));

    fn character_escaped() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        assert_eq!(setup(&cm, &sh, "'\\n'".to_string()).next_token().tok,
                   token::Literal(token::Char(token::intern("\\n")), None));

    fn lifetime_name() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        assert_eq!(setup(&cm, &sh, "'abc".to_string()).next_token().tok,
                   token::Lifetime(token::str_to_ident("'abc")));

        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        assert_eq!(setup(&cm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string())
                   token::Literal(token::StrRaw(token::intern("\"#a\\b\x00c\""), 3), None));
    fn literal_suffixes() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
            ($input: expr, $tok_type: ident, $tok_contents: expr) => {{
                assert_eq!(setup(&cm, &sh, format!("{}suffix", $input)).next_token().tok,
                           token::Literal(token::$tok_type(token::intern($tok_contents)),
                                          Some(token::intern("suffix"))));
                // with a whitespace separator:
                assert_eq!(setup(&cm, &sh, format!("{} suffix", $input)).next_token().tok,
                           token::Literal(token::$tok_type(token::intern($tok_contents)),

        test!("'a'", Char, "a");
        test!("b'a'", Byte, "a");
        test!("\"a\"", Str_, "a");
        test!("b\"a\"", ByteStr, "a");
        test!("1234", Integer, "1234");
        test!("0b101", Integer, "0b101");
        test!("0xABC", Integer, "0xABC");
        test!("1.0", Float, "1.0");
        test!("1.0e10", Float, "1.0e10");

        assert_eq!(setup(&cm, &sh, "2us".to_string()).next_token().tok,
                   token::Literal(token::Integer(token::intern("2")),
                                  Some(token::intern("us"))));
        assert_eq!(setup(&cm, &sh, "r###\"raw\"###suffix".to_string()).next_token().tok,
                   token::Literal(token::StrRaw(token::intern("raw"), 3),
                                  Some(token::intern("suffix"))));
        assert_eq!(setup(&cm, &sh, "br###\"raw\"###suffix".to_string()).next_token().tok,
                   token::Literal(token::ByteStrRaw(token::intern("raw"), 3),
                                  Some(token::intern("suffix"))));
    fn line_doc_comments() {
        assert!(is_doc_comment("///"));
        assert!(is_doc_comment("/// blah"));
        assert!(!is_doc_comment("////"));
    fn nested_block_comments() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        let mut lexer = setup(&cm, &sh, "/* /* */ */'a'".to_string());
        match lexer.next_token().tok {
            token::Comment => {}
            _ => panic!("expected a comment!"),
        assert_eq!(lexer.next_token().tok,
                   token::Literal(token::Char(token::intern("a")), None));

    fn crlf_comments() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        let mut lexer = setup(&cm, &sh, "// test\r\n/// test\r\n".to_string());
        let comment = lexer.next_token();
        assert_eq!(comment.tok, token::Comment);
        assert_eq!(comment.sp, ::codemap::mk_sp(BytePos(0), BytePos(7)));
        assert_eq!(lexer.next_token().tok, token::Whitespace);
        assert_eq!(lexer.next_token().tok,
                   token::DocComment(token::intern("/// test")));