]> git.proxmox.com Git - rustc.git/blame - src/libsyntax/parse/lexer/mod.rs
New upstream version 1.13.0+dfsg1
[rustc.git] / src / libsyntax / parse / lexer / mod.rs
CommitLineData
1a4d82fc
JJ
1// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11use ast;
3157f602
XL
12use syntax_pos::{self, BytePos, CharPos, Pos, Span};
13use codemap::CodeMap;
9cc50fc6 14use errors::{FatalError, Handler, DiagnosticBuilder};
1a4d82fc 15use ext::tt::transcribe::tt_next_token;
a7813a04 16use parse::token::{self, keywords, str_to_ident};
d9579d0f 17use str::char_at;
54a0048b 18use rustc_unicode::property::Pattern_White_Space;
1a4d82fc 19
d9579d0f 20use std::borrow::Cow;
1a4d82fc 21use std::char;
1a4d82fc 22use std::mem::replace;
1a4d82fc 23use std::rc::Rc;
1a4d82fc
JJ
24
25pub use ext::tt::transcribe::{TtReader, new_tt_reader, new_tt_reader_with_doc_flag};
26
27pub mod comments;
92a42be0 28mod unicode_chars;
1a4d82fc
JJ
29
30pub trait Reader {
31 fn is_eof(&self) -> bool;
a7813a04
XL
32 fn try_next_token(&mut self) -> Result<TokenAndSpan, ()>;
33 fn next_token(&mut self) -> TokenAndSpan where Self: Sized {
34 let res = self.try_next_token();
35 self.unwrap_or_abort(res)
36 }
1a4d82fc 37 /// Report a fatal error with the current span.
92a42be0 38 fn fatal(&self, &str) -> FatalError;
1a4d82fc
JJ
39 /// Report a non-fatal error with the current span.
40 fn err(&self, &str);
a7813a04
XL
41 fn emit_fatal_errors(&mut self);
42 fn unwrap_or_abort(&mut self, res: Result<TokenAndSpan, ()>) -> TokenAndSpan {
43 match res {
44 Ok(tok) => tok,
45 Err(_) => {
46 self.emit_fatal_errors();
47 panic!(FatalError);
48 }
49 }
50 }
1a4d82fc
JJ
51 fn peek(&self) -> TokenAndSpan;
52 /// Get a token the parser cares about.
a7813a04
XL
53 fn try_real_token(&mut self) -> Result<TokenAndSpan, ()> {
54 let mut t = self.try_next_token()?;
1a4d82fc
JJ
55 loop {
56 match t.tok {
57 token::Whitespace | token::Comment | token::Shebang(_) => {
a7813a04 58 t = self.try_next_token()?;
9cc50fc6
SL
59 }
60 _ => break,
1a4d82fc
JJ
61 }
62 }
a7813a04
XL
63 Ok(t)
64 }
65 fn real_token(&mut self) -> TokenAndSpan {
66 let res = self.try_real_token();
67 self.unwrap_or_abort(res)
1a4d82fc
JJ
68 }
69}
70
85aaf69f 71#[derive(Clone, PartialEq, Eq, Debug)]
1a4d82fc
JJ
72pub struct TokenAndSpan {
73 pub tok: token::Token,
74 pub sp: Span,
75}
76
77pub struct StringReader<'a> {
9cc50fc6 78 pub span_diagnostic: &'a Handler,
1a4d82fc
JJ
79 /// The absolute offset within the codemap of the next character to read
80 pub pos: BytePos,
81 /// The absolute offset within the codemap of the last character read(curr)
82 pub last_pos: BytePos,
83 /// The column of the next character to read
84 pub col: CharPos,
85 /// The last character to be read
86 pub curr: Option<char>,
3157f602 87 pub filemap: Rc<syntax_pos::FileMap>,
9e0c209e
SL
88 /// If Some, stop reading the source at this position (inclusive).
89 pub terminator: Option<BytePos>,
90 /// Whether to record new-lines in filemap. This is only necessary the first
91 /// time a filemap is lexed. If part of a filemap is being re-lexed, this
92 /// should be set to false.
93 pub save_new_lines: bool,
9cc50fc6 94 // cached:
1a4d82fc
JJ
95 pub peek_tok: token::Token,
96 pub peek_span: Span,
a7813a04 97 pub fatal_errs: Vec<DiagnosticBuilder<'a>>,
c34b1796
AL
98 // cache a direct reference to the source text, so that we don't have to
99 // retrieve it via `self.filemap.src.as_ref().unwrap()` all the time.
9cc50fc6 100 source_text: Rc<String>,
1a4d82fc
JJ
101}
102
103impl<'a> Reader for StringReader<'a> {
9cc50fc6 104 fn is_eof(&self) -> bool {
9e0c209e
SL
105 if self.curr.is_none() {
106 return true;
107 }
108
109 match self.terminator {
110 Some(t) => self.pos > t,
111 None => false,
112 }
9cc50fc6 113 }
1a4d82fc 114 /// Return the next token. EFFECT: advances the string_reader.
a7813a04
XL
115 fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> {
116 assert!(self.fatal_errs.is_empty());
1a4d82fc
JJ
117 let ret_val = TokenAndSpan {
118 tok: replace(&mut self.peek_tok, token::Underscore),
119 sp: self.peek_span,
120 };
a7813a04
XL
121 self.advance_token()?;
122 Ok(ret_val)
1a4d82fc 123 }
92a42be0 124 fn fatal(&self, m: &str) -> FatalError {
1a4d82fc
JJ
125 self.fatal_span(self.peek_span, m)
126 }
127 fn err(&self, m: &str) {
128 self.err_span(self.peek_span, m)
129 }
a7813a04
XL
130 fn emit_fatal_errors(&mut self) {
131 for err in &mut self.fatal_errs {
132 err.emit();
133 }
134 self.fatal_errs.clear();
135 }
1a4d82fc
JJ
136 fn peek(&self) -> TokenAndSpan {
137 // FIXME(pcwalton): Bad copy!
138 TokenAndSpan {
139 tok: self.peek_tok.clone(),
140 sp: self.peek_span,
141 }
142 }
143}
144
145impl<'a> Reader for TtReader<'a> {
146 fn is_eof(&self) -> bool {
147 self.cur_tok == token::Eof
148 }
a7813a04
XL
149 fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> {
150 assert!(self.fatal_errs.is_empty());
1a4d82fc
JJ
151 let r = tt_next_token(self);
152 debug!("TtReader: r={:?}", r);
a7813a04 153 Ok(r)
1a4d82fc 154 }
92a42be0
SL
155 fn fatal(&self, m: &str) -> FatalError {
156 self.sp_diag.span_fatal(self.cur_span, m)
1a4d82fc
JJ
157 }
158 fn err(&self, m: &str) {
159 self.sp_diag.span_err(self.cur_span, m);
160 }
a7813a04
XL
161 fn emit_fatal_errors(&mut self) {
162 for err in &mut self.fatal_errs {
163 err.emit();
164 }
165 self.fatal_errs.clear();
166 }
1a4d82fc
JJ
167 fn peek(&self) -> TokenAndSpan {
168 TokenAndSpan {
169 tok: self.cur_tok.clone(),
170 sp: self.cur_span,
171 }
172 }
173}
174
1a4d82fc
JJ
175impl<'a> StringReader<'a> {
176 /// For comments.rs, which hackily pokes into pos and curr
9cc50fc6 177 pub fn new_raw<'b>(span_diagnostic: &'b Handler,
3157f602 178 filemap: Rc<syntax_pos::FileMap>)
9cc50fc6 179 -> StringReader<'b> {
9e0c209e
SL
180 let mut sr = StringReader::new_raw_internal(span_diagnostic, filemap);
181 sr.bump();
182 sr
183 }
184
185 fn new_raw_internal<'b>(span_diagnostic: &'b Handler,
186 filemap: Rc<syntax_pos::FileMap>)
187 -> StringReader<'b> {
c34b1796 188 if filemap.src.is_none() {
9cc50fc6
SL
189 span_diagnostic.bug(&format!("Cannot lex filemap \
190 without source: {}",
191 filemap.name)[..]);
c34b1796
AL
192 }
193
194 let source_text = (*filemap.src.as_ref().unwrap()).clone();
195
9e0c209e 196 StringReader {
1a4d82fc
JJ
197 span_diagnostic: span_diagnostic,
198 pos: filemap.start_pos,
199 last_pos: filemap.start_pos,
200 col: CharPos(0),
201 curr: Some('\n'),
202 filemap: filemap,
9e0c209e
SL
203 terminator: None,
204 save_new_lines: true,
9cc50fc6 205 // dummy values; not read
1a4d82fc 206 peek_tok: token::Eof,
3157f602 207 peek_span: syntax_pos::DUMMY_SP,
9cc50fc6 208 source_text: source_text,
a7813a04 209 fatal_errs: Vec::new(),
9e0c209e 210 }
1a4d82fc
JJ
211 }
212
9cc50fc6 213 pub fn new<'b>(span_diagnostic: &'b Handler,
3157f602 214 filemap: Rc<syntax_pos::FileMap>)
9cc50fc6 215 -> StringReader<'b> {
1a4d82fc 216 let mut sr = StringReader::new_raw(span_diagnostic, filemap);
a7813a04
XL
217 if let Err(_) = sr.advance_token() {
218 sr.emit_fatal_errors();
219 panic!(FatalError);
220 }
1a4d82fc
JJ
221 sr
222 }
223
224 pub fn curr_is(&self, c: char) -> bool {
225 self.curr == Some(c)
226 }
227
228 /// Report a fatal lexical error with a given span.
92a42be0
SL
229 pub fn fatal_span(&self, sp: Span, m: &str) -> FatalError {
230 self.span_diagnostic.span_fatal(sp, m)
1a4d82fc
JJ
231 }
232
233 /// Report a lexical error with a given span.
234 pub fn err_span(&self, sp: Span, m: &str) {
235 self.span_diagnostic.span_err(sp, m)
236 }
237
c1a9b12d 238
1a4d82fc 239 /// Report a fatal error spanning [`from_pos`, `to_pos`).
92a42be0 240 fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> FatalError {
3157f602 241 self.fatal_span(syntax_pos::mk_sp(from_pos, to_pos), m)
1a4d82fc
JJ
242 }
243
244 /// Report a lexical error spanning [`from_pos`, `to_pos`).
245 fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
3157f602 246 self.err_span(syntax_pos::mk_sp(from_pos, to_pos), m)
1a4d82fc
JJ
247 }
248
249 /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
250 /// escaped character to the error message
92a42be0 251 fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> FatalError {
1a4d82fc
JJ
252 let mut m = m.to_string();
253 m.push_str(": ");
9cc50fc6
SL
254 for c in c.escape_default() {
255 m.push(c)
256 }
92a42be0 257 self.fatal_span_(from_pos, to_pos, &m[..])
1a4d82fc 258 }
9cc50fc6
SL
259 fn struct_fatal_span_char(&self,
260 from_pos: BytePos,
261 to_pos: BytePos,
262 m: &str,
263 c: char)
264 -> DiagnosticBuilder<'a> {
265 let mut m = m.to_string();
266 m.push_str(": ");
267 for c in c.escape_default() {
268 m.push(c)
269 }
3157f602 270 self.span_diagnostic.struct_span_fatal(syntax_pos::mk_sp(from_pos, to_pos), &m[..])
9cc50fc6 271 }
1a4d82fc
JJ
272
273 /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
274 /// escaped character to the error message
275 fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) {
276 let mut m = m.to_string();
277 m.push_str(": ");
9cc50fc6
SL
278 for c in c.escape_default() {
279 m.push(c)
280 }
85aaf69f 281 self.err_span_(from_pos, to_pos, &m[..]);
1a4d82fc 282 }
9cc50fc6
SL
283 fn struct_err_span_char(&self,
284 from_pos: BytePos,
285 to_pos: BytePos,
286 m: &str,
287 c: char)
288 -> DiagnosticBuilder<'a> {
289 let mut m = m.to_string();
290 m.push_str(": ");
291 for c in c.escape_default() {
292 m.push(c)
293 }
3157f602 294 self.span_diagnostic.struct_span_err(syntax_pos::mk_sp(from_pos, to_pos), &m[..])
9cc50fc6 295 }
1a4d82fc
JJ
296
297 /// Report a lexical error spanning [`from_pos`, `to_pos`), appending the
298 /// offending string to the error message
92a42be0 299 fn fatal_span_verbose(&self, from_pos: BytePos, to_pos: BytePos, mut m: String) -> FatalError {
1a4d82fc 300 m.push_str(": ");
85aaf69f
SL
301 let from = self.byte_offset(from_pos).to_usize();
302 let to = self.byte_offset(to_pos).to_usize();
c34b1796 303 m.push_str(&self.source_text[from..to]);
92a42be0 304 self.fatal_span_(from_pos, to_pos, &m[..])
1a4d82fc
JJ
305 }
306
307 /// Advance peek_tok and peek_span to refer to the next token, and
308 /// possibly update the interner.
a7813a04 309 fn advance_token(&mut self) -> Result<(), ()> {
1a4d82fc
JJ
310 match self.scan_whitespace_or_comment() {
311 Some(comment) => {
312 self.peek_span = comment.sp;
313 self.peek_tok = comment.tok;
9cc50fc6 314 }
1a4d82fc
JJ
315 None => {
316 if self.is_eof() {
317 self.peek_tok = token::Eof;
3157f602 318 self.peek_span = syntax_pos::mk_sp(self.filemap.end_pos, self.filemap.end_pos);
1a4d82fc
JJ
319 } else {
320 let start_bytepos = self.last_pos;
a7813a04 321 self.peek_tok = self.next_token_inner()?;
3157f602 322 self.peek_span = syntax_pos::mk_sp(start_bytepos, self.last_pos);
1a4d82fc
JJ
323 };
324 }
325 }
a7813a04 326 Ok(())
1a4d82fc
JJ
327 }
328
329 fn byte_offset(&self, pos: BytePos) -> BytePos {
330 (pos - self.filemap.start_pos)
331 }
332
333 /// Calls `f` with a string slice of the source text spanning from `start`
334 /// up to but excluding `self.last_pos`, meaning the slice does not include
335 /// the character `self.curr`.
9cc50fc6
SL
336 pub fn with_str_from<T, F>(&self, start: BytePos, f: F) -> T
337 where F: FnOnce(&str) -> T
1a4d82fc
JJ
338 {
339 self.with_str_from_to(start, self.last_pos, f)
340 }
341
342 /// Create a Name from a given offset to the current offset, each
343 /// adjusted 1 towards each other (assumes that on either side there is a
344 /// single-byte delimiter).
345 pub fn name_from(&self, start: BytePos) -> ast::Name {
346 debug!("taking an ident from {:?} to {:?}", start, self.last_pos);
347 self.with_str_from(start, token::intern)
348 }
349
350 /// As name_from, with an explicit endpoint.
351 pub fn name_from_to(&self, start: BytePos, end: BytePos) -> ast::Name {
352 debug!("taking an ident from {:?} to {:?}", start, end);
353 self.with_str_from_to(start, end, token::intern)
354 }
355
356 /// Calls `f` with a string slice of the source text spanning from `start`
357 /// up to but excluding `end`.
9cc50fc6
SL
358 fn with_str_from_to<T, F>(&self, start: BytePos, end: BytePos, f: F) -> T
359 where F: FnOnce(&str) -> T
1a4d82fc 360 {
9cc50fc6 361 f(&self.source_text[self.byte_offset(start).to_usize()..self.byte_offset(end).to_usize()])
1a4d82fc
JJ
362 }
363
364 /// Converts CRLF to LF in the given string, raising an error on bare CR.
9cc50fc6 365 fn translate_crlf<'b>(&self, start: BytePos, s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
85aaf69f 366 let mut i = 0;
1a4d82fc 367 while i < s.len() {
d9579d0f 368 let ch = char_at(s, i);
c34b1796 369 let next = i + ch.len_utf8();
1a4d82fc 370 if ch == '\r' {
d9579d0f
AL
371 if next < s.len() && char_at(s, next) == '\n' {
372 return translate_crlf_(self, start, s, errmsg, i).into();
1a4d82fc
JJ
373 }
374 let pos = start + BytePos(i as u32);
375 let end_pos = start + BytePos(next as u32);
376 self.err_span_(pos, end_pos, errmsg);
377 }
378 i = next;
379 }
d9579d0f 380 return s.into();
1a4d82fc 381
9cc50fc6
SL
382 fn translate_crlf_(rdr: &StringReader,
383 start: BytePos,
384 s: &str,
385 errmsg: &str,
386 mut i: usize)
387 -> String {
1a4d82fc
JJ
388 let mut buf = String::with_capacity(s.len());
389 let mut j = 0;
390 while i < s.len() {
d9579d0f 391 let ch = char_at(s, i);
c34b1796 392 let next = i + ch.len_utf8();
1a4d82fc 393 if ch == '\r' {
9cc50fc6
SL
394 if j < i {
395 buf.push_str(&s[j..i]);
396 }
1a4d82fc 397 j = next;
d9579d0f 398 if next >= s.len() || char_at(s, next) != '\n' {
1a4d82fc
JJ
399 let pos = start + BytePos(i as u32);
400 let end_pos = start + BytePos(next as u32);
401 rdr.err_span_(pos, end_pos, errmsg);
402 }
403 }
404 i = next;
405 }
9cc50fc6
SL
406 if j < s.len() {
407 buf.push_str(&s[j..]);
408 }
1a4d82fc
JJ
409 buf
410 }
411 }
412
413
414 /// Advance the StringReader by one character. If a newline is
415 /// discovered, add it to the FileMap's list of line start offsets.
416 pub fn bump(&mut self) {
417 self.last_pos = self.pos;
85aaf69f 418 let current_byte_offset = self.byte_offset(self.pos).to_usize();
c34b1796 419 if current_byte_offset < self.source_text.len() {
1a4d82fc
JJ
420 assert!(self.curr.is_some());
421 let last_char = self.curr.unwrap();
d9579d0f 422 let ch = char_at(&self.source_text, current_byte_offset);
c34b1796
AL
423 let next = current_byte_offset + ch.len_utf8();
424 let byte_offset_diff = next - current_byte_offset;
85aaf69f 425 self.pos = self.pos + Pos::from_usize(byte_offset_diff);
c34b1796 426 self.curr = Some(ch);
85aaf69f 427 self.col = self.col + CharPos(1);
1a4d82fc 428 if last_char == '\n' {
9e0c209e
SL
429 if self.save_new_lines {
430 self.filemap.next_line(self.last_pos);
431 }
85aaf69f 432 self.col = CharPos(0);
1a4d82fc
JJ
433 }
434
435 if byte_offset_diff > 1 {
436 self.filemap.record_multibyte_char(self.last_pos, byte_offset_diff);
437 }
438 } else {
439 self.curr = None;
440 }
441 }
442
443 pub fn nextch(&self) -> Option<char> {
85aaf69f 444 let offset = self.byte_offset(self.pos).to_usize();
c34b1796 445 if offset < self.source_text.len() {
d9579d0f 446 Some(char_at(&self.source_text, offset))
1a4d82fc
JJ
447 } else {
448 None
449 }
450 }
451
452 pub fn nextch_is(&self, c: char) -> bool {
453 self.nextch() == Some(c)
454 }
455
456 pub fn nextnextch(&self) -> Option<char> {
85aaf69f 457 let offset = self.byte_offset(self.pos).to_usize();
c34b1796 458 let s = &self.source_text[..];
9cc50fc6
SL
459 if offset >= s.len() {
460 return None;
461 }
d9579d0f 462 let next = offset + char_at(s, offset).len_utf8();
1a4d82fc 463 if next < s.len() {
d9579d0f 464 Some(char_at(s, next))
1a4d82fc
JJ
465 } else {
466 None
467 }
468 }
469
470 pub fn nextnextch_is(&self, c: char) -> bool {
471 self.nextnextch() == Some(c)
472 }
473
474 /// Eats <XID_start><XID_continue>*, if possible.
475 fn scan_optional_raw_name(&mut self) -> Option<ast::Name> {
476 if !ident_start(self.curr) {
9cc50fc6 477 return None;
1a4d82fc
JJ
478 }
479 let start = self.last_pos;
480 while ident_continue(self.curr) {
481 self.bump();
482 }
483
484 self.with_str_from(start, |string| {
485 if string == "_" {
486 None
487 } else {
488 Some(token::intern(string))
489 }
490 })
491 }
492
493 /// PRECONDITION: self.curr is not whitespace
494 /// Eats any kind of comment.
495 fn scan_comment(&mut self) -> Option<TokenAndSpan> {
3157f602
XL
496 if let Some(c) = self.curr {
497 if c.is_whitespace() {
498 self.span_diagnostic.span_err(syntax_pos::mk_sp(self.last_pos, self.last_pos),
499 "called consume_any_line_comment, but there \
500 was whitespace");
9cc50fc6 501 }
1a4d82fc
JJ
502 }
503
504 if self.curr_is('/') {
505 match self.nextch() {
506 Some('/') => {
507 self.bump();
508 self.bump();
62682a34 509
1a4d82fc 510 // line comments starting with "///" or "//!" are doc-comments
62682a34
SL
511 let doc_comment = self.curr_is('/') || self.curr_is('!');
512 let start_bpos = if doc_comment {
513 self.pos - BytePos(3)
514 } else {
515 self.last_pos - BytePos(2)
516 };
517
518 while !self.is_eof() {
519 match self.curr.unwrap() {
520 '\n' => break,
521 '\r' => {
522 if self.nextch_is('\n') {
523 // CRLF
9cc50fc6 524 break;
62682a34 525 } else if doc_comment {
9cc50fc6
SL
526 self.err_span_(self.last_pos,
527 self.pos,
62682a34 528 "bare CR not allowed in doc-comment");
1a4d82fc 529 }
1a4d82fc 530 }
9cc50fc6 531 _ => (),
1a4d82fc 532 }
62682a34
SL
533 self.bump();
534 }
535
536 return if doc_comment {
537 self.with_str_from(start_bpos, |string| {
538 // comments with only more "/"s are not doc comments
1a4d82fc
JJ
539 let tok = if is_doc_comment(string) {
540 token::DocComment(token::intern(string))
541 } else {
542 token::Comment
543 };
544
62682a34 545 Some(TokenAndSpan {
1a4d82fc 546 tok: tok,
3157f602 547 sp: syntax_pos::mk_sp(start_bpos, self.last_pos),
62682a34
SL
548 })
549 })
1a4d82fc 550 } else {
62682a34 551 Some(TokenAndSpan {
1a4d82fc 552 tok: token::Comment,
3157f602 553 sp: syntax_pos::mk_sp(start_bpos, self.last_pos),
62682a34 554 })
9cc50fc6 555 };
1a4d82fc
JJ
556 }
557 Some('*') => {
9cc50fc6
SL
558 self.bump();
559 self.bump();
1a4d82fc
JJ
560 self.scan_block_comment()
561 }
9cc50fc6 562 _ => None,
1a4d82fc
JJ
563 }
564 } else if self.curr_is('#') {
565 if self.nextch_is('!') {
566
567 // Parse an inner attribute.
568 if self.nextnextch_is('[') {
569 return None;
570 }
571
572 // I guess this is the only way to figure out if
573 // we're at the beginning of the file...
574 let cmap = CodeMap::new();
575 cmap.files.borrow_mut().push(self.filemap.clone());
576 let loc = cmap.lookup_char_pos_adj(self.last_pos);
577 debug!("Skipping a shebang");
85aaf69f 578 if loc.line == 1 && loc.col == CharPos(0) {
1a4d82fc
JJ
579 // FIXME: Add shebang "token", return it
580 let start = self.last_pos;
9cc50fc6
SL
581 while !self.curr_is('\n') && !self.is_eof() {
582 self.bump();
583 }
1a4d82fc
JJ
584 return Some(TokenAndSpan {
585 tok: token::Shebang(self.name_from(start)),
3157f602 586 sp: syntax_pos::mk_sp(start, self.last_pos),
1a4d82fc
JJ
587 });
588 }
589 }
590 None
591 } else {
592 None
593 }
594 }
595
596 /// If there is whitespace, shebang, or a comment, scan it. Otherwise,
597 /// return None.
598 fn scan_whitespace_or_comment(&mut self) -> Option<TokenAndSpan> {
599 match self.curr.unwrap_or('\0') {
600 // # to handle shebang at start of file -- this is the entry point
601 // for skipping over all "junk"
602 '/' | '#' => {
603 let c = self.scan_comment();
604 debug!("scanning a comment {:?}", c);
605 c
54a0048b
SL
606 },
607 c if is_pattern_whitespace(Some(c)) => {
1a4d82fc 608 let start_bpos = self.last_pos;
54a0048b 609 while is_pattern_whitespace(self.curr) {
9cc50fc6
SL
610 self.bump();
611 }
1a4d82fc
JJ
612 let c = Some(TokenAndSpan {
613 tok: token::Whitespace,
3157f602 614 sp: syntax_pos::mk_sp(start_bpos, self.last_pos),
1a4d82fc
JJ
615 });
616 debug!("scanning whitespace: {:?}", c);
617 c
9cc50fc6
SL
618 }
619 _ => None,
1a4d82fc
JJ
620 }
621 }
622
623 /// Might return a sugared-doc-attr
624 fn scan_block_comment(&mut self) -> Option<TokenAndSpan> {
625 // block comments starting with "/**" or "/*!" are doc-comments
626 let is_doc_comment = self.curr_is('*') || self.curr_is('!');
627 let start_bpos = self.last_pos - BytePos(2);
628
85aaf69f 629 let mut level: isize = 1;
1a4d82fc
JJ
630 let mut has_cr = false;
631 while level > 0 {
632 if self.is_eof() {
633 let msg = if is_doc_comment {
634 "unterminated block doc-comment"
635 } else {
636 "unterminated block comment"
637 };
638 let last_bpos = self.last_pos;
92a42be0 639 panic!(self.fatal_span_(start_bpos, last_bpos, msg));
1a4d82fc
JJ
640 }
641 let n = self.curr.unwrap();
642 match n {
643 '/' if self.nextch_is('*') => {
644 level += 1;
645 self.bump();
646 }
647 '*' if self.nextch_is('/') => {
648 level -= 1;
649 self.bump();
650 }
651 '\r' => {
652 has_cr = true;
653 }
9cc50fc6 654 _ => (),
1a4d82fc
JJ
655 }
656 self.bump();
657 }
658
659 self.with_str_from(start_bpos, |string| {
660 // but comments with only "*"s between two "/"s are not
661 let tok = if is_block_doc_comment(string) {
662 let string = if has_cr {
9cc50fc6
SL
663 self.translate_crlf(start_bpos,
664 string,
1a4d82fc 665 "bare CR not allowed in block doc-comment")
9cc50fc6
SL
666 } else {
667 string.into()
668 };
85aaf69f 669 token::DocComment(token::intern(&string[..]))
1a4d82fc
JJ
670 } else {
671 token::Comment
672 };
673
9cc50fc6 674 Some(TokenAndSpan {
1a4d82fc 675 tok: tok,
3157f602 676 sp: syntax_pos::mk_sp(start_bpos, self.last_pos),
1a4d82fc
JJ
677 })
678 })
679 }
680
c34b1796
AL
681 /// Scan through any digits (base `scan_radix`) or underscores,
682 /// and return how many digits there were.
683 ///
684 /// `real_radix` represents the true radix of the number we're
685 /// interested in, and errors will be emitted for any digits
686 /// between `real_radix` and `scan_radix`.
687 fn scan_digits(&mut self, real_radix: u32, scan_radix: u32) -> usize {
688 assert!(real_radix <= scan_radix);
85aaf69f 689 let mut len = 0;
1a4d82fc
JJ
690 loop {
691 let c = self.curr;
9cc50fc6
SL
692 if c == Some('_') {
693 debug!("skipping a _");
694 self.bump();
695 continue;
696 }
c34b1796 697 match c.and_then(|cc| cc.to_digit(scan_radix)) {
1a4d82fc
JJ
698 Some(_) => {
699 debug!("{:?} in scan_digits", c);
c34b1796
AL
700 // check that the hypothetical digit is actually
701 // in range for the true radix
702 if c.unwrap().to_digit(real_radix).is_none() {
9cc50fc6
SL
703 self.err_span_(self.last_pos,
704 self.pos,
705 &format!("invalid digit for a base {} literal", real_radix));
c34b1796 706 }
1a4d82fc
JJ
707 len += 1;
708 self.bump();
709 }
9cc50fc6 710 _ => return len,
1a4d82fc 711 }
9cc50fc6 712 }
1a4d82fc
JJ
713 }
714
715 /// Lex a LIT_INTEGER or a LIT_FLOAT
716 fn scan_number(&mut self, c: char) -> token::Lit {
c1a9b12d 717 let num_digits;
1a4d82fc
JJ
718 let mut base = 10;
719 let start_bpos = self.last_pos;
720
721 self.bump();
722
723 if c == '0' {
724 match self.curr.unwrap_or('\0') {
9cc50fc6
SL
725 'b' => {
726 self.bump();
727 base = 2;
728 num_digits = self.scan_digits(2, 10);
729 }
730 'o' => {
731 self.bump();
732 base = 8;
733 num_digits = self.scan_digits(8, 10);
734 }
735 'x' => {
736 self.bump();
737 base = 16;
738 num_digits = self.scan_digits(16, 16);
739 }
1a4d82fc 740 '0'...'9' | '_' | '.' => {
c34b1796 741 num_digits = self.scan_digits(10, 10) + 1;
1a4d82fc
JJ
742 }
743 _ => {
744 // just a 0
745 return token::Integer(self.name_from(start_bpos));
746 }
747 }
748 } else if c.is_digit(10) {
c34b1796 749 num_digits = self.scan_digits(10, 10) + 1;
1a4d82fc
JJ
750 } else {
751 num_digits = 0;
752 }
753
754 if num_digits == 0 {
9cc50fc6
SL
755 self.err_span_(start_bpos,
756 self.last_pos,
757 "no valid digits found for number");
1a4d82fc
JJ
758 return token::Integer(token::intern("0"));
759 }
760
761 // might be a float, but don't be greedy if this is actually an
762 // integer literal followed by field/method access or a range pattern
763 // (`0..2` and `12.foo()`)
9cc50fc6
SL
764 if self.curr_is('.') && !self.nextch_is('.') &&
765 !self.nextch()
766 .unwrap_or('\0')
767 .is_xid_start() {
1a4d82fc
JJ
768 // might have stuff after the ., and if it does, it needs to start
769 // with a number
770 self.bump();
771 if self.curr.unwrap_or('\0').is_digit(10) {
c34b1796 772 self.scan_digits(10, 10);
1a4d82fc
JJ
773 self.scan_float_exponent();
774 }
775 let last_pos = self.last_pos;
776 self.check_float_base(start_bpos, last_pos, base);
777 return token::Float(self.name_from(start_bpos));
778 } else {
779 // it might be a float if it has an exponent
780 if self.curr_is('e') || self.curr_is('E') {
781 self.scan_float_exponent();
782 let last_pos = self.last_pos;
783 self.check_float_base(start_bpos, last_pos, base);
784 return token::Float(self.name_from(start_bpos));
785 }
786 // but we certainly have an integer!
787 return token::Integer(self.name_from(start_bpos));
788 }
789 }
790
791 /// Scan over `n_digits` hex digits, stopping at `delim`, reporting an
792 /// error if too many or too few digits are encountered.
9cc50fc6 793 fn scan_hex_digits(&mut self, n_digits: usize, delim: char, below_0x7f_only: bool) -> bool {
1a4d82fc
JJ
794 debug!("scanning {} digits until {:?}", n_digits, delim);
795 let start_bpos = self.last_pos;
796 let mut accum_int = 0;
797
c34b1796 798 let mut valid = true;
85aaf69f 799 for _ in 0..n_digits {
1a4d82fc
JJ
800 if self.is_eof() {
801 let last_bpos = self.last_pos;
92a42be0
SL
802 panic!(self.fatal_span_(start_bpos,
803 last_bpos,
804 "unterminated numeric character escape"));
1a4d82fc
JJ
805 }
806 if self.curr_is(delim) {
807 let last_bpos = self.last_pos;
9cc50fc6
SL
808 self.err_span_(start_bpos,
809 last_bpos,
810 "numeric character escape is too short");
c34b1796 811 valid = false;
1a4d82fc
JJ
812 break;
813 }
814 let c = self.curr.unwrap_or('\x00');
815 accum_int *= 16;
816 accum_int += c.to_digit(16).unwrap_or_else(|| {
9cc50fc6
SL
817 self.err_span_char(self.last_pos,
818 self.pos,
819 "invalid character in numeric character escape",
820 c);
c34b1796
AL
821
822 valid = false;
1a4d82fc 823 0
c34b1796 824 });
1a4d82fc
JJ
825 self.bump();
826 }
827
828 if below_0x7f_only && accum_int >= 0x80 {
829 self.err_span_(start_bpos,
830 self.last_pos,
9cc50fc6
SL
831 "this form of character escape may only be used with characters in \
832 the range [\\x00-\\x7f]");
c34b1796 833 valid = false;
1a4d82fc
JJ
834 }
835
836 match char::from_u32(accum_int) {
c34b1796 837 Some(_) => valid,
1a4d82fc
JJ
838 None => {
839 let last_bpos = self.last_pos;
c1a9b12d 840 self.err_span_(start_bpos, last_bpos, "invalid numeric character escape");
1a4d82fc
JJ
841 false
842 }
843 }
844 }
845
1a4d82fc
JJ
846 /// Scan for a single (possibly escaped) byte or char
847 /// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
848 /// `start` is the position of `first_source_char`, which is already consumed.
849 ///
850 /// Returns true if there was a valid char/byte, false otherwise.
9cc50fc6
SL
851 fn scan_char_or_byte(&mut self,
852 start: BytePos,
853 first_source_char: char,
854 ascii_only: bool,
855 delim: char)
856 -> bool {
1a4d82fc
JJ
857 match first_source_char {
858 '\\' => {
859 // '\X' for some X must be a character constant:
860 let escaped = self.curr;
861 let escaped_pos = self.last_pos;
862 self.bump();
863 match escaped {
9cc50fc6 864 None => {} // EOF here is an error that will be checked later.
1a4d82fc
JJ
865 Some(e) => {
866 return match e {
867 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
868 'x' => self.scan_byte_escape(delim, !ascii_only),
c1a9b12d
SL
869 'u' => {
870 let valid = if self.curr_is('{') {
871 self.scan_unicode_escape(delim) && !ascii_only
872 } else {
3157f602 873 let span = syntax_pos::mk_sp(start, self.last_pos);
9cc50fc6
SL
874 self.span_diagnostic
875 .struct_span_err(span, "incorrect unicode escape sequence")
876 .span_help(span,
877 "format of unicode escape sequences is \
878 `\\u{…}`")
879 .emit();
c1a9b12d
SL
880 false
881 };
882 if ascii_only {
9cc50fc6
SL
883 self.err_span_(start,
884 self.last_pos,
885 "unicode escape sequences cannot be used as a \
886 byte or in a byte string");
62682a34 887 }
c1a9b12d
SL
888 valid
889
1a4d82fc
JJ
890 }
891 '\n' if delim == '"' => {
892 self.consume_whitespace();
893 true
9cc50fc6 894 }
1a4d82fc
JJ
895 '\r' if delim == '"' && self.curr_is('\n') => {
896 self.consume_whitespace();
897 true
898 }
899 c => {
900 let last_pos = self.last_pos;
9cc50fc6
SL
901 let mut err = self.struct_err_span_char(escaped_pos,
902 last_pos,
903 if ascii_only {
904 "unknown byte escape"
905 } else {
906 "unknown character \
907 escape"
908 },
909 c);
1a4d82fc 910 if e == '\r' {
3157f602 911 err.span_help(syntax_pos::mk_sp(escaped_pos, last_pos),
9cc50fc6
SL
912 "this is an isolated carriage return; consider \
913 checking your editor and version control \
914 settings");
1a4d82fc 915 }
9346a6ac 916 if (e == '{' || e == '}') && !ascii_only {
3157f602 917 err.span_help(syntax_pos::mk_sp(escaped_pos, last_pos),
9cc50fc6
SL
918 "if used in a formatting string, curly braces \
919 are escaped with `{{` and `}}`");
9346a6ac 920 }
9cc50fc6 921 err.emit();
1a4d82fc
JJ
922 false
923 }
924 }
925 }
926 }
927 }
928 '\t' | '\n' | '\r' | '\'' if delim == '\'' => {
929 let last_pos = self.last_pos;
9cc50fc6
SL
930 self.err_span_char(start,
931 last_pos,
932 if ascii_only {
933 "byte constant must be escaped"
934 } else {
935 "character constant must be escaped"
936 },
937 first_source_char);
1a4d82fc
JJ
938 return false;
939 }
940 '\r' => {
941 if self.curr_is('\n') {
942 self.bump();
943 return true;
944 } else {
9cc50fc6
SL
945 self.err_span_(start,
946 self.last_pos,
1a4d82fc
JJ
947 "bare CR not allowed in string, use \\r instead");
948 return false;
949 }
950 }
9cc50fc6
SL
951 _ => {
952 if ascii_only && first_source_char > '\x7F' {
953 let last_pos = self.last_pos;
a7813a04
XL
954 self.err_span_(start,
955 last_pos,
956 "byte constant must be ASCII. Use a \\xHH escape for a \
957 non-ASCII byte");
9cc50fc6
SL
958 return false;
959 }
1a4d82fc
JJ
960 }
961 }
962 true
963 }
964
965 /// Scan over a \u{...} escape
966 ///
967 /// At this point, we have already seen the \ and the u, the { is the current character. We
968 /// will read at least one digit, and up to 6, and pass over the }.
969 fn scan_unicode_escape(&mut self, delim: char) -> bool {
970 self.bump(); // past the {
971 let start_bpos = self.last_pos;
85aaf69f 972 let mut count = 0;
1a4d82fc 973 let mut accum_int = 0;
c34b1796 974 let mut valid = true;
1a4d82fc
JJ
975
976 while !self.curr_is('}') && count <= 6 {
977 let c = match self.curr {
978 Some(c) => c,
979 None => {
9cc50fc6
SL
980 panic!(self.fatal_span_(start_bpos,
981 self.last_pos,
92a42be0 982 "unterminated unicode escape (found EOF)"));
1a4d82fc
JJ
983 }
984 };
985 accum_int *= 16;
986 accum_int += c.to_digit(16).unwrap_or_else(|| {
987 if c == delim {
9cc50fc6
SL
988 panic!(self.fatal_span_(self.last_pos,
989 self.pos,
92a42be0 990 "unterminated unicode escape (needed a `}`)"));
1a4d82fc 991 } else {
9cc50fc6
SL
992 self.err_span_char(self.last_pos,
993 self.pos,
994 "invalid character in unicode escape",
995 c);
1a4d82fc 996 }
c34b1796
AL
997 valid = false;
998 0
999 });
1a4d82fc
JJ
1000 self.bump();
1001 count += 1;
1002 }
1003
1004 if count > 6 {
9cc50fc6
SL
1005 self.err_span_(start_bpos,
1006 self.last_pos,
1007 "overlong unicode escape (can have at most 6 hex digits)");
c34b1796 1008 valid = false;
1a4d82fc
JJ
1009 }
1010
c34b1796 1011 if valid && (char::from_u32(accum_int).is_none() || count == 0) {
9cc50fc6
SL
1012 self.err_span_(start_bpos,
1013 self.last_pos,
1014 "invalid unicode character escape");
62682a34 1015 valid = false;
1a4d82fc
JJ
1016 }
1017
c1a9b12d 1018 self.bump(); // past the ending }
1a4d82fc
JJ
1019 valid
1020 }
1021
1022 /// Scan over a float exponent.
1023 fn scan_float_exponent(&mut self) {
1024 if self.curr_is('e') || self.curr_is('E') {
1025 self.bump();
1026 if self.curr_is('-') || self.curr_is('+') {
1027 self.bump();
1028 }
c34b1796 1029 if self.scan_digits(10, 10) == 0 {
9cc50fc6
SL
1030 self.err_span_(self.last_pos,
1031 self.pos,
1032 "expected at least one digit in exponent")
1a4d82fc
JJ
1033 }
1034 }
1035 }
1036
1037 /// Check that a base is valid for a floating literal, emitting a nice
1038 /// error if it isn't.
85aaf69f 1039 fn check_float_base(&mut self, start_bpos: BytePos, last_bpos: BytePos, base: usize) {
1a4d82fc 1040 match base {
9cc50fc6
SL
1041 16 => {
1042 self.err_span_(start_bpos,
1043 last_bpos,
1044 "hexadecimal float literal is not supported")
1045 }
1046 8 => {
1047 self.err_span_(start_bpos,
1048 last_bpos,
1049 "octal float literal is not supported")
1050 }
1051 2 => {
1052 self.err_span_(start_bpos,
1053 last_bpos,
1054 "binary float literal is not supported")
1055 }
1056 _ => (),
1a4d82fc
JJ
1057 }
1058 }
1059
1060 fn binop(&mut self, op: token::BinOpToken) -> token::Token {
1061 self.bump();
1062 if self.curr_is('=') {
1063 self.bump();
1064 return token::BinOpEq(op);
1065 } else {
1066 return token::BinOp(op);
1067 }
1068 }
1069
1070 /// Return the next token from the string, advances the input past that
1071 /// token, and updates the interner
a7813a04 1072 fn next_token_inner(&mut self) -> Result<token::Token, ()> {
1a4d82fc 1073 let c = self.curr;
9cc50fc6
SL
1074 if ident_start(c) &&
1075 match (c.unwrap(), self.nextch(), self.nextnextch()) {
1a4d82fc
JJ
1076 // Note: r as in r" or r#" is part of a raw string literal,
1077 // b as in b' is part of a byte literal.
1078 // They are not identifiers, and are handled further down.
9cc50fc6
SL
1079 ('r', Some('"'), _) |
1080 ('r', Some('#'), _) |
1081 ('b', Some('"'), _) |
1082 ('b', Some('\''), _) |
1083 ('b', Some('r'), Some('"')) |
1084 ('b', Some('r'), Some('#')) => false,
1085 _ => true,
1a4d82fc
JJ
1086 } {
1087 let start = self.last_pos;
1088 while ident_continue(self.curr) {
1089 self.bump();
1090 }
1091
a7813a04 1092 return Ok(self.with_str_from(start, |string| {
1a4d82fc
JJ
1093 if string == "_" {
1094 token::Underscore
1095 } else {
1096 // FIXME: perform NFKC normalization here. (Issue #2253)
a7813a04 1097 token::Ident(str_to_ident(string))
1a4d82fc 1098 }
a7813a04 1099 }));
1a4d82fc
JJ
1100 }
1101
1102 if is_dec_digit(c) {
1103 let num = self.scan_number(c.unwrap());
1104 let suffix = self.scan_optional_raw_name();
1105 debug!("next_token_inner: scanned number {:?}, {:?}", num, suffix);
a7813a04 1106 return Ok(token::Literal(num, suffix));
1a4d82fc
JJ
1107 }
1108
1a4d82fc 1109 match c.expect("next_token_inner called at EOF") {
9cc50fc6
SL
1110 // One-byte tokens.
1111 ';' => {
1a4d82fc 1112 self.bump();
a7813a04 1113 return Ok(token::Semi);
9cc50fc6
SL
1114 }
1115 ',' => {
1116 self.bump();
a7813a04 1117 return Ok(token::Comma);
9cc50fc6
SL
1118 }
1119 '.' => {
1120 self.bump();
1121 return if self.curr_is('.') {
1122 self.bump();
1123 if self.curr_is('.') {
1124 self.bump();
a7813a04 1125 Ok(token::DotDotDot)
9cc50fc6 1126 } else {
a7813a04 1127 Ok(token::DotDot)
9cc50fc6
SL
1128 }
1129 } else {
a7813a04 1130 Ok(token::Dot)
9cc50fc6
SL
1131 };
1132 }
1133 '(' => {
1134 self.bump();
a7813a04 1135 return Ok(token::OpenDelim(token::Paren));
9cc50fc6
SL
1136 }
1137 ')' => {
1138 self.bump();
a7813a04 1139 return Ok(token::CloseDelim(token::Paren));
9cc50fc6
SL
1140 }
1141 '{' => {
1142 self.bump();
a7813a04 1143 return Ok(token::OpenDelim(token::Brace));
9cc50fc6
SL
1144 }
1145 '}' => {
1146 self.bump();
a7813a04 1147 return Ok(token::CloseDelim(token::Brace));
9cc50fc6
SL
1148 }
1149 '[' => {
1150 self.bump();
a7813a04 1151 return Ok(token::OpenDelim(token::Bracket));
9cc50fc6
SL
1152 }
1153 ']' => {
1154 self.bump();
a7813a04 1155 return Ok(token::CloseDelim(token::Bracket));
9cc50fc6
SL
1156 }
1157 '@' => {
1158 self.bump();
a7813a04 1159 return Ok(token::At);
9cc50fc6
SL
1160 }
1161 '#' => {
1162 self.bump();
a7813a04 1163 return Ok(token::Pound);
9cc50fc6
SL
1164 }
1165 '~' => {
1166 self.bump();
a7813a04 1167 return Ok(token::Tilde);
9cc50fc6
SL
1168 }
1169 '?' => {
1170 self.bump();
a7813a04 1171 return Ok(token::Question);
9cc50fc6
SL
1172 }
1173 ':' => {
1174 self.bump();
1175 if self.curr_is(':') {
1176 self.bump();
a7813a04 1177 return Ok(token::ModSep);
9cc50fc6 1178 } else {
a7813a04 1179 return Ok(token::Colon);
9cc50fc6 1180 }
1a4d82fc 1181 }
1a4d82fc 1182
9cc50fc6
SL
1183 '$' => {
1184 self.bump();
a7813a04 1185 return Ok(token::Dollar);
9cc50fc6 1186 }
1a4d82fc 1187
9cc50fc6
SL
1188 // Multi-byte tokens.
1189 '=' => {
1a4d82fc 1190 self.bump();
9cc50fc6
SL
1191 if self.curr_is('=') {
1192 self.bump();
a7813a04 1193 return Ok(token::EqEq);
9cc50fc6
SL
1194 } else if self.curr_is('>') {
1195 self.bump();
a7813a04 1196 return Ok(token::FatArrow);
9cc50fc6 1197 } else {
a7813a04 1198 return Ok(token::Eq);
9cc50fc6
SL
1199 }
1200 }
1201 '!' => {
1a4d82fc 1202 self.bump();
9cc50fc6
SL
1203 if self.curr_is('=') {
1204 self.bump();
a7813a04 1205 return Ok(token::Ne);
9cc50fc6 1206 } else {
a7813a04 1207 return Ok(token::Not);
9cc50fc6 1208 }
1a4d82fc 1209 }
9cc50fc6 1210 '<' => {
1a4d82fc 1211 self.bump();
9cc50fc6
SL
1212 match self.curr.unwrap_or('\x00') {
1213 '=' => {
1214 self.bump();
a7813a04 1215 return Ok(token::Le);
9cc50fc6
SL
1216 }
1217 '<' => {
a7813a04 1218 return Ok(self.binop(token::Shl));
9cc50fc6
SL
1219 }
1220 '-' => {
1221 self.bump();
1222 match self.curr.unwrap_or('\x00') {
1223 _ => {
a7813a04 1224 return Ok(token::LArrow);
9cc50fc6
SL
1225 }
1226 }
1227 }
1228 _ => {
a7813a04 1229 return Ok(token::Lt);
9cc50fc6
SL
1230 }
1231 }
1232 }
1233 '>' => {
1a4d82fc
JJ
1234 self.bump();
1235 match self.curr.unwrap_or('\x00') {
9cc50fc6
SL
1236 '=' => {
1237 self.bump();
a7813a04 1238 return Ok(token::Ge);
9cc50fc6
SL
1239 }
1240 '>' => {
a7813a04 1241 return Ok(self.binop(token::Shr));
9cc50fc6
SL
1242 }
1243 _ => {
a7813a04 1244 return Ok(token::Gt);
9cc50fc6 1245 }
1a4d82fc 1246 }
1a4d82fc 1247 }
9cc50fc6
SL
1248 '\'' => {
1249 // Either a character constant 'a' OR a lifetime name 'abc
1250 let start_with_quote = self.last_pos;
1251 self.bump();
1252 let start = self.last_pos;
1a4d82fc 1253
9cc50fc6
SL
1254 // the eof will be picked up by the final `'` check below
1255 let c2 = self.curr.unwrap_or('\x00');
1256 self.bump();
1a4d82fc 1257
9cc50fc6
SL
1258 // If the character is an ident start not followed by another single
1259 // quote, then this is a lifetime name:
1260 if ident_start(Some(c2)) && !self.curr_is('\'') {
1261 while ident_continue(self.curr) {
1262 self.bump();
1263 }
1264 // lifetimes shouldn't end with a single quote
1265 // if we find one, then this is an invalid character literal
1266 if self.curr_is('\'') {
1267 panic!(self.fatal_span_verbose(
1268 start_with_quote, self.pos,
1269 String::from("character literal may only contain one codepoint")));
1a4d82fc 1270
9cc50fc6 1271 }
1a4d82fc 1272
9cc50fc6
SL
1273 // Include the leading `'` in the real identifier, for macro
1274 // expansion purposes. See #12512 for the gory details of why
1275 // this is necessary.
1276 let ident = self.with_str_from(start, |lifetime_name| {
1277 str_to_ident(&format!("'{}", lifetime_name))
1278 });
1279
1280 // Conjure up a "keyword checking ident" to make sure that
1281 // the lifetime name is not a keyword.
1282 let keyword_checking_ident = self.with_str_from(start, |lifetime_name| {
1a4d82fc
JJ
1283 str_to_ident(lifetime_name)
1284 });
a7813a04 1285 let keyword_checking_token = &token::Ident(keyword_checking_ident);
9cc50fc6 1286 let last_bpos = self.last_pos;
a7813a04
XL
1287 if keyword_checking_token.is_any_keyword() &&
1288 !keyword_checking_token.is_keyword(keywords::Static) {
1289 self.err_span_(start, last_bpos, "lifetimes cannot use keyword names");
9cc50fc6
SL
1290 }
1291
a7813a04 1292 return Ok(token::Lifetime(ident));
1a4d82fc 1293 }
1a4d82fc 1294
9cc50fc6
SL
1295 let valid = self.scan_char_or_byte(start,
1296 c2,
1297 // ascii_only =
1298 false,
1299 '\'');
92a42be0 1300
9cc50fc6
SL
1301 if !self.curr_is('\'') {
1302 panic!(self.fatal_span_verbose(
1303 start_with_quote, self.last_pos,
1304 String::from("character literal may only contain one codepoint")));
1a4d82fc
JJ
1305 }
1306
9cc50fc6
SL
1307 let id = if valid {
1308 self.name_from(start)
1309 } else {
1310 token::intern("0")
1311 };
1312 self.bump(); // advance curr past token
1313 let suffix = self.scan_optional_raw_name();
a7813a04 1314 return Ok(token::Literal(token::Char(id), suffix));
1a4d82fc 1315 }
9cc50fc6 1316 'b' => {
1a4d82fc 1317 self.bump();
9cc50fc6
SL
1318 let lit = match self.curr {
1319 Some('\'') => self.scan_byte(),
1320 Some('"') => self.scan_byte_string(),
1321 Some('r') => self.scan_raw_byte_string(),
1322 _ => unreachable!(), // Should have been a token::Ident above.
1323 };
1324 let suffix = self.scan_optional_raw_name();
a7813a04 1325 return Ok(token::Literal(lit, suffix));
1a4d82fc 1326 }
9cc50fc6
SL
1327 '"' => {
1328 let start_bpos = self.last_pos;
1329 let mut valid = true;
1330 self.bump();
1331 while !self.curr_is('"') {
1332 if self.is_eof() {
1333 let last_bpos = self.last_pos;
1334 panic!(self.fatal_span_(start_bpos,
1335 last_bpos,
1336 "unterminated double quote string"));
1337 }
1a4d82fc 1338
9cc50fc6
SL
1339 let ch_start = self.last_pos;
1340 let ch = self.curr.unwrap();
1341 self.bump();
1342 valid &= self.scan_char_or_byte(ch_start,
1343 ch,
1344 // ascii_only =
1345 false,
1346 '"');
1347 }
1348 // adjust for the ASCII " at the start of the literal
1349 let id = if valid {
1350 self.name_from(start_bpos + BytePos(1))
1351 } else {
1352 token::intern("??")
1353 };
1354 self.bump();
1355 let suffix = self.scan_optional_raw_name();
a7813a04 1356 return Ok(token::Literal(token::Str_(id), suffix));
1a4d82fc 1357 }
9cc50fc6
SL
1358 'r' => {
1359 let start_bpos = self.last_pos;
1360 self.bump();
1361 let mut hash_count = 0;
1362 while self.curr_is('#') {
1363 self.bump();
1364 hash_count += 1;
1365 }
1366
1a4d82fc
JJ
1367 if self.is_eof() {
1368 let last_bpos = self.last_pos;
92a42be0 1369 panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string"));
9cc50fc6
SL
1370 } else if !self.curr_is('"') {
1371 let last_bpos = self.last_pos;
1372 let curr_char = self.curr.unwrap();
1373 panic!(self.fatal_span_char(start_bpos,
1374 last_bpos,
1375 "found invalid character; only `#` is allowed \
1376 in raw string delimitation",
1377 curr_char));
1a4d82fc 1378 }
9cc50fc6
SL
1379 self.bump();
1380 let content_start_bpos = self.last_pos;
1381 let mut content_end_bpos;
1382 let mut valid = true;
1383 'outer: loop {
1384 if self.is_eof() {
1385 let last_bpos = self.last_pos;
1386 panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string"));
1387 }
1388 // if self.curr_is('"') {
1389 // content_end_bpos = self.last_pos;
1390 // for _ in 0..hash_count {
1391 // self.bump();
1392 // if !self.curr_is('#') {
1393 // continue 'outer;
1394 let c = self.curr.unwrap();
1395 match c {
1396 '"' => {
1397 content_end_bpos = self.last_pos;
1398 for _ in 0..hash_count {
1399 self.bump();
1400 if !self.curr_is('#') {
1401 continue 'outer;
1402 }
1a4d82fc 1403 }
9cc50fc6 1404 break;
1a4d82fc 1405 }
9cc50fc6
SL
1406 '\r' => {
1407 if !self.nextch_is('\n') {
1408 let last_bpos = self.last_pos;
1409 self.err_span_(start_bpos,
1410 last_bpos,
1411 "bare CR not allowed in raw string, use \\r \
1412 instead");
1413 valid = false;
1414 }
1a4d82fc 1415 }
9cc50fc6 1416 _ => (),
1a4d82fc 1417 }
9cc50fc6 1418 self.bump();
1a4d82fc
JJ
1419 }
1420 self.bump();
9cc50fc6
SL
1421 let id = if valid {
1422 self.name_from_to(content_start_bpos, content_end_bpos)
1423 } else {
1424 token::intern("??")
1425 };
1426 let suffix = self.scan_optional_raw_name();
a7813a04 1427 return Ok(token::Literal(token::StrRaw(id, hash_count), suffix));
9cc50fc6
SL
1428 }
1429 '-' => {
1430 if self.nextch_is('>') {
1431 self.bump();
1432 self.bump();
a7813a04 1433 return Ok(token::RArrow);
9cc50fc6 1434 } else {
a7813a04 1435 return Ok(self.binop(token::Minus));
9cc50fc6
SL
1436 }
1437 }
1438 '&' => {
1439 if self.nextch_is('&') {
1440 self.bump();
1441 self.bump();
a7813a04 1442 return Ok(token::AndAnd);
9cc50fc6 1443 } else {
a7813a04 1444 return Ok(self.binop(token::And));
9cc50fc6
SL
1445 }
1446 }
1447 '|' => {
1448 match self.nextch() {
1449 Some('|') => {
1450 self.bump();
1451 self.bump();
a7813a04 1452 return Ok(token::OrOr);
9cc50fc6
SL
1453 }
1454 _ => {
a7813a04 1455 return Ok(self.binop(token::Or));
9cc50fc6
SL
1456 }
1457 }
1458 }
1459 '+' => {
a7813a04 1460 return Ok(self.binop(token::Plus));
9cc50fc6
SL
1461 }
1462 '*' => {
a7813a04 1463 return Ok(self.binop(token::Star));
9cc50fc6
SL
1464 }
1465 '/' => {
a7813a04 1466 return Ok(self.binop(token::Slash));
9cc50fc6
SL
1467 }
1468 '^' => {
a7813a04 1469 return Ok(self.binop(token::Caret));
9cc50fc6
SL
1470 }
1471 '%' => {
a7813a04 1472 return Ok(self.binop(token::Percent));
9cc50fc6
SL
1473 }
1474 c => {
1475 let last_bpos = self.last_pos;
1476 let bpos = self.pos;
1477 let mut err = self.struct_fatal_span_char(last_bpos,
1478 bpos,
1479 "unknown start of token",
1480 c);
1481 unicode_chars::check_for_substitution(&self, c, &mut err);
a7813a04
XL
1482 self.fatal_errs.push(err);
1483 Err(())
1a4d82fc 1484 }
1a4d82fc
JJ
1485 }
1486 }
1487
1488 fn consume_whitespace(&mut self) {
54a0048b 1489 while is_pattern_whitespace(self.curr) && !self.is_eof() {
9cc50fc6
SL
1490 self.bump();
1491 }
1a4d82fc
JJ
1492 }
1493
1494 fn read_to_eol(&mut self) -> String {
1495 let mut val = String::new();
1496 while !self.curr_is('\n') && !self.is_eof() {
1497 val.push(self.curr.unwrap());
1498 self.bump();
1499 }
9cc50fc6
SL
1500 if self.curr_is('\n') {
1501 self.bump();
1502 }
1503 return val;
1a4d82fc
JJ
1504 }
1505
1506 fn read_one_line_comment(&mut self) -> String {
1507 let val = self.read_to_eol();
9cc50fc6
SL
1508 assert!((val.as_bytes()[0] == b'/' && val.as_bytes()[1] == b'/') ||
1509 (val.as_bytes()[0] == b'#' && val.as_bytes()[1] == b'!'));
1a4d82fc
JJ
1510 return val;
1511 }
1512
1513 fn consume_non_eol_whitespace(&mut self) {
54a0048b 1514 while is_pattern_whitespace(self.curr) && !self.curr_is('\n') && !self.is_eof() {
1a4d82fc
JJ
1515 self.bump();
1516 }
1517 }
1518
1519 fn peeking_at_comment(&self) -> bool {
9cc50fc6
SL
1520 (self.curr_is('/') && self.nextch_is('/')) || (self.curr_is('/') && self.nextch_is('*')) ||
1521 // consider shebangs comments, but not inner attributes
1522 (self.curr_is('#') && self.nextch_is('!') && !self.nextnextch_is('['))
1a4d82fc
JJ
1523 }
1524
1525 fn scan_byte(&mut self) -> token::Lit {
1526 self.bump();
1527 let start = self.last_pos;
1528
1529 // the eof will be picked up by the final `'` check below
1530 let c2 = self.curr.unwrap_or('\x00');
1531 self.bump();
1532
9cc50fc6
SL
1533 let valid = self.scan_char_or_byte(start,
1534 c2,
1535 // ascii_only =
1536 true,
1537 '\'');
1a4d82fc
JJ
1538 if !self.curr_is('\'') {
1539 // Byte offsetting here is okay because the
1540 // character before position `start` are an
1541 // ascii single quote and ascii 'b'.
1542 let last_pos = self.last_pos;
9cc50fc6
SL
1543 panic!(self.fatal_span_verbose(start - BytePos(2),
1544 last_pos,
1545 "unterminated byte constant".to_string()));
1a4d82fc
JJ
1546 }
1547
9cc50fc6
SL
1548 let id = if valid {
1549 self.name_from(start)
1550 } else {
1551 token::intern("?")
1552 };
1a4d82fc
JJ
1553 self.bump(); // advance curr past token
1554 return token::Byte(id);
1555 }
1556
1557 fn scan_byte_escape(&mut self, delim: char, below_0x7f_only: bool) -> bool {
1558 self.scan_hex_digits(2, delim, below_0x7f_only)
1559 }
1560
1561 fn scan_byte_string(&mut self) -> token::Lit {
1562 self.bump();
1563 let start = self.last_pos;
1564 let mut valid = true;
1565
1566 while !self.curr_is('"') {
1567 if self.is_eof() {
1568 let last_pos = self.last_pos;
92a42be0 1569 panic!(self.fatal_span_(start, last_pos, "unterminated double quote byte string"));
1a4d82fc
JJ
1570 }
1571
1572 let ch_start = self.last_pos;
1573 let ch = self.curr.unwrap();
1574 self.bump();
9cc50fc6
SL
1575 valid &= self.scan_char_or_byte(ch_start,
1576 ch,
1577 // ascii_only =
1578 true,
1579 '"');
1a4d82fc 1580 }
9cc50fc6
SL
1581 let id = if valid {
1582 self.name_from(start)
1583 } else {
1584 token::intern("??")
1585 };
1a4d82fc 1586 self.bump();
e9174d1e 1587 return token::ByteStr(id);
1a4d82fc
JJ
1588 }
1589
1590 fn scan_raw_byte_string(&mut self) -> token::Lit {
1591 let start_bpos = self.last_pos;
1592 self.bump();
85aaf69f 1593 let mut hash_count = 0;
1a4d82fc
JJ
1594 while self.curr_is('#') {
1595 self.bump();
1596 hash_count += 1;
1597 }
1598
1599 if self.is_eof() {
1600 let last_pos = self.last_pos;
92a42be0 1601 panic!(self.fatal_span_(start_bpos, last_pos, "unterminated raw string"));
1a4d82fc
JJ
1602 } else if !self.curr_is('"') {
1603 let last_pos = self.last_pos;
1604 let ch = self.curr.unwrap();
9cc50fc6
SL
1605 panic!(self.fatal_span_char(start_bpos,
1606 last_pos,
1607 "found invalid character; only `#` is allowed in raw \
1608 string delimitation",
1609 ch));
1a4d82fc
JJ
1610 }
1611 self.bump();
1612 let content_start_bpos = self.last_pos;
1613 let mut content_end_bpos;
1614 'outer: loop {
1615 match self.curr {
1616 None => {
1617 let last_pos = self.last_pos;
92a42be0 1618 panic!(self.fatal_span_(start_bpos, last_pos, "unterminated raw string"))
9cc50fc6 1619 }
1a4d82fc
JJ
1620 Some('"') => {
1621 content_end_bpos = self.last_pos;
85aaf69f 1622 for _ in 0..hash_count {
1a4d82fc
JJ
1623 self.bump();
1624 if !self.curr_is('#') {
1625 continue 'outer;
1626 }
1627 }
1628 break;
9cc50fc6
SL
1629 }
1630 Some(c) => {
1631 if c > '\x7F' {
1632 let last_pos = self.last_pos;
1633 self.err_span_char(last_pos, last_pos, "raw byte string must be ASCII", c);
1634 }
1a4d82fc
JJ
1635 }
1636 }
1637 self.bump();
1638 }
1639 self.bump();
9cc50fc6
SL
1640 return token::ByteStrRaw(self.name_from_to(content_start_bpos, content_end_bpos),
1641 hash_count);
1a4d82fc
JJ
1642 }
1643}
1644
54a0048b
SL
1645// This tests the character for the unicode property 'PATTERN_WHITE_SPACE' which
1646// is guaranteed to be forward compatible. http://unicode.org/reports/tr31/#R3
1647pub fn is_pattern_whitespace(c: Option<char>) -> bool {
1648 c.map_or(false, Pattern_White_Space)
1a4d82fc
JJ
1649}
1650
// Returns true when `c` holds a character within the inclusive range `lo..hi`;
// `None` is never in range.
fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
    c.map_or(false, |ch| lo <= ch && ch <= hi)
}
1657
9cc50fc6
SL
1658fn is_dec_digit(c: Option<char>) -> bool {
1659 return in_range(c, '0', '9');
1660}
1a4d82fc
JJ
1661
1662pub fn is_doc_comment(s: &str) -> bool {
9cc50fc6
SL
1663 let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') ||
1664 s.starts_with("//!");
1a4d82fc
JJ
1665 debug!("is {:?} a doc comment? {}", s, res);
1666 res
1667}
1668
1669pub fn is_block_doc_comment(s: &str) -> bool {
9cc50fc6
SL
1670 // Prevent `/**/` from being parsed as a doc comment
1671 let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') ||
1672 s.starts_with("/*!")) && s.len() >= 5;
1a4d82fc
JJ
1673 debug!("is {:?} a doc comment? {}", s, res);
1674 res
1675}
1676
1677fn ident_start(c: Option<char>) -> bool {
9cc50fc6
SL
1678 let c = match c {
1679 Some(c) => c,
1680 None => return false,
1681 };
1a4d82fc 1682
9cc50fc6 1683 (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c > '\x7f' && c.is_xid_start())
1a4d82fc
JJ
1684}
1685
1686fn ident_continue(c: Option<char>) -> bool {
9cc50fc6
SL
1687 let c = match c {
1688 Some(c) => c,
1689 None => return false,
1690 };
1a4d82fc 1691
9cc50fc6
SL
1692 (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' ||
1693 (c > '\x7f' && c.is_xid_continue())
1a4d82fc
JJ
1694}
1695
#[cfg(test)]
mod tests {
    use super::*;

    use syntax_pos::{BytePos, Span, NO_EXPANSION};
    use codemap::CodeMap;
    use errors;
    use parse::token;
    use parse::token::str_to_ident;
    use std::io;
    use std::rc::Rc;

    // build a diagnostic handler whose output is written to a sink
    fn mk_sh(cm: Rc<CodeMap>) -> errors::Handler {
        // FIXME (#22405): Replace `Box::new` with `box` here when/if possible.
        let emitter = errors::emitter::EmitterWriter::new(Box::new(io::sink()),
                                                          Some(cm));
        errors::Handler::with_emitter(true, false, Box::new(emitter))
    }

    // open a string reader for the given string
    fn setup<'a>(cm: &CodeMap,
                 span_handler: &'a errors::Handler,
                 teststr: String)
                 -> StringReader<'a> {
        let fm = cm.new_filemap("zebra.rs".to_string(), None, teststr);
        StringReader::new(span_handler, fm)
    }

    // lex `src` with a fresh codemap and handler and return only the first token
    fn first_tok(src: &str) -> token::Token {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        setup(&cm, &sh, src.to_string()).next_token().tok
    }

    #[test]
    fn t1() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        let source = "/* my source file */ fn main() { println!(\"zebra\"); }\n".to_string();
        let mut reader = setup(&cm, &sh, source);
        let fn_ident = str_to_ident("fn");
        assert_eq!(reader.next_token().tok, token::Comment);
        assert_eq!(reader.next_token().tok, token::Whitespace);

        let got_fn = reader.next_token();
        let want_fn = TokenAndSpan {
            tok: token::Ident(fn_ident),
            sp: Span {
                lo: BytePos(21),
                hi: BytePos(23),
                expn_id: NO_EXPANSION,
            },
        };
        assert_eq!(got_fn, want_fn);
        assert_eq!(reader.next_token().tok, token::Whitespace);
        // the 'main' id is already read:
        assert_eq!(reader.last_pos, BytePos(28));

        // read another token:
        let got_main = reader.next_token();
        let want_main = TokenAndSpan {
            tok: token::Ident(str_to_ident("main")),
            sp: Span {
                lo: BytePos(24),
                hi: BytePos(28),
                expn_id: NO_EXPANSION,
            },
        };
        assert_eq!(got_main, want_main);
        // the lparen is already read:
        assert_eq!(reader.last_pos, BytePos(29))
    }

    // check that the given reader produces the desired stream
    // of tokens (stop checking after exhausting the expected vec)
    fn check_tokenization(mut string_reader: StringReader, expected: Vec<token::Token>) {
        for want in expected.iter() {
            let got = string_reader.next_token().tok;
            assert_eq!(&got, want);
        }
    }

    // make the identifier by looking up the string in the interner
    fn mk_ident(id: &str) -> token::Token {
        token::Ident(str_to_ident(id))
    }

    #[test]
    fn doublecolonparsing() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        let reader = setup(&cm, &sh, "a b".to_string());
        check_tokenization(reader,
                           vec![mk_ident("a"), token::Whitespace, mk_ident("b")]);
    }

    #[test]
    fn dcparsing_2() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        let reader = setup(&cm, &sh, "a::b".to_string());
        check_tokenization(reader,
                           vec![mk_ident("a"), token::ModSep, mk_ident("b")]);
    }

    #[test]
    fn dcparsing_3() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        let reader = setup(&cm, &sh, "a ::b".to_string());
        check_tokenization(reader,
                           vec![mk_ident("a"), token::Whitespace, token::ModSep, mk_ident("b")]);
    }

    #[test]
    fn dcparsing_4() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        let reader = setup(&cm, &sh, "a:: b".to_string());
        check_tokenization(reader,
                           vec![mk_ident("a"), token::ModSep, token::Whitespace, mk_ident("b")]);
    }

    #[test]
    fn character_a() {
        assert_eq!(first_tok("'a'"),
                   token::Literal(token::Char(token::intern("a")), None));
    }

    #[test]
    fn character_space() {
        assert_eq!(first_tok("' '"),
                   token::Literal(token::Char(token::intern(" ")), None));
    }

    #[test]
    fn character_escaped() {
        assert_eq!(first_tok("'\\n'"),
                   token::Literal(token::Char(token::intern("\\n")), None));
    }

    #[test]
    fn lifetime_name() {
        assert_eq!(first_tok("'abc"),
                   token::Lifetime(token::str_to_ident("'abc")));
    }

    #[test]
    fn raw_string() {
        assert_eq!(first_tok("r###\"\"#a\\b\x00c\"\"###"),
                   token::Literal(token::StrRaw(token::intern("\"#a\\b\x00c\""), 3), None));
    }

    #[test]
    fn literal_suffixes() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        // each literal is lexed twice: with `suffix` glued on, and with `suffix`
        // separated by a space (in which case no suffix must be attached)
        macro_rules! test {
            ($input: expr, $tok_type: ident, $tok_contents: expr) => {{
                assert_eq!(setup(&cm, &sh, format!("{}suffix", $input)).next_token().tok,
                           token::Literal(token::$tok_type(token::intern($tok_contents)),
                                          Some(token::intern("suffix"))));
                // with a whitespace separator:
                assert_eq!(setup(&cm, &sh, format!("{} suffix", $input)).next_token().tok,
                           token::Literal(token::$tok_type(token::intern($tok_contents)),
                                          None));
            }}
        }

        test!("'a'", Char, "a");
        test!("b'a'", Byte, "a");
        test!("\"a\"", Str_, "a");
        test!("b\"a\"", ByteStr, "a");
        test!("1234", Integer, "1234");
        test!("0b101", Integer, "0b101");
        test!("0xABC", Integer, "0xABC");
        test!("1.0", Float, "1.0");
        test!("1.0e10", Float, "1.0e10");

        assert_eq!(setup(&cm, &sh, "2us".to_string()).next_token().tok,
                   token::Literal(token::Integer(token::intern("2")),
                                  Some(token::intern("us"))));
        assert_eq!(setup(&cm, &sh, "r###\"raw\"###suffix".to_string()).next_token().tok,
                   token::Literal(token::StrRaw(token::intern("raw"), 3),
                                  Some(token::intern("suffix"))));
        assert_eq!(setup(&cm, &sh, "br###\"raw\"###suffix".to_string()).next_token().tok,
                   token::Literal(token::ByteStrRaw(token::intern("raw"), 3),
                                  Some(token::intern("suffix"))));
    }

    #[test]
    fn line_doc_comments() {
        assert!(is_doc_comment("///"));
        assert!(is_doc_comment("/// blah"));
        assert!(!is_doc_comment("////"));
    }

    #[test]
    fn nested_block_comments() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        let mut lexer = setup(&cm, &sh, "/* /* */ */'a'".to_string());
        match lexer.next_token().tok {
            token::Comment => {}
            _ => panic!("expected a comment!"),
        }
        let after_comment = lexer.next_token().tok;
        assert_eq!(after_comment,
                   token::Literal(token::Char(token::intern("a")), None));
    }

    #[test]
    fn crlf_comments() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        let mut lexer = setup(&cm, &sh, "// test\r\n/// test\r\n".to_string());
        // the first comment's span must stop before the `\r\n`
        let comment = lexer.next_token();
        assert_eq!(comment.tok, token::Comment);
        assert_eq!(comment.sp, ::syntax_pos::mk_sp(BytePos(0), BytePos(7)));
        assert_eq!(lexer.next_token().tok, token::Whitespace);
        let doc = lexer.next_token().tok;
        assert_eq!(doc, token::DocComment(token::intern("/// test")));
    }
}