]>
Commit | Line | Data |
---|---|---|
1 | // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT | |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
11 | use ast::{self, Ident}; | |
12 | use syntax_pos::{self, BytePos, CharPos, Pos, Span}; | |
13 | use codemap::CodeMap; | |
14 | use errors::{FatalError, Handler, DiagnosticBuilder}; | |
15 | use ext::tt::transcribe::tt_next_token; | |
16 | use parse::token; | |
17 | use str::char_at; | |
18 | use symbol::{Symbol, keywords}; | |
19 | use std_unicode::property::Pattern_White_Space; | |
20 | ||
21 | use std::borrow::Cow; | |
22 | use std::char; | |
23 | use std::mem::replace; | |
24 | use std::rc::Rc; | |
25 | ||
26 | pub use ext::tt::transcribe::{TtReader, new_tt_reader}; | |
27 | ||
28 | pub mod comments; | |
29 | mod unicode_chars; | |
30 | ||
/// An interface over a stream of tokens, implemented both by the raw
/// source lexer (`StringReader`) and by the token-tree reader used
/// during macro expansion (`TtReader`).
pub trait Reader {
    fn is_eof(&self) -> bool;
    /// Produce the next token, or `Err(())` if a fatal diagnostic was
    /// queued (to be emitted via `emit_fatal_errors`).
    fn try_next_token(&mut self) -> Result<TokenAndSpan, ()>;
    /// Like `try_next_token`, but panics with `FatalError` on failure
    /// after emitting any queued fatal diagnostics.
    fn next_token(&mut self) -> TokenAndSpan where Self: Sized {
        let res = self.try_next_token();
        self.unwrap_or_abort(res)
    }
    /// Report a fatal error with the current span.
    fn fatal(&self, &str) -> FatalError;
    /// Report a non-fatal error with the current span.
    fn err(&self, &str);
    fn emit_fatal_errors(&mut self);
    /// Unwrap `res`; on `Err`, flush queued fatal diagnostics and abort
    /// by panicking with `FatalError`.
    fn unwrap_or_abort(&mut self, res: Result<TokenAndSpan, ()>) -> TokenAndSpan {
        match res {
            Ok(tok) => tok,
            Err(_) => {
                self.emit_fatal_errors();
                panic!(FatalError);
            }
        }
    }
    fn peek(&self) -> TokenAndSpan;
    /// Get a token the parser cares about: skips over whitespace,
    /// comment, and shebang tokens.
    fn try_real_token(&mut self) -> Result<TokenAndSpan, ()> {
        let mut t = self.try_next_token()?;
        loop {
            match t.tok {
                token::Whitespace | token::Comment | token::Shebang(_) => {
                    t = self.try_next_token()?;
                }
                _ => break,
            }
        }
        Ok(t)
    }
    /// Like `try_real_token`, but aborts on error.
    fn real_token(&mut self) -> TokenAndSpan {
        let res = self.try_real_token();
        self.unwrap_or_abort(res)
    }
}
71 | ||
/// A token paired with the source span it was lexed from.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct TokenAndSpan {
    pub tok: token::Token,
    pub sp: Span,
}
77 | ||
impl Default for TokenAndSpan {
    fn default() -> Self {
        // `Underscore` with a dummy span is used as a cheap placeholder
        // value (see `try_next_token`, which `replace`s `peek_tok` with it).
        TokenAndSpan { tok: token::Underscore, sp: syntax_pos::DUMMY_SP }
    }
}
83 | ||
/// A lexer over the raw source text of a single `FileMap`.
pub struct StringReader<'a> {
    pub span_diagnostic: &'a Handler,
    /// The absolute offset within the codemap of the next character to read
    pub next_pos: BytePos,
    /// The absolute offset within the codemap of the current character
    pub pos: BytePos,
    /// The column of the next character to read
    pub col: CharPos,
    /// The current character (which has been read from self.pos)
    pub ch: Option<char>,
    /// The filemap being lexed; `new_raw_internal` requires its `src` to be present.
    pub filemap: Rc<syntax_pos::FileMap>,
    /// If Some, stop reading the source at this position (inclusive).
    pub terminator: Option<BytePos>,
    /// Whether to record new-lines in filemap. This is only necessary the first
    /// time a filemap is lexed. If part of a filemap is being re-lexed, this
    /// should be set to false.
    pub save_new_lines: bool,
    // cached: the token (and span) that the next call to `try_next_token`
    // will hand out.
    pub peek_tok: token::Token,
    pub peek_span: Span,
    // fatal diagnostics queued up to be flushed by `emit_fatal_errors`
    pub fatal_errs: Vec<DiagnosticBuilder<'a>>,
    // cache a direct reference to the source text, so that we don't have to
    // retrieve it via `self.filemap.src.as_ref().unwrap()` all the time.
    source_text: Rc<String>,
}
109 | ||
110 | impl<'a> Reader for StringReader<'a> { | |
111 | fn is_eof(&self) -> bool { | |
112 | if self.ch.is_none() { | |
113 | return true; | |
114 | } | |
115 | ||
116 | match self.terminator { | |
117 | Some(t) => self.next_pos > t, | |
118 | None => false, | |
119 | } | |
120 | } | |
121 | /// Return the next token. EFFECT: advances the string_reader. | |
122 | fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> { | |
123 | assert!(self.fatal_errs.is_empty()); | |
124 | let ret_val = TokenAndSpan { | |
125 | tok: replace(&mut self.peek_tok, token::Underscore), | |
126 | sp: self.peek_span, | |
127 | }; | |
128 | self.advance_token()?; | |
129 | Ok(ret_val) | |
130 | } | |
131 | fn fatal(&self, m: &str) -> FatalError { | |
132 | self.fatal_span(self.peek_span, m) | |
133 | } | |
134 | fn err(&self, m: &str) { | |
135 | self.err_span(self.peek_span, m) | |
136 | } | |
137 | fn emit_fatal_errors(&mut self) { | |
138 | for err in &mut self.fatal_errs { | |
139 | err.emit(); | |
140 | } | |
141 | self.fatal_errs.clear(); | |
142 | } | |
143 | fn peek(&self) -> TokenAndSpan { | |
144 | // FIXME(pcwalton): Bad copy! | |
145 | TokenAndSpan { | |
146 | tok: self.peek_tok.clone(), | |
147 | sp: self.peek_span, | |
148 | } | |
149 | } | |
150 | } | |
151 | ||
152 | impl<'a> Reader for TtReader<'a> { | |
153 | fn is_eof(&self) -> bool { | |
154 | self.peek().tok == token::Eof | |
155 | } | |
156 | fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> { | |
157 | assert!(self.fatal_errs.is_empty()); | |
158 | let r = tt_next_token(self); | |
159 | debug!("TtReader: r={:?}", r); | |
160 | Ok(r) | |
161 | } | |
162 | fn fatal(&self, m: &str) -> FatalError { | |
163 | self.sp_diag.span_fatal(self.cur_span, m) | |
164 | } | |
165 | fn err(&self, m: &str) { | |
166 | self.sp_diag.span_err(self.cur_span, m); | |
167 | } | |
168 | fn emit_fatal_errors(&mut self) { | |
169 | for err in &mut self.fatal_errs { | |
170 | err.emit(); | |
171 | } | |
172 | self.fatal_errs.clear(); | |
173 | } | |
174 | fn peek(&self) -> TokenAndSpan { | |
175 | TokenAndSpan { | |
176 | tok: self.cur_tok.clone(), | |
177 | sp: self.cur_span, | |
178 | } | |
179 | } | |
180 | } | |
181 | ||
182 | impl<'a> StringReader<'a> { | |
183 | /// For comments.rs, which hackily pokes into next_pos and ch | |
184 | pub fn new_raw<'b>(span_diagnostic: &'b Handler, | |
185 | filemap: Rc<syntax_pos::FileMap>) | |
186 | -> StringReader<'b> { | |
187 | let mut sr = StringReader::new_raw_internal(span_diagnostic, filemap); | |
188 | sr.bump(); | |
189 | sr | |
190 | } | |
191 | ||
    /// Build a `StringReader` over `filemap` without priming it; callers
    /// are expected to `bump()` before use (see `new_raw`).
    fn new_raw_internal<'b>(span_diagnostic: &'b Handler,
                            filemap: Rc<syntax_pos::FileMap>)
                            -> StringReader<'b> {
        // Lexing requires the filemap to carry its source text.
        if filemap.src.is_none() {
            span_diagnostic.bug(&format!("Cannot lex filemap \
                                          without source: {}",
                                         filemap.name)[..]);
        }

        let source_text = (*filemap.src.as_ref().unwrap()).clone();

        StringReader {
            span_diagnostic: span_diagnostic,
            next_pos: filemap.start_pos,
            pos: filemap.start_pos,
            col: CharPos(0),
            // Pretend the previous character was a newline so the first
            // `bump` records the start of line one (see `bump`).
            ch: Some('\n'),
            filemap: filemap,
            terminator: None,
            save_new_lines: true,
            // dummy values; not read
            peek_tok: token::Eof,
            peek_span: syntax_pos::DUMMY_SP,
            source_text: source_text,
            fatal_errs: Vec::new(),
        }
    }
219 | ||
220 | pub fn new<'b>(span_diagnostic: &'b Handler, | |
221 | filemap: Rc<syntax_pos::FileMap>) | |
222 | -> StringReader<'b> { | |
223 | let mut sr = StringReader::new_raw(span_diagnostic, filemap); | |
224 | if let Err(_) = sr.advance_token() { | |
225 | sr.emit_fatal_errors(); | |
226 | panic!(FatalError); | |
227 | } | |
228 | sr | |
229 | } | |
230 | ||
231 | pub fn ch_is(&self, c: char) -> bool { | |
232 | self.ch == Some(c) | |
233 | } | |
234 | ||
    /// Report a fatal lexical error with a given span.
    /// Thin delegation to the diagnostic handler.
    pub fn fatal_span(&self, sp: Span, m: &str) -> FatalError {
        self.span_diagnostic.span_fatal(sp, m)
    }
239 | ||
    /// Report a (non-fatal) lexical error with a given span.
    /// Thin delegation to the diagnostic handler.
    pub fn err_span(&self, sp: Span, m: &str) {
        self.span_diagnostic.span_err(sp, m)
    }
244 | ||
245 | ||
246 | /// Report a fatal error spanning [`from_pos`, `to_pos`). | |
247 | fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> FatalError { | |
248 | self.fatal_span(syntax_pos::mk_sp(from_pos, to_pos), m) | |
249 | } | |
250 | ||
251 | /// Report a lexical error spanning [`from_pos`, `to_pos`). | |
252 | fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) { | |
253 | self.err_span(syntax_pos::mk_sp(from_pos, to_pos), m) | |
254 | } | |
255 | ||
256 | /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an | |
257 | /// escaped character to the error message | |
258 | fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> FatalError { | |
259 | let mut m = m.to_string(); | |
260 | m.push_str(": "); | |
261 | for c in c.escape_default() { | |
262 | m.push(c) | |
263 | } | |
264 | self.fatal_span_(from_pos, to_pos, &m[..]) | |
265 | } | |
266 | fn struct_fatal_span_char(&self, | |
267 | from_pos: BytePos, | |
268 | to_pos: BytePos, | |
269 | m: &str, | |
270 | c: char) | |
271 | -> DiagnosticBuilder<'a> { | |
272 | let mut m = m.to_string(); | |
273 | m.push_str(": "); | |
274 | for c in c.escape_default() { | |
275 | m.push(c) | |
276 | } | |
277 | self.span_diagnostic.struct_span_fatal(syntax_pos::mk_sp(from_pos, to_pos), &m[..]) | |
278 | } | |
279 | ||
280 | /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an | |
281 | /// escaped character to the error message | |
282 | fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) { | |
283 | let mut m = m.to_string(); | |
284 | m.push_str(": "); | |
285 | for c in c.escape_default() { | |
286 | m.push(c) | |
287 | } | |
288 | self.err_span_(from_pos, to_pos, &m[..]); | |
289 | } | |
290 | fn struct_err_span_char(&self, | |
291 | from_pos: BytePos, | |
292 | to_pos: BytePos, | |
293 | m: &str, | |
294 | c: char) | |
295 | -> DiagnosticBuilder<'a> { | |
296 | let mut m = m.to_string(); | |
297 | m.push_str(": "); | |
298 | for c in c.escape_default() { | |
299 | m.push(c) | |
300 | } | |
301 | self.span_diagnostic.struct_span_err(syntax_pos::mk_sp(from_pos, to_pos), &m[..]) | |
302 | } | |
303 | ||
304 | /// Report a lexical error spanning [`from_pos`, `to_pos`), appending the | |
305 | /// offending string to the error message | |
306 | fn fatal_span_verbose(&self, from_pos: BytePos, to_pos: BytePos, mut m: String) -> FatalError { | |
307 | m.push_str(": "); | |
308 | let from = self.byte_offset(from_pos).to_usize(); | |
309 | let to = self.byte_offset(to_pos).to_usize(); | |
310 | m.push_str(&self.source_text[from..to]); | |
311 | self.fatal_span_(from_pos, to_pos, &m[..]) | |
312 | } | |
313 | ||
314 | /// Advance peek_tok and peek_span to refer to the next token, and | |
315 | /// possibly update the interner. | |
316 | fn advance_token(&mut self) -> Result<(), ()> { | |
317 | match self.scan_whitespace_or_comment() { | |
318 | Some(comment) => { | |
319 | self.peek_span = comment.sp; | |
320 | self.peek_tok = comment.tok; | |
321 | } | |
322 | None => { | |
323 | if self.is_eof() { | |
324 | self.peek_tok = token::Eof; | |
325 | self.peek_span = syntax_pos::mk_sp(self.filemap.end_pos, self.filemap.end_pos); | |
326 | } else { | |
327 | let start_bytepos = self.pos; | |
328 | self.peek_tok = self.next_token_inner()?; | |
329 | self.peek_span = syntax_pos::mk_sp(start_bytepos, self.pos); | |
330 | }; | |
331 | } | |
332 | } | |
333 | Ok(()) | |
334 | } | |
335 | ||
336 | fn byte_offset(&self, pos: BytePos) -> BytePos { | |
337 | (pos - self.filemap.start_pos) | |
338 | } | |
339 | ||
340 | /// Calls `f` with a string slice of the source text spanning from `start` | |
341 | /// up to but excluding `self.pos`, meaning the slice does not include | |
342 | /// the character `self.ch`. | |
343 | pub fn with_str_from<T, F>(&self, start: BytePos, f: F) -> T | |
344 | where F: FnOnce(&str) -> T | |
345 | { | |
346 | self.with_str_from_to(start, self.pos, f) | |
347 | } | |
348 | ||
349 | /// Create a Name from a given offset to the current offset, each | |
350 | /// adjusted 1 towards each other (assumes that on either side there is a | |
351 | /// single-byte delimiter). | |
352 | pub fn name_from(&self, start: BytePos) -> ast::Name { | |
353 | debug!("taking an ident from {:?} to {:?}", start, self.pos); | |
354 | self.with_str_from(start, Symbol::intern) | |
355 | } | |
356 | ||
357 | /// As name_from, with an explicit endpoint. | |
358 | pub fn name_from_to(&self, start: BytePos, end: BytePos) -> ast::Name { | |
359 | debug!("taking an ident from {:?} to {:?}", start, end); | |
360 | self.with_str_from_to(start, end, Symbol::intern) | |
361 | } | |
362 | ||
363 | /// Calls `f` with a string slice of the source text spanning from `start` | |
364 | /// up to but excluding `end`. | |
365 | fn with_str_from_to<T, F>(&self, start: BytePos, end: BytePos, f: F) -> T | |
366 | where F: FnOnce(&str) -> T | |
367 | { | |
368 | f(&self.source_text[self.byte_offset(start).to_usize()..self.byte_offset(end).to_usize()]) | |
369 | } | |
370 | ||
    /// Converts CRLF to LF in the given string, raising an error on bare CR.
    ///
    /// Returns a borrowed `Cow` when no CRLF pair is present; only the
    /// nested `translate_crlf_` (entered at the first CRLF) allocates.
    fn translate_crlf<'b>(&self, start: BytePos, s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
        let mut i = 0;
        while i < s.len() {
            let ch = char_at(s, i);
            let next = i + ch.len_utf8();
            if ch == '\r' {
                if next < s.len() && char_at(s, next) == '\n' {
                    // First CRLF found: switch to the copying slow path.
                    return translate_crlf_(self, start, s, errmsg, i).into();
                }
                // Bare CR (not followed by LF): report it, do not translate.
                let pos = start + BytePos(i as u32);
                let end_pos = start + BytePos(next as u32);
                self.err_span_(pos, end_pos, errmsg);
            }
            i = next;
        }
        return s.into();

        // Copying path: builds a new string with CRLF collapsed to LF,
        // starting at byte index `i` (the CR of the first CRLF pair).
        // `j` marks the start of the next not-yet-copied chunk.
        fn translate_crlf_(rdr: &StringReader,
                           start: BytePos,
                           s: &str,
                           errmsg: &str,
                           mut i: usize)
                           -> String {
            let mut buf = String::with_capacity(s.len());
            let mut j = 0;
            while i < s.len() {
                let ch = char_at(s, i);
                let next = i + ch.len_utf8();
                if ch == '\r' {
                    if j < i {
                        buf.push_str(&s[j..i]);
                    }
                    // Skip the CR itself; a following LF will be copied
                    // as part of the next chunk starting at `j`.
                    j = next;
                    if next >= s.len() || char_at(s, next) != '\n' {
                        // Bare CR inside the copying path is also an error.
                        let pos = start + BytePos(i as u32);
                        let end_pos = start + BytePos(next as u32);
                        rdr.err_span_(pos, end_pos, errmsg);
                    }
                }
                i = next;
            }
            if j < s.len() {
                buf.push_str(&s[j..]);
            }
            buf
        }
    }
419 | ||
420 | ||
    /// Advance the StringReader by one character. If a newline is
    /// discovered, add it to the FileMap's list of line start offsets.
    pub fn bump(&mut self) {
        let new_pos = self.next_pos;
        let new_byte_offset = self.byte_offset(new_pos).to_usize();
        if new_byte_offset < self.source_text.len() {
            // Whether the character we are leaving was a newline determines
            // whether `new_pos` starts a fresh line.
            let old_ch_is_newline = self.ch.unwrap() == '\n';
            let new_ch = char_at(&self.source_text, new_byte_offset);
            let new_ch_len = new_ch.len_utf8();

            self.ch = Some(new_ch);
            self.pos = new_pos;
            self.next_pos = new_pos + Pos::from_usize(new_ch_len);
            if old_ch_is_newline {
                if self.save_new_lines {
                    // Record the byte offset at which this line begins.
                    self.filemap.next_line(self.pos);
                }
                self.col = CharPos(0);
            } else {
                self.col = self.col + CharPos(1);
            }
            if new_ch_len > 1 {
                // Remember multi-byte characters in the filemap.
                self.filemap.record_multibyte_char(self.pos, new_ch_len);
            }
        } else {
            // Past the end of the source: `ch` becomes None, which makes
            // `is_eof` report true.
            self.ch = None;
            self.pos = new_pos;
        }
    }
450 | ||
451 | pub fn nextch(&self) -> Option<char> { | |
452 | let offset = self.byte_offset(self.next_pos).to_usize(); | |
453 | if offset < self.source_text.len() { | |
454 | Some(char_at(&self.source_text, offset)) | |
455 | } else { | |
456 | None | |
457 | } | |
458 | } | |
459 | ||
460 | pub fn nextch_is(&self, c: char) -> bool { | |
461 | self.nextch() == Some(c) | |
462 | } | |
463 | ||
464 | pub fn nextnextch(&self) -> Option<char> { | |
465 | let offset = self.byte_offset(self.next_pos).to_usize(); | |
466 | let s = &self.source_text[..]; | |
467 | if offset >= s.len() { | |
468 | return None; | |
469 | } | |
470 | let next = offset + char_at(s, offset).len_utf8(); | |
471 | if next < s.len() { | |
472 | Some(char_at(s, next)) | |
473 | } else { | |
474 | None | |
475 | } | |
476 | } | |
477 | ||
478 | pub fn nextnextch_is(&self, c: char) -> bool { | |
479 | self.nextnextch() == Some(c) | |
480 | } | |
481 | ||
482 | /// Eats <XID_start><XID_continue>*, if possible. | |
483 | fn scan_optional_raw_name(&mut self) -> Option<ast::Name> { | |
484 | if !ident_start(self.ch) { | |
485 | return None; | |
486 | } | |
487 | let start = self.pos; | |
488 | while ident_continue(self.ch) { | |
489 | self.bump(); | |
490 | } | |
491 | ||
492 | self.with_str_from(start, |string| { | |
493 | if string == "_" { | |
494 | None | |
495 | } else { | |
496 | Some(Symbol::intern(string)) | |
497 | } | |
498 | }) | |
499 | } | |
500 | ||
    /// PRECONDITION: self.ch is not whitespace
    /// Eats any kind of comment.
    ///
    /// Handles `//...` line comments (doc or not), delegates `/*...*/` to
    /// `scan_block_comment`, and skips a `#!...` shebang on line 1 (but not
    /// a `#![...]` inner attribute).
    fn scan_comment(&mut self) -> Option<TokenAndSpan> {
        // Defensive check: the precondition above was violated by the caller.
        if let Some(c) = self.ch {
            if c.is_whitespace() {
                self.span_diagnostic.span_err(syntax_pos::mk_sp(self.pos, self.pos),
                                              "called consume_any_line_comment, but there \
                                               was whitespace");
            }
        }

        if self.ch_is('/') {
            match self.nextch() {
                Some('/') => {
                    self.bump();
                    self.bump();

                    // line comments starting with "///" or "//!" are doc-comments
                    let doc_comment = self.ch_is('/') || self.ch_is('!');
                    // span from the "//" we already consumed
                    let start_bpos = self.pos - BytePos(2);

                    // Consume up to (but not including) the line terminator.
                    while !self.is_eof() {
                        match self.ch.unwrap() {
                            '\n' => break,
                            '\r' => {
                                if self.nextch_is('\n') {
                                    // CRLF
                                    break;
                                } else if doc_comment {
                                    self.err_span_(self.pos,
                                                   self.next_pos,
                                                   "bare CR not allowed in doc-comment");
                                }
                            }
                            _ => (),
                        }
                        self.bump();
                    }

                    return if doc_comment {
                        self.with_str_from(start_bpos, |string| {
                            // comments with only more "/"s are not doc comments
                            let tok = if is_doc_comment(string) {
                                token::DocComment(Symbol::intern(string))
                            } else {
                                token::Comment
                            };

                            Some(TokenAndSpan {
                                tok: tok,
                                sp: syntax_pos::mk_sp(start_bpos, self.pos),
                            })
                        })
                    } else {
                        Some(TokenAndSpan {
                            tok: token::Comment,
                            sp: syntax_pos::mk_sp(start_bpos, self.pos),
                        })
                    };
                }
                Some('*') => {
                    // consume the "/*" opener, then hand off
                    self.bump();
                    self.bump();
                    self.scan_block_comment()
                }
                _ => None,
            }
        } else if self.ch_is('#') {
            if self.nextch_is('!') {

                // Parse an inner attribute.
                if self.nextnextch_is('[') {
                    return None;
                }

                // I guess this is the only way to figure out if
                // we're at the beginning of the file...
                let cmap = CodeMap::new();
                cmap.files.borrow_mut().push(self.filemap.clone());
                let loc = cmap.lookup_char_pos_adj(self.pos);
                debug!("Skipping a shebang");
                if loc.line == 1 && loc.col == CharPos(0) {
                    // FIXME: Add shebang "token", return it
                    let start = self.pos;
                    // consume the rest of the shebang line
                    while !self.ch_is('\n') && !self.is_eof() {
                        self.bump();
                    }
                    return Some(TokenAndSpan {
                        tok: token::Shebang(self.name_from(start)),
                        sp: syntax_pos::mk_sp(start, self.pos),
                    });
                }
            }
            None
        } else {
            None
        }
    }
599 | ||
600 | /// If there is whitespace, shebang, or a comment, scan it. Otherwise, | |
601 | /// return None. | |
602 | fn scan_whitespace_or_comment(&mut self) -> Option<TokenAndSpan> { | |
603 | match self.ch.unwrap_or('\0') { | |
604 | // # to handle shebang at start of file -- this is the entry point | |
605 | // for skipping over all "junk" | |
606 | '/' | '#' => { | |
607 | let c = self.scan_comment(); | |
608 | debug!("scanning a comment {:?}", c); | |
609 | c | |
610 | }, | |
611 | c if is_pattern_whitespace(Some(c)) => { | |
612 | let start_bpos = self.pos; | |
613 | while is_pattern_whitespace(self.ch) { | |
614 | self.bump(); | |
615 | } | |
616 | let c = Some(TokenAndSpan { | |
617 | tok: token::Whitespace, | |
618 | sp: syntax_pos::mk_sp(start_bpos, self.pos), | |
619 | }); | |
620 | debug!("scanning whitespace: {:?}", c); | |
621 | c | |
622 | } | |
623 | _ => None, | |
624 | } | |
625 | } | |
626 | ||
    /// Might return a sugared-doc-attr
    ///
    /// PRECONDITION: the opening "/*" was already consumed by the caller
    /// (see `scan_comment`); `start_bpos` backs up over it.
    fn scan_block_comment(&mut self) -> Option<TokenAndSpan> {
        // block comments starting with "/**" or "/*!" are doc-comments
        let is_doc_comment = self.ch_is('*') || self.ch_is('!');
        let start_bpos = self.pos - BytePos(2);

        // `level` tracks nesting: block comments may contain block comments.
        let mut level: isize = 1;
        let mut has_cr = false;
        while level > 0 {
            if self.is_eof() {
                let msg = if is_doc_comment {
                    "unterminated block doc-comment"
                } else {
                    "unterminated block comment"
                };
                let last_bpos = self.pos;
                panic!(self.fatal_span_(start_bpos, last_bpos, msg));
            }
            let n = self.ch.unwrap();
            match n {
                '/' if self.nextch_is('*') => {
                    level += 1;
                    self.bump();
                }
                '*' if self.nextch_is('/') => {
                    level -= 1;
                    self.bump();
                }
                '\r' => {
                    // remember CRs so doc text can be normalized below
                    has_cr = true;
                }
                _ => (),
            }
            self.bump();
        }

        self.with_str_from(start_bpos, |string| {
            // but comments with only "*"s between two "/"s are not
            let tok = if is_block_doc_comment(string) {
                let string = if has_cr {
                    // collapse CRLF to LF and report any bare CR in doc text
                    self.translate_crlf(start_bpos,
                                        string,
                                        "bare CR not allowed in block doc-comment")
                } else {
                    string.into()
                };
                token::DocComment(Symbol::intern(&string[..]))
            } else {
                token::Comment
            };

            Some(TokenAndSpan {
                tok: tok,
                sp: syntax_pos::mk_sp(start_bpos, self.pos),
            })
        })
    }
684 | ||
    /// Scan through any digits (base `scan_radix`) or underscores,
    /// and return how many digits there were.
    ///
    /// `real_radix` represents the true radix of the number we're
    /// interested in, and errors will be emitted for any digits
    /// between `real_radix` and `scan_radix`.
    /// (Underscores are digit separators and do not count toward the total.)
    fn scan_digits(&mut self, real_radix: u32, scan_radix: u32) -> usize {
        assert!(real_radix <= scan_radix);
        let mut len = 0;
        loop {
            let c = self.ch;
            if c == Some('_') {
                debug!("skipping a _");
                self.bump();
                continue;
            }
            match c.and_then(|cc| cc.to_digit(scan_radix)) {
                Some(_) => {
                    debug!("{:?} in scan_digits", c);
                    // check that the hypothetical digit is actually
                    // in range for the true radix
                    if c.unwrap().to_digit(real_radix).is_none() {
                        self.err_span_(self.pos,
                                       self.next_pos,
                                       &format!("invalid digit for a base {} literal", real_radix));
                    }
                    len += 1;
                    self.bump();
                }
                // not a digit in `scan_radix`: the run of digits is over
                _ => return len,
            }
        }
    }
718 | ||
    /// Lex a LIT_INTEGER or a LIT_FLOAT.
    ///
    /// `c` is the literal's first character; the leading `bump` below
    /// consumes it. Base prefixes (`0b`, `0o`, `0x`) select the radix, and
    /// a `.` or exponent promotes the literal to a float.
    fn scan_number(&mut self, c: char) -> token::Lit {
        let num_digits;
        let mut base = 10;
        let start_bpos = self.pos;

        self.bump();

        if c == '0' {
            match self.ch.unwrap_or('\0') {
                'b' => {
                    self.bump();
                    base = 2;
                    // scan in base 10 so decimal digits after `0b` get a
                    // proper "invalid digit" error from scan_digits
                    num_digits = self.scan_digits(2, 10);
                }
                'o' => {
                    self.bump();
                    base = 8;
                    num_digits = self.scan_digits(8, 10);
                }
                'x' => {
                    self.bump();
                    base = 16;
                    num_digits = self.scan_digits(16, 16);
                }
                '0'...'9' | '_' | '.' => {
                    // the leading 0 itself counts as a digit
                    num_digits = self.scan_digits(10, 10) + 1;
                }
                _ => {
                    // just a 0
                    return token::Integer(self.name_from(start_bpos));
                }
            }
        } else if c.is_digit(10) {
            num_digits = self.scan_digits(10, 10) + 1;
        } else {
            num_digits = 0;
        }

        if num_digits == 0 {
            self.err_span_(start_bpos,
                           self.pos,
                           "no valid digits found for number");
            // error recovery: pretend the literal was `0`
            return token::Integer(Symbol::intern("0"));
        }

        // might be a float, but don't be greedy if this is actually an
        // integer literal followed by field/method access or a range pattern
        // (`0..2` and `12.foo()`)
        if self.ch_is('.') && !self.nextch_is('.') &&
           !self.nextch()
                .unwrap_or('\0')
                .is_xid_start() {
            // might have stuff after the ., and if it does, it needs to start
            // with a number
            self.bump();
            if self.ch.unwrap_or('\0').is_digit(10) {
                self.scan_digits(10, 10);
                self.scan_float_exponent();
            }
            let pos = self.pos;
            self.check_float_base(start_bpos, pos, base);
            return token::Float(self.name_from(start_bpos));
        } else {
            // it might be a float if it has an exponent
            if self.ch_is('e') || self.ch_is('E') {
                self.scan_float_exponent();
                let pos = self.pos;
                self.check_float_base(start_bpos, pos, base);
                return token::Float(self.name_from(start_bpos));
            }
            // but we certainly have an integer!
            return token::Integer(self.name_from(start_bpos));
        }
    }
794 | ||
    /// Scan over `n_digits` hex digits, stopping at `delim`, reporting an
    /// error if too many or too few digits are encountered.
    ///
    /// Accumulates the digits into a code point value; when
    /// `below_0x7f_only` is set (byte escapes), values >= 0x80 are
    /// rejected. Returns whether the escape was valid.
    fn scan_hex_digits(&mut self, n_digits: usize, delim: char, below_0x7f_only: bool) -> bool {
        debug!("scanning {} digits until {:?}", n_digits, delim);
        let start_bpos = self.pos;
        let mut accum_int = 0;

        let mut valid = true;
        for _ in 0..n_digits {
            if self.is_eof() {
                let last_bpos = self.pos;
                panic!(self.fatal_span_(start_bpos,
                                        last_bpos,
                                        "unterminated numeric character escape"));
            }
            if self.ch_is(delim) {
                // hit the closing delimiter before reading n_digits digits
                let last_bpos = self.pos;
                self.err_span_(start_bpos,
                               last_bpos,
                               "numeric character escape is too short");
                valid = false;
                break;
            }
            let c = self.ch.unwrap_or('\x00');
            accum_int *= 16;
            accum_int += c.to_digit(16).unwrap_or_else(|| {
                // non-hex character: report it and treat it as 0 so
                // scanning can continue
                self.err_span_char(self.pos,
                                   self.next_pos,
                                   "invalid character in numeric character escape",
                                   c);

                valid = false;
                0
            });
            self.bump();
        }

        if below_0x7f_only && accum_int >= 0x80 {
            self.err_span_(start_bpos,
                           self.pos,
                           "this form of character escape may only be used with characters in \
                            the range [\\x00-\\x7f]");
            valid = false;
        }

        // finally, the accumulated value must denote a real code point
        match char::from_u32(accum_int) {
            Some(_) => valid,
            None => {
                let last_bpos = self.pos;
                self.err_span_(start_bpos, last_bpos, "invalid numeric character escape");
                false
            }
        }
    }
849 | ||
    /// Scan for a single (possibly escaped) byte or char
    /// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
    /// `start` is the position of `first_source_char`, which is already consumed.
    ///
    /// `ascii_only` is set when lexing byte/byte-string literals; `delim`
    /// is the literal's closing delimiter (`'` or `"`).
    ///
    /// Returns true if there was a valid char/byte, false otherwise.
    fn scan_char_or_byte(&mut self,
                         start: BytePos,
                         first_source_char: char,
                         ascii_only: bool,
                         delim: char)
                         -> bool {
        match first_source_char {
            '\\' => {
                // '\X' for some X must be a character constant:
                let escaped = self.ch;
                let escaped_pos = self.pos;
                self.bump();
                match escaped {
                    None => {} // EOF here is an error that will be checked later.
                    Some(e) => {
                        return match e {
                            // simple one-character escapes
                            'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
                            'x' => self.scan_byte_escape(delim, !ascii_only),
                            'u' => {
                                let valid = if self.ch_is('{') {
                                    self.scan_unicode_escape(delim) && !ascii_only
                                } else {
                                    // `\u` not followed by `{...}`
                                    let span = syntax_pos::mk_sp(start, self.pos);
                                    self.span_diagnostic
                                        .struct_span_err(span, "incorrect unicode escape sequence")
                                        .span_help(span,
                                                   "format of unicode escape sequences is \
                                                    `\\u{…}`")
                                        .emit();
                                    false
                                };
                                if ascii_only {
                                    self.err_span_(start,
                                                   self.pos,
                                                   "unicode escape sequences cannot be used as a \
                                                    byte or in a byte string");
                                }
                                valid

                            }
                            // a backslash at end-of-line in a string escapes
                            // the line break and following indentation
                            '\n' if delim == '"' => {
                                self.consume_whitespace();
                                true
                            }
                            '\r' if delim == '"' && self.ch_is('\n') => {
                                self.consume_whitespace();
                                true
                            }
                            c => {
                                // unrecognized escape character
                                let pos = self.pos;
                                let mut err = self.struct_err_span_char(escaped_pos,
                                                                        pos,
                                                                        if ascii_only {
                                                                            "unknown byte escape"
                                                                        } else {
                                                                            "unknown character \
                                                                             escape"
                                                                        },
                                                                        c);
                                if e == '\r' {
                                    err.span_help(syntax_pos::mk_sp(escaped_pos, pos),
                                                  "this is an isolated carriage return; consider \
                                                   checking your editor and version control \
                                                   settings");
                                }
                                if (e == '{' || e == '}') && !ascii_only {
                                    err.span_help(syntax_pos::mk_sp(escaped_pos, pos),
                                                  "if used in a formatting string, curly braces \
                                                   are escaped with `{{` and `}}`");
                                }
                                err.emit();
                                false
                            }
                        }
                    }
                }
            }
            // characters that must be escaped inside a char literal
            '\t' | '\n' | '\r' | '\'' if delim == '\'' => {
                let pos = self.pos;
                self.err_span_char(start,
                                   pos,
                                   if ascii_only {
                                       "byte constant must be escaped"
                                   } else {
                                       "character constant must be escaped"
                                   },
                                   first_source_char);
                return false;
            }
            '\r' => {
                if self.ch_is('\n') {
                    // CRLF in a string literal: consume the LF too
                    self.bump();
                    return true;
                } else {
                    self.err_span_(start,
                                   self.pos,
                                   "bare CR not allowed in string, use \\r instead");
                    return false;
                }
            }
            _ => {
                if ascii_only && first_source_char > '\x7F' {
                    let pos = self.pos;
                    self.err_span_(start,
                                   pos,
                                   "byte constant must be ASCII. Use a \\xHH escape for a \
                                    non-ASCII byte");
                    return false;
                }
            }
        }
        true
    }
968 | ||
    /// Scan over a \u{...} escape
    ///
    /// At this point, we have already seen the \ and the u, the { is the current character. We
    /// will read at least one digit, and up to 6, and pass over the }.
    ///
    /// Returns `false` (and emits a non-fatal error) when the escape is malformed;
    /// EOF or running into the literal's delimiter is fatal.
    fn scan_unicode_escape(&mut self, delim: char) -> bool {
        self.bump(); // past the {
        let start_bpos = self.pos;
        let mut count = 0;      // hex digits consumed so far
        let mut accum_int = 0;  // value accumulated from those digits
        let mut valid = true;

        // `count <= 6` lets a 7th digit through so the overlong case below is
        // reported with a proper span instead of looping forever.
        while !self.ch_is('}') && count <= 6 {
            let c = match self.ch {
                Some(c) => c,
                None => {
                    panic!(self.fatal_span_(start_bpos,
                                            self.pos,
                                            "unterminated unicode escape (found EOF)"));
                }
            };
            accum_int *= 16;
            accum_int += c.to_digit(16).unwrap_or_else(|| {
                if c == delim {
                    // Reaching the literal's own delimiter means the `}` never came.
                    panic!(self.fatal_span_(self.pos,
                                            self.next_pos,
                                            "unterminated unicode escape (needed a `}`)"));
                } else {
                    self.err_span_char(self.pos,
                                       self.next_pos,
                                       "invalid character in unicode escape",
                                       c);
                }
                valid = false;
                0
            });
            self.bump();
            count += 1;
        }

        if count > 6 {
            self.err_span_(start_bpos,
                           self.pos,
                           "overlong unicode escape (can have at most 6 hex digits)");
            valid = false;
        }

        // `count == 0` is the empty escape `\u{}`; `from_u32` rejects surrogates
        // and values beyond the scalar-value range.
        if valid && (char::from_u32(accum_int).is_none() || count == 0) {
            self.err_span_(start_bpos,
                           self.pos,
                           "invalid unicode character escape");
            valid = false;
        }

        self.bump(); // past the ending }
        valid
    }
1025 | ||
1026 | /// Scan over a float exponent. | |
1027 | fn scan_float_exponent(&mut self) { | |
1028 | if self.ch_is('e') || self.ch_is('E') { | |
1029 | self.bump(); | |
1030 | if self.ch_is('-') || self.ch_is('+') { | |
1031 | self.bump(); | |
1032 | } | |
1033 | if self.scan_digits(10, 10) == 0 { | |
1034 | self.err_span_(self.pos, | |
1035 | self.next_pos, | |
1036 | "expected at least one digit in exponent") | |
1037 | } | |
1038 | } | |
1039 | } | |
1040 | ||
1041 | /// Check that a base is valid for a floating literal, emitting a nice | |
1042 | /// error if it isn't. | |
1043 | fn check_float_base(&mut self, start_bpos: BytePos, last_bpos: BytePos, base: usize) { | |
1044 | match base { | |
1045 | 16 => { | |
1046 | self.err_span_(start_bpos, | |
1047 | last_bpos, | |
1048 | "hexadecimal float literal is not supported") | |
1049 | } | |
1050 | 8 => { | |
1051 | self.err_span_(start_bpos, | |
1052 | last_bpos, | |
1053 | "octal float literal is not supported") | |
1054 | } | |
1055 | 2 => { | |
1056 | self.err_span_(start_bpos, | |
1057 | last_bpos, | |
1058 | "binary float literal is not supported") | |
1059 | } | |
1060 | _ => (), | |
1061 | } | |
1062 | } | |
1063 | ||
1064 | fn binop(&mut self, op: token::BinOpToken) -> token::Token { | |
1065 | self.bump(); | |
1066 | if self.ch_is('=') { | |
1067 | self.bump(); | |
1068 | return token::BinOpEq(op); | |
1069 | } else { | |
1070 | return token::BinOp(op); | |
1071 | } | |
1072 | } | |
1073 | ||
    /// Return the next token from the string, advances the input past that
    /// token, and updates the interner
    ///
    /// On an unrecognized character the error is queued in `fatal_errs` and
    /// `Err(())` is returned; every other path yields a token.
    fn next_token_inner(&mut self) -> Result<token::Token, ()> {
        let c = self.ch;
        // Fast path: identifier/keyword — unless the leading `r`/`b` is really
        // the prefix of a raw/byte literal, which is handled further down.
        if ident_start(c) &&
           match (c.unwrap(), self.nextch(), self.nextnextch()) {
               // Note: r as in r" or r#" is part of a raw string literal,
               // b as in b' is part of a byte literal.
               // They are not identifiers, and are handled further down.
               ('r', Some('"'), _) |
               ('r', Some('#'), _) |
               ('b', Some('"'), _) |
               ('b', Some('\''), _) |
               ('b', Some('r'), Some('"')) |
               ('b', Some('r'), Some('#')) => false,
               _ => true,
           } {
            let start = self.pos;
            while ident_continue(self.ch) {
                self.bump();
            }

            return Ok(self.with_str_from(start, |string| {
                // `_` is its own token, not an identifier.
                if string == "_" {
                    token::Underscore
                } else {
                    // FIXME: perform NFKC normalization here. (Issue #2253)
                    token::Ident(Ident::from_str(string))
                }
            }));
        }

        // Numeric literal (integer or float), with an optional suffix.
        if is_dec_digit(c) {
            let num = self.scan_number(c.unwrap());
            let suffix = self.scan_optional_raw_name();
            debug!("next_token_inner: scanned number {:?}, {:?}", num, suffix);
            return Ok(token::Literal(num, suffix));
        }

        match c.expect("next_token_inner called at EOF") {
            // One-byte tokens.
            ';' => {
                self.bump();
                return Ok(token::Semi);
            }
            ',' => {
                self.bump();
                return Ok(token::Comma);
            }
            '.' => {
                // `.`, `..`, or `...` depending on how many dots follow.
                self.bump();
                return if self.ch_is('.') {
                    self.bump();
                    if self.ch_is('.') {
                        self.bump();
                        Ok(token::DotDotDot)
                    } else {
                        Ok(token::DotDot)
                    }
                } else {
                    Ok(token::Dot)
                };
            }
            '(' => {
                self.bump();
                return Ok(token::OpenDelim(token::Paren));
            }
            ')' => {
                self.bump();
                return Ok(token::CloseDelim(token::Paren));
            }
            '{' => {
                self.bump();
                return Ok(token::OpenDelim(token::Brace));
            }
            '}' => {
                self.bump();
                return Ok(token::CloseDelim(token::Brace));
            }
            '[' => {
                self.bump();
                return Ok(token::OpenDelim(token::Bracket));
            }
            ']' => {
                self.bump();
                return Ok(token::CloseDelim(token::Bracket));
            }
            '@' => {
                self.bump();
                return Ok(token::At);
            }
            '#' => {
                self.bump();
                return Ok(token::Pound);
            }
            '~' => {
                self.bump();
                return Ok(token::Tilde);
            }
            '?' => {
                self.bump();
                return Ok(token::Question);
            }
            ':' => {
                // `::` or `:`.
                self.bump();
                if self.ch_is(':') {
                    self.bump();
                    return Ok(token::ModSep);
                } else {
                    return Ok(token::Colon);
                }
            }

            '$' => {
                self.bump();
                return Ok(token::Dollar);
            }

            // Multi-byte tokens.
            '=' => {
                // `==`, `=>`, or `=`.
                self.bump();
                if self.ch_is('=') {
                    self.bump();
                    return Ok(token::EqEq);
                } else if self.ch_is('>') {
                    self.bump();
                    return Ok(token::FatArrow);
                } else {
                    return Ok(token::Eq);
                }
            }
            '!' => {
                // `!=` or `!`.
                self.bump();
                if self.ch_is('=') {
                    self.bump();
                    return Ok(token::Ne);
                } else {
                    return Ok(token::Not);
                }
            }
            '<' => {
                // `<=`, `<<`/`<<=`, `<-`, or `<`.
                self.bump();
                match self.ch.unwrap_or('\x00') {
                    '=' => {
                        self.bump();
                        return Ok(token::Le);
                    }
                    '<' => {
                        return Ok(self.binop(token::Shl));
                    }
                    '-' => {
                        self.bump();
                        // NOTE(review): degenerate match — the only arm is the
                        // wildcard, so every character after `<-` yields
                        // `LArrow`; kept as-is for fidelity.
                        match self.ch.unwrap_or('\x00') {
                            _ => {
                                return Ok(token::LArrow);
                            }
                        }
                    }
                    _ => {
                        return Ok(token::Lt);
                    }
                }
            }
            '>' => {
                // `>=`, `>>`/`>>=`, or `>`.
                self.bump();
                match self.ch.unwrap_or('\x00') {
                    '=' => {
                        self.bump();
                        return Ok(token::Ge);
                    }
                    '>' => {
                        return Ok(self.binop(token::Shr));
                    }
                    _ => {
                        return Ok(token::Gt);
                    }
                }
            }
            '\'' => {
                // Either a character constant 'a' OR a lifetime name 'abc
                let start_with_quote = self.pos;
                self.bump();
                let start = self.pos;

                // the eof will be picked up by the final `'` check below
                let c2 = self.ch.unwrap_or('\x00');
                self.bump();

                // If the character is an ident start not followed by another single
                // quote, then this is a lifetime name:
                if ident_start(Some(c2)) && !self.ch_is('\'') {
                    while ident_continue(self.ch) {
                        self.bump();
                    }
                    // lifetimes shouldn't end with a single quote
                    // if we find one, then this is an invalid character literal
                    if self.ch_is('\'') {
                        panic!(self.fatal_span_verbose(
                            start_with_quote, self.next_pos,
                            String::from("character literal may only contain one codepoint")));

                    }

                    // Include the leading `'` in the real identifier, for macro
                    // expansion purposes. See #12512 for the gory details of why
                    // this is necessary.
                    let ident = self.with_str_from(start, |lifetime_name| {
                        Ident::from_str(&format!("'{}", lifetime_name))
                    });

                    // Conjure up a "keyword checking ident" to make sure that
                    // the lifetime name is not a keyword.
                    let keyword_checking_ident = self.with_str_from(start, |lifetime_name| {
                        Ident::from_str(lifetime_name)
                    });
                    let keyword_checking_token = &token::Ident(keyword_checking_ident);
                    let last_bpos = self.pos;
                    // `'static` is the one keyword allowed as a lifetime name.
                    if keyword_checking_token.is_any_keyword() &&
                       !keyword_checking_token.is_keyword(keywords::Static) {
                        self.err_span_(start, last_bpos, "lifetimes cannot use keyword names");
                    }

                    return Ok(token::Lifetime(ident));
                }

                let valid = self.scan_char_or_byte(start,
                                                   c2,
                                                   // ascii_only =
                                                   false,
                                                   '\'');

                if !self.ch_is('\'') {
                    panic!(self.fatal_span_verbose(
                        start_with_quote, self.pos,
                        String::from("character literal may only contain one codepoint")));
                }

                // Placeholder contents when the literal was malformed.
                let id = if valid {
                    self.name_from(start)
                } else {
                    Symbol::intern("0")
                };
                self.bump(); // advance ch past token
                let suffix = self.scan_optional_raw_name();
                return Ok(token::Literal(token::Char(id), suffix));
            }
            'b' => {
                // b'…', b"…", or br"…" — dispatch on the next character.
                self.bump();
                let lit = match self.ch {
                    Some('\'') => self.scan_byte(),
                    Some('"') => self.scan_byte_string(),
                    Some('r') => self.scan_raw_byte_string(),
                    _ => unreachable!(), // Should have been a token::Ident above.
                };
                let suffix = self.scan_optional_raw_name();
                return Ok(token::Literal(lit, suffix));
            }
            '"' => {
                let start_bpos = self.pos;
                let mut valid = true;
                self.bump();
                while !self.ch_is('"') {
                    if self.is_eof() {
                        let last_bpos = self.pos;
                        panic!(self.fatal_span_(start_bpos,
                                                last_bpos,
                                                "unterminated double quote string"));
                    }

                    let ch_start = self.pos;
                    let ch = self.ch.unwrap();
                    self.bump();
                    valid &= self.scan_char_or_byte(ch_start,
                                                    ch,
                                                    // ascii_only =
                                                    false,
                                                    '"');
                }
                // adjust for the ASCII " at the start of the literal
                let id = if valid {
                    self.name_from(start_bpos + BytePos(1))
                } else {
                    Symbol::intern("??")
                };
                self.bump();
                let suffix = self.scan_optional_raw_name();
                return Ok(token::Literal(token::Str_(id), suffix));
            }
            'r' => {
                let start_bpos = self.pos;
                self.bump();
                // Count the `#`s of the opening delimiter; the closer must
                // repeat exactly as many.
                let mut hash_count = 0;
                while self.ch_is('#') {
                    self.bump();
                    hash_count += 1;
                }

                if self.is_eof() {
                    let last_bpos = self.pos;
                    panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string"));
                } else if !self.ch_is('"') {
                    let last_bpos = self.pos;
                    let curr_char = self.ch.unwrap();
                    panic!(self.fatal_span_char(start_bpos,
                                                last_bpos,
                                                "found invalid character; only `#` is allowed \
                                                 in raw string delimitation",
                                                curr_char));
                }
                self.bump();
                let content_start_bpos = self.pos;
                let mut content_end_bpos;
                let mut valid = true;
                'outer: loop {
                    if self.is_eof() {
                        let last_bpos = self.pos;
                        panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string"));
                    }
                    let c = self.ch.unwrap();
                    match c {
                        '"' => {
                            // Tentatively treat this quote as the terminator;
                            // if fewer than `hash_count` `#`s follow, it was
                            // part of the content and scanning resumes.
                            content_end_bpos = self.pos;
                            for _ in 0..hash_count {
                                self.bump();
                                if !self.ch_is('#') {
                                    continue 'outer;
                                }
                            }
                            break;
                        }
                        '\r' => {
                            if !self.nextch_is('\n') {
                                let last_bpos = self.pos;
                                self.err_span_(start_bpos,
                                               last_bpos,
                                               "bare CR not allowed in raw string, use \\r \
                                                instead");
                                valid = false;
                            }
                        }
                        _ => (),
                    }
                    self.bump();
                }
                self.bump();
                let id = if valid {
                    self.name_from_to(content_start_bpos, content_end_bpos)
                } else {
                    Symbol::intern("??")
                };
                let suffix = self.scan_optional_raw_name();
                return Ok(token::Literal(token::StrRaw(id, hash_count), suffix));
            }
            '-' => {
                // `->` or binary minus (possibly `-=` via `binop`).
                if self.nextch_is('>') {
                    self.bump();
                    self.bump();
                    return Ok(token::RArrow);
                } else {
                    return Ok(self.binop(token::Minus));
                }
            }
            '&' => {
                // `&&` or `&`/`&=`.
                if self.nextch_is('&') {
                    self.bump();
                    self.bump();
                    return Ok(token::AndAnd);
                } else {
                    return Ok(self.binop(token::And));
                }
            }
            '|' => {
                // `||` or `|`/`|=`.
                match self.nextch() {
                    Some('|') => {
                        self.bump();
                        self.bump();
                        return Ok(token::OrOr);
                    }
                    _ => {
                        return Ok(self.binop(token::Or));
                    }
                }
            }
            '+' => {
                return Ok(self.binop(token::Plus));
            }
            '*' => {
                return Ok(self.binop(token::Star));
            }
            '/' => {
                return Ok(self.binop(token::Slash));
            }
            '^' => {
                return Ok(self.binop(token::Caret));
            }
            '%' => {
                return Ok(self.binop(token::Percent));
            }
            c => {
                // Unknown character: queue a fatal diagnostic (with a
                // unicode-lookalike suggestion if one applies) and fail.
                let last_bpos = self.pos;
                let bpos = self.next_pos;
                let mut err = self.struct_fatal_span_char(last_bpos,
                                                          bpos,
                                                          "unknown start of token",
                                                          c);
                unicode_chars::check_for_substitution(&self, c, &mut err);
                self.fatal_errs.push(err);
                Err(())
            }
        }
    }
1491 | ||
1492 | fn consume_whitespace(&mut self) { | |
1493 | while is_pattern_whitespace(self.ch) && !self.is_eof() { | |
1494 | self.bump(); | |
1495 | } | |
1496 | } | |
1497 | ||
1498 | fn read_to_eol(&mut self) -> String { | |
1499 | let mut val = String::new(); | |
1500 | while !self.ch_is('\n') && !self.is_eof() { | |
1501 | val.push(self.ch.unwrap()); | |
1502 | self.bump(); | |
1503 | } | |
1504 | if self.ch_is('\n') { | |
1505 | self.bump(); | |
1506 | } | |
1507 | return val; | |
1508 | } | |
1509 | ||
1510 | fn read_one_line_comment(&mut self) -> String { | |
1511 | let val = self.read_to_eol(); | |
1512 | assert!((val.as_bytes()[0] == b'/' && val.as_bytes()[1] == b'/') || | |
1513 | (val.as_bytes()[0] == b'#' && val.as_bytes()[1] == b'!')); | |
1514 | return val; | |
1515 | } | |
1516 | ||
1517 | fn consume_non_eol_whitespace(&mut self) { | |
1518 | while is_pattern_whitespace(self.ch) && !self.ch_is('\n') && !self.is_eof() { | |
1519 | self.bump(); | |
1520 | } | |
1521 | } | |
1522 | ||
1523 | fn peeking_at_comment(&self) -> bool { | |
1524 | (self.ch_is('/') && self.nextch_is('/')) || (self.ch_is('/') && self.nextch_is('*')) || | |
1525 | // consider shebangs comments, but not inner attributes | |
1526 | (self.ch_is('#') && self.nextch_is('!') && !self.nextnextch_is('[')) | |
1527 | } | |
1528 | ||
1529 | fn scan_byte(&mut self) -> token::Lit { | |
1530 | self.bump(); | |
1531 | let start = self.pos; | |
1532 | ||
1533 | // the eof will be picked up by the final `'` check below | |
1534 | let c2 = self.ch.unwrap_or('\x00'); | |
1535 | self.bump(); | |
1536 | ||
1537 | let valid = self.scan_char_or_byte(start, | |
1538 | c2, | |
1539 | // ascii_only = | |
1540 | true, | |
1541 | '\''); | |
1542 | if !self.ch_is('\'') { | |
1543 | // Byte offsetting here is okay because the | |
1544 | // character before position `start` are an | |
1545 | // ascii single quote and ascii 'b'. | |
1546 | let pos = self.pos; | |
1547 | panic!(self.fatal_span_verbose(start - BytePos(2), | |
1548 | pos, | |
1549 | "unterminated byte constant".to_string())); | |
1550 | } | |
1551 | ||
1552 | let id = if valid { | |
1553 | self.name_from(start) | |
1554 | } else { | |
1555 | Symbol::intern("?") | |
1556 | }; | |
1557 | self.bump(); // advance ch past token | |
1558 | return token::Byte(id); | |
1559 | } | |
1560 | ||
    /// Scan a two-digit `\xHH` escape by delegating to `scan_hex_digits`.
    /// `below_0x7f_only` presumably restricts the value to ASCII (byte-literal
    /// context) — semantics live in `scan_hex_digits`; confirm there.
    fn scan_byte_escape(&mut self, delim: char, below_0x7f_only: bool) -> bool {
        self.scan_hex_digits(2, delim, below_0x7f_only)
    }
1564 | ||
1565 | fn scan_byte_string(&mut self) -> token::Lit { | |
1566 | self.bump(); | |
1567 | let start = self.pos; | |
1568 | let mut valid = true; | |
1569 | ||
1570 | while !self.ch_is('"') { | |
1571 | if self.is_eof() { | |
1572 | let pos = self.pos; | |
1573 | panic!(self.fatal_span_(start, pos, "unterminated double quote byte string")); | |
1574 | } | |
1575 | ||
1576 | let ch_start = self.pos; | |
1577 | let ch = self.ch.unwrap(); | |
1578 | self.bump(); | |
1579 | valid &= self.scan_char_or_byte(ch_start, | |
1580 | ch, | |
1581 | // ascii_only = | |
1582 | true, | |
1583 | '"'); | |
1584 | } | |
1585 | let id = if valid { | |
1586 | self.name_from(start) | |
1587 | } else { | |
1588 | Symbol::intern("??") | |
1589 | }; | |
1590 | self.bump(); | |
1591 | return token::ByteStr(id); | |
1592 | } | |
1593 | ||
    /// Scan a raw byte string `br##"…"##` (the `b` is consumed; the `r` is
    /// under the cursor). Returns the `ByteStrRaw` literal.
    fn scan_raw_byte_string(&mut self) -> token::Lit {
        let start_bpos = self.pos;
        self.bump();
        // Count the `#`s of the opening delimiter; the closing delimiter must
        // repeat exactly as many.
        let mut hash_count = 0;
        while self.ch_is('#') {
            self.bump();
            hash_count += 1;
        }

        if self.is_eof() {
            let pos = self.pos;
            panic!(self.fatal_span_(start_bpos, pos, "unterminated raw string"));
        } else if !self.ch_is('"') {
            let pos = self.pos;
            let ch = self.ch.unwrap();
            panic!(self.fatal_span_char(start_bpos,
                                        pos,
                                        "found invalid character; only `#` is allowed in raw \
                                         string delimitation",
                                        ch));
        }
        self.bump();
        let content_start_bpos = self.pos;
        let mut content_end_bpos;
        'outer: loop {
            match self.ch {
                None => {
                    let pos = self.pos;
                    panic!(self.fatal_span_(start_bpos, pos, "unterminated raw string"))
                }
                Some('"') => {
                    // Tentatively treat this quote as the terminator; if fewer
                    // than `hash_count` `#`s follow, it belonged to the content
                    // and the outer loop resumes scanning.
                    content_end_bpos = self.pos;
                    for _ in 0..hash_count {
                        self.bump();
                        if !self.ch_is('#') {
                            continue 'outer;
                        }
                    }
                    break;
                }
                Some(c) => {
                    // Raw *byte* strings may not contain non-ASCII characters.
                    if c > '\x7F' {
                        let pos = self.pos;
                        self.err_span_char(pos, pos, "raw byte string must be ASCII", c);
                    }
                }
            }
            self.bump();
        }
        self.bump();
        return token::ByteStrRaw(self.name_from_to(content_start_bpos, content_end_bpos),
                                 hash_count);
    }
1647 | } | |
1648 | ||
1649 | // This tests the character for the unicode property 'PATTERN_WHITE_SPACE' which | |
1650 | // is guaranteed to be forward compatible. http://unicode.org/reports/tr31/#R3 | |
1651 | pub fn is_pattern_whitespace(c: Option<char>) -> bool { | |
1652 | c.map_or(false, Pattern_White_Space) | |
1653 | } | |
1654 | ||
/// Is `c` a character in the inclusive range `lo..=hi`? `None` is never in range.
fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
    c.map_or(false, |c| lo <= c && c <= hi)
}
1661 | ||
/// Is `c` an ASCII decimal digit? `None` (EOF) is not a digit.
fn is_dec_digit(c: Option<char>) -> bool {
    match c {
        Some(c) => '0' <= c && c <= '9',
        None => false,
    }
}
1665 | ||
1666 | pub fn is_doc_comment(s: &str) -> bool { | |
1667 | let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') || | |
1668 | s.starts_with("//!"); | |
1669 | debug!("is {:?} a doc comment? {}", s, res); | |
1670 | res | |
1671 | } | |
1672 | ||
1673 | pub fn is_block_doc_comment(s: &str) -> bool { | |
1674 | // Prevent `/**/` from being parsed as a doc comment | |
1675 | let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') || | |
1676 | s.starts_with("/*!")) && s.len() >= 5; | |
1677 | debug!("is {:?} a doc comment? {}", s, res); | |
1678 | res | |
1679 | } | |
1680 | ||
1681 | fn ident_start(c: Option<char>) -> bool { | |
1682 | let c = match c { | |
1683 | Some(c) => c, | |
1684 | None => return false, | |
1685 | }; | |
1686 | ||
1687 | (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c > '\x7f' && c.is_xid_start()) | |
1688 | } | |
1689 | ||
1690 | fn ident_continue(c: Option<char>) -> bool { | |
1691 | let c = match c { | |
1692 | Some(c) => c, | |
1693 | None => return false, | |
1694 | }; | |
1695 | ||
1696 | (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || | |
1697 | (c > '\x7f' && c.is_xid_continue()) | |
1698 | } | |
1699 | ||
1700 | #[cfg(test)] | |
1701 | mod tests { | |
1702 | use super::*; | |
1703 | ||
1704 | use ast::Ident; | |
1705 | use symbol::Symbol; | |
1706 | use syntax_pos::{BytePos, Span, NO_EXPANSION}; | |
1707 | use codemap::CodeMap; | |
1708 | use errors; | |
1709 | use parse::token; | |
1710 | use std::io; | |
1711 | use std::rc::Rc; | |
1712 | ||
    /// Build an error `Handler` whose output is discarded — the tests below
    /// only care about tokens, not rendered diagnostics.
    fn mk_sh(cm: Rc<CodeMap>) -> errors::Handler {
        // FIXME (#22405): Replace `Box::new` with `box` here when/if possible.
        let emitter = errors::emitter::EmitterWriter::new(Box::new(io::sink()),
                                                          Some(cm));
        errors::Handler::with_emitter(true, false, Box::new(emitter))
    }
1719 | ||
    // open a string reader for the given string
    // (the filemap name "zebra.rs" is arbitrary — it only labels diagnostics)
    fn setup<'a>(cm: &CodeMap,
                 span_handler: &'a errors::Handler,
                 teststr: String)
                 -> StringReader<'a> {
        let fm = cm.new_filemap("zebra.rs".to_string(), None, teststr);
        StringReader::new(span_handler, fm)
    }
1728 | ||
    // Smoke test: tokens and byte positions over a small complete program.
    #[test]
    fn t1() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        let mut string_reader = setup(&cm,
                                      &sh,
                                      "/* my source file */ fn main() { println!(\"zebra\"); }\n"
                                          .to_string());
        let id = Ident::from_str("fn");
        // The leading block comment and the following space are their own tokens.
        assert_eq!(string_reader.next_token().tok, token::Comment);
        assert_eq!(string_reader.next_token().tok, token::Whitespace);
        let tok1 = string_reader.next_token();
        let tok2 = TokenAndSpan {
            tok: token::Ident(id),
            sp: Span {
                lo: BytePos(21),
                hi: BytePos(23),
                expn_id: NO_EXPANSION,
            },
        };
        assert_eq!(tok1, tok2);
        assert_eq!(string_reader.next_token().tok, token::Whitespace);
        // the 'main' id is already read:
        assert_eq!(string_reader.pos.clone(), BytePos(28));
        // read another token:
        let tok3 = string_reader.next_token();
        let tok4 = TokenAndSpan {
            tok: token::Ident(Ident::from_str("main")),
            sp: Span {
                lo: BytePos(24),
                hi: BytePos(28),
                expn_id: NO_EXPANSION,
            },
        };
        assert_eq!(tok3, tok4);
        // the lparen is already read:
        assert_eq!(string_reader.pos.clone(), BytePos(29))
    }
1767 | ||
    // check that the given reader produces the desired stream
    // of tokens (stop checking after exhausting the expected vec)
    fn check_tokenization(mut string_reader: StringReader, expected: Vec<token::Token>) {
        for expected_tok in &expected {
            assert_eq!(&string_reader.next_token().tok, expected_tok);
        }
    }
1775 | ||
    // make the identifier by looking up the string in the interner
    fn mk_ident(id: &str) -> token::Token {
        token::Ident(Ident::from_str(id))
    }
1780 | ||
    // Space-separated identifiers lex as ident / whitespace / ident.
    #[test]
    fn doublecolonparsing() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        check_tokenization(setup(&cm, &sh, "a b".to_string()),
                           vec![mk_ident("a"), token::Whitespace, mk_ident("b")]);
    }
1788 | ||
    // `a::b` — the `::` lexes as one `ModSep` token.
    #[test]
    fn dcparsing_2() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        check_tokenization(setup(&cm, &sh, "a::b".to_string()),
                           vec![mk_ident("a"), token::ModSep, mk_ident("b")]);
    }
1796 | ||
    // Whitespace before `::` stays a separate token.
    #[test]
    fn dcparsing_3() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        check_tokenization(setup(&cm, &sh, "a ::b".to_string()),
                           vec![mk_ident("a"), token::Whitespace, token::ModSep, mk_ident("b")]);
    }
1804 | ||
    // Whitespace after `::` stays a separate token.
    #[test]
    fn dcparsing_4() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        check_tokenization(setup(&cm, &sh, "a:: b".to_string()),
                           vec![mk_ident("a"), token::ModSep, token::Whitespace, mk_ident("b")]);
    }
1812 | ||
    // A plain character literal interns its single codepoint.
    #[test]
    fn character_a() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        assert_eq!(setup(&cm, &sh, "'a'".to_string()).next_token().tok,
                   token::Literal(token::Char(Symbol::intern("a")), None));
    }
1820 | ||
    // `' '` — a space is a valid (unescaped) character literal.
    #[test]
    fn character_space() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        assert_eq!(setup(&cm, &sh, "' '".to_string()).next_token().tok,
                   token::Literal(token::Char(Symbol::intern(" ")), None));
    }
1828 | ||
    // Escapes are interned verbatim (`\n` stays as the two source characters).
    #[test]
    fn character_escaped() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        assert_eq!(setup(&cm, &sh, "'\\n'".to_string()).next_token().tok,
                   token::Literal(token::Char(Symbol::intern("\\n")), None));
    }
1836 | ||
    // A quote followed by an identifier lexes as a lifetime, leading `'` included.
    #[test]
    fn lifetime_name() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        assert_eq!(setup(&cm, &sh, "'abc".to_string()).next_token().tok,
                   token::Lifetime(Ident::from_str("'abc")));
    }
1844 | ||
    // Raw strings keep quotes, backslashes and NUL bytes verbatim, and record
    // the delimiter's hash count.
    #[test]
    fn raw_string() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        assert_eq!(setup(&cm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string())
                       .next_token()
                       .tok,
                   token::Literal(token::StrRaw(Symbol::intern("\"#a\\b\x00c\""), 3), None));
    }
1854 | ||
    // `<lit>suffix` lexes as one suffixed literal; a separating space
    // detaches the suffix into its own identifier token.
    #[test]
    fn literal_suffixes() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        macro_rules! test {
            ($input: expr, $tok_type: ident, $tok_contents: expr) => {{
                assert_eq!(setup(&cm, &sh, format!("{}suffix", $input)).next_token().tok,
                           token::Literal(token::$tok_type(Symbol::intern($tok_contents)),
                                          Some(Symbol::intern("suffix"))));
                // with a whitespace separator:
                assert_eq!(setup(&cm, &sh, format!("{} suffix", $input)).next_token().tok,
                           token::Literal(token::$tok_type(Symbol::intern($tok_contents)),
                                          None));
            }}
        }

        test!("'a'", Char, "a");
        test!("b'a'", Byte, "a");
        test!("\"a\"", Str_, "a");
        test!("b\"a\"", ByteStr, "a");
        test!("1234", Integer, "1234");
        test!("0b101", Integer, "0b101");
        test!("0xABC", Integer, "0xABC");
        test!("1.0", Float, "1.0");
        test!("1.0e10", Float, "1.0e10");

        // Suffixes that look like old-style int suffixes and raw-string suffixes.
        assert_eq!(setup(&cm, &sh, "2us".to_string()).next_token().tok,
                   token::Literal(token::Integer(Symbol::intern("2")),
                                  Some(Symbol::intern("us"))));
        assert_eq!(setup(&cm, &sh, "r###\"raw\"###suffix".to_string()).next_token().tok,
                   token::Literal(token::StrRaw(Symbol::intern("raw"), 3),
                                  Some(Symbol::intern("suffix"))));
        assert_eq!(setup(&cm, &sh, "br###\"raw\"###suffix".to_string()).next_token().tok,
                   token::Literal(token::ByteStrRaw(Symbol::intern("raw"), 3),
                                  Some(Symbol::intern("suffix"))));
    }
1891 | ||
    // `///` and `/// text` are doc comments; `////` is a plain comment.
    #[test]
    fn line_doc_comments() {
        assert!(is_doc_comment("///"));
        assert!(is_doc_comment("/// blah"));
        assert!(!is_doc_comment("////"));
    }
1898 | ||
    // Nested `/* /* */ */` must lex as a single comment token.
    #[test]
    fn nested_block_comments() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        let mut lexer = setup(&cm, &sh, "/* /* */ */'a'".to_string());
        match lexer.next_token().tok {
            token::Comment => {}
            _ => panic!("expected a comment!"),
        }
        assert_eq!(lexer.next_token().tok,
                   token::Literal(token::Char(Symbol::intern("a")), None));
    }
1911 | ||
    // CRLF line endings: the `\r` must not leak into the comment token's span
    // or into a doc comment's interned text.
    #[test]
    fn crlf_comments() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        let mut lexer = setup(&cm, &sh, "// test\r\n/// test\r\n".to_string());
        let comment = lexer.next_token();
        assert_eq!(comment.tok, token::Comment);
        assert_eq!(comment.sp, ::syntax_pos::mk_sp(BytePos(0), BytePos(7)));
        assert_eq!(lexer.next_token().tok, token::Whitespace);
        assert_eq!(lexer.next_token().tok,
                   token::DocComment(Symbol::intern("/// test")));
    }