]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
11 | use ast; | |
12 | use codemap::{BytePos, CharPos, CodeMap, Pos, Span}; | |
13 | use codemap; | |
92a42be0 | 14 | use diagnostic::FatalError; |
1a4d82fc JJ |
15 | use diagnostic::SpanHandler; |
16 | use ext::tt::transcribe::tt_next_token; | |
c34b1796 | 17 | use parse::token::str_to_ident; |
d9579d0f AL |
18 | use parse::token; |
19 | use str::char_at; | |
1a4d82fc | 20 | |
d9579d0f | 21 | use std::borrow::Cow; |
1a4d82fc | 22 | use std::char; |
1a4d82fc | 23 | use std::mem::replace; |
1a4d82fc | 24 | use std::rc::Rc; |
1a4d82fc JJ |
25 | |
26 | pub use ext::tt::transcribe::{TtReader, new_tt_reader, new_tt_reader_with_doc_flag}; | |
27 | ||
28 | pub mod comments; | |
92a42be0 | 29 | mod unicode_chars; |
1a4d82fc JJ |
30 | |
31 | pub trait Reader { | |
32 | fn is_eof(&self) -> bool; | |
33 | fn next_token(&mut self) -> TokenAndSpan; | |
34 | /// Report a fatal error with the current span. | |
92a42be0 | 35 | fn fatal(&self, &str) -> FatalError; |
1a4d82fc JJ |
36 | /// Report a non-fatal error with the current span. |
37 | fn err(&self, &str); | |
38 | fn peek(&self) -> TokenAndSpan; | |
39 | /// Get a token the parser cares about. | |
40 | fn real_token(&mut self) -> TokenAndSpan { | |
41 | let mut t = self.next_token(); | |
42 | loop { | |
43 | match t.tok { | |
44 | token::Whitespace | token::Comment | token::Shebang(_) => { | |
45 | t = self.next_token(); | |
46 | }, | |
47 | _ => break | |
48 | } | |
49 | } | |
50 | t | |
51 | } | |
52 | } | |
53 | ||
/// A token together with the span of source text it was lexed from.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct TokenAndSpan {
    /// The token itself.
    pub tok: token::Token,
    /// The region of source that produced `tok`.
    pub sp: Span,
}
59 | ||
/// A lexer that reads directly from the source text of one `FileMap`,
/// keeping one token of lookahead in `peek_tok`/`peek_span`.
pub struct StringReader<'a> {
    /// Sink for lexical errors, warnings and help messages.
    pub span_diagnostic: &'a SpanHandler,
    /// The absolute offset within the codemap of the next character to read
    pub pos: BytePos,
    /// The absolute offset within the codemap of the last character read(curr)
    pub last_pos: BytePos,
    /// The column of the next character to read
    pub col: CharPos,
    /// The last character to be read
    pub curr: Option<char>,
    /// The file being lexed.
    pub filemap: Rc<codemap::FileMap>,
    /* cached: */
    /// The token `next_token` will hand out next (one token of lookahead).
    pub peek_tok: token::Token,
    /// The span of `peek_tok`.
    pub peek_span: Span,

    // cache a direct reference to the source text, so that we don't have to
    // retrieve it via `self.filemap.src.as_ref().unwrap()` all the time.
    source_text: Rc<String>
}
79 | ||
80 | impl<'a> Reader for StringReader<'a> { | |
81 | fn is_eof(&self) -> bool { self.curr.is_none() } | |
82 | /// Return the next token. EFFECT: advances the string_reader. | |
83 | fn next_token(&mut self) -> TokenAndSpan { | |
84 | let ret_val = TokenAndSpan { | |
85 | tok: replace(&mut self.peek_tok, token::Underscore), | |
86 | sp: self.peek_span, | |
87 | }; | |
88 | self.advance_token(); | |
89 | ret_val | |
90 | } | |
92a42be0 | 91 | fn fatal(&self, m: &str) -> FatalError { |
1a4d82fc JJ |
92 | self.fatal_span(self.peek_span, m) |
93 | } | |
94 | fn err(&self, m: &str) { | |
95 | self.err_span(self.peek_span, m) | |
96 | } | |
97 | fn peek(&self) -> TokenAndSpan { | |
98 | // FIXME(pcwalton): Bad copy! | |
99 | TokenAndSpan { | |
100 | tok: self.peek_tok.clone(), | |
101 | sp: self.peek_span, | |
102 | } | |
103 | } | |
104 | } | |
105 | ||
106 | impl<'a> Reader for TtReader<'a> { | |
107 | fn is_eof(&self) -> bool { | |
108 | self.cur_tok == token::Eof | |
109 | } | |
110 | fn next_token(&mut self) -> TokenAndSpan { | |
111 | let r = tt_next_token(self); | |
112 | debug!("TtReader: r={:?}", r); | |
113 | r | |
114 | } | |
92a42be0 SL |
115 | fn fatal(&self, m: &str) -> FatalError { |
116 | self.sp_diag.span_fatal(self.cur_span, m) | |
1a4d82fc JJ |
117 | } |
118 | fn err(&self, m: &str) { | |
119 | self.sp_diag.span_err(self.cur_span, m); | |
120 | } | |
121 | fn peek(&self) -> TokenAndSpan { | |
122 | TokenAndSpan { | |
123 | tok: self.cur_tok.clone(), | |
124 | sp: self.cur_span, | |
125 | } | |
126 | } | |
127 | } | |
128 | ||
1a4d82fc JJ |
129 | impl<'a> StringReader<'a> { |
    /// For comments.rs, which hackily pokes into pos and curr
    ///
    /// Constructs a reader over `filemap` without producing the first
    /// lookahead token; callers that want a ready-to-use lexer should
    /// call `new` instead. ICEs if the filemap carries no source text.
    pub fn new_raw<'b>(span_diagnostic: &'b SpanHandler,
                       filemap: Rc<codemap::FileMap>) -> StringReader<'b> {
        if filemap.src.is_none() {
            span_diagnostic.handler.bug(&format!("Cannot lex filemap without source: {}",
                                                 filemap.name)[..]);
        }

        // Cache the source text so we never re-unwrap `filemap.src`.
        let source_text = (*filemap.src.as_ref().unwrap()).clone();

        let mut sr = StringReader {
            span_diagnostic: span_diagnostic,
            pos: filemap.start_pos,
            last_pos: filemap.start_pos,
            col: CharPos(0),
            // NOTE(review): seeded with '\n' so the first `bump` treats
            // the start of the file like the start of a line — confirm
            // against `bump`'s newline handling.
            curr: Some('\n'),
            filemap: filemap,
            /* dummy values; not read */
            peek_tok: token::Eof,
            peek_span: codemap::DUMMY_SP,
            source_text: source_text
        };
        // Load the first real character into `curr`.
        sr.bump();
        sr
    }
155 | ||
156 | pub fn new<'b>(span_diagnostic: &'b SpanHandler, | |
157 | filemap: Rc<codemap::FileMap>) -> StringReader<'b> { | |
158 | let mut sr = StringReader::new_raw(span_diagnostic, filemap); | |
159 | sr.advance_token(); | |
160 | sr | |
161 | } | |
162 | ||
163 | pub fn curr_is(&self, c: char) -> bool { | |
164 | self.curr == Some(c) | |
165 | } | |
166 | ||
    /// Report a fatal lexical error with a given span.
    pub fn fatal_span(&self, sp: Span, m: &str) -> FatalError {
        self.span_diagnostic.span_fatal(sp, m)
    }

    /// Report a lexical error with a given span.
    pub fn err_span(&self, sp: Span, m: &str) {
        self.span_diagnostic.span_err(sp, m)
    }

    /// Suggest some help with a given span.
    pub fn help_span(&self, sp: Span, m: &str) {
        self.span_diagnostic.span_help(sp, m)
    }

    /// Report a fatal error spanning [`from_pos`, `to_pos`).
    fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> FatalError {
        self.fatal_span(codemap::mk_sp(from_pos, to_pos), m)
    }

    /// Report a lexical error spanning [`from_pos`, `to_pos`).
    fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
        self.err_span(codemap::mk_sp(from_pos, to_pos), m)
    }

    /// Suggest some help spanning [`from_pos`, `to_pos`).
    fn help_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
        self.help_span(codemap::mk_sp(from_pos, to_pos), m)
    }
196 | ||
1a4d82fc JJ |
197 | /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an |
198 | /// escaped character to the error message | |
92a42be0 | 199 | fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> FatalError { |
1a4d82fc JJ |
200 | let mut m = m.to_string(); |
201 | m.push_str(": "); | |
202 | for c in c.escape_default() { m.push(c) } | |
92a42be0 | 203 | self.fatal_span_(from_pos, to_pos, &m[..]) |
1a4d82fc JJ |
204 | } |
205 | ||
206 | /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an | |
207 | /// escaped character to the error message | |
208 | fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) { | |
209 | let mut m = m.to_string(); | |
210 | m.push_str(": "); | |
211 | for c in c.escape_default() { m.push(c) } | |
85aaf69f | 212 | self.err_span_(from_pos, to_pos, &m[..]); |
1a4d82fc JJ |
213 | } |
214 | ||
215 | /// Report a lexical error spanning [`from_pos`, `to_pos`), appending the | |
216 | /// offending string to the error message | |
92a42be0 | 217 | fn fatal_span_verbose(&self, from_pos: BytePos, to_pos: BytePos, mut m: String) -> FatalError { |
1a4d82fc | 218 | m.push_str(": "); |
85aaf69f SL |
219 | let from = self.byte_offset(from_pos).to_usize(); |
220 | let to = self.byte_offset(to_pos).to_usize(); | |
c34b1796 | 221 | m.push_str(&self.source_text[from..to]); |
92a42be0 | 222 | self.fatal_span_(from_pos, to_pos, &m[..]) |
1a4d82fc JJ |
223 | } |
224 | ||
    /// Advance peek_tok and peek_span to refer to the next token, and
    /// possibly update the interner.
    fn advance_token(&mut self) {
        match self.scan_whitespace_or_comment() {
            // Whitespace/comment/shebang tokens are produced here so that
            // `real_token` can later filter them out.
            Some(comment) => {
                self.peek_span = comment.sp;
                self.peek_tok = comment.tok;
            },
            None => {
                if self.is_eof() {
                    self.peek_tok = token::Eof;
                    // A zero-width span at the end of the file.
                    self.peek_span = codemap::mk_sp(self.filemap.end_pos, self.filemap.end_pos);
                } else {
                    // The token starts at the last character read.
                    let start_bytepos = self.last_pos;
                    self.peek_tok = self.next_token_inner();
                    self.peek_span = codemap::mk_sp(start_bytepos,
                                                    self.last_pos);
                };
            }
        }
    }
246 | ||
    /// Translate an absolute `BytePos` into an offset relative to the
    /// start of this filemap, i.e. an index into `source_text`.
    fn byte_offset(&self, pos: BytePos) -> BytePos {
        (pos - self.filemap.start_pos)
    }

    /// Calls `f` with a string slice of the source text spanning from `start`
    /// up to but excluding `self.last_pos`, meaning the slice does not include
    /// the character `self.curr`.
    pub fn with_str_from<T, F>(&self, start: BytePos, f: F) -> T where
        F: FnOnce(&str) -> T,
    {
        self.with_str_from_to(start, self.last_pos, f)
    }

    /// Create a Name from a given offset to the current offset, each
    /// adjusted 1 towards each other (assumes that on either side there is a
    /// single-byte delimiter).
    pub fn name_from(&self, start: BytePos) -> ast::Name {
        debug!("taking an ident from {:?} to {:?}", start, self.last_pos);
        self.with_str_from(start, token::intern)
    }

    /// As name_from, with an explicit endpoint.
    pub fn name_from_to(&self, start: BytePos, end: BytePos) -> ast::Name {
        debug!("taking an ident from {:?} to {:?}", start, end);
        self.with_str_from_to(start, end, token::intern)
    }

    /// Calls `f` with a string slice of the source text spanning from `start`
    /// up to but excluding `end`.
    fn with_str_from_to<T, F>(&self, start: BytePos, end: BytePos, f: F) -> T where
        F: FnOnce(&str) -> T,
    {
        f(&self.source_text[self.byte_offset(start).to_usize()..
                            self.byte_offset(end).to_usize()])
    }
282 | ||
    /// Converts CRLF to LF in the given string, raising an error on bare CR.
    ///
    /// Returns a borrowed `Cow` when no CRLF pair is present (the common
    /// case, no allocation); only allocates a fresh `String` once the
    /// first CRLF is found.
    fn translate_crlf<'b>(&self, start: BytePos,
                          s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
        let mut i = 0;
        while i < s.len() {
            let ch = char_at(s, i);
            let next = i + ch.len_utf8();
            if ch == '\r' {
                if next < s.len() && char_at(s, next) == '\n' {
                    // Found a CRLF pair: fall back to the allocating pass,
                    // starting from the current position.
                    return translate_crlf_(self, start, s, errmsg, i).into();
                }
                // Bare CR with no following LF: report, but keep going.
                let pos = start + BytePos(i as u32);
                let end_pos = start + BytePos(next as u32);
                self.err_span_(pos, end_pos, errmsg);
            }
            i = next;
        }
        return s.into();

        // Allocating pass: copy `s` into `buf`, skipping each CR that is
        // part of a CRLF pair, and reporting bare CRs along the way.
        // `j` marks the start of the next un-copied segment.
        fn translate_crlf_(rdr: &StringReader, start: BytePos,
                           s: &str, errmsg: &str, mut i: usize) -> String {
            let mut buf = String::with_capacity(s.len());
            let mut j = 0;
            while i < s.len() {
                let ch = char_at(s, i);
                let next = i + ch.len_utf8();
                if ch == '\r' {
                    if j < i { buf.push_str(&s[j..i]); }
                    // Skip the CR itself in the output.
                    j = next;
                    if next >= s.len() || char_at(s, next) != '\n' {
                        let pos = start + BytePos(i as u32);
                        let end_pos = start + BytePos(next as u32);
                        rdr.err_span_(pos, end_pos, errmsg);
                    }
                }
                i = next;
            }
            if j < s.len() { buf.push_str(&s[j..]); }
            buf
        }
    }
324 | ||
325 | ||
    /// Advance the StringReader by one character. If a newline is
    /// discovered, add it to the FileMap's list of line start offsets.
    pub fn bump(&mut self) {
        // The character about to be read becomes the "last read" position.
        self.last_pos = self.pos;
        let current_byte_offset = self.byte_offset(self.pos).to_usize();
        if current_byte_offset < self.source_text.len() {
            assert!(self.curr.is_some());
            let last_char = self.curr.unwrap();
            let ch = char_at(&self.source_text, current_byte_offset);
            let next = current_byte_offset + ch.len_utf8();
            let byte_offset_diff = next - current_byte_offset;
            self.pos = self.pos + Pos::from_usize(byte_offset_diff);
            self.curr = Some(ch);
            self.col = self.col + CharPos(1);
            // The previous character ended a line: record the line start
            // and reset the column counter.
            if last_char == '\n' {
                self.filemap.next_line(self.last_pos);
                self.col = CharPos(0);
            }

            // Multi-byte UTF-8 characters must be recorded so that byte
            // positions can later be mapped back to character positions.
            if byte_offset_diff > 1 {
                self.filemap.record_multibyte_char(self.last_pos, byte_offset_diff);
            }
        } else {
            // Past the end of the filemap: signal EOF via `curr`.
            self.curr = None;
        }
    }
352 | ||
353 | pub fn nextch(&self) -> Option<char> { | |
85aaf69f | 354 | let offset = self.byte_offset(self.pos).to_usize(); |
c34b1796 | 355 | if offset < self.source_text.len() { |
d9579d0f | 356 | Some(char_at(&self.source_text, offset)) |
1a4d82fc JJ |
357 | } else { |
358 | None | |
359 | } | |
360 | } | |
361 | ||
362 | pub fn nextch_is(&self, c: char) -> bool { | |
363 | self.nextch() == Some(c) | |
364 | } | |
365 | ||
366 | pub fn nextnextch(&self) -> Option<char> { | |
85aaf69f | 367 | let offset = self.byte_offset(self.pos).to_usize(); |
c34b1796 | 368 | let s = &self.source_text[..]; |
1a4d82fc | 369 | if offset >= s.len() { return None } |
d9579d0f | 370 | let next = offset + char_at(s, offset).len_utf8(); |
1a4d82fc | 371 | if next < s.len() { |
d9579d0f | 372 | Some(char_at(s, next)) |
1a4d82fc JJ |
373 | } else { |
374 | None | |
375 | } | |
376 | } | |
377 | ||
378 | pub fn nextnextch_is(&self, c: char) -> bool { | |
379 | self.nextnextch() == Some(c) | |
380 | } | |
381 | ||
    /// Eats <XID_start><XID_continue>*, if possible.
    ///
    /// Returns the interned name, or `None` when the current character
    /// cannot start an identifier, or when the identifier is just `_`.
    fn scan_optional_raw_name(&mut self) -> Option<ast::Name> {
        if !ident_start(self.curr) {
            return None
        }
        let start = self.last_pos;
        while ident_continue(self.curr) {
            self.bump();
        }

        self.with_str_from(start, |string| {
            // A lone underscore is not a usable name.
            if string == "_" {
                None
            } else {
                Some(token::intern(string))
            }
        })
    }
400 | ||
    /// PRECONDITION: self.curr is not whitespace
    /// Eats any kind of comment.
    ///
    /// Handles `//...` line comments (including `///`/`//!` doc-comments),
    /// `/* ... */` block comments (delegated to `scan_block_comment`), and
    /// a `#!...` shebang on the very first line of the file.
    fn scan_comment(&mut self) -> Option<TokenAndSpan> {
        // Defensive check: the caller promised no whitespace here.
        match self.curr {
            Some(c) => {
                if c.is_whitespace() {
                    self.span_diagnostic.span_err(codemap::mk_sp(self.last_pos, self.last_pos),
                        "called consume_any_line_comment, but there was whitespace");
                }
            },
            None => { }
        }

        if self.curr_is('/') {
            match self.nextch() {
                Some('/') => {
                    self.bump();
                    self.bump();

                    // line comments starting with "///" or "//!" are doc-comments
                    let doc_comment = self.curr_is('/') || self.curr_is('!');
                    let start_bpos = if doc_comment {
                        self.pos - BytePos(3)
                    } else {
                        self.last_pos - BytePos(2)
                    };

                    // Consume up to (but not including) the line terminator.
                    while !self.is_eof() {
                        match self.curr.unwrap() {
                            '\n' => break,
                            '\r' => {
                                if self.nextch_is('\n') {
                                    // CRLF
                                    break
                                } else if doc_comment {
                                    self.err_span_(self.last_pos, self.pos,
                                                   "bare CR not allowed in doc-comment");
                                }
                            }
                            _ => ()
                        }
                        self.bump();
                    }

                    return if doc_comment {
                        self.with_str_from(start_bpos, |string| {
                            // comments with only more "/"s are not doc comments
                            let tok = if is_doc_comment(string) {
                                token::DocComment(token::intern(string))
                            } else {
                                token::Comment
                            };

                            Some(TokenAndSpan {
                                tok: tok,
                                sp: codemap::mk_sp(start_bpos, self.last_pos)
                            })
                        })
                    } else {
                        Some(TokenAndSpan {
                            tok: token::Comment,
                            sp: codemap::mk_sp(start_bpos, self.last_pos)
                        })
                    }
                }
                Some('*') => {
                    // Skip the "/*" opener, then lex the block comment body.
                    self.bump(); self.bump();
                    self.scan_block_comment()
                }
                _ => None
            }
        } else if self.curr_is('#') {
            if self.nextch_is('!') {

                // Parse an inner attribute.
                if self.nextnextch_is('[') {
                    return None;
                }

                // I guess this is the only way to figure out if
                // we're at the beginning of the file...
                let cmap = CodeMap::new();
                cmap.files.borrow_mut().push(self.filemap.clone());
                let loc = cmap.lookup_char_pos_adj(self.last_pos);
                debug!("Skipping a shebang");
                if loc.line == 1 && loc.col == CharPos(0) {
                    // FIXME: Add shebang "token", return it
                    let start = self.last_pos;
                    while !self.curr_is('\n') && !self.is_eof() { self.bump(); }
                    return Some(TokenAndSpan {
                        tok: token::Shebang(self.name_from(start)),
                        sp: codemap::mk_sp(start, self.last_pos)
                    });
                }
            }
            None
        } else {
            None
        }
    }
501 | ||
    /// If there is whitespace, shebang, or a comment, scan it. Otherwise,
    /// return None.
    fn scan_whitespace_or_comment(&mut self) -> Option<TokenAndSpan> {
        match self.curr.unwrap_or('\0') {
            // # to handle shebang at start of file -- this is the entry point
            // for skipping over all "junk"
            '/' | '#' => {
                let c = self.scan_comment();
                debug!("scanning a comment {:?}", c);
                c
            },
            c if is_whitespace(Some(c)) => {
                // Collapse a whole run of whitespace into one token.
                let start_bpos = self.last_pos;
                while is_whitespace(self.curr) { self.bump(); }
                let c = Some(TokenAndSpan {
                    tok: token::Whitespace,
                    sp: codemap::mk_sp(start_bpos, self.last_pos)
                });
                debug!("scanning whitespace: {:?}", c);
                c
            },
            _ => None
        }
    }
526 | ||
    /// Might return a sugared-doc-attr
    ///
    /// Called with `curr` just past the opening `/*`. Block comments nest,
    /// so a depth counter is kept; reaching EOF before the comment closes
    /// is a fatal error.
    fn scan_block_comment(&mut self) -> Option<TokenAndSpan> {
        // block comments starting with "/**" or "/*!" are doc-comments
        let is_doc_comment = self.curr_is('*') || self.curr_is('!');
        let start_bpos = self.last_pos - BytePos(2);

        let mut level: isize = 1;
        // Track whether any '\r' was seen so CRLF translation only runs
        // when actually needed.
        let mut has_cr = false;
        while level > 0 {
            if self.is_eof() {
                let msg = if is_doc_comment {
                    "unterminated block doc-comment"
                } else {
                    "unterminated block comment"
                };
                let last_bpos = self.last_pos;
                panic!(self.fatal_span_(start_bpos, last_bpos, msg));
            }
            let n = self.curr.unwrap();
            match n {
                '/' if self.nextch_is('*') => {
                    // Nested opener.
                    level += 1;
                    self.bump();
                }
                '*' if self.nextch_is('/') => {
                    // Closer for the current nesting level.
                    level -= 1;
                    self.bump();
                }
                '\r' => {
                    has_cr = true;
                }
                _ => ()
            }
            self.bump();
        }

        self.with_str_from(start_bpos, |string| {
            // but comments with only "*"s between two "/"s are not
            let tok = if is_block_doc_comment(string) {
                let string = if has_cr {
                    self.translate_crlf(start_bpos, string,
                                        "bare CR not allowed in block doc-comment")
                } else { string.into() };
                token::DocComment(token::intern(&string[..]))
            } else {
                token::Comment
            };

            Some(TokenAndSpan{
                tok: tok,
                sp: codemap::mk_sp(start_bpos, self.last_pos)
            })
        })
    }
581 | ||
c34b1796 AL |
    /// Scan through any digits (base `scan_radix`) or underscores,
    /// and return how many digits there were.
    ///
    /// `real_radix` represents the true radix of the number we're
    /// interested in, and errors will be emitted for any digits
    /// between `real_radix` and `scan_radix`.
    fn scan_digits(&mut self, real_radix: u32, scan_radix: u32) -> usize {
        assert!(real_radix <= scan_radix);
        let mut len = 0;
        loop {
            let c = self.curr;
            // Underscores are digit separators: skipped, not counted.
            if c == Some('_') { debug!("skipping a _"); self.bump(); continue; }
            match c.and_then(|cc| cc.to_digit(scan_radix)) {
                Some(_) => {
                    debug!("{:?} in scan_digits", c);
                    // check that the hypothetical digit is actually
                    // in range for the true radix
                    if c.unwrap().to_digit(real_radix).is_none() {
                        self.err_span_(self.last_pos, self.pos,
                                       &format!("invalid digit for a base {} literal",
                                                real_radix));
                    }
                    len += 1;
                    self.bump();
                }
                _ => return len
            }
        };
    }
611 | ||
    /// Lex a LIT_INTEGER or a LIT_FLOAT
    ///
    /// `c` is the first (already consumed) character of the literal.
    /// Radix prefixes (`0b`, `0o`, `0x`) are handled, and a literal with
    /// no valid digits is reported and recovered as integer `0`.
    fn scan_number(&mut self, c: char) -> token::Lit {
        let num_digits;
        let mut base = 10;
        let start_bpos = self.last_pos;

        self.bump();

        if c == '0' {
            match self.curr.unwrap_or('\0') {
                'b' => { self.bump(); base = 2; num_digits = self.scan_digits(2, 10); }
                'o' => { self.bump(); base = 8; num_digits = self.scan_digits(8, 10); }
                'x' => { self.bump(); base = 16; num_digits = self.scan_digits(16, 16); }
                '0'...'9' | '_' | '.' => {
                    // The leading '0' counts as a digit.
                    num_digits = self.scan_digits(10, 10) + 1;
                }
                _ => {
                    // just a 0
                    return token::Integer(self.name_from(start_bpos));
                }
            }
        } else if c.is_digit(10) {
            num_digits = self.scan_digits(10, 10) + 1;
        } else {
            num_digits = 0;
        }

        if num_digits == 0 {
            self.err_span_(start_bpos, self.last_pos, "no valid digits found for number");
            return token::Integer(token::intern("0"));
        }

        // might be a float, but don't be greedy if this is actually an
        // integer literal followed by field/method access or a range pattern
        // (`0..2` and `12.foo()`)
        if self.curr_is('.') && !self.nextch_is('.') && !self.nextch().unwrap_or('\0')
                                                            .is_xid_start() {
            // might have stuff after the ., and if it does, it needs to start
            // with a number
            self.bump();
            if self.curr.unwrap_or('\0').is_digit(10) {
                self.scan_digits(10, 10);
                self.scan_float_exponent();
            }
            let last_pos = self.last_pos;
            self.check_float_base(start_bpos, last_pos, base);
            return token::Float(self.name_from(start_bpos));
        } else {
            // it might be a float if it has an exponent
            if self.curr_is('e') || self.curr_is('E') {
                self.scan_float_exponent();
                let last_pos = self.last_pos;
                self.check_float_base(start_bpos, last_pos, base);
                return token::Float(self.name_from(start_bpos));
            }
            // but we certainly have an integer!
            return token::Integer(self.name_from(start_bpos));
        }
    }
671 | ||
    /// Scan over `n_digits` hex digits, stopping at `delim`, reporting an
    /// error if too many or too few digits are encountered.
    ///
    /// When `below_0x7f_only` is set (used for `\x` escapes in non-byte
    /// contexts), values at or above 0x80 are rejected. Returns whether
    /// the escape denoted a valid character.
    fn scan_hex_digits(&mut self,
                       n_digits: usize,
                       delim: char,
                       below_0x7f_only: bool)
                       -> bool {
        debug!("scanning {} digits until {:?}", n_digits, delim);
        let start_bpos = self.last_pos;
        // Accumulates the numeric value of the escape.
        let mut accum_int = 0;

        let mut valid = true;
        for _ in 0..n_digits {
            if self.is_eof() {
                let last_bpos = self.last_pos;
                panic!(self.fatal_span_(start_bpos,
                                        last_bpos,
                                        "unterminated numeric character escape"));
            }
            // Hitting the delimiter early means too few digits.
            if self.curr_is(delim) {
                let last_bpos = self.last_pos;
                self.err_span_(start_bpos, last_bpos, "numeric character escape is too short");
                valid = false;
                break;
            }
            let c = self.curr.unwrap_or('\x00');
            accum_int *= 16;
            accum_int += c.to_digit(16).unwrap_or_else(|| {
                self.err_span_char(self.last_pos, self.pos,
                                   "invalid character in numeric character escape", c);

                valid = false;
                0
            });
            self.bump();
        }

        if below_0x7f_only && accum_int >= 0x80 {
            self.err_span_(start_bpos,
                           self.last_pos,
                           "this form of character escape may only be used \
                            with characters in the range [\\x00-\\x7f]");
            valid = false;
        }

        // The accumulated value must be a valid Unicode scalar value.
        match char::from_u32(accum_int) {
            Some(_) => valid,
            None => {
                let last_bpos = self.last_pos;
                self.err_span_(start_bpos, last_bpos, "invalid numeric character escape");
                false
            }
        }
    }
726 | ||
1a4d82fc JJ |
    /// Scan for a single (possibly escaped) byte or char
    /// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
    /// `start` is the position of `first_source_char`, which is already consumed.
    ///
    /// Returns true if there was a valid char/byte, false otherwise.
    fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
                         ascii_only: bool, delim: char) -> bool {
        match first_source_char {
            '\\' => {
                // '\X' for some X must be a character constant:
                let escaped = self.curr;
                let escaped_pos = self.last_pos;
                self.bump();
                match escaped {
                    None => {}, // EOF here is an error that will be checked later.
                    Some(e) => {
                        return match e {
                            // Simple single-character escapes.
                            'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
                            'x' => self.scan_byte_escape(delim, !ascii_only),
                            'u' => {
                                let valid = if self.curr_is('{') {
                                    self.scan_unicode_escape(delim) && !ascii_only
                                } else {
                                    // `\u` not followed by `{...}` is the old,
                                    // rejected escape form.
                                    self.err_span_(start, self.last_pos,
                                        "incorrect unicode escape sequence");
                                    self.help_span_(start, self.last_pos,
                                        "format of unicode escape sequences is `\\u{…}`");
                                    false
                                };
                                if ascii_only {
                                    self.err_span_(start, self.last_pos,
                                        "unicode escape sequences cannot be used as a byte or in \
                                        a byte string"
                                    );
                                }
                                valid

                            }
                            // Escaped newline in a string: line continuation.
                            '\n' if delim == '"' => {
                                self.consume_whitespace();
                                true
                            },
                            '\r' if delim == '"' && self.curr_is('\n') => {
                                self.consume_whitespace();
                                true
                            }
                            c => {
                                let last_pos = self.last_pos;
                                self.err_span_char(
                                    escaped_pos, last_pos,
                                    if ascii_only { "unknown byte escape" }
                                    else { "unknown character escape" },
                                    c);
                                if e == '\r' {
                                    self.help_span_(escaped_pos, last_pos,
                                        "this is an isolated carriage return; consider checking \
                                        your editor and version control settings")
                                }
                                if (e == '{' || e == '}') && !ascii_only {
                                    self.help_span_(escaped_pos, last_pos,
                                        "if used in a formatting string, \
                                        curly braces are escaped with `{{` and `}}`")
                                }
                                false
                            }
                        }
                    }
                }
            }
            // Characters that must always be escaped inside a char literal.
            '\t' | '\n' | '\r' | '\'' if delim == '\'' => {
                let last_pos = self.last_pos;
                self.err_span_char(
                    start, last_pos,
                    if ascii_only { "byte constant must be escaped" }
                    else { "character constant must be escaped" },
                    first_source_char);
                return false;
            }
            '\r' => {
                // A bare CR is only accepted as part of a CRLF pair.
                if self.curr_is('\n') {
                    self.bump();
                    return true;
                } else {
                    self.err_span_(start, self.last_pos,
                                   "bare CR not allowed in string, use \\r instead");
                    return false;
                }
            }
            _ => if ascii_only && first_source_char > '\x7F' {
                let last_pos = self.last_pos;
                self.err_span_char(
                    start, last_pos,
                    "byte constant must be ASCII. \
                     Use a \\xHH escape for a non-ASCII byte", first_source_char);
                return false;
            }
        }
        true
    }
826 | ||
    /// Scan over a \u{...} escape
    ///
    /// At this point, we have already seen the \ and the u, the { is the current character. We
    /// will read at least one digit, and up to 6, and pass over the }.
    fn scan_unicode_escape(&mut self, delim: char) -> bool {
        self.bump(); // past the {
        let start_bpos = self.last_pos;
        let mut count = 0;
        // Accumulates the scalar value of the escape.
        let mut accum_int = 0;
        let mut valid = true;

        while !self.curr_is('}') && count <= 6 {
            let c = match self.curr {
                Some(c) => c,
                None => {
                    panic!(self.fatal_span_(start_bpos, self.last_pos,
                                            "unterminated unicode escape (found EOF)"));
                }
            };
            accum_int *= 16;
            accum_int += c.to_digit(16).unwrap_or_else(|| {
                // Running into the literal's own delimiter means the
                // closing `}` is missing — unrecoverable here.
                if c == delim {
                    panic!(self.fatal_span_(self.last_pos, self.pos,
                                            "unterminated unicode escape (needed a `}`)"));
                } else {
                    self.err_span_char(self.last_pos, self.pos,
                                       "invalid character in unicode escape", c);
                }
                valid = false;
                0
            });
            self.bump();
            count += 1;
        }

        if count > 6 {
            self.err_span_(start_bpos, self.last_pos,
                           "overlong unicode escape (can have at most 6 hex digits)");
            valid = false;
        }

        // Zero digits, or a value outside the Unicode scalar range, is
        // invalid (only reported if no earlier error fired).
        if valid && (char::from_u32(accum_int).is_none() || count == 0) {
            self.err_span_(start_bpos, self.last_pos, "invalid unicode character escape");
            valid = false;
        }

        self.bump(); // past the ending }
        valid
    }
876 | ||
877 | /// Scan over a float exponent. | |
878 | fn scan_float_exponent(&mut self) { | |
879 | if self.curr_is('e') || self.curr_is('E') { | |
880 | self.bump(); | |
881 | if self.curr_is('-') || self.curr_is('+') { | |
882 | self.bump(); | |
883 | } | |
c34b1796 | 884 | if self.scan_digits(10, 10) == 0 { |
1a4d82fc JJ |
885 | self.err_span_(self.last_pos, self.pos, "expected at least one digit in exponent") |
886 | } | |
887 | } | |
888 | } | |
889 | ||
890 | /// Check that a base is valid for a floating literal, emitting a nice | |
891 | /// error if it isn't. | |
85aaf69f | 892 | fn check_float_base(&mut self, start_bpos: BytePos, last_bpos: BytePos, base: usize) { |
1a4d82fc | 893 | match base { |
85aaf69f SL |
894 | 16 => self.err_span_(start_bpos, last_bpos, "hexadecimal float literal is not \ |
895 | supported"), | |
896 | 8 => self.err_span_(start_bpos, last_bpos, "octal float literal is not supported"), | |
897 | 2 => self.err_span_(start_bpos, last_bpos, "binary float literal is not supported"), | |
898 | _ => () | |
1a4d82fc JJ |
899 | } |
900 | } | |
901 | ||
902 | fn binop(&mut self, op: token::BinOpToken) -> token::Token { | |
903 | self.bump(); | |
904 | if self.curr_is('=') { | |
905 | self.bump(); | |
906 | return token::BinOpEq(op); | |
907 | } else { | |
908 | return token::BinOp(op); | |
909 | } | |
910 | } | |
911 | ||
    /// Return the next token from the string, advances the input past that
    /// token, and updates the interner
    fn next_token_inner(&mut self) -> token::Token {
        let c = self.curr;
        // Identifiers / keywords — but `r"`, `r#`, `b'`, `b"`, `br"`, `br#`
        // introduce literals and are handled further down.
        if ident_start(c) && match (c.unwrap(), self.nextch(), self.nextnextch()) {
            // Note: r as in r" or r#" is part of a raw string literal,
            // b as in b' is part of a byte literal.
            // They are not identifiers, and are handled further down.
            ('r', Some('"'), _) | ('r', Some('#'), _) |
            ('b', Some('"'), _) | ('b', Some('\''), _) |
            ('b', Some('r'), Some('"')) | ('b', Some('r'), Some('#')) => false,
            _ => true
        } {
            let start = self.last_pos;
            while ident_continue(self.curr) {
                self.bump();
            }

            return self.with_str_from(start, |string| {
                if string == "_" {
                    token::Underscore
                } else {
                    // FIXME: perform NFKC normalization here. (Issue #2253)
                    // A following `::` marks this ident as a module path segment.
                    if self.curr_is(':') && self.nextch_is(':') {
                        token::Ident(str_to_ident(string), token::ModName)
                    } else {
                        token::Ident(str_to_ident(string), token::Plain)
                    }
                }
            });
        }

        // Numeric literals (integer or float), plus an optional suffix.
        if is_dec_digit(c) {
            let num = self.scan_number(c.unwrap());
            let suffix = self.scan_optional_raw_name();
            debug!("next_token_inner: scanned number {:?}, {:?}", num, suffix);
            return token::Literal(num, suffix)
        }

        match c.expect("next_token_inner called at EOF") {
            // One-byte tokens.
            ';' => { self.bump(); return token::Semi; }
            ',' => { self.bump(); return token::Comma; }
            '.' => {
                // `.`, `..` or `...`
                self.bump();
                return if self.curr_is('.') {
                    self.bump();
                    if self.curr_is('.') {
                        self.bump();
                        token::DotDotDot
                    } else {
                        token::DotDot
                    }
                } else {
                    token::Dot
                };
            }
            '(' => { self.bump(); return token::OpenDelim(token::Paren); }
            ')' => { self.bump(); return token::CloseDelim(token::Paren); }
            '{' => { self.bump(); return token::OpenDelim(token::Brace); }
            '}' => { self.bump(); return token::CloseDelim(token::Brace); }
            '[' => { self.bump(); return token::OpenDelim(token::Bracket); }
            ']' => { self.bump(); return token::CloseDelim(token::Bracket); }
            '@' => { self.bump(); return token::At; }
            '#' => { self.bump(); return token::Pound; }
            '~' => { self.bump(); return token::Tilde; }
            '?' => { self.bump(); return token::Question; }
            ':' => {
                // `:` or `::`
                self.bump();
                if self.curr_is(':') {
                    self.bump();
                    return token::ModSep;
                } else {
                    return token::Colon;
                }
            }

            '$' => { self.bump(); return token::Dollar; }

            // Multi-byte tokens.
            '=' => {
                // `=`, `==` or `=>`
                self.bump();
                if self.curr_is('=') {
                    self.bump();
                    return token::EqEq;
                } else if self.curr_is('>') {
                    self.bump();
                    return token::FatArrow;
                } else {
                    return token::Eq;
                }
            }
            '!' => {
                // `!` or `!=`
                self.bump();
                if self.curr_is('=') {
                    self.bump();
                    return token::Ne;
                } else { return token::Not; }
            }
            '<' => {
                // `<`, `<=`, `<<`, `<<=` or `<-`
                self.bump();
                match self.curr.unwrap_or('\x00') {
                    '=' => { self.bump(); return token::Le; }
                    '<' => { return self.binop(token::Shl); }
                    '-' => {
                        self.bump();
                        match self.curr.unwrap_or('\x00') {
                            _ => { return token::LArrow; }
                        }
                    }
                    _ => { return token::Lt; }
                }
            }
            '>' => {
                // `>`, `>=`, `>>` or `>>=`
                self.bump();
                match self.curr.unwrap_or('\x00') {
                    '=' => { self.bump(); return token::Ge; }
                    '>' => { return self.binop(token::Shr); }
                    _ => { return token::Gt; }
                }
            }
            '\'' => {
                // Either a character constant 'a' OR a lifetime name 'abc
                self.bump();
                let start = self.last_pos;

                // the eof will be picked up by the final `'` check below
                let c2 = self.curr.unwrap_or('\x00');
                self.bump();

                // If the character is an ident start not followed by another single
                // quote, then this is a lifetime name:
                if ident_start(Some(c2)) && !self.curr_is('\'') {
                    while ident_continue(self.curr) {
                        self.bump();
                    }

                    // Include the leading `'` in the real identifier, for macro
                    // expansion purposes. See #12512 for the gory details of why
                    // this is necessary.
                    let ident = self.with_str_from(start, |lifetime_name| {
                        str_to_ident(&format!("'{}", lifetime_name))
                    });

                    // Conjure up a "keyword checking ident" to make sure that
                    // the lifetime name is not a keyword.
                    let keyword_checking_ident =
                        self.with_str_from(start, |lifetime_name| {
                            str_to_ident(lifetime_name)
                        });
                    let keyword_checking_token =
                        &token::Ident(keyword_checking_ident, token::Plain);
                    let last_bpos = self.last_pos;
                    if keyword_checking_token.is_keyword(token::keywords::SelfValue) {
                        self.err_span_(start,
                                       last_bpos,
                                       "invalid lifetime name: 'self \
                                        is no longer a special lifetime");
                    } else if keyword_checking_token.is_any_keyword() &&
                        !keyword_checking_token.is_keyword(token::keywords::Static)
                    {
                        // `'static` is the only keyword usable as a lifetime.
                        self.err_span_(start,
                                       last_bpos,
                                       "invalid lifetime name");
                    }
                    return token::Lifetime(ident);
                }

                // Otherwise it is a character constant:
                let valid = self.scan_char_or_byte(start, c2, /* ascii_only = */ false, '\'');
                if !self.curr_is('\'') {
                    let last_bpos = self.last_pos;
                    panic!(self.fatal_span_verbose(
                        // Byte offsetting here is okay because the
                        // character before position `start` is an
                        // ascii single quote.
                        start - BytePos(1), last_bpos,

                        String::from("character literal may only contain one codepoint")));
                }
                // Invalid literals are interned as a placeholder so parsing continues.
                let id = if valid { self.name_from(start) } else { token::intern("0") };
                self.bump(); // advance curr past token
                let suffix = self.scan_optional_raw_name();
                return token::Literal(token::Char(id), suffix);
            }
            'b' => {
                // Byte literal, byte string, or raw byte string.
                self.bump();
                let lit = match self.curr {
                    Some('\'') => self.scan_byte(),
                    Some('"') => self.scan_byte_string(),
                    Some('r') => self.scan_raw_byte_string(),
                    _ => unreachable!() // Should have been a token::Ident above.
                };
                let suffix = self.scan_optional_raw_name();
                return token::Literal(lit, suffix);
            }
            '"' => {
                // Ordinary double-quoted string literal.
                let start_bpos = self.last_pos;
                let mut valid = true;
                self.bump();
                while !self.curr_is('"') {
                    if self.is_eof() {
                        let last_bpos = self.last_pos;
                        panic!(self.fatal_span_(start_bpos,
                                                last_bpos,
                                                "unterminated double quote string"));
                    }

                    let ch_start = self.last_pos;
                    let ch = self.curr.unwrap();
                    self.bump();
                    valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"');
                }
                // adjust for the ASCII " at the start of the literal
                let id = if valid { self.name_from(start_bpos + BytePos(1)) }
                         else { token::intern("??") };
                self.bump();
                let suffix = self.scan_optional_raw_name();
                return token::Literal(token::Str_(id), suffix);
            }
            'r' => {
                // Raw string literal `r##"…"##` with any number of `#`s.
                let start_bpos = self.last_pos;
                self.bump();
                let mut hash_count = 0;
                while self.curr_is('#') {
                    self.bump();
                    hash_count += 1;
                }

                if self.is_eof() {
                    let last_bpos = self.last_pos;
                    panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string"));
                } else if !self.curr_is('"') {
                    let last_bpos = self.last_pos;
                    let curr_char = self.curr.unwrap();
                    panic!(self.fatal_span_char(start_bpos, last_bpos,
                                                "found invalid character; \
                                                 only `#` is allowed in raw string delimitation",
                                                curr_char));
                }
                self.bump();
                let content_start_bpos = self.last_pos;
                let mut content_end_bpos;
                let mut valid = true;
                'outer: loop {
                    if self.is_eof() {
                        let last_bpos = self.last_pos;
                        panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string"));
                    }
                    let c = self.curr.unwrap();
                    match c {
                        '"' => {
                            // Candidate terminator: demand `hash_count` trailing `#`s,
                            // otherwise the quote is part of the content.
                            content_end_bpos = self.last_pos;
                            for _ in 0..hash_count {
                                self.bump();
                                if !self.curr_is('#') {
                                    continue 'outer;
                                }
                            }
                            break;
                        },
                        '\r' => {
                            if !self.nextch_is('\n') {
                                let last_bpos = self.last_pos;
                                self.err_span_(start_bpos, last_bpos, "bare CR not allowed in raw \
                                               string, use \\r instead");
                                valid = false;
                            }
                        }
                        _ => ()
                    }
                    self.bump();
                }
                self.bump();
                let id = if valid {
                    self.name_from_to(content_start_bpos, content_end_bpos)
                } else {
                    token::intern("??")
                };
                let suffix = self.scan_optional_raw_name();
                return token::Literal(token::StrRaw(id, hash_count), suffix);
            }
            '-' => {
                // `->` or the `-` binop (possibly `-=` via `binop`).
                if self.nextch_is('>') {
                    self.bump();
                    self.bump();
                    return token::RArrow;
                } else { return self.binop(token::Minus); }
            }
            '&' => {
                if self.nextch_is('&') {
                    self.bump();
                    self.bump();
                    return token::AndAnd;
                } else { return self.binop(token::And); }
            }
            '|' => {
                match self.nextch() {
                    Some('|') => { self.bump(); self.bump(); return token::OrOr; }
                    _ => { return self.binop(token::Or); }
                }
            }
            '+' => { return self.binop(token::Plus); }
            '*' => { return self.binop(token::Star); }
            '/' => { return self.binop(token::Slash); }
            '^' => { return self.binop(token::Caret); }
            '%' => { return self.binop(token::Percent); }
            c => {
                // Unknown character: suggest lookalike substitutions, then bail.
                let last_bpos = self.last_pos;
                let bpos = self.pos;
                unicode_chars::check_for_substitution(&self, c);
                panic!(self.fatal_span_char(last_bpos, bpos, "unknown start of token", c))
            }
        }
    }
1233 | ||
1234 | fn consume_whitespace(&mut self) { | |
1235 | while is_whitespace(self.curr) && !self.is_eof() { self.bump(); } | |
1236 | } | |
1237 | ||
1238 | fn read_to_eol(&mut self) -> String { | |
1239 | let mut val = String::new(); | |
1240 | while !self.curr_is('\n') && !self.is_eof() { | |
1241 | val.push(self.curr.unwrap()); | |
1242 | self.bump(); | |
1243 | } | |
1244 | if self.curr_is('\n') { self.bump(); } | |
1245 | return val | |
1246 | } | |
1247 | ||
1248 | fn read_one_line_comment(&mut self) -> String { | |
1249 | let val = self.read_to_eol(); | |
1250 | assert!((val.as_bytes()[0] == b'/' && val.as_bytes()[1] == b'/') | |
1251 | || (val.as_bytes()[0] == b'#' && val.as_bytes()[1] == b'!')); | |
1252 | return val; | |
1253 | } | |
1254 | ||
1255 | fn consume_non_eol_whitespace(&mut self) { | |
1256 | while is_whitespace(self.curr) && !self.curr_is('\n') && !self.is_eof() { | |
1257 | self.bump(); | |
1258 | } | |
1259 | } | |
1260 | ||
1261 | fn peeking_at_comment(&self) -> bool { | |
1262 | (self.curr_is('/') && self.nextch_is('/')) | |
1263 | || (self.curr_is('/') && self.nextch_is('*')) | |
1264 | // consider shebangs comments, but not inner attributes | |
1265 | || (self.curr_is('#') && self.nextch_is('!') && !self.nextnextch_is('[')) | |
1266 | } | |
1267 | ||
    /// Scan a byte literal `b'…'`. `self.curr` is the opening single quote;
    /// the leading `b` was consumed by the caller (see the `'b'` arm of
    /// `next_token_inner`, and the `- BytePos(2)` span math below).
    fn scan_byte(&mut self) -> token::Lit {
        self.bump();
        let start = self.last_pos;

        // the eof will be picked up by the final `'` check below
        let c2 = self.curr.unwrap_or('\x00');
        self.bump();

        let valid = self.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'');
        if !self.curr_is('\'') {
            // Byte offsetting here is okay because the
            // character before position `start` are an
            // ascii single quote and ascii 'b'.
            let last_pos = self.last_pos;
            panic!(self.fatal_span_verbose(
                start - BytePos(2), last_pos,
                "unterminated byte constant".to_string()));
        }

        // Invalid contents are interned as a placeholder so parsing continues.
        let id = if valid { self.name_from(start) } else { token::intern("?") };
        self.bump(); // advance curr past token
        return token::Byte(id);
    }
1291 | ||
    /// Scan the two hex digits of a `\xHH` byte escape after the `\x`.
    /// `below_0x7f_only` restricts the value to the ASCII range.
    fn scan_byte_escape(&mut self, delim: char, below_0x7f_only: bool) -> bool {
        self.scan_hex_digits(2, delim, below_0x7f_only)
    }
1295 | ||
    /// Scan a byte string literal `b"…"`. `self.curr` is the opening double
    /// quote. On invalid contents a placeholder name is interned so the
    /// parser can keep going.
    fn scan_byte_string(&mut self) -> token::Lit {
        self.bump();
        let start = self.last_pos;
        let mut valid = true;

        while !self.curr_is('"') {
            if self.is_eof() {
                let last_pos = self.last_pos;
                panic!(self.fatal_span_(start, last_pos, "unterminated double quote byte string"));
            }

            let ch_start = self.last_pos;
            let ch = self.curr.unwrap();
            self.bump();
            // Every char/escape must be valid as an ASCII byte.
            valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"');
        }
        let id = if valid { self.name_from(start) } else { token::intern("??") };
        self.bump();
        return token::ByteStr(id);
    }
1316 | ||
    /// Scan a raw byte string literal `br#"…"#` (any number of `#`s).
    /// `self.curr` is the `r`; the leading `b` was consumed by the caller.
    fn scan_raw_byte_string(&mut self) -> token::Lit {
        let start_bpos = self.last_pos;
        self.bump();
        let mut hash_count = 0;
        while self.curr_is('#') {
            self.bump();
            hash_count += 1;
        }

        if self.is_eof() {
            let last_pos = self.last_pos;
            panic!(self.fatal_span_(start_bpos, last_pos, "unterminated raw string"));
        } else if !self.curr_is('"') {
            let last_pos = self.last_pos;
            let ch = self.curr.unwrap();
            panic!(self.fatal_span_char(start_bpos, last_pos,
                                        "found invalid character; \
                                         only `#` is allowed in raw string delimitation",
                                        ch));
        }
        self.bump();
        let content_start_bpos = self.last_pos;
        let mut content_end_bpos;
        'outer: loop {
            match self.curr {
                None => {
                    let last_pos = self.last_pos;
                    panic!(self.fatal_span_(start_bpos, last_pos, "unterminated raw string"))
                },
                Some('"') => {
                    // Candidate terminator: require `hash_count` trailing `#`s,
                    // otherwise the quote is part of the content.
                    content_end_bpos = self.last_pos;
                    for _ in 0..hash_count {
                        self.bump();
                        if !self.curr_is('#') {
                            continue 'outer;
                        }
                    }
                    break;
                },
                // Non-ASCII bytes are an error but scanning continues.
                Some(c) => if c > '\x7F' {
                    let last_pos = self.last_pos;
                    self.err_span_char(
                        last_pos, last_pos, "raw byte string must be ASCII", c);
                }
            }
            self.bump();
        }
        self.bump();
        return token::ByteStrRaw(self.name_from_to(content_start_bpos,
                                                   content_end_bpos),
                                 hash_count);
    }
1369 | } | |
1370 | ||
/// Is `c` one of the four characters the lexer treats as whitespace?
/// `None` (EOF) is not whitespace.
pub fn is_whitespace(c: Option<char>) -> bool {
    match c {
        Some(' ') | Some('\n') | Some('\t') | Some('\r') => true,
        _ => false
    }
}
1377 | ||
/// Is `c` a character in the inclusive range `lo..=hi`? `None` is never in range.
fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
    c.map_or(false, |ch| lo <= ch && ch <= hi)
}
1384 | ||
1385 | fn is_dec_digit(c: Option<char>) -> bool { return in_range(c, '0', '9'); } | |
1386 | ||
1387 | pub fn is_doc_comment(s: &str) -> bool { | |
1388 | let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') | |
1389 | || s.starts_with("//!"); | |
1390 | debug!("is {:?} a doc comment? {}", s, res); | |
1391 | res | |
1392 | } | |
1393 | ||
1394 | pub fn is_block_doc_comment(s: &str) -> bool { | |
b039eaaf SL |
1395 | let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') |
1396 | || s.starts_with("/*!")) | |
1397 | && s.len() >= 5; // Prevent `/**/` from being parsed as a doc comment | |
1a4d82fc JJ |
1398 | debug!("is {:?} a doc comment? {}", s, res); |
1399 | res | |
1400 | } | |
1401 | ||
1402 | fn ident_start(c: Option<char>) -> bool { | |
1403 | let c = match c { Some(c) => c, None => return false }; | |
1404 | ||
1405 | (c >= 'a' && c <= 'z') | |
1406 | || (c >= 'A' && c <= 'Z') | |
1407 | || c == '_' | |
1408 | || (c > '\x7f' && c.is_xid_start()) | |
1409 | } | |
1410 | ||
1411 | fn ident_continue(c: Option<char>) -> bool { | |
1412 | let c = match c { Some(c) => c, None => return false }; | |
1413 | ||
1414 | (c >= 'a' && c <= 'z') | |
1415 | || (c >= 'A' && c <= 'Z') | |
1416 | || (c >= '0' && c <= '9') | |
1417 | || c == '_' | |
1418 | || (c > '\x7f' && c.is_xid_continue()) | |
1419 | } | |
1420 | ||
// Unit tests for the string-reader lexer: token streams, spans, literal
// kinds and suffixes, and the doc-comment predicates.
#[cfg(test)]
mod tests {
    use super::*;

    use codemap::{BytePos, CodeMap, Span, NO_EXPANSION};
    use diagnostic;
    use parse::token;
    use parse::token::{str_to_ident};
    use std::io;

    // Build a SpanHandler whose diagnostics are discarded (written to io::sink).
    fn mk_sh() -> diagnostic::SpanHandler {
        // FIXME (#22405): Replace `Box::new` with `box` here when/if possible.
        let emitter = diagnostic::EmitterWriter::new(Box::new(io::sink()), None);
        let handler = diagnostic::Handler::with_emitter(true, Box::new(emitter));
        diagnostic::SpanHandler::new(handler, CodeMap::new())
    }

    // open a string reader for the given string
    fn setup<'a>(span_handler: &'a diagnostic::SpanHandler,
                 teststr: String) -> StringReader<'a> {
        let fm = span_handler.cm.new_filemap("zebra.rs".to_string(), teststr);
        StringReader::new(span_handler, fm)
    }

    // Basic smoke test: comments, whitespace, idents, and their spans.
    #[test] fn t1 () {
        let span_handler = mk_sh();
        let mut string_reader = setup(&span_handler,
            "/* my source file */ \
             fn main() { println!(\"zebra\"); }\n".to_string());
        let id = str_to_ident("fn");
        assert_eq!(string_reader.next_token().tok, token::Comment);
        assert_eq!(string_reader.next_token().tok, token::Whitespace);
        let tok1 = string_reader.next_token();
        let tok2 = TokenAndSpan{
            tok:token::Ident(id, token::Plain),
            sp:Span {lo:BytePos(21),hi:BytePos(23),expn_id: NO_EXPANSION}};
        assert_eq!(tok1,tok2);
        assert_eq!(string_reader.next_token().tok, token::Whitespace);
        // the 'main' id is already read:
        assert_eq!(string_reader.last_pos.clone(), BytePos(28));
        // read another token:
        let tok3 = string_reader.next_token();
        let tok4 = TokenAndSpan{
            tok:token::Ident(str_to_ident("main"), token::Plain),
            sp:Span {lo:BytePos(24),hi:BytePos(28),expn_id: NO_EXPANSION}};
        assert_eq!(tok3,tok4);
        // the lparen is already read:
        assert_eq!(string_reader.last_pos.clone(), BytePos(29))
    }

    // check that the given reader produces the desired stream
    // of tokens (stop checking after exhausting the expected vec)
    fn check_tokenization (mut string_reader: StringReader, expected: Vec<token::Token> ) {
        for expected_tok in &expected {
            assert_eq!(&string_reader.next_token().tok, expected_tok);
        }
    }

    // make the identifier by looking up the string in the interner
    fn mk_ident(id: &str, style: token::IdentStyle) -> token::Token {
        token::Ident(str_to_ident(id), style)
    }

    #[test] fn doublecolonparsing () {
        check_tokenization(setup(&mk_sh(), "a b".to_string()),
                           vec![mk_ident("a", token::Plain),
                                token::Whitespace,
                                mk_ident("b", token::Plain)]);
    }

    // `a::b`: the ident before `::` gets ModName style.
    #[test] fn dcparsing_2 () {
        check_tokenization(setup(&mk_sh(), "a::b".to_string()),
                           vec![mk_ident("a",token::ModName),
                                token::ModSep,
                                mk_ident("b", token::Plain)]);
    }

    // Whitespace before `::` keeps the ident Plain.
    #[test] fn dcparsing_3 () {
        check_tokenization(setup(&mk_sh(), "a ::b".to_string()),
                           vec![mk_ident("a", token::Plain),
                                token::Whitespace,
                                token::ModSep,
                                mk_ident("b", token::Plain)]);
    }

    #[test] fn dcparsing_4 () {
        check_tokenization(setup(&mk_sh(), "a:: b".to_string()),
                           vec![mk_ident("a",token::ModName),
                                token::ModSep,
                                token::Whitespace,
                                mk_ident("b", token::Plain)]);
    }

    #[test] fn character_a() {
        assert_eq!(setup(&mk_sh(), "'a'".to_string()).next_token().tok,
                   token::Literal(token::Char(token::intern("a")), None));
    }

    #[test] fn character_space() {
        assert_eq!(setup(&mk_sh(), "' '".to_string()).next_token().tok,
                   token::Literal(token::Char(token::intern(" ")), None));
    }

    // Escapes are interned unprocessed (the literal text `\n`).
    #[test] fn character_escaped() {
        assert_eq!(setup(&mk_sh(), "'\\n'".to_string()).next_token().tok,
                   token::Literal(token::Char(token::intern("\\n")), None));
    }

    // Lifetime idents include the leading quote.
    #[test] fn lifetime_name() {
        assert_eq!(setup(&mk_sh(), "'abc".to_string()).next_token().tok,
                   token::Lifetime(token::str_to_ident("'abc")));
    }

    #[test] fn raw_string() {
        assert_eq!(setup(&mk_sh(),
                         "r###\"\"#a\\b\x00c\"\"###".to_string()).next_token()
                                                                 .tok,
                   token::Literal(token::StrRaw(token::intern("\"#a\\b\x00c\""), 3), None));
    }

    // Every literal kind accepts an immediately-following suffix; a separated
    // "suffix" is a distinct token.
    #[test] fn literal_suffixes() {
        macro_rules! test {
            ($input: expr, $tok_type: ident, $tok_contents: expr) => {{
                assert_eq!(setup(&mk_sh(), format!("{}suffix", $input)).next_token().tok,
                           token::Literal(token::$tok_type(token::intern($tok_contents)),
                                          Some(token::intern("suffix"))));
                // with a whitespace separator:
                assert_eq!(setup(&mk_sh(), format!("{} suffix", $input)).next_token().tok,
                           token::Literal(token::$tok_type(token::intern($tok_contents)),
                                          None));
            }}
        }

        test!("'a'", Char, "a");
        test!("b'a'", Byte, "a");
        test!("\"a\"", Str_, "a");
        test!("b\"a\"", ByteStr, "a");
        test!("1234", Integer, "1234");
        test!("0b101", Integer, "0b101");
        test!("0xABC", Integer, "0xABC");
        test!("1.0", Float, "1.0");
        test!("1.0e10", Float, "1.0e10");

        assert_eq!(setup(&mk_sh(), "2us".to_string()).next_token().tok,
                   token::Literal(token::Integer(token::intern("2")),
                                  Some(token::intern("us"))));
        assert_eq!(setup(&mk_sh(), "r###\"raw\"###suffix".to_string()).next_token().tok,
                   token::Literal(token::StrRaw(token::intern("raw"), 3),
                                  Some(token::intern("suffix"))));
        assert_eq!(setup(&mk_sh(), "br###\"raw\"###suffix".to_string()).next_token().tok,
                   token::Literal(token::ByteStrRaw(token::intern("raw"), 3),
                                  Some(token::intern("suffix"))));
    }

    #[test] fn line_doc_comments() {
        assert!(is_doc_comment("///"));
        assert!(is_doc_comment("/// blah"));
        assert!(!is_doc_comment("////"));
    }

    // A nested block comment is lexed as a single Comment token.
    #[test] fn nested_block_comments() {
        let sh = mk_sh();
        let mut lexer = setup(&sh, "/* /* */ */'a'".to_string());
        match lexer.next_token().tok {
            token::Comment => { },
            _ => panic!("expected a comment!")
        }
        assert_eq!(lexer.next_token().tok, token::Literal(token::Char(token::intern("a")), None));
    }

    // CRLF line endings: spans cover the CR, doc comments are normalized.
    #[test] fn crlf_comments() {
        let sh = mk_sh();
        let mut lexer = setup(&sh, "// test\r\n/// test\r\n".to_string());
        let comment = lexer.next_token();
        assert_eq!(comment.tok, token::Comment);
        assert_eq!(comment.sp, ::codemap::mk_sp(BytePos(0), BytePos(7)));
        assert_eq!(lexer.next_token().tok, token::Whitespace);
        assert_eq!(lexer.next_token().tok, token::DocComment(token::intern("/// test")));
    }
}