]>
Commit | Line | Data |
---|---|---|
dc9dc135 | 1 | use crate::parse::token::{self, Token, TokenKind}; |
e74abb32 | 2 | use crate::sess::ParseSess; |
dc9dc135 | 3 | use crate::symbol::{sym, Symbol}; |
48663c56 | 4 | use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char}; |
9fa01778 | 5 | |
416331ca | 6 | use errors::{FatalError, DiagnosticBuilder}; |
e1599b0c | 7 | use syntax_pos::{BytePos, Pos, Span}; |
416331ca XL |
8 | use rustc_lexer::Base; |
9 | use rustc_lexer::unescape; | |
1a4d82fc | 10 | |
1a4d82fc | 11 | use std::char; |
416331ca | 12 | use std::convert::TryInto; |
0531ce1d | 13 | use rustc_data_structures::sync::Lrc; |
9fa01778 | 14 | use log::debug; |
1a4d82fc | 15 | |
416331ca XL |
16 | #[cfg(test)] |
17 | mod tests; | |
18 | ||
1a4d82fc | 19 | pub mod comments; |
32a655c1 | 20 | mod tokentrees; |
92a42be0 | 21 | mod unicode_chars; |
1a4d82fc | 22 | |
/// Records an unbalanced delimiter found during token-tree construction,
/// so the parser can later emit a targeted "unclosed/mismatched delimiter"
/// diagnostic instead of a generic parse error.
#[derive(Clone, Debug)]
pub struct UnmatchedBrace {
    /// The closing delimiter that was expected at this point.
    pub expected_delim: token::DelimToken,
    /// The closing delimiter actually found, if any (`None` at EOF).
    pub found_delim: Option<token::DelimToken>,
    /// Where the unexpected delimiter (or EOF) was encountered.
    pub found_span: Span,
    /// The span of the opening delimiter that was never closed, if known.
    pub unclosed_span: Option<Span>,
    /// A span that might plausibly be the delimiter the user meant to close.
    pub candidate_span: Option<Span>,
}
31 | ||
/// Low-level lexer: turns the source text of a single `SourceFile` into a
/// stream of `Token`s via `next_token`, one token per call.
pub struct StringReader<'a> {
    sess: &'a ParseSess,
    /// Initial position, read-only.
    start_pos: BytePos,
    /// The absolute offset within the source_map of the current character.
    pos: BytePos,
    /// Stop reading src at this index.
    end_src_index: usize,
    /// Source text to tokenize.
    src: Lrc<String>,
    /// If set, every produced span is replaced by this one (used when
    /// re-lexing text that originated elsewhere, e.g. doc comments).
    override_span: Option<Span>,
}
44 | ||
impl<'a> StringReader<'a> {
    /// Creates a lexer over `source_file`.
    ///
    /// ICEs (via `span_diagnostic.bug`) if the file has no source text
    /// attached — callers must only pass files whose `src` is populated.
    pub fn new(sess: &'a ParseSess,
               source_file: Lrc<syntax_pos::SourceFile>,
               override_span: Option<Span>) -> Self {
        if source_file.src.is_none() {
            sess.span_diagnostic.bug(&format!("cannot lex `source_file` without source: {}",
                                              source_file.name));
        }

        let src = (*source_file.src.as_ref().unwrap()).clone();

        StringReader {
            sess,
            start_pos: source_file.start_pos,
            pos: source_file.start_pos,
            end_src_index: src.len(),
            src,
            override_span,
        }
    }

    /// Creates a lexer positioned to re-tokenize exactly the text covered
    /// by `span` (e.g. to re-lex a doc comment). Invalid spans (reversed,
    /// or straddling two source files) are shrunk to a zero-length range.
    pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self {
        let begin = sess.source_map().lookup_byte_offset(span.lo());
        let end = sess.source_map().lookup_byte_offset(span.hi());

        // Make the range zero-length if the span is invalid.
        if span.lo() > span.hi() || begin.sf.start_pos != end.sf.start_pos {
            span = span.shrink_to_lo();
        }

        let mut sr = StringReader::new(sess, begin.sf, None);

        // Seek the lexer to the right byte range.
        sr.end_src_index = sr.src_index(span.hi());

        sr
    }

    /// Makes a span from `lo..hi`, unless `override_span` is set, in which
    /// case that span is returned instead.
    fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span {
        self.override_span.unwrap_or_else(|| Span::with_root_ctxt(lo, hi))
    }

    /// Returns the next token, including trivia like whitespace or comments.
    ///
    /// Returns `token::Eof` once the reader reaches `end_src_index`.
    /// Lexical errors are emitted through `self.sess.span_diagnostic`;
    /// fatal ones abort via `FatalError.raise()`.
    pub fn next_token(&mut self) -> Token {
        let start_src_index = self.src_index(self.pos);
        let text: &str = &self.src[start_src_index..self.end_src_index];

        if text.is_empty() {
            let span = self.mk_sp(self.pos, self.pos);
            return Token::new(token::Eof, span);
        }

        {
            // A shebang (`#!...`) is only recognized as the very first
            // bytes of the file; elsewhere `#!` lexes as ordinary tokens.
            let is_beginning_of_file = self.pos == self.start_pos;
            if is_beginning_of_file {
                if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
                    let start = self.pos;
                    self.pos = self.pos + BytePos::from_usize(shebang_len);

                    // Symbol contains the shebang text minus the leading `#!`.
                    let sym = self.symbol_from(start + BytePos::from_usize("#!".len()));
                    let kind = token::Shebang(sym);

                    let span = self.mk_sp(start, self.pos);
                    return Token::new(kind, span);
                }
            }
        }

        let token = rustc_lexer::first_token(text);

        let start = self.pos;
        self.pos = self.pos + BytePos::from_usize(token.len);

        debug!("try_next_token: {:?}({:?})", token.kind, self.str_from(start));

        // This could use `?`, but that makes code significantly (10-20%) slower.
        // https://github.com/rust-lang/rust/issues/37939
        let kind = self.cook_lexer_token(token.kind, start);

        let span = self.mk_sp(start, self.pos);
        Token::new(kind, span)
    }

    /// Report a fatal lexical error with a given span.
    fn fatal_span(&self, sp: Span, m: &str) -> FatalError {
        self.sess.span_diagnostic.span_fatal(sp, m)
    }

    /// Report a lexical error with a given span.
    fn err_span(&self, sp: Span, m: &str) {
        self.sess.span_diagnostic.struct_span_err(sp, m).emit();
    }

    /// Report a fatal error spanning [`from_pos`, `to_pos`).
    fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> FatalError {
        self.fatal_span(self.mk_sp(from_pos, to_pos), m)
    }

    /// Report a lexical error spanning [`from_pos`, `to_pos`).
    fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
        self.err_span(self.mk_sp(from_pos, to_pos), m)
    }

    /// Builds (without emitting) a fatal diagnostic spanning
    /// [`from_pos`, `to_pos`).
    fn struct_span_fatal(&self, from_pos: BytePos, to_pos: BytePos, m: &str)
        -> DiagnosticBuilder<'a>
    {
        self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), m)
    }

    /// Builds (without emitting) a fatal diagnostic whose message is
    /// `m` followed by the escaped rendering of the offending char `c`.
    fn struct_fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char)
        -> DiagnosticBuilder<'a>
    {
        let mut m = m.to_string();
        m.push_str(": ");
        push_escaped_char(&mut m, c);

        self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
    }

    /// Turns simple `rustc_lexer::TokenKind` enum into a rich
    /// `libsyntax::TokenKind`. This turns strings into interned
    /// symbols and runs additional validation.
    fn cook_lexer_token(
        &self,
        token: rustc_lexer::TokenKind,
        start: BytePos,
    ) -> TokenKind {
        match token {
            rustc_lexer::TokenKind::LineComment => {
                let string = self.str_from(start);
                // comments with only more "/"s are not doc comments
                let tok = if is_doc_comment(string) {
                    self.forbid_bare_cr(start, string, "bare CR not allowed in doc-comment");
                    token::DocComment(Symbol::intern(string))
                } else {
                    token::Comment
                };

                tok
            }
            rustc_lexer::TokenKind::BlockComment { terminated } => {
                let string = self.str_from(start);
                // block comments starting with "/**" or "/*!" are doc-comments
                // but comments with only "*"s between two "/"s are not
                let is_doc_comment = is_block_doc_comment(string);

                if !terminated {
                    let msg = if is_doc_comment {
                        "unterminated block doc-comment"
                    } else {
                        "unterminated block comment"
                    };
                    let last_bpos = self.pos;
                    self.fatal_span_(start, last_bpos, msg).raise();
                }

                let tok = if is_doc_comment {
                    self.forbid_bare_cr(start,
                                        string,
                                        "bare CR not allowed in block doc-comment");
                    token::DocComment(Symbol::intern(string))
                } else {
                    token::Comment
                };

                tok
            }
            rustc_lexer::TokenKind::Whitespace => token::Whitespace,
            rustc_lexer::TokenKind::Ident | rustc_lexer::TokenKind::RawIdent => {
                let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent;
                let mut ident_start = start;
                if is_raw_ident {
                    // Skip the `r#` prefix so the interned symbol is the
                    // bare identifier.
                    ident_start = ident_start + BytePos(2);
                }
                // FIXME: perform NFKC normalization here. (Issue #2253)
                let sym = self.symbol_from(ident_start);
                if is_raw_ident {
                    let span = self.mk_sp(start, self.pos);
                    if !sym.can_be_raw() {
                        self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
                    }
                    self.sess.raw_identifier_spans.borrow_mut().push(span);
                }
                token::Ident(sym, is_raw_ident)
            }
            rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
                let suffix_start = start + BytePos(suffix_start as u32);
                let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
                let suffix = if suffix_start < self.pos {
                    let string = self.str_from(suffix_start);
                    if string == "_" {
                        self.sess.span_diagnostic
                            .struct_span_warn(self.mk_sp(suffix_start, self.pos),
                                              "underscore literal suffix is not allowed")
                            .warn("this was previously accepted by the compiler but is \
                                   being phased out; it will become a hard error in \
                                   a future release!")
                            .note("for more information, see issue #42326 \
                                   <https://github.com/rust-lang/rust/issues/42326>")
                            .emit();
                        None
                    } else {
                        Some(Symbol::intern(string))
                    }
                } else {
                    None
                };
                token::Literal(token::Lit { kind, symbol, suffix })
            }
            rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
                // Include the leading `'` in the real identifier, for macro
                // expansion purposes. See #12512 for the gory details of why
                // this is necessary.
                let lifetime_name = self.str_from(start);
                if starts_with_number {
                    self.err_span_(
                        start,
                        self.pos,
                        "lifetimes cannot start with a number",
                    );
                }
                let ident = Symbol::intern(lifetime_name);
                token::Lifetime(ident)
            }
            rustc_lexer::TokenKind::Semi => token::Semi,
            rustc_lexer::TokenKind::Comma => token::Comma,
            rustc_lexer::TokenKind::Dot => token::Dot,
            rustc_lexer::TokenKind::OpenParen => token::OpenDelim(token::Paren),
            rustc_lexer::TokenKind::CloseParen => token::CloseDelim(token::Paren),
            rustc_lexer::TokenKind::OpenBrace => token::OpenDelim(token::Brace),
            rustc_lexer::TokenKind::CloseBrace => token::CloseDelim(token::Brace),
            rustc_lexer::TokenKind::OpenBracket => token::OpenDelim(token::Bracket),
            rustc_lexer::TokenKind::CloseBracket => token::CloseDelim(token::Bracket),
            rustc_lexer::TokenKind::At => token::At,
            rustc_lexer::TokenKind::Pound => token::Pound,
            rustc_lexer::TokenKind::Tilde => token::Tilde,
            rustc_lexer::TokenKind::Question => token::Question,
            rustc_lexer::TokenKind::Colon => token::Colon,
            rustc_lexer::TokenKind::Dollar => token::Dollar,
            rustc_lexer::TokenKind::Eq => token::Eq,
            rustc_lexer::TokenKind::Not => token::Not,
            rustc_lexer::TokenKind::Lt => token::Lt,
            rustc_lexer::TokenKind::Gt => token::Gt,
            rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus),
            rustc_lexer::TokenKind::And => token::BinOp(token::And),
            rustc_lexer::TokenKind::Or => token::BinOp(token::Or),
            rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus),
            rustc_lexer::TokenKind::Star => token::BinOp(token::Star),
            rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash),
            rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
            rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),

            rustc_lexer::TokenKind::Unknown => {
                let c = self.str_from(start).chars().next().unwrap();
                let mut err = self.struct_fatal_span_char(start,
                                                          self.pos,
                                                          "unknown start of token",
                                                          c);
                // FIXME: the lexer could be used to turn the ASCII version of unicode
                // homoglyphs into the token, instead of keeping a table in
                // `check_for_substitution`. Ideally, this should be inside `rustc_lexer`.
                // However, we should first remove compound tokens like `<<` from
                // `rustc_lexer`, and then add fancier error recovery to it, as there
                // will be less overall work to do this way.
                let token = unicode_chars::check_for_substitution(self, start, c, &mut err)
                    .unwrap_or_else(|| token::Unknown(self.symbol_from(start)));
                err.emit();
                token
            }
        }
    }

    /// Validates and interns a literal's contents, producing the
    /// `token::LitKind` plus the interned symbol of the literal body
    /// (delimiters and suffix excluded). Unterminated literals are fatal.
    fn cook_lexer_literal(
        &self,
        start: BytePos,
        suffix_start: BytePos,
        kind: rustc_lexer::LiteralKind
    ) -> (token::LitKind, Symbol) {
        match kind {
            rustc_lexer::LiteralKind::Char { terminated } => {
                if !terminated {
                    self.fatal_span_(start, suffix_start,
                                     "unterminated character literal".into())
                        .raise()
                }
                // Content sits between the two `'` delimiters.
                let content_start = start + BytePos(1);
                let content_end = suffix_start - BytePos(1);
                self.validate_char_escape(content_start, content_end);
                let id = self.symbol_from_to(content_start, content_end);
                (token::Char, id)
            },
            rustc_lexer::LiteralKind::Byte { terminated } => {
                if !terminated {
                    self.fatal_span_(start + BytePos(1), suffix_start,
                                     "unterminated byte constant".into())
                        .raise()
                }
                // Content sits between `b'` and the closing `'`.
                let content_start = start + BytePos(2);
                let content_end = suffix_start - BytePos(1);
                self.validate_byte_escape(content_start, content_end);
                let id = self.symbol_from_to(content_start, content_end);
                (token::Byte, id)
            },
            rustc_lexer::LiteralKind::Str { terminated } => {
                if !terminated {
                    self.fatal_span_(start, suffix_start,
                                     "unterminated double quote string".into())
                        .raise()
                }
                let content_start = start + BytePos(1);
                let content_end = suffix_start - BytePos(1);
                self.validate_str_escape(content_start, content_end);
                let id = self.symbol_from_to(content_start, content_end);
                (token::Str, id)
            }
            rustc_lexer::LiteralKind::ByteStr { terminated } => {
                if !terminated {
                    self.fatal_span_(start + BytePos(1), suffix_start,
                                     "unterminated double quote byte string".into())
                        .raise()
                }
                let content_start = start + BytePos(2);
                let content_end = suffix_start - BytePos(1);
                self.validate_byte_str_escape(content_start, content_end);
                let id = self.symbol_from_to(content_start, content_end);
                (token::ByteStr, id)
            }
            rustc_lexer::LiteralKind::RawStr { n_hashes, started, terminated } => {
                if !started {
                    self.report_non_started_raw_string(start);
                }
                if !terminated {
                    self.report_unterminated_raw_string(start, n_hashes)
                }
                let n_hashes: u16 = self.restrict_n_hashes(start, n_hashes);
                let n = u32::from(n_hashes);
                // Content sits between `r#..#"` and `"#..#`.
                let content_start = start + BytePos(2 + n);
                let content_end = suffix_start - BytePos(1 + n);
                self.validate_raw_str_escape(content_start, content_end);
                let id = self.symbol_from_to(content_start, content_end);
                (token::StrRaw(n_hashes), id)
            }
            rustc_lexer::LiteralKind::RawByteStr { n_hashes, started, terminated } => {
                if !started {
                    self.report_non_started_raw_string(start);
                }
                if !terminated {
                    self.report_unterminated_raw_string(start, n_hashes)
                }
                let n_hashes: u16 = self.restrict_n_hashes(start, n_hashes);
                let n = u32::from(n_hashes);
                // Content sits between `br#..#"` and `"#..#`.
                let content_start = start + BytePos(3 + n);
                let content_end = suffix_start - BytePos(1 + n);
                self.validate_raw_byte_str_escape(content_start, content_end);
                let id = self.symbol_from_to(content_start, content_end);
                (token::ByteStrRaw(n_hashes), id)
            }
            rustc_lexer::LiteralKind::Int { base, empty_int } => {
                if empty_int {
                    self.err_span_(start, suffix_start, "no valid digits found for number");
                    // Recover with a dummy `0` so parsing can continue.
                    (token::Integer, sym::integer(0))
                } else {
                    self.validate_int_literal(base, start, suffix_start);
                    (token::Integer, self.symbol_from_to(start, suffix_start))
                }
            },
            rustc_lexer::LiteralKind::Float { base, empty_exponent } => {
                if empty_exponent {
                    let mut err = self.struct_span_fatal(
                        start, self.pos,
                        "expected at least one digit in exponent"
                    );
                    err.emit();
                }

                match base {
                    Base::Hexadecimal => {
                        self.err_span_(start, suffix_start,
                                       "hexadecimal float literal is not supported")
                    }
                    Base::Octal => {
                        self.err_span_(start, suffix_start,
                                       "octal float literal is not supported")
                    }
                    Base::Binary => {
                        self.err_span_(start, suffix_start,
                                       "binary float literal is not supported")
                    }
                    _ => ()
                }

                let id = self.symbol_from_to(start, suffix_start);
                (token::Float, id)
            },
        }
    }

    /// Converts an absolute `BytePos` into an index into `self.src`.
    #[inline]
    fn src_index(&self, pos: BytePos) -> usize {
        (pos - self.start_pos).to_usize()
    }

    /// Slice of the source text from `start` up to but excluding `self.pos`.
    fn str_from(&self, start: BytePos) -> &str
    {
        self.str_from_to(start, self.pos)
    }

    /// Creates a Symbol from a given offset to the current offset.
    fn symbol_from(&self, start: BytePos) -> Symbol {
        debug!("taking an ident from {:?} to {:?}", start, self.pos);
        Symbol::intern(self.str_from(start))
    }

    /// As symbol_from, with an explicit endpoint.
    fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol {
        debug!("taking an ident from {:?} to {:?}", start, end);
        Symbol::intern(self.str_from_to(start, end))
    }

    /// Slice of the source text spanning from `start` up to but excluding `end`.
    fn str_from_to(&self, start: BytePos, end: BytePos) -> &str
    {
        &self.src[self.src_index(start)..self.src_index(end)]
    }

    /// Emits `errmsg` (non-fatally) once for every bare `\r` in `s`,
    /// each with a one-byte span pointing at the offending CR.
    fn forbid_bare_cr(&self, start: BytePos, s: &str, errmsg: &str) {
        let mut idx = 0;
        loop {
            // `idx` ends up one past each found `\r`, so the search
            // resumes after it and the span below is `idx-1..idx`.
            idx = match s[idx..].find('\r') {
                None => break,
                Some(it) => idx + it + 1
            };
            self.err_span_(start + BytePos(idx as u32 - 1),
                           start + BytePos(idx as u32),
                           errmsg);
        }
    }

    /// Fatal: a raw string prefix (`r` / `br` plus `#`s) was never followed
    /// by an opening `"`.
    fn report_non_started_raw_string(&self, start: BytePos) -> ! {
        let bad_char = self.str_from(start).chars().last().unwrap();
        self
            .struct_fatal_span_char(
                start,
                self.pos,
                "found invalid character; only `#` is allowed \
                 in raw string delimitation",
                bad_char,
            )
            .emit();
        FatalError.raise()
    }

    /// Fatal: a raw string literal was never terminated. Notes the exact
    /// closing delimiter (`"` plus `n_hashes` `#`s) the user needs.
    fn report_unterminated_raw_string(&self, start: BytePos, n_hashes: usize) -> ! {
        let mut err = self.struct_span_fatal(
            start, start,
            "unterminated raw string",
        );
        err.span_label(
            self.mk_sp(start, start),
            "unterminated raw string",
        );

        if n_hashes > 0 {
            err.note(&format!("this raw string should be terminated with `\"{}`",
                              "#".repeat(n_hashes as usize)));
        }

        err.emit();
        FatalError.raise()
    }

    /// Narrows a raw-string hash count to `u16`; more than 65535 `#`s is a
    /// fatal error (the count must fit `token::StrRaw`'s `u16` field).
    fn restrict_n_hashes(&self, start: BytePos, n_hashes: usize) -> u16 {
        match n_hashes.try_into() {
            Ok(n_hashes) => n_hashes,
            Err(_) => {
                self.fatal_span_(start,
                                 self.pos,
                                 "too many `#` symbols: raw strings may be \
                                  delimited by up to 65535 `#` symbols").raise();
            }
        }
    }

    /// Checks the escapes inside a `'…'` literal; emits diagnostics on error.
    /// `content_start..content_end` excludes the quotes; the reported span
    /// is widened by one byte on each side to include them.
    fn validate_char_escape(&self, content_start: BytePos, content_end: BytePos) {
        let lit = self.str_from_to(content_start, content_end);
        if let Err((off, err)) = unescape::unescape_char(lit) {
            emit_unescape_error(
                &self.sess.span_diagnostic,
                lit,
                self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
                unescape::Mode::Char,
                0..off,
                err,
            )
        }
    }

    /// Checks the escapes inside a `b'…'` literal; emits diagnostics on error.
    fn validate_byte_escape(&self, content_start: BytePos, content_end: BytePos) {
        let lit = self.str_from_to(content_start, content_end);
        if let Err((off, err)) = unescape::unescape_byte(lit) {
            emit_unescape_error(
                &self.sess.span_diagnostic,
                lit,
                self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
                unescape::Mode::Byte,
                0..off,
                err,
            )
        }
    }

    /// Checks every escape inside a `"…"` literal; emits a diagnostic per
    /// invalid escape (non-fatal, so multiple errors can be reported).
    fn validate_str_escape(&self, content_start: BytePos, content_end: BytePos) {
        let lit = self.str_from_to(content_start, content_end);
        unescape::unescape_str(lit, &mut |range, c| {
            if let Err(err) = c {
                emit_unescape_error(
                    &self.sess.span_diagnostic,
                    lit,
                    self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
                    unescape::Mode::Str,
                    range,
                    err,
                )
            }
        })
    }

    /// Checks the contents of an `r"…"` literal (raw strings have no escapes,
    /// but e.g. bare CR is still rejected); emits diagnostics on error.
    fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) {
        let lit = self.str_from_to(content_start, content_end);
        unescape::unescape_raw_str(lit, &mut |range, c| {
            if let Err(err) = c {
                emit_unescape_error(
                    &self.sess.span_diagnostic,
                    lit,
                    self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
                    unescape::Mode::Str,
                    range,
                    err,
                )
            }
        })
    }

    /// Checks the contents of a `br"…"` literal; emits diagnostics on error.
    fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
        let lit = self.str_from_to(content_start, content_end);
        unescape::unescape_raw_byte_str(lit, &mut |range, c| {
            if let Err(err) = c {
                emit_unescape_error(
                    &self.sess.span_diagnostic,
                    lit,
                    self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
                    unescape::Mode::ByteStr,
                    range,
                    err,
                )
            }
        })
    }

    /// Checks every escape inside a `b"…"` literal; emits a diagnostic per
    /// invalid escape.
    fn validate_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
        let lit = self.str_from_to(content_start, content_end);
        unescape::unescape_byte_str(lit, &mut |range, c| {
            if let Err(err) = c {
                emit_unescape_error(
                    &self.sess.span_diagnostic,
                    lit,
                    self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
                    unescape::Mode::ByteStr,
                    range,
                    err,
                )
            }
        })
    }

    /// For binary/octal integer literals, rejects digits outside the base.
    /// Decimal and hexadecimal digit validity is enforced by the lexer
    /// itself, so those bases return early here.
    fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) {
        let base = match base {
            Base::Binary => 2,
            Base::Octal => 8,
            _ => return,
        };
        // Skip the two-byte base prefix (`0b` / `0o`).
        let s = self.str_from_to(content_start + BytePos(2), content_end);
        for (idx, c) in s.char_indices() {
            let idx = idx as u32;
            if c != '_' && c.to_digit(base).is_none() {
                let lo = content_start + BytePos(2 + idx);
                let hi = content_start + BytePos(2 + idx + c.len_utf8() as u32);
                self.err_span_(lo, hi,
                               &format!("invalid digit for a base {} literal", base));

            }
        }
    }
}
1a4d82fc | 645 | |
94b46f34 | 646 | fn is_doc_comment(s: &str) -> bool { |
9cc50fc6 SL |
647 | let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') || |
648 | s.starts_with("//!"); | |
1a4d82fc JJ |
649 | debug!("is {:?} a doc comment? {}", s, res); |
650 | res | |
651 | } | |
652 | ||
94b46f34 | 653 | fn is_block_doc_comment(s: &str) -> bool { |
9cc50fc6 SL |
654 | // Prevent `/**/` from being parsed as a doc comment |
655 | let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') || | |
656 | s.starts_with("/*!")) && s.len() >= 5; | |
1a4d82fc JJ |
657 | debug!("is {:?} a doc comment? {}", s, res); |
658 | res | |
659 | } |