]>
Commit | Line | Data |
---|---|---|
dc9dc135 XL |
1 | use crate::parse::ParseSess; |
2 | use crate::parse::token::{self, Token, TokenKind}; | |
3 | use crate::symbol::{sym, Symbol}; | |
48663c56 | 4 | use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char}; |
9fa01778 | 5 | |
416331ca | 6 | use errors::{FatalError, DiagnosticBuilder}; |
e1599b0c | 7 | use syntax_pos::{BytePos, Pos, Span}; |
416331ca XL |
8 | use rustc_lexer::Base; |
9 | use rustc_lexer::unescape; | |
1a4d82fc | 10 | |
d9579d0f | 11 | use std::borrow::Cow; |
1a4d82fc | 12 | use std::char; |
0731742a | 13 | use std::iter; |
416331ca | 14 | use std::convert::TryInto; |
0531ce1d | 15 | use rustc_data_structures::sync::Lrc; |
9fa01778 | 16 | use log::debug; |
1a4d82fc | 17 | |
416331ca XL |
18 | #[cfg(test)] |
19 | mod tests; | |
20 | ||
1a4d82fc | 21 | pub mod comments; |
32a655c1 | 22 | mod tokentrees; |
92a42be0 | 23 | mod unicode_chars; |
1a4d82fc | 24 | |
/// A closing delimiter that did not match the most recently opened one,
/// recorded so later diagnostics can point at both ends of the mismatch.
#[derive(Clone, Debug)]
pub struct UnmatchedBrace {
    /// The delimiter kind that was expected at `found_span`.
    pub expected_delim: token::DelimToken,
    /// The delimiter kind that actually appeared.
    pub found_delim: token::DelimToken,
    /// Where the mismatched closing delimiter appeared.
    pub found_span: Span,
    /// Span of the opening delimiter left unclosed, if known.
    pub unclosed_span: Option<Span>,
    /// A span that may be the intended match for the stray delimiter —
    /// exact heuristic lives in the token-tree builder; TODO confirm.
    pub candidate_span: Option<Span>,
}
33 | ||
/// Low-level lexer: produces `Token`s one at a time from a source file's
/// text via `next_token`.
pub struct StringReader<'a> {
    /// Parse session, used for diagnostics and shared lexing state.
    sess: &'a ParseSess,
    /// Initial position, read-only.
    start_pos: BytePos,
    /// The absolute offset within the source_map of the current character.
    pos: BytePos,
    /// Stop reading src at this index.
    end_src_index: usize,
    /// Source text to tokenize.
    src: Lrc<String>,
    /// If set, every span this reader produces is replaced by this span
    /// (see `mk_sp`).
    override_span: Option<Span>,
}
46 | ||
32a655c1 | 47 | impl<'a> StringReader<'a> { |
    /// Creates a `StringReader` positioned at the start of `source_file`.
    ///
    /// The file must carry its source text; a file without source is an
    /// internal compiler error here.
    pub fn new(sess: &'a ParseSess,
               source_file: Lrc<syntax_pos::SourceFile>,
               override_span: Option<Span>) -> Self {
        if source_file.src.is_none() {
            sess.span_diagnostic.bug(&format!("Cannot lex source_file without source: {}",
                                              source_file.name));
        }

        // Checked above: `src` is present.
        let src = (*source_file.src.as_ref().unwrap()).clone();

        StringReader {
            sess,
            start_pos: source_file.start_pos,
            pos: source_file.start_pos,
            end_src_index: src.len(),
            src,
            override_span,
        }
    }
67 | ||
    /// Creates a `StringReader` restricted to the byte range of `span`.
    pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self {
        let begin = sess.source_map().lookup_byte_offset(span.lo());
        let end = sess.source_map().lookup_byte_offset(span.hi());

        // Make the range zero-length if the span is invalid.
        if span.lo() > span.hi() || begin.sf.start_pos != end.sf.start_pos {
            span = span.shrink_to_lo();
        }

        let mut sr = StringReader::new(sess, begin.sf, None);

        // Seek the lexer to the right byte range.
        // NOTE(review): only the end bound is adjusted here; `pos` still
        // starts at the beginning of the file — confirm callers expect that.
        sr.end_src_index = sr.src_index(span.hi());

        sr
    }
8bb4bdeb | 84 | |
b7449926 | 85 | |
416331ca | 86 | fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span { |
e1599b0c | 87 | self.override_span.unwrap_or_else(|| Span::with_root_ctxt(lo, hi)) |
8bb4bdeb XL |
88 | } |
89 | ||
416331ca XL |
90 | /// Returns the next token, including trivia like whitespace or comments. |
91 | /// | |
92 | /// `Err(())` means that some errors were encountered, which can be | |
93 | /// retrieved using `buffer_fatal_errors`. | |
94 | pub fn next_token(&mut self) -> Token { | |
95 | let start_src_index = self.src_index(self.pos); | |
96 | let text: &str = &self.src[start_src_index..self.end_src_index]; | |
97 | ||
98 | if text.is_empty() { | |
99 | let span = self.mk_sp(self.pos, self.pos); | |
100 | return Token::new(token::Eof, span); | |
101 | } | |
102 | ||
103 | { | |
104 | let is_beginning_of_file = self.pos == self.start_pos; | |
105 | if is_beginning_of_file { | |
106 | if let Some(shebang_len) = rustc_lexer::strip_shebang(text) { | |
107 | let start = self.pos; | |
108 | self.pos = self.pos + BytePos::from_usize(shebang_len); | |
109 | ||
110 | let sym = self.symbol_from(start + BytePos::from_usize("#!".len())); | |
111 | let kind = token::Shebang(sym); | |
112 | ||
113 | let span = self.mk_sp(start, self.pos); | |
114 | return Token::new(kind, span); | |
115 | } | |
116 | } | |
117 | } | |
118 | ||
119 | let token = rustc_lexer::first_token(text); | |
120 | ||
121 | let start = self.pos; | |
122 | self.pos = self.pos + BytePos::from_usize(token.len); | |
123 | ||
124 | debug!("try_next_token: {:?}({:?})", token.kind, self.str_from(start)); | |
125 | ||
126 | // This could use `?`, but that makes code significantly (10-20%) slower. | |
127 | // https://github.com/rust-lang/rust/issues/37939 | |
128 | let kind = self.cook_lexer_token(token.kind, start); | |
129 | ||
130 | let span = self.mk_sp(start, self.pos); | |
131 | Token::new(kind, span) | |
1a4d82fc JJ |
132 | } |
133 | ||
    /// Report a fatal lexical error with a given span; returns the
    /// `FatalError` token so the caller can `.raise()` it.
    fn fatal_span(&self, sp: Span, m: &str) -> FatalError {
        self.sess.span_diagnostic.span_fatal(sp, m)
    }
138 | ||
    /// Report a (non-fatal) lexical error with a given span.
    fn err_span(&self, sp: Span, m: &str) {
        self.sess.span_diagnostic.struct_span_err(sp, m).emit();
    }
143 | ||
c1a9b12d | 144 | |
    /// Report a fatal error spanning [`from_pos`, `to_pos`).
    fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> FatalError {
        self.fatal_span(self.mk_sp(from_pos, to_pos), m)
    }
149 | ||
    /// Report a lexical error spanning [`from_pos`, `to_pos`).
    fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
        self.err_span(self.mk_sp(from_pos, to_pos), m)
    }
154 | ||
    /// Builds — without emitting — a fatal diagnostic for the byte range
    /// [`from_pos`, `to_pos`).
    fn struct_span_fatal(&self, from_pos: BytePos, to_pos: BytePos, m: &str)
        -> DiagnosticBuilder<'a>
    {
        self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), m)
    }
160 | ||
b7449926 XL |
161 | fn struct_fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) |
162 | -> DiagnosticBuilder<'a> | |
163 | { | |
9cc50fc6 SL |
164 | let mut m = m.to_string(); |
165 | m.push_str(": "); | |
48663c56 | 166 | push_escaped_char(&mut m, c); |
b7449926 | 167 | |
041b39d2 | 168 | self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..]) |
9cc50fc6 | 169 | } |
1a4d82fc | 170 | |
    /// Turns simple `rustc_lexer::TokenKind` enum into a rich
    /// `libsyntax::TokenKind`. This turns strings into interned
    /// symbols and runs additional validation.
    fn cook_lexer_token(
        &self,
        token: rustc_lexer::TokenKind,
        start: BytePos,
    ) -> TokenKind {
        match token {
            rustc_lexer::TokenKind::LineComment => {
                let string = self.str_from(start);
                // comments with only more "/"s are not doc comments
                let tok = if is_doc_comment(string) {
                    // Doc comments may not contain a bare CR: locate each
                    // '\r' and require that the next character is '\n'.
                    let mut idx = 0;
                    loop {
                        idx = match string[idx..].find('\r') {
                            None => break,
                            Some(it) => idx + it + 1
                        };
                        if string[idx..].chars().next() != Some('\n') {
                            self.err_span_(start + BytePos(idx as u32 - 1),
                                           start + BytePos(idx as u32),
                                           "bare CR not allowed in doc-comment");
                        }
                    }
                    token::DocComment(Symbol::intern(string))
                } else {
                    token::Comment
                };

                tok
            }
            rustc_lexer::TokenKind::BlockComment { terminated } => {
                let string = self.str_from(start);
                // block comments starting with "/**" or "/*!" are doc-comments
                // but comments with only "*"s between two "/"s are not
                let is_doc_comment = is_block_doc_comment(string);

                if !terminated {
                    let msg = if is_doc_comment {
                        "unterminated block doc-comment"
                    } else {
                        "unterminated block comment"
                    };
                    let last_bpos = self.pos;
                    self.fatal_span_(start, last_bpos, msg).raise();
                }

                let tok = if is_doc_comment {
                    // In doc comments, CRLF is normalized to LF and a bare
                    // CR is reported as an error.
                    let has_cr = string.contains('\r');
                    let string = if has_cr {
                        self.translate_crlf(start,
                                            string,
                                            "bare CR not allowed in block doc-comment")
                    } else {
                        string.into()
                    };
                    token::DocComment(Symbol::intern(&string[..]))
                } else {
                    token::Comment
                };

                tok
            }
            rustc_lexer::TokenKind::Whitespace => token::Whitespace,
            rustc_lexer::TokenKind::Ident | rustc_lexer::TokenKind::RawIdent => {
                let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent;
                let mut ident_start = start;
                if is_raw_ident {
                    // Skip the `r#` prefix; the symbol excludes it.
                    ident_start = ident_start + BytePos(2);
                }
                // FIXME: perform NFKC normalization here. (Issue #2253)
                let sym = self.symbol_from(ident_start);
                if is_raw_ident {
                    let span = self.mk_sp(start, self.pos);
                    if !sym.can_be_raw() {
                        self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
                    }
                    self.sess.raw_identifier_spans.borrow_mut().push(span);
                }
                token::Ident(sym, is_raw_ident)
            }
            rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
                // `suffix_start` arrives relative to the token; make it absolute.
                let suffix_start = start + BytePos(suffix_start as u32);
                let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
                let suffix = if suffix_start < self.pos {
                    let string = self.str_from(suffix_start);
                    if string == "_" {
                        // A bare `_` suffix is being phased out: warn, and
                        // drop the suffix from the token.
                        self.sess.span_diagnostic
                            .struct_span_warn(self.mk_sp(suffix_start, self.pos),
                                              "underscore literal suffix is not allowed")
                            .warn("this was previously accepted by the compiler but is \
                                   being phased out; it will become a hard error in \
                                   a future release!")
                            .note("for more information, see issue #42326 \
                                   <https://github.com/rust-lang/rust/issues/42326>")
                            .emit();
                        None
                    } else {
                        Some(Symbol::intern(string))
                    }
                } else {
                    None
                };
                token::Literal(token::Lit { kind, symbol, suffix })
            }
            rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
                // Include the leading `'` in the real identifier, for macro
                // expansion purposes. See #12512 for the gory details of why
                // this is necessary.
                let lifetime_name = self.str_from(start);
                if starts_with_number {
                    self.err_span_(
                        start,
                        self.pos,
                        "lifetimes cannot start with a number",
                    );
                }
                let ident = Symbol::intern(lifetime_name);
                token::Lifetime(ident)
            }
            // One-to-one mappings for punctuation.
            rustc_lexer::TokenKind::Semi => token::Semi,
            rustc_lexer::TokenKind::Comma => token::Comma,
            rustc_lexer::TokenKind::Dot => token::Dot,
            rustc_lexer::TokenKind::OpenParen => token::OpenDelim(token::Paren),
            rustc_lexer::TokenKind::CloseParen => token::CloseDelim(token::Paren),
            rustc_lexer::TokenKind::OpenBrace => token::OpenDelim(token::Brace),
            rustc_lexer::TokenKind::CloseBrace => token::CloseDelim(token::Brace),
            rustc_lexer::TokenKind::OpenBracket => token::OpenDelim(token::Bracket),
            rustc_lexer::TokenKind::CloseBracket => token::CloseDelim(token::Bracket),
            rustc_lexer::TokenKind::At => token::At,
            rustc_lexer::TokenKind::Pound => token::Pound,
            rustc_lexer::TokenKind::Tilde => token::Tilde,
            rustc_lexer::TokenKind::Question => token::Question,
            rustc_lexer::TokenKind::Colon => token::Colon,
            rustc_lexer::TokenKind::Dollar => token::Dollar,
            rustc_lexer::TokenKind::Eq => token::Eq,
            rustc_lexer::TokenKind::Not => token::Not,
            rustc_lexer::TokenKind::Lt => token::Lt,
            rustc_lexer::TokenKind::Gt => token::Gt,
            rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus),
            rustc_lexer::TokenKind::And => token::BinOp(token::And),
            rustc_lexer::TokenKind::Or => token::BinOp(token::Or),
            rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus),
            rustc_lexer::TokenKind::Star => token::BinOp(token::Star),
            rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash),
            rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
            rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),

            rustc_lexer::TokenKind::Unknown => {
                let c = self.str_from(start).chars().next().unwrap();
                let mut err = self.struct_fatal_span_char(start,
                                                          self.pos,
                                                          "unknown start of token",
                                                          c);
                // FIXME: the lexer could itself map unicode homoglyphs onto
                // their ASCII counterparts, instead of keeping a table in
                // `check_for_substitution`. Ideally, this should be inside
                // `rustc_lexer`. However, we should first remove compound
                // tokens like `<<` from `rustc_lexer`, and then add fancier
                // error recovery to it, as there will be less overall work
                // to do this way.
                let token = unicode_chars::check_for_substitution(self, start, c, &mut err)
                    .unwrap_or_else(|| token::Unknown(self.symbol_from(start)));
                err.emit();
                token
            }
        }
    }
b7449926 | 338 | |
    /// Cooks a `rustc_lexer` literal into a `token::LitKind` plus the
    /// interned symbol of the literal's contents (delimiters and suffix
    /// excluded). Unterminated literals are fatal; other problems are
    /// reported as ordinary errors and lexing continues.
    fn cook_lexer_literal(
        &self,
        start: BytePos,
        suffix_start: BytePos,
        kind: rustc_lexer::LiteralKind
    ) -> (token::LitKind, Symbol) {
        match kind {
            rustc_lexer::LiteralKind::Char { terminated } => {
                if !terminated {
                    self.fatal_span_(start, suffix_start,
                                     "unterminated character literal".into())
                        .raise()
                }
                // Strip the surrounding `'` quotes.
                let content_start = start + BytePos(1);
                let content_end = suffix_start - BytePos(1);
                self.validate_char_escape(content_start, content_end);
                let id = self.symbol_from_to(content_start, content_end);
                (token::Char, id)
            },
            rustc_lexer::LiteralKind::Byte { terminated } => {
                if !terminated {
                    self.fatal_span_(start + BytePos(1), suffix_start,
                                     "unterminated byte constant".into())
                        .raise()
                }
                // Strip the `b'` prefix and closing `'`.
                let content_start = start + BytePos(2);
                let content_end = suffix_start - BytePos(1);
                self.validate_byte_escape(content_start, content_end);
                let id = self.symbol_from_to(content_start, content_end);
                (token::Byte, id)
            },
            rustc_lexer::LiteralKind::Str { terminated } => {
                if !terminated {
                    self.fatal_span_(start, suffix_start,
                                     "unterminated double quote string".into())
                        .raise()
                }
                // Strip the surrounding `"` quotes.
                let content_start = start + BytePos(1);
                let content_end = suffix_start - BytePos(1);
                self.validate_str_escape(content_start, content_end);
                let id = self.symbol_from_to(content_start, content_end);
                (token::Str, id)
            }
            rustc_lexer::LiteralKind::ByteStr { terminated } => {
                if !terminated {
                    self.fatal_span_(start + BytePos(1), suffix_start,
                                     "unterminated double quote byte string".into())
                        .raise()
                }
                // Strip the `b"` prefix and closing `"`.
                let content_start = start + BytePos(2);
                let content_end = suffix_start - BytePos(1);
                self.validate_byte_str_escape(content_start, content_end);
                let id = self.symbol_from_to(content_start, content_end);
                (token::ByteStr, id)
            }
            rustc_lexer::LiteralKind::RawStr { n_hashes, started, terminated } => {
                if !started {
                    self.report_non_started_raw_string(start);
                }
                if !terminated {
                    self.report_unterminated_raw_string(start, n_hashes)
                }
                let n_hashes: u16 = self.restrict_n_hashes(start, n_hashes);
                let n = u32::from(n_hashes);
                // Strip `r#…#"` and the matching `"#…#`.
                let content_start = start + BytePos(2 + n);
                let content_end = suffix_start - BytePos(1 + n);
                self.validate_raw_str_escape(content_start, content_end);
                let id = self.symbol_from_to(content_start, content_end);
                (token::StrRaw(n_hashes), id)
            }
            rustc_lexer::LiteralKind::RawByteStr { n_hashes, started, terminated } => {
                if !started {
                    self.report_non_started_raw_string(start);
                }
                if !terminated {
                    self.report_unterminated_raw_string(start, n_hashes)
                }
                let n_hashes: u16 = self.restrict_n_hashes(start, n_hashes);
                let n = u32::from(n_hashes);
                // Strip `br#…#"` and the matching `"#…#`.
                let content_start = start + BytePos(3 + n);
                let content_end = suffix_start - BytePos(1 + n);
                self.validate_raw_byte_str_escape(content_start, content_end);
                let id = self.symbol_from_to(content_start, content_end);
                (token::ByteStrRaw(n_hashes), id)
            }
            rustc_lexer::LiteralKind::Int { base, empty_int } => {
                if empty_int {
                    // e.g. a lone `0x` — recover with the value 0.
                    self.err_span_(start, suffix_start, "no valid digits found for number");
                    (token::Integer, sym::integer(0))
                } else {
                    self.validate_int_literal(base, start, suffix_start);
                    (token::Integer, self.symbol_from_to(start, suffix_start))
                }
            },
            rustc_lexer::LiteralKind::Float { base, empty_exponent } => {
                if empty_exponent {
                    let mut err = self.struct_span_fatal(
                        start, self.pos,
                        "expected at least one digit in exponent"
                    );
                    err.emit();
                }

                // Float literals only support base 10.
                match base {
                    Base::Hexadecimal => {
                        self.err_span_(start, suffix_start,
                                       "hexadecimal float literal is not supported")
                    }
                    Base::Octal => {
                        self.err_span_(start, suffix_start,
                                       "octal float literal is not supported")
                    }
                    Base::Binary => {
                        self.err_span_(start, suffix_start,
                                       "binary float literal is not supported")
                    }
                    _ => ()
                }

                let id = self.symbol_from_to(start, suffix_start);
                (token::Float, id)
            },
        }
    }
463 | ||
    /// Converts an absolute `BytePos` into an index into `self.src`.
    #[inline]
    fn src_index(&self, pos: BytePos) -> usize {
        (pos - self.start_pos).to_usize()
    }
468 | ||
    /// Slice of the source text from `start` up to but excluding `self.pos`.
    fn str_from(&self, start: BytePos) -> &str
    {
        self.str_from_to(start, self.pos)
    }
475 | ||
    /// Creates a Symbol from a given offset to the current offset.
    fn symbol_from(&self, start: BytePos) -> Symbol {
        debug!("taking an ident from {:?} to {:?}", start, self.pos);
        Symbol::intern(self.str_from(start))
    }
481 | ||
    /// As `symbol_from`, with an explicit endpoint.
    fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol {
        debug!("taking an ident from {:?} to {:?}", start, end);
        Symbol::intern(self.str_from_to(start, end))
    }
487 | ||
    /// Slice of the source text spanning from `start` up to but excluding `end`.
    fn str_from_to(&self, start: BytePos, end: BytePos) -> &str
    {
        &self.src[self.src_index(start)..self.src_index(end)]
    }
493 | ||
    /// Converts CRLF to LF in the given string, raising an error on bare CR.
    ///
    /// Returns a borrowed `Cow` when `s` contains no CRLF pair (the common
    /// case), and an owned, rewritten string otherwise.
    fn translate_crlf<'b>(&self, start: BytePos, s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
        let mut chars = s.char_indices().peekable();
        while let Some((i, ch)) = chars.next() {
            if ch == '\r' {
                if let Some((lf_idx, '\n')) = chars.peek() {
                    // Found the first CRLF pair: switch to the slow path
                    // that builds a fresh string.
                    return translate_crlf_(self, start, s, *lf_idx, chars, errmsg).into();
                }
                // A CR not followed by LF is an error, but we keep scanning.
                let pos = start + BytePos(i as u32);
                let end_pos = start + BytePos((i + ch.len_utf8()) as u32);
                self.err_span_(pos, end_pos, errmsg);
            }
        }
        // No CRLF anywhere: hand back the input unchanged.
        return s.into();

        /// Slow path: copies `s` into a new buffer, dropping the CR of
        /// every CRLF pair and reporting any remaining bare CR. `j` is the
        /// index just past the first CR's following LF; `chars` continues
        /// from there.
        fn translate_crlf_(rdr: &StringReader<'_>,
                           start: BytePos,
                           s: &str,
                           mut j: usize,
                           mut chars: iter::Peekable<impl Iterator<Item = (usize, char)>>,
                           errmsg: &str)
                           -> String {
            let mut buf = String::with_capacity(s.len());
            // Skip first CR
            buf.push_str(&s[.. j - 1]);
            while let Some((i, ch)) = chars.next() {
                if ch == '\r' {
                    // Flush everything between the previous CR and this one.
                    if j < i {
                        buf.push_str(&s[j..i]);
                    }
                    let next = i + ch.len_utf8();
                    j = next;
                    if chars.peek().map(|(_, ch)| *ch) != Some('\n') {
                        let pos = start + BytePos(i as u32);
                        let end_pos = start + BytePos(next as u32);
                        rdr.err_span_(pos, end_pos, errmsg);
                    }
                }
            }
            // Flush the tail after the last CR.
            if j < s.len() {
                buf.push_str(&s[j..]);
            }
            buf
        }
    }
539 | ||
    /// Reports a fatal error for a raw string whose opening `"` never
    /// appeared (a character other than `#` showed up in the delimiter).
    /// Never returns.
    fn report_non_started_raw_string(&self, start: BytePos) -> ! {
        // The offending character is the last one consumed so far.
        let bad_char = self.str_from(start).chars().last().unwrap();
        self
            .struct_fatal_span_char(
                start,
                self.pos,
                "found invalid character; only `#` is allowed \
                 in raw string delimitation",
                bad_char,
            )
            .emit();
        FatalError.raise()
    }
553 | ||
416331ca XL |
554 | fn report_unterminated_raw_string(&self, start: BytePos, n_hashes: usize) -> ! { |
555 | let mut err = self.struct_span_fatal( | |
556 | start, start, | |
557 | "unterminated raw string", | |
558 | ); | |
559 | err.span_label( | |
560 | self.mk_sp(start, start), | |
561 | "unterminated raw string", | |
562 | ); | |
1a4d82fc | 563 | |
416331ca XL |
564 | if n_hashes > 0 { |
565 | err.note(&format!("this raw string should be terminated with `\"{}`", | |
566 | "#".repeat(n_hashes as usize))); | |
48663c56 | 567 | } |
1a4d82fc | 568 | |
416331ca XL |
569 | err.emit(); |
570 | FatalError.raise() | |
1a4d82fc JJ |
571 | } |
572 | ||
    /// Narrows `n_hashes` to the `u16` stored in the token; more than
    /// 65535 `#`s is a fatal error.
    fn restrict_n_hashes(&self, start: BytePos, n_hashes: usize) -> u16 {
        match n_hashes.try_into() {
            Ok(n_hashes) => n_hashes,
            Err(_) => {
                self.fatal_span_(start,
                                 self.pos,
                                 "too many `#` symbols: raw strings may be \
                                  delimited by up to 65535 `#` symbols").raise();
            }
        }
    }
48663c56 | 584 | |
    /// Checks the escape sequence in a `char` literal;
    /// `content_start`/`content_end` delimit the contents between the quotes.
    fn validate_char_escape(&self, content_start: BytePos, content_end: BytePos) {
        let lit = self.str_from_to(content_start, content_end);
        if let Err((off, err)) = unescape::unescape_char(lit) {
            emit_unescape_error(
                &self.sess.span_diagnostic,
                lit,
                // Widen by one byte on each side so the span covers the quotes.
                self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
                unescape::Mode::Char,
                0..off,
                err,
            )
        }
    }
598 | ||
    /// Checks the escape sequence in a byte literal (`b'…'`);
    /// `content_start`/`content_end` delimit the contents between the quotes.
    fn validate_byte_escape(&self, content_start: BytePos, content_end: BytePos) {
        let lit = self.str_from_to(content_start, content_end);
        if let Err((off, err)) = unescape::unescape_byte(lit) {
            emit_unescape_error(
                &self.sess.span_diagnostic,
                lit,
                // Widen by one byte on each side so the span covers the quotes.
                self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
                unescape::Mode::Byte,
                0..off,
                err,
            )
        }
    }
612 | ||
    /// Checks every escape sequence in a string literal, reporting each
    /// invalid one; `content_start`/`content_end` delimit the contents
    /// between the quotes.
    fn validate_str_escape(&self, content_start: BytePos, content_end: BytePos) {
        let lit = self.str_from_to(content_start, content_end);
        unescape::unescape_str(lit, &mut |range, c| {
            if let Err(err) = c {
                emit_unescape_error(
                    &self.sess.span_diagnostic,
                    lit,
                    // Widen by one byte on each side so the span covers the quotes.
                    self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
                    unescape::Mode::Str,
                    range,
                    err,
                )
            }
        })
    }
628 | ||
    /// Validates the contents of a raw string literal, reporting any
    /// content errors via `emit_unescape_error` (raw strings contain no
    /// escapes, hence the raw-specific unescape entry point).
    fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) {
        let lit = self.str_from_to(content_start, content_end);
        unescape::unescape_raw_str(lit, &mut |range, c| {
            if let Err(err) = c {
                emit_unescape_error(
                    &self.sess.span_diagnostic,
                    lit,
                    // Widen by one byte on each side so the span covers the quotes.
                    self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
                    unescape::Mode::Str,
                    range,
                    err,
                )
            }
        })
    }
644 | ||
    /// Validates the contents of a raw byte string literal, reporting any
    /// content errors via `emit_unescape_error`.
    fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
        let lit = self.str_from_to(content_start, content_end);
        unescape::unescape_raw_byte_str(lit, &mut |range, c| {
            if let Err(err) = c {
                emit_unescape_error(
                    &self.sess.span_diagnostic,
                    lit,
                    // Widen by one byte on each side so the span covers the quotes.
                    self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
                    unescape::Mode::ByteStr,
                    range,
                    err,
                )
            }
        })
    }
660 | ||
    /// Checks every escape sequence in a byte string literal (`b"…"`),
    /// reporting each invalid one; `content_start`/`content_end` delimit
    /// the contents between the quotes.
    fn validate_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
        let lit = self.str_from_to(content_start, content_end);
        unescape::unescape_byte_str(lit, &mut |range, c| {
            if let Err(err) = c {
                emit_unescape_error(
                    &self.sess.span_diagnostic,
                    lit,
                    // Widen by one byte on each side so the span covers the quotes.
                    self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
                    unescape::Mode::ByteStr,
                    range,
                    err,
                )
            }
        })
    }
1a4d82fc | 676 | |
    /// For binary and octal integer literals, checks that every character
    /// after the two-character base prefix is either `_` or a valid digit
    /// for the base. Other bases are not checked here.
    fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) {
        let base = match base {
            Base::Binary => 2,
            Base::Octal => 8,
            _ => return,
        };
        // Skip the `0b` / `0o` prefix.
        let s = self.str_from_to(content_start + BytePos(2), content_end);
        for (idx, c) in s.char_indices() {
            let idx = idx as u32;
            if c != '_' && c.to_digit(base).is_none() {
                let lo = content_start + BytePos(2 + idx);
                let hi = content_start + BytePos(2 + idx + c.len_utf8() as u32);
                self.err_span_(lo, hi,
                               &format!("invalid digit for a base {} literal", base));

            }
        }
    }
9cc50fc6 | 695 | } |
1a4d82fc | 696 | |
94b46f34 | 697 | fn is_doc_comment(s: &str) -> bool { |
9cc50fc6 SL |
698 | let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') || |
699 | s.starts_with("//!"); | |
1a4d82fc JJ |
700 | debug!("is {:?} a doc comment? {}", s, res); |
701 | res | |
702 | } | |
703 | ||
94b46f34 | 704 | fn is_block_doc_comment(s: &str) -> bool { |
9cc50fc6 SL |
705 | // Prevent `/**/` from being parsed as a doc comment |
706 | let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') || | |
707 | s.starts_with("/*!")) && s.len() >= 5; | |
1a4d82fc JJ |
708 | debug!("is {:?} a doc comment? {}", s, res); |
709 | res | |
710 | } |