]> git.proxmox.com Git - rustc.git/blame - src/libsyntax/parse/lexer/mod.rs
New upstream version 1.40.0+dfsg1
[rustc.git] / src / libsyntax / parse / lexer / mod.rs
CommitLineData
dc9dc135 1use crate::parse::token::{self, Token, TokenKind};
e74abb32 2use crate::sess::ParseSess;
dc9dc135 3use crate::symbol::{sym, Symbol};
48663c56 4use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char};
9fa01778 5
416331ca 6use errors::{FatalError, DiagnosticBuilder};
e1599b0c 7use syntax_pos::{BytePos, Pos, Span};
416331ca
XL
8use rustc_lexer::Base;
9use rustc_lexer::unescape;
1a4d82fc 10
1a4d82fc 11use std::char;
416331ca 12use std::convert::TryInto;
0531ce1d 13use rustc_data_structures::sync::Lrc;
9fa01778 14use log::debug;
1a4d82fc 15
416331ca
XL
16#[cfg(test)]
17mod tests;
18
1a4d82fc 19pub mod comments;
32a655c1 20mod tokentrees;
92a42be0 21mod unicode_chars;
1a4d82fc 22
9fa01778
XL
23#[derive(Clone, Debug)]
24pub struct UnmatchedBrace {
25 pub expected_delim: token::DelimToken,
e74abb32 26 pub found_delim: Option<token::DelimToken>,
9fa01778
XL
27 pub found_span: Span,
28 pub unclosed_span: Option<Span>,
29 pub candidate_span: Option<Span>,
30}
31
1a4d82fc 32pub struct StringReader<'a> {
416331ca
XL
33 sess: &'a ParseSess,
34 /// Initial position, read-only.
35 start_pos: BytePos,
36 /// The absolute offset within the source_map of the current character.
37 pos: BytePos,
94b46f34 38 /// Stop reading src at this index.
416331ca
XL
39 end_src_index: usize,
40 /// Source text to tokenize.
94b46f34 41 src: Lrc<String>,
48663c56 42 override_span: Option<Span>,
cc61c64b
XL
43}
44
32a655c1 45impl<'a> StringReader<'a> {
416331ca 46 pub fn new(sess: &'a ParseSess,
b7449926
XL
47 source_file: Lrc<syntax_pos::SourceFile>,
48 override_span: Option<Span>) -> Self {
b7449926 49 if source_file.src.is_none() {
e74abb32 50 sess.span_diagnostic.bug(&format!("cannot lex `source_file` without source: {}",
b7449926 51 source_file.name));
c34b1796
AL
52 }
53
b7449926 54 let src = (*source_file.src.as_ref().unwrap()).clone();
c34b1796 55
9e0c209e 56 StringReader {
3b2f2976 57 sess,
416331ca 58 start_pos: source_file.start_pos,
b7449926 59 pos: source_file.start_pos,
94b46f34 60 end_src_index: src.len(),
94b46f34 61 src,
94b46f34 62 override_span,
a7813a04 63 }
1a4d82fc
JJ
64 }
65
8bb4bdeb 66 pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self {
b7449926
XL
67 let begin = sess.source_map().lookup_byte_offset(span.lo());
68 let end = sess.source_map().lookup_byte_offset(span.hi());
8bb4bdeb
XL
69
70 // Make the range zero-length if the span is invalid.
a1dfa0c6 71 if span.lo() > span.hi() || begin.sf.start_pos != end.sf.start_pos {
0531ce1d 72 span = span.shrink_to_lo();
8bb4bdeb
XL
73 }
74
416331ca 75 let mut sr = StringReader::new(sess, begin.sf, None);
8bb4bdeb
XL
76
77 // Seek the lexer to the right byte range.
94b46f34 78 sr.end_src_index = sr.src_index(span.hi());
8bb4bdeb 79
416331ca
XL
80 sr
81 }
8bb4bdeb 82
b7449926 83
416331ca 84 fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span {
e1599b0c 85 self.override_span.unwrap_or_else(|| Span::with_root_ctxt(lo, hi))
8bb4bdeb
XL
86 }
87
416331ca
XL
88 /// Returns the next token, including trivia like whitespace or comments.
89 ///
90 /// `Err(())` means that some errors were encountered, which can be
91 /// retrieved using `buffer_fatal_errors`.
92 pub fn next_token(&mut self) -> Token {
93 let start_src_index = self.src_index(self.pos);
94 let text: &str = &self.src[start_src_index..self.end_src_index];
95
96 if text.is_empty() {
97 let span = self.mk_sp(self.pos, self.pos);
98 return Token::new(token::Eof, span);
99 }
100
101 {
102 let is_beginning_of_file = self.pos == self.start_pos;
103 if is_beginning_of_file {
104 if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
105 let start = self.pos;
106 self.pos = self.pos + BytePos::from_usize(shebang_len);
107
108 let sym = self.symbol_from(start + BytePos::from_usize("#!".len()));
109 let kind = token::Shebang(sym);
110
111 let span = self.mk_sp(start, self.pos);
112 return Token::new(kind, span);
113 }
114 }
115 }
116
117 let token = rustc_lexer::first_token(text);
118
119 let start = self.pos;
120 self.pos = self.pos + BytePos::from_usize(token.len);
121
122 debug!("try_next_token: {:?}({:?})", token.kind, self.str_from(start));
123
124 // This could use `?`, but that makes code significantly (10-20%) slower.
125 // https://github.com/rust-lang/rust/issues/37939
126 let kind = self.cook_lexer_token(token.kind, start);
127
128 let span = self.mk_sp(start, self.pos);
129 Token::new(kind, span)
1a4d82fc
JJ
130 }
131
132 /// Report a fatal lexical error with a given span.
94b46f34 133 fn fatal_span(&self, sp: Span, m: &str) -> FatalError {
32a655c1 134 self.sess.span_diagnostic.span_fatal(sp, m)
1a4d82fc
JJ
135 }
136
137 /// Report a lexical error with a given span.
94b46f34 138 fn err_span(&self, sp: Span, m: &str) {
0731742a 139 self.sess.span_diagnostic.struct_span_err(sp, m).emit();
1a4d82fc
JJ
140 }
141
c1a9b12d 142
1a4d82fc 143 /// Report a fatal error spanning [`from_pos`, `to_pos`).
92a42be0 144 fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> FatalError {
041b39d2 145 self.fatal_span(self.mk_sp(from_pos, to_pos), m)
1a4d82fc
JJ
146 }
147
148 /// Report a lexical error spanning [`from_pos`, `to_pos`).
149 fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
041b39d2 150 self.err_span(self.mk_sp(from_pos, to_pos), m)
1a4d82fc
JJ
151 }
152
b7449926
XL
153 fn struct_span_fatal(&self, from_pos: BytePos, to_pos: BytePos, m: &str)
154 -> DiagnosticBuilder<'a>
155 {
0531ce1d
XL
156 self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), m)
157 }
158
b7449926
XL
159 fn struct_fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char)
160 -> DiagnosticBuilder<'a>
161 {
9cc50fc6
SL
162 let mut m = m.to_string();
163 m.push_str(": ");
48663c56 164 push_escaped_char(&mut m, c);
b7449926 165
041b39d2 166 self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
9cc50fc6 167 }
1a4d82fc 168
416331ca
XL
169 /// Turns simple `rustc_lexer::TokenKind` enum into a rich
170 /// `libsyntax::TokenKind`. This turns strings into interned
171 /// symbols and runs additional validation.
172 fn cook_lexer_token(
173 &self,
174 token: rustc_lexer::TokenKind,
175 start: BytePos,
176 ) -> TokenKind {
177 match token {
178 rustc_lexer::TokenKind::LineComment => {
179 let string = self.str_from(start);
180 // comments with only more "/"s are not doc comments
181 let tok = if is_doc_comment(string) {
e74abb32 182 self.forbid_bare_cr(start, string, "bare CR not allowed in doc-comment");
416331ca
XL
183 token::DocComment(Symbol::intern(string))
184 } else {
185 token::Comment
186 };
187
188 tok
189 }
190 rustc_lexer::TokenKind::BlockComment { terminated } => {
191 let string = self.str_from(start);
192 // block comments starting with "/**" or "/*!" are doc-comments
193 // but comments with only "*"s between two "/"s are not
194 let is_doc_comment = is_block_doc_comment(string);
195
196 if !terminated {
197 let msg = if is_doc_comment {
198 "unterminated block doc-comment"
199 } else {
200 "unterminated block comment"
201 };
202 let last_bpos = self.pos;
203 self.fatal_span_(start, last_bpos, msg).raise();
204 }
205
206 let tok = if is_doc_comment {
e74abb32
XL
207 self.forbid_bare_cr(start,
208 string,
209 "bare CR not allowed in block doc-comment");
210 token::DocComment(Symbol::intern(string))
416331ca
XL
211 } else {
212 token::Comment
213 };
214
215 tok
9cc50fc6 216 }
416331ca
XL
217 rustc_lexer::TokenKind::Whitespace => token::Whitespace,
218 rustc_lexer::TokenKind::Ident | rustc_lexer::TokenKind::RawIdent => {
219 let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent;
220 let mut ident_start = start;
221 if is_raw_ident {
222 ident_start = ident_start + BytePos(2);
223 }
224 // FIXME: perform NFKC normalization here. (Issue #2253)
225 let sym = self.symbol_from(ident_start);
226 if is_raw_ident {
227 let span = self.mk_sp(start, self.pos);
228 if !sym.can_be_raw() {
229 self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
230 }
231 self.sess.raw_identifier_spans.borrow_mut().push(span);
232 }
233 token::Ident(sym, is_raw_ident)
234 }
235 rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
236 let suffix_start = start + BytePos(suffix_start as u32);
237 let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
238 let suffix = if suffix_start < self.pos {
239 let string = self.str_from(suffix_start);
240 if string == "_" {
241 self.sess.span_diagnostic
242 .struct_span_warn(self.mk_sp(suffix_start, self.pos),
243 "underscore literal suffix is not allowed")
244 .warn("this was previously accepted by the compiler but is \
245 being phased out; it will become a hard error in \
246 a future release!")
247 .note("for more information, see issue #42326 \
248 <https://github.com/rust-lang/rust/issues/42326>")
249 .emit();
250 None
251 } else {
252 Some(Symbol::intern(string))
253 }
1a4d82fc 254 } else {
416331ca 255 None
1a4d82fc 256 };
416331ca
XL
257 token::Literal(token::Lit { kind, symbol, suffix })
258 }
259 rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
260 // Include the leading `'` in the real identifier, for macro
261 // expansion purposes. See #12512 for the gory details of why
262 // this is necessary.
263 let lifetime_name = self.str_from(start);
264 if starts_with_number {
265 self.err_span_(
266 start,
267 self.pos,
268 "lifetimes cannot start with a number",
269 );
270 }
271 let ident = Symbol::intern(lifetime_name);
272 token::Lifetime(ident)
273 }
274 rustc_lexer::TokenKind::Semi => token::Semi,
275 rustc_lexer::TokenKind::Comma => token::Comma,
416331ca
XL
276 rustc_lexer::TokenKind::Dot => token::Dot,
277 rustc_lexer::TokenKind::OpenParen => token::OpenDelim(token::Paren),
278 rustc_lexer::TokenKind::CloseParen => token::CloseDelim(token::Paren),
279 rustc_lexer::TokenKind::OpenBrace => token::OpenDelim(token::Brace),
280 rustc_lexer::TokenKind::CloseBrace => token::CloseDelim(token::Brace),
281 rustc_lexer::TokenKind::OpenBracket => token::OpenDelim(token::Bracket),
282 rustc_lexer::TokenKind::CloseBracket => token::CloseDelim(token::Bracket),
283 rustc_lexer::TokenKind::At => token::At,
284 rustc_lexer::TokenKind::Pound => token::Pound,
285 rustc_lexer::TokenKind::Tilde => token::Tilde,
286 rustc_lexer::TokenKind::Question => token::Question,
416331ca
XL
287 rustc_lexer::TokenKind::Colon => token::Colon,
288 rustc_lexer::TokenKind::Dollar => token::Dollar,
416331ca 289 rustc_lexer::TokenKind::Eq => token::Eq,
416331ca 290 rustc_lexer::TokenKind::Not => token::Not,
416331ca 291 rustc_lexer::TokenKind::Lt => token::Lt,
416331ca 292 rustc_lexer::TokenKind::Gt => token::Gt,
416331ca 293 rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus),
416331ca 294 rustc_lexer::TokenKind::And => token::BinOp(token::And),
416331ca 295 rustc_lexer::TokenKind::Or => token::BinOp(token::Or),
416331ca 296 rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus),
416331ca 297 rustc_lexer::TokenKind::Star => token::BinOp(token::Star),
416331ca 298 rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash),
416331ca 299 rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
416331ca 300 rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
416331ca
XL
301
302 rustc_lexer::TokenKind::Unknown => {
303 let c = self.str_from(start).chars().next().unwrap();
304 let mut err = self.struct_fatal_span_char(start,
305 self.pos,
306 "unknown start of token",
307 c);
308 // FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs,
309 // instead of keeping a table in `check_for_substitution`into the token. Ideally,
310 // this should be inside `rustc_lexer`. However, we should first remove compound
311 // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
312 // as there will be less overall work to do this way.
313 let token = unicode_chars::check_for_substitution(self, start, c, &mut err)
314 .unwrap_or_else(|| token::Unknown(self.symbol_from(start)));
315 err.emit();
316 token
1a4d82fc
JJ
317 }
318 }
416331ca 319 }
b7449926 320
416331ca
XL
321 fn cook_lexer_literal(
322 &self,
323 start: BytePos,
324 suffix_start: BytePos,
325 kind: rustc_lexer::LiteralKind
326 ) -> (token::LitKind, Symbol) {
327 match kind {
328 rustc_lexer::LiteralKind::Char { terminated } => {
329 if !terminated {
330 self.fatal_span_(start, suffix_start,
331 "unterminated character literal".into())
332 .raise()
333 }
334 let content_start = start + BytePos(1);
335 let content_end = suffix_start - BytePos(1);
336 self.validate_char_escape(content_start, content_end);
337 let id = self.symbol_from_to(content_start, content_end);
338 (token::Char, id)
339 },
340 rustc_lexer::LiteralKind::Byte { terminated } => {
341 if !terminated {
342 self.fatal_span_(start + BytePos(1), suffix_start,
343 "unterminated byte constant".into())
344 .raise()
345 }
346 let content_start = start + BytePos(2);
347 let content_end = suffix_start - BytePos(1);
348 self.validate_byte_escape(content_start, content_end);
349 let id = self.symbol_from_to(content_start, content_end);
350 (token::Byte, id)
351 },
352 rustc_lexer::LiteralKind::Str { terminated } => {
353 if !terminated {
354 self.fatal_span_(start, suffix_start,
355 "unterminated double quote string".into())
356 .raise()
357 }
358 let content_start = start + BytePos(1);
359 let content_end = suffix_start - BytePos(1);
360 self.validate_str_escape(content_start, content_end);
361 let id = self.symbol_from_to(content_start, content_end);
362 (token::Str, id)
363 }
364 rustc_lexer::LiteralKind::ByteStr { terminated } => {
365 if !terminated {
366 self.fatal_span_(start + BytePos(1), suffix_start,
367 "unterminated double quote byte string".into())
368 .raise()
369 }
370 let content_start = start + BytePos(2);
371 let content_end = suffix_start - BytePos(1);
372 self.validate_byte_str_escape(content_start, content_end);
373 let id = self.symbol_from_to(content_start, content_end);
374 (token::ByteStr, id)
375 }
376 rustc_lexer::LiteralKind::RawStr { n_hashes, started, terminated } => {
377 if !started {
378 self.report_non_started_raw_string(start);
379 }
380 if !terminated {
381 self.report_unterminated_raw_string(start, n_hashes)
382 }
383 let n_hashes: u16 = self.restrict_n_hashes(start, n_hashes);
384 let n = u32::from(n_hashes);
385 let content_start = start + BytePos(2 + n);
386 let content_end = suffix_start - BytePos(1 + n);
387 self.validate_raw_str_escape(content_start, content_end);
388 let id = self.symbol_from_to(content_start, content_end);
389 (token::StrRaw(n_hashes), id)
390 }
391 rustc_lexer::LiteralKind::RawByteStr { n_hashes, started, terminated } => {
392 if !started {
393 self.report_non_started_raw_string(start);
394 }
395 if !terminated {
396 self.report_unterminated_raw_string(start, n_hashes)
397 }
398 let n_hashes: u16 = self.restrict_n_hashes(start, n_hashes);
399 let n = u32::from(n_hashes);
400 let content_start = start + BytePos(3 + n);
401 let content_end = suffix_start - BytePos(1 + n);
402 self.validate_raw_byte_str_escape(content_start, content_end);
403 let id = self.symbol_from_to(content_start, content_end);
404 (token::ByteStrRaw(n_hashes), id)
405 }
406 rustc_lexer::LiteralKind::Int { base, empty_int } => {
407 if empty_int {
408 self.err_span_(start, suffix_start, "no valid digits found for number");
409 (token::Integer, sym::integer(0))
410 } else {
411 self.validate_int_literal(base, start, suffix_start);
412 (token::Integer, self.symbol_from_to(start, suffix_start))
413 }
414 },
415 rustc_lexer::LiteralKind::Float { base, empty_exponent } => {
416 if empty_exponent {
417 let mut err = self.struct_span_fatal(
418 start, self.pos,
419 "expected at least one digit in exponent"
420 );
421 err.emit();
422 }
423
424 match base {
425 Base::Hexadecimal => {
426 self.err_span_(start, suffix_start,
427 "hexadecimal float literal is not supported")
428 }
429 Base::Octal => {
430 self.err_span_(start, suffix_start,
431 "octal float literal is not supported")
432 }
433 Base::Binary => {
434 self.err_span_(start, suffix_start,
435 "binary float literal is not supported")
436 }
437 _ => ()
438 }
439
440 let id = self.symbol_from_to(start, suffix_start);
441 (token::Float, id)
442 },
443 }
1a4d82fc
JJ
444 }
445
94b46f34
XL
446 #[inline]
447 fn src_index(&self, pos: BytePos) -> usize {
416331ca 448 (pos - self.start_pos).to_usize()
1a4d82fc
JJ
449 }
450
dc9dc135
XL
451 /// Slice of the source text from `start` up to but excluding `self.pos`,
452 /// meaning the slice does not include the character `self.ch`.
453 fn str_from(&self, start: BytePos) -> &str
1a4d82fc 454 {
dc9dc135 455 self.str_from_to(start, self.pos)
1a4d82fc
JJ
456 }
457
dc9dc135
XL
458 /// Creates a Symbol from a given offset to the current offset.
459 fn symbol_from(&self, start: BytePos) -> Symbol {
c30ab7b3 460 debug!("taking an ident from {:?} to {:?}", start, self.pos);
dc9dc135 461 Symbol::intern(self.str_from(start))
1a4d82fc
JJ
462 }
463
dc9dc135
XL
464 /// As symbol_from, with an explicit endpoint.
465 fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol {
1a4d82fc 466 debug!("taking an ident from {:?} to {:?}", start, end);
dc9dc135 467 Symbol::intern(self.str_from_to(start, end))
1a4d82fc
JJ
468 }
469
dc9dc135
XL
470 /// Slice of the source text spanning from `start` up to but excluding `end`.
471 fn str_from_to(&self, start: BytePos, end: BytePos) -> &str
1a4d82fc 472 {
dc9dc135 473 &self.src[self.src_index(start)..self.src_index(end)]
1a4d82fc
JJ
474 }
475
e74abb32
XL
476 fn forbid_bare_cr(&self, start: BytePos, s: &str, errmsg: &str) {
477 let mut idx = 0;
478 loop {
479 idx = match s[idx..].find('\r') {
480 None => break,
481 Some(it) => idx + it + 1
482 };
483 self.err_span_(start + BytePos(idx as u32 - 1),
484 start + BytePos(idx as u32),
485 errmsg);
1a4d82fc
JJ
486 }
487 }
488
416331ca
XL
489 fn report_non_started_raw_string(&self, start: BytePos) -> ! {
490 let bad_char = self.str_from(start).chars().last().unwrap();
491 self
492 .struct_fatal_span_char(
493 start,
494 self.pos,
495 "found invalid character; only `#` is allowed \
496 in raw string delimitation",
497 bad_char,
498 )
499 .emit();
500 FatalError.raise()
1a4d82fc
JJ
501 }
502
416331ca
XL
503 fn report_unterminated_raw_string(&self, start: BytePos, n_hashes: usize) -> ! {
504 let mut err = self.struct_span_fatal(
505 start, start,
506 "unterminated raw string",
507 );
508 err.span_label(
509 self.mk_sp(start, start),
510 "unterminated raw string",
511 );
1a4d82fc 512
416331ca
XL
513 if n_hashes > 0 {
514 err.note(&format!("this raw string should be terminated with `\"{}`",
515 "#".repeat(n_hashes as usize)));
48663c56 516 }
1a4d82fc 517
416331ca
XL
518 err.emit();
519 FatalError.raise()
1a4d82fc
JJ
520 }
521
416331ca
XL
522 fn restrict_n_hashes(&self, start: BytePos, n_hashes: usize) -> u16 {
523 match n_hashes.try_into() {
524 Ok(n_hashes) => n_hashes,
525 Err(_) => {
526 self.fatal_span_(start,
527 self.pos,
dc9dc135 528 "too many `#` symbols: raw strings may be \
416331ca 529 delimited by up to 65535 `#` symbols").raise();
94b46f34 530 }
1a4d82fc 531 }
1a4d82fc 532 }
48663c56 533
416331ca
XL
534 fn validate_char_escape(&self, content_start: BytePos, content_end: BytePos) {
535 let lit = self.str_from_to(content_start, content_end);
dc9dc135
XL
536 if let Err((off, err)) = unescape::unescape_char(lit) {
537 emit_unescape_error(
538 &self.sess.span_diagnostic,
539 lit,
416331ca 540 self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
dc9dc135
XL
541 unescape::Mode::Char,
542 0..off,
543 err,
544 )
545 }
546 }
547
416331ca
XL
548 fn validate_byte_escape(&self, content_start: BytePos, content_end: BytePos) {
549 let lit = self.str_from_to(content_start, content_end);
dc9dc135
XL
550 if let Err((off, err)) = unescape::unescape_byte(lit) {
551 emit_unescape_error(
552 &self.sess.span_diagnostic,
553 lit,
416331ca 554 self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
dc9dc135
XL
555 unescape::Mode::Byte,
556 0..off,
557 err,
558 )
559 }
560 }
561
416331ca
XL
562 fn validate_str_escape(&self, content_start: BytePos, content_end: BytePos) {
563 let lit = self.str_from_to(content_start, content_end);
dc9dc135
XL
564 unescape::unescape_str(lit, &mut |range, c| {
565 if let Err(err) = c {
48663c56
XL
566 emit_unescape_error(
567 &self.sess.span_diagnostic,
568 lit,
416331ca 569 self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
dc9dc135
XL
570 unescape::Mode::Str,
571 range,
48663c56
XL
572 err,
573 )
574 }
dc9dc135 575 })
48663c56
XL
576 }
577
dc9dc135
XL
578 fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) {
579 let lit = self.str_from_to(content_start, content_end);
580 unescape::unescape_raw_str(lit, &mut |range, c| {
581 if let Err(err) = c {
48663c56
XL
582 emit_unescape_error(
583 &self.sess.span_diagnostic,
584 lit,
dc9dc135
XL
585 self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
586 unescape::Mode::Str,
587 range,
48663c56
XL
588 err,
589 )
590 }
dc9dc135 591 })
48663c56
XL
592 }
593
dc9dc135
XL
594 fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
595 let lit = self.str_from_to(content_start, content_end);
596 unescape::unescape_raw_byte_str(lit, &mut |range, c| {
597 if let Err(err) = c {
598 emit_unescape_error(
599 &self.sess.span_diagnostic,
600 lit,
601 self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
602 unescape::Mode::ByteStr,
603 range,
604 err,
605 )
606 }
607 })
48663c56
XL
608 }
609
416331ca
XL
610 fn validate_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
611 let lit = self.str_from_to(content_start, content_end);
dc9dc135
XL
612 unescape::unescape_byte_str(lit, &mut |range, c| {
613 if let Err(err) = c {
614 emit_unescape_error(
615 &self.sess.span_diagnostic,
616 lit,
416331ca 617 self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
dc9dc135
XL
618 unescape::Mode::ByteStr,
619 range,
620 err,
621 )
622 }
623 })
48663c56 624 }
1a4d82fc 625
416331ca
XL
626 fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) {
627 let base = match base {
628 Base::Binary => 2,
629 Base::Octal => 8,
630 _ => return,
631 };
632 let s = self.str_from_to(content_start + BytePos(2), content_end);
633 for (idx, c) in s.char_indices() {
634 let idx = idx as u32;
635 if c != '_' && c.to_digit(base).is_none() {
636 let lo = content_start + BytePos(2 + idx);
637 let hi = content_start + BytePos(2 + idx + c.len_utf8() as u32);
638 self.err_span_(lo, hi,
639 &format!("invalid digit for a base {} literal", base));
1a4d82fc 640
416331ca
XL
641 }
642 }
643 }
9cc50fc6 644}
1a4d82fc 645
94b46f34 646fn is_doc_comment(s: &str) -> bool {
9cc50fc6
SL
647 let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') ||
648 s.starts_with("//!");
1a4d82fc
JJ
649 debug!("is {:?} a doc comment? {}", s, res);
650 res
651}
652
94b46f34 653fn is_block_doc_comment(s: &str) -> bool {
9cc50fc6
SL
654 // Prevent `/**/` from being parsed as a doc comment
655 let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') ||
656 s.starts_with("/*!")) && s.len() >= 5;
1a4d82fc
JJ
657 debug!("is {:?} a doc comment? {}", s, res);
658 res
659}