src/libsyntax/parse/lexer/mod.rs

   1 use crate::parse::ParseSess;
   2 use crate::parse::token::{self, Token, TokenKind};
   3 use crate::symbol::{sym, Symbol};
   4 use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char};
   5
   6 use errors::{FatalError, DiagnosticBuilder};
   7 use syntax_pos::{BytePos, Pos, Span};
   8 use rustc_lexer::Base;
   9 use rustc_lexer::unescape;
  10
  11 use std::borrow::Cow;
  12 use std::char;
  13 use std::iter;
  14 use std::convert::TryInto;
  15 use rustc_data_structures::sync::Lrc;
  16 use log::debug;
  17
  18 #[cfg(test)]
  19 mod tests;
  20
  21 pub mod comments;
  22 mod tokentrees;
  23 mod unicode_chars;
  24
/// Record describing a delimiter that did not match its counterpart.
///
/// NOTE(review): nothing in this file constructs or reads this type, so the
/// field notes below are inferred from the field names only — confirm against
/// the code that fills them in (presumably the `tokentrees` submodule).
#[derive(Clone, Debug)]
pub struct UnmatchedBrace {
    /// Presumably the delimiter kind that was expected — TODO confirm.
    pub expected_delim: token::DelimToken,
    /// Presumably the delimiter kind that was actually encountered — TODO confirm.
    pub found_delim: token::DelimToken,
    /// Presumably the span of the encountered delimiter — TODO confirm.
    pub found_span: Span,
    /// Presumably the span of the still-open delimiter, when known — TODO confirm.
    pub unclosed_span: Option<Span>,
    /// Presumably a span suggested as the likely culprit, when known — TODO confirm.
    pub candidate_span: Option<Span>,
}
  33
/// Lexer state over the text of a single source file.
///
/// `next_token` slices `src` from the current position up to `end_src_index`,
/// asks `rustc_lexer` for the first raw token and converts it into a `Token`.
pub struct StringReader<'a> {
    /// Parse session; used for diagnostics, source-map lookups and for
    /// recording the spans of raw identifiers.
    sess: &'a ParseSess,
    /// Initial position, read-only.
    start_pos: BytePos,
    /// The absolute offset within the source_map of the current character.
    pos: BytePos,
    /// Stop reading src at this index.
    end_src_index: usize,
    /// Source text to tokenize.
    src: Lrc<String>,
    /// When `Some`, `mk_sp` returns this span instead of one built from the
    /// requested byte positions.
    override_span: Option<Span>,
}
  46
  47 impl<'a> StringReader<'a> {
  48     pub fn new(sess: &'a ParseSess,
  49                source_file: Lrc<syntax_pos::SourceFile>,
  50                override_span: Option<Span>) -> Self {
  51         if source_file.src.is_none() {
  52             sess.span_diagnostic.bug(&format!("Cannot lex source_file without source: {}",
  53                                               source_file.name));
  54         }
  55
  56         let src = (*source_file.src.as_ref().unwrap()).clone();
  57
  58         StringReader {
  59             sess,
  60             start_pos: source_file.start_pos,
  61             pos: source_file.start_pos,
  62             end_src_index: src.len(),
  63             src,
  64             override_span,
  65         }
  66     }
  67
  68     pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self {
  69         let begin = sess.source_map().lookup_byte_offset(span.lo());
  70         let end = sess.source_map().lookup_byte_offset(span.hi());
  71
  72         // Make the range zero-length if the span is invalid.
  73         if span.lo() > span.hi() || begin.sf.start_pos != end.sf.start_pos {
  74             span = span.shrink_to_lo();
  75         }
  76
  77         let mut sr = StringReader::new(sess, begin.sf, None);
  78
  79         // Seek the lexer to the right byte range.
  80         sr.end_src_index = sr.src_index(span.hi());
  81
  82         sr
  83     }
  84
  85
  86     fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span {
  87         self.override_span.unwrap_or_else(|| Span::with_root_ctxt(lo, hi))
  88     }
  89
  90     /// Returns the next token, including trivia like whitespace or comments.
  91     ///
  92     /// `Err(())` means that some errors were encountered, which can be
  93     /// retrieved using `buffer_fatal_errors`.
  94     pub fn next_token(&mut self) -> Token {
  95         let start_src_index = self.src_index(self.pos);
  96         let text: &str = &self.src[start_src_index..self.end_src_index];
  97
  98         if text.is_empty() {
  99             let span = self.mk_sp(self.pos, self.pos);
 100             return Token::new(token::Eof, span);
 101         }
 102
 103         {
 104             let is_beginning_of_file = self.pos == self.start_pos;
 105             if is_beginning_of_file {
 106                 if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
 107                     let start = self.pos;
 108                     self.pos = self.pos + BytePos::from_usize(shebang_len);
 109
 110                     let sym = self.symbol_from(start + BytePos::from_usize("#!".len()));
 111                     let kind = token::Shebang(sym);
 112
 113                     let span = self.mk_sp(start, self.pos);
 114                     return Token::new(kind, span);
 115                 }
 116             }
 117         }
 118
 119         let token = rustc_lexer::first_token(text);
 120
 121         let start = self.pos;
 122         self.pos = self.pos + BytePos::from_usize(token.len);
 123
 124         debug!("try_next_token: {:?}({:?})", token.kind, self.str_from(start));
 125
 126         // This could use `?`, but that makes code significantly (10-20%) slower.
 127         // https://github.com/rust-lang/rust/issues/37939
 128         let kind = self.cook_lexer_token(token.kind, start);
 129
 130         let span = self.mk_sp(start, self.pos);
 131         Token::new(kind, span)
 132     }
 133
 134     /// Report a fatal lexical error with a given span.
 135     fn fatal_span(&self, sp: Span, m: &str) -> FatalError {
 136         self.sess.span_diagnostic.span_fatal(sp, m)
 137     }
 138
 139     /// Report a lexical error with a given span.
 140     fn err_span(&self, sp: Span, m: &str) {
 141         self.sess.span_diagnostic.struct_span_err(sp, m).emit();
 142     }
 143
 144
 145     /// Report a fatal error spanning [`from_pos`, `to_pos`).
 146     fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> FatalError {
 147         self.fatal_span(self.mk_sp(from_pos, to_pos), m)
 148     }
 149
 150     /// Report a lexical error spanning [`from_pos`, `to_pos`).
 151     fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
 152         self.err_span(self.mk_sp(from_pos, to_pos), m)
 153     }
 154
 155     fn struct_span_fatal(&self, from_pos: BytePos, to_pos: BytePos, m: &str)
 156         -> DiagnosticBuilder<'a>
 157     {
 158         self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), m)
 159     }
 160
 161     fn struct_fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char)
 162         -> DiagnosticBuilder<'a>
 163     {
 164         let mut m = m.to_string();
 165         m.push_str(": ");
 166         push_escaped_char(&mut m, c);
 167
 168         self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
 169     }
 170
 171     /// Turns simple `rustc_lexer::TokenKind` enum into a rich
 172     /// `libsyntax::TokenKind`. This turns strings into interned
 173     /// symbols and runs additional validation.
 174     fn cook_lexer_token(
 175         &self,
 176         token: rustc_lexer::TokenKind,
 177         start: BytePos,
 178     ) -> TokenKind {
 179         match token {
 180             rustc_lexer::TokenKind::LineComment => {
 181                 let string = self.str_from(start);
 182                 // comments with only more "/"s are not doc comments
 183                 let tok = if is_doc_comment(string) {
 184                     let mut idx = 0;
 185                     loop {
 186                         idx = match string[idx..].find('\r') {
 187                             None => break,
 188                             Some(it) => idx + it + 1
 189                         };
 190                         if string[idx..].chars().next() != Some('\n') {
 191                             self.err_span_(start + BytePos(idx as u32 - 1),
 192                                             start + BytePos(idx as u32),
 193                                             "bare CR not allowed in doc-comment");
 194                         }
 195                     }
 196                     token::DocComment(Symbol::intern(string))
 197                 } else {
 198                     token::Comment
 199                 };
 200
 201                 tok
 202             }
 203             rustc_lexer::TokenKind::BlockComment { terminated } => {
 204                 let string = self.str_from(start);
 205                 // block comments starting with "/**" or "/*!" are doc-comments
 206                 // but comments with only "*"s between two "/"s are not
 207                 let is_doc_comment = is_block_doc_comment(string);
 208
 209                 if !terminated {
 210                     let msg = if is_doc_comment {
 211                         "unterminated block doc-comment"
 212                     } else {
 213                         "unterminated block comment"
 214                     };
 215                     let last_bpos = self.pos;
 216                     self.fatal_span_(start, last_bpos, msg).raise();
 217                 }
 218
 219                 let tok = if is_doc_comment {
 220                     let has_cr = string.contains('\r');
 221                     let string = if has_cr {
 222                         self.translate_crlf(start,
 223                                             string,
 224                                             "bare CR not allowed in block doc-comment")
 225                     } else {
 226                         string.into()
 227                     };
 228                     token::DocComment(Symbol::intern(&string[..]))
 229                 } else {
 230                     token::Comment
 231                 };
 232
 233                 tok
 234             }
 235             rustc_lexer::TokenKind::Whitespace => token::Whitespace,
 236             rustc_lexer::TokenKind::Ident | rustc_lexer::TokenKind::RawIdent => {
 237                 let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent;
 238                 let mut ident_start = start;
 239                 if is_raw_ident {
 240                     ident_start = ident_start + BytePos(2);
 241                 }
 242                 // FIXME: perform NFKC normalization here. (Issue #2253)
 243                 let sym = self.symbol_from(ident_start);
 244                 if is_raw_ident {
 245                     let span = self.mk_sp(start, self.pos);
 246                     if !sym.can_be_raw() {
 247                         self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
 248                     }
 249                     self.sess.raw_identifier_spans.borrow_mut().push(span);
 250                 }
 251                 token::Ident(sym, is_raw_ident)
 252             }
 253             rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
 254                 let suffix_start = start + BytePos(suffix_start as u32);
 255                 let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
 256                 let suffix = if suffix_start < self.pos {
 257                     let string = self.str_from(suffix_start);
 258                     if string == "_" {
 259                         self.sess.span_diagnostic
 260                             .struct_span_warn(self.mk_sp(suffix_start, self.pos),
 261                                               "underscore literal suffix is not allowed")
 262                             .warn("this was previously accepted by the compiler but is \
 263                                    being phased out; it will become a hard error in \
 264                                    a future release!")
 265                             .note("for more information, see issue #42326 \
 266                                    <https://github.com/rust-lang/rust/issues/42326>")
 267                             .emit();
 268                         None
 269                     } else {
 270                         Some(Symbol::intern(string))
 271                     }
 272                 } else {
 273                     None
 274                 };
 275                 token::Literal(token::Lit { kind, symbol, suffix })
 276             }
 277             rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
 278                 // Include the leading `'` in the real identifier, for macro
 279                 // expansion purposes. See #12512 for the gory details of why
 280                 // this is necessary.
 281                 let lifetime_name = self.str_from(start);
 282                 if starts_with_number {
 283                     self.err_span_(
 284                         start,
 285                         self.pos,
 286                         "lifetimes cannot start with a number",
 287                     );
 288                 }
 289                 let ident = Symbol::intern(lifetime_name);
 290                 token::Lifetime(ident)
 291             }
 292             rustc_lexer::TokenKind::Semi => token::Semi,
 293             rustc_lexer::TokenKind::Comma => token::Comma,
 294             rustc_lexer::TokenKind::Dot => token::Dot,
 295             rustc_lexer::TokenKind::OpenParen => token::OpenDelim(token::Paren),
 296             rustc_lexer::TokenKind::CloseParen => token::CloseDelim(token::Paren),
 297             rustc_lexer::TokenKind::OpenBrace => token::OpenDelim(token::Brace),
 298             rustc_lexer::TokenKind::CloseBrace => token::CloseDelim(token::Brace),
 299             rustc_lexer::TokenKind::OpenBracket => token::OpenDelim(token::Bracket),
 300             rustc_lexer::TokenKind::CloseBracket => token::CloseDelim(token::Bracket),
 301             rustc_lexer::TokenKind::At => token::At,
 302             rustc_lexer::TokenKind::Pound => token::Pound,
 303             rustc_lexer::TokenKind::Tilde => token::Tilde,
 304             rustc_lexer::TokenKind::Question => token::Question,
 305             rustc_lexer::TokenKind::Colon => token::Colon,
 306             rustc_lexer::TokenKind::Dollar => token::Dollar,
 307             rustc_lexer::TokenKind::Eq => token::Eq,
 308             rustc_lexer::TokenKind::Not => token::Not,
 309             rustc_lexer::TokenKind::Lt => token::Lt,
 310             rustc_lexer::TokenKind::Gt => token::Gt,
 311             rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus),
 312             rustc_lexer::TokenKind::And => token::BinOp(token::And),
 313             rustc_lexer::TokenKind::Or => token::BinOp(token::Or),
 314             rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus),
 315             rustc_lexer::TokenKind::Star => token::BinOp(token::Star),
 316             rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash),
 317             rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
 318             rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
 319
 320             rustc_lexer::TokenKind::Unknown => {
 321                 let c = self.str_from(start).chars().next().unwrap();
 322                 let mut err = self.struct_fatal_span_char(start,
 323                                                           self.pos,
 324                                                           "unknown start of token",
 325                                                           c);
 326                 // FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs,
 327                 // instead of keeping a table in `check_for_substitution`into the token. Ideally,
 328                 // this should be inside `rustc_lexer`. However, we should first remove compound
 329                 // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
 330                 // as there will be less overall work to do this way.
 331                 let token = unicode_chars::check_for_substitution(self, start, c, &mut err)
 332                     .unwrap_or_else(|| token::Unknown(self.symbol_from(start)));
 333                 err.emit();
 334                 token
 335             }
 336         }
 337     }
 338
 339     fn cook_lexer_literal(
 340         &self,
 341         start: BytePos,
 342         suffix_start: BytePos,
 343         kind: rustc_lexer::LiteralKind
 344     ) -> (token::LitKind, Symbol) {
 345         match kind {
 346             rustc_lexer::LiteralKind::Char { terminated } => {
 347                 if !terminated {
 348                     self.fatal_span_(start, suffix_start,
 349                                      "unterminated character literal".into())
 350                         .raise()
 351                 }
 352                 let content_start = start + BytePos(1);
 353                 let content_end = suffix_start - BytePos(1);
 354                 self.validate_char_escape(content_start, content_end);
 355                 let id = self.symbol_from_to(content_start, content_end);
 356                 (token::Char, id)
 357             },
 358             rustc_lexer::LiteralKind::Byte { terminated } => {
 359                 if !terminated {
 360                     self.fatal_span_(start + BytePos(1), suffix_start,
 361                                      "unterminated byte constant".into())
 362                         .raise()
 363                 }
 364                 let content_start = start + BytePos(2);
 365                 let content_end = suffix_start - BytePos(1);
 366                 self.validate_byte_escape(content_start, content_end);
 367                 let id = self.symbol_from_to(content_start, content_end);
 368                 (token::Byte, id)
 369             },
 370             rustc_lexer::LiteralKind::Str { terminated } => {
 371                 if !terminated {
 372                     self.fatal_span_(start, suffix_start,
 373                                      "unterminated double quote string".into())
 374                         .raise()
 375                 }
 376                 let content_start = start + BytePos(1);
 377                 let content_end = suffix_start - BytePos(1);
 378                 self.validate_str_escape(content_start, content_end);
 379                 let id = self.symbol_from_to(content_start, content_end);
 380                 (token::Str, id)
 381             }
 382             rustc_lexer::LiteralKind::ByteStr { terminated } => {
 383                 if !terminated {
 384                     self.fatal_span_(start + BytePos(1), suffix_start,
 385                                      "unterminated double quote byte string".into())
 386                         .raise()
 387                 }
 388                 let content_start = start + BytePos(2);
 389                 let content_end = suffix_start - BytePos(1);
 390                 self.validate_byte_str_escape(content_start, content_end);
 391                 let id = self.symbol_from_to(content_start, content_end);
 392                 (token::ByteStr, id)
 393             }
 394             rustc_lexer::LiteralKind::RawStr { n_hashes, started, terminated } => {
 395                 if !started {
 396                     self.report_non_started_raw_string(start);
 397                 }
 398                 if !terminated {
 399                     self.report_unterminated_raw_string(start, n_hashes)
 400                 }
 401                 let n_hashes: u16 = self.restrict_n_hashes(start, n_hashes);
 402                 let n = u32::from(n_hashes);
 403                 let content_start = start + BytePos(2 + n);
 404                 let content_end = suffix_start - BytePos(1 + n);
 405                 self.validate_raw_str_escape(content_start, content_end);
 406                 let id = self.symbol_from_to(content_start, content_end);
 407                 (token::StrRaw(n_hashes), id)
 408             }
 409             rustc_lexer::LiteralKind::RawByteStr { n_hashes, started, terminated } => {
 410                 if !started {
 411                     self.report_non_started_raw_string(start);
 412                 }
 413                 if !terminated {
 414                     self.report_unterminated_raw_string(start, n_hashes)
 415                 }
 416                 let n_hashes: u16 = self.restrict_n_hashes(start, n_hashes);
 417                 let n = u32::from(n_hashes);
 418                 let content_start = start + BytePos(3 + n);
 419                 let content_end = suffix_start - BytePos(1 + n);
 420                 self.validate_raw_byte_str_escape(content_start, content_end);
 421                 let id = self.symbol_from_to(content_start, content_end);
 422                 (token::ByteStrRaw(n_hashes), id)
 423             }
 424             rustc_lexer::LiteralKind::Int { base, empty_int } => {
 425                 if empty_int {
 426                     self.err_span_(start, suffix_start, "no valid digits found for number");
 427                     (token::Integer, sym::integer(0))
 428                 } else {
 429                     self.validate_int_literal(base, start, suffix_start);
 430                     (token::Integer, self.symbol_from_to(start, suffix_start))
 431                 }
 432             },
 433             rustc_lexer::LiteralKind::Float { base, empty_exponent } => {
 434                 if empty_exponent {
 435                     let mut err = self.struct_span_fatal(
 436                         start, self.pos,
 437                         "expected at least one digit in exponent"
 438                     );
 439                     err.emit();
 440                 }
 441
 442                 match base {
 443                     Base::Hexadecimal => {
 444                         self.err_span_(start, suffix_start,
 445                                        "hexadecimal float literal is not supported")
 446                     }
 447                     Base::Octal => {
 448                         self.err_span_(start, suffix_start,
 449                                        "octal float literal is not supported")
 450                     }
 451                     Base::Binary => {
 452                         self.err_span_(start, suffix_start,
 453                                        "binary float literal is not supported")
 454                     }
 455                     _ => ()
 456                 }
 457
 458                 let id = self.symbol_from_to(start, suffix_start);
 459                 (token::Float, id)
 460             },
 461         }
 462     }
 463
 464     #[inline]
 465     fn src_index(&self, pos: BytePos) -> usize {
 466         (pos - self.start_pos).to_usize()
 467     }
 468
 469     /// Slice of the source text from `start` up to but excluding `self.pos`,
 470     /// meaning the slice does not include the character `self.ch`.
 471     fn str_from(&self, start: BytePos) -> &str
 472     {
 473         self.str_from_to(start, self.pos)
 474     }
 475
 476     /// Creates a Symbol from a given offset to the current offset.
 477     fn symbol_from(&self, start: BytePos) -> Symbol {
 478         debug!("taking an ident from {:?} to {:?}", start, self.pos);
 479         Symbol::intern(self.str_from(start))
 480     }
 481
 482     /// As symbol_from, with an explicit endpoint.
 483     fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol {
 484         debug!("taking an ident from {:?} to {:?}", start, end);
 485         Symbol::intern(self.str_from_to(start, end))
 486     }
 487
 488     /// Slice of the source text spanning from `start` up to but excluding `end`.
 489     fn str_from_to(&self, start: BytePos, end: BytePos) -> &str
 490     {
 491         &self.src[self.src_index(start)..self.src_index(end)]
 492     }
 493
    /// Converts CRLF to LF in the given string, raising an error on bare CR.
    ///
    /// `start` is the source position of the first byte of `s` and is used to place the
    /// diagnostics; `errmsg` is the message reported for every `\r` that is not followed by
    /// `\n`. If `s` contains no CRLF pair it is returned borrowed and unchanged; otherwise an
    /// owned copy with the conversions applied is returned.
    fn translate_crlf<'b>(&self, start: BytePos, s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
        let mut chars = s.char_indices().peekable();
        while let Some((i, ch)) = chars.next() {
            if ch == '\r' {
                // First CRLF pair: switch to the copying helper, handing it the index of the
                // LF and the iterator (which has not consumed that LF yet).
                if let Some((lf_idx, '\n')) = chars.peek() {
                    return translate_crlf_(self, start, s, *lf_idx, chars, errmsg).into();
                }
                // Bare CR: report it and keep scanning.
                let pos = start + BytePos(i as u32);
                let end_pos = start + BytePos((i + ch.len_utf8()) as u32);
                self.err_span_(pos, end_pos, errmsg);
            }
        }
        return s.into();

        // Copying path. `j` starts as the index of the LF of the first CRLF pair and from
        // then on marks the start of the text that has not been copied into `buf` yet.
        fn translate_crlf_(rdr: &StringReader<'_>,
                           start: BytePos,
                           s: &str,
                           mut j: usize,
                           mut chars: iter::Peekable<impl Iterator<Item = (usize, char)>>,
                           errmsg: &str)
                           -> String {
            let mut buf = String::with_capacity(s.len());
            // Skip first CR
            buf.push_str(&s[.. j - 1]);
            while let Some((i, ch)) = chars.next() {
                if ch == '\r' {
                    // Copy the pending text up to (not including) this CR ...
                    if j < i {
                        buf.push_str(&s[j..i]);
                    }
                    // ... and resume after it, so the CR itself is never copied.
                    let next = i + ch.len_utf8();
                    j = next;
                    if chars.peek().map(|(_, ch)| *ch) != Some('\n') {
                        let pos = start + BytePos(i as u32);
                        let end_pos = start + BytePos(next as u32);
                        rdr.err_span_(pos, end_pos, errmsg);
                    }
                }
            }
            // Copy whatever follows the last CR.
            if j < s.len() {
                buf.push_str(&s[j..]);
            }
            buf
        }
    }
 539
 540     fn report_non_started_raw_string(&self, start: BytePos) -> ! {
 541         let bad_char = self.str_from(start).chars().last().unwrap();
 542         self
 543             .struct_fatal_span_char(
 544                 start,
 545                 self.pos,
 546                 "found invalid character; only `#` is allowed \
 547                  in raw string delimitation",
 548                 bad_char,
 549             )
 550             .emit();
 551         FatalError.raise()
 552     }
 553
 554     fn report_unterminated_raw_string(&self, start: BytePos, n_hashes: usize) -> ! {
 555         let mut err = self.struct_span_fatal(
 556             start, start,
 557             "unterminated raw string",
 558         );
 559         err.span_label(
 560             self.mk_sp(start, start),
 561             "unterminated raw string",
 562         );
 563
 564         if n_hashes > 0 {
 565             err.note(&format!("this raw string should be terminated with `\"{}`",
 566                                 "#".repeat(n_hashes as usize)));
 567         }
 568
 569         err.emit();
 570         FatalError.raise()
 571     }
 572
 573     fn restrict_n_hashes(&self, start: BytePos, n_hashes: usize) -> u16 {
 574         match n_hashes.try_into() {
 575             Ok(n_hashes) => n_hashes,
 576             Err(_) => {
 577                 self.fatal_span_(start,
 578                                  self.pos,
 579                                  "too many `#` symbols: raw strings may be \
 580                                   delimited by up to 65535 `#` symbols").raise();
 581             }
 582         }
 583     }
 584
 585     fn validate_char_escape(&self, content_start: BytePos, content_end: BytePos) {
 586         let lit = self.str_from_to(content_start, content_end);
 587         if let Err((off, err)) = unescape::unescape_char(lit) {
 588             emit_unescape_error(
 589                 &self.sess.span_diagnostic,
 590                 lit,
 591                 self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
 592                 unescape::Mode::Char,
 593                 0..off,
 594                 err,
 595             )
 596         }
 597     }
 598
 599     fn validate_byte_escape(&self, content_start: BytePos, content_end: BytePos) {
 600         let lit = self.str_from_to(content_start, content_end);
 601         if let Err((off, err)) = unescape::unescape_byte(lit) {
 602             emit_unescape_error(
 603                 &self.sess.span_diagnostic,
 604                 lit,
 605                 self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
 606                 unescape::Mode::Byte,
 607                 0..off,
 608                 err,
 609             )
 610         }
 611     }
 612
 613     fn validate_str_escape(&self, content_start: BytePos, content_end: BytePos) {
 614         let lit = self.str_from_to(content_start, content_end);
 615         unescape::unescape_str(lit, &mut |range, c| {
 616             if let Err(err) = c {
 617                 emit_unescape_error(
 618                     &self.sess.span_diagnostic,
 619                     lit,
 620                     self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
 621                     unescape::Mode::Str,
 622                     range,
 623                     err,
 624                 )
 625             }
 626         })
 627     }
 628
 629     fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) {
 630         let lit = self.str_from_to(content_start, content_end);
 631         unescape::unescape_raw_str(lit, &mut |range, c| {
 632             if let Err(err) = c {
 633                 emit_unescape_error(
 634                     &self.sess.span_diagnostic,
 635                     lit,
 636                     self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
 637                     unescape::Mode::Str,
 638                     range,
 639                     err,
 640                 )
 641             }
 642         })
 643     }
 644
 645     fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
 646         let lit = self.str_from_to(content_start, content_end);
 647         unescape::unescape_raw_byte_str(lit, &mut |range, c| {
 648             if let Err(err) = c {
 649                 emit_unescape_error(
 650                     &self.sess.span_diagnostic,
 651                     lit,
 652                     self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
 653                     unescape::Mode::ByteStr,
 654                     range,
 655                     err,
 656                 )
 657             }
 658         })
 659     }
 660
 661     fn validate_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
 662         let lit = self.str_from_to(content_start, content_end);
 663         unescape::unescape_byte_str(lit, &mut |range, c| {
 664             if let Err(err) = c {
 665                 emit_unescape_error(
 666                     &self.sess.span_diagnostic,
 667                     lit,
 668                     self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
 669                     unescape::Mode::ByteStr,
 670                     range,
 671                     err,
 672                 )
 673             }
 674         })
 675     }
 676
 677     fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) {
 678         let base = match base {
 679             Base::Binary => 2,
 680             Base::Octal => 8,
 681             _ => return,
 682         };
 683         let s = self.str_from_to(content_start + BytePos(2), content_end);
 684         for (idx, c) in s.char_indices() {
 685             let idx = idx as u32;
 686             if c != '_' && c.to_digit(base).is_none() {
 687                 let lo = content_start + BytePos(2 + idx);
 688                 let hi = content_start + BytePos(2 + idx + c.len_utf8() as u32);
 689                 self.err_span_(lo, hi,
 690                                &format!("invalid digit for a base {} literal", base));
 691
 692             }
 693         }
 694     }
 695 }
 696
 697 fn is_doc_comment(s: &str) -> bool {
 698     let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') ||
 699               s.starts_with("//!");
 700     debug!("is {:?} a doc comment? {}", s, res);
 701     res
 702 }
 703
 704 fn is_block_doc_comment(s: &str) -> bool {
 705     // Prevent `/**/` from being parsed as a doc comment
 706     let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') ||
 707                s.starts_with("/*!")) && s.len() >= 5;
 708     debug!("is {:?} a doc comment? {}", s, res);
 709     res
 710 }