]> git.proxmox.com Git - rustc.git/blame - src/libsyntax/parse/lexer/mod.rs
New upstream version 1.39.0+dfsg1
[rustc.git] / src / libsyntax / parse / lexer / mod.rs
CommitLineData
dc9dc135
XL
1use crate::parse::ParseSess;
2use crate::parse::token::{self, Token, TokenKind};
3use crate::symbol::{sym, Symbol};
48663c56 4use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char};
9fa01778 5
416331ca 6use errors::{FatalError, DiagnosticBuilder};
e1599b0c 7use syntax_pos::{BytePos, Pos, Span};
416331ca
XL
8use rustc_lexer::Base;
9use rustc_lexer::unescape;
1a4d82fc 10
d9579d0f 11use std::borrow::Cow;
1a4d82fc 12use std::char;
0731742a 13use std::iter;
416331ca 14use std::convert::TryInto;
0531ce1d 15use rustc_data_structures::sync::Lrc;
9fa01778 16use log::debug;
1a4d82fc 17
416331ca
XL
18#[cfg(test)]
19mod tests;
20
1a4d82fc 21pub mod comments;
32a655c1 22mod tokentrees;
92a42be0 23mod unicode_chars;
1a4d82fc 24
9fa01778
XL
25#[derive(Clone, Debug)]
26pub struct UnmatchedBrace {
27 pub expected_delim: token::DelimToken,
28 pub found_delim: token::DelimToken,
29 pub found_span: Span,
30 pub unclosed_span: Option<Span>,
31 pub candidate_span: Option<Span>,
32}
33
1a4d82fc 34pub struct StringReader<'a> {
416331ca
XL
35 sess: &'a ParseSess,
36 /// Initial position, read-only.
37 start_pos: BytePos,
38 /// The absolute offset within the source_map of the current character.
39 pos: BytePos,
94b46f34 40 /// Stop reading src at this index.
416331ca
XL
41 end_src_index: usize,
42 /// Source text to tokenize.
94b46f34 43 src: Lrc<String>,
48663c56 44 override_span: Option<Span>,
cc61c64b
XL
45}
46
32a655c1 47impl<'a> StringReader<'a> {
416331ca 48 pub fn new(sess: &'a ParseSess,
b7449926
XL
49 source_file: Lrc<syntax_pos::SourceFile>,
50 override_span: Option<Span>) -> Self {
b7449926
XL
51 if source_file.src.is_none() {
52 sess.span_diagnostic.bug(&format!("Cannot lex source_file without source: {}",
53 source_file.name));
c34b1796
AL
54 }
55
b7449926 56 let src = (*source_file.src.as_ref().unwrap()).clone();
c34b1796 57
9e0c209e 58 StringReader {
3b2f2976 59 sess,
416331ca 60 start_pos: source_file.start_pos,
b7449926 61 pos: source_file.start_pos,
94b46f34 62 end_src_index: src.len(),
94b46f34 63 src,
94b46f34 64 override_span,
a7813a04 65 }
1a4d82fc
JJ
66 }
67
8bb4bdeb 68 pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self {
b7449926
XL
69 let begin = sess.source_map().lookup_byte_offset(span.lo());
70 let end = sess.source_map().lookup_byte_offset(span.hi());
8bb4bdeb
XL
71
72 // Make the range zero-length if the span is invalid.
a1dfa0c6 73 if span.lo() > span.hi() || begin.sf.start_pos != end.sf.start_pos {
0531ce1d 74 span = span.shrink_to_lo();
8bb4bdeb
XL
75 }
76
416331ca 77 let mut sr = StringReader::new(sess, begin.sf, None);
8bb4bdeb
XL
78
79 // Seek the lexer to the right byte range.
94b46f34 80 sr.end_src_index = sr.src_index(span.hi());
8bb4bdeb 81
416331ca
XL
82 sr
83 }
8bb4bdeb 84
b7449926 85
416331ca 86 fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span {
e1599b0c 87 self.override_span.unwrap_or_else(|| Span::with_root_ctxt(lo, hi))
8bb4bdeb
XL
88 }
89
416331ca
XL
90 /// Returns the next token, including trivia like whitespace or comments.
91 ///
92 /// `Err(())` means that some errors were encountered, which can be
93 /// retrieved using `buffer_fatal_errors`.
94 pub fn next_token(&mut self) -> Token {
95 let start_src_index = self.src_index(self.pos);
96 let text: &str = &self.src[start_src_index..self.end_src_index];
97
98 if text.is_empty() {
99 let span = self.mk_sp(self.pos, self.pos);
100 return Token::new(token::Eof, span);
101 }
102
103 {
104 let is_beginning_of_file = self.pos == self.start_pos;
105 if is_beginning_of_file {
106 if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
107 let start = self.pos;
108 self.pos = self.pos + BytePos::from_usize(shebang_len);
109
110 let sym = self.symbol_from(start + BytePos::from_usize("#!".len()));
111 let kind = token::Shebang(sym);
112
113 let span = self.mk_sp(start, self.pos);
114 return Token::new(kind, span);
115 }
116 }
117 }
118
119 let token = rustc_lexer::first_token(text);
120
121 let start = self.pos;
122 self.pos = self.pos + BytePos::from_usize(token.len);
123
124 debug!("try_next_token: {:?}({:?})", token.kind, self.str_from(start));
125
126 // This could use `?`, but that makes code significantly (10-20%) slower.
127 // https://github.com/rust-lang/rust/issues/37939
128 let kind = self.cook_lexer_token(token.kind, start);
129
130 let span = self.mk_sp(start, self.pos);
131 Token::new(kind, span)
1a4d82fc
JJ
132 }
133
134 /// Report a fatal lexical error with a given span.
94b46f34 135 fn fatal_span(&self, sp: Span, m: &str) -> FatalError {
32a655c1 136 self.sess.span_diagnostic.span_fatal(sp, m)
1a4d82fc
JJ
137 }
138
139 /// Report a lexical error with a given span.
94b46f34 140 fn err_span(&self, sp: Span, m: &str) {
0731742a 141 self.sess.span_diagnostic.struct_span_err(sp, m).emit();
1a4d82fc
JJ
142 }
143
c1a9b12d 144
1a4d82fc 145 /// Report a fatal error spanning [`from_pos`, `to_pos`).
92a42be0 146 fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> FatalError {
041b39d2 147 self.fatal_span(self.mk_sp(from_pos, to_pos), m)
1a4d82fc
JJ
148 }
149
150 /// Report a lexical error spanning [`from_pos`, `to_pos`).
151 fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
041b39d2 152 self.err_span(self.mk_sp(from_pos, to_pos), m)
1a4d82fc
JJ
153 }
154
b7449926
XL
155 fn struct_span_fatal(&self, from_pos: BytePos, to_pos: BytePos, m: &str)
156 -> DiagnosticBuilder<'a>
157 {
0531ce1d
XL
158 self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), m)
159 }
160
b7449926
XL
161 fn struct_fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char)
162 -> DiagnosticBuilder<'a>
163 {
9cc50fc6
SL
164 let mut m = m.to_string();
165 m.push_str(": ");
48663c56 166 push_escaped_char(&mut m, c);
b7449926 167
041b39d2 168 self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
9cc50fc6 169 }
1a4d82fc 170
416331ca
XL
171 /// Turns simple `rustc_lexer::TokenKind` enum into a rich
172 /// `libsyntax::TokenKind`. This turns strings into interned
173 /// symbols and runs additional validation.
174 fn cook_lexer_token(
175 &self,
176 token: rustc_lexer::TokenKind,
177 start: BytePos,
178 ) -> TokenKind {
179 match token {
180 rustc_lexer::TokenKind::LineComment => {
181 let string = self.str_from(start);
182 // comments with only more "/"s are not doc comments
183 let tok = if is_doc_comment(string) {
184 let mut idx = 0;
185 loop {
186 idx = match string[idx..].find('\r') {
187 None => break,
188 Some(it) => idx + it + 1
189 };
190 if string[idx..].chars().next() != Some('\n') {
191 self.err_span_(start + BytePos(idx as u32 - 1),
192 start + BytePos(idx as u32),
193 "bare CR not allowed in doc-comment");
194 }
195 }
196 token::DocComment(Symbol::intern(string))
197 } else {
198 token::Comment
199 };
200
201 tok
202 }
203 rustc_lexer::TokenKind::BlockComment { terminated } => {
204 let string = self.str_from(start);
205 // block comments starting with "/**" or "/*!" are doc-comments
206 // but comments with only "*"s between two "/"s are not
207 let is_doc_comment = is_block_doc_comment(string);
208
209 if !terminated {
210 let msg = if is_doc_comment {
211 "unterminated block doc-comment"
212 } else {
213 "unterminated block comment"
214 };
215 let last_bpos = self.pos;
216 self.fatal_span_(start, last_bpos, msg).raise();
217 }
218
219 let tok = if is_doc_comment {
220 let has_cr = string.contains('\r');
221 let string = if has_cr {
222 self.translate_crlf(start,
223 string,
224 "bare CR not allowed in block doc-comment")
225 } else {
226 string.into()
227 };
228 token::DocComment(Symbol::intern(&string[..]))
229 } else {
230 token::Comment
231 };
232
233 tok
9cc50fc6 234 }
416331ca
XL
235 rustc_lexer::TokenKind::Whitespace => token::Whitespace,
236 rustc_lexer::TokenKind::Ident | rustc_lexer::TokenKind::RawIdent => {
237 let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent;
238 let mut ident_start = start;
239 if is_raw_ident {
240 ident_start = ident_start + BytePos(2);
241 }
242 // FIXME: perform NFKC normalization here. (Issue #2253)
243 let sym = self.symbol_from(ident_start);
244 if is_raw_ident {
245 let span = self.mk_sp(start, self.pos);
246 if !sym.can_be_raw() {
247 self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
248 }
249 self.sess.raw_identifier_spans.borrow_mut().push(span);
250 }
251 token::Ident(sym, is_raw_ident)
252 }
253 rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
254 let suffix_start = start + BytePos(suffix_start as u32);
255 let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
256 let suffix = if suffix_start < self.pos {
257 let string = self.str_from(suffix_start);
258 if string == "_" {
259 self.sess.span_diagnostic
260 .struct_span_warn(self.mk_sp(suffix_start, self.pos),
261 "underscore literal suffix is not allowed")
262 .warn("this was previously accepted by the compiler but is \
263 being phased out; it will become a hard error in \
264 a future release!")
265 .note("for more information, see issue #42326 \
266 <https://github.com/rust-lang/rust/issues/42326>")
267 .emit();
268 None
269 } else {
270 Some(Symbol::intern(string))
271 }
1a4d82fc 272 } else {
416331ca 273 None
1a4d82fc 274 };
416331ca
XL
275 token::Literal(token::Lit { kind, symbol, suffix })
276 }
277 rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
278 // Include the leading `'` in the real identifier, for macro
279 // expansion purposes. See #12512 for the gory details of why
280 // this is necessary.
281 let lifetime_name = self.str_from(start);
282 if starts_with_number {
283 self.err_span_(
284 start,
285 self.pos,
286 "lifetimes cannot start with a number",
287 );
288 }
289 let ident = Symbol::intern(lifetime_name);
290 token::Lifetime(ident)
291 }
292 rustc_lexer::TokenKind::Semi => token::Semi,
293 rustc_lexer::TokenKind::Comma => token::Comma,
416331ca
XL
294 rustc_lexer::TokenKind::Dot => token::Dot,
295 rustc_lexer::TokenKind::OpenParen => token::OpenDelim(token::Paren),
296 rustc_lexer::TokenKind::CloseParen => token::CloseDelim(token::Paren),
297 rustc_lexer::TokenKind::OpenBrace => token::OpenDelim(token::Brace),
298 rustc_lexer::TokenKind::CloseBrace => token::CloseDelim(token::Brace),
299 rustc_lexer::TokenKind::OpenBracket => token::OpenDelim(token::Bracket),
300 rustc_lexer::TokenKind::CloseBracket => token::CloseDelim(token::Bracket),
301 rustc_lexer::TokenKind::At => token::At,
302 rustc_lexer::TokenKind::Pound => token::Pound,
303 rustc_lexer::TokenKind::Tilde => token::Tilde,
304 rustc_lexer::TokenKind::Question => token::Question,
416331ca
XL
305 rustc_lexer::TokenKind::Colon => token::Colon,
306 rustc_lexer::TokenKind::Dollar => token::Dollar,
416331ca 307 rustc_lexer::TokenKind::Eq => token::Eq,
416331ca 308 rustc_lexer::TokenKind::Not => token::Not,
416331ca 309 rustc_lexer::TokenKind::Lt => token::Lt,
416331ca 310 rustc_lexer::TokenKind::Gt => token::Gt,
416331ca 311 rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus),
416331ca 312 rustc_lexer::TokenKind::And => token::BinOp(token::And),
416331ca 313 rustc_lexer::TokenKind::Or => token::BinOp(token::Or),
416331ca 314 rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus),
416331ca 315 rustc_lexer::TokenKind::Star => token::BinOp(token::Star),
416331ca 316 rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash),
416331ca 317 rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
416331ca 318 rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
416331ca
XL
319
320 rustc_lexer::TokenKind::Unknown => {
321 let c = self.str_from(start).chars().next().unwrap();
322 let mut err = self.struct_fatal_span_char(start,
323 self.pos,
324 "unknown start of token",
325 c);
326 // FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs,
327 // instead of keeping a table in `check_for_substitution`into the token. Ideally,
328 // this should be inside `rustc_lexer`. However, we should first remove compound
329 // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
330 // as there will be less overall work to do this way.
331 let token = unicode_chars::check_for_substitution(self, start, c, &mut err)
332 .unwrap_or_else(|| token::Unknown(self.symbol_from(start)));
333 err.emit();
334 token
1a4d82fc
JJ
335 }
336 }
416331ca 337 }
b7449926 338
416331ca
XL
339 fn cook_lexer_literal(
340 &self,
341 start: BytePos,
342 suffix_start: BytePos,
343 kind: rustc_lexer::LiteralKind
344 ) -> (token::LitKind, Symbol) {
345 match kind {
346 rustc_lexer::LiteralKind::Char { terminated } => {
347 if !terminated {
348 self.fatal_span_(start, suffix_start,
349 "unterminated character literal".into())
350 .raise()
351 }
352 let content_start = start + BytePos(1);
353 let content_end = suffix_start - BytePos(1);
354 self.validate_char_escape(content_start, content_end);
355 let id = self.symbol_from_to(content_start, content_end);
356 (token::Char, id)
357 },
358 rustc_lexer::LiteralKind::Byte { terminated } => {
359 if !terminated {
360 self.fatal_span_(start + BytePos(1), suffix_start,
361 "unterminated byte constant".into())
362 .raise()
363 }
364 let content_start = start + BytePos(2);
365 let content_end = suffix_start - BytePos(1);
366 self.validate_byte_escape(content_start, content_end);
367 let id = self.symbol_from_to(content_start, content_end);
368 (token::Byte, id)
369 },
370 rustc_lexer::LiteralKind::Str { terminated } => {
371 if !terminated {
372 self.fatal_span_(start, suffix_start,
373 "unterminated double quote string".into())
374 .raise()
375 }
376 let content_start = start + BytePos(1);
377 let content_end = suffix_start - BytePos(1);
378 self.validate_str_escape(content_start, content_end);
379 let id = self.symbol_from_to(content_start, content_end);
380 (token::Str, id)
381 }
382 rustc_lexer::LiteralKind::ByteStr { terminated } => {
383 if !terminated {
384 self.fatal_span_(start + BytePos(1), suffix_start,
385 "unterminated double quote byte string".into())
386 .raise()
387 }
388 let content_start = start + BytePos(2);
389 let content_end = suffix_start - BytePos(1);
390 self.validate_byte_str_escape(content_start, content_end);
391 let id = self.symbol_from_to(content_start, content_end);
392 (token::ByteStr, id)
393 }
394 rustc_lexer::LiteralKind::RawStr { n_hashes, started, terminated } => {
395 if !started {
396 self.report_non_started_raw_string(start);
397 }
398 if !terminated {
399 self.report_unterminated_raw_string(start, n_hashes)
400 }
401 let n_hashes: u16 = self.restrict_n_hashes(start, n_hashes);
402 let n = u32::from(n_hashes);
403 let content_start = start + BytePos(2 + n);
404 let content_end = suffix_start - BytePos(1 + n);
405 self.validate_raw_str_escape(content_start, content_end);
406 let id = self.symbol_from_to(content_start, content_end);
407 (token::StrRaw(n_hashes), id)
408 }
409 rustc_lexer::LiteralKind::RawByteStr { n_hashes, started, terminated } => {
410 if !started {
411 self.report_non_started_raw_string(start);
412 }
413 if !terminated {
414 self.report_unterminated_raw_string(start, n_hashes)
415 }
416 let n_hashes: u16 = self.restrict_n_hashes(start, n_hashes);
417 let n = u32::from(n_hashes);
418 let content_start = start + BytePos(3 + n);
419 let content_end = suffix_start - BytePos(1 + n);
420 self.validate_raw_byte_str_escape(content_start, content_end);
421 let id = self.symbol_from_to(content_start, content_end);
422 (token::ByteStrRaw(n_hashes), id)
423 }
424 rustc_lexer::LiteralKind::Int { base, empty_int } => {
425 if empty_int {
426 self.err_span_(start, suffix_start, "no valid digits found for number");
427 (token::Integer, sym::integer(0))
428 } else {
429 self.validate_int_literal(base, start, suffix_start);
430 (token::Integer, self.symbol_from_to(start, suffix_start))
431 }
432 },
433 rustc_lexer::LiteralKind::Float { base, empty_exponent } => {
434 if empty_exponent {
435 let mut err = self.struct_span_fatal(
436 start, self.pos,
437 "expected at least one digit in exponent"
438 );
439 err.emit();
440 }
441
442 match base {
443 Base::Hexadecimal => {
444 self.err_span_(start, suffix_start,
445 "hexadecimal float literal is not supported")
446 }
447 Base::Octal => {
448 self.err_span_(start, suffix_start,
449 "octal float literal is not supported")
450 }
451 Base::Binary => {
452 self.err_span_(start, suffix_start,
453 "binary float literal is not supported")
454 }
455 _ => ()
456 }
457
458 let id = self.symbol_from_to(start, suffix_start);
459 (token::Float, id)
460 },
461 }
1a4d82fc
JJ
462 }
463
94b46f34
XL
464 #[inline]
465 fn src_index(&self, pos: BytePos) -> usize {
416331ca 466 (pos - self.start_pos).to_usize()
1a4d82fc
JJ
467 }
468
dc9dc135
XL
469 /// Slice of the source text from `start` up to but excluding `self.pos`,
470 /// meaning the slice does not include the character `self.ch`.
471 fn str_from(&self, start: BytePos) -> &str
1a4d82fc 472 {
dc9dc135 473 self.str_from_to(start, self.pos)
1a4d82fc
JJ
474 }
475
dc9dc135
XL
476 /// Creates a Symbol from a given offset to the current offset.
477 fn symbol_from(&self, start: BytePos) -> Symbol {
c30ab7b3 478 debug!("taking an ident from {:?} to {:?}", start, self.pos);
dc9dc135 479 Symbol::intern(self.str_from(start))
1a4d82fc
JJ
480 }
481
dc9dc135
XL
482 /// As symbol_from, with an explicit endpoint.
483 fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol {
1a4d82fc 484 debug!("taking an ident from {:?} to {:?}", start, end);
dc9dc135 485 Symbol::intern(self.str_from_to(start, end))
1a4d82fc
JJ
486 }
487
dc9dc135
XL
488 /// Slice of the source text spanning from `start` up to but excluding `end`.
489 fn str_from_to(&self, start: BytePos, end: BytePos) -> &str
1a4d82fc 490 {
dc9dc135 491 &self.src[self.src_index(start)..self.src_index(end)]
1a4d82fc
JJ
492 }
493
494 /// Converts CRLF to LF in the given string, raising an error on bare CR.
9cc50fc6 495 fn translate_crlf<'b>(&self, start: BytePos, s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
0731742a
XL
496 let mut chars = s.char_indices().peekable();
497 while let Some((i, ch)) = chars.next() {
1a4d82fc 498 if ch == '\r' {
0731742a
XL
499 if let Some((lf_idx, '\n')) = chars.peek() {
500 return translate_crlf_(self, start, s, *lf_idx, chars, errmsg).into();
1a4d82fc
JJ
501 }
502 let pos = start + BytePos(i as u32);
0731742a 503 let end_pos = start + BytePos((i + ch.len_utf8()) as u32);
1a4d82fc
JJ
504 self.err_span_(pos, end_pos, errmsg);
505 }
1a4d82fc 506 }
d9579d0f 507 return s.into();
1a4d82fc 508
9fa01778 509 fn translate_crlf_(rdr: &StringReader<'_>,
9cc50fc6
SL
510 start: BytePos,
511 s: &str,
0731742a
XL
512 mut j: usize,
513 mut chars: iter::Peekable<impl Iterator<Item = (usize, char)>>,
514 errmsg: &str)
9cc50fc6 515 -> String {
1a4d82fc 516 let mut buf = String::with_capacity(s.len());
0731742a
XL
517 // Skip first CR
518 buf.push_str(&s[.. j - 1]);
519 while let Some((i, ch)) = chars.next() {
1a4d82fc 520 if ch == '\r' {
9cc50fc6
SL
521 if j < i {
522 buf.push_str(&s[j..i]);
523 }
0731742a 524 let next = i + ch.len_utf8();
1a4d82fc 525 j = next;
0731742a 526 if chars.peek().map(|(_, ch)| *ch) != Some('\n') {
1a4d82fc
JJ
527 let pos = start + BytePos(i as u32);
528 let end_pos = start + BytePos(next as u32);
529 rdr.err_span_(pos, end_pos, errmsg);
530 }
531 }
1a4d82fc 532 }
9cc50fc6
SL
533 if j < s.len() {
534 buf.push_str(&s[j..]);
535 }
1a4d82fc
JJ
536 buf
537 }
538 }
539
416331ca
XL
540 fn report_non_started_raw_string(&self, start: BytePos) -> ! {
541 let bad_char = self.str_from(start).chars().last().unwrap();
542 self
543 .struct_fatal_span_char(
544 start,
545 self.pos,
546 "found invalid character; only `#` is allowed \
547 in raw string delimitation",
548 bad_char,
549 )
550 .emit();
551 FatalError.raise()
1a4d82fc
JJ
552 }
553
416331ca
XL
554 fn report_unterminated_raw_string(&self, start: BytePos, n_hashes: usize) -> ! {
555 let mut err = self.struct_span_fatal(
556 start, start,
557 "unterminated raw string",
558 );
559 err.span_label(
560 self.mk_sp(start, start),
561 "unterminated raw string",
562 );
1a4d82fc 563
416331ca
XL
564 if n_hashes > 0 {
565 err.note(&format!("this raw string should be terminated with `\"{}`",
566 "#".repeat(n_hashes as usize)));
48663c56 567 }
1a4d82fc 568
416331ca
XL
569 err.emit();
570 FatalError.raise()
1a4d82fc
JJ
571 }
572
416331ca
XL
573 fn restrict_n_hashes(&self, start: BytePos, n_hashes: usize) -> u16 {
574 match n_hashes.try_into() {
575 Ok(n_hashes) => n_hashes,
576 Err(_) => {
577 self.fatal_span_(start,
578 self.pos,
dc9dc135 579 "too many `#` symbols: raw strings may be \
416331ca 580 delimited by up to 65535 `#` symbols").raise();
94b46f34 581 }
1a4d82fc 582 }
1a4d82fc 583 }
48663c56 584
416331ca
XL
585 fn validate_char_escape(&self, content_start: BytePos, content_end: BytePos) {
586 let lit = self.str_from_to(content_start, content_end);
dc9dc135
XL
587 if let Err((off, err)) = unescape::unescape_char(lit) {
588 emit_unescape_error(
589 &self.sess.span_diagnostic,
590 lit,
416331ca 591 self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
dc9dc135
XL
592 unescape::Mode::Char,
593 0..off,
594 err,
595 )
596 }
597 }
598
416331ca
XL
599 fn validate_byte_escape(&self, content_start: BytePos, content_end: BytePos) {
600 let lit = self.str_from_to(content_start, content_end);
dc9dc135
XL
601 if let Err((off, err)) = unescape::unescape_byte(lit) {
602 emit_unescape_error(
603 &self.sess.span_diagnostic,
604 lit,
416331ca 605 self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
dc9dc135
XL
606 unescape::Mode::Byte,
607 0..off,
608 err,
609 )
610 }
611 }
612
416331ca
XL
613 fn validate_str_escape(&self, content_start: BytePos, content_end: BytePos) {
614 let lit = self.str_from_to(content_start, content_end);
dc9dc135
XL
615 unescape::unescape_str(lit, &mut |range, c| {
616 if let Err(err) = c {
48663c56
XL
617 emit_unescape_error(
618 &self.sess.span_diagnostic,
619 lit,
416331ca 620 self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
dc9dc135
XL
621 unescape::Mode::Str,
622 range,
48663c56
XL
623 err,
624 )
625 }
dc9dc135 626 })
48663c56
XL
627 }
628
dc9dc135
XL
629 fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) {
630 let lit = self.str_from_to(content_start, content_end);
631 unescape::unescape_raw_str(lit, &mut |range, c| {
632 if let Err(err) = c {
48663c56
XL
633 emit_unescape_error(
634 &self.sess.span_diagnostic,
635 lit,
dc9dc135
XL
636 self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
637 unescape::Mode::Str,
638 range,
48663c56
XL
639 err,
640 )
641 }
dc9dc135 642 })
48663c56
XL
643 }
644
dc9dc135
XL
645 fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
646 let lit = self.str_from_to(content_start, content_end);
647 unescape::unescape_raw_byte_str(lit, &mut |range, c| {
648 if let Err(err) = c {
649 emit_unescape_error(
650 &self.sess.span_diagnostic,
651 lit,
652 self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
653 unescape::Mode::ByteStr,
654 range,
655 err,
656 )
657 }
658 })
48663c56
XL
659 }
660
416331ca
XL
661 fn validate_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
662 let lit = self.str_from_to(content_start, content_end);
dc9dc135
XL
663 unescape::unescape_byte_str(lit, &mut |range, c| {
664 if let Err(err) = c {
665 emit_unescape_error(
666 &self.sess.span_diagnostic,
667 lit,
416331ca 668 self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
dc9dc135
XL
669 unescape::Mode::ByteStr,
670 range,
671 err,
672 )
673 }
674 })
48663c56 675 }
1a4d82fc 676
416331ca
XL
677 fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) {
678 let base = match base {
679 Base::Binary => 2,
680 Base::Octal => 8,
681 _ => return,
682 };
683 let s = self.str_from_to(content_start + BytePos(2), content_end);
684 for (idx, c) in s.char_indices() {
685 let idx = idx as u32;
686 if c != '_' && c.to_digit(base).is_none() {
687 let lo = content_start + BytePos(2 + idx);
688 let hi = content_start + BytePos(2 + idx + c.len_utf8() as u32);
689 self.err_span_(lo, hi,
690 &format!("invalid digit for a base {} literal", base));
1a4d82fc 691
416331ca
XL
692 }
693 }
694 }
9cc50fc6 695}
1a4d82fc 696
94b46f34 697fn is_doc_comment(s: &str) -> bool {
9cc50fc6
SL
698 let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') ||
699 s.starts_with("//!");
1a4d82fc
JJ
700 debug!("is {:?} a doc comment? {}", s, res);
701 res
702}
703
94b46f34 704fn is_block_doc_comment(s: &str) -> bool {
9cc50fc6
SL
705 // Prevent `/**/` from being parsed as a doc comment
706 let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') ||
707 s.starts_with("/*!")) && s.len() >= 5;
1a4d82fc
JJ
708 debug!("is {:?} a doc comment? {}", s, res);
709 res
710}