use crate::errors;
use crate::lexer::unicode_chars::UNICODE_ARRAY;
use crate::make_unclosed_delims_error;
use rustc_ast::ast::{self, AttrStyle};
use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind};
use rustc_ast::tokenstream::TokenStream;
use rustc_ast::util::unicode::contains_text_flow_control_chars;
use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey};
use rustc_lexer::unescape::{self, EscapeError, Mode};
use rustc_lexer::{Base, DocStyle, RawStrError};
use rustc_lexer::{Cursor, LiteralKind};
use rustc_session::lint::builtin::{
    RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
};
use rustc_session::lint::BuiltinLintDiagnostics;
use rustc_session::parse::ParseSess;
use rustc_span::symbol::{sym, Symbol};
use rustc_span::{edition::Edition, BytePos, Pos, Span};

// NOTE(review): `mod tokentrees;` and `mod unicode_chars;` were lost in this
// copy of the file but are required by visible references
// (`tokentrees::TokenTreesReader`, `unicode_chars::check_for_substitution`).
mod tokentrees;
mod unescape_error_reporting;
mod unicode_chars;

use unescape_error_reporting::{emit_unescape_error, escaped_char};
29 // This type is used a lot. Make sure it doesn't unintentionally get bigger.
31 // This assertion is in this crate, rather than in `rustc_lexer`, because that
32 // crate cannot depend on `rustc_data_structures`.
33 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
34 rustc_data_structures
::static_assert_size
!(rustc_lexer
::Token
, 12);
36 #[derive(Clone, Debug)]
37 pub struct UnmatchedDelim
{
38 pub expected_delim
: Delimiter
,
39 pub found_delim
: Option
<Delimiter
>,
41 pub unclosed_span
: Option
<Span
>,
42 pub candidate_span
: Option
<Span
>,
45 pub(crate) fn parse_token_trees
<'a
>(
48 mut start_pos
: BytePos
,
49 override_span
: Option
<Span
>,
50 ) -> Result
<TokenStream
, Vec
<Diagnostic
>> {
51 // Skip `#!`, if present.
52 if let Some(shebang_len
) = rustc_lexer
::strip_shebang(src
) {
53 src
= &src
[shebang_len
..];
54 start_pos
= start_pos
+ BytePos
::from_usize(shebang_len
);
57 let cursor
= Cursor
::new(src
);
58 let string_reader
= StringReader
{
65 nbsp_is_whitespace
: false,
67 let (stream
, res
, unmatched_delims
) =
68 tokentrees
::TokenTreesReader
::parse_all_token_trees(string_reader
);
70 Ok(_open_spacing
) if unmatched_delims
.is_empty() => Ok(stream
),
72 // Return error if there are unmatched delimiters or unclosed delimiters.
73 // We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch
74 // because the delimiter mismatch is more likely to be the root cause of error
76 let mut buffer
= Vec
::with_capacity(1);
77 for unmatched
in unmatched_delims
{
78 if let Some(err
) = make_unclosed_delims_error(unmatched
, sess
) {
79 err
.buffer(&mut buffer
);
82 if let Err(errs
) = res
{
83 // Add unclosing delimiter or diff marker errors
85 err
.buffer(&mut buffer
);
93 struct StringReader
<'a
> {
95 /// Initial position, read-only.
97 /// The absolute offset within the source_map of the current character.
99 /// Source text to tokenize.
101 /// Cursor for getting lexer tokens.
103 override_span
: Option
<Span
>,
104 /// When a "unknown start of token: \u{a0}" has already been emitted earlier
105 /// in this file, it's safe to treat further occurrences of the non-breaking
106 /// space character as whitespace.
107 nbsp_is_whitespace
: bool
,
110 impl<'a
> StringReader
<'a
> {
111 fn mk_sp(&self, lo
: BytePos
, hi
: BytePos
) -> Span
{
112 self.override_span
.unwrap_or_else(|| Span
::with_root_ctxt(lo
, hi
))
115 /// Returns the next token, paired with a bool indicating if the token was
116 /// preceded by whitespace.
117 fn next_token(&mut self) -> (Token
, bool
) {
118 let mut preceded_by_whitespace
= false;
119 let mut swallow_next_invalid
= 0;
120 // Skip trivial (whitespace & comments) tokens
122 let str_before
= self.cursor
.as_str();
123 let token
= self.cursor
.advance_token();
124 let start
= self.pos
;
125 self.pos
= self.pos
+ BytePos(token
.len
);
127 debug
!("next_token: {:?}({:?})", token
.kind
, self.str_from(start
));
129 // Now "cook" the token, converting the simple `rustc_lexer::TokenKind` enum into a
130 // rich `rustc_ast::TokenKind`. This turns strings into interned symbols and runs
131 // additional validation.
132 let kind
= match token
.kind
{
133 rustc_lexer
::TokenKind
::LineComment { doc_style }
=> {
134 // Skip non-doc comments
135 let Some(doc_style
) = doc_style
else {
136 self.lint_unicode_text_flow(start
);
137 preceded_by_whitespace
= true;
141 // Opening delimiter of the length 3 is not included into the symbol.
142 let content_start
= start
+ BytePos(3);
143 let content
= self.str_from(content_start
);
144 self.cook_doc_comment(content_start
, content
, CommentKind
::Line
, doc_style
)
146 rustc_lexer
::TokenKind
::BlockComment { doc_style, terminated }
=> {
148 self.report_unterminated_block_comment(start
, doc_style
);
151 // Skip non-doc comments
152 let Some(doc_style
) = doc_style
else {
153 self.lint_unicode_text_flow(start
);
154 preceded_by_whitespace
= true;
158 // Opening delimiter of the length 3 and closing delimiter of the length 2
159 // are not included into the symbol.
160 let content_start
= start
+ BytePos(3);
161 let content_end
= self.pos
- BytePos(if terminated { 2 }
else { 0 }
);
162 let content
= self.str_from_to(content_start
, content_end
);
163 self.cook_doc_comment(content_start
, content
, CommentKind
::Block
, doc_style
)
165 rustc_lexer
::TokenKind
::Whitespace
=> {
166 preceded_by_whitespace
= true;
169 rustc_lexer
::TokenKind
::Ident
=> {
172 rustc_lexer
::TokenKind
::RawIdent
=> {
173 let sym
= nfc_normalize(self.str_from(start
+ BytePos(2)));
174 let span
= self.mk_sp(start
, self.pos
);
175 self.sess
.symbol_gallery
.insert(sym
, span
);
176 if !sym
.can_be_raw() {
177 self.sess
.emit_err(errors
::CannotBeRawIdent { span, ident: sym }
);
179 self.sess
.raw_identifier_spans
.push(span
);
180 token
::Ident(sym
, true)
182 rustc_lexer
::TokenKind
::UnknownPrefix
=> {
183 self.report_unknown_prefix(start
);
186 rustc_lexer
::TokenKind
::InvalidIdent
187 // Do not recover an identifier with emoji if the codepoint is a confusable
188 // with a recoverable substitution token, like `âž–`.
192 let sym
= self.str_from(start
);
193 sym
.chars().count() == 1 && c
== sym
.chars().next().unwrap()
196 let sym
= nfc_normalize(self.str_from(start
));
197 let span
= self.mk_sp(start
, self.pos
);
198 self.sess
.bad_unicode_identifiers
.borrow_mut().entry(sym
).or_default()
200 token
::Ident(sym
, false)
202 // split up (raw) c string literals to an ident and a string literal when edition < 2021.
203 rustc_lexer
::TokenKind
::Literal
{
204 kind
: kind @
(LiteralKind
::CStr { .. }
| LiteralKind
::RawCStr { .. }
),
206 } if !self.mk_sp(start
, self.pos
).edition().at_least_rust_2021() => {
207 let prefix_len
= match kind
{
208 LiteralKind
::CStr { .. }
=> 1,
209 LiteralKind
::RawCStr { .. }
=> 2,
213 // reset the state so that only the prefix ("c" or "cr")
215 let lit_start
= start
+ BytePos(prefix_len
);
216 self.pos
= lit_start
;
217 self.cursor
= Cursor
::new(&str_before
[prefix_len
as usize..]);
219 self.report_unknown_prefix(start
);
220 let prefix_span
= self.mk_sp(start
, lit_start
);
221 return (Token
::new(self.ident(start
), prefix_span
), preceded_by_whitespace
);
223 rustc_lexer
::TokenKind
::Literal { kind, suffix_start }
=> {
224 let suffix_start
= start
+ BytePos(suffix_start
);
225 let (kind
, symbol
) = self.cook_lexer_literal(start
, suffix_start
, kind
);
226 if let token
::LitKind
::CStr
| token
::LitKind
::CStrRaw(_
) = kind
{
227 self.sess
.gated_spans
.gate(sym
::c_str_literals
, self.mk_sp(start
, self.pos
));
229 let suffix
= if suffix_start
< self.pos
{
230 let string
= self.str_from(suffix_start
);
234 .emit_err(errors
::UnderscoreLiteralSuffix { span: self.mk_sp(suffix_start, self.pos) }
);
237 Some(Symbol
::intern(string
))
242 token
::Literal(token
::Lit { kind, symbol, suffix }
)
244 rustc_lexer
::TokenKind
::Lifetime { starts_with_number }
=> {
245 // Include the leading `'` in the real identifier, for macro
246 // expansion purposes. See #12512 for the gory details of why
247 // this is necessary.
248 let lifetime_name
= self.str_from(start
);
249 if starts_with_number
{
250 let span
= self.mk_sp(start
, self.pos
);
251 let mut diag
= self.sess
.struct_err("lifetimes cannot start with a number");
253 diag
.stash(span
, StashKey
::LifetimeIsChar
);
255 let ident
= Symbol
::intern(lifetime_name
);
256 token
::Lifetime(ident
)
258 rustc_lexer
::TokenKind
::Semi
=> token
::Semi
,
259 rustc_lexer
::TokenKind
::Comma
=> token
::Comma
,
260 rustc_lexer
::TokenKind
::Dot
=> token
::Dot
,
261 rustc_lexer
::TokenKind
::OpenParen
=> token
::OpenDelim(Delimiter
::Parenthesis
),
262 rustc_lexer
::TokenKind
::CloseParen
=> token
::CloseDelim(Delimiter
::Parenthesis
),
263 rustc_lexer
::TokenKind
::OpenBrace
=> token
::OpenDelim(Delimiter
::Brace
),
264 rustc_lexer
::TokenKind
::CloseBrace
=> token
::CloseDelim(Delimiter
::Brace
),
265 rustc_lexer
::TokenKind
::OpenBracket
=> token
::OpenDelim(Delimiter
::Bracket
),
266 rustc_lexer
::TokenKind
::CloseBracket
=> token
::CloseDelim(Delimiter
::Bracket
),
267 rustc_lexer
::TokenKind
::At
=> token
::At
,
268 rustc_lexer
::TokenKind
::Pound
=> token
::Pound
,
269 rustc_lexer
::TokenKind
::Tilde
=> token
::Tilde
,
270 rustc_lexer
::TokenKind
::Question
=> token
::Question
,
271 rustc_lexer
::TokenKind
::Colon
=> token
::Colon
,
272 rustc_lexer
::TokenKind
::Dollar
=> token
::Dollar
,
273 rustc_lexer
::TokenKind
::Eq
=> token
::Eq
,
274 rustc_lexer
::TokenKind
::Bang
=> token
::Not
,
275 rustc_lexer
::TokenKind
::Lt
=> token
::Lt
,
276 rustc_lexer
::TokenKind
::Gt
=> token
::Gt
,
277 rustc_lexer
::TokenKind
::Minus
=> token
::BinOp(token
::Minus
),
278 rustc_lexer
::TokenKind
::And
=> token
::BinOp(token
::And
),
279 rustc_lexer
::TokenKind
::Or
=> token
::BinOp(token
::Or
),
280 rustc_lexer
::TokenKind
::Plus
=> token
::BinOp(token
::Plus
),
281 rustc_lexer
::TokenKind
::Star
=> token
::BinOp(token
::Star
),
282 rustc_lexer
::TokenKind
::Slash
=> token
::BinOp(token
::Slash
),
283 rustc_lexer
::TokenKind
::Caret
=> token
::BinOp(token
::Caret
),
284 rustc_lexer
::TokenKind
::Percent
=> token
::BinOp(token
::Percent
),
286 rustc_lexer
::TokenKind
::Unknown
| rustc_lexer
::TokenKind
::InvalidIdent
=> {
287 // Don't emit diagnostics for sequences of the same invalid token
288 if swallow_next_invalid
> 0 {
289 swallow_next_invalid
-= 1;
292 let mut it
= self.str_from_to_end(start
).chars();
293 let c
= it
.next().unwrap();
295 // If an error has already been reported on non-breaking
296 // space characters earlier in the file, treat all
297 // subsequent occurrences as whitespace.
298 if self.nbsp_is_whitespace
{
299 preceded_by_whitespace
= true;
302 self.nbsp_is_whitespace
= true;
304 let repeats
= it
.take_while(|c1
| *c1
== c
).count();
305 // FIXME: the lexer could be used to turn the ASCII version of unicode
306 // homoglyphs, instead of keeping a table in `check_for_substitution`into the
307 // token. Ideally, this should be inside `rustc_lexer`. However, we should
308 // first remove compound tokens like `<<` from `rustc_lexer`, and then add
309 // fancier error recovery to it, as there will be less overall work to do this
311 let (token
, sugg
) = unicode_chars
::check_for_substitution(self, start
, c
, repeats
+1);
312 self.sess
.emit_err(errors
::UnknownTokenStart
{
313 span
: self.mk_sp(start
, self.pos
+ Pos
::from_usize(repeats
* c
.len_utf8())),
314 escaped
: escaped_char(c
),
316 null
: if c
== '
\x00' {Some(errors::UnknownTokenNull)}
else {None}
,
317 repeat
: if repeats
> 0 {
318 swallow_next_invalid
= repeats
;
319 Some(errors
::UnknownTokenRepeat { repeats }
)
323 if let Some(token
) = token
{
326 preceded_by_whitespace
= true;
330 rustc_lexer
::TokenKind
::Eof
=> token
::Eof
,
332 let span
= self.mk_sp(start
, self.pos
);
333 return (Token
::new(kind
, span
), preceded_by_whitespace
);
337 fn ident(&self, start
: BytePos
) -> TokenKind
{
338 let sym
= nfc_normalize(self.str_from(start
));
339 let span
= self.mk_sp(start
, self.pos
);
340 self.sess
.symbol_gallery
.insert(sym
, span
);
341 token
::Ident(sym
, false)
344 fn struct_fatal_span_char(
350 ) -> DiagnosticBuilder
<'a
, !> {
353 .struct_span_fatal(self.mk_sp(from_pos
, to_pos
), format
!("{}: {}", m
, escaped_char(c
)))
356 /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
357 /// complain about it.
358 fn lint_unicode_text_flow(&self, start
: BytePos
) {
359 // Opening delimiter of the length 2 is not included into the comment text.
360 let content_start
= start
+ BytePos(2);
361 let content
= self.str_from(content_start
);
362 if contains_text_flow_control_chars(content
) {
363 let span
= self.mk_sp(start
, self.pos
);
364 self.sess
.buffer_lint_with_diagnostic(
365 TEXT_DIRECTION_CODEPOINT_IN_COMMENT
,
368 "unicode codepoint changing visible direction of text present in comment",
369 BuiltinLintDiagnostics
::UnicodeTextFlow(span
, content
.to_string()),
376 content_start
: BytePos
,
378 comment_kind
: CommentKind
,
381 if content
.contains('
\r'
) {
382 for (idx
, _
) in content
.char_indices().filter(|&(_
, c
)| c
== '
\r'
) {
383 let span
= self.mk_sp(
384 content_start
+ BytePos(idx
as u32),
385 content_start
+ BytePos(idx
as u32 + 1),
387 let block
= matches
!(comment_kind
, CommentKind
::Block
);
388 self.sess
.emit_err(errors
::CrDocComment { span, block }
);
392 let attr_style
= match doc_style
{
393 DocStyle
::Outer
=> AttrStyle
::Outer
,
394 DocStyle
::Inner
=> AttrStyle
::Inner
,
397 token
::DocComment(comment_kind
, attr_style
, Symbol
::intern(content
))
400 fn cook_lexer_literal(
404 kind
: rustc_lexer
::LiteralKind
,
405 ) -> (token
::LitKind
, Symbol
) {
407 rustc_lexer
::LiteralKind
::Char { terminated }
=> {
409 self.sess
.dcx
.span_fatal_with_code(
410 self.mk_sp(start
, end
),
411 "unterminated character literal",
415 self.cook_quoted(token
::Char
, Mode
::Char
, start
, end
, 1, 1) // ' '
417 rustc_lexer
::LiteralKind
::Byte { terminated }
=> {
419 self.sess
.dcx
.span_fatal_with_code(
420 self.mk_sp(start
+ BytePos(1), end
),
421 "unterminated byte constant",
425 self.cook_quoted(token
::Byte
, Mode
::Byte
, start
, end
, 2, 1) // b' '
427 rustc_lexer
::LiteralKind
::Str { terminated }
=> {
429 self.sess
.dcx
.span_fatal_with_code(
430 self.mk_sp(start
, end
),
431 "unterminated double quote string",
435 self.cook_quoted(token
::Str
, Mode
::Str
, start
, end
, 1, 1) // " "
437 rustc_lexer
::LiteralKind
::ByteStr { terminated }
=> {
439 self.sess
.dcx
.span_fatal_with_code(
440 self.mk_sp(start
+ BytePos(1), end
),
441 "unterminated double quote byte string",
445 self.cook_quoted(token
::ByteStr
, Mode
::ByteStr
, start
, end
, 2, 1) // b" "
447 rustc_lexer
::LiteralKind
::CStr { terminated }
=> {
449 self.sess
.dcx
.span_fatal_with_code(
450 self.mk_sp(start
+ BytePos(1), end
),
451 "unterminated C string",
455 self.cook_c_string(token
::CStr
, Mode
::CStr
, start
, end
, 2, 1) // c" "
457 rustc_lexer
::LiteralKind
::RawStr { n_hashes }
=> {
458 if let Some(n_hashes
) = n_hashes
{
459 let n
= u32::from(n_hashes
);
460 let kind
= token
::StrRaw(n_hashes
);
461 self.cook_quoted(kind
, Mode
::RawStr
, start
, end
, 2 + n
, 1 + n
) // r##" "##
463 self.report_raw_str_error(start
, 1);
466 rustc_lexer
::LiteralKind
::RawByteStr { n_hashes }
=> {
467 if let Some(n_hashes
) = n_hashes
{
468 let n
= u32::from(n_hashes
);
469 let kind
= token
::ByteStrRaw(n_hashes
);
470 self.cook_quoted(kind
, Mode
::RawByteStr
, start
, end
, 3 + n
, 1 + n
) // br##" "##
472 self.report_raw_str_error(start
, 2);
475 rustc_lexer
::LiteralKind
::RawCStr { n_hashes }
=> {
476 if let Some(n_hashes
) = n_hashes
{
477 let n
= u32::from(n_hashes
);
478 let kind
= token
::CStrRaw(n_hashes
);
479 self.cook_c_string(kind
, Mode
::RawCStr
, start
, end
, 3 + n
, 1 + n
) // cr##" "##
481 self.report_raw_str_error(start
, 2);
484 rustc_lexer
::LiteralKind
::Int { base, empty_int }
=> {
486 let span
= self.mk_sp(start
, end
);
487 self.sess
.emit_err(errors
::NoDigitsLiteral { span }
);
488 (token
::Integer
, sym
::integer(0))
490 if matches
!(base
, Base
::Binary
| Base
::Octal
) {
491 let base
= base
as u32;
492 let s
= self.str_from_to(start
+ BytePos(2), end
);
493 for (idx
, c
) in s
.char_indices() {
494 let span
= self.mk_sp(
495 start
+ BytePos
::from_usize(2 + idx
),
496 start
+ BytePos
::from_usize(2 + idx
+ c
.len_utf8()),
498 if c
!= '_'
&& c
.to_digit(base
).is_none() {
499 self.sess
.emit_err(errors
::InvalidDigitLiteral { span, base }
);
503 (token
::Integer
, self.symbol_from_to(start
, end
))
506 rustc_lexer
::LiteralKind
::Float { base, empty_exponent }
=> {
508 let span
= self.mk_sp(start
, self.pos
);
509 self.sess
.emit_err(errors
::EmptyExponentFloat { span }
);
511 let base
= match base
{
512 Base
::Hexadecimal
=> Some("hexadecimal"),
513 Base
::Octal
=> Some("octal"),
514 Base
::Binary
=> Some("binary"),
517 if let Some(base
) = base
{
518 let span
= self.mk_sp(start
, end
);
519 self.sess
.emit_err(errors
::FloatLiteralUnsupportedBase { span, base }
);
521 (token
::Float
, self.symbol_from_to(start
, end
))
527 fn src_index(&self, pos
: BytePos
) -> usize {
528 (pos
- self.start_pos
).to_usize()
531 /// Slice of the source text from `start` up to but excluding `self.pos`,
532 /// meaning the slice does not include the character `self.ch`.
533 fn str_from(&self, start
: BytePos
) -> &'a
str {
534 self.str_from_to(start
, self.pos
)
537 /// As symbol_from, with an explicit endpoint.
538 fn symbol_from_to(&self, start
: BytePos
, end
: BytePos
) -> Symbol
{
539 debug
!("taking an ident from {:?} to {:?}", start
, end
);
540 Symbol
::intern(self.str_from_to(start
, end
))
543 /// Slice of the source text spanning from `start` up to but excluding `end`.
544 fn str_from_to(&self, start
: BytePos
, end
: BytePos
) -> &'a
str {
545 &self.src
[self.src_index(start
)..self.src_index(end
)]
548 /// Slice of the source text spanning from `start` until the end
549 fn str_from_to_end(&self, start
: BytePos
) -> &'a
str {
550 &self.src
[self.src_index(start
)..]
553 fn report_raw_str_error(&self, start
: BytePos
, prefix_len
: u32) -> ! {
554 match rustc_lexer
::validate_raw_str(self.str_from(start
), prefix_len
) {
555 Err(RawStrError
::InvalidStarter { bad_char }
) => {
556 self.report_non_started_raw_string(start
, bad_char
)
558 Err(RawStrError
::NoTerminator { expected, found, possible_terminator_offset }
) => self
559 .report_unterminated_raw_string(start
, expected
, possible_terminator_offset
, found
),
560 Err(RawStrError
::TooManyDelimiters { found }
) => {
561 self.report_too_many_hashes(start
, found
)
563 Ok(()) => panic
!("no error found for supposedly invalid raw string literal"),
567 fn report_non_started_raw_string(&self, start
: BytePos
, bad_char
: char) -> ! {
568 self.struct_fatal_span_char(
571 "found invalid character; only `#` is allowed in raw string delimitation",
577 fn report_unterminated_raw_string(
581 possible_offset
: Option
<u32>,
582 found_terminators
: u32,
584 let mut err
= self.sess
.dcx
.struct_span_fatal_with_code(
585 self.mk_sp(start
, start
),
586 "unterminated raw string",
590 err
.span_label(self.mk_sp(start
, start
), "unterminated raw string");
594 "this raw string should be terminated with `\"{}`",
595 "#".repeat(n_hashes
as usize)
599 if let Some(possible_offset
) = possible_offset
{
600 let lo
= start
+ BytePos(possible_offset
);
601 let hi
= lo
+ BytePos(found_terminators
);
602 let span
= self.mk_sp(lo
, hi
);
605 "consider terminating the string here",
606 "#".repeat(n_hashes
as usize),
607 Applicability
::MaybeIncorrect
,
614 fn report_unterminated_block_comment(&self, start
: BytePos
, doc_style
: Option
<DocStyle
>) {
615 let msg
= match doc_style
{
616 Some(_
) => "unterminated block doc-comment",
617 None
=> "unterminated block comment",
619 let last_bpos
= self.pos
;
620 let mut err
= self.sess
.dcx
.struct_span_fatal_with_code(
621 self.mk_sp(start
, last_bpos
),
625 let mut nested_block_comment_open_idxs
= vec
![];
626 let mut last_nested_block_comment_idxs
= None
;
627 let mut content_chars
= self.str_from(start
).char_indices().peekable();
629 while let Some((idx
, current_char
)) = content_chars
.next() {
630 match content_chars
.peek() {
631 Some((_
, '
*'
)) if current_char
== '
/'
=> {
632 nested_block_comment_open_idxs
.push(idx
);
634 Some((_
, '
/'
)) if current_char
== '
*'
=> {
635 last_nested_block_comment_idxs
=
636 nested_block_comment_open_idxs
.pop().map(|open_idx
| (open_idx
, idx
));
642 if let Some((nested_open_idx
, nested_close_idx
)) = last_nested_block_comment_idxs
{
643 err
.span_label(self.mk_sp(start
, start
+ BytePos(2)), msg
)
646 start
+ BytePos(nested_open_idx
as u32),
647 start
+ BytePos(nested_open_idx
as u32 + 2),
649 "...as last nested comment starts here, maybe you want to close this instead?",
653 start
+ BytePos(nested_close_idx
as u32),
654 start
+ BytePos(nested_close_idx
as u32 + 2),
656 "...and last nested comment terminates here.",
663 // RFC 3101 introduced the idea of (reserved) prefixes. As of Rust 2021,
664 // using a (unknown) prefix is an error. In earlier editions, however, they
665 // only result in a (allowed by default) lint, and are treated as regular
666 // identifier tokens.
667 fn report_unknown_prefix(&self, start
: BytePos
) {
668 let prefix_span
= self.mk_sp(start
, self.pos
);
669 let prefix
= self.str_from_to(start
, self.pos
);
671 let expn_data
= prefix_span
.ctxt().outer_expn_data();
673 if expn_data
.edition
>= Edition
::Edition2021
{
674 // In Rust 2021, this is a hard error.
675 let sugg
= if prefix
== "rb" {
676 Some(errors
::UnknownPrefixSugg
::UseBr(prefix_span
))
677 } else if expn_data
.is_root() {
678 Some(errors
::UnknownPrefixSugg
::Whitespace(prefix_span
.shrink_to_hi()))
682 self.sess
.emit_err(errors
::UnknownPrefix { span: prefix_span, prefix, sugg }
);
684 // Before Rust 2021, only emit a lint for migration.
685 self.sess
.buffer_lint_with_diagnostic(
686 RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX
,
689 format
!("prefix `{prefix}` is unknown"),
690 BuiltinLintDiagnostics
::ReservedPrefix(prefix_span
),
695 fn report_too_many_hashes(&self, start
: BytePos
, num
: u32) -> ! {
696 self.sess
.emit_fatal(errors
::TooManyHashes { span: self.mk_sp(start, self.pos), num }
);
701 kind
: token
::LitKind
,
707 unescape
: fn(&str, Mode
, &mut dyn FnMut(Range
<usize>, Result
<(), EscapeError
>)),
708 ) -> (token
::LitKind
, Symbol
) {
709 let mut has_fatal_err
= false;
710 let content_start
= start
+ BytePos(prefix_len
);
711 let content_end
= end
- BytePos(postfix_len
);
712 let lit_content
= self.str_from_to(content_start
, content_end
);
713 unescape(lit_content
, mode
, &mut |range
, result
| {
714 // Here we only check for errors. The actual unescaping is done later.
715 if let Err(err
) = result
{
716 let span_with_quotes
= self.mk_sp(start
, end
);
717 let (start
, end
) = (range
.start
as u32, range
.end
as u32);
718 let lo
= content_start
+ BytePos(start
);
719 let hi
= lo
+ BytePos(end
- start
);
720 let span
= self.mk_sp(lo
, hi
);
722 has_fatal_err
= true;
736 // We normally exclude the quotes for the symbol, but for errors we
737 // include it because it results in clearer error messages.
739 (kind
, Symbol
::intern(lit_content
))
741 (token
::Err
, self.symbol_from_to(start
, end
))
747 kind
: token
::LitKind
,
753 ) -> (token
::LitKind
, Symbol
) {
754 self.cook_common(kind
, mode
, start
, end
, prefix_len
, postfix_len
, |src
, mode
, callback
| {
755 unescape
::unescape_literal(src
, mode
, &mut |span
, result
| {
756 callback(span
, result
.map(drop
))
763 kind
: token
::LitKind
,
769 ) -> (token
::LitKind
, Symbol
) {
770 self.cook_common(kind
, mode
, start
, end
, prefix_len
, postfix_len
, |src
, mode
, callback
| {
771 unescape
::unescape_c_string(src
, mode
, &mut |span
, result
| {
772 callback(span
, result
.map(drop
))
778 pub fn nfc_normalize(string
: &str) -> Symbol
{
779 use unicode_normalization
::{is_nfc_quick, IsNormalized, UnicodeNormalization}
;
780 match is_nfc_quick(string
.chars()) {
781 IsNormalized
::Yes
=> Symbol
::intern(string
),
783 let normalized_str
: String
= string
.chars().nfc().collect();
784 Symbol
::intern(&normalized_str
)