1 use crate::lexer
::unicode_chars
::UNICODE_ARRAY
;
2 use rustc_ast
::ast
::{self, AttrStyle}
;
3 use rustc_ast
::token
::{self, CommentKind, Delimiter, Token, TokenKind}
;
4 use rustc_ast
::tokenstream
::TokenStream
;
5 use rustc_ast
::util
::unicode
::contains_text_flow_control_chars
;
7 error_code
, Applicability
, DiagnosticBuilder
, ErrorGuaranteed
, PResult
, StashKey
,
9 use rustc_lexer
::unescape
::{self, Mode}
;
10 use rustc_lexer
::Cursor
;
11 use rustc_lexer
::{Base, DocStyle, RawStrError}
;
12 use rustc_session
::lint
::builtin
::{
13 RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX
, TEXT_DIRECTION_CODEPOINT_IN_COMMENT
,
15 use rustc_session
::lint
::BuiltinLintDiagnostics
;
16 use rustc_session
::parse
::ParseSess
;
17 use rustc_span
::symbol
::{sym, Symbol}
;
18 use rustc_span
::{edition::Edition, BytePos, Pos, Span}
;
21 mod unescape_error_reporting
;
24 use unescape_error_reporting
::{emit_unescape_error, escaped_char}
;
// This type is used a lot. Make sure it doesn't unintentionally get bigger.
//
// This assertion is in this crate, rather than in `rustc_lexer`, because that
// crate cannot depend on `rustc_data_structures`.
// NOTE(review): extraction gap — original line 27 (likely blank) missing here.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
rustc_data_structures::static_assert_size!(rustc_lexer::Token, 12);
/// An unmatched opening/closing delimiter recorded while building token
/// trees (see `parse_token_trees`, which returns a `Vec` of these).
#[derive(Clone, Debug)]
pub struct UnmatchedBrace {
    /// The delimiter kind that was expected at the point of the mismatch.
    pub expected_delim: Delimiter,
    /// The delimiter that was actually found, if any.
    pub found_delim: Option<Delimiter>,
    // NOTE(review): extraction gap — original line 37 (an additional field)
    // is missing here; confirm against the upstream source.
    /// Span of the unclosed opening delimiter, if known.
    pub unclosed_span: Option<Span>,
    /// Span of a candidate matching delimiter, if one was identified.
    pub candidate_span: Option<Span>,
    // NOTE(review): extraction gap — the struct's closing `}` (original
    // line 40) is missing from this extraction.
/// Lexes `src` into a `TokenStream` of token trees, also collecting any
/// unmatched delimiters encountered along the way.
pub(crate) fn parse_token_trees<'a>(
    // NOTE(review): extraction gap — original lines 43-44 (the parameters
    // providing `sess` and `src`, both used below) are missing.
    mut start_pos: BytePos,
    override_span: Option<Span>,
) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
    // Skip `#!`, if present.
    if let Some(shebang_len) = rustc_lexer::strip_shebang(src) {
        src = &src[shebang_len..];
        // Keep reported positions anchored to the original file by advancing
        // the start offset past the stripped shebang.
        start_pos = start_pos + BytePos::from_usize(shebang_len);
    // NOTE(review): extraction gap — closing `}` (original lines 52-53)
    // missing.
    let cursor = Cursor::new(src);
    // NOTE(review): extraction gap — original line 55 (presumably
    // `let string_reader =`) is missing before this struct literal;
    // `string_reader` is consumed on the next line.
    StringReader { sess, start_pos, pos: start_pos, src, cursor, override_span };
    tokentrees::TokenTreesReader::parse_all_token_trees(string_reader)
    // NOTE(review): extraction gap — the function's closing `}` is missing.
/// Low-level lexer state: wraps a `rustc_lexer` cursor over the source text
/// and tracks absolute byte positions for span construction.
struct StringReader<'a> {
    // NOTE(review): extraction gap — original line 61 (the first field,
    // referenced elsewhere as `self.sess`) is missing.
    /// Initial position, read-only.
    // NOTE(review): extraction gap — original line 63 (the field
    // declaration; `start_pos` per its uses below) is missing.
    /// The absolute offset within the source_map of the current character.
    // NOTE(review): extraction gap — original line 65 (`pos`) is missing.
    /// Source text to tokenize.
    // NOTE(review): extraction gap — original line 67 (`src`) is missing.
    /// Cursor for getting lexer tokens.
    // NOTE(review): extraction gap — original line 69 (`cursor`) is missing.
    /// When set, every span produced by `mk_sp` is this span instead of a
    /// fresh root-context span over the given positions.
    override_span: Option<Span>,
    // NOTE(review): extraction gap — the struct's closing `}` is missing.
impl<'a> StringReader<'a> {
    /// Builds a span for `[lo, hi)`; if `override_span` is set, it is
    /// returned unchanged instead of a fresh root-context span.
    fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span {
        self.override_span.unwrap_or_else(|| Span::with_root_ctxt(lo, hi))
    // NOTE(review): extraction gap — the method's closing `}` (original
    // line 76) is missing.
    /// Returns the next token, paired with a bool indicating if the token was
    /// preceded by whitespace.
    fn next_token(&mut self) -> (Token, bool) {
        let mut preceded_by_whitespace = false;
        // Skip trivial (whitespace & comments) tokens
        // NOTE(review): extraction gap — original line 84 (presumably `loop {`;
        // the trivia arms below read as `continue`-style fallthroughs) missing.
        let token = self.cursor.advance_token();
        // NOTE(review): extraction gap — original line 86 (the binding of
        // `start`, used throughout below) missing.
        self.pos = self.pos + BytePos(token.len);
        debug!("next_token: {:?}({:?})", token.kind, self.str_from(start));
        // Now "cook" the token, converting the simple `rustc_lexer::TokenKind` enum into a
        // rich `rustc_ast::TokenKind`. This turns strings into interned symbols and runs
        // additional validation.
        let kind = match token.kind {
            rustc_lexer::TokenKind::LineComment { doc_style } => {
                // Skip non-doc comments
                let Some(doc_style) = doc_style else {
                    self.lint_unicode_text_flow(start);
                    preceded_by_whitespace = true;
                    // NOTE(review): extraction gap — original lines 100-102
                    // (presumably `continue;` and the closing `};`) missing.
                // Opening delimiter of the length 3 is not included into the symbol.
                let content_start = start + BytePos(3);
                let content = self.str_from(content_start);
                self.cook_doc_comment(content_start, content, CommentKind::Line, doc_style)
                // NOTE(review): extraction gap — arm's closing `}` (line 107) missing.
            rustc_lexer::TokenKind::BlockComment { doc_style, terminated } => {
                // NOTE(review): extraction gap — original line 109 (presumably
                // a `!terminated` guard around the report below) missing.
                self.report_unterminated_block_comment(start, doc_style);
                // NOTE(review): extraction gap — lines 111-112 missing.
                // Skip non-doc comments
                let Some(doc_style) = doc_style else {
                    self.lint_unicode_text_flow(start);
                    preceded_by_whitespace = true;
                    // NOTE(review): extraction gap — lines 117-119 missing.
                // Opening delimiter of the length 3 and closing delimiter of the length 2
                // are not included into the symbol.
                let content_start = start + BytePos(3);
                let content_end = self.pos - BytePos(if terminated { 2 } else { 0 });
                let content = self.str_from_to(content_start, content_end);
                self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
                // NOTE(review): extraction gap — line 126 missing.
            rustc_lexer::TokenKind::Whitespace => {
                preceded_by_whitespace = true;
                // NOTE(review): extraction gap — lines 129-130 missing.
            rustc_lexer::TokenKind::Ident => {
                let sym = nfc_normalize(self.str_from(start));
                let span = self.mk_sp(start, self.pos);
                self.sess.symbol_gallery.insert(sym, span);
                token::Ident(sym, false)
                // NOTE(review): extraction gap — line 136 missing.
            rustc_lexer::TokenKind::RawIdent => {
                // The 2-byte `r#` prefix is skipped before normalizing.
                let sym = nfc_normalize(self.str_from(start + BytePos(2)));
                let span = self.mk_sp(start, self.pos);
                self.sess.symbol_gallery.insert(sym, span);
                if !sym.can_be_raw() {
                    self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
                // NOTE(review): extraction gap — line 143 (`}`) missing.
                self.sess.raw_identifier_spans.borrow_mut().push(span);
                token::Ident(sym, true)
                // NOTE(review): extraction gap — line 146 missing.
            rustc_lexer::TokenKind::UnknownPrefix => {
                self.report_unknown_prefix(start);
                let sym = nfc_normalize(self.str_from(start));
                let span = self.mk_sp(start, self.pos);
                self.sess.symbol_gallery.insert(sym, span);
                token::Ident(sym, false)
                // NOTE(review): extraction gap — line 153 missing.
            rustc_lexer::TokenKind::InvalidIdent
            // Do not recover an identifier with emoji if the codepoint is a confusable
            // with a recoverable substitution token, like `➖`.
                // NOTE(review): extraction gap — lines 157-159 (the guard
                // expression introducing `c`, presumably iterating the
                // substitution table) missing.
                let sym = self.str_from(start);
                sym.chars().count() == 1 && c == sym.chars().next().unwrap()
                // NOTE(review): extraction gap — lines 162-163 missing.
                let sym = nfc_normalize(self.str_from(start));
                let span = self.mk_sp(start, self.pos);
                self.sess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default()
                // NOTE(review): extraction gap — line 167 (presumably
                // `.push(span);`) missing.
                token::Ident(sym, false)
                // NOTE(review): extraction gap — line 169 missing.
            rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
                let suffix_start = start + BytePos(suffix_start);
                let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
                let suffix = if suffix_start < self.pos {
                    let string = self.str_from(suffix_start);
                    // NOTE(review): extraction gap — lines 175-178 missing
                    // (diagnostic emission for a bare `_` suffix, per the
                    // surviving message below).
                        self.mk_sp(suffix_start, self.pos),
                        "underscore literal suffix is not allowed",
                    // NOTE(review): extraction gap — lines 181-184 missing.
                    Some(Symbol::intern(string))
                // NOTE(review): extraction gap — lines 186-189 (presumably
                // `} else { None };`) missing.
                token::Literal(token::Lit { kind, symbol, suffix })
                // NOTE(review): extraction gap — line 191 missing.
            rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
                // Include the leading `'` in the real identifier, for macro
                // expansion purposes. See #12512 for the gory details of why
                // this is necessary.
                let lifetime_name = self.str_from(start);
                if starts_with_number {
                    let span = self.mk_sp(start, self.pos);
                    let mut diag = self.sess.struct_err("lifetimes cannot start with a number");
                    // NOTE(review): extraction gap — line 200 missing.
                    diag.stash(span, StashKey::LifetimeIsChar);
                // NOTE(review): extraction gap — line 202 (`}`) missing.
                let ident = Symbol::intern(lifetime_name);
                token::Lifetime(ident)
                // NOTE(review): extraction gap — line 205 missing.
            // One-to-one punctuation mappings.
            rustc_lexer::TokenKind::Semi => token::Semi,
            rustc_lexer::TokenKind::Comma => token::Comma,
            rustc_lexer::TokenKind::Dot => token::Dot,
            rustc_lexer::TokenKind::OpenParen => token::OpenDelim(Delimiter::Parenthesis),
            rustc_lexer::TokenKind::CloseParen => token::CloseDelim(Delimiter::Parenthesis),
            rustc_lexer::TokenKind::OpenBrace => token::OpenDelim(Delimiter::Brace),
            rustc_lexer::TokenKind::CloseBrace => token::CloseDelim(Delimiter::Brace),
            rustc_lexer::TokenKind::OpenBracket => token::OpenDelim(Delimiter::Bracket),
            rustc_lexer::TokenKind::CloseBracket => token::CloseDelim(Delimiter::Bracket),
            rustc_lexer::TokenKind::At => token::At,
            rustc_lexer::TokenKind::Pound => token::Pound,
            rustc_lexer::TokenKind::Tilde => token::Tilde,
            rustc_lexer::TokenKind::Question => token::Question,
            rustc_lexer::TokenKind::Colon => token::Colon,
            rustc_lexer::TokenKind::Dollar => token::Dollar,
            rustc_lexer::TokenKind::Eq => token::Eq,
            rustc_lexer::TokenKind::Bang => token::Not,
            rustc_lexer::TokenKind::Lt => token::Lt,
            rustc_lexer::TokenKind::Gt => token::Gt,
            rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus),
            rustc_lexer::TokenKind::And => token::BinOp(token::And),
            rustc_lexer::TokenKind::Or => token::BinOp(token::Or),
            rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus),
            rustc_lexer::TokenKind::Star => token::BinOp(token::Star),
            rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash),
            rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
            rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
            // NOTE(review): extraction gap — line 233 missing.
            rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => {
                let c = self.str_from(start).chars().next().unwrap();
                // NOTE(review): extraction gap — line 236 (the binding of
                // `err`, used below) missing.
                self.struct_err_span_char(start, self.pos, "unknown start of token", c);
                // FIXME: the lexer could be used to turn the ASCII version of unicode
                // homoglyphs, instead of keeping a table in `check_for_substitution` into the
                // token. Ideally, this should be inside `rustc_lexer`. However, we should
                // first remove compound tokens like `<<` from `rustc_lexer`, and then add
                // fancier error recovery to it, as there will be less overall work to do this
                // NOTE(review): extraction gap — line 243 (continuation of the
                // FIXME comment) missing.
                let token = unicode_chars::check_for_substitution(self, start, c, &mut err);
                // NOTE(review): extraction gap — line 245 (a condition guarding
                // the NUL-byte help note, presumably) missing.
                err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used");
                // NOTE(review): extraction gap — lines 247-248 missing.
                if let Some(token) = token {
                // NOTE(review): extraction gap — lines 250-251 missing.
                    preceded_by_whitespace = true;
                // NOTE(review): extraction gap — lines 253-255 missing.
            rustc_lexer::TokenKind::Eof => token::Eof,
            // NOTE(review): extraction gap — line 257 (the match's closing
            // `};`) missing.
        let span = self.mk_sp(start, self.pos);
        return (Token::new(kind, span), preceded_by_whitespace);
        // NOTE(review): extraction gap — closing braces (lines 260-262) missing.
263 /// Report a fatal lexical error with a given span.
264 fn fatal_span(&self, sp
: Span
, m
: &str) -> ! {
265 self.sess
.span_diagnostic
.span_fatal(sp
, m
)
268 /// Report a lexical error with a given span.
269 fn err_span(&self, sp
: Span
, m
: &str) {
270 self.sess
.span_diagnostic
.struct_span_err(sp
, m
).emit();
273 /// Report a fatal error spanning [`from_pos`, `to_pos`).
274 fn fatal_span_(&self, from_pos
: BytePos
, to_pos
: BytePos
, m
: &str) -> ! {
275 self.fatal_span(self.mk_sp(from_pos
, to_pos
), m
)
278 /// Report a lexical error spanning [`from_pos`, `to_pos`).
279 fn err_span_(&self, from_pos
: BytePos
, to_pos
: BytePos
, m
: &str) {
280 self.err_span(self.mk_sp(from_pos
, to_pos
), m
)
    /// Builds (without emitting) a fatal diagnostic of the form
    /// `"{m}: {escaped c}"` over `[from_pos, to_pos)`.
    fn struct_fatal_span_char(
        // NOTE(review): extraction gap — original lines 284-288 (the
        // parameter list: `&self`, `from_pos`, `to_pos`, `m`, `c`, per the
        // body) missing.
    ) -> DiagnosticBuilder<'a, !> {
        // NOTE(review): extraction gap — lines 290-291 (the receiver chain,
        // presumably `self.sess.span_diagnostic`) missing.
            .struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
        // NOTE(review): extraction gap — closing braces missing.
    /// Builds (without emitting) a non-fatal diagnostic of the form
    /// `"{m}: {escaped c}"` over `[from_pos, to_pos)`.
    fn struct_err_span_char(
        // NOTE(review): extraction gap — original lines 296-300 (the
        // parameter list: `&self`, `from_pos`, `to_pos`, `m`, `c`, per the
        // body) missing.
    ) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
        // NOTE(review): extraction gap — lines 302-303 (the receiver chain,
        // presumably `self.sess.span_diagnostic`) missing.
            .struct_span_err(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
        // NOTE(review): extraction gap — closing braces missing.
    /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
    /// complain about it.
    fn lint_unicode_text_flow(&self, start: BytePos) {
        // Opening delimiter of the length 2 is not included into the comment text.
        let content_start = start + BytePos(2);
        let content = self.str_from(content_start);
        if contains_text_flow_control_chars(content) {
            let span = self.mk_sp(start, self.pos);
            self.sess.buffer_lint_with_diagnostic(
                &TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
                // NOTE(review): extraction gap — original lines 317-318 (the
                // span and node-id arguments, presumably) missing.
                "unicode codepoint changing visible direction of text present in comment",
                BuiltinLintDiagnostics::UnicodeTextFlow(span, content.to_string()),
                // NOTE(review): extraction gap — call close and function
                // closing braces (lines 321-324) missing.
    // NOTE(review): extraction gap — original line 326 (the
    // `fn cook_doc_comment(` header plus `&self`) is missing; only part of
    // the signature survives. The body validates a doc comment (rejecting
    // bare CR) and interns it as a `token::DocComment`.
        content_start: BytePos,
        // NOTE(review): extraction gap — line 328 (the `content` parameter,
        // used below) missing.
        comment_kind: CommentKind,
        // NOTE(review): extraction gap — lines 330-331 (the `doc_style`
        // parameter and return type, per the body) missing.
        if content.contains('\r') {
            for (idx, _) in content.char_indices().filter(|&(_, c)| c == '\r') {
                // NOTE(review): extraction gap — line 334 (the error call
                // wrapping the span arguments below) missing.
                content_start + BytePos(idx as u32),
                content_start + BytePos(idx as u32 + 1),
                // NOTE(review): extraction gap — line 337 (a match on
                // `comment_kind`, presumably) missing.
                CommentKind::Line => "bare CR not allowed in doc-comment",
                CommentKind::Block => "bare CR not allowed in block doc-comment",
                // NOTE(review): extraction gap — lines 340-344 (closes of the
                // match, call, loop, and `if`) missing.
        let attr_style = match doc_style {
            DocStyle::Outer => AttrStyle::Outer,
            DocStyle::Inner => AttrStyle::Inner,
        // NOTE(review): extraction gap — lines 348-349 (`};` and blank)
        // missing.
        token::DocComment(comment_kind, attr_style, Symbol::intern(content))
        // NOTE(review): extraction gap — the function's closing braces missing.
    /// Converts a raw `rustc_lexer::LiteralKind` into a cooked
    /// `token::LitKind` plus its interned symbol, emitting diagnostics for
    /// unterminated or malformed literals along the way.
    fn cook_lexer_literal(
        // NOTE(review): extraction gap — original lines 354-356 (`&self`,
        // `start: BytePos`, `end: BytePos`, per the body) missing.
        kind: rustc_lexer::LiteralKind,
    ) -> (token::LitKind, Symbol) {
        // NOTE(review): extraction gap — line 359 (presumably `match kind {`)
        // missing.
        rustc_lexer::LiteralKind::Char { terminated } => {
            // NOTE(review): extraction gap — line 361 (a `!terminated` guard,
            // presumably) missing.
            self.sess.span_diagnostic.span_fatal_with_code(
                self.mk_sp(start, end),
                "unterminated character literal",
                // NOTE(review): extraction gap — lines 365-367 (error code,
                // call close, guard close) missing.
            self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' '
            // NOTE(review): extraction gap — line 369 missing.
        rustc_lexer::LiteralKind::Byte { terminated } => {
            // NOTE(review): extraction gap — line 371 missing.
            self.sess.span_diagnostic.span_fatal_with_code(
                self.mk_sp(start + BytePos(1), end),
                "unterminated byte constant",
                // NOTE(review): extraction gap — lines 375-377 missing.
            self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
            // NOTE(review): extraction gap — line 379 missing.
        rustc_lexer::LiteralKind::Str { terminated } => {
            // NOTE(review): extraction gap — line 381 missing.
            self.sess.span_diagnostic.span_fatal_with_code(
                self.mk_sp(start, end),
                "unterminated double quote string",
                // NOTE(review): extraction gap — lines 385-387 missing.
            self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " "
            // NOTE(review): extraction gap — line 389 missing.
        rustc_lexer::LiteralKind::ByteStr { terminated } => {
            // NOTE(review): extraction gap — line 391 missing.
            self.sess.span_diagnostic.span_fatal_with_code(
                self.mk_sp(start + BytePos(1), end),
                "unterminated double quote byte string",
                // NOTE(review): extraction gap — lines 395-397 missing.
            self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
            // NOTE(review): extraction gap — line 399 missing.
        rustc_lexer::LiteralKind::RawStr { n_hashes } => {
            if let Some(n_hashes) = n_hashes {
                let n = u32::from(n_hashes);
                let kind = token::StrRaw(n_hashes);
                self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "##
                // NOTE(review): extraction gap — line 405 (presumably
                // `} else {`) missing.
                self.report_raw_str_error(start, 1);
                // NOTE(review): extraction gap — lines 407-408 missing.
        rustc_lexer::LiteralKind::RawByteStr { n_hashes } => {
            if let Some(n_hashes) = n_hashes {
                let n = u32::from(n_hashes);
                let kind = token::ByteStrRaw(n_hashes);
                self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "##
                // NOTE(review): extraction gap — line 414 (presumably
                // `} else {`) missing.
                self.report_raw_str_error(start, 2);
                // NOTE(review): extraction gap — lines 416-417 missing.
        rustc_lexer::LiteralKind::Int { base, empty_int } => {
            // NOTE(review): extraction gap — lines 419-421 (the `empty_int`
            // branch and diagnostic receiver) missing.
            .struct_span_err_with_code(
                self.mk_sp(start, end),
                "no valid digits found for number",
                // NOTE(review): extraction gap — lines 425-427 missing.
            (token::Integer, sym::integer(0))
            // NOTE(review): extraction gap — line 429 (presumably `} else {`)
            // missing.
            if matches!(base, Base::Binary | Base::Octal) {
                let base = base as u32;
                // Skip the `0b`/`0o` prefix (2 bytes) when validating digits.
                let s = self.str_from_to(start + BytePos(2), end);
                for (idx, c) in s.char_indices() {
                    if c != '_' && c.to_digit(base).is_none() {
                        // NOTE(review): extraction gap — line 435 (the error
                        // call wrapping the spans below) missing.
                        start + BytePos::from_usize(2 + idx),
                        start + BytePos::from_usize(2 + idx + c.len_utf8()),
                        &format!("invalid digit for a base {} literal", base),
                        // NOTE(review): extraction gap — lines 439-442
                        // (closes) missing.
            (token::Integer, self.symbol_from_to(start, end))
            // NOTE(review): extraction gap — lines 444-445 missing.
        rustc_lexer::LiteralKind::Float { base, empty_exponent } => {
            // NOTE(review): extraction gap — line 447 (the `empty_exponent`
            // guard, presumably) missing.
            self.err_span_(start, self.pos, "expected at least one digit in exponent");
            // NOTE(review): extraction gap — lines 449-450 (guard close and
            // `match base {`, presumably) missing.
            Base::Hexadecimal => {
                self.err_span_(start, end, "hexadecimal float literal is not supported")
            // NOTE(review): extraction gap — lines 453-454 (arm close and the
            // octal arm header) missing.
                self.err_span_(start, end, "octal float literal is not supported")
            // NOTE(review): extraction gap — lines 456-457 missing.
                self.err_span_(start, end, "binary float literal is not supported")
            // NOTE(review): extraction gap — lines 459-461 missing.
            (token::Float, self.symbol_from_to(start, end))
            // NOTE(review): extraction gap — the remaining closes (lines
            // 463-466) missing.
468 fn src_index(&self, pos
: BytePos
) -> usize {
469 (pos
- self.start_pos
).to_usize()
472 /// Slice of the source text from `start` up to but excluding `self.pos`,
473 /// meaning the slice does not include the character `self.ch`.
474 fn str_from(&self, start
: BytePos
) -> &str {
475 self.str_from_to(start
, self.pos
)
478 /// As symbol_from, with an explicit endpoint.
479 fn symbol_from_to(&self, start
: BytePos
, end
: BytePos
) -> Symbol
{
480 debug
!("taking an ident from {:?} to {:?}", start
, end
);
481 Symbol
::intern(self.str_from_to(start
, end
))
484 /// Slice of the source text spanning from `start` up to but excluding `end`.
485 fn str_from_to(&self, start
: BytePos
, end
: BytePos
) -> &str {
486 &self.src
[self.src_index(start
)..self.src_index(end
)]
489 fn report_raw_str_error(&self, start
: BytePos
, prefix_len
: u32) -> ! {
490 match rustc_lexer
::validate_raw_str(self.str_from(start
), prefix_len
) {
491 Err(RawStrError
::InvalidStarter { bad_char }
) => {
492 self.report_non_started_raw_string(start
, bad_char
)
494 Err(RawStrError
::NoTerminator { expected, found, possible_terminator_offset }
) => self
495 .report_unterminated_raw_string(start
, expected
, possible_terminator_offset
, found
),
496 Err(RawStrError
::TooManyDelimiters { found }
) => {
497 self.report_too_many_hashes(start
, found
)
499 Ok(()) => panic
!("no error found for supposedly invalid raw string literal"),
    /// Fatal report for a raw-string opener followed by an invalid character
    /// (only `#` is allowed between the `r`/`br` prefix and the quote).
    fn report_non_started_raw_string(&self, start: BytePos, bad_char: char) -> ! {
        self.struct_fatal_span_char(
            // NOTE(review): extraction gap — original lines 505-506 (the span
            // arguments) missing.
            "found invalid character; only `#` is allowed in raw string delimitation",
            // NOTE(review): extraction gap — lines 508-511 (the character
            // argument, call close / emit, and function close) missing.
    /// Fatal report for a raw string with no matching terminator, suggesting
    /// a likely termination point when one was detected.
    fn report_unterminated_raw_string(
        // NOTE(review): extraction gap — original lines 514-516 (`&self`,
        // `start`, and the `n_hashes` parameter used below) missing.
        possible_offset: Option<u32>,
        found_terminators: u32,
        // NOTE(review): extraction gap — line 519 (`) -> ! {`, presumably)
        // missing.
        let mut err = self.sess.span_diagnostic.struct_span_fatal_with_code(
            self.mk_sp(start, start),
            "unterminated raw string",
            // NOTE(review): extraction gap — lines 523-525 (error code and
            // call close) missing.
        err.span_label(self.mk_sp(start, start), "unterminated raw string");
        // NOTE(review): extraction gap — lines 527-529 (a guard on `n_hashes`
        // and the `err.note(format!(` opening for the message below) missing.
        "this raw string should be terminated with `\"{}`",
        "#".repeat(n_hashes as usize)
        // NOTE(review): extraction gap — lines 532-534 (call closes) missing.
        if let Some(possible_offset) = possible_offset {
            let lo = start + BytePos(possible_offset as u32);
            let hi = lo + BytePos(found_terminators as u32);
            let span = self.mk_sp(lo, hi);
            // NOTE(review): extraction gap — lines 539-540 (the
            // `err.span_suggestion(` opening, presumably) missing.
            "consider terminating the string here",
            "#".repeat(n_hashes as usize),
            Applicability::MaybeIncorrect,
            // NOTE(review): extraction gap — remaining lines (544-548: call
            // close, guard close, `err.emit()`/abort, function close) missing.
    /// Reports a fatal "unterminated block comment" (or doc-comment) error,
    /// pointing at the last nested `/* ... */` pair when one is present,
    /// since a nested opener is the most likely culprit.
    fn report_unterminated_block_comment(&self, start: BytePos, doc_style: Option<DocStyle>) {
        let msg = match doc_style {
            Some(_) => "unterminated block doc-comment",
            None => "unterminated block comment",
        // NOTE(review): extraction gap — original line 554 (`};`) missing.
        let last_bpos = self.pos;
        let mut err = self.sess.span_diagnostic.struct_span_fatal_with_code(
            self.mk_sp(start, last_bpos),
            // NOTE(review): extraction gap — lines 558-560 (message/error-code
            // arguments and call close) missing.
        // Scan the comment body tracking `/*`..`*/` pairs so we can point at
        // the most recently seen nested comment.
        let mut nested_block_comment_open_idxs = vec![];
        let mut last_nested_block_comment_idxs = None;
        let mut content_chars = self.str_from(start).char_indices().peekable();
        // NOTE(review): extraction gap — line 564 missing.
        while let Some((idx, current_char)) = content_chars.next() {
            match content_chars.peek() {
                Some((_, '*')) if current_char == '/' => {
                    nested_block_comment_open_idxs.push(idx);
                // NOTE(review): extraction gap — line 569 (`}`) missing.
                Some((_, '/')) if current_char == '*' => {
                    last_nested_block_comment_idxs =
                        nested_block_comment_open_idxs.pop().map(|open_idx| (open_idx, idx));
                // NOTE(review): extraction gap — lines 573-577 (arm close,
                // default arm, match/loop closes) missing.
        if let Some((nested_open_idx, nested_close_idx)) = last_nested_block_comment_idxs {
            err.span_label(self.mk_sp(start, start + BytePos(2)), msg)
                // NOTE(review): extraction gap — lines 580-581 (a chained
                // `.span_label(self.mk_sp(` opening, presumably) missing.
                start + BytePos(nested_open_idx as u32),
                start + BytePos(nested_open_idx as u32 + 2),
                // NOTE(review): extraction gap — line 584 missing.
                "...as last nested comment starts here, maybe you want to close this instead?",
                // NOTE(review): extraction gap — lines 586-588 (call close and
                // the next `.span_label(` opening) missing.
                start + BytePos(nested_close_idx as u32),
                start + BytePos(nested_close_idx as u32 + 2),
                // NOTE(review): extraction gap — line 591 missing.
                "...and last nested comment terminates here.",
                // NOTE(review): extraction gap — remaining lines (593-597:
                // call closes, emission, function close) missing.
    // RFC 3101 introduced the idea of (reserved) prefixes. As of Rust 2021,
    // using a (unknown) prefix is an error. In earlier editions, however, they
    // only result in a (allowed by default) lint, and are treated as regular
    // identifier tokens.
    fn report_unknown_prefix(&self, start: BytePos) {
        let prefix_span = self.mk_sp(start, self.pos);
        let prefix_str = self.str_from_to(start, self.pos);
        let msg = format!("prefix `{}` is unknown", prefix_str);
        // NOTE(review): extraction gap — original line 607 missing.
        let expn_data = prefix_span.ctxt().outer_expn_data();
        // NOTE(review): extraction gap — line 609 missing.
        if expn_data.edition >= Edition::Edition2021 {
            // In Rust 2021, this is a hard error.
            let mut err = self.sess.span_diagnostic.struct_span_err(prefix_span, &msg);
            err.span_label(prefix_span, "unknown prefix");
            if prefix_str == "rb" {
                err.span_suggestion_verbose(
                    // NOTE(review): extraction gap — line 616 (the span
                    // argument) missing.
                    "use `br` for a raw byte string",
                    // NOTE(review): extraction gap — line 618 (the suggested
                    // replacement) missing.
                    Applicability::MaybeIncorrect,
                    // NOTE(review): extraction gap — line 620 (call close)
                    // missing.
            } else if expn_data.is_root() {
                err.span_suggestion_verbose(
                    prefix_span.shrink_to_hi(),
                    "consider inserting whitespace here",
                    // NOTE(review): extraction gap — line 625 (the suggested
                    // replacement) missing.
                    Applicability::MaybeIncorrect,
                    // NOTE(review): extraction gap — lines 627-628 (call and
                    // branch closes) missing.
            err.note("prefixed identifiers and literals are reserved since Rust 2021");
            // NOTE(review): extraction gap — lines 630-631 (`err.emit();` and
            // `} else {`, presumably) missing.
            // Before Rust 2021, only emit a lint for migration.
            self.sess.buffer_lint_with_diagnostic(
                &RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
                // NOTE(review): extraction gap — lines 635-637 (span, node-id,
                // and message arguments, presumably) missing.
                BuiltinLintDiagnostics::ReservedPrefix(prefix_span),
                // NOTE(review): extraction gap — remaining closes (lines
                // 639-641) missing.
    /// Fatal report for a raw string delimited by more `#` symbols than the
    /// 255 the grammar allows.
    fn report_too_many_hashes(&self, start: BytePos, found: u32) -> ! {
        // NOTE(review): extraction gap — original lines 644-647 (the fatal
        // diagnostic call and `format!(` wrapping the message below) missing.
        "too many `#` symbols: raw strings may be delimited \
        by up to 255 `#` symbols, but found {}",
        // NOTE(review): extraction gap — remaining lines (650-654: the `found`
        // argument and call/function closes) missing.
    // NOTE(review): extraction gap — original line 656 (the `fn cook_quoted(`
    // header) and lines 658-662 (`&self`, `mode`, `start`, `end`,
    // `prefix_len`, `postfix_len`, per the body) are missing. The body strips
    // the literal's delimiters, runs unescape validation over the content
    // (reporting any errors), and interns the content, degrading the kind to
    // `token::Err` when a fatal unescape error occurred.
        kind: token::LitKind,
    ) -> (token::LitKind, Symbol) {
        let mut has_fatal_err = false;
        // Strip the delimiters: `prefix_len` bytes at the front (e.g. `b"`),
        // `postfix_len` bytes at the back (e.g. `"`).
        let content_start = start + BytePos(prefix_len);
        let content_end = end - BytePos(postfix_len);
        let lit_content = self.str_from_to(content_start, content_end);
        unescape::unescape_literal(lit_content, mode, &mut |range, result| {
            // Here we only check for errors. The actual unescaping is done later.
            if let Err(err) = result {
                let span_with_quotes = self.mk_sp(start, end);
                let (start, end) = (range.start as u32, range.end as u32);
                let lo = content_start + BytePos(start);
                let hi = lo + BytePos(end - start);
                let span = self.mk_sp(lo, hi);
                // NOTE(review): extraction gap — line 676 (a fatal-ness check
                // on `err`, presumably — `has_fatal_err` is set below) missing.
                has_fatal_err = true;
                // NOTE(review): extraction gap — lines 678-679 (guard close
                // and the `emit_unescape_error(` opening, presumably) missing.
                &self.sess.span_diagnostic,
                // NOTE(review): extraction gap — lines 681-690 (remaining
                // arguments and closure/call closes) missing.
        // We normally exclude the quotes for the symbol, but for errors we
        // include it because it results in clearer error messages.
        // NOTE(review): extraction gap — line 693 (a branch on
        // `has_fatal_err`, presumably) missing.
        (kind, Symbol::intern(lit_content))
        // NOTE(review): extraction gap — line 695 (presumably `} else {`)
        // missing.
        (token::Err, self.symbol_from_to(start, end))
        // NOTE(review): extraction gap — closing braces (lines 697-699)
        // missing.
701 pub fn nfc_normalize(string
: &str) -> Symbol
{
702 use unicode_normalization
::{is_nfc_quick, IsNormalized, UnicodeNormalization}
;
703 match is_nfc_quick(string
.chars()) {
704 IsNormalized
::Yes
=> Symbol
::intern(string
),
706 let normalized_str
: String
= string
.chars().nfc().collect();
707 Symbol
::intern(&normalized_str
)