1 use crate::lexer
::unicode_chars
::UNICODE_ARRAY
;
2 use rustc_ast
::ast
::{self, AttrStyle}
;
3 use rustc_ast
::token
::{self, CommentKind, Delimiter, Token, TokenKind}
;
4 use rustc_ast
::tokenstream
::{Spacing, TokenStream}
;
5 use rustc_ast
::util
::unicode
::contains_text_flow_control_chars
;
6 use rustc_errors
::{error_code, Applicability, DiagnosticBuilder, ErrorGuaranteed, PResult}
;
7 use rustc_lexer
::unescape
::{self, Mode}
;
8 use rustc_lexer
::{Base, DocStyle, RawStrError}
;
9 use rustc_session
::lint
::builtin
::{
10 RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX
, TEXT_DIRECTION_CODEPOINT_IN_COMMENT
,
12 use rustc_session
::lint
::BuiltinLintDiagnostics
;
13 use rustc_session
::parse
::ParseSess
;
14 use rustc_span
::symbol
::{sym, Symbol}
;
15 use rustc_span
::{edition::Edition, BytePos, Pos, Span}
;
20 mod unescape_error_reporting
;
23 use unescape_error_reporting
::{emit_unescape_error, escaped_char}
;
/// Record describing an unmatched delimiter; `parse_token_trees` returns a
/// `Vec` of these alongside its token-stream result.
///
/// NOTE(review): the per-field notes below are inferred from the field names
/// and types only; the code that fills this struct is not visible here, and at
/// least one original line of the definition is missing from this view.
25 #[derive(Clone, Debug)]
26 pub struct UnmatchedBrace
{
// Presumably the delimiter that was expected at the error site; confirm.
27 pub expected_delim
: Delimiter
,
// Presumably the delimiter actually found, if any; confirm.
28 pub found_delim
: Option
<Delimiter
>,
// Presumably the span of the still-open delimiter, when known; confirm.
30 pub unclosed_span
: Option
<Span
>,
// Presumably a span suggested as the likely culprit, when known; confirm.
31 pub candidate_span
: Option
<Span
>,
34 crate fn parse_token_trees
<'a
>(
38 override_span
: Option
<Span
>,
39 ) -> (PResult
<'a
, TokenStream
>, Vec
<UnmatchedBrace
>) {
40 StringReader { sess, start_pos, pos: start_pos, end_src_index: src.len(), src, override_span }
44 struct StringReader
<'a
> {
46 /// Initial position, read-only.
48 /// The absolute offset within the source_map of the current character.
50 /// Stop reading src at this index.
52 /// Source text to tokenize.
54 override_span
: Option
<Span
>,
57 impl<'a
> StringReader
<'a
> {
58 fn mk_sp(&self, lo
: BytePos
, hi
: BytePos
) -> Span
{
59 self.override_span
.unwrap_or_else(|| Span
::with_root_ctxt(lo
, hi
))
62 /// Returns the next token, and info about preceding whitespace, if any.
63 fn next_token(&mut self) -> (Spacing
, Token
) {
64 let mut spacing
= Spacing
::Joint
;
66 // Skip `#!` at the start of the file
67 let start_src_index
= self.src_index(self.pos
);
68 let text
: &str = &self.src
[start_src_index
..self.end_src_index
];
69 let is_beginning_of_file
= self.pos
== self.start_pos
;
70 if is_beginning_of_file
{
71 if let Some(shebang_len
) = rustc_lexer
::strip_shebang(text
) {
72 self.pos
= self.pos
+ BytePos
::from_usize(shebang_len
);
73 spacing
= Spacing
::Alone
;
77 // Skip trivial (whitespace & comments) tokens
79 let start_src_index
= self.src_index(self.pos
);
80 let text
: &str = &self.src
[start_src_index
..self.end_src_index
];
83 let span
= self.mk_sp(self.pos
, self.pos
);
84 return (spacing
, Token
::new(token
::Eof
, span
));
87 let token
= rustc_lexer
::first_token(text
);
90 self.pos
= self.pos
+ BytePos
::from_usize(token
.len
);
92 debug
!("next_token: {:?}({:?})", token
.kind
, self.str_from(start
));
94 match self.cook_lexer_token(token
.kind
, start
) {
96 let span
= self.mk_sp(start
, self.pos
);
97 return (spacing
, Token
::new(kind
, span
));
99 None
=> spacing
= Spacing
::Alone
,
104 /// Report a fatal lexical error with a given span.
105 fn fatal_span(&self, sp
: Span
, m
: &str) -> ! {
106 self.sess
.span_diagnostic
.span_fatal(sp
, m
)
109 /// Report a lexical error with a given span.
110 fn err_span(&self, sp
: Span
, m
: &str) {
111 self.sess
.span_diagnostic
.struct_span_err(sp
, m
).emit();
114 /// Report a fatal error spanning [`from_pos`, `to_pos`).
115 fn fatal_span_(&self, from_pos
: BytePos
, to_pos
: BytePos
, m
: &str) -> ! {
116 self.fatal_span(self.mk_sp(from_pos
, to_pos
), m
)
119 /// Report a lexical error spanning [`from_pos`, `to_pos`).
120 fn err_span_(&self, from_pos
: BytePos
, to_pos
: BytePos
, m
: &str) {
121 self.err_span(self.mk_sp(from_pos
, to_pos
), m
)
124 fn struct_fatal_span_char(
130 ) -> DiagnosticBuilder
<'a
, !> {
133 .struct_span_fatal(self.mk_sp(from_pos
, to_pos
), &format
!("{}: {}", m
, escaped_char(c
)))
136 fn struct_err_span_char(
142 ) -> DiagnosticBuilder
<'a
, ErrorGuaranteed
> {
145 .struct_span_err(self.mk_sp(from_pos
, to_pos
), &format
!("{}: {}", m
, escaped_char(c
)))
148 /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
149 /// complain about it.
150 fn lint_unicode_text_flow(&self, start
: BytePos
) {
151 // Opening delimiter of the length 2 is not included into the comment text.
152 let content_start
= start
+ BytePos(2);
153 let content
= self.str_from(content_start
);
154 if contains_text_flow_control_chars(content
) {
155 let span
= self.mk_sp(start
, self.pos
);
156 self.sess
.buffer_lint_with_diagnostic(
157 &TEXT_DIRECTION_CODEPOINT_IN_COMMENT
,
160 "unicode codepoint changing visible direction of text present in comment",
161 BuiltinLintDiagnostics
::UnicodeTextFlow(span
, content
.to_string()),
166 /// Turns simple `rustc_lexer::TokenKind` enum into a rich
167 /// `rustc_ast::TokenKind`. This turns strings into interned
168 /// symbols and runs additional validation.
169 fn cook_lexer_token(&self, token
: rustc_lexer
::TokenKind
, start
: BytePos
) -> Option
<TokenKind
> {
171 rustc_lexer
::TokenKind
::LineComment { doc_style }
=> {
172 // Skip non-doc comments
173 let Some(doc_style
) = doc_style
else {
174 self.lint_unicode_text_flow(start
);
178 // Opening delimiter of the length 3 is not included into the symbol.
179 let content_start
= start
+ BytePos(3);
180 let content
= self.str_from(content_start
);
181 self.cook_doc_comment(content_start
, content
, CommentKind
::Line
, doc_style
)
183 rustc_lexer
::TokenKind
::BlockComment { doc_style, terminated }
=> {
185 self.report_unterminated_block_comment(start
, doc_style
);
188 // Skip non-doc comments
189 let Some(doc_style
) = doc_style
else {
190 self.lint_unicode_text_flow(start
);
194 // Opening delimiter of the length 3 and closing delimiter of the length 2
195 // are not included into the symbol.
196 let content_start
= start
+ BytePos(3);
197 let content_end
= self.pos
- BytePos(if terminated { 2 }
else { 0 }
);
198 let content
= self.str_from_to(content_start
, content_end
);
199 self.cook_doc_comment(content_start
, content
, CommentKind
::Block
, doc_style
)
201 rustc_lexer
::TokenKind
::Whitespace
=> return None
,
202 rustc_lexer
::TokenKind
::Ident
203 | rustc_lexer
::TokenKind
::RawIdent
204 | rustc_lexer
::TokenKind
::UnknownPrefix
=> {
205 let is_raw_ident
= token
== rustc_lexer
::TokenKind
::RawIdent
;
206 let is_unknown_prefix
= token
== rustc_lexer
::TokenKind
::UnknownPrefix
;
207 let mut ident_start
= start
;
209 ident_start
= ident_start
+ BytePos(2);
211 if is_unknown_prefix
{
212 self.report_unknown_prefix(start
);
214 let sym
= nfc_normalize(self.str_from(ident_start
));
215 let span
= self.mk_sp(start
, self.pos
);
216 self.sess
.symbol_gallery
.insert(sym
, span
);
218 if !sym
.can_be_raw() {
219 self.err_span(span
, &format
!("`{}` cannot be a raw identifier", sym
));
221 self.sess
.raw_identifier_spans
.borrow_mut().push(span
);
223 token
::Ident(sym
, is_raw_ident
)
225 rustc_lexer
::TokenKind
::InvalidIdent
226 // Do not recover an identifier with emoji if the codepoint is a confusable
227 // with a recoverable substitution token, like `➖`.
231 let sym
= self.str_from(start
);
232 sym
.chars().count() == 1 && c
== sym
.chars().next().unwrap()
236 let sym
= nfc_normalize(self.str_from(start
));
237 let span
= self.mk_sp(start
, self.pos
);
238 self.sess
.bad_unicode_identifiers
.borrow_mut().entry(sym
).or_default().push(span
);
239 token
::Ident(sym
, false)
241 rustc_lexer
::TokenKind
::Literal { kind, suffix_start }
=> {
242 let suffix_start
= start
+ BytePos(suffix_start
as u32);
243 let (kind
, symbol
) = self.cook_lexer_literal(start
, suffix_start
, kind
);
244 let suffix
= if suffix_start
< self.pos
{
245 let string
= self.str_from(suffix_start
);
250 self.mk_sp(suffix_start
, self.pos
),
251 "underscore literal suffix is not allowed",
254 "this was previously accepted by the compiler but is \
255 being phased out; it will become a hard error in \
260 <https://github.com/rust-lang/rust/issues/42326> \
261 for more information",
266 Some(Symbol
::intern(string
))
271 token
::Literal(token
::Lit { kind, symbol, suffix }
)
273 rustc_lexer
::TokenKind
::Lifetime { starts_with_number }
=> {
274 // Include the leading `'` in the real identifier, for macro
275 // expansion purposes. See #12512 for the gory details of why
276 // this is necessary.
277 let lifetime_name
= self.str_from(start
);
278 if starts_with_number
{
279 self.err_span_(start
, self.pos
, "lifetimes cannot start with a number");
281 let ident
= Symbol
::intern(lifetime_name
);
282 token
::Lifetime(ident
)
284 rustc_lexer
::TokenKind
::Semi
=> token
::Semi
,
285 rustc_lexer
::TokenKind
::Comma
=> token
::Comma
,
286 rustc_lexer
::TokenKind
::Dot
=> token
::Dot
,
287 rustc_lexer
::TokenKind
::OpenParen
=> token
::OpenDelim(Delimiter
::Parenthesis
),
288 rustc_lexer
::TokenKind
::CloseParen
=> token
::CloseDelim(Delimiter
::Parenthesis
),
289 rustc_lexer
::TokenKind
::OpenBrace
=> token
::OpenDelim(Delimiter
::Brace
),
290 rustc_lexer
::TokenKind
::CloseBrace
=> token
::CloseDelim(Delimiter
::Brace
),
291 rustc_lexer
::TokenKind
::OpenBracket
=> token
::OpenDelim(Delimiter
::Bracket
),
292 rustc_lexer
::TokenKind
::CloseBracket
=> token
::CloseDelim(Delimiter
::Bracket
),
293 rustc_lexer
::TokenKind
::At
=> token
::At
,
294 rustc_lexer
::TokenKind
::Pound
=> token
::Pound
,
295 rustc_lexer
::TokenKind
::Tilde
=> token
::Tilde
,
296 rustc_lexer
::TokenKind
::Question
=> token
::Question
,
297 rustc_lexer
::TokenKind
::Colon
=> token
::Colon
,
298 rustc_lexer
::TokenKind
::Dollar
=> token
::Dollar
,
299 rustc_lexer
::TokenKind
::Eq
=> token
::Eq
,
300 rustc_lexer
::TokenKind
::Bang
=> token
::Not
,
301 rustc_lexer
::TokenKind
::Lt
=> token
::Lt
,
302 rustc_lexer
::TokenKind
::Gt
=> token
::Gt
,
303 rustc_lexer
::TokenKind
::Minus
=> token
::BinOp(token
::Minus
),
304 rustc_lexer
::TokenKind
::And
=> token
::BinOp(token
::And
),
305 rustc_lexer
::TokenKind
::Or
=> token
::BinOp(token
::Or
),
306 rustc_lexer
::TokenKind
::Plus
=> token
::BinOp(token
::Plus
),
307 rustc_lexer
::TokenKind
::Star
=> token
::BinOp(token
::Star
),
308 rustc_lexer
::TokenKind
::Slash
=> token
::BinOp(token
::Slash
),
309 rustc_lexer
::TokenKind
::Caret
=> token
::BinOp(token
::Caret
),
310 rustc_lexer
::TokenKind
::Percent
=> token
::BinOp(token
::Percent
),
312 rustc_lexer
::TokenKind
::Unknown
| rustc_lexer
::TokenKind
::InvalidIdent
=> {
313 let c
= self.str_from(start
).chars().next().unwrap();
315 self.struct_err_span_char(start
, self.pos
, "unknown start of token", c
);
316 // FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs
317 // into the token, instead of keeping a table in `check_for_substitution`. Ideally,
318 // this should be inside `rustc_lexer`. However, we should first remove compound
319 // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
320 // as there will be less overall work to do this way.
321 let token
= unicode_chars
::check_for_substitution(self, start
, c
, &mut err
);
323 err
.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used");
333 content_start
: BytePos
,
335 comment_kind
: CommentKind
,
338 if content
.contains('
\r'
) {
339 for (idx
, _
) in content
.char_indices().filter(|&(_
, c
)| c
== '
\r'
) {
341 content_start
+ BytePos(idx
as u32),
342 content_start
+ BytePos(idx
as u32 + 1),
344 CommentKind
::Line
=> "bare CR not allowed in doc-comment",
345 CommentKind
::Block
=> "bare CR not allowed in block doc-comment",
351 let attr_style
= match doc_style
{
352 DocStyle
::Outer
=> AttrStyle
::Outer
,
353 DocStyle
::Inner
=> AttrStyle
::Inner
,
356 token
::DocComment(comment_kind
, attr_style
, Symbol
::intern(content
))
359 fn cook_lexer_literal(
362 suffix_start
: BytePos
,
363 kind
: rustc_lexer
::LiteralKind
,
364 ) -> (token
::LitKind
, Symbol
) {
365 // prefix means `"` or `br"` or `r###"`, ...
366 let (lit_kind
, mode
, prefix_len
, postfix_len
) = match kind
{
367 rustc_lexer
::LiteralKind
::Char { terminated }
=> {
369 self.sess
.span_diagnostic
.span_fatal_with_code(
370 self.mk_sp(start
, suffix_start
),
371 "unterminated character literal",
375 (token
::Char
, Mode
::Char
, 1, 1) // ' '
377 rustc_lexer
::LiteralKind
::Byte { terminated }
=> {
379 self.sess
.span_diagnostic
.span_fatal_with_code(
380 self.mk_sp(start
+ BytePos(1), suffix_start
),
381 "unterminated byte constant",
385 (token
::Byte
, Mode
::Byte
, 2, 1) // b' '
387 rustc_lexer
::LiteralKind
::Str { terminated }
=> {
389 self.sess
.span_diagnostic
.span_fatal_with_code(
390 self.mk_sp(start
, suffix_start
),
391 "unterminated double quote string",
395 (token
::Str
, Mode
::Str
, 1, 1) // " "
397 rustc_lexer
::LiteralKind
::ByteStr { terminated }
=> {
399 self.sess
.span_diagnostic
.span_fatal_with_code(
400 self.mk_sp(start
+ BytePos(1), suffix_start
),
401 "unterminated double quote byte string",
405 (token
::ByteStr
, Mode
::ByteStr
, 2, 1) // b" "
407 rustc_lexer
::LiteralKind
::RawStr { n_hashes, err }
=> {
408 self.report_raw_str_error(start
, err
);
409 let n
= u32::from(n_hashes
);
410 (token
::StrRaw(n_hashes
), Mode
::RawStr
, 2 + n
, 1 + n
) // r##" "##
412 rustc_lexer
::LiteralKind
::RawByteStr { n_hashes, err }
=> {
413 self.report_raw_str_error(start
, err
);
414 let n
= u32::from(n_hashes
);
415 (token
::ByteStrRaw(n_hashes
), Mode
::RawByteStr
, 3 + n
, 1 + n
) // br##" "##
417 rustc_lexer
::LiteralKind
::Int { base, empty_int }
=> {
418 return if empty_int
{
421 .struct_span_err_with_code(
422 self.mk_sp(start
, suffix_start
),
423 "no valid digits found for number",
427 (token
::Integer
, sym
::integer(0))
429 self.validate_int_literal(base
, start
, suffix_start
);
430 (token
::Integer
, self.symbol_from_to(start
, suffix_start
))
433 rustc_lexer
::LiteralKind
::Float { base, empty_exponent }
=> {
435 self.err_span_(start
, self.pos
, "expected at least one digit in exponent");
439 Base
::Hexadecimal
=> self.err_span_(
442 "hexadecimal float literal is not supported",
445 self.err_span_(start
, suffix_start
, "octal float literal is not supported")
448 self.err_span_(start
, suffix_start
, "binary float literal is not supported")
453 let id
= self.symbol_from_to(start
, suffix_start
);
454 return (token
::Float
, id
);
457 let content_start
= start
+ BytePos(prefix_len
);
458 let content_end
= suffix_start
- BytePos(postfix_len
);
459 let id
= self.symbol_from_to(content_start
, content_end
);
460 self.validate_literal_escape(mode
, content_start
, content_end
, prefix_len
, postfix_len
);
465 fn src_index(&self, pos
: BytePos
) -> usize {
466 (pos
- self.start_pos
).to_usize()
469 /// Slice of the source text from `start` up to but excluding `self.pos`,
470 /// meaning the slice does not include the character `self.ch`.
471 fn str_from(&self, start
: BytePos
) -> &str {
472 self.str_from_to(start
, self.pos
)
475 /// As symbol_from, with an explicit endpoint.
476 fn symbol_from_to(&self, start
: BytePos
, end
: BytePos
) -> Symbol
{
477 debug
!("taking an ident from {:?} to {:?}", start
, end
);
478 Symbol
::intern(self.str_from_to(start
, end
))
481 /// Slice of the source text spanning from `start` up to but excluding `end`.
482 fn str_from_to(&self, start
: BytePos
, end
: BytePos
) -> &str {
483 &self.src
[self.src_index(start
)..self.src_index(end
)]
/// Forwards a raw-string lexing error to the matching reporting routine:
/// `InvalidStarter` goes to `report_non_started_raw_string`, `NoTerminator`
/// to `report_unterminated_raw_string`, and `TooManyDelimiters` to
/// `report_too_many_hashes`. `start` is passed through to each reporter.
///
/// NOTE(review): the `match` header and any arm for `None` are not visible in
/// this view.
486 fn report_raw_str_error(&self, start
: BytePos
, opt_err
: Option
<RawStrError
>) {
488 Some(RawStrError
::InvalidStarter { bad_char }
) => {
489 self.report_non_started_raw_string(start
, bad_char
)
// Argument order differs from the field order: the possible terminator
// offset is passed before `found`.
491 Some(RawStrError
::NoTerminator { expected, found, possible_terminator_offset }
) => self
492 .report_unterminated_raw_string(start
, expected
, possible_terminator_offset
, found
),
493 Some(RawStrError
::TooManyDelimiters { found }
) => {
494 self.report_too_many_hashes(start
, found
)
500 fn report_non_started_raw_string(&self, start
: BytePos
, bad_char
: char) -> ! {
501 self.struct_fatal_span_char(
504 "found invalid character; only `#` is allowed in raw string delimitation",
510 fn report_unterminated_raw_string(
514 possible_offset
: Option
<usize>,
515 found_terminators
: usize,
517 let mut err
= self.sess
.span_diagnostic
.struct_span_fatal_with_code(
518 self.mk_sp(start
, start
),
519 "unterminated raw string",
523 err
.span_label(self.mk_sp(start
, start
), "unterminated raw string");
527 "this raw string should be terminated with `\"{}`",
532 if let Some(possible_offset
) = possible_offset
{
533 let lo
= start
+ BytePos(possible_offset
as u32);
534 let hi
= lo
+ BytePos(found_terminators
as u32);
535 let span
= self.mk_sp(lo
, hi
);
538 "consider terminating the string here",
539 "#".repeat(n_hashes
),
540 Applicability
::MaybeIncorrect
,
547 fn report_unterminated_block_comment(&self, start
: BytePos
, doc_style
: Option
<DocStyle
>) {
548 let msg
= match doc_style
{
549 Some(_
) => "unterminated block doc-comment",
550 None
=> "unterminated block comment",
552 let last_bpos
= self.pos
;
553 let mut err
= self.sess
.span_diagnostic
.struct_span_fatal_with_code(
554 self.mk_sp(start
, last_bpos
),
558 let mut nested_block_comment_open_idxs
= vec
![];
559 let mut last_nested_block_comment_idxs
= None
;
560 let mut content_chars
= self.str_from(start
).char_indices().peekable();
562 while let Some((idx
, current_char
)) = content_chars
.next() {
563 match content_chars
.peek() {
564 Some((_
, '
*'
)) if current_char
== '
/'
=> {
565 nested_block_comment_open_idxs
.push(idx
);
567 Some((_
, '
/'
)) if current_char
== '
*'
=> {
568 last_nested_block_comment_idxs
=
569 nested_block_comment_open_idxs
.pop().map(|open_idx
| (open_idx
, idx
));
575 if let Some((nested_open_idx
, nested_close_idx
)) = last_nested_block_comment_idxs
{
576 err
.span_label(self.mk_sp(start
, start
+ BytePos(2)), msg
)
579 start
+ BytePos(nested_open_idx
as u32),
580 start
+ BytePos(nested_open_idx
as u32 + 2),
582 "...as last nested comment starts here, maybe you want to close this instead?",
586 start
+ BytePos(nested_close_idx
as u32),
587 start
+ BytePos(nested_close_idx
as u32 + 2),
589 "...and last nested comment terminates here.",
596 // RFC 3101 introduced the idea of (reserved) prefixes. As of Rust 2021,
597 // using an (unknown) prefix is an error. In earlier editions, however, they
598 // only result in an (allowed by default) lint, and are treated as regular
599 // identifier tokens.
600 fn report_unknown_prefix(&self, start
: BytePos
) {
601 let prefix_span
= self.mk_sp(start
, self.pos
);
602 let prefix_str
= self.str_from_to(start
, self.pos
);
603 let msg
= format
!("prefix `{}` is unknown", prefix_str
);
605 let expn_data
= prefix_span
.ctxt().outer_expn_data();
607 if expn_data
.edition
>= Edition
::Edition2021
{
608 // In Rust 2021, this is a hard error.
609 let mut err
= self.sess
.span_diagnostic
.struct_span_err(prefix_span
, &msg
);
610 err
.span_label(prefix_span
, "unknown prefix");
611 if prefix_str
== "rb" {
612 err
.span_suggestion_verbose(
614 "use `br` for a raw byte string",
616 Applicability
::MaybeIncorrect
,
618 } else if expn_data
.is_root() {
619 err
.span_suggestion_verbose(
620 prefix_span
.shrink_to_hi(),
621 "consider inserting whitespace here",
623 Applicability
::MaybeIncorrect
,
626 err
.note("prefixed identifiers and literals are reserved since Rust 2021");
629 // Before Rust 2021, only emit a lint for migration.
630 self.sess
.buffer_lint_with_diagnostic(
631 &RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX
,
635 BuiltinLintDiagnostics
::ReservedPrefix(prefix_span
),
640 fn report_too_many_hashes(&self, start
: BytePos
, found
: usize) -> ! {
645 "too many `#` symbols: raw strings may be delimited \
646 by up to 255 `#` symbols, but found {}",
652 fn validate_literal_escape(
655 content_start
: BytePos
,
656 content_end
: BytePos
,
660 let lit_content
= self.str_from_to(content_start
, content_end
);
661 unescape
::unescape_literal(lit_content
, mode
, &mut |range
, result
| {
662 // Here we only check for errors. The actual unescaping is done later.
663 if let Err(err
) = result
{
664 let span_with_quotes
= self
665 .mk_sp(content_start
- BytePos(prefix_len
), content_end
+ BytePos(postfix_len
));
666 let (start
, end
) = (range
.start
as u32, range
.end
as u32);
667 let lo
= content_start
+ BytePos(start
);
668 let hi
= lo
+ BytePos(end
- start
);
669 let span
= self.mk_sp(lo
, hi
);
671 &self.sess
.span_diagnostic
,
/// Checks the digits of an integer literal and reports an error for every
/// character that is neither `_` nor a valid digit in the literal's base.
///
/// The scan starts two bytes after `content_start` and ends at `content_end`.
/// NOTE(review): the two skipped bytes are presumably the base prefix (e.g.
/// `0x`); the arms of `match base` that compute the numeric base are not
/// visible in this view.
683 fn validate_int_literal(&self, base
: Base
, content_start
: BytePos
, content_end
: BytePos
) {
684 let base
= match base
{
// Skip the first two bytes of the literal before scanning its digits.
689 let s
= self.str_from_to(content_start
+ BytePos(2), content_end
);
690 for (idx
, c
) in s
.char_indices() {
691 let idx
= idx
as u32;
// `_` is always accepted; anything else must be a digit in `base`.
692 if c
!= '_'
&& c
.to_digit(base
).is_none() {
// `idx` is relative to `s`, which begins 2 bytes after `content_start`;
// the span covers exactly the offending character.
693 let lo
= content_start
+ BytePos(2 + idx
);
694 let hi
= content_start
+ BytePos(2 + idx
+ c
.len_utf8() as u32);
695 self.err_span_(lo
, hi
, &format
!("invalid digit for a base {} literal", base
));
/// Interns `string` as a `Symbol`, using its NFC-normalized form.
///
/// `is_nfc_quick` is tried first: when it reports `IsNormalized::Yes`, the
/// input is interned directly. In the remaining case the characters are run
/// through `.nfc()`, collected into a new `String`, and that is interned.
///
/// NOTE(review): the header of the second match arm is not visible in this
/// view.
701 pub fn nfc_normalize(string
: &str) -> Symbol
{
702 use unicode_normalization
::{is_nfc_quick, IsNormalized, UnicodeNormalization}
;
703 match is_nfc_quick(string
.chars()) {
// Fast path: already normalized, so intern the input as-is.
704 IsNormalized
::Yes
=> Symbol
::intern(string
),
// Slow path: build the NFC-normalized string and intern that instead.
706 let normalized_str
: String
= string
.chars().nfc().collect();
707 Symbol
::intern(&normalized_str
)