]> git.proxmox.com Git - rustc.git/blame - compiler/rustc_expand/src/mbe/macro_parser.rs
New upstream version 1.65.0+dfsg1
[rustc.git] / compiler / rustc_expand / src / mbe / macro_parser.rs
CommitLineData
fc512014 1//! This is an NFA-based parser, which calls out to the main Rust parser for named non-terminals
3b2f2976
XL
2//! (which it commits to fully when it hits one in a grammar). There's a set of current NFA threads
3//! and a set of next ones. Instead of NTs, we have a special case for Kleene star. The big-O, in
4//! pathological cases, is worse than traditional use of NFA or Earley parsing, but it's an easier
5//! fit for Macro-by-Example-style rules.
6//!
7//! (In order to prevent the pathological case, we'd need to lazily construct the resulting
8//! `NamedMatch`es at the very end. It'd be a pain, and require more memory to keep around old
5e7ed085 9//! matcher positions, but it would also save overhead)
3b2f2976 10//!
94b46f34 11//! We don't say this parser uses the Earley algorithm, because it's unnecessarily inaccurate.
3b2f2976
XL
12//! The macro parser restricts itself to the features of finite state automata. Earley parsers
13//! can be described as an extension of NFAs with completion rules, prediction rules, and recursion.
1a4d82fc
JJ
14//!
15//! Quick intro to how the parser works:
16//!
5e7ed085
FG
17//! A "matcher position" (a.k.a. "position" or "mp") is a dot in the middle of a matcher, usually
18//! written as a `·`. For example `· a $( a )* a b` is one, as is `a $( · a )* a b`.
1a4d82fc 19//!
04454e1e 20//! The parser walks through the input a token at a time, maintaining a list
5e7ed085 21//! of threads consistent with the current position in the input string: `cur_mps`.
1a4d82fc 22//!
5e7ed085
FG
23//! As it processes them, it fills up `eof_mps` with threads that would be valid if
24//! the macro invocation is now over, `bb_mps` with threads that are waiting on
25//! a Rust non-terminal like `$e:expr`, and `next_mps` with threads that are waiting
b039eaaf 26//! on a particular token. Most of the logic concerns moving the · through the
3b2f2976
XL
27//! repetitions indicated by Kleene stars. The rules for moving the · without
28//! consuming any input are called epsilon transitions. It only advances or calls
5e7ed085 29//! out to the real Rust parser when no `cur_mps` threads remain.
1a4d82fc 30//!
7cac9316 31//! Example:
1a4d82fc 32//!
7cac9316
XL
33//! ```text, ignore
34//! Start parsing a a a a b against [· a $( a )* a b].
35//!
36//! Remaining input: a a a a b
3b2f2976 37//! next: [· a $( a )* a b]
1a4d82fc 38//!
7cac9316 39//! - - - Advance over an a. - - -
1a4d82fc 40//!
7cac9316 41//! Remaining input: a a a b
1a4d82fc 42//! cur: [a · $( a )* a b]
5e7ed085 43//! Descend/Skip (first position).
1a4d82fc
JJ
44//! next: [a $( · a )* a b] [a $( a )* · a b].
45//!
7cac9316 46//! - - - Advance over an a. - - -
1a4d82fc 47//!
7cac9316 48//! Remaining input: a a b
3b2f2976 49//! cur: [a $( a · )* a b] [a $( a )* a · b]
5e7ed085 50//! Follow epsilon transition: Finish/Repeat (first position)
1a4d82fc
JJ
51//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
52//!
7cac9316 53//! - - - Advance over an a. - - - (this looks exactly like the last step)
1a4d82fc 54//!
7cac9316 55//! Remaining input: a b
3b2f2976 56//! cur: [a $( a · )* a b] [a $( a )* a · b]
5e7ed085 57//! Follow epsilon transition: Finish/Repeat (first position)
1a4d82fc
JJ
58//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
59//!
7cac9316 60//! - - - Advance over an a. - - - (this looks exactly like the last step)
1a4d82fc 61//!
7cac9316 62//! Remaining input: b
3b2f2976 63//! cur: [a $( a · )* a b] [a $( a )* a · b]
5e7ed085 64//! Follow epsilon transition: Finish/Repeat (first position)
3b2f2976 65//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
1a4d82fc 66//!
7cac9316 67//! - - - Advance over a b. - - -
1a4d82fc 68//!
7cac9316 69//! Remaining input: ''
1a4d82fc 70//! eof: [a $( a )* a b ·]
7cac9316 71//! ```
1a4d82fc 72
923072b8
FG
73pub(crate) use NamedMatch::*;
74pub(crate) use ParseResult::*;
9fa01778 75
04454e1e 76use crate::mbe::{KleeneOp, TokenTree};
e74abb32 77
04454e1e
FG
78use rustc_ast::token::{self, DocComment, Nonterminal, NonterminalKind, Token};
79use rustc_lint_defs::pluralize;
5e7ed085 80use rustc_parse::parser::{NtOrTt, Parser};
5869c6ff 81use rustc_span::symbol::MacroRulesNormalizedIdent;
04454e1e 82use rustc_span::Span;
223e47cc 83
b7449926 84use rustc_data_structures::fx::FxHashMap;
9fa01778 85use rustc_data_structures::sync::Lrc;
17df50a5 86use rustc_span::symbol::Ident;
74b04a01 87use std::borrow::Cow;
b7449926 88use std::collections::hash_map::Entry::{Occupied, Vacant};
223e47cc 89
04454e1e
FG
90/// A unit within a matcher that a `MatcherPos` can refer to. Similar to (and derived from)
91/// `mbe::TokenTree`, but designed specifically for fast and easy traversal during matching.
92/// Notable differences to `mbe::TokenTree`:
93/// - It is non-recursive, i.e. there is no nesting.
94/// - The end pieces of each sequence (the separator, if present, and the Kleene op) are
95/// represented explicitly, as is the very end of the matcher.
96///
97/// This means a matcher can be represented by `&[MatcherLoc]`, and traversal mostly involves
98/// simply incrementing the current matcher position index by one.
99pub(super) enum MatcherLoc {
100 Token {
101 token: Token,
102 },
103 Delimited,
104 Sequence {
105 op: KleeneOp,
106 num_metavar_decls: usize,
107 idx_first_after: usize,
108 next_metavar: usize,
109 seq_depth: usize,
110 },
111 SequenceKleeneOpNoSep {
112 op: KleeneOp,
113 idx_first: usize,
114 },
115 SequenceSep {
116 separator: Token,
117 },
118 SequenceKleeneOpAfterSep {
119 idx_first: usize,
120 },
121 MetaVarDecl {
122 span: Span,
123 bind: Ident,
124 kind: Option<NonterminalKind>,
125 next_metavar: usize,
126 seq_depth: usize,
127 },
128 Eof,
129}
5e7ed085 130
04454e1e
FG
131pub(super) fn compute_locs(matcher: &[TokenTree]) -> Vec<MatcherLoc> {
132 fn inner(
133 tts: &[TokenTree],
134 locs: &mut Vec<MatcherLoc>,
135 next_metavar: &mut usize,
136 seq_depth: usize,
137 ) {
138 for tt in tts {
139 match tt {
140 TokenTree::Token(token) => {
141 locs.push(MatcherLoc::Token { token: token.clone() });
142 }
143 TokenTree::Delimited(span, delimited) => {
144 let open_token = Token::new(token::OpenDelim(delimited.delim), span.open);
145 let close_token = Token::new(token::CloseDelim(delimited.delim), span.close);
146
147 locs.push(MatcherLoc::Delimited);
148 locs.push(MatcherLoc::Token { token: open_token });
149 inner(&delimited.tts, locs, next_metavar, seq_depth);
150 locs.push(MatcherLoc::Token { token: close_token });
151 }
152 TokenTree::Sequence(_, seq) => {
153 // We can't determine `idx_first_after` and construct the final
154 // `MatcherLoc::Sequence` until after `inner()` is called and the sequence end
155 // pieces are processed. So we push a dummy value (`Eof` is cheapest to
156 // construct) now, and overwrite it with the proper value below.
157 let dummy = MatcherLoc::Eof;
158 locs.push(dummy);
159
160 let next_metavar_orig = *next_metavar;
161 let op = seq.kleene.op;
162 let idx_first = locs.len();
163 let idx_seq = idx_first - 1;
164 inner(&seq.tts, locs, next_metavar, seq_depth + 1);
165
166 if let Some(separator) = &seq.separator {
167 locs.push(MatcherLoc::SequenceSep { separator: separator.clone() });
168 locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first });
169 } else {
170 locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first });
171 }
223e47cc 172
04454e1e
FG
173 // Overwrite the dummy value pushed above with the proper value.
174 locs[idx_seq] = MatcherLoc::Sequence {
175 op,
176 num_metavar_decls: seq.num_captures,
177 idx_first_after: locs.len(),
178 next_metavar: next_metavar_orig,
179 seq_depth,
180 };
181 }
182 &TokenTree::MetaVarDecl(span, bind, kind) => {
183 locs.push(MatcherLoc::MetaVarDecl {
184 span,
185 bind,
186 kind,
187 next_metavar: *next_metavar,
188 seq_depth,
189 });
190 *next_metavar += 1;
191 }
192 TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
193 }
194 }
195 }
223e47cc 196
04454e1e
FG
197 let mut locs = vec![];
198 let mut next_metavar = 0;
199 inner(matcher, &mut locs, &mut next_metavar, /* seq_depth */ 0);
223e47cc 200
04454e1e
FG
201 // A final entry is needed for eof.
202 locs.push(MatcherLoc::Eof);
223e47cc 203
04454e1e 204 locs
5e7ed085 205}
a1dfa0c6 206
04454e1e
FG
207/// A single matcher position, representing the state of matching.
208struct MatcherPos {
209 /// The index into `TtParser::locs`, which represents the "dot".
5e7ed085 210 idx: usize,
2c00a5a8 211
04454e1e
FG
212 /// The matches made against metavar decls so far. On a successful match, this vector ends up
213 /// with one element per metavar decl in the matcher. Each element records token trees matched
214 /// against the relevant metavar by the black box parser. An element will be a `MatchedSeq` if
215 /// the corresponding metavar decl is within a sequence.
216 ///
217 /// It is critical to performance that this is an `Lrc`, because it gets cloned frequently when
218 /// processing sequences. Mostly for sequence-ending possibilities that must be tried but end
219 /// up failing.
220 matches: Lrc<Vec<NamedMatch>>,
223e47cc
LB
221}
222
5e7ed085
FG
223// This type is used a lot. Make sure it doesn't unintentionally get bigger.
224#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
04454e1e 225rustc_data_structures::static_assert_size!(MatcherPos, 16);
94b46f34 226
04454e1e
FG
227impl MatcherPos {
228 /// Adds `m` as a named match for the `metavar_idx`-th metavar. There are only two call sites,
229 /// and both are hot enough to be always worth inlining.
230 #[inline(always)]
231 fn push_match(&mut self, metavar_idx: usize, seq_depth: usize, m: NamedMatch) {
5e7ed085 232 let matches = Lrc::make_mut(&mut self.matches);
04454e1e 233 match seq_depth {
5e7ed085
FG
234 0 => {
235 // We are not within a sequence. Just append `m`.
04454e1e 236 assert_eq!(metavar_idx, matches.len());
5e7ed085
FG
237 matches.push(m);
238 }
239 _ => {
240 // We are within a sequence. Find the final `MatchedSeq` at the appropriate depth
241 // and append `m` to its vector.
04454e1e
FG
242 let mut curr = &mut matches[metavar_idx];
243 for _ in 0..seq_depth - 1 {
5e7ed085 244 match curr {
04454e1e 245 MatchedSeq(seq) => curr = seq.last_mut().unwrap(),
5e7ed085
FG
246 _ => unreachable!(),
247 }
248 }
249 match curr {
04454e1e 250 MatchedSeq(seq) => seq.push(m),
5e7ed085
FG
251 _ => unreachable!(),
252 }
253 }
94b46f34
XL
254 }
255 }
256}
257
04454e1e 258enum EofMatcherPositions {
5e7ed085 259 None,
04454e1e 260 One(MatcherPos),
5e7ed085
FG
261 Multiple,
262}
263
2c00a5a8 264/// Represents the possible results of an attempted parse.
923072b8 265pub(crate) enum ParseResult<T> {
2c00a5a8
XL
266 /// Parsed successfully.
267 Success(T),
268 /// Arm failed to match. If the second parameter is `token::Eof`, it indicates an unexpected
269 /// end of macro invocation. Otherwise, it indicates that no rules expected the given token.
dc9dc135 270 Failure(Token, &'static str),
2c00a5a8 271 /// Fatal error (malformed macro?). Abort compilation.
dfeec247 272 Error(rustc_span::Span, String),
ba9703b0 273 ErrorReported,
2c00a5a8
XL
274}
275
ba9703b0
XL
276/// A `ParseResult` where the `Success` variant contains a mapping of
277/// `MacroRulesNormalizedIdent`s to `NamedMatch`es. This represents the mapping
278/// of metavars to the token trees they bind to.
923072b8 279pub(crate) type NamedParseResult = ParseResult<FxHashMap<MacroRulesNormalizedIdent, NamedMatch>>;
476ff2be 280
5e7ed085
FG
281/// Count how many metavars declarations are in `matcher`.
282pub(super) fn count_metavar_decls(matcher: &[TokenTree]) -> usize {
283 matcher
284 .iter()
285 .map(|tt| match tt {
286 TokenTree::MetaVarDecl(..) => 1,
287 TokenTree::Sequence(_, seq) => seq.num_captures,
04454e1e 288 TokenTree::Delimited(_, delim) => count_metavar_decls(&delim.tts),
5e7ed085
FG
289 TokenTree::Token(..) => 0,
290 TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
291 })
292 .sum()
223e47cc
LB
293}
294
5e7ed085 295/// `NamedMatch` is a pattern-match result for a single metavar. All
9fa01778 296/// `MatchedNonterminal`s in the `NamedMatch` have the same non-terminal type
5e7ed085 297/// (expr, item, etc).
1a4d82fc 298///
7cac9316 299/// The in-memory structure of a particular `NamedMatch` represents the match
1a4d82fc
JJ
300/// that occurred when a particular subset of a matcher was applied to a
301/// particular token tree.
302///
7cac9316 303/// The width of each `MatchedSeq` in the `NamedMatch`, and the identity of
5e7ed085
FG
304/// the `MatchedNtNonTts`s, will depend on the token tree it was applied
305/// to: each `MatchedSeq` corresponds to a single repetition in the originating
7cac9316 306/// token tree. The depth of the `NamedMatch` structure will therefore depend
5e7ed085
FG
307/// only on the nesting depth of repetitions in the originating token tree it
308/// was derived from.
309///
04454e1e 310/// In layperson's terms: `NamedMatch` will form a tree representing nested matches of a particular
5e7ed085
FG
311/// meta variable. For example, if we are matching the following macro against the following
312/// invocation...
313///
314/// ```rust
315/// macro_rules! foo {
316/// ($($($x:ident),+);+) => {}
317/// }
318///
319/// foo!(a, b, c, d; a, b, c, d, e);
320/// ```
321///
322/// Then, the tree will have the following shape:
323///
04454e1e
FG
324/// ```ignore (private-internal)
325/// # use NamedMatch::*;
5e7ed085
FG
326/// MatchedSeq([
327/// MatchedSeq([
328/// MatchedNonterminal(a),
329/// MatchedNonterminal(b),
330/// MatchedNonterminal(c),
331/// MatchedNonterminal(d),
332/// ]),
333/// MatchedSeq([
334/// MatchedNonterminal(a),
335/// MatchedNonterminal(b),
336/// MatchedNonterminal(c),
337/// MatchedNonterminal(d),
338/// MatchedNonterminal(e),
339/// ])
340/// ])
341/// ```
041b39d2 342#[derive(Debug, Clone)]
923072b8 343pub(crate) enum NamedMatch {
04454e1e 344 MatchedSeq(Vec<NamedMatch>),
5e7ed085
FG
345
346 // A metavar match of type `tt`.
347 MatchedTokenTree(rustc_ast::tokenstream::TokenTree),
348
349 // A metavar match of any type other than `tt`.
9fa01778 350 MatchedNonterminal(Lrc<Nonterminal>),
223e47cc
LB
351}
352
9fa01778 353/// Performs a token equality check, ignoring syntax context (that is, an unhygienic comparison)
2c00a5a8 354fn token_name_eq(t1: &Token, t2: &Token) -> bool {
dc9dc135
XL
355 if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) = (t1.ident(), t2.ident()) {
356 ident1.name == ident2.name && is_raw1 == is_raw2
357 } else if let (Some(ident1), Some(ident2)) = (t1.lifetime(), t2.lifetime()) {
358 ident1.name == ident2.name
cc61c64b 359 } else {
dc9dc135 360 t1.kind == t2.kind
1a4d82fc
JJ
361 }
362}
363
04454e1e 364// Note: the vectors could be created and dropped within `parse_tt`, but to avoid excess
923072b8 365// allocations we have a single vector for each kind that is cleared and reused repeatedly.
04454e1e 366pub struct TtParser {
5e7ed085 367 macro_name: Ident,
223e47cc 368
5e7ed085
FG
369 /// The set of current mps to be processed. This should be empty by the end of a successful
370 /// execution of `parse_tt_inner`.
04454e1e 371 cur_mps: Vec<MatcherPos>,
476ff2be 372
5e7ed085
FG
373 /// The set of newly generated mps. These are used to replenish `cur_mps` in the function
374 /// `parse_tt`.
04454e1e 375 next_mps: Vec<MatcherPos>,
223e47cc 376
5e7ed085 377 /// The set of mps that are waiting for the black-box parser.
04454e1e 378 bb_mps: Vec<MatcherPos>,
223e47cc 379
5e7ed085
FG
380 /// Pre-allocate an empty match array, so it can be cloned cheaply for macros with many rules
381 /// that have no metavars.
04454e1e 382 empty_matches: Lrc<Vec<NamedMatch>>,
5e7ed085
FG
383}
384
04454e1e
FG
385impl TtParser {
386 pub(super) fn new(macro_name: Ident) -> TtParser {
5e7ed085
FG
387 TtParser {
388 macro_name,
389 cur_mps: vec![],
390 next_mps: vec![],
391 bb_mps: vec![],
04454e1e 392 empty_matches: Lrc::new(vec![]),
2c00a5a8 393 }
5e7ed085
FG
394 }
395
396 /// Process the matcher positions of `cur_mps` until it is empty. In the process, this will
397 /// produce more mps in `next_mps` and `bb_mps`.
398 ///
399 /// # Returns
400 ///
401 /// `Some(result)` if everything is finished, `None` otherwise. Note that matches are kept
402 /// track of through the mps generated.
403 fn parse_tt_inner(
404 &mut self,
04454e1e 405 matcher: &[MatcherLoc],
5e7ed085
FG
406 token: &Token,
407 ) -> Option<NamedParseResult> {
408 // Matcher positions that would be valid if the macro invocation was over now. Only
409 // modified if `token == Eof`.
410 let mut eof_mps = EofMatcherPositions::None;
411
412 while let Some(mut mp) = self.cur_mps.pop() {
04454e1e
FG
413 match &matcher[mp.idx] {
414 MatcherLoc::Token { token: t } => {
415 // If it's a doc comment, we just ignore it and move on to the next tt in the
416 // matcher. This is a bug, but #95267 showed that existing programs rely on
417 // this behaviour, and changing it would require some care and a transition
418 // period.
419 //
420 // If the token matches, we can just advance the parser.
421 //
422 // Otherwise, this match has failed, there is nothing to do, and hopefully
423 // another mp in `cur_mps` will match.
424 if matches!(t, Token { kind: DocComment(..), .. }) {
425 mp.idx += 1;
426 self.cur_mps.push(mp);
427 } else if token_name_eq(&t, token) {
428 mp.idx += 1;
429 self.next_mps.push(mp);
5e7ed085 430 }
04454e1e
FG
431 }
432 MatcherLoc::Delimited => {
f2b60f7d 433 // Entering the delimiter is trivial.
04454e1e
FG
434 mp.idx += 1;
435 self.cur_mps.push(mp);
436 }
437 &MatcherLoc::Sequence {
438 op,
439 num_metavar_decls,
440 idx_first_after,
441 next_metavar,
442 seq_depth,
443 } => {
444 // Install an empty vec for each metavar within the sequence.
445 for metavar_idx in next_metavar..next_metavar + num_metavar_decls {
446 mp.push_match(metavar_idx, seq_depth, MatchedSeq(vec![]));
b9856134 447 }
b9856134 448
04454e1e
FG
449 if op == KleeneOp::ZeroOrMore || op == KleeneOp::ZeroOrOne {
450 // Try zero matches of this sequence, by skipping over it.
451 self.cur_mps.push(MatcherPos {
452 idx: idx_first_after,
453 matches: mp.matches.clone(), // a cheap clone
5e7ed085 454 });
5e7ed085
FG
455 }
456
04454e1e
FG
457 // Try one or more matches of this sequence, by entering it.
458 mp.idx += 1;
459 self.cur_mps.push(mp);
476ff2be 460 }
04454e1e
FG
461 &MatcherLoc::SequenceKleeneOpNoSep { op, idx_first } => {
462 // We are past the end of a sequence with no separator. Try ending the
463 // sequence. If that's not possible, `ending_mp` will fail quietly when it is
5e7ed085 464 // processed next time around the loop.
04454e1e
FG
465 let ending_mp = MatcherPos {
466 idx: mp.idx + 1, // +1 skips the Kleene op
5e7ed085 467 matches: mp.matches.clone(), // a cheap clone
5e7ed085 468 };
04454e1e
FG
469 self.cur_mps.push(ending_mp);
470
471 if op != KleeneOp::ZeroOrOne {
472 // Try another repetition.
473 mp.idx = idx_first;
474 self.cur_mps.push(mp);
475 }
223e47cc 476 }
04454e1e
FG
477 MatcherLoc::SequenceSep { separator } => {
478 // We are past the end of a sequence with a separator but we haven't seen the
479 // separator yet. Try ending the sequence. If that's not possible, `ending_mp`
480 // will fail quietly when it is processed next time around the loop.
481 let ending_mp = MatcherPos {
482 idx: mp.idx + 2, // +2 skips the separator and the Kleene op
483 matches: mp.matches.clone(), // a cheap clone
484 };
485 self.cur_mps.push(ending_mp);
2c00a5a8 486
04454e1e
FG
487 if token_name_eq(token, separator) {
488 // The separator matches the current token. Advance past it.
5e7ed085
FG
489 mp.idx += 1;
490 self.next_mps.push(mp);
491 }
04454e1e
FG
492 }
493 &MatcherLoc::SequenceKleeneOpAfterSep { idx_first } => {
494 // We are past the sequence separator. This can't be a `?` Kleene op, because
495 // they don't permit separators. Try another repetition.
496 mp.idx = idx_first;
5e7ed085
FG
497 self.cur_mps.push(mp);
498 }
04454e1e
FG
499 &MatcherLoc::MetaVarDecl { span, kind, .. } => {
500 // Built-in nonterminals never start with these tokens, so we can eliminate
501 // them from consideration. We use the span of the metavariable declaration
502 // to determine any edition-specific matching behavior for non-terminals.
503 if let Some(kind) = kind {
504 if Parser::nonterminal_may_begin_with(kind, token) {
505 self.bb_mps.push(mp);
506 }
507 } else {
508 // E.g. `$e` instead of `$e:expr`, reported as a hard error if actually used.
509 // Both this check and the one in `nameize` are necessary, surprisingly.
510 return Some(Error(span, "missing fragment specifier".to_string()));
511 }
512 }
513 MatcherLoc::Eof => {
514 // We are past the matcher's end, and not in a sequence. Try to end things.
515 debug_assert_eq!(mp.idx, matcher.len() - 1);
516 if *token == token::Eof {
517 eof_mps = match eof_mps {
518 EofMatcherPositions::None => EofMatcherPositions::One(mp),
519 EofMatcherPositions::One(_) | EofMatcherPositions::Multiple => {
520 EofMatcherPositions::Multiple
521 }
5e7ed085
FG
522 }
523 }
524 }
223e47cc
LB
525 }
526 }
476ff2be 527
5e7ed085
FG
528 // If we reached the end of input, check that there is EXACTLY ONE possible matcher.
529 // Otherwise, either the parse is ambiguous (which is an error) or there is a syntax error.
530 if *token == token::Eof {
531 Some(match eof_mps {
532 EofMatcherPositions::One(mut eof_mp) => {
5e7ed085
FG
533 // Need to take ownership of the matches from within the `Lrc`.
534 Lrc::make_mut(&mut eof_mp.matches);
535 let matches = Lrc::try_unwrap(eof_mp.matches).unwrap().into_iter();
04454e1e 536 self.nameize(matcher, matches)
5e7ed085
FG
537 }
538 EofMatcherPositions::Multiple => {
539 Error(token.span, "ambiguity: multiple successful parses".to_string())
540 }
541 EofMatcherPositions::None => Failure(
dfeec247
XL
542 Token::new(
543 token::Eof,
5e7ed085 544 if token.span.is_dummy() { token.span } else { token.span.shrink_to_hi() },
dfeec247 545 ),
0731742a 546 "missing tokens in macro arguments",
2c00a5a8 547 ),
5e7ed085
FG
548 })
549 } else {
550 None
2c00a5a8 551 }
5e7ed085
FG
552 }
553
554 /// Match the token stream from `parser` against `matcher`.
555 pub(super) fn parse_tt(
556 &mut self,
557 parser: &mut Cow<'_, Parser<'_>>,
04454e1e 558 matcher: &[MatcherLoc],
5e7ed085
FG
559 ) -> NamedParseResult {
560 // A queue of possible matcher positions. We initialize it with the matcher position in
561 // which the "dot" is before the first token of the first token tree in `matcher`.
562 // `parse_tt_inner` then processes all of these possible matcher positions and produces
563 // possible next positions into `next_mps`. After some post-processing, the contents of
564 // `next_mps` replenish `cur_mps` and we start over again.
565 self.cur_mps.clear();
04454e1e 566 self.cur_mps.push(MatcherPos { idx: 0, matches: self.empty_matches.clone() });
5e7ed085
FG
567
568 loop {
569 self.next_mps.clear();
570 self.bb_mps.clear();
571
572 // Process `cur_mps` until either we have finished the input or we need to get some
573 // parsing from the black-box parser done.
04454e1e
FG
574 if let Some(res) = self.parse_tt_inner(matcher, &parser.token) {
575 return res;
5e7ed085
FG
576 }
577
578 // `parse_tt_inner` handled all of `cur_mps`, so it's empty.
579 assert!(self.cur_mps.is_empty());
580
581 // Error messages here could be improved with links to original rules.
582 match (self.next_mps.len(), self.bb_mps.len()) {
583 (0, 0) => {
584 // There are no possible next positions AND we aren't waiting for the black-box
585 // parser: syntax error.
586 return Failure(
587 parser.token.clone(),
588 "no rules expected this token in macro call",
589 );
590 }
591
592 (_, 0) => {
593 // Dump all possible `next_mps` into `cur_mps` for the next iteration. Then
594 // process the next token.
04454e1e 595 self.cur_mps.append(&mut self.next_mps);
5e7ed085
FG
596 parser.to_mut().bump();
597 }
598
599 (0, 1) => {
600 // We need to call the black-box parser to get some nonterminal.
601 let mut mp = self.bb_mps.pop().unwrap();
04454e1e
FG
602 let loc = &matcher[mp.idx];
603 if let &MatcherLoc::MetaVarDecl {
604 span,
605 kind: Some(kind),
606 next_metavar,
607 seq_depth,
608 ..
609 } = loc
610 {
5e7ed085
FG
611 // We use the span of the metavariable declaration to determine any
612 // edition-specific matching behavior for non-terminals.
613 let nt = match parser.to_mut().parse_nonterminal(kind) {
614 Err(mut err) => {
615 err.span_label(
616 span,
617 format!(
618 "while parsing argument for this `{kind}` macro fragment"
619 ),
620 )
621 .emit();
622 return ErrorReported;
623 }
624 Ok(nt) => nt,
625 };
626 let m = match nt {
627 NtOrTt::Nt(nt) => MatchedNonterminal(Lrc::new(nt)),
628 NtOrTt::Tt(tt) => MatchedTokenTree(tt),
629 };
04454e1e 630 mp.push_match(next_metavar, seq_depth, m);
5e7ed085 631 mp.idx += 1;
5e7ed085
FG
632 } else {
633 unreachable!()
3dfed10e 634 }
5e7ed085
FG
635 self.cur_mps.push(mp);
636 }
637
638 (_, _) => {
639 // Too many possibilities!
04454e1e 640 return self.ambiguity_error(matcher, parser.token.span);
5e7ed085 641 }
223e47cc 642 }
5e7ed085
FG
643
644 assert!(!self.cur_mps.is_empty());
223e47cc 645 }
5e7ed085 646 }
223e47cc 647
04454e1e
FG
648 fn ambiguity_error(
649 &self,
650 matcher: &[MatcherLoc],
651 token_span: rustc_span::Span,
652 ) -> NamedParseResult {
5e7ed085
FG
653 let nts = self
654 .bb_mps
655 .iter()
04454e1e
FG
656 .map(|mp| match &matcher[mp.idx] {
657 MatcherLoc::MetaVarDecl { bind, kind: Some(kind), .. } => {
5e7ed085
FG
658 format!("{} ('{}')", kind, bind)
659 }
04454e1e 660 _ => unreachable!(),
5e7ed085
FG
661 })
662 .collect::<Vec<String>>()
663 .join(" or ");
664
665 Error(
666 token_span,
667 format!(
668 "local ambiguity when calling macro `{}`: multiple parsing options: {}",
669 self.macro_name,
670 match self.next_mps.len() {
671 0 => format!("built-in NTs {}.", nts),
04454e1e 672 n => format!("built-in NTs {} or {n} other option{s}.", nts, s = pluralize!(n)),
5e7ed085
FG
673 }
674 ),
675 )
223e47cc 676 }
04454e1e
FG
677
678 fn nameize<I: Iterator<Item = NamedMatch>>(
679 &self,
680 matcher: &[MatcherLoc],
681 mut res: I,
682 ) -> NamedParseResult {
683 // Make that each metavar has _exactly one_ binding. If so, insert the binding into the
684 // `NamedParseResult`. Otherwise, it's an error.
685 let mut ret_val = FxHashMap::default();
686 for loc in matcher {
687 if let &MatcherLoc::MetaVarDecl { span, bind, kind, .. } = loc {
688 if kind.is_some() {
689 match ret_val.entry(MacroRulesNormalizedIdent::new(bind)) {
690 Vacant(spot) => spot.insert(res.next().unwrap()),
691 Occupied(..) => {
692 return Error(span, format!("duplicated bind name: {}", bind));
693 }
694 };
695 } else {
696 // E.g. `$e` instead of `$e:expr`, reported as a hard error if actually used.
697 // Both this check and the one in `parse_tt_inner` are necessary, surprisingly.
698 return Error(span, "missing fragment specifier".to_string());
699 }
700 }
701 }
702 Success(ret_val)
703 }
223e47cc 704}