]> git.proxmox.com Git - rustc.git/blame - compiler/rustc_expand/src/mbe/macro_parser.rs
New upstream version 1.71.1+dfsg1
[rustc.git] / compiler / rustc_expand / src / mbe / macro_parser.rs
CommitLineData
fc512014 1//! This is an NFA-based parser, which calls out to the main Rust parser for named non-terminals
3b2f2976
XL
2//! (which it commits to fully when it hits one in a grammar). There's a set of current NFA threads
3//! and a set of next ones. Instead of NTs, we have a special case for Kleene star. The big-O, in
4//! pathological cases, is worse than traditional use of NFA or Earley parsing, but it's an easier
5//! fit for Macro-by-Example-style rules.
6//!
7//! (In order to prevent the pathological case, we'd need to lazily construct the resulting
8//! `NamedMatch`es at the very end. It'd be a pain, and require more memory to keep around old
5e7ed085 9//! matcher positions, but it would also save overhead)
3b2f2976 10//!
94b46f34 11//! We don't say this parser uses the Earley algorithm, because it's unnecessarily inaccurate.
3b2f2976
XL
12//! The macro parser restricts itself to the features of finite state automata. Earley parsers
13//! can be described as an extension of NFAs with completion rules, prediction rules, and recursion.
1a4d82fc
JJ
14//!
15//! Quick intro to how the parser works:
16//!
5e7ed085
FG
17//! A "matcher position" (a.k.a. "position" or "mp") is a dot in the middle of a matcher, usually
18//! written as a `·`. For example `· a $( a )* a b` is one, as is `a $( · a )* a b`.
1a4d82fc 19//!
04454e1e 20//! The parser walks through the input a token at a time, maintaining a list
5e7ed085 21//! of threads consistent with the current position in the input string: `cur_mps`.
1a4d82fc 22//!
5e7ed085
FG
23//! As it processes them, it fills up `eof_mps` with threads that would be valid if
24//! the macro invocation is now over, `bb_mps` with threads that are waiting on
25//! a Rust non-terminal like `$e:expr`, and `next_mps` with threads that are waiting
b039eaaf 26//! on a particular token. Most of the logic concerns moving the · through the
3b2f2976
XL
27//! repetitions indicated by Kleene stars. The rules for moving the · without
28//! consuming any input are called epsilon transitions. It only advances or calls
5e7ed085 29//! out to the real Rust parser when no `cur_mps` threads remain.
1a4d82fc 30//!
7cac9316 31//! Example:
1a4d82fc 32//!
7cac9316
XL
33//! ```text, ignore
34//! Start parsing a a a a b against [· a $( a )* a b].
35//!
36//! Remaining input: a a a a b
3b2f2976 37//! next: [· a $( a )* a b]
1a4d82fc 38//!
7cac9316 39//! - - - Advance over an a. - - -
1a4d82fc 40//!
7cac9316 41//! Remaining input: a a a b
1a4d82fc 42//! cur: [a · $( a )* a b]
5e7ed085 43//! Descend/Skip (first position).
1a4d82fc
JJ
44//! next: [a $( · a )* a b] [a $( a )* · a b].
45//!
7cac9316 46//! - - - Advance over an a. - - -
1a4d82fc 47//!
7cac9316 48//! Remaining input: a a b
3b2f2976 49//! cur: [a $( a · )* a b] [a $( a )* a · b]
5e7ed085 50//! Follow epsilon transition: Finish/Repeat (first position)
1a4d82fc
JJ
51//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
52//!
7cac9316 53//! - - - Advance over an a. - - - (this looks exactly like the last step)
1a4d82fc 54//!
7cac9316 55//! Remaining input: a b
3b2f2976 56//! cur: [a $( a · )* a b] [a $( a )* a · b]
5e7ed085 57//! Follow epsilon transition: Finish/Repeat (first position)
1a4d82fc
JJ
58//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
59//!
7cac9316 60//! - - - Advance over an a. - - - (this looks exactly like the last step)
1a4d82fc 61//!
7cac9316 62//! Remaining input: b
3b2f2976 63//! cur: [a $( a · )* a b] [a $( a )* a · b]
5e7ed085 64//! Follow epsilon transition: Finish/Repeat (first position)
3b2f2976 65//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
1a4d82fc 66//!
7cac9316 67//! - - - Advance over a b. - - -
1a4d82fc 68//!
7cac9316 69//! Remaining input: ''
1a4d82fc 70//! eof: [a $( a )* a b ·]
7cac9316 71//! ```
1a4d82fc 72
923072b8
FG
73pub(crate) use NamedMatch::*;
74pub(crate) use ParseResult::*;
9fa01778 75
487cf647 76use crate::mbe::{macro_rules::Tracker, KleeneOp, TokenTree};
e74abb32 77
04454e1e 78use rustc_ast::token::{self, DocComment, Nonterminal, NonterminalKind, Token};
487cf647
FG
79use rustc_ast_pretty::pprust;
80use rustc_data_structures::fx::FxHashMap;
81use rustc_data_structures::sync::Lrc;
82use rustc_errors::ErrorGuaranteed;
04454e1e 83use rustc_lint_defs::pluralize;
5e7ed085 84use rustc_parse::parser::{NtOrTt, Parser};
487cf647 85use rustc_span::symbol::Ident;
5869c6ff 86use rustc_span::symbol::MacroRulesNormalizedIdent;
04454e1e 87use rustc_span::Span;
74b04a01 88use std::borrow::Cow;
b7449926 89use std::collections::hash_map::Entry::{Occupied, Vacant};
487cf647 90use std::fmt::Display;
49aad941 91use std::rc::Rc;
223e47cc 92
04454e1e
FG
93/// A unit within a matcher that a `MatcherPos` can refer to. Similar to (and derived from)
94/// `mbe::TokenTree`, but designed specifically for fast and easy traversal during matching.
95/// Notable differences to `mbe::TokenTree`:
96/// - It is non-recursive, i.e. there is no nesting.
97/// - The end pieces of each sequence (the separator, if present, and the Kleene op) are
98/// represented explicitly, as is the very end of the matcher.
99///
100/// This means a matcher can be represented by `&[MatcherLoc]`, and traversal mostly involves
101/// simply incrementing the current matcher position index by one.
487cf647
FG
102#[derive(Debug, PartialEq, Clone)]
103pub(crate) enum MatcherLoc {
04454e1e
FG
104 Token {
105 token: Token,
106 },
107 Delimited,
108 Sequence {
109 op: KleeneOp,
110 num_metavar_decls: usize,
111 idx_first_after: usize,
112 next_metavar: usize,
113 seq_depth: usize,
114 },
115 SequenceKleeneOpNoSep {
116 op: KleeneOp,
117 idx_first: usize,
118 },
119 SequenceSep {
120 separator: Token,
121 },
122 SequenceKleeneOpAfterSep {
123 idx_first: usize,
124 },
125 MetaVarDecl {
126 span: Span,
127 bind: Ident,
128 kind: Option<NonterminalKind>,
129 next_metavar: usize,
130 seq_depth: usize,
131 },
132 Eof,
133}
5e7ed085 134
487cf647
FG
135impl MatcherLoc {
136 pub(super) fn span(&self) -> Option<Span> {
137 match self {
138 MatcherLoc::Token { token } => Some(token.span),
139 MatcherLoc::Delimited => None,
140 MatcherLoc::Sequence { .. } => None,
141 MatcherLoc::SequenceKleeneOpNoSep { .. } => None,
142 MatcherLoc::SequenceSep { .. } => None,
143 MatcherLoc::SequenceKleeneOpAfterSep { .. } => None,
144 MatcherLoc::MetaVarDecl { span, .. } => Some(*span),
145 MatcherLoc::Eof => None,
146 }
147 }
148}
149
150impl Display for MatcherLoc {
151 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
152 match self {
153 MatcherLoc::Token { token } | MatcherLoc::SequenceSep { separator: token } => {
154 write!(f, "`{}`", pprust::token_to_string(token))
155 }
156 MatcherLoc::MetaVarDecl { bind, kind, .. } => {
157 write!(f, "meta-variable `${bind}")?;
158 if let Some(kind) = kind {
159 write!(f, ":{}", kind)?;
160 }
161 write!(f, "`")?;
162 Ok(())
163 }
164 MatcherLoc::Eof => f.write_str("end of macro"),
165
166 // These are not printed in the diagnostic
167 MatcherLoc::Delimited => f.write_str("delimiter"),
168 MatcherLoc::Sequence { .. } => f.write_str("sequence start"),
169 MatcherLoc::SequenceKleeneOpNoSep { .. } => f.write_str("sequence end"),
170 MatcherLoc::SequenceKleeneOpAfterSep { .. } => f.write_str("sequence end"),
171 }
172 }
173}
174
04454e1e
FG
175pub(super) fn compute_locs(matcher: &[TokenTree]) -> Vec<MatcherLoc> {
176 fn inner(
177 tts: &[TokenTree],
178 locs: &mut Vec<MatcherLoc>,
179 next_metavar: &mut usize,
180 seq_depth: usize,
181 ) {
182 for tt in tts {
183 match tt {
184 TokenTree::Token(token) => {
185 locs.push(MatcherLoc::Token { token: token.clone() });
186 }
187 TokenTree::Delimited(span, delimited) => {
188 let open_token = Token::new(token::OpenDelim(delimited.delim), span.open);
189 let close_token = Token::new(token::CloseDelim(delimited.delim), span.close);
190
191 locs.push(MatcherLoc::Delimited);
192 locs.push(MatcherLoc::Token { token: open_token });
193 inner(&delimited.tts, locs, next_metavar, seq_depth);
194 locs.push(MatcherLoc::Token { token: close_token });
195 }
196 TokenTree::Sequence(_, seq) => {
197 // We can't determine `idx_first_after` and construct the final
198 // `MatcherLoc::Sequence` until after `inner()` is called and the sequence end
199 // pieces are processed. So we push a dummy value (`Eof` is cheapest to
200 // construct) now, and overwrite it with the proper value below.
201 let dummy = MatcherLoc::Eof;
202 locs.push(dummy);
203
204 let next_metavar_orig = *next_metavar;
205 let op = seq.kleene.op;
206 let idx_first = locs.len();
207 let idx_seq = idx_first - 1;
208 inner(&seq.tts, locs, next_metavar, seq_depth + 1);
209
210 if let Some(separator) = &seq.separator {
211 locs.push(MatcherLoc::SequenceSep { separator: separator.clone() });
212 locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first });
213 } else {
214 locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first });
215 }
223e47cc 216
04454e1e
FG
217 // Overwrite the dummy value pushed above with the proper value.
218 locs[idx_seq] = MatcherLoc::Sequence {
219 op,
220 num_metavar_decls: seq.num_captures,
221 idx_first_after: locs.len(),
222 next_metavar: next_metavar_orig,
223 seq_depth,
224 };
225 }
226 &TokenTree::MetaVarDecl(span, bind, kind) => {
227 locs.push(MatcherLoc::MetaVarDecl {
228 span,
229 bind,
230 kind,
231 next_metavar: *next_metavar,
232 seq_depth,
233 });
234 *next_metavar += 1;
235 }
236 TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
237 }
238 }
239 }
223e47cc 240
04454e1e
FG
241 let mut locs = vec![];
242 let mut next_metavar = 0;
243 inner(matcher, &mut locs, &mut next_metavar, /* seq_depth */ 0);
223e47cc 244
04454e1e
FG
245 // A final entry is needed for eof.
246 locs.push(MatcherLoc::Eof);
223e47cc 247
04454e1e 248 locs
5e7ed085 249}
a1dfa0c6 250
04454e1e
FG
251/// A single matcher position, representing the state of matching.
252struct MatcherPos {
253 /// The index into `TtParser::locs`, which represents the "dot".
5e7ed085 254 idx: usize,
2c00a5a8 255
04454e1e
FG
256 /// The matches made against metavar decls so far. On a successful match, this vector ends up
257 /// with one element per metavar decl in the matcher. Each element records token trees matched
258 /// against the relevant metavar by the black box parser. An element will be a `MatchedSeq` if
259 /// the corresponding metavar decl is within a sequence.
260 ///
49aad941 261 /// It is critical to performance that this is an `Rc`, because it gets cloned frequently when
04454e1e
FG
262 /// processing sequences. Mostly for sequence-ending possibilities that must be tried but end
263 /// up failing.
49aad941 264 matches: Rc<Vec<NamedMatch>>,
223e47cc
LB
265}
266
5e7ed085
FG
267// This type is used a lot. Make sure it doesn't unintentionally get bigger.
268#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
04454e1e 269rustc_data_structures::static_assert_size!(MatcherPos, 16);
94b46f34 270
04454e1e
FG
271impl MatcherPos {
272 /// Adds `m` as a named match for the `metavar_idx`-th metavar. There are only two call sites,
273 /// and both are hot enough to be always worth inlining.
274 #[inline(always)]
275 fn push_match(&mut self, metavar_idx: usize, seq_depth: usize, m: NamedMatch) {
49aad941 276 let matches = Rc::make_mut(&mut self.matches);
04454e1e 277 match seq_depth {
5e7ed085
FG
278 0 => {
279 // We are not within a sequence. Just append `m`.
04454e1e 280 assert_eq!(metavar_idx, matches.len());
5e7ed085
FG
281 matches.push(m);
282 }
283 _ => {
284 // We are within a sequence. Find the final `MatchedSeq` at the appropriate depth
285 // and append `m` to its vector.
04454e1e
FG
286 let mut curr = &mut matches[metavar_idx];
287 for _ in 0..seq_depth - 1 {
5e7ed085 288 match curr {
04454e1e 289 MatchedSeq(seq) => curr = seq.last_mut().unwrap(),
5e7ed085
FG
290 _ => unreachable!(),
291 }
292 }
293 match curr {
04454e1e 294 MatchedSeq(seq) => seq.push(m),
5e7ed085
FG
295 _ => unreachable!(),
296 }
297 }
94b46f34
XL
298 }
299 }
300}
301
04454e1e 302enum EofMatcherPositions {
5e7ed085 303 None,
04454e1e 304 One(MatcherPos),
5e7ed085
FG
305 Multiple,
306}
307
2c00a5a8 308/// Represents the possible results of an attempted parse.
9c376795 309pub(crate) enum ParseResult<T, F> {
2c00a5a8
XL
310 /// Parsed successfully.
311 Success(T),
312 /// Arm failed to match. If the second parameter is `token::Eof`, it indicates an unexpected
313 /// end of macro invocation. Otherwise, it indicates that no rules expected the given token.
9c376795
FG
314 /// The usize is the approximate position of the token in the input token stream.
315 Failure(F),
2c00a5a8 316 /// Fatal error (malformed macro?). Abort compilation.
dfeec247 317 Error(rustc_span::Span, String),
487cf647 318 ErrorReported(ErrorGuaranteed),
2c00a5a8
XL
319}
320
ba9703b0
XL
321/// A `ParseResult` where the `Success` variant contains a mapping of
322/// `MacroRulesNormalizedIdent`s to `NamedMatch`es. This represents the mapping
323/// of metavars to the token trees they bind to.
9c376795 324pub(crate) type NamedParseResult<F> = ParseResult<NamedMatches, F>;
487cf647
FG
325
326/// Contains a mapping of `MacroRulesNormalizedIdent`s to `NamedMatch`es.
327/// This represents the mapping of metavars to the token trees they bind to.
328pub(crate) type NamedMatches = FxHashMap<MacroRulesNormalizedIdent, NamedMatch>;
476ff2be 329
5e7ed085
FG
330/// Count how many metavars declarations are in `matcher`.
331pub(super) fn count_metavar_decls(matcher: &[TokenTree]) -> usize {
332 matcher
333 .iter()
334 .map(|tt| match tt {
335 TokenTree::MetaVarDecl(..) => 1,
336 TokenTree::Sequence(_, seq) => seq.num_captures,
04454e1e 337 TokenTree::Delimited(_, delim) => count_metavar_decls(&delim.tts),
5e7ed085
FG
338 TokenTree::Token(..) => 0,
339 TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
340 })
341 .sum()
223e47cc
LB
342}
343
5e7ed085 344/// `NamedMatch` is a pattern-match result for a single metavar. All
9fa01778 345/// `MatchedNonterminal`s in the `NamedMatch` have the same non-terminal type
5e7ed085 346/// (expr, item, etc).
1a4d82fc 347///
7cac9316 348/// The in-memory structure of a particular `NamedMatch` represents the match
1a4d82fc
JJ
349/// that occurred when a particular subset of a matcher was applied to a
350/// particular token tree.
351///
7cac9316 352/// The width of each `MatchedSeq` in the `NamedMatch`, and the identity of
5e7ed085
FG
353/// the `MatchedNtNonTts`s, will depend on the token tree it was applied
354/// to: each `MatchedSeq` corresponds to a single repetition in the originating
7cac9316 355/// token tree. The depth of the `NamedMatch` structure will therefore depend
5e7ed085
FG
356/// only on the nesting depth of repetitions in the originating token tree it
357/// was derived from.
358///
04454e1e 359/// In layperson's terms: `NamedMatch` will form a tree representing nested matches of a particular
5e7ed085
FG
360/// meta variable. For example, if we are matching the following macro against the following
361/// invocation...
362///
363/// ```rust
364/// macro_rules! foo {
365/// ($($($x:ident),+);+) => {}
366/// }
367///
368/// foo!(a, b, c, d; a, b, c, d, e);
369/// ```
370///
371/// Then, the tree will have the following shape:
372///
04454e1e
FG
373/// ```ignore (private-internal)
374/// # use NamedMatch::*;
5e7ed085
FG
375/// MatchedSeq([
376/// MatchedSeq([
377/// MatchedNonterminal(a),
378/// MatchedNonterminal(b),
379/// MatchedNonterminal(c),
380/// MatchedNonterminal(d),
381/// ]),
382/// MatchedSeq([
383/// MatchedNonterminal(a),
384/// MatchedNonterminal(b),
385/// MatchedNonterminal(c),
386/// MatchedNonterminal(d),
387/// MatchedNonterminal(e),
388/// ])
389/// ])
390/// ```
041b39d2 391#[derive(Debug, Clone)]
923072b8 392pub(crate) enum NamedMatch {
04454e1e 393 MatchedSeq(Vec<NamedMatch>),
5e7ed085
FG
394
395 // A metavar match of type `tt`.
396 MatchedTokenTree(rustc_ast::tokenstream::TokenTree),
397
398 // A metavar match of any type other than `tt`.
9fa01778 399 MatchedNonterminal(Lrc<Nonterminal>),
223e47cc
LB
400}
401
9fa01778 402/// Performs a token equality check, ignoring syntax context (that is, an unhygienic comparison)
2c00a5a8 403fn token_name_eq(t1: &Token, t2: &Token) -> bool {
dc9dc135
XL
404 if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) = (t1.ident(), t2.ident()) {
405 ident1.name == ident2.name && is_raw1 == is_raw2
406 } else if let (Some(ident1), Some(ident2)) = (t1.lifetime(), t2.lifetime()) {
407 ident1.name == ident2.name
cc61c64b 408 } else {
dc9dc135 409 t1.kind == t2.kind
1a4d82fc
JJ
410 }
411}
412
04454e1e 413// Note: the vectors could be created and dropped within `parse_tt`, but to avoid excess
923072b8 414// allocations we have a single vector for each kind that is cleared and reused repeatedly.
04454e1e 415pub struct TtParser {
5e7ed085 416 macro_name: Ident,
223e47cc 417
5e7ed085
FG
418 /// The set of current mps to be processed. This should be empty by the end of a successful
419 /// execution of `parse_tt_inner`.
04454e1e 420 cur_mps: Vec<MatcherPos>,
476ff2be 421
5e7ed085
FG
422 /// The set of newly generated mps. These are used to replenish `cur_mps` in the function
423 /// `parse_tt`.
04454e1e 424 next_mps: Vec<MatcherPos>,
223e47cc 425
5e7ed085 426 /// The set of mps that are waiting for the black-box parser.
04454e1e 427 bb_mps: Vec<MatcherPos>,
223e47cc 428
5e7ed085
FG
429 /// Pre-allocate an empty match array, so it can be cloned cheaply for macros with many rules
430 /// that have no metavars.
49aad941 431 empty_matches: Rc<Vec<NamedMatch>>,
5e7ed085
FG
432}
433
04454e1e
FG
434impl TtParser {
435 pub(super) fn new(macro_name: Ident) -> TtParser {
5e7ed085
FG
436 TtParser {
437 macro_name,
438 cur_mps: vec![],
439 next_mps: vec![],
440 bb_mps: vec![],
49aad941 441 empty_matches: Rc::new(vec![]),
2c00a5a8 442 }
5e7ed085
FG
443 }
444
487cf647
FG
445 pub(super) fn has_no_remaining_items_for_step(&self) -> bool {
446 self.cur_mps.is_empty()
447 }
448
5e7ed085
FG
449 /// Process the matcher positions of `cur_mps` until it is empty. In the process, this will
450 /// produce more mps in `next_mps` and `bb_mps`.
451 ///
452 /// # Returns
453 ///
454 /// `Some(result)` if everything is finished, `None` otherwise. Note that matches are kept
455 /// track of through the mps generated.
487cf647 456 fn parse_tt_inner<'matcher, T: Tracker<'matcher>>(
5e7ed085 457 &mut self,
487cf647 458 matcher: &'matcher [MatcherLoc],
5e7ed085 459 token: &Token,
9c376795 460 approx_position: usize,
487cf647 461 track: &mut T,
9c376795 462 ) -> Option<NamedParseResult<T::Failure>> {
5e7ed085
FG
463 // Matcher positions that would be valid if the macro invocation was over now. Only
464 // modified if `token == Eof`.
465 let mut eof_mps = EofMatcherPositions::None;
466
467 while let Some(mut mp) = self.cur_mps.pop() {
487cf647
FG
468 let matcher_loc = &matcher[mp.idx];
469 track.before_match_loc(self, matcher_loc);
470
471 match matcher_loc {
04454e1e
FG
472 MatcherLoc::Token { token: t } => {
473 // If it's a doc comment, we just ignore it and move on to the next tt in the
474 // matcher. This is a bug, but #95267 showed that existing programs rely on
475 // this behaviour, and changing it would require some care and a transition
476 // period.
477 //
478 // If the token matches, we can just advance the parser.
479 //
480 // Otherwise, this match has failed, there is nothing to do, and hopefully
481 // another mp in `cur_mps` will match.
482 if matches!(t, Token { kind: DocComment(..), .. }) {
483 mp.idx += 1;
484 self.cur_mps.push(mp);
485 } else if token_name_eq(&t, token) {
486 mp.idx += 1;
487 self.next_mps.push(mp);
5e7ed085 488 }
04454e1e
FG
489 }
490 MatcherLoc::Delimited => {
f2b60f7d 491 // Entering the delimiter is trivial.
04454e1e
FG
492 mp.idx += 1;
493 self.cur_mps.push(mp);
494 }
495 &MatcherLoc::Sequence {
496 op,
497 num_metavar_decls,
498 idx_first_after,
499 next_metavar,
500 seq_depth,
501 } => {
502 // Install an empty vec for each metavar within the sequence.
503 for metavar_idx in next_metavar..next_metavar + num_metavar_decls {
504 mp.push_match(metavar_idx, seq_depth, MatchedSeq(vec![]));
b9856134 505 }
b9856134 506
9ffffee4 507 if matches!(op, KleeneOp::ZeroOrMore | KleeneOp::ZeroOrOne) {
04454e1e
FG
508 // Try zero matches of this sequence, by skipping over it.
509 self.cur_mps.push(MatcherPos {
510 idx: idx_first_after,
49aad941 511 matches: Rc::clone(&mp.matches),
5e7ed085 512 });
5e7ed085
FG
513 }
514
04454e1e
FG
515 // Try one or more matches of this sequence, by entering it.
516 mp.idx += 1;
517 self.cur_mps.push(mp);
476ff2be 518 }
04454e1e
FG
519 &MatcherLoc::SequenceKleeneOpNoSep { op, idx_first } => {
520 // We are past the end of a sequence with no separator. Try ending the
521 // sequence. If that's not possible, `ending_mp` will fail quietly when it is
5e7ed085 522 // processed next time around the loop.
04454e1e 523 let ending_mp = MatcherPos {
487cf647 524 idx: mp.idx + 1, // +1 skips the Kleene op
49aad941 525 matches: Rc::clone(&mp.matches),
5e7ed085 526 };
04454e1e
FG
527 self.cur_mps.push(ending_mp);
528
529 if op != KleeneOp::ZeroOrOne {
530 // Try another repetition.
531 mp.idx = idx_first;
532 self.cur_mps.push(mp);
533 }
223e47cc 534 }
04454e1e
FG
535 MatcherLoc::SequenceSep { separator } => {
536 // We are past the end of a sequence with a separator but we haven't seen the
537 // separator yet. Try ending the sequence. If that's not possible, `ending_mp`
538 // will fail quietly when it is processed next time around the loop.
539 let ending_mp = MatcherPos {
487cf647 540 idx: mp.idx + 2, // +2 skips the separator and the Kleene op
49aad941 541 matches: Rc::clone(&mp.matches),
04454e1e
FG
542 };
543 self.cur_mps.push(ending_mp);
2c00a5a8 544
04454e1e
FG
545 if token_name_eq(token, separator) {
546 // The separator matches the current token. Advance past it.
5e7ed085
FG
547 mp.idx += 1;
548 self.next_mps.push(mp);
549 }
04454e1e
FG
550 }
551 &MatcherLoc::SequenceKleeneOpAfterSep { idx_first } => {
552 // We are past the sequence separator. This can't be a `?` Kleene op, because
553 // they don't permit separators. Try another repetition.
554 mp.idx = idx_first;
5e7ed085
FG
555 self.cur_mps.push(mp);
556 }
04454e1e
FG
557 &MatcherLoc::MetaVarDecl { span, kind, .. } => {
558 // Built-in nonterminals never start with these tokens, so we can eliminate
559 // them from consideration. We use the span of the metavariable declaration
560 // to determine any edition-specific matching behavior for non-terminals.
561 if let Some(kind) = kind {
562 if Parser::nonterminal_may_begin_with(kind, token) {
563 self.bb_mps.push(mp);
564 }
565 } else {
566 // E.g. `$e` instead of `$e:expr`, reported as a hard error if actually used.
567 // Both this check and the one in `nameize` are necessary, surprisingly.
568 return Some(Error(span, "missing fragment specifier".to_string()));
569 }
570 }
571 MatcherLoc::Eof => {
572 // We are past the matcher's end, and not in a sequence. Try to end things.
573 debug_assert_eq!(mp.idx, matcher.len() - 1);
574 if *token == token::Eof {
575 eof_mps = match eof_mps {
576 EofMatcherPositions::None => EofMatcherPositions::One(mp),
577 EofMatcherPositions::One(_) | EofMatcherPositions::Multiple => {
578 EofMatcherPositions::Multiple
579 }
5e7ed085
FG
580 }
581 }
582 }
223e47cc
LB
583 }
584 }
476ff2be 585
5e7ed085
FG
586 // If we reached the end of input, check that there is EXACTLY ONE possible matcher.
587 // Otherwise, either the parse is ambiguous (which is an error) or there is a syntax error.
588 if *token == token::Eof {
589 Some(match eof_mps {
590 EofMatcherPositions::One(mut eof_mp) => {
49aad941
FG
591 // Need to take ownership of the matches from within the `Rc`.
592 Rc::make_mut(&mut eof_mp.matches);
593 let matches = Rc::try_unwrap(eof_mp.matches).unwrap().into_iter();
04454e1e 594 self.nameize(matcher, matches)
5e7ed085
FG
595 }
596 EofMatcherPositions::Multiple => {
597 Error(token.span, "ambiguity: multiple successful parses".to_string())
598 }
9c376795 599 EofMatcherPositions::None => Failure(T::build_failure(
dfeec247
XL
600 Token::new(
601 token::Eof,
5e7ed085 602 if token.span.is_dummy() { token.span } else { token.span.shrink_to_hi() },
dfeec247 603 ),
9c376795 604 approx_position,
0731742a 605 "missing tokens in macro arguments",
9c376795 606 )),
5e7ed085
FG
607 })
608 } else {
609 None
2c00a5a8 610 }
5e7ed085
FG
611 }
612
613 /// Match the token stream from `parser` against `matcher`.
487cf647 614 pub(super) fn parse_tt<'matcher, T: Tracker<'matcher>>(
5e7ed085
FG
615 &mut self,
616 parser: &mut Cow<'_, Parser<'_>>,
487cf647
FG
617 matcher: &'matcher [MatcherLoc],
618 track: &mut T,
9c376795 619 ) -> NamedParseResult<T::Failure> {
5e7ed085
FG
620 // A queue of possible matcher positions. We initialize it with the matcher position in
621 // which the "dot" is before the first token of the first token tree in `matcher`.
622 // `parse_tt_inner` then processes all of these possible matcher positions and produces
623 // possible next positions into `next_mps`. After some post-processing, the contents of
624 // `next_mps` replenish `cur_mps` and we start over again.
625 self.cur_mps.clear();
04454e1e 626 self.cur_mps.push(MatcherPos { idx: 0, matches: self.empty_matches.clone() });
5e7ed085
FG
627
628 loop {
629 self.next_mps.clear();
630 self.bb_mps.clear();
631
632 // Process `cur_mps` until either we have finished the input or we need to get some
633 // parsing from the black-box parser done.
9c376795
FG
634 let res = self.parse_tt_inner(
635 matcher,
636 &parser.token,
637 parser.approx_token_stream_pos(),
638 track,
639 );
487cf647 640 if let Some(res) = res {
04454e1e 641 return res;
5e7ed085
FG
642 }
643
644 // `parse_tt_inner` handled all of `cur_mps`, so it's empty.
645 assert!(self.cur_mps.is_empty());
646
647 // Error messages here could be improved with links to original rules.
648 match (self.next_mps.len(), self.bb_mps.len()) {
649 (0, 0) => {
650 // There are no possible next positions AND we aren't waiting for the black-box
651 // parser: syntax error.
9c376795 652 return Failure(T::build_failure(
5e7ed085 653 parser.token.clone(),
9c376795 654 parser.approx_token_stream_pos(),
5e7ed085 655 "no rules expected this token in macro call",
9c376795 656 ));
5e7ed085
FG
657 }
658
659 (_, 0) => {
660 // Dump all possible `next_mps` into `cur_mps` for the next iteration. Then
661 // process the next token.
04454e1e 662 self.cur_mps.append(&mut self.next_mps);
5e7ed085
FG
663 parser.to_mut().bump();
664 }
665
666 (0, 1) => {
667 // We need to call the black-box parser to get some nonterminal.
668 let mut mp = self.bb_mps.pop().unwrap();
04454e1e
FG
669 let loc = &matcher[mp.idx];
670 if let &MatcherLoc::MetaVarDecl {
671 span,
672 kind: Some(kind),
673 next_metavar,
674 seq_depth,
675 ..
676 } = loc
677 {
5e7ed085
FG
678 // We use the span of the metavariable declaration to determine any
679 // edition-specific matching behavior for non-terminals.
680 let nt = match parser.to_mut().parse_nonterminal(kind) {
681 Err(mut err) => {
487cf647 682 let guarantee = err.span_label(
5e7ed085
FG
683 span,
684 format!(
685 "while parsing argument for this `{kind}` macro fragment"
686 ),
687 )
688 .emit();
487cf647 689 return ErrorReported(guarantee);
5e7ed085
FG
690 }
691 Ok(nt) => nt,
692 };
693 let m = match nt {
694 NtOrTt::Nt(nt) => MatchedNonterminal(Lrc::new(nt)),
695 NtOrTt::Tt(tt) => MatchedTokenTree(tt),
696 };
04454e1e 697 mp.push_match(next_metavar, seq_depth, m);
5e7ed085 698 mp.idx += 1;
5e7ed085
FG
699 } else {
700 unreachable!()
3dfed10e 701 }
5e7ed085
FG
702 self.cur_mps.push(mp);
703 }
704
705 (_, _) => {
706 // Too many possibilities!
04454e1e 707 return self.ambiguity_error(matcher, parser.token.span);
5e7ed085 708 }
223e47cc 709 }
5e7ed085
FG
710
711 assert!(!self.cur_mps.is_empty());
223e47cc 712 }
5e7ed085 713 }
223e47cc 714
9c376795 715 fn ambiguity_error<F>(
04454e1e
FG
716 &self,
717 matcher: &[MatcherLoc],
718 token_span: rustc_span::Span,
9c376795 719 ) -> NamedParseResult<F> {
5e7ed085
FG
720 let nts = self
721 .bb_mps
722 .iter()
04454e1e
FG
723 .map(|mp| match &matcher[mp.idx] {
724 MatcherLoc::MetaVarDecl { bind, kind: Some(kind), .. } => {
5e7ed085
FG
725 format!("{} ('{}')", kind, bind)
726 }
04454e1e 727 _ => unreachable!(),
5e7ed085
FG
728 })
729 .collect::<Vec<String>>()
730 .join(" or ");
731
732 Error(
733 token_span,
734 format!(
735 "local ambiguity when calling macro `{}`: multiple parsing options: {}",
736 self.macro_name,
737 match self.next_mps.len() {
738 0 => format!("built-in NTs {}.", nts),
04454e1e 739 n => format!("built-in NTs {} or {n} other option{s}.", nts, s = pluralize!(n)),
5e7ed085
FG
740 }
741 ),
742 )
223e47cc 743 }
04454e1e 744
9c376795 745 fn nameize<I: Iterator<Item = NamedMatch>, F>(
04454e1e
FG
746 &self,
747 matcher: &[MatcherLoc],
748 mut res: I,
9c376795 749 ) -> NamedParseResult<F> {
04454e1e
FG
750 // Make that each metavar has _exactly one_ binding. If so, insert the binding into the
751 // `NamedParseResult`. Otherwise, it's an error.
752 let mut ret_val = FxHashMap::default();
753 for loc in matcher {
754 if let &MatcherLoc::MetaVarDecl { span, bind, kind, .. } = loc {
755 if kind.is_some() {
756 match ret_val.entry(MacroRulesNormalizedIdent::new(bind)) {
757 Vacant(spot) => spot.insert(res.next().unwrap()),
758 Occupied(..) => {
759 return Error(span, format!("duplicated bind name: {}", bind));
760 }
761 };
762 } else {
763 // E.g. `$e` instead of `$e:expr`, reported as a hard error if actually used.
764 // Both this check and the one in `parse_tt_inner` are necessary, surprisingly.
765 return Error(span, "missing fragment specifier".to_string());
766 }
767 }
768 }
769 Success(ret_val)
770 }
223e47cc 771}