]> git.proxmox.com Git - rustc.git/blame - compiler/rustc_expand/src/mbe/macro_parser.rs
New upstream version 1.61.0+dfsg1
[rustc.git] / compiler / rustc_expand / src / mbe / macro_parser.rs
CommitLineData
fc512014 1//! This is an NFA-based parser, which calls out to the main Rust parser for named non-terminals
3b2f2976
XL
2//! (which it commits to fully when it hits one in a grammar). There's a set of current NFA threads
3//! and a set of next ones. Instead of NTs, we have a special case for Kleene star. The big-O, in
4//! pathological cases, is worse than traditional use of NFA or Earley parsing, but it's an easier
5//! fit for Macro-by-Example-style rules.
6//!
7//! (In order to prevent the pathological case, we'd need to lazily construct the resulting
8//! `NamedMatch`es at the very end. It'd be a pain, and require more memory to keep around old
5e7ed085 9//! matcher positions, but it would also save overhead)
3b2f2976 10//!
94b46f34 11//! We don't say this parser uses the Earley algorithm, because it's unnecessarily inaccurate.
3b2f2976
XL
12//! The macro parser restricts itself to the features of finite state automata. Earley parsers
13//! can be described as an extension of NFAs with completion rules, prediction rules, and recursion.
1a4d82fc
JJ
14//!
15//! Quick intro to how the parser works:
16//!
5e7ed085
FG
17//! A "matcher position" (a.k.a. "position" or "mp") is a dot in the middle of a matcher, usually
18//! written as a `·`. For example `· a $( a )* a b` is one, as is `a $( · a )* a b`.
1a4d82fc
JJ
19//!
20//! The parser walks through the input a character at a time, maintaining a list
5e7ed085 21//! of threads consistent with the current position in the input string: `cur_mps`.
1a4d82fc 22//!
5e7ed085
FG
23//! As it processes them, it fills up `eof_mps` with threads that would be valid if
24//! the macro invocation is now over, `bb_mps` with threads that are waiting on
25//! a Rust non-terminal like `$e:expr`, and `next_mps` with threads that are waiting
b039eaaf 26//! on a particular token. Most of the logic concerns moving the · through the
3b2f2976
XL
27//! repetitions indicated by Kleene stars. The rules for moving the · without
28//! consuming any input are called epsilon transitions. It only advances or calls
5e7ed085 29//! out to the real Rust parser when no `cur_mps` threads remain.
1a4d82fc 30//!
7cac9316 31//! Example:
1a4d82fc 32//!
7cac9316
XL
33//! ```text, ignore
34//! Start parsing a a a a b against [· a $( a )* a b].
35//!
36//! Remaining input: a a a a b
3b2f2976 37//! next: [· a $( a )* a b]
1a4d82fc 38//!
7cac9316 39//! - - - Advance over an a. - - -
1a4d82fc 40//!
7cac9316 41//! Remaining input: a a a b
1a4d82fc 42//! cur: [a · $( a )* a b]
5e7ed085 43//! Descend/Skip (first position).
1a4d82fc
JJ
44//! next: [a $( · a )* a b] [a $( a )* · a b].
45//!
7cac9316 46//! - - - Advance over an a. - - -
1a4d82fc 47//!
7cac9316 48//! Remaining input: a a b
3b2f2976 49//! cur: [a $( a · )* a b] [a $( a )* a · b]
5e7ed085 50//! Follow epsilon transition: Finish/Repeat (first position)
1a4d82fc
JJ
51//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
52//!
7cac9316 53//! - - - Advance over an a. - - - (this looks exactly like the last step)
1a4d82fc 54//!
7cac9316 55//! Remaining input: a b
3b2f2976 56//! cur: [a $( a · )* a b] [a $( a )* a · b]
5e7ed085 57//! Follow epsilon transition: Finish/Repeat (first position)
1a4d82fc
JJ
58//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
59//!
7cac9316 60//! - - - Advance over an a. - - - (this looks exactly like the last step)
1a4d82fc 61//!
7cac9316 62//! Remaining input: b
3b2f2976 63//! cur: [a $( a · )* a b] [a $( a )* a · b]
5e7ed085 64//! Follow epsilon transition: Finish/Repeat (first position)
3b2f2976 65//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
1a4d82fc 66//!
7cac9316 67//! - - - Advance over a b. - - -
1a4d82fc 68//!
7cac9316 69//! Remaining input: ''
1a4d82fc 70//! eof: [a $( a )* a b ·]
7cac9316 71//! ```
1a4d82fc 72
e74abb32
XL
73crate use NamedMatch::*;
74crate use ParseResult::*;
9fa01778 75
5e7ed085 76use crate::mbe::{self, SequenceRepetition, TokenTree};
e74abb32 77
5e7ed085
FG
78use rustc_ast::token::{self, DocComment, Nonterminal, Token, TokenKind};
79use rustc_parse::parser::{NtOrTt, Parser};
74b04a01 80use rustc_session::parse::ParseSess;
5869c6ff 81use rustc_span::symbol::MacroRulesNormalizedIdent;
970d7e83 82
9fa01778 83use smallvec::{smallvec, SmallVec};
223e47cc 84
b7449926 85use rustc_data_structures::fx::FxHashMap;
9fa01778 86use rustc_data_structures::sync::Lrc;
17df50a5 87use rustc_span::symbol::Ident;
74b04a01 88use std::borrow::Cow;
b7449926 89use std::collections::hash_map::Entry::{Occupied, Vacant};
223e47cc 90
5e7ed085
FG
91// One element is enough to cover 95-99% of vectors for most benchmarks. Also,
92// vectors longer than one frequently have many elements, not just two or
93// three.
94type NamedMatchVec = SmallVec<[NamedMatch; 1]>;
95
96// This type is used a lot. Make sure it doesn't unintentionally get bigger.
97#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
98rustc_data_structures::static_assert_size!(NamedMatchVec, 48);
223e47cc 99
1a4d82fc 100#[derive(Clone)]
5e7ed085
FG
101enum MatcherKind<'tt> {
102 TopLevel,
103 Delimited(Box<DelimitedSubmatcher<'tt>>),
104 Sequence(Box<SequenceSubmatcher<'tt>>),
1a4d82fc 105}
223e47cc 106
5e7ed085
FG
107#[derive(Clone)]
108struct DelimitedSubmatcher<'tt> {
109 parent: Parent<'tt>,
223e47cc
LB
110}
111
1a4d82fc 112#[derive(Clone)]
5e7ed085
FG
113struct SequenceSubmatcher<'tt> {
114 parent: Parent<'tt>,
115 seq: &'tt SequenceRepetition,
223e47cc
LB
116}
117
5e7ed085
FG
118/// Data used to ascend from a submatcher back to its parent matcher. A subset of the fields from
119/// `MathcherPos`.
1a4d82fc 120#[derive(Clone)]
5e7ed085
FG
121struct Parent<'tt> {
122 tts: &'tt [TokenTree],
85aaf69f 123 idx: usize,
5e7ed085
FG
124 kind: MatcherKind<'tt>,
125}
a1dfa0c6 126
5e7ed085
FG
127/// A single matcher position, which could be within the top-level matcher, a submatcher, a
128/// subsubmatcher, etc. For example:
129/// ```text
130/// macro_rules! m { $id:ident ( $($e:expr),* ) } => { ... }
131/// <----------> second submatcher; one tt, one metavar
132/// <--------------> first submatcher; three tts, zero metavars
133/// <--------------------------> top-level matcher; two tts, one metavar
134/// ```
135struct MatcherPos<'tt> {
136 /// The tokens that make up the current matcher. When we are within a `Sequence` or `Delimited`
137 /// submatcher, this is just the contents of that submatcher.
138 tts: &'tt [TokenTree],
139
140 /// The "dot" position within the current submatcher, i.e. the index into `tts`. Can go one or
141 /// two positions past the final elements in `tts` when dealing with sequences, see
142 /// `parse_tt_inner` for details.
143 idx: usize,
2c00a5a8 144
5e7ed085
FG
145 /// This vector ends up with one element per metavar in the *top-level* matcher, even when this
146 /// `MatcherPos` is for a submatcher. Each element records token trees matched against the
147 /// relevant metavar by the black box parser. The element will be a `MatchedSeq` if the
148 /// corresponding metavar is within a sequence.
149 matches: Lrc<NamedMatchVec>,
a1dfa0c6 150
5e7ed085
FG
151 /// The number of sequences this mp is within.
152 seq_depth: usize,
a1dfa0c6 153
5e7ed085
FG
154 /// The position in `matches` of the next metavar to be matched against the source token
155 /// stream. Should not be used if there are no metavars.
156 match_cur: usize,
2c00a5a8 157
5e7ed085
FG
158 /// What kind of matcher we are in. For submatchers, this contains enough information to
159 /// reconstitute a `MatcherPos` within the parent once we ascend out of the submatcher.
160 kind: MatcherKind<'tt>,
223e47cc
LB
161}
162
5e7ed085
FG
163// This type is used a lot. Make sure it doesn't unintentionally get bigger.
164#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
165rustc_data_structures::static_assert_size!(MatcherPos<'_>, 64);
166
167impl<'tt> MatcherPos<'tt> {
168 fn top_level(matcher: &'tt [TokenTree], empty_matches: Lrc<NamedMatchVec>) -> Self {
169 MatcherPos {
170 tts: matcher,
171 idx: 0,
172 matches: empty_matches,
173 seq_depth: 0,
174 match_cur: 0,
175 kind: MatcherKind::TopLevel,
176 }
041b39d2 177 }
94b46f34 178
5e7ed085
FG
179 fn empty_sequence(
180 parent_mp: &MatcherPos<'tt>,
181 seq: &'tt SequenceRepetition,
182 empty_matches: Lrc<NamedMatchVec>,
183 ) -> Self {
184 let mut mp = MatcherPos {
185 tts: parent_mp.tts,
186 idx: parent_mp.idx + 1,
187 matches: parent_mp.matches.clone(), // a cheap clone
188 seq_depth: parent_mp.seq_depth,
189 match_cur: parent_mp.match_cur + seq.num_captures,
190 kind: parent_mp.kind.clone(), // an expensive clone
191 };
192 for idx in parent_mp.match_cur..parent_mp.match_cur + seq.num_captures {
193 mp.push_match(idx, MatchedSeq(empty_matches.clone()));
194 }
195 mp
94b46f34 196 }
94b46f34 197
5e7ed085
FG
198 fn sequence(
199 parent_mp: Box<MatcherPos<'tt>>,
200 seq: &'tt SequenceRepetition,
201 empty_matches: Lrc<NamedMatchVec>,
202 ) -> Self {
203 let seq_kind = box SequenceSubmatcher {
204 parent: Parent { tts: parent_mp.tts, idx: parent_mp.idx, kind: parent_mp.kind },
205 seq,
206 };
207 let mut mp = MatcherPos {
208 tts: &seq.tts,
209 idx: 0,
210 matches: parent_mp.matches,
211 seq_depth: parent_mp.seq_depth,
212 match_cur: parent_mp.match_cur,
213 kind: MatcherKind::Sequence(seq_kind),
214 };
215 // Start with an empty vec for each metavar within the sequence. Note that `mp.seq_depth`
216 // must have the parent's depth at this point for these `push_match` calls to work.
217 for idx in mp.match_cur..mp.match_cur + seq.num_captures {
218 mp.push_match(idx, MatchedSeq(empty_matches.clone()));
94b46f34 219 }
5e7ed085
FG
220 mp.seq_depth += 1;
221 mp
94b46f34 222 }
94b46f34 223
5e7ed085
FG
224 /// Adds `m` as a named match for the `idx`-th metavar.
225 fn push_match(&mut self, idx: usize, m: NamedMatch) {
226 let matches = Lrc::make_mut(&mut self.matches);
227 match self.seq_depth {
228 0 => {
229 // We are not within a sequence. Just append `m`.
230 assert_eq!(idx, matches.len());
231 matches.push(m);
232 }
233 _ => {
234 // We are within a sequence. Find the final `MatchedSeq` at the appropriate depth
235 // and append `m` to its vector.
236 let mut curr = &mut matches[idx];
237 for _ in 0..self.seq_depth - 1 {
238 match curr {
239 MatchedSeq(seq) => {
240 let seq = Lrc::make_mut(seq);
241 curr = seq.last_mut().unwrap();
242 }
243 _ => unreachable!(),
244 }
245 }
246 match curr {
247 MatchedSeq(seq) => {
248 let seq = Lrc::make_mut(seq);
249 seq.push(m);
250 }
251 _ => unreachable!(),
252 }
253 }
94b46f34
XL
254 }
255 }
256}
257
5e7ed085
FG
258enum EofMatcherPositions<'tt> {
259 None,
260 One(Box<MatcherPos<'tt>>),
261 Multiple,
262}
263
2c00a5a8 264/// Represents the possible results of an attempted parse.
e74abb32 265crate enum ParseResult<T> {
2c00a5a8
XL
266 /// Parsed successfully.
267 Success(T),
268 /// Arm failed to match. If the second parameter is `token::Eof`, it indicates an unexpected
269 /// end of macro invocation. Otherwise, it indicates that no rules expected the given token.
dc9dc135 270 Failure(Token, &'static str),
2c00a5a8 271 /// Fatal error (malformed macro?). Abort compilation.
dfeec247 272 Error(rustc_span::Span, String),
ba9703b0 273 ErrorReported,
2c00a5a8
XL
274}
275
ba9703b0
XL
276/// A `ParseResult` where the `Success` variant contains a mapping of
277/// `MacroRulesNormalizedIdent`s to `NamedMatch`es. This represents the mapping
278/// of metavars to the token trees they bind to.
279crate type NamedParseResult = ParseResult<FxHashMap<MacroRulesNormalizedIdent, NamedMatch>>;
476ff2be 280
5e7ed085
FG
281/// Count how many metavars declarations are in `matcher`.
282pub(super) fn count_metavar_decls(matcher: &[TokenTree]) -> usize {
283 matcher
284 .iter()
285 .map(|tt| match tt {
286 TokenTree::MetaVarDecl(..) => 1,
287 TokenTree::Sequence(_, seq) => seq.num_captures,
288 TokenTree::Delimited(_, delim) => count_metavar_decls(delim.inner_tts()),
289 TokenTree::Token(..) => 0,
290 TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
291 })
292 .sum()
223e47cc
LB
293}
294
5e7ed085 295/// `NamedMatch` is a pattern-match result for a single metavar. All
9fa01778 296/// `MatchedNonterminal`s in the `NamedMatch` have the same non-terminal type
5e7ed085 297/// (expr, item, etc).
1a4d82fc 298///
7cac9316 299/// The in-memory structure of a particular `NamedMatch` represents the match
1a4d82fc
JJ
300/// that occurred when a particular subset of a matcher was applied to a
301/// particular token tree.
302///
7cac9316 303/// The width of each `MatchedSeq` in the `NamedMatch`, and the identity of
5e7ed085
FG
304/// the `MatchedNtNonTts`s, will depend on the token tree it was applied
305/// to: each `MatchedSeq` corresponds to a single repetition in the originating
7cac9316 306/// token tree. The depth of the `NamedMatch` structure will therefore depend
5e7ed085
FG
307/// only on the nesting depth of repetitions in the originating token tree it
308/// was derived from.
309///
310/// In layman's terms: `NamedMatch` will form a tree representing nested matches of a particular
311/// meta variable. For example, if we are matching the following macro against the following
312/// invocation...
313///
314/// ```rust
315/// macro_rules! foo {
316/// ($($($x:ident),+);+) => {}
317/// }
318///
319/// foo!(a, b, c, d; a, b, c, d, e);
320/// ```
321///
322/// Then, the tree will have the following shape:
323///
324/// ```rust
325/// MatchedSeq([
326/// MatchedSeq([
327/// MatchedNonterminal(a),
328/// MatchedNonterminal(b),
329/// MatchedNonterminal(c),
330/// MatchedNonterminal(d),
331/// ]),
332/// MatchedSeq([
333/// MatchedNonterminal(a),
334/// MatchedNonterminal(b),
335/// MatchedNonterminal(c),
336/// MatchedNonterminal(d),
337/// MatchedNonterminal(e),
338/// ])
339/// ])
340/// ```
041b39d2 341#[derive(Debug, Clone)]
e74abb32 342crate enum NamedMatch {
60c5eb7d 343 MatchedSeq(Lrc<NamedMatchVec>),
5e7ed085
FG
344
345 // A metavar match of type `tt`.
346 MatchedTokenTree(rustc_ast::tokenstream::TokenTree),
347
348 // A metavar match of any type other than `tt`.
9fa01778 349 MatchedNonterminal(Lrc<Nonterminal>),
223e47cc
LB
350}
351
2c00a5a8
XL
352fn nameize<I: Iterator<Item = NamedMatch>>(
353 sess: &ParseSess,
5e7ed085 354 matcher: &[TokenTree],
2c00a5a8
XL
355 mut res: I,
356) -> NamedParseResult {
0731742a 357 // Recursively descend into each type of matcher (e.g., sequences, delimited, metavars) and make
2c00a5a8
XL
358 // sure that each metavar has _exactly one_ binding. If a metavar does not have exactly one
359 // binding, then there is an error. If it does, then we insert the binding into the
360 // `NamedParseResult`.
361 fn n_rec<I: Iterator<Item = NamedMatch>>(
362 sess: &ParseSess,
5e7ed085 363 tt: &TokenTree,
2c00a5a8 364 res: &mut I,
ba9703b0 365 ret_val: &mut FxHashMap<MacroRulesNormalizedIdent, NamedMatch>,
dfeec247 366 ) -> Result<(), (rustc_span::Span, String)> {
5e7ed085 367 match *tt {
dfeec247
XL
368 TokenTree::Sequence(_, ref seq) => {
369 for next_m in &seq.tts {
370 n_rec(sess, next_m, res.by_ref(), ret_val)?
371 }
372 }
373 TokenTree::Delimited(_, ref delim) => {
5e7ed085 374 for next_m in delim.inner_tts() {
dfeec247
XL
375 n_rec(sess, next_m, res.by_ref(), ret_val)?;
376 }
377 }
b9856134
XL
378 TokenTree::MetaVarDecl(span, _, None) => {
379 if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() {
380 return Err((span, "missing fragment specifier".to_string()));
381 }
382 }
ba9703b0
XL
383 TokenTree::MetaVarDecl(sp, bind_name, _) => match ret_val
384 .entry(MacroRulesNormalizedIdent::new(bind_name))
385 {
dfeec247
XL
386 Vacant(spot) => {
387 spot.insert(res.next().unwrap());
1a4d82fc 388 }
dfeec247
XL
389 Occupied(..) => return Err((sp, format!("duplicated bind name: {}", bind_name))),
390 },
5e7ed085
FG
391 TokenTree::Token(..) => (),
392 TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
223e47cc 393 }
92a42be0
SL
394
395 Ok(())
223e47cc 396 }
92a42be0 397
b7449926 398 let mut ret_val = FxHashMap::default();
5e7ed085
FG
399 for tt in matcher {
400 match n_rec(sess, tt, res.by_ref(), &mut ret_val) {
2c00a5a8 401 Ok(_) => {}
92a42be0
SL
402 Err((sp, msg)) => return Error(sp, msg),
403 }
404 }
405
406 Success(ret_val)
223e47cc
LB
407}
408
9fa01778 409/// Performs a token equality check, ignoring syntax context (that is, an unhygienic comparison)
2c00a5a8 410fn token_name_eq(t1: &Token, t2: &Token) -> bool {
dc9dc135
XL
411 if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) = (t1.ident(), t2.ident()) {
412 ident1.name == ident2.name && is_raw1 == is_raw2
413 } else if let (Some(ident1), Some(ident2)) = (t1.lifetime(), t2.lifetime()) {
414 ident1.name == ident2.name
cc61c64b 415 } else {
dc9dc135 416 t1.kind == t2.kind
1a4d82fc
JJ
417 }
418}
419
5e7ed085
FG
420// Note: the position vectors could be created and dropped within `parse_tt`, but to avoid excess
421// allocations we have a single vector fo each kind that is cleared and reused repeatedly.
422pub struct TtParser<'tt> {
423 macro_name: Ident,
223e47cc 424
5e7ed085
FG
425 /// The set of current mps to be processed. This should be empty by the end of a successful
426 /// execution of `parse_tt_inner`.
427 cur_mps: Vec<Box<MatcherPos<'tt>>>,
476ff2be 428
5e7ed085
FG
429 /// The set of newly generated mps. These are used to replenish `cur_mps` in the function
430 /// `parse_tt`.
431 next_mps: Vec<Box<MatcherPos<'tt>>>,
223e47cc 432
5e7ed085
FG
433 /// The set of mps that are waiting for the black-box parser.
434 bb_mps: Vec<Box<MatcherPos<'tt>>>,
223e47cc 435
5e7ed085
FG
436 /// Pre-allocate an empty match array, so it can be cloned cheaply for macros with many rules
437 /// that have no metavars.
438 empty_matches: Lrc<NamedMatchVec>,
439}
440
441impl<'tt> TtParser<'tt> {
442 pub(super) fn new(macro_name: Ident) -> TtParser<'tt> {
443 TtParser {
444 macro_name,
445 cur_mps: vec![],
446 next_mps: vec![],
447 bb_mps: vec![],
448 empty_matches: Lrc::new(smallvec![]),
2c00a5a8 449 }
5e7ed085
FG
450 }
451
452 /// Process the matcher positions of `cur_mps` until it is empty. In the process, this will
453 /// produce more mps in `next_mps` and `bb_mps`.
454 ///
455 /// # Returns
456 ///
457 /// `Some(result)` if everything is finished, `None` otherwise. Note that matches are kept
458 /// track of through the mps generated.
459 fn parse_tt_inner(
460 &mut self,
461 sess: &ParseSess,
462 matcher: &[TokenTree],
463 token: &Token,
464 ) -> Option<NamedParseResult> {
465 // Matcher positions that would be valid if the macro invocation was over now. Only
466 // modified if `token == Eof`.
467 let mut eof_mps = EofMatcherPositions::None;
468
469 while let Some(mut mp) = self.cur_mps.pop() {
470 // Get the current position of the "dot" (`idx`) in `mp` and the number of token
471 // trees in the matcher (`len`).
472 let idx = mp.idx;
473 let len = mp.tts.len();
474
475 if idx < len {
476 // We are in the middle of a matcher. Compare the matcher's current tt against
477 // `token`.
478 match &mp.tts[idx] {
479 TokenTree::Sequence(_sp, seq) => {
480 let op = seq.kleene.op;
481 if op == mbe::KleeneOp::ZeroOrMore || op == mbe::KleeneOp::ZeroOrOne {
482 // Allow for the possibility of zero matches of this sequence.
483 self.cur_mps.push(box MatcherPos::empty_sequence(
484 &*mp,
485 &seq,
486 self.empty_matches.clone(),
487 ));
1a4d82fc 488 }
5e7ed085
FG
489
490 // Allow for the possibility of one or more matches of this sequence.
491 self.cur_mps.push(box MatcherPos::sequence(
492 mp,
493 &seq,
494 self.empty_matches.clone(),
495 ));
1a4d82fc 496 }
476ff2be 497
5e7ed085
FG
498 &TokenTree::MetaVarDecl(span, _, None) => {
499 // E.g. `$e` instead of `$e:expr`.
500 if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() {
501 return Some(Error(span, "missing fragment specifier".to_string()));
502 }
503 }
2c00a5a8 504
5e7ed085
FG
505 &TokenTree::MetaVarDecl(_, _, Some(kind)) => {
506 // Built-in nonterminals never start with these tokens, so we can eliminate
507 // them from consideration.
508 //
509 // We use the span of the metavariable declaration to determine any
510 // edition-specific matching behavior for non-terminals.
511 if Parser::nonterminal_may_begin_with(kind, token) {
512 self.bb_mps.push(mp);
513 }
b9856134 514 }
b9856134 515
5e7ed085
FG
516 TokenTree::Delimited(_, delimited) => {
517 // To descend into a delimited submatcher, we update `mp` appropriately,
518 // including enough information to re-ascend afterwards, and push it onto
519 // `cur_mps`. Later, when we reach the closing delimiter, we will recover
520 // the parent matcher position to ascend. Note that we use `all_tts` to
521 // include the open and close delimiter tokens.
522 let kind = MatcherKind::Delimited(box DelimitedSubmatcher {
523 parent: Parent { tts: mp.tts, idx: mp.idx, kind: mp.kind },
524 });
525 mp.tts = &delimited.all_tts;
526 mp.idx = 0;
527 mp.kind = kind;
528 self.cur_mps.push(mp);
529 }
530
531 TokenTree::Token(t) => {
532 // If it's a doc comment, we just ignore it and move on to the next tt in
533 // the matcher. This is a bug, but #95267 showed that existing programs
534 // rely on this behaviour, and changing it would require some care and a
535 // transition period.
536 //
537 // If the token matches, we can just advance the parser.
538 //
539 // Otherwise, this match has failed, there is nothing to do, and hopefully
540 // another mp in `cur_mps` will match.
541 if matches!(t, Token { kind: DocComment(..), .. }) {
542 mp.idx += 1;
543 self.cur_mps.push(mp);
544 } else if token_name_eq(&t, token) {
545 if let TokenKind::CloseDelim(_) = token.kind {
546 // Ascend out of the delimited submatcher.
547 debug_assert_eq!(idx, len - 1);
548 match mp.kind {
549 MatcherKind::Delimited(submatcher) => {
550 mp.tts = submatcher.parent.tts;
551 mp.idx = submatcher.parent.idx;
552 mp.kind = submatcher.parent.kind;
553 }
554 _ => unreachable!(),
555 }
556 }
557 mp.idx += 1;
558 self.next_mps.push(mp);
559 }
1a4d82fc 560 }
2c00a5a8 561
5e7ed085
FG
562 // These cannot appear in a matcher.
563 TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
476ff2be 564 }
5e7ed085
FG
565 } else if let MatcherKind::Sequence(box SequenceSubmatcher { parent, seq }) = &mp.kind {
566 // We are past the end of a sequence.
567 // - If it has no separator, we must be only one past the end.
568 // - If it has a separator, we may be one past the end, in which case we must
569 // look for a separator. Or we may be two past the end, in which case we have
570 // already dealt with the separator.
571 debug_assert!(idx == len || idx == len + 1 && seq.separator.is_some());
2c00a5a8 572
5e7ed085
FG
573 if idx == len {
574 // Sequence matching may have finished: move the "dot" past the sequence in
575 // `parent`. This applies whether a separator is used or not. If sequence
576 // matching hasn't finished, this `new_mp` will fail quietly when it is
577 // processed next time around the loop.
578 let new_mp = box MatcherPos {
579 tts: parent.tts,
580 idx: parent.idx + 1,
581 matches: mp.matches.clone(), // a cheap clone
582 seq_depth: mp.seq_depth - 1,
583 match_cur: mp.match_cur,
584 kind: parent.kind.clone(), // an expensive clone
585 };
586 self.cur_mps.push(new_mp);
223e47cc 587 }
2c00a5a8 588
5e7ed085
FG
589 if seq.separator.is_some() && idx == len {
590 // Look for the separator.
591 if seq.separator.as_ref().map_or(false, |sep| token_name_eq(token, sep)) {
592 // The matcher has a separator, and it matches the current token. We can
593 // advance past the separator token.
594 mp.idx += 1;
595 self.next_mps.push(mp);
596 }
597 } else if seq.kleene.op != mbe::KleeneOp::ZeroOrOne {
598 // We don't need to look for a separator: either this sequence doesn't have
599 // one, or it does and we've already handled it. Also, we are allowed to have
600 // more than one repetition. Move the "dot" back to the beginning of the
601 // matcher and try to match again.
602 mp.match_cur -= seq.num_captures;
603 mp.idx = 0;
604 self.cur_mps.push(mp);
605 }
606 } else {
607 // We are past the end of the matcher, and not in a sequence. Look for end of
608 // input.
609 debug_assert_eq!(idx, len);
610 if *token == token::Eof {
611 eof_mps = match eof_mps {
612 EofMatcherPositions::None => EofMatcherPositions::One(mp),
613 EofMatcherPositions::One(_) | EofMatcherPositions::Multiple => {
614 EofMatcherPositions::Multiple
615 }
616 }
617 }
223e47cc
LB
618 }
619 }
476ff2be 620
5e7ed085
FG
621 // If we reached the end of input, check that there is EXACTLY ONE possible matcher.
622 // Otherwise, either the parse is ambiguous (which is an error) or there is a syntax error.
623 if *token == token::Eof {
624 Some(match eof_mps {
625 EofMatcherPositions::One(mut eof_mp) => {
626 assert_eq!(eof_mp.matches.len(), count_metavar_decls(matcher));
627 // Need to take ownership of the matches from within the `Lrc`.
628 Lrc::make_mut(&mut eof_mp.matches);
629 let matches = Lrc::try_unwrap(eof_mp.matches).unwrap().into_iter();
630 nameize(sess, matcher, matches)
631 }
632 EofMatcherPositions::Multiple => {
633 Error(token.span, "ambiguity: multiple successful parses".to_string())
634 }
635 EofMatcherPositions::None => Failure(
dfeec247
XL
636 Token::new(
637 token::Eof,
5e7ed085 638 if token.span.is_dummy() { token.span } else { token.span.shrink_to_hi() },
dfeec247 639 ),
0731742a 640 "missing tokens in macro arguments",
2c00a5a8 641 ),
5e7ed085
FG
642 })
643 } else {
644 None
2c00a5a8 645 }
5e7ed085
FG
646 }
647
648 /// Match the token stream from `parser` against `matcher`.
649 pub(super) fn parse_tt(
650 &mut self,
651 parser: &mut Cow<'_, Parser<'_>>,
652 matcher: &'tt [TokenTree],
653 ) -> NamedParseResult {
654 // A queue of possible matcher positions. We initialize it with the matcher position in
655 // which the "dot" is before the first token of the first token tree in `matcher`.
656 // `parse_tt_inner` then processes all of these possible matcher positions and produces
657 // possible next positions into `next_mps`. After some post-processing, the contents of
658 // `next_mps` replenish `cur_mps` and we start over again.
659 self.cur_mps.clear();
660 self.cur_mps.push(box MatcherPos::top_level(matcher, self.empty_matches.clone()));
661
662 loop {
663 self.next_mps.clear();
664 self.bb_mps.clear();
665
666 // Process `cur_mps` until either we have finished the input or we need to get some
667 // parsing from the black-box parser done.
668 if let Some(result) = self.parse_tt_inner(parser.sess, matcher, &parser.token) {
669 return result;
670 }
671
672 // `parse_tt_inner` handled all of `cur_mps`, so it's empty.
673 assert!(self.cur_mps.is_empty());
674
675 // Error messages here could be improved with links to original rules.
676 match (self.next_mps.len(), self.bb_mps.len()) {
677 (0, 0) => {
678 // There are no possible next positions AND we aren't waiting for the black-box
679 // parser: syntax error.
680 return Failure(
681 parser.token.clone(),
682 "no rules expected this token in macro call",
683 );
684 }
685
686 (_, 0) => {
687 // Dump all possible `next_mps` into `cur_mps` for the next iteration. Then
688 // process the next token.
689 self.cur_mps.extend(self.next_mps.drain(..));
690 parser.to_mut().bump();
691 }
692
693 (0, 1) => {
694 // We need to call the black-box parser to get some nonterminal.
695 let mut mp = self.bb_mps.pop().unwrap();
696 if let TokenTree::MetaVarDecl(span, _, Some(kind)) = mp.tts[mp.idx] {
697 let match_cur = mp.match_cur;
698 // We use the span of the metavariable declaration to determine any
699 // edition-specific matching behavior for non-terminals.
700 let nt = match parser.to_mut().parse_nonterminal(kind) {
701 Err(mut err) => {
702 err.span_label(
703 span,
704 format!(
705 "while parsing argument for this `{kind}` macro fragment"
706 ),
707 )
708 .emit();
709 return ErrorReported;
710 }
711 Ok(nt) => nt,
712 };
713 let m = match nt {
714 NtOrTt::Nt(nt) => MatchedNonterminal(Lrc::new(nt)),
715 NtOrTt::Tt(tt) => MatchedTokenTree(tt),
716 };
717 mp.push_match(match_cur, m);
718 mp.idx += 1;
719 mp.match_cur += 1;
720 } else {
721 unreachable!()
3dfed10e 722 }
5e7ed085
FG
723 self.cur_mps.push(mp);
724 }
725
726 (_, _) => {
727 // Too many possibilities!
728 return self.ambiguity_error(parser.token.span);
729 }
223e47cc 730 }
5e7ed085
FG
731
732 assert!(!self.cur_mps.is_empty());
223e47cc 733 }
5e7ed085 734 }
223e47cc 735
5e7ed085
FG
736 fn ambiguity_error(&self, token_span: rustc_span::Span) -> NamedParseResult {
737 let nts = self
738 .bb_mps
739 .iter()
740 .map(|mp| match mp.tts[mp.idx] {
741 TokenTree::MetaVarDecl(_, bind, Some(kind)) => {
742 format!("{} ('{}')", kind, bind)
743 }
744 _ => panic!(),
745 })
746 .collect::<Vec<String>>()
747 .join(" or ");
748
749 Error(
750 token_span,
751 format!(
752 "local ambiguity when calling macro `{}`: multiple parsing options: {}",
753 self.macro_name,
754 match self.next_mps.len() {
755 0 => format!("built-in NTs {}.", nts),
756 1 => format!("built-in NTs {} or 1 other option.", nts),
757 n => format!("built-in NTs {} or {} other options.", nts, n),
758 }
759 ),
760 )
223e47cc
LB
761 }
762}