]>
Commit | Line | Data |
---|---|---|
fc512014 | 1 | //! This is an NFA-based parser, which calls out to the main Rust parser for named non-terminals |
3b2f2976 XL |
2 | //! (which it commits to fully when it hits one in a grammar). There's a set of current NFA threads |
3 | //! and a set of next ones. Instead of NTs, we have a special case for Kleene star. The big-O, in | |
4 | //! pathological cases, is worse than traditional use of NFA or Earley parsing, but it's an easier | |
5 | //! fit for Macro-by-Example-style rules. | |
6 | //! | |
7 | //! (In order to prevent the pathological case, we'd need to lazily construct the resulting | |
8 | //! `NamedMatch`es at the very end. It'd be a pain, and require more memory to keep around old | |
5e7ed085 | 9 | //! matcher positions, but it would also save overhead) |
3b2f2976 | 10 | //! |
94b46f34 | 11 | //! We don't say this parser uses the Earley algorithm, because it's unnecessarily inaccurate. |
3b2f2976 XL |
12 | //! The macro parser restricts itself to the features of finite state automata. Earley parsers |
13 | //! can be described as an extension of NFAs with completion rules, prediction rules, and recursion. | |
1a4d82fc JJ |
14 | //! |
15 | //! Quick intro to how the parser works: | |
16 | //! | |
5e7ed085 FG |
17 | //! A "matcher position" (a.k.a. "position" or "mp") is a dot in the middle of a matcher, usually |
18 | //! written as a `·`. For example `· a $( a )* a b` is one, as is `a $( · a )* a b`. | |
1a4d82fc | 19 | //! |
04454e1e | 20 | //! The parser walks through the input a token at a time, maintaining a list |
5e7ed085 | 21 | //! of threads consistent with the current position in the input string: `cur_mps`. |
1a4d82fc | 22 | //! |
5e7ed085 FG |
23 | //! As it processes them, it fills up `eof_mps` with threads that would be valid if |
24 | //! the macro invocation is now over, `bb_mps` with threads that are waiting on | |
25 | //! a Rust non-terminal like `$e:expr`, and `next_mps` with threads that are waiting | |
b039eaaf | 26 | //! on a particular token. Most of the logic concerns moving the · through the |
3b2f2976 XL |
27 | //! repetitions indicated by Kleene stars. The rules for moving the · without |
28 | //! consuming any input are called epsilon transitions. It only advances or calls | |
5e7ed085 | 29 | //! out to the real Rust parser when no `cur_mps` threads remain. |
1a4d82fc | 30 | //! |
7cac9316 | 31 | //! Example: |
1a4d82fc | 32 | //! |
7cac9316 XL |
33 | //! ```text, ignore |
34 | //! Start parsing a a a a b against [· a $( a )* a b]. | |
35 | //! | |
36 | //! Remaining input: a a a a b | |
3b2f2976 | 37 | //! next: [· a $( a )* a b] |
1a4d82fc | 38 | //! |
7cac9316 | 39 | //! - - - Advance over an a. - - - |
1a4d82fc | 40 | //! |
7cac9316 | 41 | //! Remaining input: a a a b |
1a4d82fc | 42 | //! cur: [a · $( a )* a b] |
5e7ed085 | 43 | //! Descend/Skip (first position). |
1a4d82fc JJ |
44 | //! next: [a $( · a )* a b] [a $( a )* · a b]. |
45 | //! | |
7cac9316 | 46 | //! - - - Advance over an a. - - - |
1a4d82fc | 47 | //! |
7cac9316 | 48 | //! Remaining input: a a b |
3b2f2976 | 49 | //! cur: [a $( a · )* a b] [a $( a )* a · b] |
5e7ed085 | 50 | //! Follow epsilon transition: Finish/Repeat (first position) |
1a4d82fc JJ |
51 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] |
52 | //! | |
7cac9316 | 53 | //! - - - Advance over an a. - - - (this looks exactly like the last step) |
1a4d82fc | 54 | //! |
7cac9316 | 55 | //! Remaining input: a b |
3b2f2976 | 56 | //! cur: [a $( a · )* a b] [a $( a )* a · b] |
5e7ed085 | 57 | //! Follow epsilon transition: Finish/Repeat (first position) |
1a4d82fc JJ |
58 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] |
59 | //! | |
7cac9316 | 60 | //! - - - Advance over an a. - - - (this looks exactly like the last step) |
1a4d82fc | 61 | //! |
7cac9316 | 62 | //! Remaining input: b |
3b2f2976 | 63 | //! cur: [a $( a · )* a b] [a $( a )* a · b] |
5e7ed085 | 64 | //! Follow epsilon transition: Finish/Repeat (first position) |
3b2f2976 | 65 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] |
1a4d82fc | 66 | //! |
7cac9316 | 67 | //! - - - Advance over a b. - - - |
1a4d82fc | 68 | //! |
7cac9316 | 69 | //! Remaining input: '' |
1a4d82fc | 70 | //! eof: [a $( a )* a b ·] |
7cac9316 | 71 | //! ``` |
1a4d82fc | 72 | |
923072b8 FG |
73 | pub(crate) use NamedMatch::*; |
74 | pub(crate) use ParseResult::*; | |
9fa01778 | 75 | |
487cf647 | 76 | use crate::mbe::{macro_rules::Tracker, KleeneOp, TokenTree}; |
e74abb32 | 77 | |
04454e1e | 78 | use rustc_ast::token::{self, DocComment, Nonterminal, NonterminalKind, Token}; |
487cf647 FG |
79 | use rustc_ast_pretty::pprust; |
80 | use rustc_data_structures::fx::FxHashMap; | |
81 | use rustc_data_structures::sync::Lrc; | |
82 | use rustc_errors::ErrorGuaranteed; | |
04454e1e | 83 | use rustc_lint_defs::pluralize; |
5e7ed085 | 84 | use rustc_parse::parser::{NtOrTt, Parser}; |
487cf647 | 85 | use rustc_span::symbol::Ident; |
5869c6ff | 86 | use rustc_span::symbol::MacroRulesNormalizedIdent; |
04454e1e | 87 | use rustc_span::Span; |
74b04a01 | 88 | use std::borrow::Cow; |
b7449926 | 89 | use std::collections::hash_map::Entry::{Occupied, Vacant}; |
487cf647 | 90 | use std::fmt::Display; |
49aad941 | 91 | use std::rc::Rc; |
223e47cc | 92 | |
04454e1e FG |
93 | /// A unit within a matcher that a `MatcherPos` can refer to. Similar to (and derived from) |
94 | /// `mbe::TokenTree`, but designed specifically for fast and easy traversal during matching. | |
95 | /// Notable differences to `mbe::TokenTree`: | |
96 | /// - It is non-recursive, i.e. there is no nesting. | |
97 | /// - The end pieces of each sequence (the separator, if present, and the Kleene op) are | |
98 | /// represented explicitly, as is the very end of the matcher. | |
99 | /// | |
100 | /// This means a matcher can be represented by `&[MatcherLoc]`, and traversal mostly involves | |
101 | /// simply incrementing the current matcher position index by one. | |
487cf647 FG |
102 | #[derive(Debug, PartialEq, Clone)] |
103 | pub(crate) enum MatcherLoc { | |
04454e1e FG |
104 | Token { |
105 | token: Token, | |
106 | }, | |
107 | Delimited, | |
108 | Sequence { | |
109 | op: KleeneOp, | |
110 | num_metavar_decls: usize, | |
111 | idx_first_after: usize, | |
112 | next_metavar: usize, | |
113 | seq_depth: usize, | |
114 | }, | |
115 | SequenceKleeneOpNoSep { | |
116 | op: KleeneOp, | |
117 | idx_first: usize, | |
118 | }, | |
119 | SequenceSep { | |
120 | separator: Token, | |
121 | }, | |
122 | SequenceKleeneOpAfterSep { | |
123 | idx_first: usize, | |
124 | }, | |
125 | MetaVarDecl { | |
126 | span: Span, | |
127 | bind: Ident, | |
128 | kind: Option<NonterminalKind>, | |
129 | next_metavar: usize, | |
130 | seq_depth: usize, | |
131 | }, | |
132 | Eof, | |
133 | } | |
5e7ed085 | 134 | |
487cf647 FG |
135 | impl MatcherLoc { |
136 | pub(super) fn span(&self) -> Option<Span> { | |
137 | match self { | |
138 | MatcherLoc::Token { token } => Some(token.span), | |
139 | MatcherLoc::Delimited => None, | |
140 | MatcherLoc::Sequence { .. } => None, | |
141 | MatcherLoc::SequenceKleeneOpNoSep { .. } => None, | |
142 | MatcherLoc::SequenceSep { .. } => None, | |
143 | MatcherLoc::SequenceKleeneOpAfterSep { .. } => None, | |
144 | MatcherLoc::MetaVarDecl { span, .. } => Some(*span), | |
145 | MatcherLoc::Eof => None, | |
146 | } | |
147 | } | |
148 | } | |
149 | ||
150 | impl Display for MatcherLoc { | |
151 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
152 | match self { | |
153 | MatcherLoc::Token { token } | MatcherLoc::SequenceSep { separator: token } => { | |
154 | write!(f, "`{}`", pprust::token_to_string(token)) | |
155 | } | |
156 | MatcherLoc::MetaVarDecl { bind, kind, .. } => { | |
157 | write!(f, "meta-variable `${bind}")?; | |
158 | if let Some(kind) = kind { | |
159 | write!(f, ":{}", kind)?; | |
160 | } | |
161 | write!(f, "`")?; | |
162 | Ok(()) | |
163 | } | |
164 | MatcherLoc::Eof => f.write_str("end of macro"), | |
165 | ||
166 | // These are not printed in the diagnostic | |
167 | MatcherLoc::Delimited => f.write_str("delimiter"), | |
168 | MatcherLoc::Sequence { .. } => f.write_str("sequence start"), | |
169 | MatcherLoc::SequenceKleeneOpNoSep { .. } => f.write_str("sequence end"), | |
170 | MatcherLoc::SequenceKleeneOpAfterSep { .. } => f.write_str("sequence end"), | |
171 | } | |
172 | } | |
173 | } | |
174 | ||
04454e1e FG |
175 | pub(super) fn compute_locs(matcher: &[TokenTree]) -> Vec<MatcherLoc> { |
176 | fn inner( | |
177 | tts: &[TokenTree], | |
178 | locs: &mut Vec<MatcherLoc>, | |
179 | next_metavar: &mut usize, | |
180 | seq_depth: usize, | |
181 | ) { | |
182 | for tt in tts { | |
183 | match tt { | |
184 | TokenTree::Token(token) => { | |
185 | locs.push(MatcherLoc::Token { token: token.clone() }); | |
186 | } | |
187 | TokenTree::Delimited(span, delimited) => { | |
188 | let open_token = Token::new(token::OpenDelim(delimited.delim), span.open); | |
189 | let close_token = Token::new(token::CloseDelim(delimited.delim), span.close); | |
190 | ||
191 | locs.push(MatcherLoc::Delimited); | |
192 | locs.push(MatcherLoc::Token { token: open_token }); | |
193 | inner(&delimited.tts, locs, next_metavar, seq_depth); | |
194 | locs.push(MatcherLoc::Token { token: close_token }); | |
195 | } | |
196 | TokenTree::Sequence(_, seq) => { | |
197 | // We can't determine `idx_first_after` and construct the final | |
198 | // `MatcherLoc::Sequence` until after `inner()` is called and the sequence end | |
199 | // pieces are processed. So we push a dummy value (`Eof` is cheapest to | |
200 | // construct) now, and overwrite it with the proper value below. | |
201 | let dummy = MatcherLoc::Eof; | |
202 | locs.push(dummy); | |
203 | ||
204 | let next_metavar_orig = *next_metavar; | |
205 | let op = seq.kleene.op; | |
206 | let idx_first = locs.len(); | |
207 | let idx_seq = idx_first - 1; | |
208 | inner(&seq.tts, locs, next_metavar, seq_depth + 1); | |
209 | ||
210 | if let Some(separator) = &seq.separator { | |
211 | locs.push(MatcherLoc::SequenceSep { separator: separator.clone() }); | |
212 | locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first }); | |
213 | } else { | |
214 | locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first }); | |
215 | } | |
223e47cc | 216 | |
04454e1e FG |
217 | // Overwrite the dummy value pushed above with the proper value. |
218 | locs[idx_seq] = MatcherLoc::Sequence { | |
219 | op, | |
220 | num_metavar_decls: seq.num_captures, | |
221 | idx_first_after: locs.len(), | |
222 | next_metavar: next_metavar_orig, | |
223 | seq_depth, | |
224 | }; | |
225 | } | |
226 | &TokenTree::MetaVarDecl(span, bind, kind) => { | |
227 | locs.push(MatcherLoc::MetaVarDecl { | |
228 | span, | |
229 | bind, | |
230 | kind, | |
231 | next_metavar: *next_metavar, | |
232 | seq_depth, | |
233 | }); | |
234 | *next_metavar += 1; | |
235 | } | |
236 | TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(), | |
237 | } | |
238 | } | |
239 | } | |
223e47cc | 240 | |
04454e1e FG |
241 | let mut locs = vec![]; |
242 | let mut next_metavar = 0; | |
243 | inner(matcher, &mut locs, &mut next_metavar, /* seq_depth */ 0); | |
223e47cc | 244 | |
04454e1e FG |
245 | // A final entry is needed for eof. |
246 | locs.push(MatcherLoc::Eof); | |
223e47cc | 247 | |
04454e1e | 248 | locs |
5e7ed085 | 249 | } |
a1dfa0c6 | 250 | |
04454e1e FG |
251 | /// A single matcher position, representing the state of matching. |
252 | struct MatcherPos { | |
253 | /// The index into `TtParser::locs`, which represents the "dot". | |
5e7ed085 | 254 | idx: usize, |
2c00a5a8 | 255 | |
04454e1e FG |
256 | /// The matches made against metavar decls so far. On a successful match, this vector ends up |
257 | /// with one element per metavar decl in the matcher. Each element records token trees matched | |
258 | /// against the relevant metavar by the black box parser. An element will be a `MatchedSeq` if | |
259 | /// the corresponding metavar decl is within a sequence. | |
260 | /// | |
49aad941 | 261 | /// It is critical to performance that this is an `Rc`, because it gets cloned frequently when |
04454e1e FG |
262 | /// processing sequences. Mostly for sequence-ending possibilities that must be tried but end |
263 | /// up failing. | |
49aad941 | 264 | matches: Rc<Vec<NamedMatch>>, |
223e47cc LB |
265 | } |
266 | ||
5e7ed085 FG |
267 | // This type is used a lot. Make sure it doesn't unintentionally get bigger. |
268 | #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] | |
04454e1e | 269 | rustc_data_structures::static_assert_size!(MatcherPos, 16); |
94b46f34 | 270 | |
04454e1e FG |
271 | impl MatcherPos { |
272 | /// Adds `m` as a named match for the `metavar_idx`-th metavar. There are only two call sites, | |
273 | /// and both are hot enough to be always worth inlining. | |
274 | #[inline(always)] | |
275 | fn push_match(&mut self, metavar_idx: usize, seq_depth: usize, m: NamedMatch) { | |
49aad941 | 276 | let matches = Rc::make_mut(&mut self.matches); |
04454e1e | 277 | match seq_depth { |
5e7ed085 FG |
278 | 0 => { |
279 | // We are not within a sequence. Just append `m`. | |
04454e1e | 280 | assert_eq!(metavar_idx, matches.len()); |
5e7ed085 FG |
281 | matches.push(m); |
282 | } | |
283 | _ => { | |
284 | // We are within a sequence. Find the final `MatchedSeq` at the appropriate depth | |
285 | // and append `m` to its vector. | |
04454e1e FG |
286 | let mut curr = &mut matches[metavar_idx]; |
287 | for _ in 0..seq_depth - 1 { | |
5e7ed085 | 288 | match curr { |
04454e1e | 289 | MatchedSeq(seq) => curr = seq.last_mut().unwrap(), |
5e7ed085 FG |
290 | _ => unreachable!(), |
291 | } | |
292 | } | |
293 | match curr { | |
04454e1e | 294 | MatchedSeq(seq) => seq.push(m), |
5e7ed085 FG |
295 | _ => unreachable!(), |
296 | } | |
297 | } | |
94b46f34 XL |
298 | } |
299 | } | |
300 | } | |
301 | ||
04454e1e | 302 | enum EofMatcherPositions { |
5e7ed085 | 303 | None, |
04454e1e | 304 | One(MatcherPos), |
5e7ed085 FG |
305 | Multiple, |
306 | } | |
307 | ||
2c00a5a8 | 308 | /// Represents the possible results of an attempted parse. |
9c376795 | 309 | pub(crate) enum ParseResult<T, F> { |
2c00a5a8 XL |
310 | /// Parsed successfully. |
311 | Success(T), | |
312 | /// Arm failed to match. If the second parameter is `token::Eof`, it indicates an unexpected | |
313 | /// end of macro invocation. Otherwise, it indicates that no rules expected the given token. | |
9c376795 FG |
314 | /// The usize is the approximate position of the token in the input token stream. |
315 | Failure(F), | |
2c00a5a8 | 316 | /// Fatal error (malformed macro?). Abort compilation. |
dfeec247 | 317 | Error(rustc_span::Span, String), |
487cf647 | 318 | ErrorReported(ErrorGuaranteed), |
2c00a5a8 XL |
319 | } |
320 | ||
ba9703b0 XL |
321 | /// A `ParseResult` where the `Success` variant contains a mapping of |
322 | /// `MacroRulesNormalizedIdent`s to `NamedMatch`es. This represents the mapping | |
323 | /// of metavars to the token trees they bind to. | |
9c376795 | 324 | pub(crate) type NamedParseResult<F> = ParseResult<NamedMatches, F>; |
487cf647 FG |
325 | |
326 | /// Contains a mapping of `MacroRulesNormalizedIdent`s to `NamedMatch`es. | |
327 | /// This represents the mapping of metavars to the token trees they bind to. | |
328 | pub(crate) type NamedMatches = FxHashMap<MacroRulesNormalizedIdent, NamedMatch>; | |
476ff2be | 329 | |
5e7ed085 FG |
330 | /// Count how many metavars declarations are in `matcher`. |
331 | pub(super) fn count_metavar_decls(matcher: &[TokenTree]) -> usize { | |
332 | matcher | |
333 | .iter() | |
334 | .map(|tt| match tt { | |
335 | TokenTree::MetaVarDecl(..) => 1, | |
336 | TokenTree::Sequence(_, seq) => seq.num_captures, | |
04454e1e | 337 | TokenTree::Delimited(_, delim) => count_metavar_decls(&delim.tts), |
5e7ed085 FG |
338 | TokenTree::Token(..) => 0, |
339 | TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(), | |
340 | }) | |
341 | .sum() | |
223e47cc LB |
342 | } |
343 | ||
5e7ed085 | 344 | /// `NamedMatch` is a pattern-match result for a single metavar. All |
9fa01778 | 345 | /// `MatchedNonterminal`s in the `NamedMatch` have the same non-terminal type |
5e7ed085 | 346 | /// (expr, item, etc). |
1a4d82fc | 347 | /// |
7cac9316 | 348 | /// The in-memory structure of a particular `NamedMatch` represents the match |
1a4d82fc JJ |
349 | /// that occurred when a particular subset of a matcher was applied to a |
350 | /// particular token tree. | |
351 | /// | |
7cac9316 | 352 | /// The width of each `MatchedSeq` in the `NamedMatch`, and the identity of |
5e7ed085 FG |
353 | /// the `MatchedNtNonTts`s, will depend on the token tree it was applied |
354 | /// to: each `MatchedSeq` corresponds to a single repetition in the originating | |
7cac9316 | 355 | /// token tree. The depth of the `NamedMatch` structure will therefore depend |
5e7ed085 FG |
356 | /// only on the nesting depth of repetitions in the originating token tree it |
357 | /// was derived from. | |
358 | /// | |
04454e1e | 359 | /// In layperson's terms: `NamedMatch` will form a tree representing nested matches of a particular |
5e7ed085 FG |
360 | /// meta variable. For example, if we are matching the following macro against the following |
361 | /// invocation... | |
362 | /// | |
363 | /// ```rust | |
364 | /// macro_rules! foo { | |
365 | /// ($($($x:ident),+);+) => {} | |
366 | /// } | |
367 | /// | |
368 | /// foo!(a, b, c, d; a, b, c, d, e); | |
369 | /// ``` | |
370 | /// | |
371 | /// Then, the tree will have the following shape: | |
372 | /// | |
04454e1e FG |
373 | /// ```ignore (private-internal) |
374 | /// # use NamedMatch::*; | |
5e7ed085 FG |
375 | /// MatchedSeq([ |
376 | /// MatchedSeq([ | |
377 | /// MatchedNonterminal(a), | |
378 | /// MatchedNonterminal(b), | |
379 | /// MatchedNonterminal(c), | |
380 | /// MatchedNonterminal(d), | |
381 | /// ]), | |
382 | /// MatchedSeq([ | |
383 | /// MatchedNonterminal(a), | |
384 | /// MatchedNonterminal(b), | |
385 | /// MatchedNonterminal(c), | |
386 | /// MatchedNonterminal(d), | |
387 | /// MatchedNonterminal(e), | |
388 | /// ]) | |
389 | /// ]) | |
390 | /// ``` | |
041b39d2 | 391 | #[derive(Debug, Clone)] |
923072b8 | 392 | pub(crate) enum NamedMatch { |
04454e1e | 393 | MatchedSeq(Vec<NamedMatch>), |
5e7ed085 FG |
394 | |
395 | // A metavar match of type `tt`. | |
396 | MatchedTokenTree(rustc_ast::tokenstream::TokenTree), | |
397 | ||
398 | // A metavar match of any type other than `tt`. | |
9fa01778 | 399 | MatchedNonterminal(Lrc<Nonterminal>), |
223e47cc LB |
400 | } |
401 | ||
9fa01778 | 402 | /// Performs a token equality check, ignoring syntax context (that is, an unhygienic comparison) |
2c00a5a8 | 403 | fn token_name_eq(t1: &Token, t2: &Token) -> bool { |
dc9dc135 XL |
404 | if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) = (t1.ident(), t2.ident()) { |
405 | ident1.name == ident2.name && is_raw1 == is_raw2 | |
406 | } else if let (Some(ident1), Some(ident2)) = (t1.lifetime(), t2.lifetime()) { | |
407 | ident1.name == ident2.name | |
cc61c64b | 408 | } else { |
dc9dc135 | 409 | t1.kind == t2.kind |
1a4d82fc JJ |
410 | } |
411 | } | |
412 | ||
04454e1e | 413 | // Note: the vectors could be created and dropped within `parse_tt`, but to avoid excess |
923072b8 | 414 | // allocations we have a single vector for each kind that is cleared and reused repeatedly. |
04454e1e | 415 | pub struct TtParser { |
5e7ed085 | 416 | macro_name: Ident, |
223e47cc | 417 | |
5e7ed085 FG |
418 | /// The set of current mps to be processed. This should be empty by the end of a successful |
419 | /// execution of `parse_tt_inner`. | |
04454e1e | 420 | cur_mps: Vec<MatcherPos>, |
476ff2be | 421 | |
5e7ed085 FG |
422 | /// The set of newly generated mps. These are used to replenish `cur_mps` in the function |
423 | /// `parse_tt`. | |
04454e1e | 424 | next_mps: Vec<MatcherPos>, |
223e47cc | 425 | |
5e7ed085 | 426 | /// The set of mps that are waiting for the black-box parser. |
04454e1e | 427 | bb_mps: Vec<MatcherPos>, |
223e47cc | 428 | |
5e7ed085 FG |
429 | /// Pre-allocate an empty match array, so it can be cloned cheaply for macros with many rules |
430 | /// that have no metavars. | |
49aad941 | 431 | empty_matches: Rc<Vec<NamedMatch>>, |
5e7ed085 FG |
432 | } |
433 | ||
04454e1e FG |
434 | impl TtParser { |
435 | pub(super) fn new(macro_name: Ident) -> TtParser { | |
5e7ed085 FG |
436 | TtParser { |
437 | macro_name, | |
438 | cur_mps: vec![], | |
439 | next_mps: vec![], | |
440 | bb_mps: vec![], | |
49aad941 | 441 | empty_matches: Rc::new(vec![]), |
2c00a5a8 | 442 | } |
5e7ed085 FG |
443 | } |
444 | ||
487cf647 FG |
445 | pub(super) fn has_no_remaining_items_for_step(&self) -> bool { |
446 | self.cur_mps.is_empty() | |
447 | } | |
448 | ||
5e7ed085 FG |
449 | /// Process the matcher positions of `cur_mps` until it is empty. In the process, this will |
450 | /// produce more mps in `next_mps` and `bb_mps`. | |
451 | /// | |
452 | /// # Returns | |
453 | /// | |
454 | /// `Some(result)` if everything is finished, `None` otherwise. Note that matches are kept | |
455 | /// track of through the mps generated. | |
487cf647 | 456 | fn parse_tt_inner<'matcher, T: Tracker<'matcher>>( |
5e7ed085 | 457 | &mut self, |
487cf647 | 458 | matcher: &'matcher [MatcherLoc], |
5e7ed085 | 459 | token: &Token, |
9c376795 | 460 | approx_position: usize, |
487cf647 | 461 | track: &mut T, |
9c376795 | 462 | ) -> Option<NamedParseResult<T::Failure>> { |
5e7ed085 FG |
463 | // Matcher positions that would be valid if the macro invocation was over now. Only |
464 | // modified if `token == Eof`. | |
465 | let mut eof_mps = EofMatcherPositions::None; | |
466 | ||
467 | while let Some(mut mp) = self.cur_mps.pop() { | |
487cf647 FG |
468 | let matcher_loc = &matcher[mp.idx]; |
469 | track.before_match_loc(self, matcher_loc); | |
470 | ||
471 | match matcher_loc { | |
04454e1e FG |
472 | MatcherLoc::Token { token: t } => { |
473 | // If it's a doc comment, we just ignore it and move on to the next tt in the | |
474 | // matcher. This is a bug, but #95267 showed that existing programs rely on | |
475 | // this behaviour, and changing it would require some care and a transition | |
476 | // period. | |
477 | // | |
478 | // If the token matches, we can just advance the parser. | |
479 | // | |
480 | // Otherwise, this match has failed, there is nothing to do, and hopefully | |
481 | // another mp in `cur_mps` will match. | |
482 | if matches!(t, Token { kind: DocComment(..), .. }) { | |
483 | mp.idx += 1; | |
484 | self.cur_mps.push(mp); | |
485 | } else if token_name_eq(&t, token) { | |
486 | mp.idx += 1; | |
487 | self.next_mps.push(mp); | |
5e7ed085 | 488 | } |
04454e1e FG |
489 | } |
490 | MatcherLoc::Delimited => { | |
f2b60f7d | 491 | // Entering the delimiter is trivial. |
04454e1e FG |
492 | mp.idx += 1; |
493 | self.cur_mps.push(mp); | |
494 | } | |
495 | &MatcherLoc::Sequence { | |
496 | op, | |
497 | num_metavar_decls, | |
498 | idx_first_after, | |
499 | next_metavar, | |
500 | seq_depth, | |
501 | } => { | |
502 | // Install an empty vec for each metavar within the sequence. | |
503 | for metavar_idx in next_metavar..next_metavar + num_metavar_decls { | |
504 | mp.push_match(metavar_idx, seq_depth, MatchedSeq(vec![])); | |
b9856134 | 505 | } |
b9856134 | 506 | |
9ffffee4 | 507 | if matches!(op, KleeneOp::ZeroOrMore | KleeneOp::ZeroOrOne) { |
04454e1e FG |
508 | // Try zero matches of this sequence, by skipping over it. |
509 | self.cur_mps.push(MatcherPos { | |
510 | idx: idx_first_after, | |
49aad941 | 511 | matches: Rc::clone(&mp.matches), |
5e7ed085 | 512 | }); |
5e7ed085 FG |
513 | } |
514 | ||
04454e1e FG |
515 | // Try one or more matches of this sequence, by entering it. |
516 | mp.idx += 1; | |
517 | self.cur_mps.push(mp); | |
476ff2be | 518 | } |
04454e1e FG |
519 | &MatcherLoc::SequenceKleeneOpNoSep { op, idx_first } => { |
520 | // We are past the end of a sequence with no separator. Try ending the | |
521 | // sequence. If that's not possible, `ending_mp` will fail quietly when it is | |
5e7ed085 | 522 | // processed next time around the loop. |
04454e1e | 523 | let ending_mp = MatcherPos { |
487cf647 | 524 | idx: mp.idx + 1, // +1 skips the Kleene op |
49aad941 | 525 | matches: Rc::clone(&mp.matches), |
5e7ed085 | 526 | }; |
04454e1e FG |
527 | self.cur_mps.push(ending_mp); |
528 | ||
529 | if op != KleeneOp::ZeroOrOne { | |
530 | // Try another repetition. | |
531 | mp.idx = idx_first; | |
532 | self.cur_mps.push(mp); | |
533 | } | |
223e47cc | 534 | } |
04454e1e FG |
535 | MatcherLoc::SequenceSep { separator } => { |
536 | // We are past the end of a sequence with a separator but we haven't seen the | |
537 | // separator yet. Try ending the sequence. If that's not possible, `ending_mp` | |
538 | // will fail quietly when it is processed next time around the loop. | |
539 | let ending_mp = MatcherPos { | |
487cf647 | 540 | idx: mp.idx + 2, // +2 skips the separator and the Kleene op |
49aad941 | 541 | matches: Rc::clone(&mp.matches), |
04454e1e FG |
542 | }; |
543 | self.cur_mps.push(ending_mp); | |
2c00a5a8 | 544 | |
04454e1e FG |
545 | if token_name_eq(token, separator) { |
546 | // The separator matches the current token. Advance past it. | |
5e7ed085 FG |
547 | mp.idx += 1; |
548 | self.next_mps.push(mp); | |
549 | } | |
04454e1e FG |
550 | } |
551 | &MatcherLoc::SequenceKleeneOpAfterSep { idx_first } => { | |
552 | // We are past the sequence separator. This can't be a `?` Kleene op, because | |
553 | // they don't permit separators. Try another repetition. | |
554 | mp.idx = idx_first; | |
5e7ed085 FG |
555 | self.cur_mps.push(mp); |
556 | } | |
04454e1e FG |
557 | &MatcherLoc::MetaVarDecl { span, kind, .. } => { |
558 | // Built-in nonterminals never start with these tokens, so we can eliminate | |
559 | // them from consideration. We use the span of the metavariable declaration | |
560 | // to determine any edition-specific matching behavior for non-terminals. | |
561 | if let Some(kind) = kind { | |
562 | if Parser::nonterminal_may_begin_with(kind, token) { | |
563 | self.bb_mps.push(mp); | |
564 | } | |
565 | } else { | |
566 | // E.g. `$e` instead of `$e:expr`, reported as a hard error if actually used. | |
567 | // Both this check and the one in `nameize` are necessary, surprisingly. | |
568 | return Some(Error(span, "missing fragment specifier".to_string())); | |
569 | } | |
570 | } | |
571 | MatcherLoc::Eof => { | |
572 | // We are past the matcher's end, and not in a sequence. Try to end things. | |
573 | debug_assert_eq!(mp.idx, matcher.len() - 1); | |
574 | if *token == token::Eof { | |
575 | eof_mps = match eof_mps { | |
576 | EofMatcherPositions::None => EofMatcherPositions::One(mp), | |
577 | EofMatcherPositions::One(_) | EofMatcherPositions::Multiple => { | |
578 | EofMatcherPositions::Multiple | |
579 | } | |
5e7ed085 FG |
580 | } |
581 | } | |
582 | } | |
223e47cc LB |
583 | } |
584 | } | |
476ff2be | 585 | |
5e7ed085 FG |
586 | // If we reached the end of input, check that there is EXACTLY ONE possible matcher. |
587 | // Otherwise, either the parse is ambiguous (which is an error) or there is a syntax error. | |
588 | if *token == token::Eof { | |
589 | Some(match eof_mps { | |
590 | EofMatcherPositions::One(mut eof_mp) => { | |
49aad941 FG |
591 | // Need to take ownership of the matches from within the `Rc`. |
592 | Rc::make_mut(&mut eof_mp.matches); | |
593 | let matches = Rc::try_unwrap(eof_mp.matches).unwrap().into_iter(); | |
04454e1e | 594 | self.nameize(matcher, matches) |
5e7ed085 FG |
595 | } |
596 | EofMatcherPositions::Multiple => { | |
597 | Error(token.span, "ambiguity: multiple successful parses".to_string()) | |
598 | } | |
9c376795 | 599 | EofMatcherPositions::None => Failure(T::build_failure( |
dfeec247 XL |
600 | Token::new( |
601 | token::Eof, | |
5e7ed085 | 602 | if token.span.is_dummy() { token.span } else { token.span.shrink_to_hi() }, |
dfeec247 | 603 | ), |
9c376795 | 604 | approx_position, |
0731742a | 605 | "missing tokens in macro arguments", |
9c376795 | 606 | )), |
5e7ed085 FG |
607 | }) |
608 | } else { | |
609 | None | |
2c00a5a8 | 610 | } |
5e7ed085 FG |
611 | } |
612 | ||
613 | /// Match the token stream from `parser` against `matcher`. | |
487cf647 | 614 | pub(super) fn parse_tt<'matcher, T: Tracker<'matcher>>( |
5e7ed085 FG |
615 | &mut self, |
616 | parser: &mut Cow<'_, Parser<'_>>, | |
487cf647 FG |
617 | matcher: &'matcher [MatcherLoc], |
618 | track: &mut T, | |
9c376795 | 619 | ) -> NamedParseResult<T::Failure> { |
5e7ed085 FG |
620 | // A queue of possible matcher positions. We initialize it with the matcher position in |
621 | // which the "dot" is before the first token of the first token tree in `matcher`. | |
622 | // `parse_tt_inner` then processes all of these possible matcher positions and produces | |
623 | // possible next positions into `next_mps`. After some post-processing, the contents of | |
624 | // `next_mps` replenish `cur_mps` and we start over again. | |
625 | self.cur_mps.clear(); | |
04454e1e | 626 | self.cur_mps.push(MatcherPos { idx: 0, matches: self.empty_matches.clone() }); |
5e7ed085 FG |
627 | |
628 | loop { | |
629 | self.next_mps.clear(); | |
630 | self.bb_mps.clear(); | |
631 | ||
632 | // Process `cur_mps` until either we have finished the input or we need to get some | |
633 | // parsing from the black-box parser done. | |
9c376795 FG |
634 | let res = self.parse_tt_inner( |
635 | matcher, | |
636 | &parser.token, | |
637 | parser.approx_token_stream_pos(), | |
638 | track, | |
639 | ); | |
487cf647 | 640 | if let Some(res) = res { |
04454e1e | 641 | return res; |
5e7ed085 FG |
642 | } |
643 | ||
644 | // `parse_tt_inner` handled all of `cur_mps`, so it's empty. | |
645 | assert!(self.cur_mps.is_empty()); | |
646 | ||
647 | // Error messages here could be improved with links to original rules. | |
648 | match (self.next_mps.len(), self.bb_mps.len()) { | |
649 | (0, 0) => { | |
650 | // There are no possible next positions AND we aren't waiting for the black-box | |
651 | // parser: syntax error. | |
9c376795 | 652 | return Failure(T::build_failure( |
5e7ed085 | 653 | parser.token.clone(), |
9c376795 | 654 | parser.approx_token_stream_pos(), |
5e7ed085 | 655 | "no rules expected this token in macro call", |
9c376795 | 656 | )); |
5e7ed085 FG |
657 | } |
658 | ||
659 | (_, 0) => { | |
660 | // Dump all possible `next_mps` into `cur_mps` for the next iteration. Then | |
661 | // process the next token. | |
04454e1e | 662 | self.cur_mps.append(&mut self.next_mps); |
5e7ed085 FG |
663 | parser.to_mut().bump(); |
664 | } | |
665 | ||
666 | (0, 1) => { | |
667 | // We need to call the black-box parser to get some nonterminal. | |
668 | let mut mp = self.bb_mps.pop().unwrap(); | |
04454e1e FG |
669 | let loc = &matcher[mp.idx]; |
670 | if let &MatcherLoc::MetaVarDecl { | |
671 | span, | |
672 | kind: Some(kind), | |
673 | next_metavar, | |
674 | seq_depth, | |
675 | .. | |
676 | } = loc | |
677 | { | |
5e7ed085 FG |
678 | // We use the span of the metavariable declaration to determine any |
679 | // edition-specific matching behavior for non-terminals. | |
680 | let nt = match parser.to_mut().parse_nonterminal(kind) { | |
681 | Err(mut err) => { | |
487cf647 | 682 | let guarantee = err.span_label( |
5e7ed085 FG |
683 | span, |
684 | format!( | |
685 | "while parsing argument for this `{kind}` macro fragment" | |
686 | ), | |
687 | ) | |
688 | .emit(); | |
487cf647 | 689 | return ErrorReported(guarantee); |
5e7ed085 FG |
690 | } |
691 | Ok(nt) => nt, | |
692 | }; | |
693 | let m = match nt { | |
694 | NtOrTt::Nt(nt) => MatchedNonterminal(Lrc::new(nt)), | |
695 | NtOrTt::Tt(tt) => MatchedTokenTree(tt), | |
696 | }; | |
04454e1e | 697 | mp.push_match(next_metavar, seq_depth, m); |
5e7ed085 | 698 | mp.idx += 1; |
5e7ed085 FG |
699 | } else { |
700 | unreachable!() | |
3dfed10e | 701 | } |
5e7ed085 FG |
702 | self.cur_mps.push(mp); |
703 | } | |
704 | ||
705 | (_, _) => { | |
706 | // Too many possibilities! | |
04454e1e | 707 | return self.ambiguity_error(matcher, parser.token.span); |
5e7ed085 | 708 | } |
223e47cc | 709 | } |
5e7ed085 FG |
710 | |
711 | assert!(!self.cur_mps.is_empty()); | |
223e47cc | 712 | } |
5e7ed085 | 713 | } |
223e47cc | 714 | |
9c376795 | 715 | fn ambiguity_error<F>( |
04454e1e FG |
716 | &self, |
717 | matcher: &[MatcherLoc], | |
718 | token_span: rustc_span::Span, | |
9c376795 | 719 | ) -> NamedParseResult<F> { |
5e7ed085 FG |
720 | let nts = self |
721 | .bb_mps | |
722 | .iter() | |
04454e1e FG |
723 | .map(|mp| match &matcher[mp.idx] { |
724 | MatcherLoc::MetaVarDecl { bind, kind: Some(kind), .. } => { | |
5e7ed085 FG |
725 | format!("{} ('{}')", kind, bind) |
726 | } | |
04454e1e | 727 | _ => unreachable!(), |
5e7ed085 FG |
728 | }) |
729 | .collect::<Vec<String>>() | |
730 | .join(" or "); | |
731 | ||
732 | Error( | |
733 | token_span, | |
734 | format!( | |
735 | "local ambiguity when calling macro `{}`: multiple parsing options: {}", | |
736 | self.macro_name, | |
737 | match self.next_mps.len() { | |
738 | 0 => format!("built-in NTs {}.", nts), | |
04454e1e | 739 | n => format!("built-in NTs {} or {n} other option{s}.", nts, s = pluralize!(n)), |
5e7ed085 FG |
740 | } |
741 | ), | |
742 | ) | |
223e47cc | 743 | } |
04454e1e | 744 | |
9c376795 | 745 | fn nameize<I: Iterator<Item = NamedMatch>, F>( |
04454e1e FG |
746 | &self, |
747 | matcher: &[MatcherLoc], | |
748 | mut res: I, | |
9c376795 | 749 | ) -> NamedParseResult<F> { |
04454e1e FG |
750 | // Make that each metavar has _exactly one_ binding. If so, insert the binding into the |
751 | // `NamedParseResult`. Otherwise, it's an error. | |
752 | let mut ret_val = FxHashMap::default(); | |
753 | for loc in matcher { | |
754 | if let &MatcherLoc::MetaVarDecl { span, bind, kind, .. } = loc { | |
755 | if kind.is_some() { | |
756 | match ret_val.entry(MacroRulesNormalizedIdent::new(bind)) { | |
757 | Vacant(spot) => spot.insert(res.next().unwrap()), | |
758 | Occupied(..) => { | |
759 | return Error(span, format!("duplicated bind name: {}", bind)); | |
760 | } | |
761 | }; | |
762 | } else { | |
763 | // E.g. `$e` instead of `$e:expr`, reported as a hard error if actually used. | |
764 | // Both this check and the one in `parse_tt_inner` are necessary, surprisingly. | |
765 | return Error(span, "missing fragment specifier".to_string()); | |
766 | } | |
767 | } | |
768 | } | |
769 | Success(ret_val) | |
770 | } | |
223e47cc | 771 | } |