]>
Commit | Line | Data |
---|---|---|
fc512014 | 1 | //! This is an NFA-based parser, which calls out to the main Rust parser for named non-terminals |
3b2f2976 XL |
2 | //! (which it commits to fully when it hits one in a grammar). There's a set of current NFA threads |
3 | //! and a set of next ones. Instead of NTs, we have a special case for Kleene star. The big-O, in | |
4 | //! pathological cases, is worse than traditional use of NFA or Earley parsing, but it's an easier | |
5 | //! fit for Macro-by-Example-style rules. | |
6 | //! | |
7 | //! (In order to prevent the pathological case, we'd need to lazily construct the resulting | |
8 | //! `NamedMatch`es at the very end. It'd be a pain, and require more memory to keep around old | |
5e7ed085 | 9 | //! matcher positions, but it would also save overhead) |
3b2f2976 | 10 | //! |
94b46f34 | 11 | //! We don't say this parser uses the Earley algorithm, because it's unnecessarily inaccurate. |
3b2f2976 XL |
12 | //! The macro parser restricts itself to the features of finite state automata. Earley parsers |
13 | //! can be described as an extension of NFAs with completion rules, prediction rules, and recursion. | |
1a4d82fc JJ |
14 | //! |
15 | //! Quick intro to how the parser works: | |
16 | //! | |
5e7ed085 FG |
17 | //! A "matcher position" (a.k.a. "position" or "mp") is a dot in the middle of a matcher, usually |
18 | //! written as a `·`. For example `· a $( a )* a b` is one, as is `a $( · a )* a b`. | |
1a4d82fc JJ |
19 | //! |
20 | //! The parser walks through the input a character at a time, maintaining a list | |
5e7ed085 | 21 | //! of threads consistent with the current position in the input string: `cur_mps`. |
1a4d82fc | 22 | //! |
5e7ed085 FG |
23 | //! As it processes them, it fills up `eof_mps` with threads that would be valid if |
24 | //! the macro invocation is now over, `bb_mps` with threads that are waiting on | |
25 | //! a Rust non-terminal like `$e:expr`, and `next_mps` with threads that are waiting | |
b039eaaf | 26 | //! on a particular token. Most of the logic concerns moving the · through the |
3b2f2976 XL |
27 | //! repetitions indicated by Kleene stars. The rules for moving the · without |
28 | //! consuming any input are called epsilon transitions. It only advances or calls | |
5e7ed085 | 29 | //! out to the real Rust parser when no `cur_mps` threads remain. |
1a4d82fc | 30 | //! |
7cac9316 | 31 | //! Example: |
1a4d82fc | 32 | //! |
7cac9316 XL |
33 | //! ```text, ignore |
34 | //! Start parsing a a a a b against [· a $( a )* a b]. | |
35 | //! | |
36 | //! Remaining input: a a a a b | |
3b2f2976 | 37 | //! next: [· a $( a )* a b] |
1a4d82fc | 38 | //! |
7cac9316 | 39 | //! - - - Advance over an a. - - - |
1a4d82fc | 40 | //! |
7cac9316 | 41 | //! Remaining input: a a a b |
1a4d82fc | 42 | //! cur: [a · $( a )* a b] |
5e7ed085 | 43 | //! Descend/Skip (first position). |
1a4d82fc JJ |
44 | //! next: [a $( · a )* a b] [a $( a )* · a b]. |
45 | //! | |
7cac9316 | 46 | //! - - - Advance over an a. - - - |
1a4d82fc | 47 | //! |
7cac9316 | 48 | //! Remaining input: a a b |
3b2f2976 | 49 | //! cur: [a $( a · )* a b] [a $( a )* a · b] |
5e7ed085 | 50 | //! Follow epsilon transition: Finish/Repeat (first position) |
1a4d82fc JJ |
51 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] |
52 | //! | |
7cac9316 | 53 | //! - - - Advance over an a. - - - (this looks exactly like the last step) |
1a4d82fc | 54 | //! |
7cac9316 | 55 | //! Remaining input: a b |
3b2f2976 | 56 | //! cur: [a $( a · )* a b] [a $( a )* a · b] |
5e7ed085 | 57 | //! Follow epsilon transition: Finish/Repeat (first position) |
1a4d82fc JJ |
58 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] |
59 | //! | |
7cac9316 | 60 | //! - - - Advance over an a. - - - (this looks exactly like the last step) |
1a4d82fc | 61 | //! |
7cac9316 | 62 | //! Remaining input: b |
3b2f2976 | 63 | //! cur: [a $( a · )* a b] [a $( a )* a · b] |
5e7ed085 | 64 | //! Follow epsilon transition: Finish/Repeat (first position) |
3b2f2976 | 65 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] |
1a4d82fc | 66 | //! |
7cac9316 | 67 | //! - - - Advance over a b. - - - |
1a4d82fc | 68 | //! |
7cac9316 | 69 | //! Remaining input: '' |
1a4d82fc | 70 | //! eof: [a $( a )* a b ·] |
7cac9316 | 71 | //! ``` |
1a4d82fc | 72 | |
e74abb32 XL |
73 | crate use NamedMatch::*; |
74 | crate use ParseResult::*; | |
9fa01778 | 75 | |
5e7ed085 | 76 | use crate::mbe::{self, SequenceRepetition, TokenTree}; |
e74abb32 | 77 | |
5e7ed085 FG |
78 | use rustc_ast::token::{self, DocComment, Nonterminal, Token, TokenKind}; |
79 | use rustc_parse::parser::{NtOrTt, Parser}; | |
74b04a01 | 80 | use rustc_session::parse::ParseSess; |
5869c6ff | 81 | use rustc_span::symbol::MacroRulesNormalizedIdent; |
970d7e83 | 82 | |
9fa01778 | 83 | use smallvec::{smallvec, SmallVec}; |
223e47cc | 84 | |
b7449926 | 85 | use rustc_data_structures::fx::FxHashMap; |
9fa01778 | 86 | use rustc_data_structures::sync::Lrc; |
17df50a5 | 87 | use rustc_span::symbol::Ident; |
74b04a01 | 88 | use std::borrow::Cow; |
b7449926 | 89 | use std::collections::hash_map::Entry::{Occupied, Vacant}; |
223e47cc | 90 | |
5e7ed085 FG |
91 | // One element is enough to cover 95-99% of vectors for most benchmarks. Also, |
92 | // vectors longer than one frequently have many elements, not just two or | |
93 | // three. | |
94 | type NamedMatchVec = SmallVec<[NamedMatch; 1]>; | |
95 | ||
96 | // This type is used a lot. Make sure it doesn't unintentionally get bigger. | |
97 | #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] | |
98 | rustc_data_structures::static_assert_size!(NamedMatchVec, 48); | |
223e47cc | 99 | |
1a4d82fc | 100 | #[derive(Clone)] |
5e7ed085 FG |
101 | enum MatcherKind<'tt> { |
102 | TopLevel, | |
103 | Delimited(Box<DelimitedSubmatcher<'tt>>), | |
104 | Sequence(Box<SequenceSubmatcher<'tt>>), | |
1a4d82fc | 105 | } |
223e47cc | 106 | |
5e7ed085 FG |
107 | #[derive(Clone)] |
108 | struct DelimitedSubmatcher<'tt> { | |
109 | parent: Parent<'tt>, | |
223e47cc LB |
110 | } |
111 | ||
1a4d82fc | 112 | #[derive(Clone)] |
5e7ed085 FG |
113 | struct SequenceSubmatcher<'tt> { |
114 | parent: Parent<'tt>, | |
115 | seq: &'tt SequenceRepetition, | |
223e47cc LB |
116 | } |
117 | ||
5e7ed085 FG |
118 | /// Data used to ascend from a submatcher back to its parent matcher. A subset of the fields from |
119 | /// `MathcherPos`. | |
1a4d82fc | 120 | #[derive(Clone)] |
5e7ed085 FG |
121 | struct Parent<'tt> { |
122 | tts: &'tt [TokenTree], | |
85aaf69f | 123 | idx: usize, |
5e7ed085 FG |
124 | kind: MatcherKind<'tt>, |
125 | } | |
a1dfa0c6 | 126 | |
5e7ed085 FG |
127 | /// A single matcher position, which could be within the top-level matcher, a submatcher, a |
128 | /// subsubmatcher, etc. For example: | |
129 | /// ```text | |
130 | /// macro_rules! m { $id:ident ( $($e:expr),* ) } => { ... } | |
131 | /// <----------> second submatcher; one tt, one metavar | |
132 | /// <--------------> first submatcher; three tts, zero metavars | |
133 | /// <--------------------------> top-level matcher; two tts, one metavar | |
134 | /// ``` | |
135 | struct MatcherPos<'tt> { | |
136 | /// The tokens that make up the current matcher. When we are within a `Sequence` or `Delimited` | |
137 | /// submatcher, this is just the contents of that submatcher. | |
138 | tts: &'tt [TokenTree], | |
139 | ||
140 | /// The "dot" position within the current submatcher, i.e. the index into `tts`. Can go one or | |
141 | /// two positions past the final elements in `tts` when dealing with sequences, see | |
142 | /// `parse_tt_inner` for details. | |
143 | idx: usize, | |
2c00a5a8 | 144 | |
5e7ed085 FG |
145 | /// This vector ends up with one element per metavar in the *top-level* matcher, even when this |
146 | /// `MatcherPos` is for a submatcher. Each element records token trees matched against the | |
147 | /// relevant metavar by the black box parser. The element will be a `MatchedSeq` if the | |
148 | /// corresponding metavar is within a sequence. | |
149 | matches: Lrc<NamedMatchVec>, | |
a1dfa0c6 | 150 | |
5e7ed085 FG |
151 | /// The number of sequences this mp is within. |
152 | seq_depth: usize, | |
a1dfa0c6 | 153 | |
5e7ed085 FG |
154 | /// The position in `matches` of the next metavar to be matched against the source token |
155 | /// stream. Should not be used if there are no metavars. | |
156 | match_cur: usize, | |
2c00a5a8 | 157 | |
5e7ed085 FG |
158 | /// What kind of matcher we are in. For submatchers, this contains enough information to |
159 | /// reconstitute a `MatcherPos` within the parent once we ascend out of the submatcher. | |
160 | kind: MatcherKind<'tt>, | |
223e47cc LB |
161 | } |
162 | ||
5e7ed085 FG |
163 | // This type is used a lot. Make sure it doesn't unintentionally get bigger. |
164 | #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] | |
165 | rustc_data_structures::static_assert_size!(MatcherPos<'_>, 64); | |
166 | ||
167 | impl<'tt> MatcherPos<'tt> { | |
168 | fn top_level(matcher: &'tt [TokenTree], empty_matches: Lrc<NamedMatchVec>) -> Self { | |
169 | MatcherPos { | |
170 | tts: matcher, | |
171 | idx: 0, | |
172 | matches: empty_matches, | |
173 | seq_depth: 0, | |
174 | match_cur: 0, | |
175 | kind: MatcherKind::TopLevel, | |
176 | } | |
041b39d2 | 177 | } |
94b46f34 | 178 | |
5e7ed085 FG |
179 | fn empty_sequence( |
180 | parent_mp: &MatcherPos<'tt>, | |
181 | seq: &'tt SequenceRepetition, | |
182 | empty_matches: Lrc<NamedMatchVec>, | |
183 | ) -> Self { | |
184 | let mut mp = MatcherPos { | |
185 | tts: parent_mp.tts, | |
186 | idx: parent_mp.idx + 1, | |
187 | matches: parent_mp.matches.clone(), // a cheap clone | |
188 | seq_depth: parent_mp.seq_depth, | |
189 | match_cur: parent_mp.match_cur + seq.num_captures, | |
190 | kind: parent_mp.kind.clone(), // an expensive clone | |
191 | }; | |
192 | for idx in parent_mp.match_cur..parent_mp.match_cur + seq.num_captures { | |
193 | mp.push_match(idx, MatchedSeq(empty_matches.clone())); | |
194 | } | |
195 | mp | |
94b46f34 | 196 | } |
94b46f34 | 197 | |
5e7ed085 FG |
198 | fn sequence( |
199 | parent_mp: Box<MatcherPos<'tt>>, | |
200 | seq: &'tt SequenceRepetition, | |
201 | empty_matches: Lrc<NamedMatchVec>, | |
202 | ) -> Self { | |
203 | let seq_kind = box SequenceSubmatcher { | |
204 | parent: Parent { tts: parent_mp.tts, idx: parent_mp.idx, kind: parent_mp.kind }, | |
205 | seq, | |
206 | }; | |
207 | let mut mp = MatcherPos { | |
208 | tts: &seq.tts, | |
209 | idx: 0, | |
210 | matches: parent_mp.matches, | |
211 | seq_depth: parent_mp.seq_depth, | |
212 | match_cur: parent_mp.match_cur, | |
213 | kind: MatcherKind::Sequence(seq_kind), | |
214 | }; | |
215 | // Start with an empty vec for each metavar within the sequence. Note that `mp.seq_depth` | |
216 | // must have the parent's depth at this point for these `push_match` calls to work. | |
217 | for idx in mp.match_cur..mp.match_cur + seq.num_captures { | |
218 | mp.push_match(idx, MatchedSeq(empty_matches.clone())); | |
94b46f34 | 219 | } |
5e7ed085 FG |
220 | mp.seq_depth += 1; |
221 | mp | |
94b46f34 | 222 | } |
94b46f34 | 223 | |
5e7ed085 FG |
224 | /// Adds `m` as a named match for the `idx`-th metavar. |
225 | fn push_match(&mut self, idx: usize, m: NamedMatch) { | |
226 | let matches = Lrc::make_mut(&mut self.matches); | |
227 | match self.seq_depth { | |
228 | 0 => { | |
229 | // We are not within a sequence. Just append `m`. | |
230 | assert_eq!(idx, matches.len()); | |
231 | matches.push(m); | |
232 | } | |
233 | _ => { | |
234 | // We are within a sequence. Find the final `MatchedSeq` at the appropriate depth | |
235 | // and append `m` to its vector. | |
236 | let mut curr = &mut matches[idx]; | |
237 | for _ in 0..self.seq_depth - 1 { | |
238 | match curr { | |
239 | MatchedSeq(seq) => { | |
240 | let seq = Lrc::make_mut(seq); | |
241 | curr = seq.last_mut().unwrap(); | |
242 | } | |
243 | _ => unreachable!(), | |
244 | } | |
245 | } | |
246 | match curr { | |
247 | MatchedSeq(seq) => { | |
248 | let seq = Lrc::make_mut(seq); | |
249 | seq.push(m); | |
250 | } | |
251 | _ => unreachable!(), | |
252 | } | |
253 | } | |
94b46f34 XL |
254 | } |
255 | } | |
256 | } | |
257 | ||
5e7ed085 FG |
258 | enum EofMatcherPositions<'tt> { |
259 | None, | |
260 | One(Box<MatcherPos<'tt>>), | |
261 | Multiple, | |
262 | } | |
263 | ||
2c00a5a8 | 264 | /// Represents the possible results of an attempted parse. |
e74abb32 | 265 | crate enum ParseResult<T> { |
2c00a5a8 XL |
266 | /// Parsed successfully. |
267 | Success(T), | |
268 | /// Arm failed to match. If the second parameter is `token::Eof`, it indicates an unexpected | |
269 | /// end of macro invocation. Otherwise, it indicates that no rules expected the given token. | |
dc9dc135 | 270 | Failure(Token, &'static str), |
2c00a5a8 | 271 | /// Fatal error (malformed macro?). Abort compilation. |
dfeec247 | 272 | Error(rustc_span::Span, String), |
ba9703b0 | 273 | ErrorReported, |
2c00a5a8 XL |
274 | } |
275 | ||
ba9703b0 XL |
276 | /// A `ParseResult` where the `Success` variant contains a mapping of |
277 | /// `MacroRulesNormalizedIdent`s to `NamedMatch`es. This represents the mapping | |
278 | /// of metavars to the token trees they bind to. | |
279 | crate type NamedParseResult = ParseResult<FxHashMap<MacroRulesNormalizedIdent, NamedMatch>>; | |
476ff2be | 280 | |
5e7ed085 FG |
281 | /// Count how many metavars declarations are in `matcher`. |
282 | pub(super) fn count_metavar_decls(matcher: &[TokenTree]) -> usize { | |
283 | matcher | |
284 | .iter() | |
285 | .map(|tt| match tt { | |
286 | TokenTree::MetaVarDecl(..) => 1, | |
287 | TokenTree::Sequence(_, seq) => seq.num_captures, | |
288 | TokenTree::Delimited(_, delim) => count_metavar_decls(delim.inner_tts()), | |
289 | TokenTree::Token(..) => 0, | |
290 | TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(), | |
291 | }) | |
292 | .sum() | |
223e47cc LB |
293 | } |
294 | ||
5e7ed085 | 295 | /// `NamedMatch` is a pattern-match result for a single metavar. All |
9fa01778 | 296 | /// `MatchedNonterminal`s in the `NamedMatch` have the same non-terminal type |
5e7ed085 | 297 | /// (expr, item, etc). |
1a4d82fc | 298 | /// |
7cac9316 | 299 | /// The in-memory structure of a particular `NamedMatch` represents the match |
1a4d82fc JJ |
300 | /// that occurred when a particular subset of a matcher was applied to a |
301 | /// particular token tree. | |
302 | /// | |
7cac9316 | 303 | /// The width of each `MatchedSeq` in the `NamedMatch`, and the identity of |
5e7ed085 FG |
304 | /// the `MatchedNtNonTts`s, will depend on the token tree it was applied |
305 | /// to: each `MatchedSeq` corresponds to a single repetition in the originating | |
7cac9316 | 306 | /// token tree. The depth of the `NamedMatch` structure will therefore depend |
5e7ed085 FG |
307 | /// only on the nesting depth of repetitions in the originating token tree it |
308 | /// was derived from. | |
309 | /// | |
310 | /// In layman's terms: `NamedMatch` will form a tree representing nested matches of a particular | |
311 | /// meta variable. For example, if we are matching the following macro against the following | |
312 | /// invocation... | |
313 | /// | |
314 | /// ```rust | |
315 | /// macro_rules! foo { | |
316 | /// ($($($x:ident),+);+) => {} | |
317 | /// } | |
318 | /// | |
319 | /// foo!(a, b, c, d; a, b, c, d, e); | |
320 | /// ``` | |
321 | /// | |
322 | /// Then, the tree will have the following shape: | |
323 | /// | |
324 | /// ```rust | |
325 | /// MatchedSeq([ | |
326 | /// MatchedSeq([ | |
327 | /// MatchedNonterminal(a), | |
328 | /// MatchedNonterminal(b), | |
329 | /// MatchedNonterminal(c), | |
330 | /// MatchedNonterminal(d), | |
331 | /// ]), | |
332 | /// MatchedSeq([ | |
333 | /// MatchedNonterminal(a), | |
334 | /// MatchedNonterminal(b), | |
335 | /// MatchedNonterminal(c), | |
336 | /// MatchedNonterminal(d), | |
337 | /// MatchedNonterminal(e), | |
338 | /// ]) | |
339 | /// ]) | |
340 | /// ``` | |
041b39d2 | 341 | #[derive(Debug, Clone)] |
e74abb32 | 342 | crate enum NamedMatch { |
60c5eb7d | 343 | MatchedSeq(Lrc<NamedMatchVec>), |
5e7ed085 FG |
344 | |
345 | // A metavar match of type `tt`. | |
346 | MatchedTokenTree(rustc_ast::tokenstream::TokenTree), | |
347 | ||
348 | // A metavar match of any type other than `tt`. | |
9fa01778 | 349 | MatchedNonterminal(Lrc<Nonterminal>), |
223e47cc LB |
350 | } |
351 | ||
2c00a5a8 XL |
352 | fn nameize<I: Iterator<Item = NamedMatch>>( |
353 | sess: &ParseSess, | |
5e7ed085 | 354 | matcher: &[TokenTree], |
2c00a5a8 XL |
355 | mut res: I, |
356 | ) -> NamedParseResult { | |
0731742a | 357 | // Recursively descend into each type of matcher (e.g., sequences, delimited, metavars) and make |
2c00a5a8 XL |
358 | // sure that each metavar has _exactly one_ binding. If a metavar does not have exactly one |
359 | // binding, then there is an error. If it does, then we insert the binding into the | |
360 | // `NamedParseResult`. | |
361 | fn n_rec<I: Iterator<Item = NamedMatch>>( | |
362 | sess: &ParseSess, | |
5e7ed085 | 363 | tt: &TokenTree, |
2c00a5a8 | 364 | res: &mut I, |
ba9703b0 | 365 | ret_val: &mut FxHashMap<MacroRulesNormalizedIdent, NamedMatch>, |
dfeec247 | 366 | ) -> Result<(), (rustc_span::Span, String)> { |
5e7ed085 | 367 | match *tt { |
dfeec247 XL |
368 | TokenTree::Sequence(_, ref seq) => { |
369 | for next_m in &seq.tts { | |
370 | n_rec(sess, next_m, res.by_ref(), ret_val)? | |
371 | } | |
372 | } | |
373 | TokenTree::Delimited(_, ref delim) => { | |
5e7ed085 | 374 | for next_m in delim.inner_tts() { |
dfeec247 XL |
375 | n_rec(sess, next_m, res.by_ref(), ret_val)?; |
376 | } | |
377 | } | |
b9856134 XL |
378 | TokenTree::MetaVarDecl(span, _, None) => { |
379 | if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() { | |
380 | return Err((span, "missing fragment specifier".to_string())); | |
381 | } | |
382 | } | |
ba9703b0 XL |
383 | TokenTree::MetaVarDecl(sp, bind_name, _) => match ret_val |
384 | .entry(MacroRulesNormalizedIdent::new(bind_name)) | |
385 | { | |
dfeec247 XL |
386 | Vacant(spot) => { |
387 | spot.insert(res.next().unwrap()); | |
1a4d82fc | 388 | } |
dfeec247 XL |
389 | Occupied(..) => return Err((sp, format!("duplicated bind name: {}", bind_name))), |
390 | }, | |
5e7ed085 FG |
391 | TokenTree::Token(..) => (), |
392 | TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(), | |
223e47cc | 393 | } |
92a42be0 SL |
394 | |
395 | Ok(()) | |
223e47cc | 396 | } |
92a42be0 | 397 | |
b7449926 | 398 | let mut ret_val = FxHashMap::default(); |
5e7ed085 FG |
399 | for tt in matcher { |
400 | match n_rec(sess, tt, res.by_ref(), &mut ret_val) { | |
2c00a5a8 | 401 | Ok(_) => {} |
92a42be0 SL |
402 | Err((sp, msg)) => return Error(sp, msg), |
403 | } | |
404 | } | |
405 | ||
406 | Success(ret_val) | |
223e47cc LB |
407 | } |
408 | ||
9fa01778 | 409 | /// Performs a token equality check, ignoring syntax context (that is, an unhygienic comparison) |
2c00a5a8 | 410 | fn token_name_eq(t1: &Token, t2: &Token) -> bool { |
dc9dc135 XL |
411 | if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) = (t1.ident(), t2.ident()) { |
412 | ident1.name == ident2.name && is_raw1 == is_raw2 | |
413 | } else if let (Some(ident1), Some(ident2)) = (t1.lifetime(), t2.lifetime()) { | |
414 | ident1.name == ident2.name | |
cc61c64b | 415 | } else { |
dc9dc135 | 416 | t1.kind == t2.kind |
1a4d82fc JJ |
417 | } |
418 | } | |
419 | ||
5e7ed085 FG |
420 | // Note: the position vectors could be created and dropped within `parse_tt`, but to avoid excess |
421 | // allocations we have a single vector fo each kind that is cleared and reused repeatedly. | |
422 | pub struct TtParser<'tt> { | |
423 | macro_name: Ident, | |
223e47cc | 424 | |
5e7ed085 FG |
425 | /// The set of current mps to be processed. This should be empty by the end of a successful |
426 | /// execution of `parse_tt_inner`. | |
427 | cur_mps: Vec<Box<MatcherPos<'tt>>>, | |
476ff2be | 428 | |
5e7ed085 FG |
429 | /// The set of newly generated mps. These are used to replenish `cur_mps` in the function |
430 | /// `parse_tt`. | |
431 | next_mps: Vec<Box<MatcherPos<'tt>>>, | |
223e47cc | 432 | |
5e7ed085 FG |
433 | /// The set of mps that are waiting for the black-box parser. |
434 | bb_mps: Vec<Box<MatcherPos<'tt>>>, | |
223e47cc | 435 | |
5e7ed085 FG |
436 | /// Pre-allocate an empty match array, so it can be cloned cheaply for macros with many rules |
437 | /// that have no metavars. | |
438 | empty_matches: Lrc<NamedMatchVec>, | |
439 | } | |
440 | ||
441 | impl<'tt> TtParser<'tt> { | |
442 | pub(super) fn new(macro_name: Ident) -> TtParser<'tt> { | |
443 | TtParser { | |
444 | macro_name, | |
445 | cur_mps: vec![], | |
446 | next_mps: vec![], | |
447 | bb_mps: vec![], | |
448 | empty_matches: Lrc::new(smallvec![]), | |
2c00a5a8 | 449 | } |
5e7ed085 FG |
450 | } |
451 | ||
452 | /// Process the matcher positions of `cur_mps` until it is empty. In the process, this will | |
453 | /// produce more mps in `next_mps` and `bb_mps`. | |
454 | /// | |
455 | /// # Returns | |
456 | /// | |
457 | /// `Some(result)` if everything is finished, `None` otherwise. Note that matches are kept | |
458 | /// track of through the mps generated. | |
459 | fn parse_tt_inner( | |
460 | &mut self, | |
461 | sess: &ParseSess, | |
462 | matcher: &[TokenTree], | |
463 | token: &Token, | |
464 | ) -> Option<NamedParseResult> { | |
465 | // Matcher positions that would be valid if the macro invocation was over now. Only | |
466 | // modified if `token == Eof`. | |
467 | let mut eof_mps = EofMatcherPositions::None; | |
468 | ||
469 | while let Some(mut mp) = self.cur_mps.pop() { | |
470 | // Get the current position of the "dot" (`idx`) in `mp` and the number of token | |
471 | // trees in the matcher (`len`). | |
472 | let idx = mp.idx; | |
473 | let len = mp.tts.len(); | |
474 | ||
475 | if idx < len { | |
476 | // We are in the middle of a matcher. Compare the matcher's current tt against | |
477 | // `token`. | |
478 | match &mp.tts[idx] { | |
479 | TokenTree::Sequence(_sp, seq) => { | |
480 | let op = seq.kleene.op; | |
481 | if op == mbe::KleeneOp::ZeroOrMore || op == mbe::KleeneOp::ZeroOrOne { | |
482 | // Allow for the possibility of zero matches of this sequence. | |
483 | self.cur_mps.push(box MatcherPos::empty_sequence( | |
484 | &*mp, | |
485 | &seq, | |
486 | self.empty_matches.clone(), | |
487 | )); | |
1a4d82fc | 488 | } |
5e7ed085 FG |
489 | |
490 | // Allow for the possibility of one or more matches of this sequence. | |
491 | self.cur_mps.push(box MatcherPos::sequence( | |
492 | mp, | |
493 | &seq, | |
494 | self.empty_matches.clone(), | |
495 | )); | |
1a4d82fc | 496 | } |
476ff2be | 497 | |
5e7ed085 FG |
498 | &TokenTree::MetaVarDecl(span, _, None) => { |
499 | // E.g. `$e` instead of `$e:expr`. | |
500 | if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() { | |
501 | return Some(Error(span, "missing fragment specifier".to_string())); | |
502 | } | |
503 | } | |
2c00a5a8 | 504 | |
5e7ed085 FG |
505 | &TokenTree::MetaVarDecl(_, _, Some(kind)) => { |
506 | // Built-in nonterminals never start with these tokens, so we can eliminate | |
507 | // them from consideration. | |
508 | // | |
509 | // We use the span of the metavariable declaration to determine any | |
510 | // edition-specific matching behavior for non-terminals. | |
511 | if Parser::nonterminal_may_begin_with(kind, token) { | |
512 | self.bb_mps.push(mp); | |
513 | } | |
b9856134 | 514 | } |
b9856134 | 515 | |
5e7ed085 FG |
516 | TokenTree::Delimited(_, delimited) => { |
517 | // To descend into a delimited submatcher, we update `mp` appropriately, | |
518 | // including enough information to re-ascend afterwards, and push it onto | |
519 | // `cur_mps`. Later, when we reach the closing delimiter, we will recover | |
520 | // the parent matcher position to ascend. Note that we use `all_tts` to | |
521 | // include the open and close delimiter tokens. | |
522 | let kind = MatcherKind::Delimited(box DelimitedSubmatcher { | |
523 | parent: Parent { tts: mp.tts, idx: mp.idx, kind: mp.kind }, | |
524 | }); | |
525 | mp.tts = &delimited.all_tts; | |
526 | mp.idx = 0; | |
527 | mp.kind = kind; | |
528 | self.cur_mps.push(mp); | |
529 | } | |
530 | ||
531 | TokenTree::Token(t) => { | |
532 | // If it's a doc comment, we just ignore it and move on to the next tt in | |
533 | // the matcher. This is a bug, but #95267 showed that existing programs | |
534 | // rely on this behaviour, and changing it would require some care and a | |
535 | // transition period. | |
536 | // | |
537 | // If the token matches, we can just advance the parser. | |
538 | // | |
539 | // Otherwise, this match has failed, there is nothing to do, and hopefully | |
540 | // another mp in `cur_mps` will match. | |
541 | if matches!(t, Token { kind: DocComment(..), .. }) { | |
542 | mp.idx += 1; | |
543 | self.cur_mps.push(mp); | |
544 | } else if token_name_eq(&t, token) { | |
545 | if let TokenKind::CloseDelim(_) = token.kind { | |
546 | // Ascend out of the delimited submatcher. | |
547 | debug_assert_eq!(idx, len - 1); | |
548 | match mp.kind { | |
549 | MatcherKind::Delimited(submatcher) => { | |
550 | mp.tts = submatcher.parent.tts; | |
551 | mp.idx = submatcher.parent.idx; | |
552 | mp.kind = submatcher.parent.kind; | |
553 | } | |
554 | _ => unreachable!(), | |
555 | } | |
556 | } | |
557 | mp.idx += 1; | |
558 | self.next_mps.push(mp); | |
559 | } | |
1a4d82fc | 560 | } |
2c00a5a8 | 561 | |
5e7ed085 FG |
562 | // These cannot appear in a matcher. |
563 | TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(), | |
476ff2be | 564 | } |
5e7ed085 FG |
565 | } else if let MatcherKind::Sequence(box SequenceSubmatcher { parent, seq }) = &mp.kind { |
566 | // We are past the end of a sequence. | |
567 | // - If it has no separator, we must be only one past the end. | |
568 | // - If it has a separator, we may be one past the end, in which case we must | |
569 | // look for a separator. Or we may be two past the end, in which case we have | |
570 | // already dealt with the separator. | |
571 | debug_assert!(idx == len || idx == len + 1 && seq.separator.is_some()); | |
2c00a5a8 | 572 | |
5e7ed085 FG |
573 | if idx == len { |
574 | // Sequence matching may have finished: move the "dot" past the sequence in | |
575 | // `parent`. This applies whether a separator is used or not. If sequence | |
576 | // matching hasn't finished, this `new_mp` will fail quietly when it is | |
577 | // processed next time around the loop. | |
578 | let new_mp = box MatcherPos { | |
579 | tts: parent.tts, | |
580 | idx: parent.idx + 1, | |
581 | matches: mp.matches.clone(), // a cheap clone | |
582 | seq_depth: mp.seq_depth - 1, | |
583 | match_cur: mp.match_cur, | |
584 | kind: parent.kind.clone(), // an expensive clone | |
585 | }; | |
586 | self.cur_mps.push(new_mp); | |
223e47cc | 587 | } |
2c00a5a8 | 588 | |
5e7ed085 FG |
589 | if seq.separator.is_some() && idx == len { |
590 | // Look for the separator. | |
591 | if seq.separator.as_ref().map_or(false, |sep| token_name_eq(token, sep)) { | |
592 | // The matcher has a separator, and it matches the current token. We can | |
593 | // advance past the separator token. | |
594 | mp.idx += 1; | |
595 | self.next_mps.push(mp); | |
596 | } | |
597 | } else if seq.kleene.op != mbe::KleeneOp::ZeroOrOne { | |
598 | // We don't need to look for a separator: either this sequence doesn't have | |
599 | // one, or it does and we've already handled it. Also, we are allowed to have | |
600 | // more than one repetition. Move the "dot" back to the beginning of the | |
601 | // matcher and try to match again. | |
602 | mp.match_cur -= seq.num_captures; | |
603 | mp.idx = 0; | |
604 | self.cur_mps.push(mp); | |
605 | } | |
606 | } else { | |
607 | // We are past the end of the matcher, and not in a sequence. Look for end of | |
608 | // input. | |
609 | debug_assert_eq!(idx, len); | |
610 | if *token == token::Eof { | |
611 | eof_mps = match eof_mps { | |
612 | EofMatcherPositions::None => EofMatcherPositions::One(mp), | |
613 | EofMatcherPositions::One(_) | EofMatcherPositions::Multiple => { | |
614 | EofMatcherPositions::Multiple | |
615 | } | |
616 | } | |
617 | } | |
223e47cc LB |
618 | } |
619 | } | |
476ff2be | 620 | |
5e7ed085 FG |
621 | // If we reached the end of input, check that there is EXACTLY ONE possible matcher. |
622 | // Otherwise, either the parse is ambiguous (which is an error) or there is a syntax error. | |
623 | if *token == token::Eof { | |
624 | Some(match eof_mps { | |
625 | EofMatcherPositions::One(mut eof_mp) => { | |
626 | assert_eq!(eof_mp.matches.len(), count_metavar_decls(matcher)); | |
627 | // Need to take ownership of the matches from within the `Lrc`. | |
628 | Lrc::make_mut(&mut eof_mp.matches); | |
629 | let matches = Lrc::try_unwrap(eof_mp.matches).unwrap().into_iter(); | |
630 | nameize(sess, matcher, matches) | |
631 | } | |
632 | EofMatcherPositions::Multiple => { | |
633 | Error(token.span, "ambiguity: multiple successful parses".to_string()) | |
634 | } | |
635 | EofMatcherPositions::None => Failure( | |
dfeec247 XL |
636 | Token::new( |
637 | token::Eof, | |
5e7ed085 | 638 | if token.span.is_dummy() { token.span } else { token.span.shrink_to_hi() }, |
dfeec247 | 639 | ), |
0731742a | 640 | "missing tokens in macro arguments", |
2c00a5a8 | 641 | ), |
5e7ed085 FG |
642 | }) |
643 | } else { | |
644 | None | |
2c00a5a8 | 645 | } |
5e7ed085 FG |
646 | } |
647 | ||
648 | /// Match the token stream from `parser` against `matcher`. | |
649 | pub(super) fn parse_tt( | |
650 | &mut self, | |
651 | parser: &mut Cow<'_, Parser<'_>>, | |
652 | matcher: &'tt [TokenTree], | |
653 | ) -> NamedParseResult { | |
654 | // A queue of possible matcher positions. We initialize it with the matcher position in | |
655 | // which the "dot" is before the first token of the first token tree in `matcher`. | |
656 | // `parse_tt_inner` then processes all of these possible matcher positions and produces | |
657 | // possible next positions into `next_mps`. After some post-processing, the contents of | |
658 | // `next_mps` replenish `cur_mps` and we start over again. | |
659 | self.cur_mps.clear(); | |
660 | self.cur_mps.push(box MatcherPos::top_level(matcher, self.empty_matches.clone())); | |
661 | ||
662 | loop { | |
663 | self.next_mps.clear(); | |
664 | self.bb_mps.clear(); | |
665 | ||
666 | // Process `cur_mps` until either we have finished the input or we need to get some | |
667 | // parsing from the black-box parser done. | |
668 | if let Some(result) = self.parse_tt_inner(parser.sess, matcher, &parser.token) { | |
669 | return result; | |
670 | } | |
671 | ||
672 | // `parse_tt_inner` handled all of `cur_mps`, so it's empty. | |
673 | assert!(self.cur_mps.is_empty()); | |
674 | ||
675 | // Error messages here could be improved with links to original rules. | |
676 | match (self.next_mps.len(), self.bb_mps.len()) { | |
677 | (0, 0) => { | |
678 | // There are no possible next positions AND we aren't waiting for the black-box | |
679 | // parser: syntax error. | |
680 | return Failure( | |
681 | parser.token.clone(), | |
682 | "no rules expected this token in macro call", | |
683 | ); | |
684 | } | |
685 | ||
686 | (_, 0) => { | |
687 | // Dump all possible `next_mps` into `cur_mps` for the next iteration. Then | |
688 | // process the next token. | |
689 | self.cur_mps.extend(self.next_mps.drain(..)); | |
690 | parser.to_mut().bump(); | |
691 | } | |
692 | ||
693 | (0, 1) => { | |
694 | // We need to call the black-box parser to get some nonterminal. | |
695 | let mut mp = self.bb_mps.pop().unwrap(); | |
696 | if let TokenTree::MetaVarDecl(span, _, Some(kind)) = mp.tts[mp.idx] { | |
697 | let match_cur = mp.match_cur; | |
698 | // We use the span of the metavariable declaration to determine any | |
699 | // edition-specific matching behavior for non-terminals. | |
700 | let nt = match parser.to_mut().parse_nonterminal(kind) { | |
701 | Err(mut err) => { | |
702 | err.span_label( | |
703 | span, | |
704 | format!( | |
705 | "while parsing argument for this `{kind}` macro fragment" | |
706 | ), | |
707 | ) | |
708 | .emit(); | |
709 | return ErrorReported; | |
710 | } | |
711 | Ok(nt) => nt, | |
712 | }; | |
713 | let m = match nt { | |
714 | NtOrTt::Nt(nt) => MatchedNonterminal(Lrc::new(nt)), | |
715 | NtOrTt::Tt(tt) => MatchedTokenTree(tt), | |
716 | }; | |
717 | mp.push_match(match_cur, m); | |
718 | mp.idx += 1; | |
719 | mp.match_cur += 1; | |
720 | } else { | |
721 | unreachable!() | |
3dfed10e | 722 | } |
5e7ed085 FG |
723 | self.cur_mps.push(mp); |
724 | } | |
725 | ||
726 | (_, _) => { | |
727 | // Too many possibilities! | |
728 | return self.ambiguity_error(parser.token.span); | |
729 | } | |
223e47cc | 730 | } |
5e7ed085 FG |
731 | |
732 | assert!(!self.cur_mps.is_empty()); | |
223e47cc | 733 | } |
5e7ed085 | 734 | } |
223e47cc | 735 | |
5e7ed085 FG |
736 | fn ambiguity_error(&self, token_span: rustc_span::Span) -> NamedParseResult { |
737 | let nts = self | |
738 | .bb_mps | |
739 | .iter() | |
740 | .map(|mp| match mp.tts[mp.idx] { | |
741 | TokenTree::MetaVarDecl(_, bind, Some(kind)) => { | |
742 | format!("{} ('{}')", kind, bind) | |
743 | } | |
744 | _ => panic!(), | |
745 | }) | |
746 | .collect::<Vec<String>>() | |
747 | .join(" or "); | |
748 | ||
749 | Error( | |
750 | token_span, | |
751 | format!( | |
752 | "local ambiguity when calling macro `{}`: multiple parsing options: {}", | |
753 | self.macro_name, | |
754 | match self.next_mps.len() { | |
755 | 0 => format!("built-in NTs {}.", nts), | |
756 | 1 => format!("built-in NTs {} or 1 other option.", nts), | |
757 | n => format!("built-in NTs {} or {} other options.", nts, n), | |
758 | } | |
759 | ), | |
760 | ) | |
223e47cc LB |
761 | } |
762 | } |