]>
Commit | Line | Data |
---|---|---|
fc512014 | 1 | //! This is an NFA-based parser, which calls out to the main Rust parser for named non-terminals |
3b2f2976 XL |
2 | //! (which it commits to fully when it hits one in a grammar). There's a set of current NFA threads |
3 | //! and a set of next ones. Instead of NTs, we have a special case for Kleene star. The big-O, in | |
4 | //! pathological cases, is worse than traditional use of NFA or Earley parsing, but it's an easier | |
5 | //! fit for Macro-by-Example-style rules. | |
6 | //! | |
7 | //! (In order to prevent the pathological case, we'd need to lazily construct the resulting | |
8 | //! `NamedMatch`es at the very end. It'd be a pain, and require more memory to keep around old | |
9 | //! items, but it would also save overhead) | |
10 | //! | |
94b46f34 | 11 | //! We don't say this parser uses the Earley algorithm, because it's unnecessarily inaccurate. |
3b2f2976 XL |
12 | //! The macro parser restricts itself to the features of finite state automata. Earley parsers |
13 | //! can be described as an extension of NFAs with completion rules, prediction rules, and recursion. | |
1a4d82fc JJ |
14 | //! |
15 | //! Quick intro to how the parser works: | |
16 | //! | |
17 | //! A 'position' is a dot in the middle of a matcher, usually represented as a | |
18 | //! dot. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`. | |
19 | //! | |
20 | //! The parser walks through the input a character at a time, maintaining a list | |
3b2f2976 | 21 | //! of threads consistent with the current position in the input string: `cur_items`. |
1a4d82fc | 22 | //! |
3b2f2976 XL |
23 | //! As it processes them, it fills up `eof_items` with threads that would be valid if |
24 | //! the macro invocation is now over, `bb_items` with threads that are waiting on | |
9fa01778 | 25 | //! a Rust non-terminal like `$e:expr`, and `next_items` with threads that are waiting |
b039eaaf | 26 | //! on a particular token. Most of the logic concerns moving the · through the |
3b2f2976 XL |
27 | //! repetitions indicated by Kleene stars. The rules for moving the · without |
28 | //! consuming any input are called epsilon transitions. It only advances or calls | |
29 | //! out to the real Rust parser when no `cur_items` threads remain. | |
1a4d82fc | 30 | //! |
7cac9316 | 31 | //! Example: |
1a4d82fc | 32 | //! |
7cac9316 XL |
33 | //! ```text, ignore |
34 | //! Start parsing a a a a b against [· a $( a )* a b]. | |
35 | //! | |
36 | //! Remaining input: a a a a b | |
3b2f2976 | 37 | //! next: [· a $( a )* a b] |
1a4d82fc | 38 | //! |
7cac9316 | 39 | //! - - - Advance over an a. - - - |
1a4d82fc | 40 | //! |
7cac9316 | 41 | //! Remaining input: a a a b |
1a4d82fc JJ |
42 | //! cur: [a · $( a )* a b] |
43 | //! Descend/Skip (first item). | |
44 | //! next: [a $( · a )* a b] [a $( a )* · a b]. | |
45 | //! | |
7cac9316 | 46 | //! - - - Advance over an a. - - - |
1a4d82fc | 47 | //! |
7cac9316 | 48 | //! Remaining input: a a b |
3b2f2976 XL |
49 | //! cur: [a $( a · )* a b] [a $( a )* a · b] |
50 | //! Follow epsilon transition: Finish/Repeat (first item) | |
1a4d82fc JJ |
51 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] |
52 | //! | |
7cac9316 | 53 | //! - - - Advance over an a. - - - (this looks exactly like the last step) |
1a4d82fc | 54 | //! |
7cac9316 | 55 | //! Remaining input: a b |
3b2f2976 XL |
56 | //! cur: [a $( a · )* a b] [a $( a )* a · b] |
57 | //! Follow epsilon transition: Finish/Repeat (first item) | |
1a4d82fc JJ |
58 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] |
59 | //! | |
7cac9316 | 60 | //! - - - Advance over an a. - - - (this looks exactly like the last step) |
1a4d82fc | 61 | //! |
7cac9316 | 62 | //! Remaining input: b |
3b2f2976 XL |
63 | //! cur: [a $( a · )* a b] [a $( a )* a · b] |
64 | //! Follow epsilon transition: Finish/Repeat (first item) | |
65 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] | |
1a4d82fc | 66 | //! |
7cac9316 | 67 | //! - - - Advance over a b. - - - |
1a4d82fc | 68 | //! |
7cac9316 | 69 | //! Remaining input: '' |
1a4d82fc | 70 | //! eof: [a $( a )* a b ·] |
7cac9316 | 71 | //! ``` |
1a4d82fc | 72 | |
e74abb32 XL |
73 | crate use NamedMatch::*; |
74 | crate use ParseResult::*; | |
9fa01778 XL |
75 | use TokenTreeOrTokenTreeSlice::*; |
76 | ||
e74abb32 XL |
77 | use crate::mbe::{self, TokenTree}; |
78 | ||
74b04a01 | 79 | use rustc_ast::token::{self, DocComment, Nonterminal, Token}; |
5869c6ff | 80 | use rustc_parse::parser::Parser; |
74b04a01 | 81 | use rustc_session::parse::ParseSess; |
5869c6ff | 82 | use rustc_span::symbol::MacroRulesNormalizedIdent; |
970d7e83 | 83 | |
9fa01778 | 84 | use smallvec::{smallvec, SmallVec}; |
223e47cc | 85 | |
b7449926 | 86 | use rustc_data_structures::fx::FxHashMap; |
9fa01778 | 87 | use rustc_data_structures::sync::Lrc; |
17df50a5 | 88 | use rustc_span::symbol::Ident; |
74b04a01 | 89 | use std::borrow::Cow; |
b7449926 | 90 | use std::collections::hash_map::Entry::{Occupied, Vacant}; |
1a4d82fc | 91 | use std::mem; |
94b46f34 | 92 | use std::ops::{Deref, DerefMut}; |
223e47cc | 93 | |
2c00a5a8 | 94 | // To avoid costly uniqueness checks, we require that `MatchSeq` always has a nonempty body. |
223e47cc | 95 | |
2c00a5a8 XL |
96 | /// Either a sequence of token trees or a single one. This is used as the representation of the |
97 | /// sequence of tokens that make up a matcher. | |
1a4d82fc | 98 | #[derive(Clone)] |
a1dfa0c6 | 99 | enum TokenTreeOrTokenTreeSlice<'tt> { |
8bb4bdeb | 100 | Tt(TokenTree), |
a1dfa0c6 | 101 | TtSeq(&'tt [TokenTree]), |
1a4d82fc | 102 | } |
223e47cc | 103 | |
a1dfa0c6 | 104 | impl<'tt> TokenTreeOrTokenTreeSlice<'tt> { |
2c00a5a8 XL |
105 | /// Returns the number of constituent top-level token trees of `self` (top-level in that it |
106 | /// will not recursively descend into subtrees). | |
85aaf69f | 107 | fn len(&self) -> usize { |
92a42be0 SL |
108 | match *self { |
109 | TtSeq(ref v) => v.len(), | |
110 | Tt(ref tt) => tt.len(), | |
1a4d82fc JJ |
111 | } |
112 | } | |
223e47cc | 113 | |
a1dfa0c6 | 114 | /// The `index`-th token tree of `self`. |
85aaf69f | 115 | fn get_tt(&self, index: usize) -> TokenTree { |
92a42be0 SL |
116 | match *self { |
117 | TtSeq(ref v) => v[index].clone(), | |
118 | Tt(ref tt) => tt.get_tt(index), | |
1a4d82fc JJ |
119 | } |
120 | } | |
223e47cc LB |
121 | } |
122 | ||
2c00a5a8 XL |
123 | /// An unzipping of `TokenTree`s... see the `stack` field of `MatcherPos`. |
124 | /// | |
125 | /// This is used by `inner_parse_loop` to keep track of delimited submatchers that we have | |
126 | /// descended into. | |
1a4d82fc | 127 | #[derive(Clone)] |
a1dfa0c6 | 128 | struct MatcherTtFrame<'tt> { |
2c00a5a8 | 129 | /// The "parent" matcher that we are descending into. |
a1dfa0c6 | 130 | elts: TokenTreeOrTokenTreeSlice<'tt>, |
2c00a5a8 | 131 | /// The position of the "dot" in `elts` at the time we descended. |
85aaf69f | 132 | idx: usize, |
223e47cc LB |
133 | } |
134 | ||
a1dfa0c6 XL |
135 | type NamedMatchVec = SmallVec<[NamedMatch; 4]>; |
136 | ||
137 | /// Represents a single "position" (aka "matcher position", aka "item"), as | |
138 | /// described in the module documentation. | |
139 | /// | |
140 | /// Here: | |
141 | /// | |
142 | /// - `'root` represents the lifetime of the stack slot that holds the root | |
143 | /// `MatcherPos`. As described in `MatcherPosHandle`, the root `MatcherPos` | |
144 | /// structure is stored on the stack, but subsequent instances are put into | |
145 | /// the heap. | |
146 | /// - `'tt` represents the lifetime of the token trees that this matcher | |
147 | /// position refers to. | |
148 | /// | |
149 | /// It is important to distinguish these two lifetimes because we have a | |
150 | /// `SmallVec<TokenTreeOrTokenTreeSlice<'tt>>` below, and the destructor of | |
151 | /// that is considered to possibly access the data from its elements (it lacks | |
152 | /// a `#[may_dangle]` attribute). As a result, the compiler needs to know that | |
153 | /// all the elements in that `SmallVec` strictly outlive the root stack slot | |
154 | /// lifetime. By separating `'tt` from `'root`, we can show that. | |
1a4d82fc | 155 | #[derive(Clone)] |
dc9dc135 | 156 | struct MatcherPos<'root, 'tt> { |
2c00a5a8 | 157 | /// The token or sequence of tokens that make up the matcher |
a1dfa0c6 XL |
158 | top_elts: TokenTreeOrTokenTreeSlice<'tt>, |
159 | ||
2c00a5a8 | 160 | /// The position of the "dot" in this matcher |
85aaf69f | 161 | idx: usize, |
a1dfa0c6 | 162 | |
2c00a5a8 XL |
163 | /// For each named metavar in the matcher, we keep track of token trees matched against the |
164 | /// metavar by the black box parser. In particular, there may be more than one match per | |
165 | /// metavar if we are in a repetition (each repetition matches each of the variables). | |
166 | /// Moreover, matchers and repetitions can be nested; the `matches` field is shared (hence the | |
167 | /// `Rc`) among all "nested" matchers. `match_lo`, `match_cur`, and `match_hi` keep track of | |
168 | /// the current position of the `self` matcher position in the shared `matches` list. | |
169 | /// | |
170 | /// Also, note that while we are descending into a sequence, matchers are given their own | |
171 | /// `matches` vector. Only once we reach the end of a full repetition of the sequence do we add | |
172 | /// all bound matches from the submatcher into the shared top-level `matches` vector. If `sep` | |
173 | /// and `up` are `Some`, then `matches` is _not_ the shared top-level list. Instead, if one | |
174 | /// wants the shared `matches`, one should use `up.matches`. | |
9fa01778 | 175 | matches: Box<[Lrc<NamedMatchVec>]>, |
2c00a5a8 XL |
176 | /// The position in `matches` corresponding to the first metavar in this matcher's sequence of |
177 | /// token trees. In other words, the first metavar in the first token of `top_elts` corresponds | |
178 | /// to `matches[match_lo]`. | |
85aaf69f | 179 | match_lo: usize, |
2c00a5a8 XL |
180 | /// The position in `matches` corresponding to the metavar we are currently trying to match |
181 | /// against the source token stream. `match_lo <= match_cur <= match_hi`. | |
85aaf69f | 182 | match_cur: usize, |
2c00a5a8 XL |
183 | /// Similar to `match_lo` except `match_hi` is the position in `matches` of the _last_ metavar |
184 | /// in this matcher. | |
85aaf69f | 185 | match_hi: usize, |
2c00a5a8 | 186 | |
a1dfa0c6 XL |
187 | // The following fields are used if we are matching a repetition. If we aren't, they should be |
188 | // `None`. | |
2c00a5a8 | 189 | /// The KleeneOp of this sequence if we are in a repetition. |
e74abb32 | 190 | seq_op: Option<mbe::KleeneOp>, |
a1dfa0c6 XL |
191 | |
192 | /// The separator if we are in a repetition. | |
2c00a5a8 | 193 | sep: Option<Token>, |
a1dfa0c6 | 194 | |
2c00a5a8 XL |
195 | /// The "parent" matcher position if we are in a repetition. That is, the matcher position just |
196 | /// before we enter the sequence. | |
a1dfa0c6 | 197 | up: Option<MatcherPosHandle<'root, 'tt>>, |
2c00a5a8 | 198 | |
a1dfa0c6 | 199 | /// Specifically used to "unzip" token trees. By "unzip", we mean to unwrap the delimiters from |
0731742a | 200 | /// a delimited token tree (e.g., something wrapped in `(` `)`) or to get the contents of a doc |
a1dfa0c6 XL |
201 | /// comment... |
202 | /// | |
0731742a | 203 | /// When matching against matchers with nested delimited submatchers (e.g., `pat ( pat ( .. ) |
2c00a5a8 XL |
204 | /// pat ) pat`), we need to keep track of the matchers we are descending into. This stack does |
205 | /// that where the bottom of the stack is the outermost matcher. | |
a1dfa0c6 XL |
206 | /// Also, throughout the comments, this "descent" is often referred to as "unzipping"... |
207 | stack: SmallVec<[MatcherTtFrame<'tt>; 1]>, | |
223e47cc LB |
208 | } |
209 | ||
a1dfa0c6 | 210 | impl<'root, 'tt> MatcherPos<'root, 'tt> { |
9fa01778 | 211 | /// Adds `m` as a named match for the `idx`-th metavar. |
041b39d2 | 212 | fn push_match(&mut self, idx: usize, m: NamedMatch) { |
9fa01778 | 213 | let matches = Lrc::make_mut(&mut self.matches[idx]); |
041b39d2 XL |
214 | matches.push(m); |
215 | } | |
216 | } | |
217 | ||
94b46f34 XL |
218 | // Lots of MatcherPos instances are created at runtime. Allocating them on the |
219 | // heap is slow. Furthermore, using SmallVec<MatcherPos> to allocate them all | |
220 | // on the stack is also slow, because MatcherPos is quite a large type and | |
221 | // instances get moved around a lot between vectors, which requires lots of | |
222 | // slow memcpy calls. | |
223 | // | |
224 | // Therefore, the initial MatcherPos is always allocated on the stack, | |
225 | // subsequent ones (of which there aren't that many) are allocated on the heap, | |
226 | // and this type is used to encapsulate both cases. | |
dc9dc135 | 227 | enum MatcherPosHandle<'root, 'tt> { |
a1dfa0c6 XL |
228 | Ref(&'root mut MatcherPos<'root, 'tt>), |
229 | Box(Box<MatcherPos<'root, 'tt>>), | |
94b46f34 XL |
230 | } |
231 | ||
a1dfa0c6 | 232 | impl<'root, 'tt> Clone for MatcherPosHandle<'root, 'tt> { |
94b46f34 XL |
233 | // This always produces a new Box. |
234 | fn clone(&self) -> Self { | |
235 | MatcherPosHandle::Box(match *self { | |
236 | MatcherPosHandle::Ref(ref r) => Box::new((**r).clone()), | |
237 | MatcherPosHandle::Box(ref b) => b.clone(), | |
238 | }) | |
239 | } | |
240 | } | |
241 | ||
a1dfa0c6 XL |
242 | impl<'root, 'tt> Deref for MatcherPosHandle<'root, 'tt> { |
243 | type Target = MatcherPos<'root, 'tt>; | |
94b46f34 XL |
244 | fn deref(&self) -> &Self::Target { |
245 | match *self { | |
246 | MatcherPosHandle::Ref(ref r) => r, | |
247 | MatcherPosHandle::Box(ref b) => b, | |
248 | } | |
249 | } | |
250 | } | |
251 | ||
a1dfa0c6 XL |
252 | impl<'root, 'tt> DerefMut for MatcherPosHandle<'root, 'tt> { |
253 | fn deref_mut(&mut self) -> &mut MatcherPos<'root, 'tt> { | |
94b46f34 XL |
254 | match *self { |
255 | MatcherPosHandle::Ref(ref mut r) => r, | |
256 | MatcherPosHandle::Box(ref mut b) => b, | |
257 | } | |
258 | } | |
259 | } | |
260 | ||
2c00a5a8 | 261 | /// Represents the possible results of an attempted parse. |
e74abb32 | 262 | crate enum ParseResult<T> { |
2c00a5a8 XL |
263 | /// Parsed successfully. |
264 | Success(T), | |
265 | /// Arm failed to match. If the second parameter is `token::Eof`, it indicates an unexpected | |
266 | /// end of macro invocation. Otherwise, it indicates that no rules expected the given token. | |
dc9dc135 | 267 | Failure(Token, &'static str), |
2c00a5a8 | 268 | /// Fatal error (malformed macro?). Abort compilation. |
dfeec247 | 269 | Error(rustc_span::Span, String), |
ba9703b0 | 270 | ErrorReported, |
2c00a5a8 XL |
271 | } |
272 | ||
ba9703b0 XL |
273 | /// A `ParseResult` where the `Success` variant contains a mapping of |
274 | /// `MacroRulesNormalizedIdent`s to `NamedMatch`es. This represents the mapping | |
275 | /// of metavars to the token trees they bind to. | |
276 | crate type NamedParseResult = ParseResult<FxHashMap<MacroRulesNormalizedIdent, NamedMatch>>; | |
476ff2be | 277 | |
2c00a5a8 | 278 | /// Count how many metavars are named in the given matcher `ms`. |
e74abb32 | 279 | pub(super) fn count_names(ms: &[TokenTree]) -> usize { |
1a4d82fc | 280 | ms.iter().fold(0, |count, elt| { |
dfeec247 XL |
281 | count |
282 | + match *elt { | |
283 | TokenTree::Sequence(_, ref seq) => seq.num_captures, | |
284 | TokenTree::Delimited(_, ref delim) => count_names(&delim.tts), | |
285 | TokenTree::MetaVar(..) => 0, | |
286 | TokenTree::MetaVarDecl(..) => 1, | |
287 | TokenTree::Token(..) => 0, | |
288 | } | |
1a4d82fc | 289 | }) |
223e47cc LB |
290 | } |
291 | ||
a1dfa0c6 | 292 | /// `len` `Vec`s (initially shared and empty) that will store matches of metavars. |
9fa01778 | 293 | fn create_matches(len: usize) -> Box<[Lrc<NamedMatchVec>]> { |
a1dfa0c6 XL |
294 | if len == 0 { |
295 | vec![] | |
296 | } else { | |
9fa01778 | 297 | let empty_matches = Lrc::new(SmallVec::new()); |
0731742a | 298 | vec![empty_matches; len] |
dfeec247 XL |
299 | } |
300 | .into_boxed_slice() | |
2c00a5a8 XL |
301 | } |
302 | ||
9fa01778 | 303 | /// Generates the top-level matcher position in which the "dot" is before the first token of the |
60c5eb7d XL |
304 | /// matcher `ms`. |
305 | fn initial_matcher_pos<'root, 'tt>(ms: &'tt [TokenTree]) -> MatcherPos<'root, 'tt> { | |
94b46f34 | 306 | let match_idx_hi = count_names(ms); |
476ff2be | 307 | let matches = create_matches(match_idx_hi); |
94b46f34 | 308 | MatcherPos { |
2c00a5a8 XL |
309 | // Start with the top level matcher given to us |
310 | top_elts: TtSeq(ms), // "elts" is an abbr. for "elements" | |
311 | // The "dot" is before the first token of the matcher | |
85aaf69f | 312 | idx: 0, |
2c00a5a8 XL |
313 | |
314 | // Initialize `matches` to a bunch of empty `Vec`s -- one for each metavar in `top_elts`. | |
315 | // `match_lo` for `top_elts` is 0 and `match_hi` is `matches.len()`. `match_cur` is 0 since | |
316 | // we haven't actually matched anything yet. | |
3b2f2976 | 317 | matches, |
85aaf69f SL |
318 | match_lo: 0, |
319 | match_cur: 0, | |
223e47cc | 320 | match_hi: match_idx_hi, |
2c00a5a8 XL |
321 | |
322 | // Haven't descended into any delimiters, so empty stack | |
a1dfa0c6 | 323 | stack: smallvec![], |
2c00a5a8 XL |
324 | |
325 | // Haven't descended into any sequences, so both of these are `None`. | |
326 | seq_op: None, | |
327 | sep: None, | |
328 | up: None, | |
94b46f34 | 329 | } |
223e47cc LB |
330 | } |
331 | ||
7cac9316 | 332 | /// `NamedMatch` is a pattern-match result for a single `token::MATCH_NONTERMINAL`: |
1a4d82fc | 333 | /// so it is associated with a single ident in a parse, and all |
9fa01778 | 334 | /// `MatchedNonterminal`s in the `NamedMatch` have the same non-terminal type |
7cac9316 XL |
335 | /// (expr, item, etc). Each leaf in a single `NamedMatch` corresponds to a |
336 | /// single `token::MATCH_NONTERMINAL` in the `TokenTree` that produced it. | |
1a4d82fc | 337 | /// |
7cac9316 | 338 | /// The in-memory structure of a particular `NamedMatch` represents the match |
1a4d82fc JJ |
339 | /// that occurred when a particular subset of a matcher was applied to a |
340 | /// particular token tree. | |
341 | /// | |
7cac9316 XL |
342 | /// The width of each `MatchedSeq` in the `NamedMatch`, and the identity of |
343 | /// the `MatchedNonterminal`s, will depend on the token tree it was applied | |
344 | /// to: each `MatchedSeq` corresponds to a single `TTSeq` in the originating | |
345 | /// token tree. The depth of the `NamedMatch` structure will therefore depend | |
1a4d82fc JJ |
346 | /// only on the nesting depth of `ast::TTSeq`s in the originating |
347 | /// token tree it was derived from. | |
041b39d2 | 348 | #[derive(Debug, Clone)] |
e74abb32 | 349 | crate enum NamedMatch { |
60c5eb7d | 350 | MatchedSeq(Lrc<NamedMatchVec>), |
9fa01778 | 351 | MatchedNonterminal(Lrc<Nonterminal>), |
223e47cc LB |
352 | } |
353 | ||
2c00a5a8 XL |
354 | /// Takes a sequence of token trees `ms` representing a matcher which successfully matched input |
355 | /// and an iterator of items that matched input and produces a `NamedParseResult`. | |
356 | fn nameize<I: Iterator<Item = NamedMatch>>( | |
357 | sess: &ParseSess, | |
358 | ms: &[TokenTree], | |
359 | mut res: I, | |
360 | ) -> NamedParseResult { | |
0731742a | 361 | // Recursively descend into each type of matcher (e.g., sequences, delimited, metavars) and make |
2c00a5a8 XL |
362 | // sure that each metavar has _exactly one_ binding. If a metavar does not have exactly one |
363 | // binding, then there is an error. If it does, then we insert the binding into the | |
364 | // `NamedParseResult`. | |
365 | fn n_rec<I: Iterator<Item = NamedMatch>>( | |
366 | sess: &ParseSess, | |
367 | m: &TokenTree, | |
368 | res: &mut I, | |
ba9703b0 | 369 | ret_val: &mut FxHashMap<MacroRulesNormalizedIdent, NamedMatch>, |
dfeec247 | 370 | ) -> Result<(), (rustc_span::Span, String)> { |
92a42be0 | 371 | match *m { |
dfeec247 XL |
372 | TokenTree::Sequence(_, ref seq) => { |
373 | for next_m in &seq.tts { | |
374 | n_rec(sess, next_m, res.by_ref(), ret_val)? | |
375 | } | |
376 | } | |
377 | TokenTree::Delimited(_, ref delim) => { | |
378 | for next_m in &delim.tts { | |
379 | n_rec(sess, next_m, res.by_ref(), ret_val)?; | |
380 | } | |
381 | } | |
b9856134 XL |
382 | TokenTree::MetaVarDecl(span, _, None) => { |
383 | if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() { | |
384 | return Err((span, "missing fragment specifier".to_string())); | |
385 | } | |
386 | } | |
ba9703b0 XL |
387 | TokenTree::MetaVarDecl(sp, bind_name, _) => match ret_val |
388 | .entry(MacroRulesNormalizedIdent::new(bind_name)) | |
389 | { | |
dfeec247 XL |
390 | Vacant(spot) => { |
391 | spot.insert(res.next().unwrap()); | |
1a4d82fc | 392 | } |
dfeec247 XL |
393 | Occupied(..) => return Err((sp, format!("duplicated bind name: {}", bind_name))), |
394 | }, | |
041b39d2 | 395 | TokenTree::MetaVar(..) | TokenTree::Token(..) => (), |
223e47cc | 396 | } |
92a42be0 SL |
397 | |
398 | Ok(()) | |
223e47cc | 399 | } |
92a42be0 | 400 | |
b7449926 | 401 | let mut ret_val = FxHashMap::default(); |
92a42be0 | 402 | for m in ms { |
8bb4bdeb | 403 | match n_rec(sess, m, res.by_ref(), &mut ret_val) { |
2c00a5a8 | 404 | Ok(_) => {} |
92a42be0 SL |
405 | Err((sp, msg)) => return Error(sp, msg), |
406 | } | |
407 | } | |
408 | ||
409 | Success(ret_val) | |
223e47cc LB |
410 | } |
411 | ||
9fa01778 | 412 | /// Performs a token equality check, ignoring syntax context (that is, an unhygienic comparison) |
2c00a5a8 | 413 | fn token_name_eq(t1: &Token, t2: &Token) -> bool { |
dc9dc135 XL |
414 | if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) = (t1.ident(), t2.ident()) { |
415 | ident1.name == ident2.name && is_raw1 == is_raw2 | |
416 | } else if let (Some(ident1), Some(ident2)) = (t1.lifetime(), t2.lifetime()) { | |
417 | ident1.name == ident2.name | |
cc61c64b | 418 | } else { |
dc9dc135 | 419 | t1.kind == t2.kind |
1a4d82fc JJ |
420 | } |
421 | } | |
422 | ||
2c00a5a8 XL |
423 | /// Process the matcher positions of `cur_items` until it is empty. In the process, this will |
424 | /// produce more items in `next_items`, `eof_items`, and `bb_items`. | |
425 | /// | |
426 | /// For more info about the how this happens, see the module-level doc comments and the inline | |
427 | /// comments of this function. | |
428 | /// | |
429 | /// # Parameters | |
430 | /// | |
2c00a5a8 XL |
431 | /// - `cur_items`: the set of current items to be processed. This should be empty by the end of a |
432 | /// successful execution of this function. | |
433 | /// - `next_items`: the set of newly generated items. These are used to replenish `cur_items` in | |
434 | /// the function `parse`. | |
435 | /// - `eof_items`: the set of items that would be valid if this was the EOF. | |
436 | /// - `bb_items`: the set of items that are waiting for the black-box parser. | |
437 | /// - `token`: the current token of the parser. | |
2c00a5a8 XL |
438 | /// |
439 | /// # Returns | |
440 | /// | |
441 | /// A `ParseResult`. Note that matches are kept track of through the items generated. | |
a1dfa0c6 | 442 | fn inner_parse_loop<'root, 'tt>( |
b9856134 | 443 | sess: &ParseSess, |
a1dfa0c6 XL |
444 | cur_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, |
445 | next_items: &mut Vec<MatcherPosHandle<'root, 'tt>>, | |
446 | eof_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, | |
447 | bb_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, | |
2c00a5a8 | 448 | token: &Token, |
2c00a5a8 XL |
449 | ) -> ParseResult<()> { |
450 | // Pop items from `cur_items` until it is empty. | |
3b2f2976 | 451 | while let Some(mut item) = cur_items.pop() { |
2c00a5a8 XL |
452 | // When unzipped trees end, remove them. This corresponds to backtracking out of a |
453 | // delimited submatcher into which we already descended. In backtracking out again, we need | |
454 | // to advance the "dot" past the delimiters in the outer matcher. | |
3b2f2976 XL |
455 | while item.idx >= item.top_elts.len() { |
456 | match item.stack.pop() { | |
476ff2be | 457 | Some(MatcherTtFrame { elts, idx }) => { |
3b2f2976 XL |
458 | item.top_elts = elts; |
459 | item.idx = idx + 1; | |
1a4d82fc | 460 | } |
2c00a5a8 | 461 | None => break, |
1a4d82fc | 462 | } |
476ff2be | 463 | } |
223e47cc | 464 | |
2c00a5a8 XL |
465 | // Get the current position of the "dot" (`idx`) in `item` and the number of token trees in |
466 | // the matcher (`len`). | |
3b2f2976 XL |
467 | let idx = item.idx; |
468 | let len = item.top_elts.len(); | |
476ff2be | 469 | |
2c00a5a8 | 470 | // If `idx >= len`, then we are at or past the end of the matcher of `item`. |
476ff2be | 471 | if idx >= len { |
2c00a5a8 XL |
472 | // We are repeating iff there is a parent. If the matcher is inside of a repetition, |
473 | // then we could be at the end of a sequence or at the beginning of the next | |
474 | // repetition. | |
3b2f2976 | 475 | if item.up.is_some() { |
2c00a5a8 XL |
476 | // At this point, regardless of whether there is a separator, we should add all |
477 | // matches from the complete repetition of the sequence to the shared, top-level | |
478 | // `matches` list (actually, `up.matches`, which could itself not be the top-level, | |
479 | // but anyway...). Moreover, we add another item to `cur_items` in which the "dot" | |
480 | // is at the end of the `up` matcher. This ensures that the "dot" in the `up` | |
481 | // matcher is also advanced sufficiently. | |
482 | // | |
483 | // NOTE: removing the condition `idx == len` allows trailing separators. | |
476ff2be | 484 | if idx == len { |
2c00a5a8 | 485 | // Get the `up` matcher |
3b2f2976 | 486 | let mut new_pos = item.up.clone().unwrap(); |
476ff2be | 487 | |
2c00a5a8 | 488 | // Add matches from this repetition to the `matches` of `up` |
3b2f2976 XL |
489 | for idx in item.match_lo..item.match_hi { |
490 | let sub = item.matches[idx].clone(); | |
60c5eb7d | 491 | new_pos.push_match(idx, MatchedSeq(sub)); |
223e47cc LB |
492 | } |
493 | ||
2c00a5a8 | 494 | // Move the "dot" past the repetition in `up` |
3b2f2976 | 495 | new_pos.match_cur = item.match_hi; |
476ff2be | 496 | new_pos.idx += 1; |
3b2f2976 | 497 | cur_items.push(new_pos); |
223e47cc | 498 | } |
223e47cc | 499 | |
2c00a5a8 | 500 | // Check if we need a separator. |
3b2f2976 | 501 | if idx == len && item.sep.is_some() { |
2c00a5a8 XL |
502 | // We have a separator, and it is the current token. We can advance past the |
503 | // separator token. | |
5869c6ff | 504 | if item.sep.as_ref().map_or(false, |sep| token_name_eq(token, sep)) { |
3b2f2976 XL |
505 | item.idx += 1; |
506 | next_items.push(item); | |
223e47cc | 507 | } |
2c00a5a8 XL |
508 | } |
509 | // We don't need a separator. Move the "dot" back to the beginning of the matcher | |
510 | // and try to match again UNLESS we are only allowed to have _one_ repetition. | |
e74abb32 | 511 | else if item.seq_op != Some(mbe::KleeneOp::ZeroOrOne) { |
3b2f2976 XL |
512 | item.match_cur = item.match_lo; |
513 | item.idx = 0; | |
514 | cur_items.push(item); | |
476ff2be | 515 | } |
2c00a5a8 XL |
516 | } |
517 | // If we are not in a repetition, then being at the end of a matcher means that we have | |
518 | // reached the potential end of the input. | |
519 | else { | |
3b2f2976 | 520 | eof_items.push(item); |
476ff2be | 521 | } |
2c00a5a8 XL |
522 | } |
523 | // We are in the middle of a matcher. | |
524 | else { | |
525 | // Look at what token in the matcher we are trying to match the current token (`token`) | |
526 | // against. Depending on that, we may generate new items. | |
3b2f2976 | 527 | match item.top_elts.get_tt(idx) { |
2c00a5a8 | 528 | // Need to descend into a sequence |
476ff2be | 529 | TokenTree::Sequence(sp, seq) => { |
48663c56 XL |
530 | // Examine the case where there are 0 matches of this sequence. We are |
531 | // implicitly disallowing OneOrMore from having 0 matches here. Thus, that will | |
532 | // result in a "no rules expected token" error by virtue of this matcher not | |
533 | // working. | |
e74abb32 XL |
534 | if seq.kleene.op == mbe::KleeneOp::ZeroOrMore |
535 | || seq.kleene.op == mbe::KleeneOp::ZeroOrOne | |
2c00a5a8 | 536 | { |
3b2f2976 XL |
537 | let mut new_item = item.clone(); |
538 | new_item.match_cur += seq.num_captures; | |
539 | new_item.idx += 1; | |
540 | for idx in item.match_cur..item.match_cur + seq.num_captures { | |
60c5eb7d | 541 | new_item.push_match(idx, MatchedSeq(Lrc::new(smallvec![]))); |
1a4d82fc | 542 | } |
3b2f2976 | 543 | cur_items.push(new_item); |
1a4d82fc | 544 | } |
476ff2be | 545 | |
3b2f2976 | 546 | let matches = create_matches(item.matches.len()); |
94b46f34 | 547 | cur_items.push(MatcherPosHandle::Box(Box::new(MatcherPos { |
a1dfa0c6 | 548 | stack: smallvec![], |
476ff2be | 549 | sep: seq.separator.clone(), |
416331ca | 550 | seq_op: Some(seq.kleene.op), |
476ff2be | 551 | idx: 0, |
3b2f2976 XL |
552 | matches, |
553 | match_lo: item.match_cur, | |
554 | match_cur: item.match_cur, | |
555 | match_hi: item.match_cur + seq.num_captures, | |
556 | up: Some(item), | |
476ff2be | 557 | top_elts: Tt(TokenTree::Sequence(sp, seq)), |
94b46f34 | 558 | }))); |
476ff2be | 559 | } |
2c00a5a8 | 560 | |
b9856134 XL |
561 | // We need to match a metavar (but the identifier is invalid)... this is an error |
562 | TokenTree::MetaVarDecl(span, _, None) => { | |
563 | if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() { | |
564 | return Error(span, "missing fragment specifier".to_string()); | |
565 | } | |
566 | } | |
567 | ||
2c00a5a8 XL |
568 | // We need to match a metavar with a valid ident... call out to the black-box |
569 | // parser by adding an item to `bb_items`. | |
5869c6ff | 570 | TokenTree::MetaVarDecl(_, _, Some(kind)) => { |
fc512014 XL |
571 | // Built-in nonterminals never start with these tokens, so we can eliminate |
572 | // them from consideration. | |
573 | // | |
574 | // We use the span of the metavariable declaration to determine any | |
575 | // edition-specific matching behavior for non-terminals. | |
5869c6ff | 576 | if Parser::nonterminal_may_begin_with(kind, token) { |
3b2f2976 | 577 | bb_items.push(item); |
1a4d82fc | 578 | } |
476ff2be | 579 | } |
2c00a5a8 XL |
580 | |
581 | // We need to descend into a delimited submatcher or a doc comment. To do this, we | |
582 | // push the current matcher onto a stack and push a new item containing the | |
583 | // submatcher onto `cur_items`. | |
584 | // | |
585 | // At the beginning of the loop, if we reach the end of the delimited submatcher, | |
586 | // we pop the stack to backtrack out of the descent. | |
ba9703b0 XL |
587 | seq |
588 | @ | |
589 | (TokenTree::Delimited(..) | |
590 | | TokenTree::Token(Token { kind: DocComment(..), .. })) => { | |
3b2f2976 XL |
591 | let lower_elts = mem::replace(&mut item.top_elts, Tt(seq)); |
592 | let idx = item.idx; | |
dfeec247 | 593 | item.stack.push(MatcherTtFrame { elts: lower_elts, idx }); |
3b2f2976 XL |
594 | item.idx = 0; |
595 | cur_items.push(item); | |
476ff2be | 596 | } |
2c00a5a8 XL |
597 | |
598 | // We just matched a normal token. We can just advance the parser. | |
dc9dc135 | 599 | TokenTree::Token(t) if token_name_eq(&t, token) => { |
3b2f2976 XL |
600 | item.idx += 1; |
601 | next_items.push(item); | |
223e47cc | 602 | } |
2c00a5a8 XL |
603 | |
604 | // There was another token that was not `token`... This means we can't add any | |
605 | // rules. NOTE that this is not necessarily an error unless _all_ items in | |
606 | // `cur_items` end up doing this. There may still be some other matchers that do | |
607 | // end up working out. | |
041b39d2 | 608 | TokenTree::Token(..) | TokenTree::MetaVar(..) => {} |
223e47cc LB |
609 | } |
610 | } | |
476ff2be SL |
611 | } |
612 | ||
2c00a5a8 | 613 | // Yay a successful parse (so far)! |
476ff2be SL |
614 | Success(()) |
615 | } | |
616 | ||
74b04a01 XL |
617 | /// Use the given sequence of token trees (`ms`) as a matcher. Match the token |
618 | /// stream from the given `parser` against it and return the match. | |
17df50a5 XL |
619 | pub(super) fn parse_tt( |
620 | parser: &mut Cow<'_, Parser<'_>>, | |
621 | ms: &[TokenTree], | |
622 | macro_name: Ident, | |
623 | ) -> NamedParseResult { | |
2c00a5a8 XL |
624 | // A queue of possible matcher positions. We initialize it with the matcher position in which |
625 | // the "dot" is before the first token of the first token tree in `ms`. `inner_parse_loop` then | |
b7449926 | 626 | // processes all of these possible matcher positions and produces possible next positions into |
2c00a5a8 XL |
627 | // `next_items`. After some post-processing, the contents of `next_items` replenish `cur_items` |
628 | // and we start over again. | |
94b46f34 XL |
629 | // |
630 | // This MatcherPos instance is allocated on the stack. All others -- and | |
631 | // there are frequently *no* others! -- are allocated on the heap. | |
60c5eb7d | 632 | let mut initial = initial_matcher_pos(ms); |
b7449926 | 633 | let mut cur_items = smallvec![MatcherPosHandle::Ref(&mut initial)]; |
2c00a5a8 | 634 | let mut next_items = Vec::new(); |
476ff2be SL |
635 | |
636 | loop { | |
2c00a5a8 | 637 | // Matcher positions black-box parsed by parser.rs (`parser`) |
0bf4aa26 | 638 | let mut bb_items = SmallVec::new(); |
2c00a5a8 XL |
639 | |
640 | // Matcher positions that would be valid if the macro invocation was over now | |
0bf4aa26 | 641 | let mut eof_items = SmallVec::new(); |
3b2f2976 | 642 | assert!(next_items.is_empty()); |
476ff2be | 643 | |
2c00a5a8 XL |
644 | // Process `cur_items` until either we have finished the input or we need to get some |
645 | // parsing from the black-box parser done. The result is that `next_items` will contain a | |
646 | // bunch of possible next matcher positions in `next_items`. | |
647 | match inner_parse_loop( | |
b9856134 | 648 | parser.sess, |
2c00a5a8 XL |
649 | &mut cur_items, |
650 | &mut next_items, | |
651 | &mut eof_items, | |
652 | &mut bb_items, | |
653 | &parser.token, | |
2c00a5a8 XL |
654 | ) { |
655 | Success(_) => {} | |
dc9dc135 | 656 | Failure(token, msg) => return Failure(token, msg), |
476ff2be | 657 | Error(sp, msg) => return Error(sp, msg), |
ba9703b0 | 658 | ErrorReported => return ErrorReported, |
476ff2be SL |
659 | } |
660 | ||
3b2f2976 XL |
661 | // inner parse loop handled all cur_items, so it's empty |
662 | assert!(cur_items.is_empty()); | |
223e47cc | 663 | |
2c00a5a8 XL |
664 | // We need to do some post processing after the `inner_parser_loop`. |
665 | // | |
666 | // Error messages here could be improved with links to original rules. | |
667 | ||
668 | // If we reached the EOF, check that there is EXACTLY ONE possible matcher. Otherwise, | |
a1dfa0c6 | 669 | // either the parse is ambiguous (which should never happen) or there is a syntax error. |
dc9dc135 | 670 | if parser.token == token::Eof { |
3b2f2976 | 671 | if eof_items.len() == 1 { |
dfeec247 XL |
672 | let matches = |
673 | eof_items[0].matches.iter_mut().map(|dv| Lrc::make_mut(dv).pop().unwrap()); | |
74b04a01 | 674 | return nameize(parser.sess, ms, matches); |
3b2f2976 | 675 | } else if eof_items.len() > 1 { |
2c00a5a8 | 676 | return Error( |
dc9dc135 | 677 | parser.token.span, |
2c00a5a8 XL |
678 | "ambiguity: multiple successful parses".to_string(), |
679 | ); | |
223e47cc | 680 | } else { |
a1dfa0c6 | 681 | return Failure( |
dfeec247 XL |
682 | Token::new( |
683 | token::Eof, | |
684 | if parser.token.span.is_dummy() { | |
685 | parser.token.span | |
686 | } else { | |
687 | parser.token.span.shrink_to_hi() | |
688 | }, | |
689 | ), | |
0731742a | 690 | "missing tokens in macro arguments", |
a1dfa0c6 | 691 | ); |
223e47cc | 692 | } |
2c00a5a8 | 693 | } |
8faf50e0 XL |
694 | // Performance hack: eof_items may share matchers via Rc with other things that we want |
695 | // to modify. Dropping eof_items now may drop these refcounts to 1, preventing an | |
696 | // unnecessary implicit clone later in Rc::make_mut. | |
697 | drop(eof_items); | |
698 | ||
74b04a01 XL |
699 | // If there are no possible next positions AND we aren't waiting for the black-box parser, |
700 | // then there is a syntax error. | |
701 | if bb_items.is_empty() && next_items.is_empty() { | |
702 | return Failure(parser.token.clone(), "no rules expected this token in macro call"); | |
703 | } | |
2c00a5a8 XL |
704 | // Another possibility is that we need to call out to parse some rust nonterminal |
705 | // (black-box) parser. However, if there is not EXACTLY ONE of these, something is wrong. | |
74b04a01 | 706 | else if (!bb_items.is_empty() && !next_items.is_empty()) || bb_items.len() > 1 { |
2c00a5a8 XL |
707 | let nts = bb_items |
708 | .iter() | |
709 | .map(|item| match item.top_elts.get_tt(item.idx) { | |
b9856134 | 710 | TokenTree::MetaVarDecl(_, bind, Some(kind)) => format!("{} ('{}')", kind, bind), |
2c00a5a8 XL |
711 | _ => panic!(), |
712 | }) | |
713 | .collect::<Vec<String>>() | |
714 | .join(" or "); | |
715 | ||
716 | return Error( | |
dc9dc135 | 717 | parser.token.span, |
2c00a5a8 | 718 | format!( |
17df50a5 | 719 | "local ambiguity when calling macro `{macro_name}`: multiple parsing options: {}", |
2c00a5a8 XL |
720 | match next_items.len() { |
721 | 0 => format!("built-in NTs {}.", nts), | |
722 | 1 => format!("built-in NTs {} or 1 other option.", nts), | |
723 | n => format!("built-in NTs {} or {} other options.", nts, n), | |
724 | } | |
725 | ), | |
726 | ); | |
727 | } | |
2c00a5a8 XL |
728 | // Dump all possible `next_items` into `cur_items` for the next iteration. |
729 | else if !next_items.is_empty() { | |
730 | // Now process the next token | |
3b2f2976 | 731 | cur_items.extend(next_items.drain(..)); |
74b04a01 | 732 | parser.to_mut().bump(); |
2c00a5a8 XL |
733 | } |
734 | // Finally, we have the case where we need to call the black-box parser to get some | |
735 | // nonterminal. | |
736 | else { | |
737 | assert_eq!(bb_items.len(), 1); | |
738 | ||
3b2f2976 | 739 | let mut item = bb_items.pop().unwrap(); |
b9856134 | 740 | if let TokenTree::MetaVarDecl(span, _, Some(kind)) = item.top_elts.get_tt(item.idx) { |
3b2f2976 | 741 | let match_cur = item.match_cur; |
fc512014 XL |
742 | // We use the span of the metavariable declaration to determine any |
743 | // edition-specific matching behavior for non-terminals. | |
5869c6ff | 744 | let nt = match parser.to_mut().parse_nonterminal(kind) { |
3dfed10e XL |
745 | Err(mut err) => { |
746 | err.span_label( | |
747 | span, | |
748 | format!("while parsing argument for this `{}` macro fragment", kind), | |
749 | ) | |
750 | .emit(); | |
751 | return ErrorReported; | |
752 | } | |
ba9703b0 XL |
753 | Ok(nt) => nt, |
754 | }; | |
755 | item.push_match(match_cur, MatchedNonterminal(Lrc::new(nt))); | |
3b2f2976 XL |
756 | item.idx += 1; |
757 | item.match_cur += 1; | |
476ff2be SL |
758 | } else { |
759 | unreachable!() | |
223e47cc | 760 | } |
3b2f2976 | 761 | cur_items.push(item); |
223e47cc LB |
762 | } |
763 | ||
3b2f2976 | 764 | assert!(!cur_items.is_empty()); |
223e47cc LB |
765 | } |
766 | } |