]>
Commit | Line | Data |
---|---|---|
3b2f2976 | 1 | // Copyright 2012-2017 The Rust Project Developers. See the COPYRIGHT |
223e47cc LB |
2 | // file at the top-level directory of this distribution and at |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
1a4d82fc | 10 | |
3b2f2976 XL |
11 | //! This is an NFA-based parser, which calls out to the main rust parser for named nonterminals |
12 | //! (which it commits to fully when it hits one in a grammar). There's a set of current NFA threads | |
13 | //! and a set of next ones. Instead of NTs, we have a special case for Kleene star. The big-O, in | |
14 | //! pathological cases, is worse than traditional use of NFA or Earley parsing, but it's an easier | |
15 | //! fit for Macro-by-Example-style rules. | |
16 | //! | |
17 | //! (In order to prevent the pathological case, we'd need to lazily construct the resulting | |
18 | //! `NamedMatch`es at the very end. It'd be a pain, and require more memory to keep around old | |
19 | //! items, but it would also save overhead) | |
20 | //! | |
21 | //! We don't say this parser uses the Earley algorithm, because it's unnecessarily inaccurate. | |
22 | //! The macro parser restricts itself to the features of finite state automata. Earley parsers | |
23 | //! can be described as an extension of NFAs with completion rules, prediction rules, and recursion. | |
1a4d82fc JJ |
24 | //! |
25 | //! Quick intro to how the parser works: | |
26 | //! | |
27 | //! A 'position' is a dot in the middle of a matcher, usually represented as a | |
28 | //! dot. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`. | |
29 | //! | |
30 | //! The parser walks through the input a character at a time, maintaining a list | |
3b2f2976 | 31 | //! of threads consistent with the current position in the input string: `cur_items`. |
1a4d82fc | 32 | //! |
3b2f2976 XL |
33 | //! As it processes them, it fills up `eof_items` with threads that would be valid if |
34 | //! the macro invocation is now over, `bb_items` with threads that are waiting on | |
35 | //! a Rust nonterminal like `$e:expr`, and `next_items` with threads that are waiting | |
b039eaaf | 36 | //! on a particular token. Most of the logic concerns moving the · through the |
3b2f2976 XL |
37 | //! repetitions indicated by Kleene stars. The rules for moving the · without |
38 | //! consuming any input are called epsilon transitions. It only advances or calls | |
39 | //! out to the real Rust parser when no `cur_items` threads remain. | |
1a4d82fc | 40 | //! |
7cac9316 | 41 | //! Example: |
1a4d82fc | 42 | //! |
7cac9316 XL |
43 | //! ```text, ignore |
44 | //! Start parsing a a a a b against [· a $( a )* a b]. | |
45 | //! | |
46 | //! Remaining input: a a a a b | |
3b2f2976 | 47 | //! next: [· a $( a )* a b] |
1a4d82fc | 48 | //! |
7cac9316 | 49 | //! - - - Advance over an a. - - - |
1a4d82fc | 50 | //! |
7cac9316 | 51 | //! Remaining input: a a a b |
1a4d82fc JJ |
52 | //! cur: [a · $( a )* a b] |
53 | //! Descend/Skip (first item). | |
54 | //! next: [a $( · a )* a b] [a $( a )* · a b]. | |
55 | //! | |
7cac9316 | 56 | //! - - - Advance over an a. - - - |
1a4d82fc | 57 | //! |
7cac9316 | 58 | //! Remaining input: a a b |
3b2f2976 XL |
59 | //! cur: [a $( a · )* a b] [a $( a )* a · b] |
60 | //! Follow epsilon transition: Finish/Repeat (first item) | |
1a4d82fc JJ |
61 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] |
62 | //! | |
7cac9316 | 63 | //! - - - Advance over an a. - - - (this looks exactly like the last step) |
1a4d82fc | 64 | //! |
7cac9316 | 65 | //! Remaining input: a b |
3b2f2976 XL |
66 | //! cur: [a $( a · )* a b] [a $( a )* a · b] |
67 | //! Follow epsilon transition: Finish/Repeat (first item) | |
1a4d82fc JJ |
68 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] |
69 | //! | |
7cac9316 | 70 | //! - - - Advance over an a. - - - (this looks exactly like the last step) |
1a4d82fc | 71 | //! |
7cac9316 | 72 | //! Remaining input: b |
3b2f2976 XL |
73 | //! cur: [a $( a · )* a b] [a $( a )* a · b] |
74 | //! Follow epsilon transition: Finish/Repeat (first item) | |
75 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] | |
1a4d82fc | 76 | //! |
7cac9316 | 77 | //! - - - Advance over a b. - - - |
1a4d82fc | 78 | //! |
7cac9316 | 79 | //! Remaining input: '' |
1a4d82fc | 80 | //! eof: [a $( a )* a b ·] |
7cac9316 | 81 | //! ``` |
1a4d82fc JJ |
82 | |
83 | pub use self::NamedMatch::*; | |
84 | pub use self::ParseResult::*; | |
85 | use self::TokenTreeOrTokenTreeVec::*; | |
970d7e83 | 86 | |
5bcae85e | 87 | use ast::Ident; |
cc61c64b | 88 | use syntax_pos::{self, BytePos, Span}; |
9cc50fc6 | 89 | use errors::FatalError; |
8bb4bdeb | 90 | use ext::tt::quoted::{self, TokenTree}; |
476ff2be | 91 | use parse::{Directory, ParseSess}; |
2c00a5a8 XL |
92 | use parse::parser::{Parser, PathStyle}; |
93 | use parse::token::{self, DocComment, Nonterminal, Token}; | |
1a4d82fc | 94 | use print::pprust; |
8bb4bdeb XL |
95 | use symbol::keywords; |
96 | use tokenstream::TokenStream; | |
c30ab7b3 | 97 | use util::small_vector::SmallVector; |
223e47cc | 98 | |
1a4d82fc JJ |
99 | use std::mem; |
100 | use std::rc::Rc; | |
101 | use std::collections::HashMap; | |
2c00a5a8 | 102 | use std::collections::hash_map::Entry::{Occupied, Vacant}; |
223e47cc | 103 | |
2c00a5a8 | 104 | // To avoid costly uniqueness checks, we require that `MatchSeq` always has a nonempty body. |
223e47cc | 105 | |
2c00a5a8 XL |
106 | /// Either a sequence of token trees or a single one. This is used as the representation of the |
107 | /// sequence of tokens that make up a matcher. | |
1a4d82fc JJ |
108 | #[derive(Clone)] |
109 | enum TokenTreeOrTokenTreeVec { | |
8bb4bdeb XL |
110 | Tt(TokenTree), |
111 | TtSeq(Vec<TokenTree>), | |
1a4d82fc | 112 | } |
223e47cc | 113 | |
1a4d82fc | 114 | impl TokenTreeOrTokenTreeVec { |
2c00a5a8 XL |
115 | /// Returns the number of constituent top-level token trees of `self` (top-level in that it |
116 | /// will not recursively descend into subtrees). | |
85aaf69f | 117 | fn len(&self) -> usize { |
92a42be0 SL |
118 | match *self { |
119 | TtSeq(ref v) => v.len(), | |
120 | Tt(ref tt) => tt.len(), | |
1a4d82fc JJ |
121 | } |
122 | } | |
223e47cc | 123 | |
2c00a5a8 | 124 | /// The the `index`-th token tree of `self`. |
85aaf69f | 125 | fn get_tt(&self, index: usize) -> TokenTree { |
92a42be0 SL |
126 | match *self { |
127 | TtSeq(ref v) => v[index].clone(), | |
128 | Tt(ref tt) => tt.get_tt(index), | |
1a4d82fc JJ |
129 | } |
130 | } | |
223e47cc LB |
131 | } |
132 | ||
2c00a5a8 XL |
133 | /// An unzipping of `TokenTree`s... see the `stack` field of `MatcherPos`. |
134 | /// | |
135 | /// This is used by `inner_parse_loop` to keep track of delimited submatchers that we have | |
136 | /// descended into. | |
1a4d82fc JJ |
137 | #[derive(Clone)] |
138 | struct MatcherTtFrame { | |
2c00a5a8 | 139 | /// The "parent" matcher that we are descending into. |
1a4d82fc | 140 | elts: TokenTreeOrTokenTreeVec, |
2c00a5a8 | 141 | /// The position of the "dot" in `elts` at the time we descended. |
85aaf69f | 142 | idx: usize, |
223e47cc LB |
143 | } |
144 | ||
2c00a5a8 XL |
145 | /// Represents a single "position" (aka "matcher position", aka "item"), as described in the module |
146 | /// documentation. | |
1a4d82fc | 147 | #[derive(Clone)] |
476ff2be | 148 | struct MatcherPos { |
2c00a5a8 | 149 | /// The token or sequence of tokens that make up the matcher |
1a4d82fc | 150 | top_elts: TokenTreeOrTokenTreeVec, |
2c00a5a8 | 151 | /// The position of the "dot" in this matcher |
85aaf69f | 152 | idx: usize, |
2c00a5a8 XL |
153 | /// The beginning position in the source that the beginning of this matcher corresponds to. In |
154 | /// other words, the token in the source at `sp_lo` is matched against the first token of the | |
155 | /// matcher. | |
156 | sp_lo: BytePos, | |
157 | ||
158 | /// For each named metavar in the matcher, we keep track of token trees matched against the | |
159 | /// metavar by the black box parser. In particular, there may be more than one match per | |
160 | /// metavar if we are in a repetition (each repetition matches each of the variables). | |
161 | /// Moreover, matchers and repetitions can be nested; the `matches` field is shared (hence the | |
162 | /// `Rc`) among all "nested" matchers. `match_lo`, `match_cur`, and `match_hi` keep track of | |
163 | /// the current position of the `self` matcher position in the shared `matches` list. | |
164 | /// | |
165 | /// Also, note that while we are descending into a sequence, matchers are given their own | |
166 | /// `matches` vector. Only once we reach the end of a full repetition of the sequence do we add | |
167 | /// all bound matches from the submatcher into the shared top-level `matches` vector. If `sep` | |
168 | /// and `up` are `Some`, then `matches` is _not_ the shared top-level list. Instead, if one | |
169 | /// wants the shared `matches`, one should use `up.matches`. | |
041b39d2 | 170 | matches: Vec<Rc<Vec<NamedMatch>>>, |
2c00a5a8 XL |
171 | /// The position in `matches` corresponding to the first metavar in this matcher's sequence of |
172 | /// token trees. In other words, the first metavar in the first token of `top_elts` corresponds | |
173 | /// to `matches[match_lo]`. | |
85aaf69f | 174 | match_lo: usize, |
2c00a5a8 XL |
175 | /// The position in `matches` corresponding to the metavar we are currently trying to match |
176 | /// against the source token stream. `match_lo <= match_cur <= match_hi`. | |
85aaf69f | 177 | match_cur: usize, |
2c00a5a8 XL |
178 | /// Similar to `match_lo` except `match_hi` is the position in `matches` of the _last_ metavar |
179 | /// in this matcher. | |
85aaf69f | 180 | match_hi: usize, |
2c00a5a8 XL |
181 | |
182 | // Specifically used if we are matching a repetition. If we aren't both should be `None`. | |
183 | /// The KleeneOp of this sequence if we are in a repetition. | |
184 | seq_op: Option<quoted::KleeneOp>, | |
185 | /// The separator if we are in a repetition | |
186 | sep: Option<Token>, | |
187 | /// The "parent" matcher position if we are in a repetition. That is, the matcher position just | |
188 | /// before we enter the sequence. | |
189 | up: Option<Box<MatcherPos>>, | |
190 | ||
191 | // Specifically used to "unzip" token trees. By "unzip", we mean to unwrap the delimiters from | |
192 | // a delimited token tree (e.g. something wrapped in `(` `)`) or to get the contents of a doc | |
193 | // comment... | |
194 | /// When matching against matchers with nested delimited submatchers (e.g. `pat ( pat ( .. ) | |
195 | /// pat ) pat`), we need to keep track of the matchers we are descending into. This stack does | |
196 | /// that where the bottom of the stack is the outermost matcher. | |
197 | // Also, throughout the comments, this "descent" is often referred to as "unzipping"... | |
198 | stack: Vec<MatcherTtFrame>, | |
223e47cc LB |
199 | } |
200 | ||
041b39d2 | 201 | impl MatcherPos { |
2c00a5a8 | 202 | /// Add `m` as a named match for the `idx`-th metavar. |
041b39d2 XL |
203 | fn push_match(&mut self, idx: usize, m: NamedMatch) { |
204 | let matches = Rc::make_mut(&mut self.matches[idx]); | |
205 | matches.push(m); | |
206 | } | |
207 | } | |
208 | ||
2c00a5a8 XL |
209 | /// Represents the possible results of an attempted parse. |
210 | pub enum ParseResult<T> { | |
211 | /// Parsed successfully. | |
212 | Success(T), | |
213 | /// Arm failed to match. If the second parameter is `token::Eof`, it indicates an unexpected | |
214 | /// end of macro invocation. Otherwise, it indicates that no rules expected the given token. | |
215 | Failure(syntax_pos::Span, Token), | |
216 | /// Fatal error (malformed macro?). Abort compilation. | |
217 | Error(syntax_pos::Span, String), | |
218 | } | |
219 | ||
220 | /// A `ParseResult` where the `Success` variant contains a mapping of `Ident`s to `NamedMatch`es. | |
221 | /// This represents the mapping of metavars to the token trees they bind to. | |
476ff2be SL |
222 | pub type NamedParseResult = ParseResult<HashMap<Ident, Rc<NamedMatch>>>; |
223 | ||
2c00a5a8 | 224 | /// Count how many metavars are named in the given matcher `ms`. |
85aaf69f | 225 | pub fn count_names(ms: &[TokenTree]) -> usize { |
1a4d82fc | 226 | ms.iter().fold(0, |count, elt| { |
92a42be0 | 227 | count + match *elt { |
041b39d2 XL |
228 | TokenTree::Sequence(_, ref seq) => seq.num_captures, |
229 | TokenTree::Delimited(_, ref delim) => count_names(&delim.tts), | |
230 | TokenTree::MetaVar(..) => 0, | |
231 | TokenTree::MetaVarDecl(..) => 1, | |
9e0c209e | 232 | TokenTree::Token(..) => 0, |
1a4d82fc JJ |
233 | } |
234 | }) | |
223e47cc LB |
235 | } |
236 | ||
2c00a5a8 XL |
237 | /// Initialize `len` empty shared `Vec`s to be used to store matches of metavars. |
238 | fn create_matches(len: usize) -> Vec<Rc<Vec<NamedMatch>>> { | |
239 | (0..len).into_iter().map(|_| Rc::new(Vec::new())).collect() | |
240 | } | |
241 | ||
242 | /// Generate the top-level matcher position in which the "dot" is before the first token of the | |
243 | /// matcher `ms` and we are going to start matching at position `lo` in the source. | |
476ff2be | 244 | fn initial_matcher_pos(ms: Vec<TokenTree>, lo: BytePos) -> Box<MatcherPos> { |
85aaf69f | 245 | let match_idx_hi = count_names(&ms[..]); |
476ff2be | 246 | let matches = create_matches(match_idx_hi); |
d9579d0f | 247 | Box::new(MatcherPos { |
2c00a5a8 XL |
248 | // Start with the top level matcher given to us |
249 | top_elts: TtSeq(ms), // "elts" is an abbr. for "elements" | |
250 | // The "dot" is before the first token of the matcher | |
85aaf69f | 251 | idx: 0, |
2c00a5a8 XL |
252 | // We start matching with byte `lo` in the source code |
253 | sp_lo: lo, | |
254 | ||
255 | // Initialize `matches` to a bunch of empty `Vec`s -- one for each metavar in `top_elts`. | |
256 | // `match_lo` for `top_elts` is 0 and `match_hi` is `matches.len()`. `match_cur` is 0 since | |
257 | // we haven't actually matched anything yet. | |
3b2f2976 | 258 | matches, |
85aaf69f SL |
259 | match_lo: 0, |
260 | match_cur: 0, | |
223e47cc | 261 | match_hi: match_idx_hi, |
2c00a5a8 XL |
262 | |
263 | // Haven't descended into any delimiters, so empty stack | |
264 | stack: vec![], | |
265 | ||
266 | // Haven't descended into any sequences, so both of these are `None`. | |
267 | seq_op: None, | |
268 | sep: None, | |
269 | up: None, | |
d9579d0f | 270 | }) |
223e47cc LB |
271 | } |
272 | ||
7cac9316 | 273 | /// `NamedMatch` is a pattern-match result for a single `token::MATCH_NONTERMINAL`: |
1a4d82fc | 274 | /// so it is associated with a single ident in a parse, and all |
7cac9316 XL |
275 | /// `MatchedNonterminal`s in the `NamedMatch` have the same nonterminal type |
276 | /// (expr, item, etc). Each leaf in a single `NamedMatch` corresponds to a | |
277 | /// single `token::MATCH_NONTERMINAL` in the `TokenTree` that produced it. | |
1a4d82fc | 278 | /// |
7cac9316 | 279 | /// The in-memory structure of a particular `NamedMatch` represents the match |
1a4d82fc JJ |
280 | /// that occurred when a particular subset of a matcher was applied to a |
281 | /// particular token tree. | |
282 | /// | |
7cac9316 XL |
283 | /// The width of each `MatchedSeq` in the `NamedMatch`, and the identity of |
284 | /// the `MatchedNonterminal`s, will depend on the token tree it was applied | |
285 | /// to: each `MatchedSeq` corresponds to a single `TTSeq` in the originating | |
286 | /// token tree. The depth of the `NamedMatch` structure will therefore depend | |
1a4d82fc JJ |
287 | /// only on the nesting depth of `ast::TTSeq`s in the originating |
288 | /// token tree it was derived from. | |
041b39d2 | 289 | #[derive(Debug, Clone)] |
1a4d82fc | 290 | pub enum NamedMatch { |
041b39d2 | 291 | MatchedSeq(Rc<Vec<NamedMatch>>, syntax_pos::Span), |
2c00a5a8 | 292 | MatchedNonterminal(Rc<Nonterminal>), |
223e47cc LB |
293 | } |
294 | ||
2c00a5a8 XL |
295 | /// Takes a sequence of token trees `ms` representing a matcher which successfully matched input |
296 | /// and an iterator of items that matched input and produces a `NamedParseResult`. | |
297 | fn nameize<I: Iterator<Item = NamedMatch>>( | |
298 | sess: &ParseSess, | |
299 | ms: &[TokenTree], | |
300 | mut res: I, | |
301 | ) -> NamedParseResult { | |
302 | // Recursively descend into each type of matcher (e.g. sequences, delimited, metavars) and make | |
303 | // sure that each metavar has _exactly one_ binding. If a metavar does not have exactly one | |
304 | // binding, then there is an error. If it does, then we insert the binding into the | |
305 | // `NamedParseResult`. | |
306 | fn n_rec<I: Iterator<Item = NamedMatch>>( | |
307 | sess: &ParseSess, | |
308 | m: &TokenTree, | |
309 | res: &mut I, | |
310 | ret_val: &mut HashMap<Ident, Rc<NamedMatch>>, | |
311 | ) -> Result<(), (syntax_pos::Span, String)> { | |
92a42be0 | 312 | match *m { |
2c00a5a8 XL |
313 | TokenTree::Sequence(_, ref seq) => for next_m in &seq.tts { |
314 | n_rec(sess, next_m, res.by_ref(), ret_val)? | |
315 | }, | |
316 | TokenTree::Delimited(_, ref delim) => for next_m in &delim.tts { | |
317 | n_rec(sess, next_m, res.by_ref(), ret_val)?; | |
318 | }, | |
8bb4bdeb XL |
319 | TokenTree::MetaVarDecl(span, _, id) if id.name == keywords::Invalid.name() => { |
320 | if sess.missing_fragment_specifiers.borrow_mut().remove(&span) { | |
321 | return Err((span, "missing fragment specifier".to_string())); | |
322 | } | |
323 | } | |
324 | TokenTree::MetaVarDecl(sp, bind_name, _) => { | |
5bcae85e | 325 | match ret_val.entry(bind_name) { |
1a4d82fc | 326 | Vacant(spot) => { |
041b39d2 XL |
327 | // FIXME(simulacrum): Don't construct Rc here |
328 | spot.insert(Rc::new(res.next().unwrap())); | |
1a4d82fc JJ |
329 | } |
330 | Occupied(..) => { | |
92a42be0 | 331 | return Err((sp, format!("duplicated bind name: {}", bind_name))) |
1a4d82fc JJ |
332 | } |
333 | } | |
223e47cc | 334 | } |
041b39d2 | 335 | TokenTree::MetaVar(..) | TokenTree::Token(..) => (), |
223e47cc | 336 | } |
92a42be0 SL |
337 | |
338 | Ok(()) | |
223e47cc | 339 | } |
92a42be0 | 340 | |
970d7e83 | 341 | let mut ret_val = HashMap::new(); |
92a42be0 | 342 | for m in ms { |
8bb4bdeb | 343 | match n_rec(sess, m, res.by_ref(), &mut ret_val) { |
2c00a5a8 | 344 | Ok(_) => {} |
92a42be0 SL |
345 | Err((sp, msg)) => return Error(sp, msg), |
346 | } | |
347 | } | |
348 | ||
349 | Success(ret_val) | |
223e47cc LB |
350 | } |
351 | ||
2c00a5a8 XL |
352 | /// Generate an appropriate parsing failure message. For EOF, this is "unexpected end...". For |
353 | /// other tokens, this is "unexpected token...". | |
c30ab7b3 SL |
354 | pub fn parse_failure_msg(tok: Token) -> String { |
355 | match tok { | |
356 | token::Eof => "unexpected end of macro invocation".to_string(), | |
2c00a5a8 XL |
357 | _ => format!( |
358 | "no rules expected the token `{}`", | |
359 | pprust::token_to_string(&tok) | |
360 | ), | |
c30ab7b3 SL |
361 | } |
362 | } | |
363 | ||
476ff2be | 364 | /// Perform a token equality check, ignoring syntax context (that is, an unhygienic comparison) |
2c00a5a8 | 365 | fn token_name_eq(t1: &Token, t2: &Token) -> bool { |
0531ce1d XL |
366 | if let (Some((id1, is_raw1)), Some((id2, is_raw2))) = (t1.ident(), t2.ident()) { |
367 | id1.name == id2.name && is_raw1 == is_raw2 | |
83c7162d | 368 | } else if let (Some(id1), Some(id2)) = (t1.lifetime(), t2.lifetime()) { |
cc61c64b XL |
369 | id1.name == id2.name |
370 | } else { | |
371 | *t1 == *t2 | |
1a4d82fc JJ |
372 | } |
373 | } | |
374 | ||
2c00a5a8 XL |
375 | /// Process the matcher positions of `cur_items` until it is empty. In the process, this will |
376 | /// produce more items in `next_items`, `eof_items`, and `bb_items`. | |
377 | /// | |
378 | /// For more info about the how this happens, see the module-level doc comments and the inline | |
379 | /// comments of this function. | |
380 | /// | |
381 | /// # Parameters | |
382 | /// | |
383 | /// - `sess`: the parsing session into which errors are emitted. | |
384 | /// - `cur_items`: the set of current items to be processed. This should be empty by the end of a | |
385 | /// successful execution of this function. | |
386 | /// - `next_items`: the set of newly generated items. These are used to replenish `cur_items` in | |
387 | /// the function `parse`. | |
388 | /// - `eof_items`: the set of items that would be valid if this was the EOF. | |
389 | /// - `bb_items`: the set of items that are waiting for the black-box parser. | |
390 | /// - `token`: the current token of the parser. | |
391 | /// - `span`: the `Span` in the source code corresponding to the token trees we are trying to match | |
392 | /// against the matcher positions in `cur_items`. | |
393 | /// | |
394 | /// # Returns | |
395 | /// | |
396 | /// A `ParseResult`. Note that matches are kept track of through the items generated. | |
397 | fn inner_parse_loop( | |
398 | sess: &ParseSess, | |
399 | cur_items: &mut SmallVector<Box<MatcherPos>>, | |
400 | next_items: &mut Vec<Box<MatcherPos>>, | |
401 | eof_items: &mut SmallVector<Box<MatcherPos>>, | |
402 | bb_items: &mut SmallVector<Box<MatcherPos>>, | |
403 | token: &Token, | |
404 | span: syntax_pos::Span, | |
405 | ) -> ParseResult<()> { | |
406 | // Pop items from `cur_items` until it is empty. | |
3b2f2976 | 407 | while let Some(mut item) = cur_items.pop() { |
2c00a5a8 XL |
408 | // When unzipped trees end, remove them. This corresponds to backtracking out of a |
409 | // delimited submatcher into which we already descended. In backtracking out again, we need | |
410 | // to advance the "dot" past the delimiters in the outer matcher. | |
3b2f2976 XL |
411 | while item.idx >= item.top_elts.len() { |
412 | match item.stack.pop() { | |
476ff2be | 413 | Some(MatcherTtFrame { elts, idx }) => { |
3b2f2976 XL |
414 | item.top_elts = elts; |
415 | item.idx = idx + 1; | |
1a4d82fc | 416 | } |
2c00a5a8 | 417 | None => break, |
1a4d82fc | 418 | } |
476ff2be | 419 | } |
223e47cc | 420 | |
2c00a5a8 XL |
421 | // Get the current position of the "dot" (`idx`) in `item` and the number of token trees in |
422 | // the matcher (`len`). | |
3b2f2976 XL |
423 | let idx = item.idx; |
424 | let len = item.top_elts.len(); | |
476ff2be | 425 | |
2c00a5a8 | 426 | // If `idx >= len`, then we are at or past the end of the matcher of `item`. |
476ff2be | 427 | if idx >= len { |
2c00a5a8 XL |
428 | // We are repeating iff there is a parent. If the matcher is inside of a repetition, |
429 | // then we could be at the end of a sequence or at the beginning of the next | |
430 | // repetition. | |
3b2f2976 | 431 | if item.up.is_some() { |
2c00a5a8 XL |
432 | // At this point, regardless of whether there is a separator, we should add all |
433 | // matches from the complete repetition of the sequence to the shared, top-level | |
434 | // `matches` list (actually, `up.matches`, which could itself not be the top-level, | |
435 | // but anyway...). Moreover, we add another item to `cur_items` in which the "dot" | |
436 | // is at the end of the `up` matcher. This ensures that the "dot" in the `up` | |
437 | // matcher is also advanced sufficiently. | |
438 | // | |
439 | // NOTE: removing the condition `idx == len` allows trailing separators. | |
476ff2be | 440 | if idx == len { |
2c00a5a8 | 441 | // Get the `up` matcher |
3b2f2976 | 442 | let mut new_pos = item.up.clone().unwrap(); |
476ff2be | 443 | |
2c00a5a8 | 444 | // Add matches from this repetition to the `matches` of `up` |
3b2f2976 XL |
445 | for idx in item.match_lo..item.match_hi { |
446 | let sub = item.matches[idx].clone(); | |
ea8adc8c XL |
447 | let span = span.with_lo(item.sp_lo); |
448 | new_pos.push_match(idx, MatchedSeq(sub, span)); | |
223e47cc LB |
449 | } |
450 | ||
2c00a5a8 | 451 | // Move the "dot" past the repetition in `up` |
3b2f2976 | 452 | new_pos.match_cur = item.match_hi; |
476ff2be | 453 | new_pos.idx += 1; |
3b2f2976 | 454 | cur_items.push(new_pos); |
223e47cc | 455 | } |
223e47cc | 456 | |
2c00a5a8 | 457 | // Check if we need a separator. |
3b2f2976 | 458 | if idx == len && item.sep.is_some() { |
2c00a5a8 XL |
459 | // We have a separator, and it is the current token. We can advance past the |
460 | // separator token. | |
461 | if item.sep | |
462 | .as_ref() | |
463 | .map(|sep| token_name_eq(token, sep)) | |
464 | .unwrap_or(false) | |
465 | { | |
3b2f2976 XL |
466 | item.idx += 1; |
467 | next_items.push(item); | |
223e47cc | 468 | } |
2c00a5a8 XL |
469 | } |
470 | // We don't need a separator. Move the "dot" back to the beginning of the matcher | |
471 | // and try to match again UNLESS we are only allowed to have _one_ repetition. | |
472 | else if item.seq_op != Some(quoted::KleeneOp::ZeroOrOne) { | |
3b2f2976 XL |
473 | item.match_cur = item.match_lo; |
474 | item.idx = 0; | |
475 | cur_items.push(item); | |
476ff2be | 476 | } |
2c00a5a8 XL |
477 | } |
478 | // If we are not in a repetition, then being at the end of a matcher means that we have | |
479 | // reached the potential end of the input. | |
480 | else { | |
3b2f2976 | 481 | eof_items.push(item); |
476ff2be | 482 | } |
2c00a5a8 XL |
483 | } |
484 | // We are in the middle of a matcher. | |
485 | else { | |
486 | // Look at what token in the matcher we are trying to match the current token (`token`) | |
487 | // against. Depending on that, we may generate new items. | |
3b2f2976 | 488 | match item.top_elts.get_tt(idx) { |
2c00a5a8 | 489 | // Need to descend into a sequence |
476ff2be | 490 | TokenTree::Sequence(sp, seq) => { |
2c00a5a8 XL |
491 | // Examine the case where there are 0 matches of this sequence |
492 | if seq.op == quoted::KleeneOp::ZeroOrMore | |
493 | || seq.op == quoted::KleeneOp::ZeroOrOne | |
494 | { | |
3b2f2976 XL |
495 | let mut new_item = item.clone(); |
496 | new_item.match_cur += seq.num_captures; | |
497 | new_item.idx += 1; | |
498 | for idx in item.match_cur..item.match_cur + seq.num_captures { | |
499 | new_item.push_match(idx, MatchedSeq(Rc::new(vec![]), sp)); | |
1a4d82fc | 500 | } |
3b2f2976 | 501 | cur_items.push(new_item); |
1a4d82fc | 502 | } |
476ff2be | 503 | |
3b2f2976 XL |
504 | let matches = create_matches(item.matches.len()); |
505 | cur_items.push(Box::new(MatcherPos { | |
476ff2be SL |
506 | stack: vec![], |
507 | sep: seq.separator.clone(), | |
2c00a5a8 | 508 | seq_op: Some(seq.op), |
476ff2be | 509 | idx: 0, |
3b2f2976 XL |
510 | matches, |
511 | match_lo: item.match_cur, | |
512 | match_cur: item.match_cur, | |
513 | match_hi: item.match_cur + seq.num_captures, | |
514 | up: Some(item), | |
ea8adc8c | 515 | sp_lo: sp.lo(), |
476ff2be SL |
516 | top_elts: Tt(TokenTree::Sequence(sp, seq)), |
517 | })); | |
518 | } | |
2c00a5a8 XL |
519 | |
520 | // We need to match a metavar (but the identifier is invalid)... this is an error | |
8bb4bdeb XL |
521 | TokenTree::MetaVarDecl(span, _, id) if id.name == keywords::Invalid.name() => { |
522 | if sess.missing_fragment_specifiers.borrow_mut().remove(&span) { | |
523 | return Error(span, "missing fragment specifier".to_string()); | |
524 | } | |
525 | } | |
2c00a5a8 XL |
526 | |
527 | // We need to match a metavar with a valid ident... call out to the black-box | |
528 | // parser by adding an item to `bb_items`. | |
041b39d2 | 529 | TokenTree::MetaVarDecl(_, _, id) => { |
476ff2be SL |
530 | // Built-in nonterminals never start with these tokens, |
531 | // so we can eliminate them from consideration. | |
041b39d2 | 532 | if may_begin_with(&*id.name.as_str(), token) { |
3b2f2976 | 533 | bb_items.push(item); |
1a4d82fc | 534 | } |
476ff2be | 535 | } |
2c00a5a8 XL |
536 | |
537 | // We need to descend into a delimited submatcher or a doc comment. To do this, we | |
538 | // push the current matcher onto a stack and push a new item containing the | |
539 | // submatcher onto `cur_items`. | |
540 | // | |
541 | // At the beginning of the loop, if we reach the end of the delimited submatcher, | |
542 | // we pop the stack to backtrack out of the descent. | |
476ff2be | 543 | seq @ TokenTree::Delimited(..) | seq @ TokenTree::Token(_, DocComment(..)) => { |
3b2f2976 XL |
544 | let lower_elts = mem::replace(&mut item.top_elts, Tt(seq)); |
545 | let idx = item.idx; | |
546 | item.stack.push(MatcherTtFrame { | |
476ff2be | 547 | elts: lower_elts, |
3b2f2976 | 548 | idx, |
476ff2be | 549 | }); |
3b2f2976 XL |
550 | item.idx = 0; |
551 | cur_items.push(item); | |
476ff2be | 552 | } |
2c00a5a8 XL |
553 | |
554 | // We just matched a normal token. We can just advance the parser. | |
041b39d2 | 555 | TokenTree::Token(_, ref t) if token_name_eq(t, token) => { |
3b2f2976 XL |
556 | item.idx += 1; |
557 | next_items.push(item); | |
223e47cc | 558 | } |
2c00a5a8 XL |
559 | |
560 | // There was another token that was not `token`... This means we can't add any | |
561 | // rules. NOTE that this is not necessarily an error unless _all_ items in | |
562 | // `cur_items` end up doing this. There may still be some other matchers that do | |
563 | // end up working out. | |
041b39d2 | 564 | TokenTree::Token(..) | TokenTree::MetaVar(..) => {} |
223e47cc LB |
565 | } |
566 | } | |
476ff2be SL |
567 | } |
568 | ||
2c00a5a8 | 569 | // Yay a successful parse (so far)! |
476ff2be SL |
570 | Success(()) |
571 | } | |
572 | ||
2c00a5a8 XL |
573 | /// Use the given sequence of token trees (`ms`) as a matcher. Match the given token stream `tts` |
574 | /// against it and return the match. | |
575 | /// | |
576 | /// # Parameters | |
577 | /// | |
578 | /// - `sess`: The session into which errors are emitted | |
579 | /// - `tts`: The tokenstream we are matching against the pattern `ms` | |
/// - `ms`: A sequence of token trees representing a pattern against which we are matching
/// - `directory`: Information about the file locations (needed for the black-box parser)
/// - `recurse_into_modules`: Whether or not to recurse into modules (needed for the black-box
///   parser)
pub fn parse(
    sess: &ParseSess,
    tts: TokenStream,
    ms: &[TokenTree],
    directory: Option<Directory>,
    recurse_into_modules: bool,
) -> NamedParseResult {
    // Create a parser that can be used for the "black box" parts.
    let mut parser = Parser::new(sess, tts, directory, recurse_into_modules, true);

    // A queue of possible matcher positions. We initialize it with the matcher position in which
    // the "dot" is before the first token of the first token tree in `ms`. `inner_parse_loop` then
    // processes all of these possible matcher positions and produces possible next positions into
    // `next_items`. After some post-processing, the contents of `next_items` replenish `cur_items`
    // and we start over again.
    let mut cur_items = SmallVector::one(initial_matcher_pos(ms.to_owned(), parser.span.lo()));
    let mut next_items = Vec::new();

    loop {
        // Matcher positions black-box parsed by parser.rs (`parser`)
        let mut bb_items = SmallVector::new();

        // Matcher positions that would be valid if the macro invocation was over now
        let mut eof_items = SmallVector::new();
        assert!(next_items.is_empty());

        // Process `cur_items` until either we have finished the input or we need to get some
        // parsing from the black-box parser done. The result is that `next_items` will contain a
        // bunch of possible next matcher positions in `next_items`.
        match inner_parse_loop(
            sess,
            &mut cur_items,
            &mut next_items,
            &mut eof_items,
            &mut bb_items,
            &parser.token,
            parser.span,
        ) {
            Success(_) => {}
            Failure(sp, tok) => return Failure(sp, tok),
            Error(sp, msg) => return Error(sp, msg),
        }

        // inner parse loop handled all cur_items, so it's empty
        assert!(cur_items.is_empty());

        // We need to do some post processing after the `inner_parse_loop`.
        //
        // Error messages here could be improved with links to original rules.

        // If we reached the EOF, check that there is EXACTLY ONE possible matcher. Otherwise,
        // either the parse is ambiguous (which should never happen) or there is a syntax error.
        if token_name_eq(&parser.token, &token::Eof) {
            if eof_items.len() == 1 {
                // Exactly one parse survived to EOF: bind the collected matches to the
                // metavariable names and return success.
                let matches = eof_items[0]
                    .matches
                    .iter_mut()
                    .map(|dv| Rc::make_mut(dv).pop().unwrap());
                return nameize(sess, ms, matches);
            } else if eof_items.len() > 1 {
                return Error(
                    parser.span,
                    "ambiguity: multiple successful parses".to_string(),
                );
            } else {
                return Failure(parser.span, token::Eof);
            }
        }
        // Another possibility is that we need to call out to parse some rust nonterminal
        // (black-box) parser. However, if there is not EXACTLY ONE of these, something is wrong.
        else if (!bb_items.is_empty() && !next_items.is_empty()) || bb_items.len() > 1 {
            // Ambiguity between two or more NT matchers, or between an NT matcher and an
            // ordinary token position — report which NTs were in play.
            let nts = bb_items
                .iter()
                .map(|item| match item.top_elts.get_tt(item.idx) {
                    TokenTree::MetaVarDecl(_, bind, name) => format!("{} ('{}')", name, bind),
                    _ => panic!(),
                })
                .collect::<Vec<String>>()
                .join(" or ");

            return Error(
                parser.span,
                format!(
                    "local ambiguity: multiple parsing options: {}",
                    match next_items.len() {
                        0 => format!("built-in NTs {}.", nts),
                        1 => format!("built-in NTs {} or 1 other option.", nts),
                        n => format!("built-in NTs {} or {} other options.", nts, n),
                    }
                ),
            );
        }
        // If there are no possible next positions AND we aren't waiting for the black-box parser,
        // then there is a syntax error.
        else if bb_items.is_empty() && next_items.is_empty() {
            return Failure(parser.span, parser.token);
        }
        // Dump all possible `next_items` into `cur_items` for the next iteration.
        else if !next_items.is_empty() {
            // Now process the next token
            cur_items.extend(next_items.drain(..));
            parser.bump();
        }
        // Finally, we have the case where we need to call the black-box parser to get some
        // nonterminal.
        else {
            // Exactly one NT matcher is pending (all other cases were handled above).
            assert_eq!(bb_items.len(), 1);

            let mut item = bb_items.pop().unwrap();
            if let TokenTree::MetaVarDecl(span, _, ident) = item.top_elts.get_tt(item.idx) {
                let match_cur = item.match_cur;
                // Commit to the black-box parse of this nonterminal and record its result.
                item.push_match(
                    match_cur,
                    MatchedNonterminal(Rc::new(parse_nt(&mut parser, span, &ident.name.as_str()))),
                );
                item.idx += 1;
                item.match_cur += 1;
            } else {
                unreachable!()
            }
            cur_items.push(item);
        }

        // We either processed the next token or committed to a nonterminal parse, so there must
        // be at least one live matcher position going into the next iteration.
        assert!(!cur_items.is_empty());
    }
}
710 | ||
0531ce1d XL |
711 | /// The token is an identifier, but not `_`. |
712 | /// We prohibit passing `_` to macros expecting `ident` for now. | |
713 | fn get_macro_ident(token: &Token) -> Option<(Ident, bool)> { | |
714 | match *token { | |
715 | token::Ident(ident, is_raw) if ident.name != keywords::Underscore.name() => | |
716 | Some((ident, is_raw)), | |
717 | _ => None, | |
718 | } | |
719 | } | |
720 | ||
041b39d2 XL |
/// Checks whether a non-terminal may begin with a particular token.
///
/// Returning `false` is a *stability guarantee* that such a matcher will *never* begin with that
/// token. Be conservative (return true) if not sure.
fn may_begin_with(name: &str, token: &Token) -> bool {
    /// Checks whether the non-terminal may contain a single (non-keyword) identifier.
    /// Items, blocks, and visibilities can never be a bare identifier; everything else might.
    fn may_be_ident(nt: &token::Nonterminal) -> bool {
        match *nt {
            token::NtItem(_) | token::NtBlock(_) | token::NtVis(_) => false,
            _ => true,
        }
    }

    // Dispatch on the fragment-specifier name of the metavariable matcher.
    match name {
        "expr" => token.can_begin_expr(),
        "ty" => token.can_begin_type(),
        "ident" => get_macro_ident(token).is_some(),
        "vis" => match *token {
            // The follow-set of :vis + "priv" keyword + interpolated
            Token::Comma | Token::Ident(..) | Token::Interpolated(_) => true,
            // A visibility can also start like a type, e.g. `pub(in path)`.
            _ => token.can_begin_type(),
        },
        "block" => match *token {
            Token::OpenDelim(token::Brace) => true,
            Token::Interpolated(ref nt) => match nt.0 {
                token::NtItem(_)
                | token::NtPat(_)
                | token::NtTy(_)
                | token::NtIdent(..)
                | token::NtMeta(_)
                | token::NtPath(_)
                | token::NtVis(_) => false, // none of these may start with '{'.
                _ => true,
            },
            _ => false,
        },
        "path" | "meta" => match *token {
            Token::ModSep | Token::Ident(..) => true,
            Token::Interpolated(ref nt) => match nt.0 {
                token::NtPath(_) | token::NtMeta(_) => true,
                _ => may_be_ident(&nt.0),
            },
            _ => false,
        },
        "pat" => match *token {
            Token::Ident(..) |                  // box, ref, mut, and other identifiers (can stricten)
            Token::OpenDelim(token::Paren) |    // tuple pattern
            Token::OpenDelim(token::Bracket) |  // slice pattern
            Token::BinOp(token::And) |          // reference
            Token::BinOp(token::Minus) |        // negative literal
            Token::AndAnd |                     // double reference
            Token::Literal(..) |                // literal
            Token::DotDot |                     // range pattern (future compat)
            Token::DotDotDot |                  // range pattern (future compat)
            Token::ModSep |                     // path
            Token::Lt |                         // path (UFCS constant)
            Token::BinOp(token::Shl) => true,   // path (double UFCS)
            Token::Interpolated(ref nt) => may_be_ident(&nt.0),
            _ => false,
        },
        // All other fragment specifiers: be conservative and allow anything except a closing
        // delimiter (which can never begin a fragment).
        _ => match *token {
            token::CloseDelim(_) => false,
            _ => true,
        },
    }
}
787 | ||
2c00a5a8 XL |
/// A call to the "black-box" parser to parse some rust nonterminal.
///
/// # Parameters
///
/// - `p`: the "black-box" parser to use
/// - `sp`: the `Span` we want to parse
/// - `name`: the name of the metavar _matcher_ we want to match (e.g. `tt`, `ident`, `block`,
///   etc...)
///
/// # Returns
///
/// The parsed nonterminal.
fn parse_nt<'a>(p: &mut Parser<'a>, sp: Span, name: &str) -> Nonterminal {
    // `tt` is special: grab a single token tree without interpreting macro variables,
    // so handle it before the `process_potential_macro_variable` call below.
    if name == "tt" {
        return token::NtTT(p.parse_token_tree());
    }
    // check at the beginning and the parser checks after each bump
    p.process_potential_macro_variable();
    match name {
        "item" => match panictry!(p.parse_item()) {
            Some(i) => token::NtItem(i),
            None => {
                // `parse_item` returned no item: report and abort expansion.
                p.fatal("expected an item keyword").emit();
                FatalError.raise();
            }
        },
        "block" => token::NtBlock(panictry!(p.parse_block())),
        "stmt" => match panictry!(p.parse_stmt()) {
            Some(s) => token::NtStmt(s),
            None => {
                p.fatal("expected a statement").emit();
                FatalError.raise();
            }
        },
        "pat" => token::NtPat(panictry!(p.parse_pat())),
        "expr" => token::NtExpr(panictry!(p.parse_expr())),
        "ty" => token::NtTy(panictry!(p.parse_ty())),
        // this could be handled like a token, since it is one
        "ident" => if let Some((ident, is_raw)) = get_macro_ident(&p.token) {
            // Re-span the identifier at the current parser position before consuming it.
            let span = p.span;
            p.bump();
            token::NtIdent(Ident::new(ident.name, span), is_raw)
        } else {
            let token_str = pprust::token_to_string(&p.token);
            p.fatal(&format!("expected ident, found {}", &token_str)).emit();
            FatalError.raise()
        }
        "path" => token::NtPath(panictry!(p.parse_path_common(PathStyle::Type, false))),
        "meta" => token::NtMeta(panictry!(p.parse_meta_item())),
        "vis" => token::NtVis(panictry!(p.parse_visibility(true))),
        "lifetime" => if p.check_lifetime() {
            token::NtLifetime(p.expect_lifetime().ident)
        } else {
            let token_str = pprust::token_to_string(&p.token);
            p.fatal(&format!("expected a lifetime, found `{}`", &token_str)).emit();
            FatalError.raise();
        }
        // this is not supposed to happen, since it has been checked
        // when compiling the macro.
        _ => p.span_bug(sp, "invalid fragment specifier"),
    }
}