1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! This is an Earley-like parser, without support for in-grammar nonterminals,
12 //! only by calling out to the main rust parser for named nonterminals (which it
13 //! commits to fully when it hits one in a grammar). This means that there are no
14 //! completer or predictor rules, and therefore no need to store one column per
15 //! token: instead, there's a set of current Earley items and a set of next
16 //! ones. Instead of NTs, we have a special case for Kleene star. The big-O, in
17 //! pathological cases, is worse than traditional Earley parsing, but it's an
18 //! easier fit for Macro-by-Example-style rules, and I think the overhead is
19 //! lower. (In order to prevent the pathological case, we'd need to lazily
20 //! construct the resulting `NamedMatch`es at the very end. It'd be a pain,
21 //! and require more memory to keep around old items, but it would also save
24 //! Quick intro to how the parser works:
26 //! A 'position' is a dot in the middle of a matcher, usually represented as a
27 //! dot. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`.
29 //! The parser walks through the input a character at a time, maintaining a list
30 //! of items consistent with the current position in the input string: `cur_eis`.
32 //! As it processes them, it fills up `eof_eis` with items that would be valid if
33 //! the macro invocation is now over, `bb_eis` with items that are waiting on
34 //! a Rust nonterminal like `$e:expr`, and `next_eis` with items that are waiting
35 //! on a particular token. Most of the logic concerns moving the · through the
36 //! repetitions indicated by Kleene stars. It only advances or calls out to the
37 //! real Rust parser when no `cur_eis` items remain
39 //! Example: Start parsing `a a a a b` against [· a $( a )* a b].
41 //! Remaining input: `a a a a b`
42 //! next_eis: [· a $( a )* a b]
44 //! - - - Advance over an `a`. - - -
46 //! Remaining input: `a a a b`
47 //! cur: [a · $( a )* a b]
48 //! Descend/Skip (first item).
49 //! next: [a $( · a )* a b] [a $( a )* · a b].
51 //! - - - Advance over an `a`. - - -
53 //! Remaining input: `a a b`
54 //! cur: [a $( a · )* a b] next: [a $( a )* a · b]
55 //! Finish/Repeat (first item)
56 //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
58 //! - - - Advance over an `a`. - - - (this looks exactly like the last step)
60 //! Remaining input: `a b`
61 //! cur: [a $( a · )* a b] next: [a $( a )* a · b]
62 //! Finish/Repeat (first item)
63 //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
65 //! - - - Advance over an `a`. - - - (this looks exactly like the last step)
67 //! Remaining input: `b`
68 //! cur: [a $( a · )* a b] next: [a $( a )* a · b]
69 //! Finish/Repeat (first item)
70 //! next: [a $( a )* · a b] [a $( · a )* a b]
72 //! - - - Advance over a `b`. - - -
74 //! Remaining input: ``
75 //! eof: [a $( a )* a b ·]
77 pub use self::NamedMatch
::*;
78 pub use self::ParseResult
::*;
79 use self::TokenTreeOrTokenTreeVec
::*;
82 use ast
::{TokenTree, Name, Ident}
;
83 use codemap
::{BytePos, mk_sp, Span, Spanned}
;
85 use errors
::FatalError
;
86 use parse
::lexer
::*; //resolve bug?
88 use parse
::parser
::{LifetimeAndTypesWithoutColons, Parser}
;
89 use parse
::token
::{DocComment, MatchNt, SubstNt}
;
90 use parse
::token
::{Token, Nonterminal}
;
97 use std
::collections
::HashMap
;
98 use std
::collections
::hash_map
::Entry
::{Vacant, Occupied}
;
100 // To avoid costly uniqueness checks, we require that `MatchSeq` always has
104 enum TokenTreeOrTokenTreeVec
{
106 TtSeq(Rc
<Vec
<ast
::TokenTree
>>),
109 impl TokenTreeOrTokenTreeVec
{
110 fn len(&self) -> usize {
112 TtSeq(ref v
) => v
.len(),
113 Tt(ref tt
) => tt
.len(),
117 fn get_tt(&self, index
: usize) -> TokenTree
{
119 TtSeq(ref v
) => v
[index
].clone(),
120 Tt(ref tt
) => tt
.get_tt(index
),
125 /// an unzipping of `TokenTree`s
127 struct MatcherTtFrame
{
128 elts
: TokenTreeOrTokenTreeVec
,
133 pub struct MatcherPos
{
134 stack
: Vec
<MatcherTtFrame
>,
135 top_elts
: TokenTreeOrTokenTreeVec
,
138 up
: Option
<Box
<MatcherPos
>>,
139 matches
: Vec
<Vec
<Rc
<NamedMatch
>>>,
146 pub fn count_names(ms
: &[TokenTree
]) -> usize {
147 ms
.iter().fold(0, |count
, elt
| {
149 TokenTree
::Sequence(_
, ref seq
) => {
152 TokenTree
::Delimited(_
, ref delim
) => {
153 count_names(&delim
.tts
)
155 TokenTree
::Token(_
, MatchNt(..)) => {
158 TokenTree
::Token(_
, _
) => 0,
163 pub fn initial_matcher_pos(ms
: Rc
<Vec
<TokenTree
>>, sep
: Option
<Token
>, lo
: BytePos
)
165 let match_idx_hi
= count_names(&ms
[..]);
166 let matches
: Vec
<_
> = (0..match_idx_hi
).map(|_
| Vec
::new()).collect();
167 Box
::new(MatcherPos
{
176 match_hi
: match_idx_hi
,
181 /// NamedMatch is a pattern-match result for a single token::MATCH_NONTERMINAL:
182 /// so it is associated with a single ident in a parse, and all
183 /// `MatchedNonterminal`s in the NamedMatch have the same nonterminal type
184 /// (expr, item, etc). Each leaf in a single NamedMatch corresponds to a
185 /// single token::MATCH_NONTERMINAL in the TokenTree that produced it.
187 /// The in-memory structure of a particular NamedMatch represents the match
188 /// that occurred when a particular subset of a matcher was applied to a
189 /// particular token tree.
191 /// The width of each MatchedSeq in the NamedMatch, and the identity of the
192 /// `MatchedNonterminal`s, will depend on the token tree it was applied to:
193 /// each MatchedSeq corresponds to a single TTSeq in the originating
194 /// token tree. The depth of the NamedMatch structure will therefore depend
195 /// only on the nesting depth of `ast::TTSeq`s in the originating
196 /// token tree it was derived from.
198 pub enum NamedMatch
{
199 MatchedSeq(Vec
<Rc
<NamedMatch
>>, codemap
::Span
),
200 MatchedNonterminal(Nonterminal
)
203 pub fn nameize(p_s
: &ParseSess
, ms
: &[TokenTree
], res
: &[Rc
<NamedMatch
>])
204 -> ParseResult
<HashMap
<Name
, Rc
<NamedMatch
>>> {
205 fn n_rec(p_s
: &ParseSess
, m
: &TokenTree
, res
: &[Rc
<NamedMatch
>],
206 ret_val
: &mut HashMap
<Name
, Rc
<NamedMatch
>>, idx
: &mut usize)
207 -> Result
<(), (codemap
::Span
, String
)> {
209 TokenTree
::Sequence(_
, ref seq
) => {
210 for next_m
in &seq
.tts
{
211 n_rec(p_s
, next_m
, res
, ret_val
, idx
)?
214 TokenTree
::Delimited(_
, ref delim
) => {
215 for next_m
in &delim
.tts
{
216 n_rec(p_s
, next_m
, res
, ret_val
, idx
)?
;
219 TokenTree
::Token(sp
, MatchNt(bind_name
, _
, _
, _
)) => {
220 match ret_val
.entry(bind_name
.name
) {
222 spot
.insert(res
[*idx
].clone());
226 return Err((sp
, format
!("duplicated bind name: {}", bind_name
)))
230 TokenTree
::Token(sp
, SubstNt(..)) => {
231 return Err((sp
, "missing fragment specifier".to_string()))
233 TokenTree
::Token(_
, _
) => (),
239 let mut ret_val
= HashMap
::new();
242 match n_rec(p_s
, m
, res
, &mut ret_val
, &mut idx
) {
244 Err((sp
, msg
)) => return Error(sp
, msg
),
251 pub enum ParseResult
<T
> {
253 /// Arm failed to match
254 Failure(codemap
::Span
, String
),
255 /// Fatal error (malformed macro?). Abort compilation.
256 Error(codemap
::Span
, String
)
259 pub type NamedParseResult
= ParseResult
<HashMap
<Name
, Rc
<NamedMatch
>>>;
260 pub type PositionalParseResult
= ParseResult
<Vec
<Rc
<NamedMatch
>>>;
262 /// Perform a token equality check, ignoring syntax context (that is, an
263 /// unhygienic comparison)
264 pub fn token_name_eq(t1
: &Token
, t2
: &Token
) -> bool
{
266 (&token
::Ident(id1
,_
),&token
::Ident(id2
,_
))
267 | (&token
::Lifetime(id1
),&token
::Lifetime(id2
)) =>
268 id1
.name
== id2
.name
,
273 pub fn parse(sess
: &ParseSess
,
274 cfg
: ast
::CrateConfig
,
277 -> NamedParseResult
{
278 let mut cur_eis
= Vec
::new();
279 cur_eis
.push(initial_matcher_pos(Rc
::new(ms
.iter()
286 let mut bb_eis
= Vec
::new(); // black-box parsed by parser.rs
287 let mut next_eis
= Vec
::new(); // or proceed normally
288 let mut eof_eis
= Vec
::new();
290 let TokenAndSpan { tok, sp }
= rdr
.peek();
292 /* we append new items to this while we go */
294 let mut ei
= match cur_eis
.pop() {
295 None
=> break, /* for each Earley Item */
299 // When unzipped trees end, remove them
300 while ei
.idx
>= ei
.top_elts
.len() {
301 match ei
.stack
.pop() {
302 Some(MatcherTtFrame { elts, idx }
) => {
311 let len
= ei
.top_elts
.len();
313 /* at end of sequence */
315 // can't move out of `match`es, so:
317 // hack: a matcher sequence is repeating iff it has a
318 // parent (the top level is just a container)
321 // disregard separator, try to go up
322 // (remove this condition to make trailing seps ok)
324 // pop from the matcher position
326 let mut new_pos
= ei
.up
.clone().unwrap();
328 // update matches (the MBE "parse tree") by appending
329 // each tree as a subtree.
331 // I bet this is a perf problem: we're preemptively
332 // doing a lot of array work that will get thrown away
335 // Only touch the binders we have actually bound
336 for idx
in ei
.match_lo
..ei
.match_hi
{
337 let sub
= (ei
.matches
[idx
]).clone();
338 (&mut new_pos
.matches
[idx
])
339 .push(Rc
::new(MatchedSeq(sub
, mk_sp(ei
.sp_lo
,
343 new_pos
.match_cur
= ei
.match_hi
;
345 cur_eis
.push(new_pos
);
348 // can we go around again?
350 // the *_t vars are workarounds for the lack of unary move
352 Some(ref t
) if idx
== len
=> { // we need a separator
353 // i'm conflicted about whether this should be hygienic....
354 // though in this case, if the separators are never legal
355 // idents, it shouldn't matter.
356 if token_name_eq(&tok
, t
) { //pass the separator
357 let mut ei_t
= ei
.clone();
358 // ei_t.match_cur = ei_t.match_lo;
363 _
=> { // we don't need a separator
365 ei_t
.match_cur
= ei_t
.match_lo
;
374 match ei
.top_elts
.get_tt(idx
) {
375 /* need to descend into sequence */
376 TokenTree
::Sequence(sp
, seq
) => {
377 if seq
.op
== ast
::KleeneOp
::ZeroOrMore
{
378 let mut new_ei
= ei
.clone();
379 new_ei
.match_cur
+= seq
.num_captures
;
381 //we specifically matched zero repeats.
382 for idx
in ei
.match_cur
..ei
.match_cur
+ seq
.num_captures
{
383 (&mut new_ei
.matches
[idx
]).push(Rc
::new(MatchedSeq(vec
![], sp
)));
386 cur_eis
.push(new_ei
);
389 let matches
: Vec
<_
> = (0..ei
.matches
.len())
390 .map(|_
| Vec
::new()).collect();
392 cur_eis
.push(Box
::new(MatcherPos
{
394 sep
: seq
.separator
.clone(),
397 match_lo
: ei_t
.match_cur
,
398 match_cur
: ei_t
.match_cur
,
399 match_hi
: ei_t
.match_cur
+ seq
.num_captures
,
402 top_elts
: Tt(TokenTree
::Sequence(sp
, seq
)),
405 TokenTree
::Token(_
, MatchNt(..)) => {
406 // Built-in nonterminals never start with these tokens,
407 // so we can eliminate them from consideration.
409 token
::CloseDelim(_
) => {}
,
410 _
=> bb_eis
.push(ei
),
413 TokenTree
::Token(sp
, SubstNt(..)) => {
414 return Error(sp
, "missing fragment specifier".to_string())
416 seq @ TokenTree
::Delimited(..) | seq @ TokenTree
::Token(_
, DocComment(..)) => {
417 let lower_elts
= mem
::replace(&mut ei
.top_elts
, Tt(seq
));
419 ei
.stack
.push(MatcherTtFrame
{
426 TokenTree
::Token(_
, ref t
) => {
427 let mut ei_t
= ei
.clone();
428 if token_name_eq(t
,&tok
) {
437 /* error messages here could be improved with links to orig. rules */
438 if token_name_eq(&tok
, &token
::Eof
) {
439 if eof_eis
.len() == 1 {
440 let mut v
= Vec
::new();
441 for dv
in &mut (&mut eof_eis
[0]).matches
{
442 v
.push(dv
.pop().unwrap());
444 return nameize(sess
, ms
, &v
[..]);
445 } else if eof_eis
.len() > 1 {
446 return Error(sp
, "ambiguity: multiple successful parses".to_string());
448 return Failure(sp
, "unexpected end of macro invocation".to_string());
451 if (!bb_eis
.is_empty() && !next_eis
.is_empty())
452 || bb_eis
.len() > 1 {
453 let nts
= bb_eis
.iter().map(|ei
| match ei
.top_elts
.get_tt(ei
.idx
) {
454 TokenTree
::Token(_
, MatchNt(bind
, name
, _
, _
)) => {
455 format
!("{} ('{}')", name
, bind
)
458 }).collect
::<Vec
<String
>>().join(" or ");
460 return Error(sp
, format
!(
461 "local ambiguity: multiple parsing options: {}",
462 match next_eis
.len() {
463 0 => format
!("built-in NTs {}.", nts
),
464 1 => format
!("built-in NTs {} or 1 other option.", nts
),
465 n
=> format
!("built-in NTs {} or {} other options.", nts
, n
),
468 } else if bb_eis
.is_empty() && next_eis
.is_empty() {
469 return Failure(sp
, format
!("no rules expected the token `{}`",
470 pprust
::token_to_string(&tok
)));
471 } else if !next_eis
.is_empty() {
472 /* Now process the next token */
473 while !next_eis
.is_empty() {
474 cur_eis
.push(next_eis
.pop().unwrap());
477 } else /* bb_eis.len() == 1 */ {
478 let mut rust_parser
= Parser
::new(sess
, cfg
.clone(), Box
::new(rdr
.clone()));
480 let mut ei
= bb_eis
.pop().unwrap();
481 match ei
.top_elts
.get_tt(ei
.idx
) {
482 TokenTree
::Token(span
, MatchNt(_
, ident
, _
, _
)) => {
483 let match_cur
= ei
.match_cur
;
484 (&mut ei
.matches
[match_cur
]).push(Rc
::new(MatchedNonterminal(
485 parse_nt(&mut rust_parser
, span
, &ident
.name
.as_str()))));
493 for _
in 0..rust_parser
.tokens_consumed
{
494 let _
= rdr
.next_token();
499 assert
!(!cur_eis
.is_empty());
503 pub fn parse_nt
<'a
>(p
: &mut Parser
<'a
>, sp
: Span
, name
: &str) -> Nonterminal
{
506 p
.quote_depth
+= 1; //but in theory, non-quoted tts might be useful
507 let res
: ::parse
::PResult
<'a
, _
> = p
.parse_token_tree();
508 let res
= token
::NtTT(P(panictry
!(res
)));
514 // check at the beginning and the parser checks after each bump
515 p
.check_unknown_macro_variable();
517 "item" => match panictry
!(p
.parse_item()) {
518 Some(i
) => token
::NtItem(i
),
520 p
.fatal("expected an item keyword").emit();
524 "block" => token
::NtBlock(panictry
!(p
.parse_block())),
525 "stmt" => match panictry
!(p
.parse_stmt()) {
526 Some(s
) => token
::NtStmt(P(s
)),
528 p
.fatal("expected a statement").emit();
532 "pat" => token
::NtPat(panictry
!(p
.parse_pat())),
533 "expr" => token
::NtExpr(panictry
!(p
.parse_expr())),
534 "ty" => token
::NtTy(panictry
!(p
.parse_ty())),
535 // this could be handled like a token, since it is one
536 "ident" => match p
.token
{
537 token
::Ident(sn
,b
) => {
539 token
::NtIdent(Box
::new(Spanned
::<Ident
>{node: sn, span: p.span}
),b
)
542 let token_str
= pprust
::token_to_string(&p
.token
);
543 p
.fatal(&format
!("expected ident, found {}",
544 &token_str
[..])).emit();
549 token
::NtPath(Box
::new(panictry
!(p
.parse_path(LifetimeAndTypesWithoutColons
))))
551 "meta" => token
::NtMeta(panictry
!(p
.parse_meta_item())),
553 p
.span_fatal_help(sp
,
554 &format
!("invalid fragment specifier `{}`", name
),
555 "valid fragment specifiers are `ident`, `block`, \
556 `stmt`, `expr`, `pat`, `ty`, `path`, `meta`, `tt` \