1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
13 //! This is an Earley-like parser, without support for in-grammar nonterminals,
14 //! only by calling out to the main rust parser for named nonterminals (which it
15 //! commits to fully when it hits one in a grammar). This means that there are no
16 //! completer or predictor rules, and therefore no need to store one column per
17 //! token: instead, there's a set of current Earley items and a set of next
18 //! ones. Instead of NTs, we have a special case for Kleene star. The big-O, in
19 //! pathological cases, is worse than traditional Earley parsing, but it's an
20 //! easier fit for Macro-by-Example-style rules, and I think the overhead is
21 //! lower. (In order to prevent the pathological case, we'd need to lazily
22 //! construct the resulting `NamedMatch`es at the very end. It'd be a pain,
23 //! and require more memory to keep around old items, but it would also save
26 //! Quick intro to how the parser works:
28 //! A 'position' is a dot in the middle of a matcher, usually represented as a
29 //! dot. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`.
31 //! The parser walks through the input a character at a time, maintaining a list
32 //! of items consistent with the current position in the input string: `cur_eis`.
34 //! As it processes them, it fills up `eof_eis` with items that would be valid if
35 //! the macro invocation is now over, `bb_eis` with items that are waiting on
36 //! a Rust nonterminal like `$e:expr`, and `next_eis` with items that are waiting
37 //! on a particular token. Most of the logic concerns moving the · through the
38 //! repetitions indicated by Kleene stars. It only advances or calls out to the
39 //! real Rust parser when no `cur_eis` items remain.
41 //! Example: Start parsing `a a a a b` against [· a $( a )* a b].
43 //! Remaining input: `a a a a b`
44 //! next_eis: [· a $( a )* a b]
46 //! - - - Advance over an `a`. - - -
48 //! Remaining input: `a a a b`
49 //! cur: [a · $( a )* a b]
50 //! Descend/Skip (first item).
51 //! next: [a $( · a )* a b] [a $( a )* · a b].
53 //! - - - Advance over an `a`. - - -
55 //! Remaining input: `a a b`
56 //! cur: [a $( a · )* a b] next: [a $( a )* a · b]
57 //! Finish/Repeat (first item)
58 //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
60 //! - - - Advance over an `a`. - - - (this looks exactly like the last step)
62 //! Remaining input: `a b`
63 //! cur: [a $( a · )* a b] next: [a $( a )* a · b]
64 //! Finish/Repeat (first item)
65 //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
67 //! - - - Advance over an `a`. - - - (this looks exactly like the last step)
69 //! Remaining input: `b`
70 //! cur: [a $( a · )* a b] next: [a $( a )* a · b]
71 //! Finish/Repeat (first item)
72 //! next: [a $( a )* · a b] [a $( · a )* a b]
74 //! - - - Advance over a `b`. - - -
76 //! Remaining input: ``
77 //! eof: [a $( a )* a b ·]
79 pub use self::NamedMatch
::*;
80 pub use self::ParseResult
::*;
81 use self::TokenTreeOrTokenTreeVec
::*;
84 use ast
::{TokenTree, Ident}
;
85 use ast
::{TtDelimited, TtSequence, TtToken}
;
86 use codemap
::{BytePos, mk_sp, Span}
;
88 use parse
::lexer
::*; //resolve bug?
90 use parse
::attr
::ParserAttr
;
91 use parse
::parser
::{LifetimeAndTypesWithoutColons, Parser}
;
92 use parse
::token
::{Eof, DocComment, MatchNt, SubstNt}
;
93 use parse
::token
::{Token, Nonterminal}
;
100 use std
::collections
::HashMap
;
101 use std
::collections
::hash_map
::Entry
::{Vacant, Occupied}
;
103 // To avoid costly uniqueness checks, we require that `MatchSeq` always has
107 enum TokenTreeOrTokenTreeVec
{
109 TtSeq(Rc
<Vec
<ast
::TokenTree
>>),
112 impl TokenTreeOrTokenTreeVec
{
113 fn len(&self) -> usize {
115 &TtSeq(ref v
) => v
.len(),
116 &Tt(ref tt
) => tt
.len(),
120 fn get_tt(&self, index
: usize) -> TokenTree
{
122 &TtSeq(ref v
) => v
[index
].clone(),
123 &Tt(ref tt
) => tt
.get_tt(index
),
128 /// an unzipping of `TokenTree`s
130 struct MatcherTtFrame
{
131 elts
: TokenTreeOrTokenTreeVec
,
136 pub struct MatcherPos
{
137 stack
: Vec
<MatcherTtFrame
>,
138 top_elts
: TokenTreeOrTokenTreeVec
,
141 up
: Option
<Box
<MatcherPos
>>,
142 matches
: Vec
<Vec
<Rc
<NamedMatch
>>>,
149 pub fn count_names(ms
: &[TokenTree
]) -> usize {
150 ms
.iter().fold(0, |count
, elt
| {
152 &TtSequence(_
, ref seq
) => {
155 &TtDelimited(_
, ref delim
) => {
156 count_names(&delim
.tts
)
158 &TtToken(_
, MatchNt(..)) => {
166 pub fn initial_matcher_pos(ms
: Rc
<Vec
<TokenTree
>>, sep
: Option
<Token
>, lo
: BytePos
)
168 let match_idx_hi
= count_names(&ms
[..]);
169 let matches
: Vec
<_
> = (0..match_idx_hi
).map(|_
| Vec
::new()).collect();
179 match_hi
: match_idx_hi
,
184 /// NamedMatch is a pattern-match result for a single token::MATCH_NONTERMINAL:
185 /// so it is associated with a single ident in a parse, and all
186 /// `MatchedNonterminal`s in the NamedMatch have the same nonterminal type
187 /// (expr, item, etc). Each leaf in a single NamedMatch corresponds to a
188 /// single token::MATCH_NONTERMINAL in the TokenTree that produced it.
190 /// The in-memory structure of a particular NamedMatch represents the match
191 /// that occurred when a particular subset of a matcher was applied to a
192 /// particular token tree.
194 /// The width of each MatchedSeq in the NamedMatch, and the identity of the
195 /// `MatchedNonterminal`s, will depend on the token tree it was applied to:
196 /// each MatchedSeq corresponds to a single TTSeq in the originating
197 /// token tree. The depth of the NamedMatch structure will therefore depend
198 /// only on the nesting depth of `ast::TTSeq`s in the originating
199 /// token tree it was derived from.
201 pub enum NamedMatch
{
202 MatchedSeq(Vec
<Rc
<NamedMatch
>>, codemap
::Span
),
203 MatchedNonterminal(Nonterminal
)
206 pub fn nameize(p_s
: &ParseSess
, ms
: &[TokenTree
], res
: &[Rc
<NamedMatch
>])
207 -> HashMap
<Ident
, Rc
<NamedMatch
>> {
208 fn n_rec(p_s
: &ParseSess
, m
: &TokenTree
, res
: &[Rc
<NamedMatch
>],
209 ret_val
: &mut HashMap
<Ident
, Rc
<NamedMatch
>>, idx
: &mut usize) {
211 &TtSequence(_
, ref seq
) => {
212 for next_m
in &seq
.tts
{
213 n_rec(p_s
, next_m
, res
, ret_val
, idx
)
216 &TtDelimited(_
, ref delim
) => {
217 for next_m
in &delim
.tts
{
218 n_rec(p_s
, next_m
, res
, ret_val
, idx
)
221 &TtToken(sp
, MatchNt(bind_name
, _
, _
, _
)) => {
222 match ret_val
.entry(bind_name
) {
224 spot
.insert(res
[*idx
].clone());
228 let string
= token
::get_ident(bind_name
);
231 &format
!("duplicated bind name: {}",
236 &TtToken(_
, SubstNt(..)) => panic
!("Cannot fill in a NT"),
237 &TtToken(_
, _
) => (),
240 let mut ret_val
= HashMap
::new();
242 for m
in ms { n_rec(p_s, m, res, &mut ret_val, &mut idx) }
246 pub enum ParseResult
<T
> {
248 Failure(codemap
::Span
, String
),
249 Error(codemap
::Span
, String
)
252 pub type NamedParseResult
= ParseResult
<HashMap
<Ident
, Rc
<NamedMatch
>>>;
253 pub type PositionalParseResult
= ParseResult
<Vec
<Rc
<NamedMatch
>>>;
255 pub fn parse_or_else(sess
: &ParseSess
,
256 cfg
: ast
::CrateConfig
,
259 -> HashMap
<Ident
, Rc
<NamedMatch
>> {
260 match parse(sess
, cfg
, rdr
, &ms
[..]) {
262 Failure(sp
, str) => {
263 sess
.span_diagnostic
.span_fatal(sp
, &str[..])
266 sess
.span_diagnostic
.span_fatal(sp
, &str[..])
271 /// Perform a token equality check, ignoring syntax context (that is, an
272 /// unhygienic comparison)
273 pub fn token_name_eq(t1
: &Token
, t2
: &Token
) -> bool
{
275 (&token
::Ident(id1
,_
),&token
::Ident(id2
,_
))
276 | (&token
::Lifetime(id1
),&token
::Lifetime(id2
)) =>
277 id1
.name
== id2
.name
,
282 pub fn parse(sess
: &ParseSess
,
283 cfg
: ast
::CrateConfig
,
286 -> NamedParseResult
{
287 let mut cur_eis
= Vec
::new();
288 cur_eis
.push(initial_matcher_pos(Rc
::new(ms
.iter()
295 let mut bb_eis
= Vec
::new(); // black-box parsed by parser.rs
296 let mut next_eis
= Vec
::new(); // or proceed normally
297 let mut eof_eis
= Vec
::new();
299 let TokenAndSpan { tok, sp }
= rdr
.peek();
301 /* we append new items to this while we go */
303 let mut ei
= match cur_eis
.pop() {
304 None
=> break, /* for each Earley Item */
308 // When unzipped trees end, remove them
309 while ei
.idx
>= ei
.top_elts
.len() {
310 match ei
.stack
.pop() {
311 Some(MatcherTtFrame { elts, idx }
) => {
320 let len
= ei
.top_elts
.len();
322 /* at end of sequence */
324 // can't move out of `match`es, so:
326 // hack: a matcher sequence is repeating iff it has a
327 // parent (the top level is just a container)
330 // disregard separator, try to go up
331 // (remove this condition to make trailing seps ok)
333 // pop from the matcher position
335 let mut new_pos
= ei
.up
.clone().unwrap();
337 // update matches (the MBE "parse tree") by appending
338 // each tree as a subtree.
340 // I bet this is a perf problem: we're preemptively
341 // doing a lot of array work that will get thrown away
344 // Only touch the binders we have actually bound
345 for idx
in ei
.match_lo
..ei
.match_hi
{
346 let sub
= (ei
.matches
[idx
]).clone();
347 (&mut new_pos
.matches
[idx
])
348 .push(Rc
::new(MatchedSeq(sub
, mk_sp(ei
.sp_lo
,
352 new_pos
.match_cur
= ei
.match_hi
;
354 cur_eis
.push(new_pos
);
357 // can we go around again?
359 // the *_t vars are workarounds for the lack of unary move
361 Some(ref t
) if idx
== len
=> { // we need a separator
362 // i'm conflicted about whether this should be hygienic....
363 // though in this case, if the separators are never legal
364 // idents, it shouldn't matter.
365 if token_name_eq(&tok
, t
) { //pass the separator
366 let mut ei_t
= ei
.clone();
367 // ei_t.match_cur = ei_t.match_lo;
372 _
=> { // we don't need a separator
374 ei_t
.match_cur
= ei_t
.match_lo
;
383 match ei
.top_elts
.get_tt(idx
) {
384 /* need to descend into sequence */
385 TtSequence(sp
, seq
) => {
386 if seq
.op
== ast
::ZeroOrMore
{
387 let mut new_ei
= ei
.clone();
388 new_ei
.match_cur
+= seq
.num_captures
;
390 //we specifically matched zero repeats.
391 for idx
in ei
.match_cur
..ei
.match_cur
+ seq
.num_captures
{
392 (&mut new_ei
.matches
[idx
]).push(Rc
::new(MatchedSeq(vec
![], sp
)));
395 cur_eis
.push(new_ei
);
398 let matches
: Vec
<_
> = (0..ei
.matches
.len())
399 .map(|_
| Vec
::new()).collect();
401 cur_eis
.push(box MatcherPos
{
403 sep
: seq
.separator
.clone(),
406 match_lo
: ei_t
.match_cur
,
407 match_cur
: ei_t
.match_cur
,
408 match_hi
: ei_t
.match_cur
+ seq
.num_captures
,
411 top_elts
: Tt(TtSequence(sp
, seq
)),
414 TtToken(_
, MatchNt(..)) => {
415 // Built-in nonterminals never start with these tokens,
416 // so we can eliminate them from consideration.
418 token
::CloseDelim(_
) => {}
,
419 _
=> bb_eis
.push(ei
),
422 TtToken(sp
, SubstNt(..)) => {
423 return Error(sp
, "Cannot transcribe in macro LHS".to_string())
425 seq @
TtDelimited(..) | seq @
TtToken(_
, DocComment(..)) => {
426 let lower_elts
= mem
::replace(&mut ei
.top_elts
, Tt(seq
));
428 ei
.stack
.push(MatcherTtFrame
{
435 TtToken(_
, ref t
) => {
436 let mut ei_t
= ei
.clone();
437 if token_name_eq(t
,&tok
) {
446 /* error messages here could be improved with links to orig. rules */
447 if token_name_eq(&tok
, &token
::Eof
) {
448 if eof_eis
.len() == 1 {
449 let mut v
= Vec
::new();
450 for dv
in &mut (&mut eof_eis
[0]).matches
{
451 v
.push(dv
.pop().unwrap());
453 return Success(nameize(sess
, ms
, &v
[..]));
454 } else if eof_eis
.len() > 1 {
455 return Error(sp
, "ambiguity: multiple successful parses".to_string());
457 return Failure(sp
, "unexpected end of macro invocation".to_string());
460 if (bb_eis
.len() > 0 && next_eis
.len() > 0)
461 || bb_eis
.len() > 1 {
462 let nts
= bb_eis
.iter().map(|ei
| {
463 match ei
.top_elts
.get_tt(ei
.idx
) {
464 TtToken(_
, MatchNt(bind
, name
, _
, _
)) => {
465 (format
!("{} ('{}')",
466 token
::get_ident(name
),
467 token
::get_ident(bind
))).to_string()
470 } }).collect
::<Vec
<String
>>().connect(" or ");
471 return Error(sp
, format
!(
472 "local ambiguity: multiple parsing options: \
473 built-in NTs {} or {} other options.",
474 nts
, next_eis
.len()).to_string());
475 } else if bb_eis
.len() == 0 && next_eis
.len() == 0 {
476 return Failure(sp
, format
!("no rules expected the token `{}`",
477 pprust
::token_to_string(&tok
)).to_string());
478 } else if next_eis
.len() > 0 {
479 /* Now process the next token */
480 while next_eis
.len() > 0 {
481 cur_eis
.push(next_eis
.pop().unwrap());
484 } else /* bb_eis.len() == 1 */ {
485 let mut rust_parser
= Parser
::new(sess
, cfg
.clone(), Box
::new(rdr
.clone()));
487 let mut ei
= bb_eis
.pop().unwrap();
488 match ei
.top_elts
.get_tt(ei
.idx
) {
489 TtToken(span
, MatchNt(_
, name
, _
, _
)) => {
490 let name_string
= token
::get_ident(name
);
491 let match_cur
= ei
.match_cur
;
492 (&mut ei
.matches
[match_cur
]).push(Rc
::new(MatchedNonterminal(
493 parse_nt(&mut rust_parser
, span
, &name_string
))));
501 for _
in 0..rust_parser
.tokens_consumed
{
502 let _
= rdr
.next_token();
507 assert
!(cur_eis
.len() > 0);
511 pub fn parse_nt(p
: &mut Parser
, sp
: Span
, name
: &str) -> Nonterminal
{
514 p
.quote_depth
+= 1; //but in theory, non-quoted tts might be useful
515 let res
= token
::NtTT(P(p
.parse_token_tree()));
521 // check at the beginning and the parser checks after each bump
522 p
.check_unknown_macro_variable();
524 "item" => match p
.parse_item() {
525 Some(i
) => token
::NtItem(i
),
526 None
=> p
.fatal("expected an item keyword")
528 "block" => token
::NtBlock(p
.parse_block()),
529 "stmt" => match p
.parse_stmt() {
530 Some(s
) => token
::NtStmt(s
),
531 None
=> p
.fatal("expected a statement")
533 "pat" => token
::NtPat(p
.parse_pat()),
534 "expr" => token
::NtExpr(p
.parse_expr()),
535 "ty" => token
::NtTy(p
.parse_ty()),
536 // this could be handled like a token, since it is one
537 "ident" => match p
.token
{
538 token
::Ident(sn
,b
) => { p.bump(); token::NtIdent(box sn,b) }
540 let token_str
= pprust
::token_to_string(&p
.token
);
541 p
.fatal(&format
!("expected ident, found {}",
546 token
::NtPath(box p
.parse_path(LifetimeAndTypesWithoutColons
))
548 "meta" => token
::NtMeta(p
.parse_meta_item()),
550 p
.span_fatal_help(sp
,
551 &format
!("invalid fragment specifier `{}`", name
),
552 "valid fragment specifiers are `ident`, `block`, \
553 `stmt`, `expr`, `pat`, `ty`, `path`, `meta`, `tt` \