]>
Commit | Line | Data |
---|---|---|
1a4d82fc | 1 | // Copyright 2015 The Rust Project Developers. See the COPYRIGHT |
223e47cc LB |
2 | // file at the top-level directory of this distribution and at |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
9e0c209e | 11 | use {ast, attr}; |
3157f602 | 12 | use syntax_pos::{Span, DUMMY_SP}; |
9e0c209e SL |
13 | use ext::base::{DummyResult, ExtCtxt, MacEager, MacResult, SyntaxExtension}; |
14 | use ext::base::{IdentMacroExpander, NormalTT, TTMacroExpander}; | |
15 | use ext::expand::{Expansion, ExpansionKind}; | |
16 | use ext::placeholders; | |
1a4d82fc | 17 | use ext::tt::macro_parser::{Success, Error, Failure}; |
92a42be0 | 18 | use ext::tt::macro_parser::{MatchedSeq, MatchedNonterminal}; |
e9174d1e | 19 | use ext::tt::macro_parser::parse; |
9e0c209e | 20 | use parse::ParseSess; |
c34b1796 | 21 | use parse::lexer::new_tt_reader; |
54a0048b | 22 | use parse::parser::{Parser, Restrictions}; |
a7813a04 | 23 | use parse::token::{self, gensym_ident, NtTT, Token}; |
1a4d82fc | 24 | use parse::token::Token::*; |
223e47cc | 25 | use print; |
3157f602 | 26 | use tokenstream::{self, TokenTree}; |
223e47cc | 27 | |
9cc50fc6 SL |
28 | use std::collections::{HashMap}; |
29 | use std::collections::hash_map::{Entry}; | |
5bcae85e | 30 | use std::rc::Rc; |
1a4d82fc | 31 | |
9e0c209e SL |
32 | pub struct ParserAnyMacro<'a> { |
33 | parser: Parser<'a>, | |
9346a6ac AL |
34 | |
35 | /// Span of the expansion site of the macro this parser is for | |
36 | site_span: Span, | |
37 | /// The ident of the macro we're parsing | |
38 | macro_ident: ast::Ident | |
1a4d82fc JJ |
39 | } |
40 | ||
41 | impl<'a> ParserAnyMacro<'a> { | |
9e0c209e SL |
42 | pub fn make(mut self: Box<ParserAnyMacro<'a>>, kind: ExpansionKind) -> Expansion { |
43 | let ParserAnyMacro { site_span, macro_ident, ref mut parser } = *self; | |
44 | let expansion = panictry!(parser.parse_expansion(kind, true)); | |
45 | ||
46 | // We allow semicolons at the end of expressions -- e.g. the semicolon in | |
47 | // `macro_rules! m { () => { panic!(); } }` isn't parsed by `.parse_expr()`, | |
48 | // but `m!()` is allowed in expression positions (c.f. issue #34706). | |
49 | if kind == ExpansionKind::Expr && parser.token == token::Semi { | |
9cc50fc6 | 50 | parser.bump(); |
1a4d82fc | 51 | } |
e9174d1e | 52 | |
9e0c209e SL |
53 | // Make sure we don't have any tokens left to parse so we don't silently drop anything. |
54 | parser.ensure_complete_parse(macro_ident.name, kind.name(), site_span); | |
55 | expansion | |
e9174d1e | 56 | } |
1a4d82fc JJ |
57 | } |
58 | ||
59 | struct MacroRulesMacroExpander { | |
60 | name: ast::Ident, | |
61 | imported_from: Option<ast::Ident>, | |
92a42be0 SL |
62 | lhses: Vec<TokenTree>, |
63 | rhses: Vec<TokenTree>, | |
64 | valid: bool, | |
1a4d82fc JJ |
65 | } |
66 | ||
67 | impl TTMacroExpander for MacroRulesMacroExpander { | |
68 | fn expand<'cx>(&self, | |
69 | cx: &'cx mut ExtCtxt, | |
70 | sp: Span, | |
92a42be0 | 71 | arg: &[TokenTree]) |
1a4d82fc | 72 | -> Box<MacResult+'cx> { |
92a42be0 SL |
73 | if !self.valid { |
74 | return DummyResult::any(sp); | |
75 | } | |
1a4d82fc JJ |
76 | generic_extension(cx, |
77 | sp, | |
78 | self.name, | |
79 | self.imported_from, | |
80 | arg, | |
85aaf69f SL |
81 | &self.lhses, |
82 | &self.rhses) | |
1a4d82fc JJ |
83 | } |
84 | } | |
85 | ||
86 | /// Given `lhses` and `rhses`, this is the new macro we create | |
87 | fn generic_extension<'cx>(cx: &'cx ExtCtxt, | |
88 | sp: Span, | |
89 | name: ast::Ident, | |
90 | imported_from: Option<ast::Ident>, | |
92a42be0 SL |
91 | arg: &[TokenTree], |
92 | lhses: &[TokenTree], | |
93 | rhses: &[TokenTree]) | |
1a4d82fc JJ |
94 | -> Box<MacResult+'cx> { |
95 | if cx.trace_macros() { | |
96 | println!("{}! {{ {} }}", | |
c1a9b12d | 97 | name, |
1a4d82fc JJ |
98 | print::pprust::tts_to_string(arg)); |
99 | } | |
100 | ||
101 | // Which arm's failure should we report? (the one furthest along) | |
102 | let mut best_fail_spot = DUMMY_SP; | |
103 | let mut best_fail_msg = "internal error: ran no matchers".to_string(); | |
104 | ||
105 | for (i, lhs) in lhses.iter().enumerate() { // try each arm's matchers | |
92a42be0 SL |
106 | let lhs_tt = match *lhs { |
107 | TokenTree::Delimited(_, ref delim) => &delim.tts[..], | |
3157f602 | 108 | _ => cx.span_bug(sp, "malformed macro lhs") |
92a42be0 SL |
109 | }; |
110 | ||
111 | match TokenTree::parse(cx, lhs_tt, arg) { | |
112 | Success(named_matches) => { | |
113 | let rhs = match rhses[i] { | |
114 | // ignore delimiters | |
115 | TokenTree::Delimited(_, ref delimed) => delimed.tts.clone(), | |
3157f602 | 116 | _ => cx.span_bug(sp, "malformed macro rhs"), |
1a4d82fc JJ |
117 | }; |
118 | // rhs has holes ( `$id` and `$(...)` that need filled) | |
9e0c209e | 119 | let trncbr = new_tt_reader(&cx.parse_sess.span_diagnostic, |
1a4d82fc JJ |
120 | Some(named_matches), |
121 | imported_from, | |
122 | rhs); | |
c34b1796 | 123 | let mut p = Parser::new(cx.parse_sess(), cx.cfg(), Box::new(trncbr)); |
9e0c209e SL |
124 | p.directory = cx.current_expansion.module.directory.clone(); |
125 | p.restrictions = match cx.current_expansion.in_block { | |
54a0048b SL |
126 | true => Restrictions::NO_NONINLINE_MOD, |
127 | false => Restrictions::empty(), | |
128 | }; | |
9cc50fc6 | 129 | p.check_unknown_macro_variable(); |
1a4d82fc JJ |
130 | // Let the context choose how to interpret the result. |
131 | // Weird, but useful for X-macros. | |
d9579d0f | 132 | return Box::new(ParserAnyMacro { |
9e0c209e | 133 | parser: p, |
9346a6ac AL |
134 | |
135 | // Pass along the original expansion site and the name of the macro | |
136 | // so we can print a useful error message if the parse of the expanded | |
137 | // macro leaves unparsed tokens. | |
138 | site_span: sp, | |
139 | macro_ident: name | |
d9579d0f | 140 | }) |
92a42be0 SL |
141 | } |
142 | Failure(sp, ref msg) => if sp.lo >= best_fail_spot.lo { | |
1a4d82fc JJ |
143 | best_fail_spot = sp; |
144 | best_fail_msg = (*msg).clone(); | |
92a42be0 SL |
145 | }, |
146 | Error(err_sp, ref msg) => { | |
147 | cx.span_fatal(err_sp.substitute_dummy(sp), &msg[..]) | |
1a4d82fc | 148 | } |
1a4d82fc JJ |
149 | } |
150 | } | |
e9174d1e | 151 | |
92a42be0 | 152 | cx.span_fatal(best_fail_spot.substitute_dummy(sp), &best_fail_msg[..]); |
1a4d82fc JJ |
153 | } |
154 | ||
9e0c209e SL |
155 | pub struct MacroRulesExpander; |
156 | impl IdentMacroExpander for MacroRulesExpander { | |
157 | fn expand(&self, | |
158 | cx: &mut ExtCtxt, | |
159 | span: Span, | |
160 | ident: ast::Ident, | |
161 | tts: Vec<tokenstream::TokenTree>, | |
162 | attrs: Vec<ast::Attribute>) | |
163 | -> Box<MacResult> { | |
164 | let def = ast::MacroDef { | |
165 | ident: ident, | |
166 | id: ast::DUMMY_NODE_ID, | |
167 | span: span, | |
168 | imported_from: None, | |
169 | use_locally: true, | |
170 | body: tts, | |
171 | export: attr::contains_name(&attrs, "macro_export"), | |
172 | allow_internal_unstable: attr::contains_name(&attrs, "allow_internal_unstable"), | |
173 | attrs: attrs, | |
174 | }; | |
175 | ||
176 | // If keep_macs is true, expands to a MacEager::items instead. | |
177 | let result = if cx.ecfg.keep_macs { | |
178 | MacEager::items(placeholders::reconstructed_macro_rules(&def).make_items()) | |
179 | } else { | |
180 | MacEager::items(placeholders::macro_scope_placeholder().make_items()) | |
181 | }; | |
182 | ||
183 | cx.resolver.add_macro(cx.current_expansion.mark, def); | |
184 | result | |
185 | } | |
186 | } | |
187 | ||
1a4d82fc JJ |
188 | // Note that macro-by-example's input is also matched against a token tree: |
189 | // $( $lhs:tt => $rhs:tt );+ | |
190 | // | |
191 | // Holy self-referential! | |
192 | ||
193 | /// Converts a `macro_rules!` invocation into a syntax extension. | |
9e0c209e | 194 | pub fn compile(sess: &ParseSess, def: &ast::MacroDef) -> SyntaxExtension { |
970d7e83 LB |
195 | let lhs_nm = gensym_ident("lhs"); |
196 | let rhs_nm = gensym_ident("rhs"); | |
223e47cc | 197 | |
1a4d82fc | 198 | // The pattern that macro_rules matches. |
223e47cc | 199 | // The grammar for macro_rules! is: |
1a4d82fc | 200 | // $( $lhs:tt => $rhs:tt );+ |
223e47cc | 201 | // ...quasiquoting this would be nice. |
1a4d82fc | 202 | // These spans won't matter, anyways |
a7813a04 XL |
203 | let match_lhs_tok = MatchNt(lhs_nm, token::str_to_ident("tt")); |
204 | let match_rhs_tok = MatchNt(rhs_nm, token::str_to_ident("tt")); | |
3157f602 | 205 | let argument_gram = vec![ |
5bcae85e | 206 | TokenTree::Sequence(DUMMY_SP, Rc::new(tokenstream::SequenceRepetition { |
3157f602 XL |
207 | tts: vec![ |
208 | TokenTree::Token(DUMMY_SP, match_lhs_tok), | |
209 | TokenTree::Token(DUMMY_SP, token::FatArrow), | |
210 | TokenTree::Token(DUMMY_SP, match_rhs_tok), | |
211 | ], | |
212 | separator: Some(token::Semi), | |
213 | op: tokenstream::KleeneOp::OneOrMore, | |
214 | num_captures: 2, | |
5bcae85e | 215 | })), |
3157f602 | 216 | // to phase into semicolon-termination instead of semicolon-separation |
5bcae85e | 217 | TokenTree::Sequence(DUMMY_SP, Rc::new(tokenstream::SequenceRepetition { |
3157f602 XL |
218 | tts: vec![TokenTree::Token(DUMMY_SP, token::Semi)], |
219 | separator: None, | |
220 | op: tokenstream::KleeneOp::ZeroOrMore, | |
221 | num_captures: 0 | |
5bcae85e | 222 | })), |
3157f602 | 223 | ]; |
223e47cc LB |
224 | |
225 | // Parse the macro_rules! invocation (`none` is for no interpolations): | |
9e0c209e SL |
226 | let arg_reader = new_tt_reader(&sess.span_diagnostic, None, None, def.body.clone()); |
227 | ||
228 | let argument_map = match parse(sess, Vec::new(), arg_reader, &argument_gram) { | |
e9174d1e SL |
229 | Success(m) => m, |
230 | Failure(sp, str) | Error(sp, str) => { | |
9e0c209e | 231 | panic!(sess.span_diagnostic.span_fatal(sp.substitute_dummy(def.span), &str)); |
e9174d1e SL |
232 | } |
233 | }; | |
223e47cc | 234 | |
92a42be0 SL |
235 | let mut valid = true; |
236 | ||
223e47cc | 237 | // Extract the arguments: |
5bcae85e | 238 | let lhses = match **argument_map.get(&lhs_nm).unwrap() { |
92a42be0 SL |
239 | MatchedSeq(ref s, _) => { |
240 | s.iter().map(|m| match **m { | |
3157f602 | 241 | MatchedNonterminal(NtTT(ref tt)) => { |
9e0c209e | 242 | valid &= check_lhs_nt_follows(sess, tt); |
3157f602 XL |
243 | (**tt).clone() |
244 | } | |
9e0c209e SL |
245 | _ => sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs") |
246 | }).collect::<Vec<TokenTree>>() | |
92a42be0 | 247 | } |
9e0c209e | 248 | _ => sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs") |
223e47cc LB |
249 | }; |
250 | ||
5bcae85e | 251 | let rhses = match **argument_map.get(&rhs_nm).unwrap() { |
92a42be0 SL |
252 | MatchedSeq(ref s, _) => { |
253 | s.iter().map(|m| match **m { | |
254 | MatchedNonterminal(NtTT(ref tt)) => (**tt).clone(), | |
9e0c209e | 255 | _ => sess.span_diagnostic.span_bug(def.span, "wrong-structured rhs") |
92a42be0 SL |
256 | }).collect() |
257 | } | |
9e0c209e | 258 | _ => sess.span_diagnostic.span_bug(def.span, "wrong-structured rhs") |
223e47cc LB |
259 | }; |
260 | ||
92a42be0 | 261 | for rhs in &rhses { |
9e0c209e SL |
262 | valid &= check_rhs(sess, rhs); |
263 | } | |
264 | ||
265 | // don't abort iteration early, so that errors for multiple lhses can be reported | |
266 | for lhs in &lhses { | |
267 | valid &= check_lhs_no_empty_seq(sess, &[lhs.clone()]) | |
92a42be0 SL |
268 | } |
269 | ||
d9579d0f | 270 | let exp: Box<_> = Box::new(MacroRulesMacroExpander { |
1a4d82fc JJ |
271 | name: def.ident, |
272 | imported_from: def.imported_from, | |
273 | lhses: lhses, | |
274 | rhses: rhses, | |
92a42be0 | 275 | valid: valid, |
d9579d0f | 276 | }); |
223e47cc | 277 | |
c34b1796 | 278 | NormalTT(exp, Some(def.span), def.allow_internal_unstable) |
1a4d82fc JJ |
279 | } |
280 | ||
9e0c209e | 281 | fn check_lhs_nt_follows(sess: &ParseSess, lhs: &TokenTree) -> bool { |
92a42be0 SL |
282 | // lhs is going to be like TokenTree::Delimited(...), where the |
283 | // entire lhs is those tts. Or, it can be a "bare sequence", not wrapped in parens. | |
85aaf69f | 284 | match lhs { |
9e0c209e | 285 | &TokenTree::Delimited(_, ref tts) => check_matcher(sess, &tts.tts), |
3157f602 | 286 | _ => { |
9e0c209e SL |
287 | let msg = "invalid macro matcher; matchers must be contained in balanced delimiters"; |
288 | sess.span_diagnostic.span_err(lhs.get_span(), msg); | |
3157f602 XL |
289 | false |
290 | } | |
291 | } | |
1a4d82fc JJ |
292 | // we don't abort on errors on rejection, the driver will do that for us |
293 | // after parsing/expansion. we can report every error in every macro this way. | |
294 | } | |
295 | ||
9e0c209e SL |
296 | /// Check that the lhs contains no repetition which could match an empty token |
297 | /// tree, because then the matcher would hang indefinitely. | |
298 | fn check_lhs_no_empty_seq(sess: &ParseSess, tts: &[TokenTree]) -> bool { | |
299 | for tt in tts { | |
300 | match *tt { | |
301 | TokenTree::Token(_, _) => (), | |
302 | TokenTree::Delimited(_, ref del) => if !check_lhs_no_empty_seq(sess, &del.tts) { | |
303 | return false; | |
304 | }, | |
305 | TokenTree::Sequence(span, ref seq) => { | |
306 | if seq.separator.is_none() { | |
307 | if seq.tts.iter().all(|seq_tt| { | |
308 | match *seq_tt { | |
309 | TokenTree::Sequence(_, ref sub_seq) => | |
310 | sub_seq.op == tokenstream::KleeneOp::ZeroOrMore, | |
311 | _ => false, | |
312 | } | |
313 | }) { | |
314 | sess.span_diagnostic.span_err(span, "repetition matches empty token tree"); | |
315 | return false; | |
316 | } | |
317 | } | |
318 | if !check_lhs_no_empty_seq(sess, &seq.tts) { | |
319 | return false; | |
320 | } | |
321 | } | |
322 | } | |
323 | } | |
324 | ||
325 | true | |
326 | } | |
327 | ||
328 | fn check_rhs(sess: &ParseSess, rhs: &TokenTree) -> bool { | |
92a42be0 SL |
329 | match *rhs { |
330 | TokenTree::Delimited(..) => return true, | |
9e0c209e | 331 | _ => sess.span_diagnostic.span_err(rhs.get_span(), "macro rhs must be delimited") |
92a42be0 SL |
332 | } |
333 | false | |
334 | } | |
335 | ||
9e0c209e | 336 | fn check_matcher(sess: &ParseSess, matcher: &[TokenTree]) -> bool { |
9cc50fc6 SL |
337 | let first_sets = FirstSets::new(matcher); |
338 | let empty_suffix = TokenSet::empty(); | |
9e0c209e SL |
339 | let err = sess.span_diagnostic.err_count(); |
340 | check_matcher_core(sess, &first_sets, matcher, &empty_suffix); | |
341 | err == sess.span_diagnostic.err_count() | |
9cc50fc6 SL |
342 | } |
343 | ||
344 | // The FirstSets for a matcher is a mapping from subsequences in the | |
345 | // matcher to the FIRST set for that subsequence. | |
346 | // | |
347 | // This mapping is partially precomputed via a backwards scan over the | |
348 | // token trees of the matcher, which provides a mapping from each | |
349 | // repetition sequence to its FIRST set. | |
350 | // | |
351 | // (Hypothetically sequences should be uniquely identifiable via their | |
352 | // spans, though perhaps that is false e.g. for macro-generated macros | |
353 | // that do not try to inject artificial span information. My plan is | |
354 | // to try to catch such cases ahead of time and not include them in | |
355 | // the precomputed mapping.) | |
356 | struct FirstSets { | |
357 | // this maps each TokenTree::Sequence `$(tt ...) SEP OP` that is uniquely identified by its | |
358 | // span in the original matcher to the First set for the inner sequence `tt ...`. | |
359 | // | |
360 | // If two sequences have the same span in a matcher, then map that | |
361 | // span to None (invalidating the mapping here and forcing the code to | |
362 | // use a slow path). | |
363 | first: HashMap<Span, Option<TokenSet>>, | |
364 | } | |
365 | ||
366 | impl FirstSets { | |
367 | fn new(tts: &[TokenTree]) -> FirstSets { | |
368 | let mut sets = FirstSets { first: HashMap::new() }; | |
369 | build_recur(&mut sets, tts); | |
370 | return sets; | |
371 | ||
372 | // walks backward over `tts`, returning the FIRST for `tts` | |
373 | // and updating `sets` at the same time for all sequence | |
374 | // substructure we find within `tts`. | |
375 | fn build_recur(sets: &mut FirstSets, tts: &[TokenTree]) -> TokenSet { | |
376 | let mut first = TokenSet::empty(); | |
377 | for tt in tts.iter().rev() { | |
378 | match *tt { | |
379 | TokenTree::Token(sp, ref tok) => { | |
380 | first.replace_with((sp, tok.clone())); | |
381 | } | |
382 | TokenTree::Delimited(_, ref delimited) => { | |
383 | build_recur(sets, &delimited.tts[..]); | |
384 | first.replace_with((delimited.open_span, | |
385 | Token::OpenDelim(delimited.delim))); | |
386 | } | |
387 | TokenTree::Sequence(sp, ref seq_rep) => { | |
388 | let subfirst = build_recur(sets, &seq_rep.tts[..]); | |
389 | ||
390 | match sets.first.entry(sp) { | |
391 | Entry::Vacant(vac) => { | |
392 | vac.insert(Some(subfirst.clone())); | |
393 | } | |
394 | Entry::Occupied(mut occ) => { | |
395 | // if there is already an entry, then a span must have collided. | |
396 | // This should not happen with typical macro_rules macros, | |
397 | // but syntax extensions need not maintain distinct spans, | |
398 | // so distinct syntax trees can be assigned the same span. | |
399 | // In such a case, the map cannot be trusted; so mark this | |
400 | // entry as unusable. | |
401 | occ.insert(None); | |
402 | } | |
403 | } | |
404 | ||
405 | // If the sequence contents can be empty, then the first | |
406 | // token could be the separator token itself. | |
407 | ||
408 | if let (Some(ref sep), true) = (seq_rep.separator.clone(), | |
409 | subfirst.maybe_empty) { | |
410 | first.add_one_maybe((sp, sep.clone())); | |
411 | } | |
412 | ||
413 | // Reverse scan: Sequence comes before `first`. | |
3157f602 | 414 | if subfirst.maybe_empty || seq_rep.op == tokenstream::KleeneOp::ZeroOrMore { |
9cc50fc6 SL |
415 | // If sequence is potentially empty, then |
416 | // union them (preserving first emptiness). | |
417 | first.add_all(&TokenSet { maybe_empty: true, ..subfirst }); | |
418 | } else { | |
419 | // Otherwise, sequence guaranteed | |
420 | // non-empty; replace first. | |
421 | first = subfirst; | |
422 | } | |
423 | } | |
424 | } | |
425 | } | |
426 | ||
427 | return first; | |
428 | } | |
429 | } | |
430 | ||
431 | // walks forward over `tts` until all potential FIRST tokens are | |
432 | // identified. | |
433 | fn first(&self, tts: &[TokenTree]) -> TokenSet { | |
434 | let mut first = TokenSet::empty(); | |
435 | for tt in tts.iter() { | |
436 | assert!(first.maybe_empty); | |
437 | match *tt { | |
438 | TokenTree::Token(sp, ref tok) => { | |
439 | first.add_one((sp, tok.clone())); | |
440 | return first; | |
441 | } | |
442 | TokenTree::Delimited(_, ref delimited) => { | |
443 | first.add_one((delimited.open_span, | |
444 | Token::OpenDelim(delimited.delim))); | |
445 | return first; | |
446 | } | |
447 | TokenTree::Sequence(sp, ref seq_rep) => { | |
448 | match self.first.get(&sp) { | |
449 | Some(&Some(ref subfirst)) => { | |
450 | ||
451 | // If the sequence contents can be empty, then the first | |
452 | // token could be the separator token itself. | |
453 | ||
454 | if let (Some(ref sep), true) = (seq_rep.separator.clone(), | |
455 | subfirst.maybe_empty) { | |
456 | first.add_one_maybe((sp, sep.clone())); | |
457 | } | |
458 | ||
459 | assert!(first.maybe_empty); | |
460 | first.add_all(subfirst); | |
3157f602 XL |
461 | if subfirst.maybe_empty || |
462 | seq_rep.op == tokenstream::KleeneOp::ZeroOrMore { | |
9cc50fc6 SL |
463 | // continue scanning for more first |
464 | // tokens, but also make sure we | |
465 | // restore empty-tracking state | |
466 | first.maybe_empty = true; | |
467 | continue; | |
468 | } else { | |
469 | return first; | |
470 | } | |
471 | } | |
472 | ||
473 | Some(&None) => { | |
474 | panic!("assume all sequences have (unique) spans for now"); | |
475 | } | |
476 | ||
477 | None => { | |
478 | panic!("We missed a sequence during FirstSets construction"); | |
479 | } | |
480 | } | |
481 | } | |
482 | } | |
483 | } | |
484 | ||
485 | // we only exit the loop if `tts` was empty or if every | |
486 | // element of `tts` matches the empty sequence. | |
487 | assert!(first.maybe_empty); | |
488 | return first; | |
489 | } | |
490 | } | |
491 | ||
492 | // A set of Tokens, which may include MatchNt tokens (for | |
493 | // macro-by-example syntactic variables). It also carries the | |
494 | // `maybe_empty` flag; that is true if and only if the matcher can | |
495 | // match an empty token sequence. | |
496 | // | |
497 | // The First set is computed on submatchers like `$($a:expr b),* $(c)* d`, | |
498 | // which has corresponding FIRST = {$a:expr, c, d}. | |
499 | // Likewise, `$($a:expr b),* $(c)+ d` has FIRST = {$a:expr, c}. | |
500 | // | |
501 | // (Notably, we must allow for *-op to occur zero times.) | |
502 | #[derive(Clone, Debug)] | |
503 | struct TokenSet { | |
504 | tokens: Vec<(Span, Token)>, | |
505 | maybe_empty: bool, | |
506 | } | |
507 | ||
508 | impl TokenSet { | |
509 | // Returns a set for the empty sequence. | |
510 | fn empty() -> Self { TokenSet { tokens: Vec::new(), maybe_empty: true } } | |
511 | ||
512 | // Returns the set `{ tok }` for the single-token (and thus | |
513 | // non-empty) sequence [tok]. | |
514 | fn singleton(tok: (Span, Token)) -> Self { | |
515 | TokenSet { tokens: vec![tok], maybe_empty: false } | |
516 | } | |
517 | ||
518 | // Changes self to be the set `{ tok }`. | |
519 | // Since `tok` is always present, marks self as non-empty. | |
520 | fn replace_with(&mut self, tok: (Span, Token)) { | |
521 | self.tokens.clear(); | |
522 | self.tokens.push(tok); | |
523 | self.maybe_empty = false; | |
524 | } | |
525 | ||
526 | // Changes self to be the empty set `{}`; meant for use when | |
527 | // the particular token does not matter, but we want to | |
528 | // record that it occurs. | |
529 | fn replace_with_irrelevant(&mut self) { | |
530 | self.tokens.clear(); | |
531 | self.maybe_empty = false; | |
532 | } | |
533 | ||
534 | // Adds `tok` to the set for `self`, marking sequence as non-empy. | |
535 | fn add_one(&mut self, tok: (Span, Token)) { | |
536 | if !self.tokens.contains(&tok) { | |
537 | self.tokens.push(tok); | |
538 | } | |
539 | self.maybe_empty = false; | |
540 | } | |
541 | ||
542 | // Adds `tok` to the set for `self`. (Leaves `maybe_empty` flag alone.) | |
543 | fn add_one_maybe(&mut self, tok: (Span, Token)) { | |
544 | if !self.tokens.contains(&tok) { | |
545 | self.tokens.push(tok); | |
546 | } | |
547 | } | |
548 | ||
549 | // Adds all elements of `other` to this. | |
550 | // | |
551 | // (Since this is a set, we filter out duplicates.) | |
552 | // | |
553 | // If `other` is potentially empty, then preserves the previous | |
554 | // setting of the empty flag of `self`. If `other` is guaranteed | |
555 | // non-empty, then `self` is marked non-empty. | |
556 | fn add_all(&mut self, other: &Self) { | |
557 | for tok in &other.tokens { | |
558 | if !self.tokens.contains(tok) { | |
559 | self.tokens.push(tok.clone()); | |
560 | } | |
561 | } | |
562 | if !other.maybe_empty { | |
563 | self.maybe_empty = false; | |
564 | } | |
565 | } | |
566 | } | |
567 | ||
568 | // Checks that `matcher` is internally consistent and that it | |
569 | // can legally by followed by a token N, for all N in `follow`. | |
570 | // (If `follow` is empty, then it imposes no constraint on | |
571 | // the `matcher`.) | |
572 | // | |
573 | // Returns the set of NT tokens that could possibly come last in | |
574 | // `matcher`. (If `matcher` matches the empty sequence, then | |
575 | // `maybe_empty` will be set to true.) | |
576 | // | |
577 | // Requires that `first_sets` is pre-computed for `matcher`; | |
578 | // see `FirstSets::new`. | |
9e0c209e | 579 | fn check_matcher_core(sess: &ParseSess, |
9cc50fc6 SL |
580 | first_sets: &FirstSets, |
581 | matcher: &[TokenTree], | |
3157f602 | 582 | follow: &TokenSet) -> TokenSet { |
9cc50fc6 SL |
583 | use print::pprust::token_to_string; |
584 | ||
585 | let mut last = TokenSet::empty(); | |
586 | ||
587 | // 2. For each token and suffix [T, SUFFIX] in M: | |
588 | // ensure that T can be followed by SUFFIX, and if SUFFIX may be empty, | |
589 | // then ensure T can also be followed by any element of FOLLOW. | |
590 | 'each_token: for i in 0..matcher.len() { | |
591 | let token = &matcher[i]; | |
592 | let suffix = &matcher[i+1..]; | |
593 | ||
594 | let build_suffix_first = || { | |
595 | let mut s = first_sets.first(suffix); | |
596 | if s.maybe_empty { s.add_all(follow); } | |
597 | return s; | |
598 | }; | |
599 | ||
600 | // (we build `suffix_first` on demand below; you can tell | |
601 | // which cases are supposed to fall through by looking for the | |
602 | // initialization of this variable.) | |
603 | let suffix_first; | |
604 | ||
605 | // First, update `last` so that it corresponds to the set | |
606 | // of NT tokens that might end the sequence `... token`. | |
607 | match *token { | |
608 | TokenTree::Token(sp, ref tok) => { | |
609 | let can_be_followed_by_any; | |
610 | if let Err(bad_frag) = has_legal_fragment_specifier(tok) { | |
9e0c209e SL |
611 | let msg = format!("invalid fragment specifier `{}`", bad_frag); |
612 | sess.span_diagnostic.struct_span_err(sp, &msg) | |
3157f602 XL |
613 | .help("valid fragment specifiers are `ident`, `block`, \ |
614 | `stmt`, `expr`, `pat`, `ty`, `path`, `meta`, `tt` \ | |
615 | and `item`") | |
616 | .emit(); | |
9cc50fc6 SL |
617 | // (This eliminates false positives and duplicates |
618 | // from error messages.) | |
619 | can_be_followed_by_any = true; | |
620 | } else { | |
621 | can_be_followed_by_any = token_can_be_followed_by_any(tok); | |
622 | } | |
623 | ||
624 | if can_be_followed_by_any { | |
625 | // don't need to track tokens that work with any, | |
626 | last.replace_with_irrelevant(); | |
627 | // ... and don't need to check tokens that can be | |
628 | // followed by anything against SUFFIX. | |
629 | continue 'each_token; | |
630 | } else { | |
631 | last.replace_with((sp, tok.clone())); | |
632 | suffix_first = build_suffix_first(); | |
633 | } | |
634 | } | |
635 | TokenTree::Delimited(_, ref d) => { | |
636 | let my_suffix = TokenSet::singleton((d.close_span, Token::CloseDelim(d.delim))); | |
9e0c209e | 637 | check_matcher_core(sess, first_sets, &d.tts, &my_suffix); |
9cc50fc6 SL |
638 | // don't track non NT tokens |
639 | last.replace_with_irrelevant(); | |
640 | ||
641 | // also, we don't need to check delimited sequences | |
642 | // against SUFFIX | |
643 | continue 'each_token; | |
644 | } | |
645 | TokenTree::Sequence(sp, ref seq_rep) => { | |
646 | suffix_first = build_suffix_first(); | |
647 | // The trick here: when we check the interior, we want | |
648 | // to include the separator (if any) as a potential | |
649 | // (but not guaranteed) element of FOLLOW. So in that | |
650 | // case, we make a temp copy of suffix and stuff | |
651 | // delimiter in there. | |
652 | // | |
653 | // FIXME: Should I first scan suffix_first to see if | |
654 | // delimiter is already in it before I go through the | |
655 | // work of cloning it? But then again, this way I may | |
656 | // get a "tighter" span? | |
657 | let mut new; | |
658 | let my_suffix = if let Some(ref u) = seq_rep.separator { | |
659 | new = suffix_first.clone(); | |
660 | new.add_one_maybe((sp, u.clone())); | |
661 | &new | |
662 | } else { | |
663 | &suffix_first | |
664 | }; | |
665 | ||
666 | // At this point, `suffix_first` is built, and | |
667 | // `my_suffix` is some TokenSet that we can use | |
668 | // for checking the interior of `seq_rep`. | |
9e0c209e | 669 | let next = check_matcher_core(sess, first_sets, &seq_rep.tts, my_suffix); |
9cc50fc6 SL |
670 | if next.maybe_empty { |
671 | last.add_all(&next); | |
672 | } else { | |
673 | last = next; | |
674 | } | |
675 | ||
676 | // the recursive call to check_matcher_core already ran the 'each_last | |
677 | // check below, so we can just keep going forward here. | |
678 | continue 'each_token; | |
679 | } | |
680 | } | |
681 | ||
682 | // (`suffix_first` guaranteed initialized once reaching here.) | |
683 | ||
684 | // Now `last` holds the complete set of NT tokens that could | |
685 | // end the sequence before SUFFIX. Check that every one works with `suffix`. | |
686 | 'each_last: for &(_sp, ref t) in &last.tokens { | |
a7813a04 | 687 | if let MatchNt(ref name, ref frag_spec) = *t { |
9cc50fc6 | 688 | for &(sp, ref next_token) in &suffix_first.tokens { |
9e0c209e | 689 | match is_in_follow(next_token, &frag_spec.name.as_str()) { |
3157f602 | 690 | Err((msg, help)) => { |
9e0c209e | 691 | sess.span_diagnostic.struct_span_err(sp, &msg).help(help).emit(); |
9cc50fc6 SL |
692 | // don't bother reporting every source of |
693 | // conflict for a particular element of `last`. | |
694 | continue 'each_last; | |
695 | } | |
696 | Ok(true) => {} | |
697 | Ok(false) => { | |
698 | let may_be = if last.tokens.len() == 1 && | |
699 | suffix_first.tokens.len() == 1 | |
700 | { | |
701 | "is" | |
702 | } else { | |
703 | "may be" | |
704 | }; | |
705 | ||
9e0c209e | 706 | sess.span_diagnostic.span_err( |
3157f602 | 707 | sp, |
9cc50fc6 SL |
708 | &format!("`${name}:{frag}` {may_be} followed by `{next}`, which \ |
709 | is not allowed for `{frag}` fragments", | |
710 | name=name, | |
711 | frag=frag_spec, | |
712 | next=token_to_string(next_token), | |
3157f602 XL |
713 | may_be=may_be) |
714 | ); | |
9cc50fc6 SL |
715 | } |
716 | } | |
717 | } | |
718 | } | |
719 | } | |
720 | } | |
721 | last | |
722 | } | |
723 | ||
9cc50fc6 | 724 | fn token_can_be_followed_by_any(tok: &Token) -> bool { |
a7813a04 | 725 | if let &MatchNt(_, ref frag_spec) = tok { |
9cc50fc6 SL |
726 | frag_can_be_followed_by_any(&frag_spec.name.as_str()) |
727 | } else { | |
728 | // (Non NT's can always be followed by anthing in matchers.) | |
729 | true | |
730 | } | |
731 | } | |
732 | ||
733 | /// True if a fragment of type `frag` can be followed by any sort of | |
734 | /// token. We use this (among other things) as a useful approximation | |
735 | /// for when `frag` can be followed by a repetition like `$(...)*` or | |
736 | /// `$(...)+`. In general, these can be a bit tricky to reason about, | |
737 | /// so we adopt a conservative position that says that any fragment | |
738 | /// specifier which consumes at most one token tree can be followed by | |
739 | /// a fragment specifier (indeed, these fragments can be followed by | |
740 | /// ANYTHING without fear of future compatibility hazards). | |
741 | fn frag_can_be_followed_by_any(frag: &str) -> bool { | |
742 | match frag { | |
3157f602 | 743 | "item" | // always terminated by `}` or `;` |
62682a34 SL |
744 | "block" | // exactly one token tree |
745 | "ident" | // exactly one token tree | |
3157f602 XL |
746 | "meta" | // exactly one token tree |
747 | "tt" => // exactly one token tree | |
62682a34 SL |
748 | true, |
749 | ||
750 | _ => | |
751 | false, | |
752 | } | |
753 | } | |
754 | ||
755 | /// True if `frag` can legally be followed by the token `tok`. For | |
9cc50fc6 | 756 | /// fragments that can consume an unbounded number of tokens, `tok` |
62682a34 SL |
757 | /// must be within a well-defined follow set. This is intended to |
758 | /// guarantee future compatibility: for example, without this rule, if | |
759 | /// we expanded `expr` to include a new binary operator, we might | |
760 | /// break macros that were relying on that binary operator as a | |
761 | /// separator. | |
9cc50fc6 | 762 | // when changing this do not forget to update doc/book/macros.md! |
9e0c209e | 763 | fn is_in_follow(tok: &Token, frag: &str) -> Result<bool, (String, &'static str)> { |
1a4d82fc | 764 | if let &CloseDelim(_) = tok { |
62682a34 SL |
765 | // closing a token tree can never be matched by any fragment; |
766 | // iow, we always require that `(` and `)` match, etc. | |
85aaf69f SL |
767 | Ok(true) |
768 | } else { | |
769 | match frag { | |
770 | "item" => { | |
771 | // since items *must* be followed by either a `;` or a `}`, we can | |
772 | // accept anything after them | |
773 | Ok(true) | |
774 | }, | |
775 | "block" => { | |
b039eaaf | 776 | // anything can follow block, the braces provide an easy boundary to |
85aaf69f SL |
777 | // maintain |
778 | Ok(true) | |
779 | }, | |
780 | "stmt" | "expr" => { | |
781 | match *tok { | |
782 | FatArrow | Comma | Semi => Ok(true), | |
783 | _ => Ok(false) | |
784 | } | |
785 | }, | |
786 | "pat" => { | |
787 | match *tok { | |
9cc50fc6 | 788 | FatArrow | Comma | Eq | BinOp(token::Or) => Ok(true), |
a7813a04 XL |
789 | Ident(i) if (i.name.as_str() == "if" || |
790 | i.name.as_str() == "in") => Ok(true), | |
85aaf69f SL |
791 | _ => Ok(false) |
792 | } | |
793 | }, | |
794 | "path" | "ty" => { | |
795 | match *tok { | |
9cc50fc6 SL |
796 | OpenDelim(token::DelimToken::Brace) | OpenDelim(token::DelimToken::Bracket) | |
797 | Comma | FatArrow | Colon | Eq | Gt | Semi | BinOp(token::Or) => Ok(true), | |
a7813a04 XL |
798 | MatchNt(_, ref frag) if frag.name.as_str() == "block" => Ok(true), |
799 | Ident(i) if i.name.as_str() == "as" || i.name.as_str() == "where" => Ok(true), | |
85aaf69f SL |
800 | _ => Ok(false) |
801 | } | |
802 | }, | |
803 | "ident" => { | |
804 | // being a single token, idents are harmless | |
805 | Ok(true) | |
806 | }, | |
807 | "meta" | "tt" => { | |
808 | // being either a single token or a delimited sequence, tt is | |
809 | // harmless | |
810 | Ok(true) | |
811 | }, | |
3157f602 XL |
812 | _ => Err((format!("invalid fragment specifier `{}`", frag), |
813 | "valid fragment specifiers are `ident`, `block`, \ | |
814 | `stmt`, `expr`, `pat`, `ty`, `path`, `meta`, `tt` \ | |
815 | and `item`")) | |
85aaf69f | 816 | } |
1a4d82fc | 817 | } |
223e47cc | 818 | } |
9cc50fc6 SL |
819 | |
820 | fn has_legal_fragment_specifier(tok: &Token) -> Result<(), String> { | |
821 | debug!("has_legal_fragment_specifier({:?})", tok); | |
a7813a04 | 822 | if let &MatchNt(_, ref frag_spec) = tok { |
9cc50fc6 SL |
823 | let s = &frag_spec.name.as_str(); |
824 | if !is_legal_fragment_specifier(s) { | |
825 | return Err(s.to_string()); | |
826 | } | |
827 | } | |
828 | Ok(()) | |
829 | } | |
830 | ||
831 | fn is_legal_fragment_specifier(frag: &str) -> bool { | |
832 | match frag { | |
833 | "item" | "block" | "stmt" | "expr" | "pat" | | |
834 | "path" | "ty" | "ident" | "meta" | "tt" => true, | |
835 | _ => false, | |
836 | } | |
837 | } |