]>
Commit | Line | Data |
---|---|---|
064997fb FG |
1 | //! An NFA-based parser, which is porting from rustc mbe parsing code |
2 | //! | |
3 | //! See <https://github.com/rust-lang/rust/blob/70b18bc2cbac4712020019f5bf57c00905373205/compiler/rustc_expand/src/mbe/macro_parser.rs> | |
4 | //! Here is a quick intro to how the parser works, copied from rustc: | |
5 | //! | |
6 | //! A 'position' is a dot in the middle of a matcher, usually represented as a | |
7 | //! dot. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`. | |
8 | //! | |
9 | //! The parser walks through the input a character at a time, maintaining a list | |
10 | //! of threads consistent with the current position in the input string: `cur_items`. | |
11 | //! | |
12 | //! As it processes them, it fills up `eof_items` with threads that would be valid if | |
13 | //! the macro invocation is now over, `bb_items` with threads that are waiting on | |
14 | //! a Rust non-terminal like `$e:expr`, and `next_items` with threads that are waiting | |
15 | //! on a particular token. Most of the logic concerns moving the · through the | |
16 | //! repetitions indicated by Kleene stars. The rules for moving the · without | |
17 | //! consuming any input are called epsilon transitions. It only advances or calls | |
18 | //! out to the real Rust parser when no `cur_items` threads remain. | |
19 | //! | |
20 | //! Example: | |
21 | //! | |
22 | //! ```text, ignore | |
23 | //! Start parsing a a a a b against [· a $( a )* a b]. | |
24 | //! | |
25 | //! Remaining input: a a a a b | |
26 | //! next: [· a $( a )* a b] | |
27 | //! | |
28 | //! - - - Advance over an a. - - - | |
29 | //! | |
30 | //! Remaining input: a a a b | |
31 | //! cur: [a · $( a )* a b] | |
32 | //! Descend/Skip (first item). | |
33 | //! next: [a $( · a )* a b] [a $( a )* · a b]. | |
34 | //! | |
35 | //! - - - Advance over an a. - - - | |
36 | //! | |
37 | //! Remaining input: a a b | |
38 | //! cur: [a $( a · )* a b] [a $( a )* a · b] | |
39 | //! Follow epsilon transition: Finish/Repeat (first item) | |
40 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] | |
41 | //! | |
42 | //! - - - Advance over an a. - - - (this looks exactly like the last step) | |
43 | //! | |
44 | //! Remaining input: a b | |
45 | //! cur: [a $( a · )* a b] [a $( a )* a · b] | |
46 | //! Follow epsilon transition: Finish/Repeat (first item) | |
47 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] | |
48 | //! | |
49 | //! - - - Advance over an a. - - - (this looks exactly like the last step) | |
50 | //! | |
51 | //! Remaining input: b | |
52 | //! cur: [a $( a · )* a b] [a $( a )* a · b] | |
53 | //! Follow epsilon transition: Finish/Repeat (first item) | |
54 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] | |
55 | //! | |
56 | //! - - - Advance over a b. - - - | |
57 | //! | |
58 | //! Remaining input: '' | |
59 | //! eof: [a $( a )* a b ·] | |
60 | //! ``` | |
61 | ||
62 | use std::rc::Rc; | |
63 | ||
64 | use smallvec::{smallvec, SmallVec}; | |
65 | use syntax::SmolStr; | |
66 | ||
67 | use crate::{ | |
68 | expander::{Binding, Bindings, ExpandResult, Fragment}, | |
69 | parser::{Op, RepeatKind, Separator}, | |
70 | tt_iter::TtIter, | |
71 | ExpandError, MetaTemplate, | |
72 | }; | |
73 | ||
74 | impl Bindings { | |
75 | fn push_optional(&mut self, name: &SmolStr) { | |
76 | // FIXME: Do we have a better way to represent an empty token ? | |
77 | // Insert an empty subtree for empty token | |
78 | let tt = tt::Subtree::default().into(); | |
79 | self.inner.insert(name.clone(), Binding::Fragment(Fragment::Tokens(tt))); | |
80 | } | |
81 | ||
82 | fn push_empty(&mut self, name: &SmolStr) { | |
83 | self.inner.insert(name.clone(), Binding::Empty); | |
84 | } | |
85 | ||
86 | fn bindings(&self) -> impl Iterator<Item = &Binding> { | |
87 | self.inner.values() | |
88 | } | |
89 | } | |
90 | ||
91 | #[derive(Clone, Debug, Default, PartialEq, Eq)] | |
92 | pub(super) struct Match { | |
93 | pub(super) bindings: Bindings, | |
94 | /// We currently just keep the first error and count the rest to compare matches. | |
95 | pub(super) err: Option<ExpandError>, | |
96 | pub(super) err_count: usize, | |
97 | /// How many top-level token trees were left to match. | |
98 | pub(super) unmatched_tts: usize, | |
99 | /// The number of bound variables | |
100 | pub(super) bound_count: usize, | |
101 | } | |
102 | ||
103 | impl Match { | |
104 | fn add_err(&mut self, err: ExpandError) { | |
105 | let prev_err = self.err.take(); | |
106 | self.err = prev_err.or(Some(err)); | |
107 | self.err_count += 1; | |
108 | } | |
109 | } | |
110 | ||
111 | /// Matching errors are added to the `Match`. | |
112 | pub(super) fn match_(pattern: &MetaTemplate, input: &tt::Subtree) -> Match { | |
113 | let mut res = match_loop(pattern, input); | |
114 | res.bound_count = count(res.bindings.bindings()); | |
115 | return res; | |
116 | ||
117 | fn count<'a>(bindings: impl Iterator<Item = &'a Binding>) -> usize { | |
118 | bindings | |
119 | .map(|it| match it { | |
120 | Binding::Fragment(_) => 1, | |
121 | Binding::Empty => 1, | |
122 | Binding::Nested(it) => count(it.iter()), | |
123 | }) | |
124 | .sum() | |
125 | } | |
126 | } | |
127 | ||
128 | #[derive(Debug, Clone)] | |
129 | enum BindingKind { | |
130 | Empty(SmolStr), | |
131 | Optional(SmolStr), | |
132 | Fragment(SmolStr, Fragment), | |
133 | Nested(usize, usize), | |
134 | } | |
135 | ||
136 | #[derive(Debug, Clone)] | |
137 | struct BindingsIdx(usize, usize); | |
138 | ||
139 | #[derive(Debug, Clone)] | |
140 | enum LinkNode<T> { | |
141 | Node(T), | |
142 | Parent { idx: usize, len: usize }, | |
143 | } | |
144 | ||
145 | #[derive(Default)] | |
146 | struct BindingsBuilder { | |
147 | nodes: Vec<Vec<LinkNode<Rc<BindingKind>>>>, | |
148 | nested: Vec<Vec<LinkNode<usize>>>, | |
149 | } | |
150 | ||
151 | impl BindingsBuilder { | |
152 | fn alloc(&mut self) -> BindingsIdx { | |
153 | let idx = self.nodes.len(); | |
154 | self.nodes.push(Vec::new()); | |
155 | let nidx = self.nested.len(); | |
156 | self.nested.push(Vec::new()); | |
157 | BindingsIdx(idx, nidx) | |
158 | } | |
159 | ||
160 | fn copy(&mut self, bindings: &BindingsIdx) -> BindingsIdx { | |
161 | let idx = copy_parent(bindings.0, &mut self.nodes); | |
162 | let nidx = copy_parent(bindings.1, &mut self.nested); | |
163 | return BindingsIdx(idx, nidx); | |
164 | ||
165 | fn copy_parent<T>(idx: usize, target: &mut Vec<Vec<LinkNode<T>>>) -> usize | |
166 | where | |
167 | T: Clone, | |
168 | { | |
169 | let new_idx = target.len(); | |
170 | let len = target[idx].len(); | |
171 | if len < 4 { | |
172 | target.push(target[idx].clone()) | |
173 | } else { | |
174 | target.push(vec![LinkNode::Parent { idx, len }]); | |
175 | } | |
176 | new_idx | |
177 | } | |
178 | } | |
179 | ||
180 | fn push_empty(&mut self, idx: &mut BindingsIdx, var: &SmolStr) { | |
181 | self.nodes[idx.0].push(LinkNode::Node(Rc::new(BindingKind::Empty(var.clone())))); | |
182 | } | |
183 | ||
184 | fn push_optional(&mut self, idx: &mut BindingsIdx, var: &SmolStr) { | |
185 | self.nodes[idx.0].push(LinkNode::Node(Rc::new(BindingKind::Optional(var.clone())))); | |
186 | } | |
187 | ||
188 | fn push_fragment(&mut self, idx: &mut BindingsIdx, var: &SmolStr, fragment: Fragment) { | |
189 | self.nodes[idx.0] | |
190 | .push(LinkNode::Node(Rc::new(BindingKind::Fragment(var.clone(), fragment)))); | |
191 | } | |
192 | ||
193 | fn push_nested(&mut self, parent: &mut BindingsIdx, child: &BindingsIdx) { | |
194 | let BindingsIdx(idx, nidx) = self.copy(child); | |
195 | self.nodes[parent.0].push(LinkNode::Node(Rc::new(BindingKind::Nested(idx, nidx)))); | |
196 | } | |
197 | ||
198 | fn push_default(&mut self, idx: &mut BindingsIdx) { | |
199 | self.nested[idx.1].push(LinkNode::Node(idx.0)); | |
200 | let new_idx = self.nodes.len(); | |
201 | self.nodes.push(Vec::new()); | |
202 | idx.0 = new_idx; | |
203 | } | |
204 | ||
205 | fn build(self, idx: &BindingsIdx) -> Bindings { | |
206 | let mut bindings = Bindings::default(); | |
207 | self.build_inner(&mut bindings, &self.nodes[idx.0]); | |
208 | bindings | |
209 | } | |
210 | ||
211 | fn build_inner(&self, bindings: &mut Bindings, link_nodes: &[LinkNode<Rc<BindingKind>>]) { | |
212 | let mut nodes = Vec::new(); | |
213 | self.collect_nodes(link_nodes, &mut nodes); | |
214 | ||
215 | for cmd in nodes { | |
216 | match &**cmd { | |
217 | BindingKind::Empty(name) => { | |
218 | bindings.push_empty(name); | |
219 | } | |
220 | BindingKind::Optional(name) => { | |
221 | bindings.push_optional(name); | |
222 | } | |
223 | BindingKind::Fragment(name, fragment) => { | |
224 | bindings.inner.insert(name.clone(), Binding::Fragment(fragment.clone())); | |
225 | } | |
226 | BindingKind::Nested(idx, nested_idx) => { | |
227 | let mut nested_nodes = Vec::new(); | |
228 | self.collect_nested(*idx, *nested_idx, &mut nested_nodes); | |
229 | ||
230 | for (idx, iter) in nested_nodes.into_iter().enumerate() { | |
231 | for (key, value) in &iter.inner { | |
232 | let bindings = bindings | |
233 | .inner | |
234 | .entry(key.clone()) | |
235 | .or_insert_with(|| Binding::Nested(Vec::new())); | |
236 | ||
237 | if let Binding::Nested(it) = bindings { | |
238 | // insert empty nested bindings before this one | |
239 | while it.len() < idx { | |
240 | it.push(Binding::Nested(Vec::new())); | |
241 | } | |
242 | it.push(value.clone()); | |
243 | } | |
244 | } | |
245 | } | |
246 | } | |
247 | } | |
248 | } | |
249 | } | |
250 | ||
251 | fn collect_nested_ref<'a>( | |
252 | &'a self, | |
253 | id: usize, | |
254 | len: usize, | |
255 | nested_refs: &mut Vec<&'a Vec<LinkNode<Rc<BindingKind>>>>, | |
256 | ) { | |
257 | self.nested[id].iter().take(len).for_each(|it| match it { | |
258 | LinkNode::Node(id) => nested_refs.push(&self.nodes[*id]), | |
259 | LinkNode::Parent { idx, len } => self.collect_nested_ref(*idx, *len, nested_refs), | |
260 | }); | |
261 | } | |
262 | ||
263 | fn collect_nested(&self, idx: usize, nested_idx: usize, nested: &mut Vec<Bindings>) { | |
264 | let last = &self.nodes[idx]; | |
265 | let mut nested_refs = Vec::new(); | |
266 | self.nested[nested_idx].iter().for_each(|it| match *it { | |
267 | LinkNode::Node(idx) => nested_refs.push(&self.nodes[idx]), | |
268 | LinkNode::Parent { idx, len } => self.collect_nested_ref(idx, len, &mut nested_refs), | |
269 | }); | |
270 | nested_refs.push(last); | |
271 | ||
272 | nested_refs.into_iter().for_each(|iter| { | |
273 | let mut child_bindings = Bindings::default(); | |
274 | self.build_inner(&mut child_bindings, iter); | |
275 | nested.push(child_bindings) | |
276 | }) | |
277 | } | |
278 | ||
279 | fn collect_nodes_ref<'a>( | |
280 | &'a self, | |
281 | id: usize, | |
282 | len: usize, | |
283 | nodes: &mut Vec<&'a Rc<BindingKind>>, | |
284 | ) { | |
285 | self.nodes[id].iter().take(len).for_each(|it| match it { | |
286 | LinkNode::Node(it) => nodes.push(it), | |
287 | LinkNode::Parent { idx, len } => self.collect_nodes_ref(*idx, *len, nodes), | |
288 | }); | |
289 | } | |
290 | ||
291 | fn collect_nodes<'a>( | |
292 | &'a self, | |
293 | link_nodes: &'a [LinkNode<Rc<BindingKind>>], | |
294 | nodes: &mut Vec<&'a Rc<BindingKind>>, | |
295 | ) { | |
296 | link_nodes.iter().for_each(|it| match it { | |
297 | LinkNode::Node(it) => nodes.push(it), | |
298 | LinkNode::Parent { idx, len } => self.collect_nodes_ref(*idx, *len, nodes), | |
299 | }); | |
300 | } | |
301 | } | |
302 | ||
303 | #[derive(Debug, Clone)] | |
304 | struct MatchState<'t> { | |
305 | /// The position of the "dot" in this matcher | |
306 | dot: OpDelimitedIter<'t>, | |
307 | ||
308 | /// Token subtree stack | |
309 | /// When matching against matchers with nested delimited submatchers (e.g., `pat ( pat ( .. ) | |
310 | /// pat ) pat`), we need to keep track of the matchers we are descending into. This stack does | |
311 | /// that where the bottom of the stack is the outermost matcher. | |
312 | stack: SmallVec<[OpDelimitedIter<'t>; 4]>, | |
313 | ||
314 | /// The "parent" matcher position if we are in a repetition. That is, the matcher position just | |
315 | /// before we enter the repetition. | |
316 | up: Option<Box<MatchState<'t>>>, | |
317 | ||
318 | /// The separator if we are in a repetition. | |
319 | sep: Option<Separator>, | |
320 | ||
321 | /// The KleeneOp of this sequence if we are in a repetition. | |
322 | sep_kind: Option<RepeatKind>, | |
323 | ||
f2b60f7d | 324 | /// Number of tokens of separator parsed |
064997fb FG |
325 | sep_parsed: Option<usize>, |
326 | ||
327 | /// Matched meta variables bindings | |
328 | bindings: BindingsIdx, | |
329 | ||
330 | /// Cached result of meta variable parsing | |
331 | meta_result: Option<(TtIter<'t>, ExpandResult<Option<Fragment>>)>, | |
332 | ||
333 | /// Is error occuried in this state, will `poised` to "parent" | |
334 | is_error: bool, | |
335 | } | |
336 | ||
337 | /// Process the matcher positions of `cur_items` until it is empty. In the process, this will | |
338 | /// produce more items in `next_items`, `eof_items`, and `bb_items`. | |
339 | /// | |
340 | /// For more info about the how this happens, see the module-level doc comments and the inline | |
341 | /// comments of this function. | |
342 | /// | |
343 | /// # Parameters | |
344 | /// | |
345 | /// - `src`: the current token of the parser. | |
346 | /// - `stack`: the "parent" frames of the token tree | |
347 | /// - `res`: the match result to store errors | |
348 | /// - `cur_items`: the set of current items to be processed. This should be empty by the end of a | |
349 | /// successful execution of this function. | |
350 | /// - `next_items`: the set of newly generated items. These are used to replenish `cur_items` in | |
351 | /// the function `parse`. | |
352 | /// - `eof_items`: the set of items that would be valid if this was the EOF. | |
353 | /// - `bb_items`: the set of items that are waiting for the black-box parser. | |
354 | /// - `error_items`: the set of items in errors, used for error-resilient parsing | |
355 | fn match_loop_inner<'t>( | |
356 | src: TtIter<'t>, | |
357 | stack: &[TtIter<'t>], | |
358 | res: &mut Match, | |
359 | bindings_builder: &mut BindingsBuilder, | |
360 | cur_items: &mut SmallVec<[MatchState<'t>; 1]>, | |
361 | bb_items: &mut SmallVec<[MatchState<'t>; 1]>, | |
362 | next_items: &mut Vec<MatchState<'t>>, | |
363 | eof_items: &mut SmallVec<[MatchState<'t>; 1]>, | |
364 | error_items: &mut SmallVec<[MatchState<'t>; 1]>, | |
365 | ) { | |
366 | macro_rules! try_push { | |
367 | ($items: expr, $it:expr) => { | |
368 | if $it.is_error { | |
369 | error_items.push($it); | |
370 | } else { | |
371 | $items.push($it); | |
372 | } | |
373 | }; | |
374 | } | |
375 | ||
376 | while let Some(mut item) = cur_items.pop() { | |
377 | while item.dot.is_eof() { | |
378 | match item.stack.pop() { | |
379 | Some(frame) => { | |
380 | item.dot = frame; | |
381 | item.dot.next(); | |
382 | } | |
383 | None => break, | |
384 | } | |
385 | } | |
386 | let op = match item.dot.peek() { | |
387 | None => { | |
388 | // We are at or past the end of the matcher of `item`. | |
389 | if item.up.is_some() { | |
390 | if item.sep_parsed.is_none() { | |
391 | // Get the `up` matcher | |
392 | let mut new_pos = *item.up.clone().unwrap(); | |
393 | new_pos.bindings = bindings_builder.copy(&new_pos.bindings); | |
394 | // Add matches from this repetition to the `matches` of `up` | |
395 | bindings_builder.push_nested(&mut new_pos.bindings, &item.bindings); | |
396 | ||
397 | // Move the "dot" past the repetition in `up` | |
398 | new_pos.dot.next(); | |
399 | new_pos.is_error = new_pos.is_error || item.is_error; | |
400 | cur_items.push(new_pos); | |
401 | } | |
402 | ||
403 | // Check if we need a separator. | |
404 | // We check the separator one by one | |
405 | let sep_idx = *item.sep_parsed.as_ref().unwrap_or(&0); | |
406 | let sep_len = item.sep.as_ref().map_or(0, Separator::tt_count); | |
407 | if item.sep.is_some() && sep_idx != sep_len { | |
408 | let sep = item.sep.as_ref().unwrap(); | |
409 | if src.clone().expect_separator(sep, sep_idx) { | |
410 | item.dot.next(); | |
411 | item.sep_parsed = Some(sep_idx + 1); | |
412 | try_push!(next_items, item); | |
413 | } | |
414 | } | |
415 | // We don't need a separator. Move the "dot" back to the beginning of the matcher | |
416 | // and try to match again UNLESS we are only allowed to have _one_ repetition. | |
417 | else if item.sep_kind != Some(RepeatKind::ZeroOrOne) { | |
418 | item.dot = item.dot.reset(); | |
419 | item.sep_parsed = None; | |
420 | bindings_builder.push_default(&mut item.bindings); | |
421 | cur_items.push(item); | |
422 | } | |
423 | } else { | |
424 | // If we are not in a repetition, then being at the end of a matcher means that we have | |
425 | // reached the potential end of the input. | |
426 | try_push!(eof_items, item); | |
427 | } | |
428 | continue; | |
429 | } | |
430 | Some(it) => it, | |
431 | }; | |
432 | ||
433 | // We are in the middle of a matcher. | |
434 | match op { | |
435 | OpDelimited::Op(Op::Repeat { tokens, kind, separator }) => { | |
436 | if matches!(kind, RepeatKind::ZeroOrMore | RepeatKind::ZeroOrOne) { | |
437 | let mut new_item = item.clone(); | |
438 | new_item.bindings = bindings_builder.copy(&new_item.bindings); | |
439 | new_item.dot.next(); | |
440 | collect_vars( | |
441 | &mut |s| { | |
442 | bindings_builder.push_empty(&mut new_item.bindings, &s); | |
443 | }, | |
444 | tokens, | |
445 | ); | |
446 | cur_items.push(new_item); | |
447 | } | |
448 | cur_items.push(MatchState { | |
449 | dot: tokens.iter_delimited(None), | |
450 | stack: Default::default(), | |
451 | up: Some(Box::new(item)), | |
452 | sep: separator.clone(), | |
453 | sep_kind: Some(*kind), | |
454 | sep_parsed: None, | |
455 | bindings: bindings_builder.alloc(), | |
456 | meta_result: None, | |
457 | is_error: false, | |
458 | }) | |
459 | } | |
460 | OpDelimited::Op(Op::Subtree { tokens, delimiter }) => { | |
461 | if let Ok(subtree) = src.clone().expect_subtree() { | |
462 | if subtree.delimiter_kind() == delimiter.map(|it| it.kind) { | |
463 | item.stack.push(item.dot); | |
464 | item.dot = tokens.iter_delimited(delimiter.as_ref()); | |
465 | cur_items.push(item); | |
466 | } | |
467 | } | |
468 | } | |
469 | OpDelimited::Op(Op::Var { kind, name, .. }) => { | |
470 | if let Some(kind) = kind { | |
471 | let mut fork = src.clone(); | |
472 | let match_res = match_meta_var(kind.as_str(), &mut fork); | |
473 | match match_res.err { | |
474 | None => { | |
475 | // Some meta variables are optional (e.g. vis) | |
476 | if match_res.value.is_some() { | |
477 | item.meta_result = Some((fork, match_res)); | |
478 | try_push!(bb_items, item); | |
479 | } else { | |
480 | bindings_builder.push_optional(&mut item.bindings, name); | |
481 | item.dot.next(); | |
482 | cur_items.push(item); | |
483 | } | |
484 | } | |
485 | Some(err) => { | |
486 | res.add_err(err); | |
487 | if let Some(fragment) = match_res.value { | |
488 | bindings_builder.push_fragment(&mut item.bindings, name, fragment); | |
489 | } | |
490 | item.is_error = true; | |
491 | error_items.push(item); | |
492 | } | |
493 | } | |
494 | } | |
495 | } | |
496 | OpDelimited::Op(Op::Leaf(leaf)) => { | |
497 | if let Err(err) = match_leaf(leaf, &mut src.clone()) { | |
498 | res.add_err(err); | |
499 | item.is_error = true; | |
500 | } else { | |
501 | item.dot.next(); | |
502 | } | |
503 | try_push!(next_items, item); | |
504 | } | |
505 | OpDelimited::Op(Op::Ignore { .. } | Op::Index { .. }) => {} | |
506 | OpDelimited::Open => { | |
507 | if matches!(src.clone().next(), Some(tt::TokenTree::Subtree(..))) { | |
508 | item.dot.next(); | |
509 | try_push!(next_items, item); | |
510 | } | |
511 | } | |
512 | OpDelimited::Close => { | |
513 | let is_delim_closed = src.peek_n(0).is_none() && !stack.is_empty(); | |
514 | if is_delim_closed { | |
515 | item.dot.next(); | |
516 | try_push!(next_items, item); | |
517 | } | |
518 | } | |
519 | } | |
520 | } | |
521 | } | |
522 | ||
523 | fn match_loop(pattern: &MetaTemplate, src: &tt::Subtree) -> Match { | |
524 | let mut src = TtIter::new(src); | |
525 | let mut stack: SmallVec<[TtIter<'_>; 1]> = SmallVec::new(); | |
526 | let mut res = Match::default(); | |
527 | let mut error_recover_item = None; | |
528 | ||
529 | let mut bindings_builder = BindingsBuilder::default(); | |
530 | ||
531 | let mut cur_items = smallvec![MatchState { | |
532 | dot: pattern.iter_delimited(None), | |
533 | stack: Default::default(), | |
534 | up: None, | |
535 | sep: None, | |
536 | sep_kind: None, | |
537 | sep_parsed: None, | |
538 | bindings: bindings_builder.alloc(), | |
539 | is_error: false, | |
540 | meta_result: None, | |
541 | }]; | |
542 | ||
543 | let mut next_items = vec![]; | |
544 | ||
545 | loop { | |
546 | let mut bb_items = SmallVec::new(); | |
547 | let mut eof_items = SmallVec::new(); | |
548 | let mut error_items = SmallVec::new(); | |
549 | ||
550 | stdx::always!(next_items.is_empty()); | |
551 | ||
552 | match_loop_inner( | |
553 | src.clone(), | |
554 | &stack, | |
555 | &mut res, | |
556 | &mut bindings_builder, | |
557 | &mut cur_items, | |
558 | &mut bb_items, | |
559 | &mut next_items, | |
560 | &mut eof_items, | |
561 | &mut error_items, | |
562 | ); | |
563 | stdx::always!(cur_items.is_empty()); | |
564 | ||
565 | if !error_items.is_empty() { | |
566 | error_recover_item = error_items.pop().map(|it| it.bindings); | |
567 | } else if let [state, ..] = &*eof_items { | |
568 | error_recover_item = Some(state.bindings.clone()); | |
569 | } | |
570 | ||
571 | // We need to do some post processing after the `match_loop_inner`. | |
572 | // If we reached the EOF, check that there is EXACTLY ONE possible matcher. Otherwise, | |
573 | // either the parse is ambiguous (which should never happen) or there is a syntax error. | |
574 | if src.peek_n(0).is_none() && stack.is_empty() { | |
575 | if let [state] = &*eof_items { | |
576 | // remove all errors, because it is the correct answer ! | |
577 | res = Match::default(); | |
578 | res.bindings = bindings_builder.build(&state.bindings); | |
579 | } else { | |
580 | // Error recovery | |
581 | if let Some(item) = error_recover_item { | |
582 | res.bindings = bindings_builder.build(&item); | |
583 | } | |
584 | res.add_err(ExpandError::UnexpectedToken); | |
585 | } | |
586 | return res; | |
587 | } | |
588 | ||
589 | // If there are no possible next positions AND we aren't waiting for the black-box parser, | |
590 | // then there is a syntax error. | |
591 | // | |
592 | // Another possibility is that we need to call out to parse some rust nonterminal | |
593 | // (black-box) parser. However, if there is not EXACTLY ONE of these, something is wrong. | |
594 | let has_leftover_tokens = (bb_items.is_empty() && next_items.is_empty()) | |
595 | || !(bb_items.is_empty() || next_items.is_empty()) | |
596 | || bb_items.len() > 1; | |
597 | if has_leftover_tokens { | |
598 | res.unmatched_tts += src.len(); | |
599 | while let Some(it) = stack.pop() { | |
600 | src = it; | |
601 | res.unmatched_tts += src.len(); | |
602 | } | |
603 | res.add_err(ExpandError::LeftoverTokens); | |
604 | ||
605 | if let Some(error_reover_item) = error_recover_item { | |
606 | res.bindings = bindings_builder.build(&error_reover_item); | |
607 | } | |
608 | return res; | |
609 | } | |
610 | // Dump all possible `next_items` into `cur_items` for the next iteration. | |
611 | else if !next_items.is_empty() { | |
612 | // Now process the next token | |
613 | cur_items.extend(next_items.drain(..)); | |
614 | ||
615 | match src.next() { | |
616 | Some(tt::TokenTree::Subtree(subtree)) => { | |
617 | stack.push(src.clone()); | |
618 | src = TtIter::new(subtree); | |
619 | } | |
620 | None => { | |
621 | if let Some(iter) = stack.pop() { | |
622 | src = iter; | |
623 | } | |
624 | } | |
625 | _ => (), | |
626 | } | |
627 | } | |
628 | // Finally, we have the case where we need to call the black-box parser to get some | |
629 | // nonterminal. | |
630 | else { | |
631 | stdx::always!(bb_items.len() == 1); | |
632 | let mut item = bb_items.pop().unwrap(); | |
633 | ||
634 | if let Some(OpDelimited::Op(Op::Var { name, .. })) = item.dot.peek() { | |
635 | let (iter, match_res) = item.meta_result.take().unwrap(); | |
636 | match match_res.value { | |
637 | Some(fragment) => { | |
638 | bindings_builder.push_fragment(&mut item.bindings, name, fragment); | |
639 | } | |
640 | None if match_res.err.is_none() => { | |
641 | bindings_builder.push_optional(&mut item.bindings, name); | |
642 | } | |
643 | None => {} | |
644 | } | |
645 | if let Some(err) = match_res.err { | |
646 | res.add_err(err); | |
647 | } | |
648 | src = iter.clone(); | |
649 | item.dot.next(); | |
650 | } else { | |
651 | unreachable!() | |
652 | } | |
653 | cur_items.push(item); | |
654 | } | |
655 | stdx::always!(!cur_items.is_empty()); | |
656 | } | |
657 | } | |
658 | ||
659 | fn match_leaf(lhs: &tt::Leaf, src: &mut TtIter<'_>) -> Result<(), ExpandError> { | |
660 | let rhs = src | |
661 | .expect_leaf() | |
662 | .map_err(|()| ExpandError::binding_error(format!("expected leaf: `{lhs}`")))?; | |
663 | match (lhs, rhs) { | |
664 | ( | |
665 | tt::Leaf::Punct(tt::Punct { char: lhs, .. }), | |
666 | tt::Leaf::Punct(tt::Punct { char: rhs, .. }), | |
667 | ) if lhs == rhs => Ok(()), | |
668 | ( | |
669 | tt::Leaf::Ident(tt::Ident { text: lhs, .. }), | |
670 | tt::Leaf::Ident(tt::Ident { text: rhs, .. }), | |
671 | ) if lhs == rhs => Ok(()), | |
672 | ( | |
673 | tt::Leaf::Literal(tt::Literal { text: lhs, .. }), | |
674 | tt::Leaf::Literal(tt::Literal { text: rhs, .. }), | |
675 | ) if lhs == rhs => Ok(()), | |
676 | _ => Err(ExpandError::UnexpectedToken), | |
677 | } | |
678 | } | |
679 | ||
680 | fn match_meta_var(kind: &str, input: &mut TtIter<'_>) -> ExpandResult<Option<Fragment>> { | |
681 | let fragment = match kind { | |
682 | "path" => parser::PrefixEntryPoint::Path, | |
683 | "ty" => parser::PrefixEntryPoint::Ty, | |
684 | // FIXME: These two should actually behave differently depending on the edition. | |
685 | // | |
686 | // https://doc.rust-lang.org/edition-guide/rust-2021/or-patterns-macro-rules.html | |
687 | "pat" | "pat_param" => parser::PrefixEntryPoint::Pat, | |
688 | "stmt" => parser::PrefixEntryPoint::Stmt, | |
689 | "block" => parser::PrefixEntryPoint::Block, | |
690 | "meta" => parser::PrefixEntryPoint::MetaItem, | |
691 | "item" => parser::PrefixEntryPoint::Item, | |
692 | "vis" => parser::PrefixEntryPoint::Vis, | |
693 | "expr" => { | |
694 | // `expr` should not match underscores. | |
695 | // HACK: Macro expansion should not be done using "rollback and try another alternative". | |
696 | // rustc [explicitly checks the next token][0]. | |
697 | // [0]: https://github.com/rust-lang/rust/blob/f0c4da499/compiler/rustc_expand/src/mbe/macro_parser.rs#L576 | |
698 | match input.peek_n(0) { | |
699 | Some(tt::TokenTree::Leaf(tt::Leaf::Ident(it))) if it.text == "_" => { | |
700 | return ExpandResult::only_err(ExpandError::NoMatchingRule) | |
701 | } | |
702 | _ => {} | |
703 | }; | |
704 | return input | |
705 | .expect_fragment(parser::PrefixEntryPoint::Expr) | |
706 | .map(|tt| tt.map(Fragment::Expr)); | |
707 | } | |
708 | _ => { | |
709 | let tt_result = match kind { | |
710 | "ident" => input | |
711 | .expect_ident() | |
712 | .map(|ident| tt::Leaf::from(ident.clone()).into()) | |
713 | .map_err(|()| ExpandError::binding_error("expected ident")), | |
714 | "tt" => input | |
715 | .expect_tt() | |
716 | .map_err(|()| ExpandError::binding_error("expected token tree")), | |
717 | "lifetime" => input | |
718 | .expect_lifetime() | |
719 | .map_err(|()| ExpandError::binding_error("expected lifetime")), | |
720 | "literal" => { | |
721 | let neg = input.eat_char('-'); | |
722 | input | |
723 | .expect_literal() | |
724 | .map(|literal| { | |
725 | let lit = literal.clone(); | |
726 | match neg { | |
727 | None => lit.into(), | |
728 | Some(neg) => tt::TokenTree::Subtree(tt::Subtree { | |
729 | delimiter: None, | |
730 | token_trees: vec![neg, lit.into()], | |
731 | }), | |
732 | } | |
733 | }) | |
734 | .map_err(|()| ExpandError::binding_error("expected literal")) | |
735 | } | |
736 | _ => Err(ExpandError::UnexpectedToken), | |
737 | }; | |
738 | return tt_result.map(|it| Some(Fragment::Tokens(it))).into(); | |
739 | } | |
740 | }; | |
741 | input.expect_fragment(fragment).map(|it| it.map(Fragment::Tokens)) | |
742 | } | |
743 | ||
744 | fn collect_vars(collector_fun: &mut impl FnMut(SmolStr), pattern: &MetaTemplate) { | |
745 | for op in pattern.iter() { | |
746 | match op { | |
747 | Op::Var { name, .. } => collector_fun(name.clone()), | |
748 | Op::Leaf(_) => (), | |
749 | Op::Subtree { tokens, .. } => collect_vars(collector_fun, tokens), | |
750 | Op::Repeat { tokens, .. } => collect_vars(collector_fun, tokens), | |
751 | Op::Ignore { .. } | Op::Index { .. } => {} | |
752 | } | |
753 | } | |
754 | } | |
755 | impl MetaTemplate { | |
756 | fn iter_delimited<'a>(&'a self, delimited: Option<&'a tt::Delimiter>) -> OpDelimitedIter<'a> { | |
757 | OpDelimitedIter { inner: &self.0, idx: 0, delimited } | |
758 | } | |
759 | } | |
760 | ||
761 | #[derive(Debug, Clone, Copy)] | |
762 | enum OpDelimited<'a> { | |
763 | Op(&'a Op), | |
764 | Open, | |
765 | Close, | |
766 | } | |
767 | ||
768 | #[derive(Debug, Clone, Copy)] | |
769 | struct OpDelimitedIter<'a> { | |
770 | inner: &'a [Op], | |
771 | delimited: Option<&'a tt::Delimiter>, | |
772 | idx: usize, | |
773 | } | |
774 | ||
775 | impl<'a> OpDelimitedIter<'a> { | |
776 | fn is_eof(&self) -> bool { | |
777 | let len = self.inner.len() + if self.delimited.is_some() { 2 } else { 0 }; | |
778 | self.idx >= len | |
779 | } | |
780 | ||
781 | fn peek(&self) -> Option<OpDelimited<'a>> { | |
782 | match self.delimited { | |
783 | None => self.inner.get(self.idx).map(OpDelimited::Op), | |
784 | Some(_) => match self.idx { | |
785 | 0 => Some(OpDelimited::Open), | |
786 | i if i == self.inner.len() + 1 => Some(OpDelimited::Close), | |
787 | i => self.inner.get(i - 1).map(OpDelimited::Op), | |
788 | }, | |
789 | } | |
790 | } | |
791 | ||
792 | fn reset(&self) -> Self { | |
793 | Self { inner: self.inner, idx: 0, delimited: self.delimited } | |
794 | } | |
795 | } | |
796 | ||
797 | impl<'a> Iterator for OpDelimitedIter<'a> { | |
798 | type Item = OpDelimited<'a>; | |
799 | ||
800 | fn next(&mut self) -> Option<Self::Item> { | |
801 | let res = self.peek(); | |
802 | self.idx += 1; | |
803 | res | |
804 | } | |
805 | ||
806 | fn size_hint(&self) -> (usize, Option<usize>) { | |
807 | let len = self.inner.len() + if self.delimited.is_some() { 2 } else { 0 }; | |
808 | let remain = len.saturating_sub(self.idx); | |
809 | (remain, Some(remain)) | |
810 | } | |
811 | } | |
812 | ||
813 | impl<'a> TtIter<'a> { | |
814 | fn expect_separator(&mut self, separator: &Separator, idx: usize) -> bool { | |
815 | let mut fork = self.clone(); | |
816 | let ok = match separator { | |
817 | Separator::Ident(lhs) if idx == 0 => match fork.expect_ident_or_underscore() { | |
818 | Ok(rhs) => rhs.text == lhs.text, | |
819 | Err(_) => false, | |
820 | }, | |
821 | Separator::Literal(lhs) if idx == 0 => match fork.expect_literal() { | |
822 | Ok(rhs) => match rhs { | |
823 | tt::Leaf::Literal(rhs) => rhs.text == lhs.text, | |
824 | tt::Leaf::Ident(rhs) => rhs.text == lhs.text, | |
825 | tt::Leaf::Punct(_) => false, | |
826 | }, | |
827 | Err(_) => false, | |
828 | }, | |
829 | Separator::Puncts(lhss) if idx < lhss.len() => match fork.expect_punct() { | |
830 | Ok(rhs) => rhs.char == lhss[idx].char, | |
831 | Err(_) => false, | |
832 | }, | |
833 | _ => false, | |
834 | }; | |
835 | if ok { | |
836 | *self = fork; | |
837 | } | |
838 | ok | |
839 | } | |
840 | ||
841 | fn expect_tt(&mut self) -> Result<tt::TokenTree, ()> { | |
842 | match self.peek_n(0) { | |
843 | Some(tt::TokenTree::Leaf(tt::Leaf::Punct(punct))) if punct.char == '\'' => { | |
844 | return self.expect_lifetime(); | |
845 | } | |
846 | _ => (), | |
847 | } | |
848 | ||
849 | let tt = self.next().ok_or(())?.clone(); | |
850 | let punct = match tt { | |
851 | tt::TokenTree::Leaf(tt::Leaf::Punct(punct)) if punct.spacing == tt::Spacing::Joint => { | |
852 | punct | |
853 | } | |
854 | _ => return Ok(tt), | |
855 | }; | |
856 | ||
857 | let (second, third) = match (self.peek_n(0), self.peek_n(1)) { | |
858 | ( | |
859 | Some(tt::TokenTree::Leaf(tt::Leaf::Punct(p2))), | |
860 | Some(tt::TokenTree::Leaf(tt::Leaf::Punct(p3))), | |
861 | ) if p2.spacing == tt::Spacing::Joint => (p2.char, Some(p3.char)), | |
862 | (Some(tt::TokenTree::Leaf(tt::Leaf::Punct(p2))), _) => (p2.char, None), | |
863 | _ => return Ok(tt), | |
864 | }; | |
865 | ||
866 | match (punct.char, second, third) { | |
867 | ('.', '.', Some('.' | '=')) | ('<', '<', Some('=')) | ('>', '>', Some('=')) => { | |
868 | let tt2 = self.next().unwrap().clone(); | |
869 | let tt3 = self.next().unwrap().clone(); | |
870 | Ok(tt::Subtree { delimiter: None, token_trees: vec![tt, tt2, tt3] }.into()) | |
871 | } | |
872 | ('-' | '!' | '*' | '/' | '&' | '%' | '^' | '+' | '<' | '=' | '>' | '|', '=', _) | |
873 | | ('-' | '=' | '>', '>', _) | |
874 | | (':', ':', _) | |
875 | | ('.', '.', _) | |
876 | | ('&', '&', _) | |
877 | | ('<', '<', _) | |
878 | | ('|', '|', _) => { | |
879 | let tt2 = self.next().unwrap().clone(); | |
880 | Ok(tt::Subtree { delimiter: None, token_trees: vec![tt, tt2] }.into()) | |
881 | } | |
882 | _ => Ok(tt), | |
883 | } | |
884 | } | |
885 | ||
886 | fn expect_lifetime(&mut self) -> Result<tt::TokenTree, ()> { | |
887 | let punct = self.expect_punct()?; | |
888 | if punct.char != '\'' { | |
889 | return Err(()); | |
890 | } | |
891 | let ident = self.expect_ident_or_underscore()?; | |
892 | ||
893 | Ok(tt::Subtree { | |
894 | delimiter: None, | |
895 | token_trees: vec![ | |
896 | tt::Leaf::Punct(*punct).into(), | |
897 | tt::Leaf::Ident(ident.clone()).into(), | |
898 | ], | |
899 | } | |
900 | .into()) | |
901 | } | |
902 | ||
903 | fn eat_char(&mut self, c: char) -> Option<tt::TokenTree> { | |
904 | let mut fork = self.clone(); | |
905 | match fork.expect_char(c) { | |
906 | Ok(_) => { | |
907 | let tt = self.next().cloned(); | |
908 | *self = fork; | |
909 | tt | |
910 | } | |
911 | Err(_) => None, | |
912 | } | |
913 | } | |
914 | } |