]> git.proxmox.com Git - cargo.git/blob - vendor/regex-syntax/src/ast/visitor.rs
New upstream version 0.52.0
[cargo.git] / vendor / regex-syntax / src / ast / visitor.rs
1 use std::fmt;
2
3 use crate::ast::{self, Ast};
4
5 /// A trait for visiting an abstract syntax tree (AST) in depth first order.
6 ///
7 /// The principle aim of this trait is to enable callers to perform case
8 /// analysis on an abstract syntax tree without necessarily using recursion.
9 /// In particular, this permits callers to do case analysis with constant stack
10 /// usage, which can be important since the size of an abstract syntax tree
11 /// may be proportional to end user input.
12 ///
13 /// Typical usage of this trait involves providing an implementation and then
14 /// running it using the [`visit`](fn.visit.html) function.
15 ///
16 /// Note that the abstract syntax tree for a regular expression is quite
17 /// complex. Unless you specifically need it, you might be able to use the
18 /// much simpler
19 /// [high-level intermediate representation](../hir/struct.Hir.html)
20 /// and its
21 /// [corresponding `Visitor` trait](../hir/trait.Visitor.html)
22 /// instead.
23 pub trait Visitor {
24 /// The result of visiting an AST.
25 type Output;
26 /// An error that visiting an AST might return.
27 type Err;
28
29 /// All implementors of `Visitor` must provide a `finish` method, which
30 /// yields the result of visiting the AST or an error.
31 fn finish(self) -> Result<Self::Output, Self::Err>;
32
33 /// This method is called before beginning traversal of the AST.
34 fn start(&mut self) {}
35
36 /// This method is called on an `Ast` before descending into child `Ast`
37 /// nodes.
38 fn visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
39 Ok(())
40 }
41
42 /// This method is called on an `Ast` after descending all of its child
43 /// `Ast` nodes.
44 fn visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
45 Ok(())
46 }
47
48 /// This method is called between child nodes of an
49 /// [`Alternation`](struct.Alternation.html).
50 fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
51 Ok(())
52 }
53
54 /// This method is called on every
55 /// [`ClassSetItem`](enum.ClassSetItem.html)
56 /// before descending into child nodes.
57 fn visit_class_set_item_pre(
58 &mut self,
59 _ast: &ast::ClassSetItem,
60 ) -> Result<(), Self::Err> {
61 Ok(())
62 }
63
64 /// This method is called on every
65 /// [`ClassSetItem`](enum.ClassSetItem.html)
66 /// after descending into child nodes.
67 fn visit_class_set_item_post(
68 &mut self,
69 _ast: &ast::ClassSetItem,
70 ) -> Result<(), Self::Err> {
71 Ok(())
72 }
73
74 /// This method is called on every
75 /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
76 /// before descending into child nodes.
77 fn visit_class_set_binary_op_pre(
78 &mut self,
79 _ast: &ast::ClassSetBinaryOp,
80 ) -> Result<(), Self::Err> {
81 Ok(())
82 }
83
84 /// This method is called on every
85 /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
86 /// after descending into child nodes.
87 fn visit_class_set_binary_op_post(
88 &mut self,
89 _ast: &ast::ClassSetBinaryOp,
90 ) -> Result<(), Self::Err> {
91 Ok(())
92 }
93
94 /// This method is called between the left hand and right hand child nodes
95 /// of a [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html).
96 fn visit_class_set_binary_op_in(
97 &mut self,
98 _ast: &ast::ClassSetBinaryOp,
99 ) -> Result<(), Self::Err> {
100 Ok(())
101 }
102 }
103
104 /// Executes an implementation of `Visitor` in constant stack space.
105 ///
106 /// This function will visit every node in the given `Ast` while calling the
107 /// appropriate methods provided by the
108 /// [`Visitor`](trait.Visitor.html) trait.
109 ///
110 /// The primary use case for this method is when one wants to perform case
111 /// analysis over an `Ast` without using a stack size proportional to the depth
112 /// of the `Ast`. Namely, this method will instead use constant stack size, but
113 /// will use heap space proportional to the size of the `Ast`. This may be
114 /// desirable in cases where the size of `Ast` is proportional to end user
115 /// input.
116 ///
117 /// If the visitor returns an error at any point, then visiting is stopped and
118 /// the error is returned.
119 pub fn visit<V: Visitor>(ast: &Ast, visitor: V) -> Result<V::Output, V::Err> {
120 HeapVisitor::new().visit(ast, visitor)
121 }
122
123 /// HeapVisitor visits every item in an `Ast` recursively using constant stack
124 /// size and a heap size proportional to the size of the `Ast`.
125 struct HeapVisitor<'a> {
126 /// A stack of `Ast` nodes. This is roughly analogous to the call stack
127 /// used in a typical recursive visitor.
128 stack: Vec<(&'a Ast, Frame<'a>)>,
129 /// Similar to the `Ast` stack above, but is used only for character
130 /// classes. In particular, character classes embed their own mini
131 /// recursive syntax.
132 stack_class: Vec<(ClassInduct<'a>, ClassFrame<'a>)>,
133 }
134
135 /// Represents a single stack frame while performing structural induction over
136 /// an `Ast`.
137 enum Frame<'a> {
138 /// A stack frame allocated just before descending into a repetition
139 /// operator's child node.
140 Repetition(&'a ast::Repetition),
141 /// A stack frame allocated just before descending into a group's child
142 /// node.
143 Group(&'a ast::Group),
144 /// The stack frame used while visiting every child node of a concatenation
145 /// of expressions.
146 Concat {
147 /// The child node we are currently visiting.
148 head: &'a Ast,
149 /// The remaining child nodes to visit (which may be empty).
150 tail: &'a [Ast],
151 },
152 /// The stack frame used while visiting every child node of an alternation
153 /// of expressions.
154 Alternation {
155 /// The child node we are currently visiting.
156 head: &'a Ast,
157 /// The remaining child nodes to visit (which may be empty).
158 tail: &'a [Ast],
159 },
160 }
161
162 /// Represents a single stack frame while performing structural induction over
163 /// a character class.
164 enum ClassFrame<'a> {
165 /// The stack frame used while visiting every child node of a union of
166 /// character class items.
167 Union {
168 /// The child node we are currently visiting.
169 head: &'a ast::ClassSetItem,
170 /// The remaining child nodes to visit (which may be empty).
171 tail: &'a [ast::ClassSetItem],
172 },
173 /// The stack frame used while a binary class operation.
174 Binary { op: &'a ast::ClassSetBinaryOp },
175 /// A stack frame allocated just before descending into a binary operator's
176 /// left hand child node.
177 BinaryLHS {
178 op: &'a ast::ClassSetBinaryOp,
179 lhs: &'a ast::ClassSet,
180 rhs: &'a ast::ClassSet,
181 },
182 /// A stack frame allocated just before descending into a binary operator's
183 /// right hand child node.
184 BinaryRHS { op: &'a ast::ClassSetBinaryOp, rhs: &'a ast::ClassSet },
185 }
186
187 /// A representation of the inductive step when performing structural induction
188 /// over a character class.
189 ///
190 /// Note that there is no analogous explicit type for the inductive step for
191 /// `Ast` nodes because the inductive step is just an `Ast`. For character
192 /// classes, the inductive step can produce one of two possible child nodes:
193 /// an item or a binary operation. (An item cannot be a binary operation
194 /// because that would imply binary operations can be unioned in the concrete
195 /// syntax, which is not possible.)
196 enum ClassInduct<'a> {
197 Item(&'a ast::ClassSetItem),
198 BinaryOp(&'a ast::ClassSetBinaryOp),
199 }
200
201 impl<'a> HeapVisitor<'a> {
202 fn new() -> HeapVisitor<'a> {
203 HeapVisitor { stack: vec![], stack_class: vec![] }
204 }
205
206 fn visit<V: Visitor>(
207 &mut self,
208 mut ast: &'a Ast,
209 mut visitor: V,
210 ) -> Result<V::Output, V::Err> {
211 self.stack.clear();
212 self.stack_class.clear();
213
214 visitor.start();
215 loop {
216 visitor.visit_pre(ast)?;
217 if let Some(x) = self.induct(ast, &mut visitor)? {
218 let child = x.child();
219 self.stack.push((ast, x));
220 ast = child;
221 continue;
222 }
223 // No induction means we have a base case, so we can post visit
224 // it now.
225 visitor.visit_post(ast)?;
226
227 // At this point, we now try to pop our call stack until it is
228 // either empty or we hit another inductive case.
229 loop {
230 let (post_ast, frame) = match self.stack.pop() {
231 None => return visitor.finish(),
232 Some((post_ast, frame)) => (post_ast, frame),
233 };
234 // If this is a concat/alternate, then we might have additional
235 // inductive steps to process.
236 if let Some(x) = self.pop(frame) {
237 if let Frame::Alternation { .. } = x {
238 visitor.visit_alternation_in()?;
239 }
240 ast = x.child();
241 self.stack.push((post_ast, x));
242 break;
243 }
244 // Otherwise, we've finished visiting all the child nodes for
245 // this AST, so we can post visit it now.
246 visitor.visit_post(post_ast)?;
247 }
248 }
249 }
250
251 /// Build a stack frame for the given AST if one is needed (which occurs if
252 /// and only if there are child nodes in the AST). Otherwise, return None.
253 ///
254 /// If this visits a class, then the underlying visitor implementation may
255 /// return an error which will be passed on here.
256 fn induct<V: Visitor>(
257 &mut self,
258 ast: &'a Ast,
259 visitor: &mut V,
260 ) -> Result<Option<Frame<'a>>, V::Err> {
261 Ok(match *ast {
262 Ast::Class(ast::Class::Bracketed(ref x)) => {
263 self.visit_class(x, visitor)?;
264 None
265 }
266 Ast::Repetition(ref x) => Some(Frame::Repetition(x)),
267 Ast::Group(ref x) => Some(Frame::Group(x)),
268 Ast::Concat(ref x) if x.asts.is_empty() => None,
269 Ast::Concat(ref x) => {
270 Some(Frame::Concat { head: &x.asts[0], tail: &x.asts[1..] })
271 }
272 Ast::Alternation(ref x) if x.asts.is_empty() => None,
273 Ast::Alternation(ref x) => Some(Frame::Alternation {
274 head: &x.asts[0],
275 tail: &x.asts[1..],
276 }),
277 _ => None,
278 })
279 }
280
281 /// Pops the given frame. If the frame has an additional inductive step,
282 /// then return it, otherwise return `None`.
283 fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
284 match induct {
285 Frame::Repetition(_) => None,
286 Frame::Group(_) => None,
287 Frame::Concat { tail, .. } => {
288 if tail.is_empty() {
289 None
290 } else {
291 Some(Frame::Concat { head: &tail[0], tail: &tail[1..] })
292 }
293 }
294 Frame::Alternation { tail, .. } => {
295 if tail.is_empty() {
296 None
297 } else {
298 Some(Frame::Alternation {
299 head: &tail[0],
300 tail: &tail[1..],
301 })
302 }
303 }
304 }
305 }
306
307 fn visit_class<V: Visitor>(
308 &mut self,
309 ast: &'a ast::ClassBracketed,
310 visitor: &mut V,
311 ) -> Result<(), V::Err> {
312 let mut ast = ClassInduct::from_bracketed(ast);
313 loop {
314 self.visit_class_pre(&ast, visitor)?;
315 if let Some(x) = self.induct_class(&ast) {
316 let child = x.child();
317 self.stack_class.push((ast, x));
318 ast = child;
319 continue;
320 }
321 self.visit_class_post(&ast, visitor)?;
322
323 // At this point, we now try to pop our call stack until it is
324 // either empty or we hit another inductive case.
325 loop {
326 let (post_ast, frame) = match self.stack_class.pop() {
327 None => return Ok(()),
328 Some((post_ast, frame)) => (post_ast, frame),
329 };
330 // If this is a union or a binary op, then we might have
331 // additional inductive steps to process.
332 if let Some(x) = self.pop_class(frame) {
333 if let ClassFrame::BinaryRHS { ref op, .. } = x {
334 visitor.visit_class_set_binary_op_in(op)?;
335 }
336 ast = x.child();
337 self.stack_class.push((post_ast, x));
338 break;
339 }
340 // Otherwise, we've finished visiting all the child nodes for
341 // this class node, so we can post visit it now.
342 self.visit_class_post(&post_ast, visitor)?;
343 }
344 }
345 }
346
347 /// Call the appropriate `Visitor` methods given an inductive step.
348 fn visit_class_pre<V: Visitor>(
349 &self,
350 ast: &ClassInduct<'a>,
351 visitor: &mut V,
352 ) -> Result<(), V::Err> {
353 match *ast {
354 ClassInduct::Item(item) => {
355 visitor.visit_class_set_item_pre(item)?;
356 }
357 ClassInduct::BinaryOp(op) => {
358 visitor.visit_class_set_binary_op_pre(op)?;
359 }
360 }
361 Ok(())
362 }
363
364 /// Call the appropriate `Visitor` methods given an inductive step.
365 fn visit_class_post<V: Visitor>(
366 &self,
367 ast: &ClassInduct<'a>,
368 visitor: &mut V,
369 ) -> Result<(), V::Err> {
370 match *ast {
371 ClassInduct::Item(item) => {
372 visitor.visit_class_set_item_post(item)?;
373 }
374 ClassInduct::BinaryOp(op) => {
375 visitor.visit_class_set_binary_op_post(op)?;
376 }
377 }
378 Ok(())
379 }
380
381 /// Build a stack frame for the given class node if one is needed (which
382 /// occurs if and only if there are child nodes). Otherwise, return None.
383 fn induct_class(&self, ast: &ClassInduct<'a>) -> Option<ClassFrame<'a>> {
384 match *ast {
385 ClassInduct::Item(&ast::ClassSetItem::Bracketed(ref x)) => {
386 match x.kind {
387 ast::ClassSet::Item(ref item) => {
388 Some(ClassFrame::Union { head: item, tail: &[] })
389 }
390 ast::ClassSet::BinaryOp(ref op) => {
391 Some(ClassFrame::Binary { op: op })
392 }
393 }
394 }
395 ClassInduct::Item(&ast::ClassSetItem::Union(ref x)) => {
396 if x.items.is_empty() {
397 None
398 } else {
399 Some(ClassFrame::Union {
400 head: &x.items[0],
401 tail: &x.items[1..],
402 })
403 }
404 }
405 ClassInduct::BinaryOp(op) => Some(ClassFrame::BinaryLHS {
406 op: op,
407 lhs: &op.lhs,
408 rhs: &op.rhs,
409 }),
410 _ => None,
411 }
412 }
413
414 /// Pops the given frame. If the frame has an additional inductive step,
415 /// then return it, otherwise return `None`.
416 fn pop_class(&self, induct: ClassFrame<'a>) -> Option<ClassFrame<'a>> {
417 match induct {
418 ClassFrame::Union { tail, .. } => {
419 if tail.is_empty() {
420 None
421 } else {
422 Some(ClassFrame::Union {
423 head: &tail[0],
424 tail: &tail[1..],
425 })
426 }
427 }
428 ClassFrame::Binary { .. } => None,
429 ClassFrame::BinaryLHS { op, rhs, .. } => {
430 Some(ClassFrame::BinaryRHS { op: op, rhs: rhs })
431 }
432 ClassFrame::BinaryRHS { .. } => None,
433 }
434 }
435 }
436
437 impl<'a> Frame<'a> {
438 /// Perform the next inductive step on this frame and return the next
439 /// child AST node to visit.
440 fn child(&self) -> &'a Ast {
441 match *self {
442 Frame::Repetition(rep) => &rep.ast,
443 Frame::Group(group) => &group.ast,
444 Frame::Concat { head, .. } => head,
445 Frame::Alternation { head, .. } => head,
446 }
447 }
448 }
449
450 impl<'a> ClassFrame<'a> {
451 /// Perform the next inductive step on this frame and return the next
452 /// child class node to visit.
453 fn child(&self) -> ClassInduct<'a> {
454 match *self {
455 ClassFrame::Union { head, .. } => ClassInduct::Item(head),
456 ClassFrame::Binary { op, .. } => ClassInduct::BinaryOp(op),
457 ClassFrame::BinaryLHS { ref lhs, .. } => {
458 ClassInduct::from_set(lhs)
459 }
460 ClassFrame::BinaryRHS { ref rhs, .. } => {
461 ClassInduct::from_set(rhs)
462 }
463 }
464 }
465 }
466
467 impl<'a> ClassInduct<'a> {
468 fn from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a> {
469 ClassInduct::from_set(&ast.kind)
470 }
471
472 fn from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a> {
473 match *ast {
474 ast::ClassSet::Item(ref item) => ClassInduct::Item(item),
475 ast::ClassSet::BinaryOp(ref op) => ClassInduct::BinaryOp(op),
476 }
477 }
478 }
479
480 impl<'a> fmt::Debug for ClassFrame<'a> {
481 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
482 let x = match *self {
483 ClassFrame::Union { .. } => "Union",
484 ClassFrame::Binary { .. } => "Binary",
485 ClassFrame::BinaryLHS { .. } => "BinaryLHS",
486 ClassFrame::BinaryRHS { .. } => "BinaryRHS",
487 };
488 write!(f, "{}", x)
489 }
490 }
491
492 impl<'a> fmt::Debug for ClassInduct<'a> {
493 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
494 let x = match *self {
495 ClassInduct::Item(it) => match *it {
496 ast::ClassSetItem::Empty(_) => "Item(Empty)",
497 ast::ClassSetItem::Literal(_) => "Item(Literal)",
498 ast::ClassSetItem::Range(_) => "Item(Range)",
499 ast::ClassSetItem::Ascii(_) => "Item(Ascii)",
500 ast::ClassSetItem::Perl(_) => "Item(Perl)",
501 ast::ClassSetItem::Unicode(_) => "Item(Unicode)",
502 ast::ClassSetItem::Bracketed(_) => "Item(Bracketed)",
503 ast::ClassSetItem::Union(_) => "Item(Union)",
504 },
505 ClassInduct::BinaryOp(it) => match it.kind {
506 ast::ClassSetBinaryOpKind::Intersection => {
507 "BinaryOp(Intersection)"
508 }
509 ast::ClassSetBinaryOpKind::Difference => {
510 "BinaryOp(Difference)"
511 }
512 ast::ClassSetBinaryOpKind::SymmetricDifference => {
513 "BinaryOp(SymmetricDifference)"
514 }
515 },
516 };
517 write!(f, "{}", x)
518 }
519 }