2 This module provides a regular expression printer for `Ast`.
7 use crate::ast
::visitor
::{self, Visitor}
;
8 use crate::ast
::{self, Ast}
;
10 /// A builder for constructing a printer.
12 /// Note that since a printer doesn't have any configuration knobs, this type
13 /// remains unexported.
14 #[derive(Clone, Debug)]
15 struct PrinterBuilder
{
19 impl Default
for PrinterBuilder
{
20 fn default() -> PrinterBuilder
{
26 fn new() -> PrinterBuilder
{
27 PrinterBuilder { _priv: () }
30 fn build(&self) -> Printer
{
35 /// A printer for a regular expression abstract syntax tree.
37 /// A printer converts an abstract syntax tree (AST) to a regular expression
38 /// pattern string. This particular printer uses constant stack space and heap
39 /// space proportional to the size of the AST.
41 /// This printer will not necessarily preserve the original formatting of the
42 /// regular expression pattern string. For example, all whitespace and comments
50 /// Create a new printer.
51 pub fn new() -> Printer
{
52 PrinterBuilder
::new().build()
55 /// Print the given `Ast` to the given writer. The writer must implement
56 /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used
57 /// here are a `fmt::Formatter` (which is available in `fmt::Display`
58 /// implementations) or a `&mut String`.
59 pub fn print
<W
: fmt
::Write
>(&mut self, ast
: &Ast
, wtr
: W
) -> fmt
::Result
{
60 visitor
::visit(ast
, Writer { wtr }
)
69 impl<W
: fmt
::Write
> Visitor
for Writer
<W
> {
71 type Err
= fmt
::Error
;
73 fn finish(self) -> fmt
::Result
{
77 fn visit_pre(&mut self, ast
: &Ast
) -> fmt
::Result
{
79 Ast
::Group(ref x
) => self.fmt_group_pre(x
),
80 Ast
::Class(ast
::Class
::Bracketed(ref x
)) => {
81 self.fmt_class_bracketed_pre(x
)
87 fn visit_post(&mut self, ast
: &Ast
) -> fmt
::Result
{
88 use crate::ast
::Class
;
91 Ast
::Empty(_
) => Ok(()),
92 Ast
::Flags(ref x
) => self.fmt_set_flags(x
),
93 Ast
::Literal(ref x
) => self.fmt_literal(x
),
94 Ast
::Dot(_
) => self.wtr
.write_str("."),
95 Ast
::Assertion(ref x
) => self.fmt_assertion(x
),
96 Ast
::Class(Class
::Perl(ref x
)) => self.fmt_class_perl(x
),
97 Ast
::Class(Class
::Unicode(ref x
)) => self.fmt_class_unicode(x
),
98 Ast
::Class(Class
::Bracketed(ref x
)) => {
99 self.fmt_class_bracketed_post(x
)
101 Ast
::Repetition(ref x
) => self.fmt_repetition(x
),
102 Ast
::Group(ref x
) => self.fmt_group_post(x
),
103 Ast
::Alternation(_
) => Ok(()),
104 Ast
::Concat(_
) => Ok(()),
108 fn visit_alternation_in(&mut self) -> fmt
::Result
{
109 self.wtr
.write_str("|")
112 fn visit_class_set_item_pre(
114 ast
: &ast
::ClassSetItem
,
115 ) -> Result
<(), Self::Err
> {
117 ast
::ClassSetItem
::Bracketed(ref x
) => {
118 self.fmt_class_bracketed_pre(x
)
124 fn visit_class_set_item_post(
126 ast
: &ast
::ClassSetItem
,
127 ) -> Result
<(), Self::Err
> {
128 use crate::ast
::ClassSetItem
::*;
132 Literal(ref x
) => self.fmt_literal(x
),
134 self.fmt_literal(&x
.start
)?
;
135 self.wtr
.write_str("-")?
;
136 self.fmt_literal(&x
.end
)?
;
139 Ascii(ref x
) => self.fmt_class_ascii(x
),
140 Unicode(ref x
) => self.fmt_class_unicode(x
),
141 Perl(ref x
) => self.fmt_class_perl(x
),
142 Bracketed(ref x
) => self.fmt_class_bracketed_post(x
),
147 fn visit_class_set_binary_op_in(
149 ast
: &ast
::ClassSetBinaryOp
,
150 ) -> Result
<(), Self::Err
> {
151 self.fmt_class_set_binary_op_kind(&ast
.kind
)
155 impl<W
: fmt
::Write
> Writer
<W
> {
156 fn fmt_group_pre(&mut self, ast
: &ast
::Group
) -> fmt
::Result
{
157 use crate::ast
::GroupKind
::*;
159 CaptureIndex(_
) => self.wtr
.write_str("("),
160 CaptureName(ref x
) => {
161 self.wtr
.write_str("(?P<")?
;
162 self.wtr
.write_str(&x
.name
)?
;
163 self.wtr
.write_str(">")?
;
166 NonCapturing(ref flags
) => {
167 self.wtr
.write_str("(?")?
;
168 self.fmt_flags(flags
)?
;
169 self.wtr
.write_str(":")?
;
175 fn fmt_group_post(&mut self, _ast
: &ast
::Group
) -> fmt
::Result
{
176 self.wtr
.write_str(")")
179 fn fmt_repetition(&mut self, ast
: &ast
::Repetition
) -> fmt
::Result
{
180 use crate::ast
::RepetitionKind
::*;
182 ZeroOrOne
if ast
.greedy
=> self.wtr
.write_str("?"),
183 ZeroOrOne
=> self.wtr
.write_str("??"),
184 ZeroOrMore
if ast
.greedy
=> self.wtr
.write_str("*"),
185 ZeroOrMore
=> self.wtr
.write_str("*?"),
186 OneOrMore
if ast
.greedy
=> self.wtr
.write_str("+"),
187 OneOrMore
=> self.wtr
.write_str("+?"),
189 self.fmt_repetition_range(x
)?
;
191 self.wtr
.write_str("?")?
;
198 fn fmt_repetition_range(
200 ast
: &ast
::RepetitionRange
,
202 use crate::ast
::RepetitionRange
::*;
204 Exactly(x
) => write
!(self.wtr
, "{{{}}}", x
),
205 AtLeast(x
) => write
!(self.wtr
, "{{{},}}", x
),
206 Bounded(x
, y
) => write
!(self.wtr
, "{{{},{}}}", x
, y
),
210 fn fmt_literal(&mut self, ast
: &ast
::Literal
) -> fmt
::Result
{
211 use crate::ast
::LiteralKind
::*;
214 Verbatim
=> self.wtr
.write_char(ast
.c
),
215 Punctuation
=> write
!(self.wtr
, r
"\{}", ast
.c
),
216 Octal
=> write
!(self.wtr
, r
"\{:o}", ast
.c
as u32),
217 HexFixed(ast
::HexLiteralKind
::X
) => {
218 write
!(self.wtr
, r
"\x{:02X}", ast
.c
as u32)
220 HexFixed(ast
::HexLiteralKind
::UnicodeShort
) => {
221 write
!(self.wtr
, r
"\u{:04X}", ast
.c
as u32)
223 HexFixed(ast
::HexLiteralKind
::UnicodeLong
) => {
224 write
!(self.wtr
, r
"\U{:08X}", ast
.c
as u32)
226 HexBrace(ast
::HexLiteralKind
::X
) => {
227 write
!(self.wtr
, r
"\x{{{:X}}}", ast
.c
as u32)
229 HexBrace(ast
::HexLiteralKind
::UnicodeShort
) => {
230 write
!(self.wtr
, r
"\u{{{:X}}}", ast
.c
as u32)
232 HexBrace(ast
::HexLiteralKind
::UnicodeLong
) => {
233 write
!(self.wtr
, r
"\U{{{:X}}}", ast
.c
as u32)
235 Special(ast
::SpecialLiteralKind
::Bell
) => {
236 self.wtr
.write_str(r
"\a")
238 Special(ast
::SpecialLiteralKind
::FormFeed
) => {
239 self.wtr
.write_str(r
"\f")
241 Special(ast
::SpecialLiteralKind
::Tab
) => self.wtr
.write_str(r
"\t"),
242 Special(ast
::SpecialLiteralKind
::LineFeed
) => {
243 self.wtr
.write_str(r
"\n")
245 Special(ast
::SpecialLiteralKind
::CarriageReturn
) => {
246 self.wtr
.write_str(r
"\r")
248 Special(ast
::SpecialLiteralKind
::VerticalTab
) => {
249 self.wtr
.write_str(r
"\v")
251 Special(ast
::SpecialLiteralKind
::Space
) => {
252 self.wtr
.write_str(r
"\ ")
257 fn fmt_assertion(&mut self, ast
: &ast
::Assertion
) -> fmt
::Result
{
258 use crate::ast
::AssertionKind
::*;
260 StartLine
=> self.wtr
.write_str("^"),
261 EndLine
=> self.wtr
.write_str("$"),
262 StartText
=> self.wtr
.write_str(r
"\A"),
263 EndText
=> self.wtr
.write_str(r
"\z"),
264 WordBoundary
=> self.wtr
.write_str(r
"\b"),
265 NotWordBoundary
=> self.wtr
.write_str(r
"\B"),
269 fn fmt_set_flags(&mut self, ast
: &ast
::SetFlags
) -> fmt
::Result
{
270 self.wtr
.write_str("(?")?
;
271 self.fmt_flags(&ast
.flags
)?
;
272 self.wtr
.write_str(")")?
;
276 fn fmt_flags(&mut self, ast
: &ast
::Flags
) -> fmt
::Result
{
277 use crate::ast
::{Flag, FlagsItemKind}
;
279 for item
in &ast
.items
{
281 FlagsItemKind
::Negation
=> self.wtr
.write_str("-"),
282 FlagsItemKind
::Flag(ref flag
) => match *flag
{
283 Flag
::CaseInsensitive
=> self.wtr
.write_str("i"),
284 Flag
::MultiLine
=> self.wtr
.write_str("m"),
285 Flag
::DotMatchesNewLine
=> self.wtr
.write_str("s"),
286 Flag
::SwapGreed
=> self.wtr
.write_str("U"),
287 Flag
::Unicode
=> self.wtr
.write_str("u"),
288 Flag
::IgnoreWhitespace
=> self.wtr
.write_str("x"),
295 fn fmt_class_bracketed_pre(
297 ast
: &ast
::ClassBracketed
,
300 self.wtr
.write_str("[^")
302 self.wtr
.write_str("[")
306 fn fmt_class_bracketed_post(
308 _ast
: &ast
::ClassBracketed
,
310 self.wtr
.write_str("]")
313 fn fmt_class_set_binary_op_kind(
315 ast
: &ast
::ClassSetBinaryOpKind
,
317 use crate::ast
::ClassSetBinaryOpKind
::*;
319 Intersection
=> self.wtr
.write_str("&&"),
320 Difference
=> self.wtr
.write_str("--"),
321 SymmetricDifference
=> self.wtr
.write_str("~~"),
325 fn fmt_class_perl(&mut self, ast
: &ast
::ClassPerl
) -> fmt
::Result
{
326 use crate::ast
::ClassPerlKind
::*;
328 Digit
if ast
.negated
=> self.wtr
.write_str(r
"\D"),
329 Digit
=> self.wtr
.write_str(r
"\d"),
330 Space
if ast
.negated
=> self.wtr
.write_str(r
"\S"),
331 Space
=> self.wtr
.write_str(r
"\s"),
332 Word
if ast
.negated
=> self.wtr
.write_str(r
"\W"),
333 Word
=> self.wtr
.write_str(r
"\w"),
337 fn fmt_class_ascii(&mut self, ast
: &ast
::ClassAscii
) -> fmt
::Result
{
338 use crate::ast
::ClassAsciiKind
::*;
340 Alnum
if ast
.negated
=> self.wtr
.write_str("[:^alnum:]"),
341 Alnum
=> self.wtr
.write_str("[:alnum:]"),
342 Alpha
if ast
.negated
=> self.wtr
.write_str("[:^alpha:]"),
343 Alpha
=> self.wtr
.write_str("[:alpha:]"),
344 Ascii
if ast
.negated
=> self.wtr
.write_str("[:^ascii:]"),
345 Ascii
=> self.wtr
.write_str("[:ascii:]"),
346 Blank
if ast
.negated
=> self.wtr
.write_str("[:^blank:]"),
347 Blank
=> self.wtr
.write_str("[:blank:]"),
348 Cntrl
if ast
.negated
=> self.wtr
.write_str("[:^cntrl:]"),
349 Cntrl
=> self.wtr
.write_str("[:cntrl:]"),
350 Digit
if ast
.negated
=> self.wtr
.write_str("[:^digit:]"),
351 Digit
=> self.wtr
.write_str("[:digit:]"),
352 Graph
if ast
.negated
=> self.wtr
.write_str("[:^graph:]"),
353 Graph
=> self.wtr
.write_str("[:graph:]"),
354 Lower
if ast
.negated
=> self.wtr
.write_str("[:^lower:]"),
355 Lower
=> self.wtr
.write_str("[:lower:]"),
356 Print
if ast
.negated
=> self.wtr
.write_str("[:^print:]"),
357 Print
=> self.wtr
.write_str("[:print:]"),
358 Punct
if ast
.negated
=> self.wtr
.write_str("[:^punct:]"),
359 Punct
=> self.wtr
.write_str("[:punct:]"),
360 Space
if ast
.negated
=> self.wtr
.write_str("[:^space:]"),
361 Space
=> self.wtr
.write_str("[:space:]"),
362 Upper
if ast
.negated
=> self.wtr
.write_str("[:^upper:]"),
363 Upper
=> self.wtr
.write_str("[:upper:]"),
364 Word
if ast
.negated
=> self.wtr
.write_str("[:^word:]"),
365 Word
=> self.wtr
.write_str("[:word:]"),
366 Xdigit
if ast
.negated
=> self.wtr
.write_str("[:^xdigit:]"),
367 Xdigit
=> self.wtr
.write_str("[:xdigit:]"),
371 fn fmt_class_unicode(&mut self, ast
: &ast
::ClassUnicode
) -> fmt
::Result
{
372 use crate::ast
::ClassUnicodeKind
::*;
373 use crate::ast
::ClassUnicodeOpKind
::*;
376 self.wtr
.write_str(r
"\P")?
;
378 self.wtr
.write_str(r
"\p")?
;
381 OneLetter(c
) => self.wtr
.write_char(c
),
382 Named(ref x
) => write
!(self.wtr
, "{{{}}}", x
),
383 NamedValue { op: Equal, ref name, ref value }
=> {
384 write
!(self.wtr
, "{{{}={}}}", name
, value
)
386 NamedValue { op: Colon, ref name, ref value }
=> {
387 write
!(self.wtr
, "{{{}:{}}}", name
, value
)
389 NamedValue { op: NotEqual, ref name, ref value }
=> {
390 write
!(self.wtr
, "{{{}!={}}}", name
, value
)
399 use crate::ast
::parse
::ParserBuilder
;
401 fn roundtrip(given
: &str) {
402 roundtrip_with(|b
| b
, given
);
405 fn roundtrip_with
<F
>(mut f
: F
, given
: &str)
407 F
: FnMut(&mut ParserBuilder
) -> &mut ParserBuilder
,
409 let mut builder
= ParserBuilder
::new();
411 let ast
= builder
.build().parse(given
).unwrap();
413 let mut printer
= Printer
::new();
414 let mut dst
= String
::new();
415 printer
.print(&ast
, &mut dst
).unwrap();
416 assert_eq
!(given
, dst
);
423 roundtrip_with(|b
| b
.octal(true), r
"\141");
426 roundtrip(r
"\u0061");
427 roundtrip(r
"\U00000061");
428 roundtrip(r
"\x{61}");
429 roundtrip(r
"\x{7F}");
430 roundtrip(r
"\u{61}");
431 roundtrip(r
"\U{61}");
439 roundtrip(r
"(?x)\ ");
451 roundtrip("a(bcd)ef");
455 fn print_alternation() {
457 roundtrip("a|b|c|d|e");
458 roundtrip("|a|b|c|d|e");
459 roundtrip("|a|b|c|d|e|");
460 roundtrip("a(b|c|d)|e|f");
464 fn print_assertion() {
474 fn print_repetition() {
485 roundtrip("a{5,10}");
486 roundtrip("a{5,10}?");
495 roundtrip("(?siUmux)");
501 roundtrip("(?P<foo>a)");
509 roundtrip(r
"[^a-z]");
510 roundtrip(r
"[a-z0-9]");
511 roundtrip(r
"[-a-z0-9]");
512 roundtrip(r
"[-a-z0-9]");
513 roundtrip(r
"[a-z0-9---]");
514 roundtrip(r
"[a-z&&m-n]");
515 roundtrip(r
"[[a-z&&m-n]]");
516 roundtrip(r
"[a-z--m-n]");
517 roundtrip(r
"[a-z~~m-n]");
518 roundtrip(r
"[a-z[0-9]]");
519 roundtrip(r
"[a-z[^0-9]]");
528 roundtrip(r
"[[:alnum:]]");
529 roundtrip(r
"[[:^alnum:]]");
530 roundtrip(r
"[[:alpha:]]");
531 roundtrip(r
"[[:^alpha:]]");
532 roundtrip(r
"[[:ascii:]]");
533 roundtrip(r
"[[:^ascii:]]");
534 roundtrip(r
"[[:blank:]]");
535 roundtrip(r
"[[:^blank:]]");
536 roundtrip(r
"[[:cntrl:]]");
537 roundtrip(r
"[[:^cntrl:]]");
538 roundtrip(r
"[[:digit:]]");
539 roundtrip(r
"[[:^digit:]]");
540 roundtrip(r
"[[:graph:]]");
541 roundtrip(r
"[[:^graph:]]");
542 roundtrip(r
"[[:lower:]]");
543 roundtrip(r
"[[:^lower:]]");
544 roundtrip(r
"[[:print:]]");
545 roundtrip(r
"[[:^print:]]");
546 roundtrip(r
"[[:punct:]]");
547 roundtrip(r
"[[:^punct:]]");
548 roundtrip(r
"[[:space:]]");
549 roundtrip(r
"[[:^space:]]");
550 roundtrip(r
"[[:upper:]]");
551 roundtrip(r
"[[:^upper:]]");
552 roundtrip(r
"[[:word:]]");
553 roundtrip(r
"[[:^word:]]");
554 roundtrip(r
"[[:xdigit:]]");
555 roundtrip(r
"[[:^xdigit:]]");
561 roundtrip(r
"\p{X=Y}");
562 roundtrip(r
"\P{X=Y}");
563 roundtrip(r
"\p{X:Y}");
564 roundtrip(r
"\P{X:Y}");
565 roundtrip(r
"\p{X!=Y}");
566 roundtrip(r
"\P{X!=Y}");