]>
Commit | Line | Data |
---|---|---|
0531ce1d XL |
1 | /*! |
2 | Defines a translator that converts an `Ast` to an `Hir`. | |
3 | */ | |
4 | ||
5 | use std::cell::{Cell, RefCell}; | |
6 | use std::result; | |
7 | ||
8 | use ast::{self, Ast, Span, Visitor}; | |
9 | use hir::{self, Error, ErrorKind, Hir}; | |
10 | use unicode::{self, ClassQuery}; | |
11 | ||
12 | type Result<T> = result::Result<T, Error>; | |
13 | ||
14 | /// A builder for constructing an AST->HIR translator. | |
15 | #[derive(Clone, Debug)] | |
16 | pub struct TranslatorBuilder { | |
17 | allow_invalid_utf8: bool, | |
18 | flags: Flags, | |
19 | } | |
20 | ||
21 | impl Default for TranslatorBuilder { | |
22 | fn default() -> TranslatorBuilder { | |
23 | TranslatorBuilder::new() | |
24 | } | |
25 | } | |
26 | ||
27 | impl TranslatorBuilder { | |
28 | /// Create a new translator builder with a default c onfiguration. | |
29 | pub fn new() -> TranslatorBuilder { | |
30 | TranslatorBuilder { | |
31 | allow_invalid_utf8: false, | |
32 | flags: Flags::default(), | |
33 | } | |
34 | } | |
35 | ||
36 | /// Build a translator using the current configuration. | |
37 | pub fn build(&self) -> Translator { | |
38 | Translator { | |
39 | stack: RefCell::new(vec![]), | |
40 | flags: Cell::new(self.flags), | |
41 | allow_invalid_utf8: self.allow_invalid_utf8, | |
42 | } | |
43 | } | |
44 | ||
45 | /// When enabled, translation will permit the construction of a regular | |
46 | /// expression that may match invalid UTF-8. | |
47 | /// | |
48 | /// When disabled (the default), the translator is guaranteed to produce | |
49 | /// an expression that will only ever match valid UTF-8 (otherwise, the | |
50 | /// translator will return an error). | |
51 | /// | |
b7449926 XL |
52 | /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII |
53 | /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause | |
54 | /// the parser to return an error. Namely, a negated ASCII word boundary | |
55 | /// can result in matching positions that aren't valid UTF-8 boundaries. | |
f9f354fc | 56 | pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder { |
0531ce1d XL |
57 | self.allow_invalid_utf8 = yes; |
58 | self | |
59 | } | |
60 | ||
61 | /// Enable or disable the case insensitive flag (`i`) by default. | |
62 | pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder { | |
63 | self.flags.case_insensitive = if yes { Some(true) } else { None }; | |
64 | self | |
65 | } | |
66 | ||
67 | /// Enable or disable the multi-line matching flag (`m`) by default. | |
68 | pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder { | |
69 | self.flags.multi_line = if yes { Some(true) } else { None }; | |
70 | self | |
71 | } | |
72 | ||
73 | /// Enable or disable the "dot matches any character" flag (`s`) by | |
74 | /// default. | |
75 | pub fn dot_matches_new_line( | |
76 | &mut self, | |
77 | yes: bool, | |
78 | ) -> &mut TranslatorBuilder { | |
79 | self.flags.dot_matches_new_line = if yes { Some(true) } else { None }; | |
80 | self | |
81 | } | |
82 | ||
83 | /// Enable or disable the "swap greed" flag (`U`) by default. | |
84 | pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder { | |
85 | self.flags.swap_greed = if yes { Some(true) } else { None }; | |
86 | self | |
87 | } | |
88 | ||
89 | /// Enable or disable the Unicode flag (`u`) by default. | |
90 | pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder { | |
91 | self.flags.unicode = if yes { None } else { Some(false) }; | |
92 | self | |
93 | } | |
94 | } | |
95 | ||
96 | /// A translator maps abstract syntax to a high level intermediate | |
97 | /// representation. | |
98 | /// | |
99 | /// A translator may be benefit from reuse. That is, a translator can translate | |
100 | /// many abstract syntax trees. | |
101 | /// | |
102 | /// A `Translator` can be configured in more detail via a | |
103 | /// [`TranslatorBuilder`](struct.TranslatorBuilder.html). | |
104 | #[derive(Clone, Debug)] | |
105 | pub struct Translator { | |
106 | /// Our call stack, but on the heap. | |
107 | stack: RefCell<Vec<HirFrame>>, | |
108 | /// The current flag settings. | |
109 | flags: Cell<Flags>, | |
110 | /// Whether we're allowed to produce HIR that can match arbitrary bytes. | |
111 | allow_invalid_utf8: bool, | |
112 | } | |
113 | ||
114 | impl Translator { | |
115 | /// Create a new translator using the default configuration. | |
116 | pub fn new() -> Translator { | |
117 | TranslatorBuilder::new().build() | |
118 | } | |
119 | ||
120 | /// Translate the given abstract syntax tree (AST) into a high level | |
121 | /// intermediate representation (HIR). | |
122 | /// | |
123 | /// If there was a problem doing the translation, then an HIR-specific | |
124 | /// error is returned. | |
125 | /// | |
126 | /// The original pattern string used to produce the `Ast` *must* also be | |
127 | /// provided. The translator does not use the pattern string during any | |
128 | /// correct translation, but is used for error reporting. | |
129 | pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> { | |
130 | ast::visit(ast, TranslatorI::new(self, pattern)) | |
131 | } | |
132 | } | |
133 | ||
134 | /// An HirFrame is a single stack frame, represented explicitly, which is | |
135 | /// created for each item in the Ast that we traverse. | |
136 | /// | |
137 | /// Note that technically, this type doesn't represent our entire stack | |
138 | /// frame. In particular, the Ast visitor represents any state associated with | |
139 | /// traversing the Ast itself. | |
140 | #[derive(Clone, Debug)] | |
141 | enum HirFrame { | |
142 | /// An arbitrary HIR expression. These get pushed whenever we hit a base | |
143 | /// case in the Ast. They get popped after an inductive (i.e., recursive) | |
144 | /// step is complete. | |
145 | Expr(Hir), | |
146 | /// A Unicode character class. This frame is mutated as we descend into | |
147 | /// the Ast of a character class (which is itself its own mini recursive | |
148 | /// structure). | |
149 | ClassUnicode(hir::ClassUnicode), | |
150 | /// A byte-oriented character class. This frame is mutated as we descend | |
151 | /// into the Ast of a character class (which is itself its own mini | |
152 | /// recursive structure). | |
153 | /// | |
154 | /// Byte character classes are created when Unicode mode (`u`) is disabled. | |
155 | /// If `allow_invalid_utf8` is disabled (the default), then a byte | |
156 | /// character is only permitted to match ASCII text. | |
157 | ClassBytes(hir::ClassBytes), | |
158 | /// This is pushed on to the stack upon first seeing any kind of group, | |
159 | /// indicated by parentheses (including non-capturing groups). It is popped | |
160 | /// upon leaving a group. | |
161 | Group { | |
f9f354fc | 162 | /// The old active flags when this group was opened. |
0531ce1d XL |
163 | /// |
164 | /// If this group sets flags, then the new active flags are set to the | |
165 | /// result of merging the old flags with the flags introduced by this | |
f9f354fc XL |
166 | /// group. If the group doesn't set any flags, then this is simply |
167 | /// equivalent to whatever flags were set when the group was opened. | |
0531ce1d XL |
168 | /// |
169 | /// When this group is popped, the active flags should be restored to | |
170 | /// the flags set here. | |
171 | /// | |
172 | /// The "active" flags correspond to whatever flags are set in the | |
173 | /// Translator. | |
f9f354fc | 174 | old_flags: Flags, |
0531ce1d XL |
175 | }, |
176 | /// This is pushed whenever a concatenation is observed. After visiting | |
177 | /// every sub-expression in the concatenation, the translator's stack is | |
178 | /// popped until it sees a Concat frame. | |
179 | Concat, | |
180 | /// This is pushed whenever an alternation is observed. After visiting | |
181 | /// every sub-expression in the alternation, the translator's stack is | |
182 | /// popped until it sees an Alternation frame. | |
183 | Alternation, | |
184 | } | |
185 | ||
186 | impl HirFrame { | |
187 | /// Assert that the current stack frame is an Hir expression and return it. | |
188 | fn unwrap_expr(self) -> Hir { | |
189 | match self { | |
190 | HirFrame::Expr(expr) => expr, | |
f9f354fc | 191 | _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self), |
0531ce1d XL |
192 | } |
193 | } | |
194 | ||
195 | /// Assert that the current stack frame is a Unicode class expression and | |
196 | /// return it. | |
197 | fn unwrap_class_unicode(self) -> hir::ClassUnicode { | |
198 | match self { | |
199 | HirFrame::ClassUnicode(cls) => cls, | |
f9f354fc XL |
200 | _ => panic!( |
201 | "tried to unwrap Unicode class \ | |
202 | from HirFrame, got: {:?}", | |
203 | self | |
204 | ), | |
0531ce1d XL |
205 | } |
206 | } | |
207 | ||
208 | /// Assert that the current stack frame is a byte class expression and | |
209 | /// return it. | |
210 | fn unwrap_class_bytes(self) -> hir::ClassBytes { | |
211 | match self { | |
212 | HirFrame::ClassBytes(cls) => cls, | |
f9f354fc XL |
213 | _ => panic!( |
214 | "tried to unwrap byte class \ | |
215 | from HirFrame, got: {:?}", | |
216 | self | |
217 | ), | |
0531ce1d XL |
218 | } |
219 | } | |
220 | ||
221 | /// Assert that the current stack frame is a group indicator and return | |
222 | /// its corresponding flags (the flags that were active at the time the | |
f9f354fc XL |
223 | /// group was entered). |
224 | fn unwrap_group(self) -> Flags { | |
0531ce1d XL |
225 | match self { |
226 | HirFrame::Group { old_flags } => old_flags, | |
f9f354fc XL |
227 | _ => { |
228 | panic!("tried to unwrap group from HirFrame, got: {:?}", self) | |
229 | } | |
0531ce1d XL |
230 | } |
231 | } | |
232 | } | |
233 | ||
234 | impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { | |
235 | type Output = Hir; | |
236 | type Err = Error; | |
237 | ||
238 | fn finish(self) -> Result<Hir> { | |
0531ce1d XL |
239 | // ... otherwise, we should have exactly one HIR on the stack. |
240 | assert_eq!(self.trans().stack.borrow().len(), 1); | |
241 | Ok(self.pop().unwrap().unwrap_expr()) | |
242 | } | |
243 | ||
244 | fn visit_pre(&mut self, ast: &Ast) -> Result<()> { | |
245 | match *ast { | |
246 | Ast::Class(ast::Class::Bracketed(_)) => { | |
247 | if self.flags().unicode() { | |
248 | let cls = hir::ClassUnicode::empty(); | |
249 | self.push(HirFrame::ClassUnicode(cls)); | |
250 | } else { | |
251 | let cls = hir::ClassBytes::empty(); | |
252 | self.push(HirFrame::ClassBytes(cls)); | |
253 | } | |
254 | } | |
255 | Ast::Group(ref x) => { | |
f9f354fc XL |
256 | let old_flags = x |
257 | .flags() | |
258 | .map(|ast| self.set_flags(ast)) | |
259 | .unwrap_or_else(|| self.flags()); | |
260 | self.push(HirFrame::Group { old_flags }); | |
0531ce1d XL |
261 | } |
262 | Ast::Concat(ref x) if x.asts.is_empty() => {} | |
263 | Ast::Concat(_) => { | |
264 | self.push(HirFrame::Concat); | |
265 | } | |
266 | Ast::Alternation(ref x) if x.asts.is_empty() => {} | |
267 | Ast::Alternation(_) => { | |
268 | self.push(HirFrame::Alternation); | |
269 | } | |
270 | _ => {} | |
271 | } | |
272 | Ok(()) | |
273 | } | |
274 | ||
275 | fn visit_post(&mut self, ast: &Ast) -> Result<()> { | |
276 | match *ast { | |
277 | Ast::Empty(_) => { | |
278 | self.push(HirFrame::Expr(Hir::empty())); | |
279 | } | |
280 | Ast::Flags(ref x) => { | |
281 | self.set_flags(&x.flags); | |
48663c56 XL |
282 | // Flags in the AST are generally considered directives and |
283 | // not actual sub-expressions. However, they can be used in | |
284 | // the concrete syntax like `((?i))`, and we need some kind of | |
285 | // indication of an expression there, and Empty is the correct | |
286 | // choice. | |
287 | // | |
288 | // There can also be things like `(?i)+`, but we rule those out | |
289 | // in the parser. In the future, we might allow them for | |
290 | // consistency sake. | |
291 | self.push(HirFrame::Expr(Hir::empty())); | |
0531ce1d XL |
292 | } |
293 | Ast::Literal(ref x) => { | |
94b46f34 | 294 | self.push(HirFrame::Expr(self.hir_literal(x)?)); |
0531ce1d XL |
295 | } |
296 | Ast::Dot(span) => { | |
94b46f34 | 297 | self.push(HirFrame::Expr(self.hir_dot(span)?)); |
0531ce1d XL |
298 | } |
299 | Ast::Assertion(ref x) => { | |
94b46f34 | 300 | self.push(HirFrame::Expr(self.hir_assertion(x)?)); |
0531ce1d XL |
301 | } |
302 | Ast::Class(ast::Class::Perl(ref x)) => { | |
303 | if self.flags().unicode() { | |
f9f354fc | 304 | let cls = self.hir_perl_unicode_class(x)?; |
0531ce1d XL |
305 | let hcls = hir::Class::Unicode(cls); |
306 | self.push(HirFrame::Expr(Hir::class(hcls))); | |
307 | } else { | |
308 | let cls = self.hir_perl_byte_class(x); | |
309 | let hcls = hir::Class::Bytes(cls); | |
310 | self.push(HirFrame::Expr(Hir::class(hcls))); | |
311 | } | |
312 | } | |
313 | Ast::Class(ast::Class::Unicode(ref x)) => { | |
94b46f34 | 314 | let cls = hir::Class::Unicode(self.hir_unicode_class(x)?); |
0531ce1d XL |
315 | self.push(HirFrame::Expr(Hir::class(cls))); |
316 | } | |
317 | Ast::Class(ast::Class::Bracketed(ref ast)) => { | |
318 | if self.flags().unicode() { | |
319 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); | |
f9f354fc XL |
320 | self.unicode_fold_and_negate( |
321 | &ast.span, | |
322 | ast.negated, | |
323 | &mut cls, | |
324 | )?; | |
5869c6ff | 325 | if cls.ranges().is_empty() { |
0531ce1d | 326 | return Err(self.error( |
f9f354fc XL |
327 | ast.span, |
328 | ErrorKind::EmptyClassNotAllowed, | |
329 | )); | |
0531ce1d XL |
330 | } |
331 | let expr = Hir::class(hir::Class::Unicode(cls)); | |
332 | self.push(HirFrame::Expr(expr)); | |
333 | } else { | |
334 | let mut cls = self.pop().unwrap().unwrap_class_bytes(); | |
94b46f34 | 335 | self.bytes_fold_and_negate( |
f9f354fc XL |
336 | &ast.span, |
337 | ast.negated, | |
338 | &mut cls, | |
339 | )?; | |
5869c6ff | 340 | if cls.ranges().is_empty() { |
0531ce1d | 341 | return Err(self.error( |
f9f354fc XL |
342 | ast.span, |
343 | ErrorKind::EmptyClassNotAllowed, | |
344 | )); | |
0531ce1d XL |
345 | } |
346 | ||
347 | let expr = Hir::class(hir::Class::Bytes(cls)); | |
348 | self.push(HirFrame::Expr(expr)); | |
349 | } | |
350 | } | |
351 | Ast::Repetition(ref x) => { | |
352 | let expr = self.pop().unwrap().unwrap_expr(); | |
353 | self.push(HirFrame::Expr(self.hir_repetition(x, expr))); | |
354 | } | |
355 | Ast::Group(ref x) => { | |
356 | let expr = self.pop().unwrap().unwrap_expr(); | |
f9f354fc XL |
357 | let old_flags = self.pop().unwrap().unwrap_group(); |
358 | self.trans().flags.set(old_flags); | |
0531ce1d XL |
359 | self.push(HirFrame::Expr(self.hir_group(x, expr))); |
360 | } | |
361 | Ast::Concat(_) => { | |
362 | let mut exprs = vec![]; | |
363 | while let Some(HirFrame::Expr(expr)) = self.pop() { | |
94b46f34 XL |
364 | if !expr.kind().is_empty() { |
365 | exprs.push(expr); | |
366 | } | |
0531ce1d XL |
367 | } |
368 | exprs.reverse(); | |
369 | self.push(HirFrame::Expr(Hir::concat(exprs))); | |
370 | } | |
371 | Ast::Alternation(_) => { | |
372 | let mut exprs = vec![]; | |
373 | while let Some(HirFrame::Expr(expr)) = self.pop() { | |
374 | exprs.push(expr); | |
375 | } | |
376 | exprs.reverse(); | |
377 | self.push(HirFrame::Expr(Hir::alternation(exprs))); | |
378 | } | |
379 | } | |
380 | Ok(()) | |
381 | } | |
382 | ||
383 | fn visit_class_set_item_pre( | |
384 | &mut self, | |
385 | ast: &ast::ClassSetItem, | |
386 | ) -> Result<()> { | |
387 | match *ast { | |
388 | ast::ClassSetItem::Bracketed(_) => { | |
389 | if self.flags().unicode() { | |
390 | let cls = hir::ClassUnicode::empty(); | |
391 | self.push(HirFrame::ClassUnicode(cls)); | |
392 | } else { | |
393 | let cls = hir::ClassBytes::empty(); | |
394 | self.push(HirFrame::ClassBytes(cls)); | |
395 | } | |
396 | } | |
397 | // We needn't handle the Union case here since the visitor will | |
398 | // do it for us. | |
399 | _ => {} | |
400 | } | |
401 | Ok(()) | |
402 | } | |
403 | ||
404 | fn visit_class_set_item_post( | |
405 | &mut self, | |
406 | ast: &ast::ClassSetItem, | |
407 | ) -> Result<()> { | |
408 | match *ast { | |
409 | ast::ClassSetItem::Empty(_) => {} | |
410 | ast::ClassSetItem::Literal(ref x) => { | |
411 | if self.flags().unicode() { | |
412 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); | |
413 | cls.push(hir::ClassUnicodeRange::new(x.c, x.c)); | |
414 | self.push(HirFrame::ClassUnicode(cls)); | |
415 | } else { | |
416 | let mut cls = self.pop().unwrap().unwrap_class_bytes(); | |
94b46f34 | 417 | let byte = self.class_literal_byte(x)?; |
0531ce1d XL |
418 | cls.push(hir::ClassBytesRange::new(byte, byte)); |
419 | self.push(HirFrame::ClassBytes(cls)); | |
420 | } | |
421 | } | |
422 | ast::ClassSetItem::Range(ref x) => { | |
423 | if self.flags().unicode() { | |
424 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); | |
425 | cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c)); | |
426 | self.push(HirFrame::ClassUnicode(cls)); | |
427 | } else { | |
428 | let mut cls = self.pop().unwrap().unwrap_class_bytes(); | |
94b46f34 XL |
429 | let start = self.class_literal_byte(&x.start)?; |
430 | let end = self.class_literal_byte(&x.end)?; | |
0531ce1d XL |
431 | cls.push(hir::ClassBytesRange::new(start, end)); |
432 | self.push(HirFrame::ClassBytes(cls)); | |
433 | } | |
434 | } | |
435 | ast::ClassSetItem::Ascii(ref x) => { | |
436 | if self.flags().unicode() { | |
437 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); | |
438 | for &(s, e) in ascii_class(&x.kind) { | |
439 | cls.push(hir::ClassUnicodeRange::new(s, e)); | |
440 | } | |
f9f354fc XL |
441 | self.unicode_fold_and_negate( |
442 | &x.span, x.negated, &mut cls, | |
443 | )?; | |
0531ce1d XL |
444 | self.push(HirFrame::ClassUnicode(cls)); |
445 | } else { | |
446 | let mut cls = self.pop().unwrap().unwrap_class_bytes(); | |
447 | for &(s, e) in ascii_class(&x.kind) { | |
448 | cls.push(hir::ClassBytesRange::new(s as u8, e as u8)); | |
449 | } | |
f9f354fc | 450 | self.bytes_fold_and_negate(&x.span, x.negated, &mut cls)?; |
0531ce1d XL |
451 | self.push(HirFrame::ClassBytes(cls)); |
452 | } | |
453 | } | |
454 | ast::ClassSetItem::Unicode(ref x) => { | |
94b46f34 | 455 | let xcls = self.hir_unicode_class(x)?; |
0531ce1d XL |
456 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
457 | cls.union(&xcls); | |
458 | self.push(HirFrame::ClassUnicode(cls)); | |
459 | } | |
460 | ast::ClassSetItem::Perl(ref x) => { | |
461 | if self.flags().unicode() { | |
f9f354fc | 462 | let xcls = self.hir_perl_unicode_class(x)?; |
0531ce1d XL |
463 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
464 | cls.union(&xcls); | |
465 | self.push(HirFrame::ClassUnicode(cls)); | |
466 | } else { | |
467 | let xcls = self.hir_perl_byte_class(x); | |
468 | let mut cls = self.pop().unwrap().unwrap_class_bytes(); | |
469 | cls.union(&xcls); | |
470 | self.push(HirFrame::ClassBytes(cls)); | |
471 | } | |
472 | } | |
473 | ast::ClassSetItem::Bracketed(ref ast) => { | |
474 | if self.flags().unicode() { | |
475 | let mut cls1 = self.pop().unwrap().unwrap_class_unicode(); | |
f9f354fc XL |
476 | self.unicode_fold_and_negate( |
477 | &ast.span, | |
478 | ast.negated, | |
479 | &mut cls1, | |
480 | )?; | |
0531ce1d XL |
481 | |
482 | let mut cls2 = self.pop().unwrap().unwrap_class_unicode(); | |
483 | cls2.union(&cls1); | |
484 | self.push(HirFrame::ClassUnicode(cls2)); | |
485 | } else { | |
486 | let mut cls1 = self.pop().unwrap().unwrap_class_bytes(); | |
94b46f34 | 487 | self.bytes_fold_and_negate( |
f9f354fc XL |
488 | &ast.span, |
489 | ast.negated, | |
490 | &mut cls1, | |
491 | )?; | |
0531ce1d XL |
492 | |
493 | let mut cls2 = self.pop().unwrap().unwrap_class_bytes(); | |
494 | cls2.union(&cls1); | |
495 | self.push(HirFrame::ClassBytes(cls2)); | |
496 | } | |
497 | } | |
498 | // This is handled automatically by the visitor. | |
499 | ast::ClassSetItem::Union(_) => {} | |
500 | } | |
501 | Ok(()) | |
502 | } | |
503 | ||
504 | fn visit_class_set_binary_op_pre( | |
505 | &mut self, | |
506 | _op: &ast::ClassSetBinaryOp, | |
507 | ) -> Result<()> { | |
508 | if self.flags().unicode() { | |
509 | let cls = hir::ClassUnicode::empty(); | |
510 | self.push(HirFrame::ClassUnicode(cls)); | |
511 | } else { | |
512 | let cls = hir::ClassBytes::empty(); | |
513 | self.push(HirFrame::ClassBytes(cls)); | |
514 | } | |
515 | Ok(()) | |
516 | } | |
517 | ||
518 | fn visit_class_set_binary_op_in( | |
519 | &mut self, | |
520 | _op: &ast::ClassSetBinaryOp, | |
521 | ) -> Result<()> { | |
522 | if self.flags().unicode() { | |
523 | let cls = hir::ClassUnicode::empty(); | |
524 | self.push(HirFrame::ClassUnicode(cls)); | |
525 | } else { | |
526 | let cls = hir::ClassBytes::empty(); | |
527 | self.push(HirFrame::ClassBytes(cls)); | |
528 | } | |
529 | Ok(()) | |
530 | } | |
531 | ||
532 | fn visit_class_set_binary_op_post( | |
533 | &mut self, | |
534 | op: &ast::ClassSetBinaryOp, | |
535 | ) -> Result<()> { | |
536 | use ast::ClassSetBinaryOpKind::*; | |
537 | ||
538 | if self.flags().unicode() { | |
539 | let mut rhs = self.pop().unwrap().unwrap_class_unicode(); | |
540 | let mut lhs = self.pop().unwrap().unwrap_class_unicode(); | |
541 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); | |
542 | if self.flags().case_insensitive() { | |
f9f354fc XL |
543 | rhs.try_case_fold_simple().map_err(|_| { |
544 | self.error( | |
545 | op.rhs.span().clone(), | |
546 | ErrorKind::UnicodeCaseUnavailable, | |
547 | ) | |
548 | })?; | |
549 | lhs.try_case_fold_simple().map_err(|_| { | |
550 | self.error( | |
551 | op.lhs.span().clone(), | |
552 | ErrorKind::UnicodeCaseUnavailable, | |
553 | ) | |
554 | })?; | |
0531ce1d XL |
555 | } |
556 | match op.kind { | |
557 | Intersection => lhs.intersect(&rhs), | |
558 | Difference => lhs.difference(&rhs), | |
559 | SymmetricDifference => lhs.symmetric_difference(&rhs), | |
560 | } | |
561 | cls.union(&lhs); | |
562 | self.push(HirFrame::ClassUnicode(cls)); | |
563 | } else { | |
564 | let mut rhs = self.pop().unwrap().unwrap_class_bytes(); | |
565 | let mut lhs = self.pop().unwrap().unwrap_class_bytes(); | |
566 | let mut cls = self.pop().unwrap().unwrap_class_bytes(); | |
567 | if self.flags().case_insensitive() { | |
568 | rhs.case_fold_simple(); | |
569 | lhs.case_fold_simple(); | |
570 | } | |
571 | match op.kind { | |
572 | Intersection => lhs.intersect(&rhs), | |
573 | Difference => lhs.difference(&rhs), | |
574 | SymmetricDifference => lhs.symmetric_difference(&rhs), | |
575 | } | |
576 | cls.union(&lhs); | |
577 | self.push(HirFrame::ClassBytes(cls)); | |
578 | } | |
579 | Ok(()) | |
580 | } | |
581 | } | |
582 | ||
583 | /// The internal implementation of a translator. | |
584 | /// | |
585 | /// This type is responsible for carrying around the original pattern string, | |
586 | /// which is not tied to the internal state of a translator. | |
587 | /// | |
588 | /// A TranslatorI exists for the time it takes to translate a single Ast. | |
589 | #[derive(Clone, Debug)] | |
590 | struct TranslatorI<'t, 'p> { | |
591 | trans: &'t Translator, | |
592 | pattern: &'p str, | |
593 | } | |
594 | ||
595 | impl<'t, 'p> TranslatorI<'t, 'p> { | |
596 | /// Build a new internal translator. | |
597 | fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> { | |
598 | TranslatorI { trans: trans, pattern: pattern } | |
599 | } | |
600 | ||
601 | /// Return a reference to the underlying translator. | |
602 | fn trans(&self) -> &Translator { | |
603 | &self.trans | |
604 | } | |
605 | ||
606 | /// Push the given frame on to the call stack. | |
607 | fn push(&self, frame: HirFrame) { | |
608 | self.trans().stack.borrow_mut().push(frame); | |
609 | } | |
610 | ||
611 | /// Pop the top of the call stack. If the call stack is empty, return None. | |
612 | fn pop(&self) -> Option<HirFrame> { | |
613 | self.trans().stack.borrow_mut().pop() | |
614 | } | |
615 | ||
616 | /// Create a new error with the given span and error type. | |
617 | fn error(&self, span: Span, kind: ErrorKind) -> Error { | |
618 | Error { kind: kind, pattern: self.pattern.to_string(), span: span } | |
619 | } | |
620 | ||
621 | /// Return a copy of the active flags. | |
622 | fn flags(&self) -> Flags { | |
623 | self.trans().flags.get() | |
624 | } | |
625 | ||
626 | /// Set the flags of this translator from the flags set in the given AST. | |
627 | /// Then, return the old flags. | |
628 | fn set_flags(&self, ast_flags: &ast::Flags) -> Flags { | |
629 | let old_flags = self.flags(); | |
630 | let mut new_flags = Flags::from_ast(ast_flags); | |
631 | new_flags.merge(&old_flags); | |
632 | self.trans().flags.set(new_flags); | |
633 | old_flags | |
634 | } | |
635 | ||
636 | fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> { | |
94b46f34 | 637 | let ch = match self.literal_to_char(lit)? { |
0531ce1d XL |
638 | byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)), |
639 | hir::Literal::Unicode(ch) => ch, | |
640 | }; | |
641 | if self.flags().case_insensitive() { | |
642 | self.hir_from_char_case_insensitive(lit.span, ch) | |
643 | } else { | |
644 | self.hir_from_char(lit.span, ch) | |
645 | } | |
646 | } | |
647 | ||
648 | /// Convert an Ast literal to its scalar representation. | |
649 | /// | |
650 | /// When Unicode mode is enabled, then this always succeeds and returns a | |
651 | /// `char` (Unicode scalar value). | |
652 | /// | |
653 | /// When Unicode mode is disabled, then a raw byte is returned. If that | |
654 | /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns | |
655 | /// an error. | |
656 | fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> { | |
657 | if self.flags().unicode() { | |
658 | return Ok(hir::Literal::Unicode(lit.c)); | |
659 | } | |
660 | let byte = match lit.byte() { | |
661 | None => return Ok(hir::Literal::Unicode(lit.c)), | |
662 | Some(byte) => byte, | |
663 | }; | |
664 | if byte <= 0x7F { | |
665 | return Ok(hir::Literal::Unicode(byte as char)); | |
666 | } | |
667 | if !self.trans().allow_invalid_utf8 { | |
668 | return Err(self.error(lit.span, ErrorKind::InvalidUtf8)); | |
669 | } | |
670 | Ok(hir::Literal::Byte(byte)) | |
671 | } | |
672 | ||
673 | fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> { | |
674 | if !self.flags().unicode() && c.len_utf8() > 1 { | |
675 | return Err(self.error(span, ErrorKind::UnicodeNotAllowed)); | |
676 | } | |
677 | Ok(Hir::literal(hir::Literal::Unicode(c))) | |
678 | } | |
679 | ||
680 | fn hir_from_char_case_insensitive( | |
681 | &self, | |
682 | span: Span, | |
683 | c: char, | |
684 | ) -> Result<Hir> { | |
0531ce1d | 685 | if self.flags().unicode() { |
f9f354fc XL |
686 | // If case folding won't do anything, then don't bother trying. |
687 | let map = | |
688 | unicode::contains_simple_case_mapping(c, c).map_err(|_| { | |
689 | self.error(span, ErrorKind::UnicodeCaseUnavailable) | |
690 | })?; | |
691 | if !map { | |
692 | return self.hir_from_char(span, c); | |
693 | } | |
694 | let mut cls = | |
695 | hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new( | |
696 | c, c, | |
697 | )]); | |
698 | cls.try_case_fold_simple().map_err(|_| { | |
699 | self.error(span, ErrorKind::UnicodeCaseUnavailable) | |
700 | })?; | |
0531ce1d XL |
701 | Ok(Hir::class(hir::Class::Unicode(cls))) |
702 | } else { | |
703 | if c.len_utf8() > 1 { | |
704 | return Err(self.error(span, ErrorKind::UnicodeNotAllowed)); | |
705 | } | |
f9f354fc XL |
706 | // If case folding won't do anything, then don't bother trying. |
707 | match c { | |
708 | 'A'..='Z' | 'a'..='z' => {} | |
709 | _ => return self.hir_from_char(span, c), | |
710 | } | |
711 | let mut cls = | |
712 | hir::ClassBytes::new(vec![hir::ClassBytesRange::new( | |
713 | c as u8, c as u8, | |
714 | )]); | |
0531ce1d XL |
715 | cls.case_fold_simple(); |
716 | Ok(Hir::class(hir::Class::Bytes(cls))) | |
717 | } | |
718 | } | |
719 | ||
720 | fn hir_dot(&self, span: Span) -> Result<Hir> { | |
721 | let unicode = self.flags().unicode(); | |
722 | if !unicode && !self.trans().allow_invalid_utf8 { | |
723 | return Err(self.error(span, ErrorKind::InvalidUtf8)); | |
724 | } | |
725 | Ok(if self.flags().dot_matches_new_line() { | |
726 | Hir::any(!unicode) | |
727 | } else { | |
728 | Hir::dot(!unicode) | |
729 | }) | |
730 | } | |
731 | ||
732 | fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> { | |
733 | let unicode = self.flags().unicode(); | |
734 | let multi_line = self.flags().multi_line(); | |
735 | Ok(match asst.kind { | |
f9f354fc XL |
736 | ast::AssertionKind::StartLine => Hir::anchor(if multi_line { |
737 | hir::Anchor::StartLine | |
738 | } else { | |
739 | hir::Anchor::StartText | |
740 | }), | |
741 | ast::AssertionKind::EndLine => Hir::anchor(if multi_line { | |
742 | hir::Anchor::EndLine | |
743 | } else { | |
744 | hir::Anchor::EndText | |
745 | }), | |
0531ce1d XL |
746 | ast::AssertionKind::StartText => { |
747 | Hir::anchor(hir::Anchor::StartText) | |
748 | } | |
f9f354fc | 749 | ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText), |
0531ce1d XL |
750 | ast::AssertionKind::WordBoundary => { |
751 | Hir::word_boundary(if unicode { | |
752 | hir::WordBoundary::Unicode | |
753 | } else { | |
754 | hir::WordBoundary::Ascii | |
755 | }) | |
756 | } | |
757 | ast::AssertionKind::NotWordBoundary => { | |
758 | Hir::word_boundary(if unicode { | |
759 | hir::WordBoundary::UnicodeNegate | |
760 | } else { | |
761 | // It is possible for negated ASCII word boundaries to | |
762 | // match at invalid UTF-8 boundaries, even when searching | |
763 | // valid UTF-8. | |
94b46f34 | 764 | if !self.trans().allow_invalid_utf8 { |
f9f354fc XL |
765 | return Err( |
766 | self.error(asst.span, ErrorKind::InvalidUtf8) | |
767 | ); | |
94b46f34 | 768 | } |
0531ce1d XL |
769 | hir::WordBoundary::AsciiNegate |
770 | }) | |
771 | } | |
772 | }) | |
773 | } | |
774 | ||
775 | fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir { | |
776 | let kind = match group.kind { | |
777 | ast::GroupKind::CaptureIndex(idx) => { | |
778 | hir::GroupKind::CaptureIndex(idx) | |
779 | } | |
780 | ast::GroupKind::CaptureName(ref capname) => { | |
781 | hir::GroupKind::CaptureName { | |
782 | name: capname.name.clone(), | |
783 | index: capname.index, | |
784 | } | |
785 | } | |
786 | ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing, | |
787 | }; | |
f9f354fc | 788 | Hir::group(hir::Group { kind: kind, hir: Box::new(expr) }) |
0531ce1d XL |
789 | } |
790 | ||
791 | fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir { | |
792 | let kind = match rep.op.kind { | |
793 | ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne, | |
794 | ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore, | |
795 | ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore, | |
796 | ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => { | |
797 | hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m)) | |
798 | } | |
799 | ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => { | |
800 | hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m)) | |
801 | } | |
f9f354fc XL |
802 | ast::RepetitionKind::Range(ast::RepetitionRange::Bounded( |
803 | m, | |
804 | n, | |
805 | )) => { | |
0531ce1d XL |
806 | hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n)) |
807 | } | |
808 | }; | |
809 | let greedy = | |
f9f354fc | 810 | if self.flags().swap_greed() { !rep.greedy } else { rep.greedy }; |
0531ce1d XL |
811 | Hir::repetition(hir::Repetition { |
812 | kind: kind, | |
813 | greedy: greedy, | |
814 | hir: Box::new(expr), | |
815 | }) | |
816 | } | |
817 | ||
818 | fn hir_unicode_class( | |
819 | &self, | |
820 | ast_class: &ast::ClassUnicode, | |
821 | ) -> Result<hir::ClassUnicode> { | |
822 | use ast::ClassUnicodeKind::*; | |
823 | ||
824 | if !self.flags().unicode() { | |
f9f354fc XL |
825 | return Err( |
826 | self.error(ast_class.span, ErrorKind::UnicodeNotAllowed) | |
827 | ); | |
0531ce1d XL |
828 | } |
829 | let query = match ast_class.kind { | |
830 | OneLetter(name) => ClassQuery::OneLetter(name), | |
831 | Named(ref name) => ClassQuery::Binary(name), | |
f9f354fc XL |
832 | NamedValue { ref name, ref value, .. } => ClassQuery::ByValue { |
833 | property_name: name, | |
834 | property_value: value, | |
835 | }, | |
0531ce1d | 836 | }; |
f9f354fc XL |
837 | let mut result = self.convert_unicode_class_error( |
838 | &ast_class.span, | |
839 | unicode::class(query), | |
840 | ); | |
841 | if let Ok(ref mut class) = result { | |
842 | self.unicode_fold_and_negate( | |
843 | &ast_class.span, | |
844 | ast_class.negated, | |
845 | class, | |
846 | )?; | |
5869c6ff XL |
847 | if class.ranges().is_empty() { |
848 | let err = self | |
849 | .error(ast_class.span, ErrorKind::EmptyClassNotAllowed); | |
850 | return Err(err); | |
851 | } | |
0531ce1d | 852 | } |
f9f354fc | 853 | result |
0531ce1d XL |
854 | } |
855 | ||
856 | fn hir_perl_unicode_class( | |
857 | &self, | |
858 | ast_class: &ast::ClassPerl, | |
f9f354fc | 859 | ) -> Result<hir::ClassUnicode> { |
0531ce1d | 860 | use ast::ClassPerlKind::*; |
0531ce1d XL |
861 | |
862 | assert!(self.flags().unicode()); | |
f9f354fc XL |
863 | let result = match ast_class.kind { |
864 | Digit => unicode::perl_digit(), | |
865 | Space => unicode::perl_space(), | |
866 | Word => unicode::perl_word(), | |
0531ce1d | 867 | }; |
f9f354fc XL |
868 | let mut class = |
869 | self.convert_unicode_class_error(&ast_class.span, result)?; | |
0531ce1d XL |
870 | // We needn't apply case folding here because the Perl Unicode classes |
871 | // are already closed under Unicode simple case folding. | |
872 | if ast_class.negated { | |
873 | class.negate(); | |
874 | } | |
f9f354fc | 875 | Ok(class) |
0531ce1d XL |
876 | } |
877 | ||
878 | fn hir_perl_byte_class( | |
879 | &self, | |
880 | ast_class: &ast::ClassPerl, | |
881 | ) -> hir::ClassBytes { | |
882 | use ast::ClassPerlKind::*; | |
883 | ||
884 | assert!(!self.flags().unicode()); | |
885 | let mut class = match ast_class.kind { | |
886 | Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit), | |
887 | Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space), | |
888 | Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word), | |
889 | }; | |
890 | // We needn't apply case folding here because the Perl ASCII classes | |
891 | // are already closed (under ASCII case folding). | |
892 | if ast_class.negated { | |
893 | class.negate(); | |
894 | } | |
895 | class | |
896 | } | |
897 | ||
f9f354fc XL |
898 | /// Converts the given Unicode specific error to an HIR translation error. |
899 | /// | |
900 | /// The span given should approximate the position at which an error would | |
901 | /// occur. | |
902 | fn convert_unicode_class_error( | |
903 | &self, | |
904 | span: &Span, | |
905 | result: unicode::Result<hir::ClassUnicode>, | |
906 | ) -> Result<hir::ClassUnicode> { | |
907 | result.map_err(|err| { | |
908 | let sp = span.clone(); | |
909 | match err { | |
910 | unicode::Error::PropertyNotFound => { | |
911 | self.error(sp, ErrorKind::UnicodePropertyNotFound) | |
912 | } | |
913 | unicode::Error::PropertyValueNotFound => { | |
914 | self.error(sp, ErrorKind::UnicodePropertyValueNotFound) | |
915 | } | |
916 | unicode::Error::PerlClassNotFound => { | |
917 | self.error(sp, ErrorKind::UnicodePerlClassNotFound) | |
918 | } | |
919 | } | |
920 | }) | |
921 | } | |
922 | ||
0531ce1d XL |
923 | fn unicode_fold_and_negate( |
924 | &self, | |
f9f354fc | 925 | span: &Span, |
0531ce1d XL |
926 | negated: bool, |
927 | class: &mut hir::ClassUnicode, | |
f9f354fc | 928 | ) -> Result<()> { |
0531ce1d XL |
929 | // Note that we must apply case folding before negation! |
930 | // Consider `(?i)[^x]`. If we applied negation field, then | |
931 | // the result would be the character class that matched any | |
932 | // Unicode scalar value. | |
933 | if self.flags().case_insensitive() { | |
f9f354fc XL |
934 | class.try_case_fold_simple().map_err(|_| { |
935 | self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable) | |
936 | })?; | |
0531ce1d XL |
937 | } |
938 | if negated { | |
939 | class.negate(); | |
940 | } | |
f9f354fc | 941 | Ok(()) |
0531ce1d XL |
942 | } |
943 | ||
944 | fn bytes_fold_and_negate( | |
945 | &self, | |
946 | span: &Span, | |
947 | negated: bool, | |
948 | class: &mut hir::ClassBytes, | |
949 | ) -> Result<()> { | |
950 | // Note that we must apply case folding before negation! | |
951 | // Consider `(?i)[^x]`. If we applied negation field, then | |
952 | // the result would be the character class that matched any | |
953 | // Unicode scalar value. | |
954 | if self.flags().case_insensitive() { | |
955 | class.case_fold_simple(); | |
956 | } | |
957 | if negated { | |
958 | class.negate(); | |
959 | } | |
960 | if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() { | |
961 | return Err(self.error(span.clone(), ErrorKind::InvalidUtf8)); | |
962 | } | |
963 | Ok(()) | |
964 | } | |
965 | ||
966 | /// Return a scalar byte value suitable for use as a literal in a byte | |
967 | /// character class. | |
968 | fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> { | |
94b46f34 | 969 | match self.literal_to_char(ast)? { |
0531ce1d XL |
970 | hir::Literal::Byte(byte) => Ok(byte), |
971 | hir::Literal::Unicode(ch) => { | |
972 | if ch <= 0x7F as char { | |
973 | Ok(ch as u8) | |
974 | } else { | |
975 | // We can't feasibly support Unicode in | |
976 | // byte oriented classes. Byte classes don't | |
977 | // do Unicode case folding. | |
978 | Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed)) | |
979 | } | |
980 | } | |
981 | } | |
982 | } | |
983 | } | |
984 | ||
/// The set of regular expression flags in effect at some point during
/// translation.
///
/// Every flag has three possible states: absent (`None`), present but
/// disabled (`Some(false)`) or present and enabled (`Some(true)`).
#[derive(Clone, Copy, Debug, Default)]
struct Flags {
    /// State of the case insensitive flag.
    case_insensitive: Option<bool>,
    /// State of the multi-line flag.
    multi_line: Option<bool>,
    /// State of the "dot matches new line" flag.
    dot_matches_new_line: Option<bool>,
    /// State of the swap-greed flag.
    swap_greed: Option<bool>,
    /// State of the Unicode flag.
    unicode: Option<bool>,
    // There is deliberately no `ignore_whitespace` field: that flag is
    // handled entirely in the parser.
}
1000 | ||
1001 | impl Flags { | |
1002 | fn from_ast(ast: &ast::Flags) -> Flags { | |
1003 | let mut flags = Flags::default(); | |
1004 | let mut enable = true; | |
1005 | for item in &ast.items { | |
1006 | match item.kind { | |
1007 | ast::FlagsItemKind::Negation => { | |
1008 | enable = false; | |
1009 | } | |
1010 | ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => { | |
1011 | flags.case_insensitive = Some(enable); | |
1012 | } | |
1013 | ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => { | |
1014 | flags.multi_line = Some(enable); | |
1015 | } | |
1016 | ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => { | |
1017 | flags.dot_matches_new_line = Some(enable); | |
1018 | } | |
1019 | ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => { | |
1020 | flags.swap_greed = Some(enable); | |
1021 | } | |
1022 | ast::FlagsItemKind::Flag(ast::Flag::Unicode) => { | |
1023 | flags.unicode = Some(enable); | |
1024 | } | |
1025 | ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {} | |
1026 | } | |
1027 | } | |
1028 | flags | |
1029 | } | |
1030 | ||
1031 | fn merge(&mut self, previous: &Flags) { | |
1032 | if self.case_insensitive.is_none() { | |
1033 | self.case_insensitive = previous.case_insensitive; | |
1034 | } | |
1035 | if self.multi_line.is_none() { | |
1036 | self.multi_line = previous.multi_line; | |
1037 | } | |
1038 | if self.dot_matches_new_line.is_none() { | |
1039 | self.dot_matches_new_line = previous.dot_matches_new_line; | |
1040 | } | |
1041 | if self.swap_greed.is_none() { | |
1042 | self.swap_greed = previous.swap_greed; | |
1043 | } | |
1044 | if self.unicode.is_none() { | |
1045 | self.unicode = previous.unicode; | |
1046 | } | |
1047 | } | |
1048 | ||
1049 | fn case_insensitive(&self) -> bool { | |
1050 | self.case_insensitive.unwrap_or(false) | |
1051 | } | |
1052 | ||
1053 | fn multi_line(&self) -> bool { | |
1054 | self.multi_line.unwrap_or(false) | |
1055 | } | |
1056 | ||
1057 | fn dot_matches_new_line(&self) -> bool { | |
1058 | self.dot_matches_new_line.unwrap_or(false) | |
1059 | } | |
1060 | ||
1061 | fn swap_greed(&self) -> bool { | |
1062 | self.swap_greed.unwrap_or(false) | |
1063 | } | |
1064 | ||
1065 | fn unicode(&self) -> bool { | |
1066 | self.unicode.unwrap_or(true) | |
1067 | } | |
1068 | } | |
1069 | ||
1070 | fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes { | |
f9f354fc XL |
1071 | let ranges: Vec<_> = ascii_class(kind) |
1072 | .iter() | |
1073 | .cloned() | |
1074 | .map(|(s, e)| hir::ClassBytesRange::new(s as u8, e as u8)) | |
1075 | .collect(); | |
0531ce1d XL |
1076 | hir::ClassBytes::new(ranges) |
1077 | } | |
1078 | ||
1079 | fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] { | |
1080 | use ast::ClassAsciiKind::*; | |
0531ce1d | 1081 | match *kind { |
f9f354fc XL |
1082 | Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')], |
1083 | Alpha => &[('A', 'Z'), ('a', 'z')], | |
1084 | Ascii => &[('\x00', '\x7F')], | |
1085 | Blank => &[('\t', '\t'), (' ', ' ')], | |
1086 | Cntrl => &[('\x00', '\x1F'), ('\x7F', '\x7F')], | |
1087 | Digit => &[('0', '9')], | |
1088 | Graph => &[('!', '~')], | |
1089 | Lower => &[('a', 'z')], | |
1090 | Print => &[(' ', '~')], | |
1091 | Punct => &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')], | |
1092 | Space => &[ | |
1093 | ('\t', '\t'), | |
1094 | ('\n', '\n'), | |
1095 | ('\x0B', '\x0B'), | |
1096 | ('\x0C', '\x0C'), | |
1097 | ('\r', '\r'), | |
1098 | (' ', ' '), | |
1099 | ], | |
1100 | Upper => &[('A', 'Z')], | |
1101 | Word => &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')], | |
1102 | Xdigit => &[('0', '9'), ('A', 'F'), ('a', 'f')], | |
0531ce1d XL |
1103 | } |
1104 | } | |
1105 | ||
1106 | #[cfg(test)] | |
1107 | mod tests { | |
0531ce1d | 1108 | use ast::parse::ParserBuilder; |
f9f354fc | 1109 | use ast::{self, Ast, Position, Span}; |
0531ce1d XL |
1110 | use hir::{self, Hir, HirKind}; |
1111 | use unicode::{self, ClassQuery}; | |
1112 | ||
f9f354fc | 1113 | use super::{ascii_class, TranslatorBuilder}; |
0531ce1d XL |
1114 | |
1115 | // We create these errors to compare with real hir::Errors in the tests. | |
1116 | // We define equality between TestError and hir::Error to disregard the | |
1117 | // pattern string in hir::Error, which is annoying to provide in tests. | |
1118 | #[derive(Clone, Debug)] | |
1119 | struct TestError { | |
1120 | span: Span, | |
1121 | kind: hir::ErrorKind, | |
1122 | } | |
1123 | ||
1124 | impl PartialEq<hir::Error> for TestError { | |
1125 | fn eq(&self, other: &hir::Error) -> bool { | |
1126 | self.span == other.span && self.kind == other.kind | |
1127 | } | |
1128 | } | |
1129 | ||
1130 | impl PartialEq<TestError> for hir::Error { | |
1131 | fn eq(&self, other: &TestError) -> bool { | |
1132 | self.span == other.span && self.kind == other.kind | |
1133 | } | |
1134 | } | |
1135 | ||
1136 | fn parse(pattern: &str) -> Ast { | |
1137 | ParserBuilder::new().octal(true).build().parse(pattern).unwrap() | |
1138 | } | |
1139 | ||
1140 | fn t(pattern: &str) -> Hir { | |
1141 | TranslatorBuilder::new() | |
1142 | .allow_invalid_utf8(false) | |
1143 | .build() | |
1144 | .translate(pattern, &parse(pattern)) | |
1145 | .unwrap() | |
1146 | } | |
1147 | ||
1148 | fn t_err(pattern: &str) -> hir::Error { | |
1149 | TranslatorBuilder::new() | |
1150 | .allow_invalid_utf8(false) | |
1151 | .build() | |
1152 | .translate(pattern, &parse(pattern)) | |
1153 | .unwrap_err() | |
1154 | } | |
1155 | ||
1156 | fn t_bytes(pattern: &str) -> Hir { | |
1157 | TranslatorBuilder::new() | |
1158 | .allow_invalid_utf8(true) | |
1159 | .build() | |
1160 | .translate(pattern, &parse(pattern)) | |
1161 | .unwrap() | |
1162 | } | |
1163 | ||
1164 | fn hir_lit(s: &str) -> Hir { | |
1165 | match s.len() { | |
1166 | 0 => Hir::empty(), | |
1167 | _ => { | |
1168 | let lits = s | |
1169 | .chars() | |
1170 | .map(hir::Literal::Unicode) | |
1171 | .map(Hir::literal) | |
1172 | .collect(); | |
1173 | Hir::concat(lits) | |
1174 | } | |
1175 | } | |
1176 | } | |
1177 | ||
1178 | fn hir_blit(s: &[u8]) -> Hir { | |
1179 | match s.len() { | |
1180 | 0 => Hir::empty(), | |
1181 | 1 => Hir::literal(hir::Literal::Byte(s[0])), | |
1182 | _ => { | |
1183 | let lits = s | |
1184 | .iter() | |
1185 | .cloned() | |
1186 | .map(hir::Literal::Byte) | |
1187 | .map(Hir::literal) | |
1188 | .collect(); | |
1189 | Hir::concat(lits) | |
1190 | } | |
1191 | } | |
1192 | } | |
1193 | ||
f9f354fc | 1194 | fn hir_group(i: u32, expr: Hir) -> Hir { |
0531ce1d XL |
1195 | Hir::group(hir::Group { |
1196 | kind: hir::GroupKind::CaptureIndex(i), | |
1197 | hir: Box::new(expr), | |
1198 | }) | |
1199 | } | |
1200 | ||
f9f354fc | 1201 | fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir { |
0531ce1d XL |
1202 | Hir::group(hir::Group { |
1203 | kind: hir::GroupKind::CaptureName { | |
1204 | name: name.to_string(), | |
1205 | index: i, | |
1206 | }, | |
1207 | hir: Box::new(expr), | |
1208 | }) | |
1209 | } | |
1210 | ||
f9f354fc | 1211 | fn hir_group_nocap(expr: Hir) -> Hir { |
0531ce1d XL |
1212 | Hir::group(hir::Group { |
1213 | kind: hir::GroupKind::NonCapturing, | |
1214 | hir: Box::new(expr), | |
1215 | }) | |
1216 | } | |
1217 | ||
1218 | fn hir_quest(greedy: bool, expr: Hir) -> Hir { | |
1219 | Hir::repetition(hir::Repetition { | |
1220 | kind: hir::RepetitionKind::ZeroOrOne, | |
1221 | greedy: greedy, | |
1222 | hir: Box::new(expr), | |
1223 | }) | |
1224 | } | |
1225 | ||
1226 | fn hir_star(greedy: bool, expr: Hir) -> Hir { | |
1227 | Hir::repetition(hir::Repetition { | |
1228 | kind: hir::RepetitionKind::ZeroOrMore, | |
1229 | greedy: greedy, | |
1230 | hir: Box::new(expr), | |
1231 | }) | |
1232 | } | |
1233 | ||
1234 | fn hir_plus(greedy: bool, expr: Hir) -> Hir { | |
1235 | Hir::repetition(hir::Repetition { | |
1236 | kind: hir::RepetitionKind::OneOrMore, | |
1237 | greedy: greedy, | |
1238 | hir: Box::new(expr), | |
1239 | }) | |
1240 | } | |
1241 | ||
1242 | fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir { | |
1243 | Hir::repetition(hir::Repetition { | |
1244 | kind: hir::RepetitionKind::Range(range), | |
1245 | greedy: greedy, | |
1246 | hir: Box::new(expr), | |
1247 | }) | |
1248 | } | |
1249 | ||
1250 | fn hir_alt(alts: Vec<Hir>) -> Hir { | |
1251 | Hir::alternation(alts) | |
1252 | } | |
1253 | ||
1254 | fn hir_cat(exprs: Vec<Hir>) -> Hir { | |
1255 | Hir::concat(exprs) | |
1256 | } | |
1257 | ||
f9f354fc | 1258 | #[allow(dead_code)] |
0531ce1d XL |
1259 | fn hir_uclass_query(query: ClassQuery) -> Hir { |
1260 | Hir::class(hir::Class::Unicode(unicode::class(query).unwrap())) | |
1261 | } | |
1262 | ||
f9f354fc | 1263 | #[allow(dead_code)] |
0531ce1d | 1264 | fn hir_uclass_perl_word() -> Hir { |
f9f354fc | 1265 | Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap())) |
0531ce1d XL |
1266 | } |
1267 | ||
1268 | fn hir_uclass(ranges: &[(char, char)]) -> Hir { | |
1269 | let ranges: Vec<hir::ClassUnicodeRange> = ranges | |
1270 | .iter() | |
1271 | .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)) | |
1272 | .collect(); | |
1273 | Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges))) | |
1274 | } | |
1275 | ||
1276 | fn hir_bclass(ranges: &[(u8, u8)]) -> Hir { | |
1277 | let ranges: Vec<hir::ClassBytesRange> = ranges | |
1278 | .iter() | |
1279 | .map(|&(s, e)| hir::ClassBytesRange::new(s, e)) | |
1280 | .collect(); | |
1281 | Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges))) | |
1282 | } | |
1283 | ||
1284 | fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir { | |
1285 | let ranges: Vec<hir::ClassBytesRange> = ranges | |
1286 | .iter() | |
1287 | .map(|&(s, e)| { | |
1288 | assert!(s as u32 <= 0x7F); | |
1289 | assert!(e as u32 <= 0x7F); | |
1290 | hir::ClassBytesRange::new(s as u8, e as u8) | |
1291 | }) | |
1292 | .collect(); | |
1293 | Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges))) | |
1294 | } | |
1295 | ||
1296 | fn hir_case_fold(expr: Hir) -> Hir { | |
1297 | match expr.into_kind() { | |
1298 | HirKind::Class(mut cls) => { | |
1299 | cls.case_fold_simple(); | |
1300 | Hir::class(cls) | |
1301 | } | |
1302 | _ => panic!("cannot case fold non-class Hir expr"), | |
1303 | } | |
1304 | } | |
1305 | ||
1306 | fn hir_negate(expr: Hir) -> Hir { | |
1307 | match expr.into_kind() { | |
1308 | HirKind::Class(mut cls) => { | |
1309 | cls.negate(); | |
1310 | Hir::class(cls) | |
1311 | } | |
1312 | _ => panic!("cannot negate non-class Hir expr"), | |
1313 | } | |
1314 | } | |
1315 | ||
f9f354fc | 1316 | #[allow(dead_code)] |
0531ce1d XL |
1317 | fn hir_union(expr1: Hir, expr2: Hir) -> Hir { |
1318 | use hir::Class::{Bytes, Unicode}; | |
1319 | ||
1320 | match (expr1.into_kind(), expr2.into_kind()) { | |
f9f354fc | 1321 | (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => { |
0531ce1d XL |
1322 | c1.union(&c2); |
1323 | Hir::class(hir::Class::Unicode(c1)) | |
1324 | } | |
f9f354fc | 1325 | (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => { |
0531ce1d XL |
1326 | c1.union(&c2); |
1327 | Hir::class(hir::Class::Bytes(c1)) | |
1328 | } | |
1329 | _ => panic!("cannot union non-class Hir exprs"), | |
1330 | } | |
1331 | } | |
1332 | ||
f9f354fc | 1333 | #[allow(dead_code)] |
0531ce1d XL |
1334 | fn hir_difference(expr1: Hir, expr2: Hir) -> Hir { |
1335 | use hir::Class::{Bytes, Unicode}; | |
1336 | ||
1337 | match (expr1.into_kind(), expr2.into_kind()) { | |
f9f354fc | 1338 | (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => { |
0531ce1d XL |
1339 | c1.difference(&c2); |
1340 | Hir::class(hir::Class::Unicode(c1)) | |
1341 | } | |
f9f354fc | 1342 | (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => { |
0531ce1d XL |
1343 | c1.difference(&c2); |
1344 | Hir::class(hir::Class::Bytes(c1)) | |
1345 | } | |
1346 | _ => panic!("cannot difference non-class Hir exprs"), | |
1347 | } | |
1348 | } | |
1349 | ||
1350 | fn hir_anchor(anchor: hir::Anchor) -> Hir { | |
1351 | Hir::anchor(anchor) | |
1352 | } | |
1353 | ||
1354 | fn hir_word(wb: hir::WordBoundary) -> Hir { | |
1355 | Hir::word_boundary(wb) | |
1356 | } | |
1357 | ||
#[test]
fn empty() {
    // Each entry pairs a pattern with the HIR it must translate to.
    let cases = vec![
        ("", Hir::empty()),
        ("(?i)", Hir::empty()),
        ("()", hir_group(1, Hir::empty())),
        ("(?:)", hir_group_nocap(Hir::empty())),
        ("(?P<wat>)", hir_group_name(1, "wat", Hir::empty())),
        ("|", hir_alt(vec![Hir::empty(), Hir::empty()])),
        (
            "()|()",
            hir_alt(vec![
                hir_group(1, Hir::empty()),
                hir_group(2, Hir::empty()),
            ]),
        ),
        ("(|b)", hir_group(1, hir_alt(vec![Hir::empty(), hir_lit("b")]))),
        ("(a|)", hir_group(1, hir_alt(vec![hir_lit("a"), Hir::empty()]))),
        (
            "(a||c)",
            hir_group(
                1,
                hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c")]),
            ),
        ),
        (
            "(||)",
            hir_group(
                1,
                hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty()]),
            ),
        ),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }
}
1396 | ||
#[test]
fn literal() {
    // Patterns translated with invalid UTF-8 disallowed.
    let utf8_cases = vec![
        ("a", hir_lit("a")),
        ("(?-u)a", hir_lit("a")),
        ("☃", hir_lit("☃")),
        ("abcd", hir_lit("abcd")),
    ];
    for (pattern, expected) in utf8_cases {
        assert_eq!(t(pattern), expected);
    }

    // Patterns translated with invalid UTF-8 allowed.
    let byte_cases = vec![
        ("(?-u)a", hir_lit("a")),
        ("(?-u)\x61", hir_lit("a")),
        (r"(?-u)\x61", hir_lit("a")),
        (r"(?-u)\xFF", hir_blit(b"\xFF")),
    ];
    for (pattern, expected) in byte_cases {
        assert_eq!(t_bytes(pattern), expected);
    }

    // A non-ASCII literal is rejected once Unicode mode is turned off.
    let unicode_not_allowed = TestError {
        kind: hir::ErrorKind::UnicodeNotAllowed,
        span: Span::new(Position::new(5, 1, 6), Position::new(8, 1, 7)),
    };
    assert_eq!(t_err("(?-u)☃"), unicode_not_allowed);

    // A non-ASCII byte is rejected when invalid UTF-8 is disallowed.
    let invalid_utf8 = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(5, 1, 6), Position::new(9, 1, 10)),
    };
    assert_eq!(t_err(r"(?-u)\xFF"), invalid_utf8);
}
1430 | ||
#[test]
fn literal_case_insensitive() {
    // Case folded single letters as byte classes.
    let a_bytes = || hir_bclass(&[(b'A', b'A'), (b'a', b'a')]);
    let b_bytes = || hir_bclass(&[(b'B', b'B'), (b'b', b'b')]);
    let c_bytes = || hir_bclass(&[(b'C', b'C'), (b'c', b'c')]);

    #[cfg(feature = "unicode-case")]
    {
        // Case folded single letters as Unicode classes.
        let a_uni = || hir_uclass(&[('A', 'A'), ('a', 'a')]);
        let b_uni = || hir_uclass(&[('B', 'B'), ('b', 'b')]);
        let c_uni = || hir_uclass(&[('C', 'C'), ('c', 'c')]);

        assert_eq!(t("(?i)a"), a_uni());
        assert_eq!(t("(?i:a)"), hir_group_nocap(a_uni()));
        assert_eq!(
            t("a(?i)a(?-i)a"),
            hir_cat(vec![hir_lit("a"), a_uni(), hir_lit("a")])
        );
        assert_eq!(
            t("(?i)ab@c"),
            hir_cat(vec![a_uni(), b_uni(), hir_lit("@"), c_uni()])
        );
        assert_eq!(
            t("(?i)β"),
            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ')])
        );
    }

    assert_eq!(t("(?i-u)a"), a_bytes());
    #[cfg(feature = "unicode-case")]
    {
        assert_eq!(
            t("(?-u)a(?i)a(?-i)a"),
            hir_cat(vec![hir_lit("a"), a_bytes(), hir_lit("a")])
        );
    }
    assert_eq!(
        t("(?i-u)ab@c"),
        hir_cat(vec![a_bytes(), b_bytes(), hir_lit("@"), c_bytes()])
    );

    assert_eq!(t_bytes("(?i-u)a"), a_bytes());
    assert_eq!(t_bytes("(?i-u)\x61"), a_bytes());
    assert_eq!(t_bytes(r"(?i-u)\x61"), a_bytes());
    assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));

    // A non-ASCII literal is rejected once Unicode mode is turned off.
    let unicode_not_allowed = TestError {
        kind: hir::ErrorKind::UnicodeNotAllowed,
        span: Span::new(Position::new(6, 1, 7), Position::new(8, 1, 8)),
    };
    assert_eq!(t_err("(?i-u)β"), unicode_not_allowed);
}
1510 | ||
#[test]
fn dot() {
    // Unicode mode: `.` is a class of Unicode scalar values.
    let any_but_newline = hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}')]);
    assert_eq!(t("."), any_but_newline);
    let any_char = hir_uclass(&[('\0', '\u{10FFFF}')]);
    assert_eq!(t("(?s)."), any_char);

    // Byte mode: `.` is a class of bytes.
    let any_byte_but_newline =
        hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF')]);
    assert_eq!(t_bytes("(?-u)."), any_byte_but_newline);
    let any_byte = hir_bclass(&[(b'\0', b'\xFF')]);
    assert_eq!(t_bytes("(?s-u)."), any_byte);

    // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
    let plain_dot_err = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(5, 1, 6), Position::new(6, 1, 7)),
    };
    assert_eq!(t_err("(?-u)."), plain_dot_err);
    let dotall_err = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(6, 1, 7), Position::new(7, 1, 8)),
    };
    assert_eq!(t_err("(?s-u)."), dotall_err);
}
1546 | ||
#[test]
fn assertions() {
    // Anchors, with and without multi-line mode.
    let anchors = vec![
        ("^", hir::Anchor::StartText),
        ("$", hir::Anchor::EndText),
        (r"\A", hir::Anchor::StartText),
        (r"\z", hir::Anchor::EndText),
        ("(?m)^", hir::Anchor::StartLine),
        ("(?m)$", hir::Anchor::EndLine),
        (r"(?m)\A", hir::Anchor::StartText),
        (r"(?m)\z", hir::Anchor::EndText),
    ];
    for (pattern, anchor) in anchors {
        assert_eq!(t(pattern), hir_anchor(anchor));
    }

    // Word boundaries, with and without Unicode mode.
    let boundaries = vec![
        (r"\b", hir::WordBoundary::Unicode),
        (r"\B", hir::WordBoundary::UnicodeNegate),
        (r"(?-u)\b", hir::WordBoundary::Ascii),
    ];
    for (pattern, boundary) in boundaries {
        assert_eq!(t(pattern), hir_word(boundary));
    }
    let ascii_negate = hir_word(hir::WordBoundary::AsciiNegate);
    assert_eq!(t_bytes(r"(?-u)\B"), ascii_negate);

    // A negated ASCII word boundary is rejected when invalid UTF-8 is
    // disallowed.
    let invalid_utf8 = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(5, 1, 6), Position::new(7, 1, 8)),
    };
    assert_eq!(t_err(r"(?-u)\B"), invalid_utf8);
}
1577 | ||
#[test]
fn group() {
    // Each entry pairs a pattern with the HIR it must translate to.
    let cases = vec![
        ("(a)", hir_group(1, hir_lit("a"))),
        (
            "(a)(b)",
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group(2, hir_lit("b")),
            ]),
        ),
        (
            "(a)|(b)",
            hir_alt(vec![
                hir_group(1, hir_lit("a")),
                hir_group(2, hir_lit("b")),
            ]),
        ),
        ("(?P<foo>)", hir_group_name(1, "foo", Hir::empty())),
        ("(?P<foo>a)", hir_group_name(1, "foo", hir_lit("a"))),
        (
            "(?P<foo>a)(?P<bar>b)",
            hir_cat(vec![
                hir_group_name(1, "foo", hir_lit("a")),
                hir_group_name(2, "bar", hir_lit("b")),
            ]),
        ),
        ("(?:)", hir_group_nocap(Hir::empty())),
        ("(?:a)", hir_group_nocap(hir_lit("a"))),
        (
            "(?:a)(b)",
            hir_cat(vec![
                hir_group_nocap(hir_lit("a")),
                hir_group(1, hir_lit("b")),
            ]),
        ),
        (
            "(a)(?:b)(c)",
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group_nocap(hir_lit("b")),
                hir_group(2, hir_lit("c")),
            ]),
        ),
        (
            "(a)(?P<foo>b)(c)",
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group_name(2, "foo", hir_lit("b")),
                hir_group(3, hir_lit("c")),
            ]),
        ),
        ("()", hir_group(1, Hir::empty())),
        ("((?i))", hir_group(1, Hir::empty())),
        ("((?x))", hir_group(1, Hir::empty())),
        ("(((?x)))", hir_group(1, hir_group(2, Hir::empty()))),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }
}
1634 | ||
#[test]
fn flags() {
    // Case folded `a` as a Unicode class and as a byte class.
    #[cfg(feature = "unicode-case")]
    let a_uni = || hir_uclass(&[('A', 'A'), ('a', 'a')]);
    let a_bytes = || hir_bclass(&[(b'A', b'A'), (b'a', b'a')]);

    #[cfg(feature = "unicode-case")]
    {
        assert_eq!(
            t("(?i:a)a"),
            hir_cat(vec![hir_group_nocap(a_uni()), hir_lit("a")])
        );
    }

    // Flags set inside a group do not leak out of it.
    assert_eq!(
        t("(?i-u:a)β"),
        hir_cat(vec![hir_group_nocap(a_bytes()), hir_lit("β")])
    );
    assert_eq!(
        t("(?:(?i-u)a)b"),
        hir_cat(vec![hir_group_nocap(a_bytes()), hir_lit("b")])
    );
    assert_eq!(
        t("((?i-u)a)b"),
        hir_cat(vec![hir_group(1, a_bytes()), hir_lit("b")])
    );

    #[cfg(feature = "unicode-case")]
    {
        assert_eq!(
            t("(?i)(?-i:a)a"),
            hir_cat(vec![hir_group_nocap(hir_lit("a")), a_uni()])
        );
        assert_eq!(
            t("(?im)a^"),
            hir_cat(vec![a_uni(), hir_anchor(hir::Anchor::StartLine)])
        );
        assert_eq!(
            t("(?im)a^(?i-m)a^"),
            hir_cat(vec![
                a_uni(),
                hir_anchor(hir::Anchor::StartLine),
                a_uni(),
                hir_anchor(hir::Anchor::StartText),
            ])
        );
    }

    // The swap-greed flag inverts greediness until it is turned off again.
    assert_eq!(
        t("(?U)a*a*?(?-U)a*a*?"),
        hir_cat(vec![
            hir_star(false, hir_lit("a")),
            hir_star(true, hir_lit("a")),
            hir_star(true, hir_lit("a")),
            hir_star(false, hir_lit("a")),
        ])
    );

    #[cfg(feature = "unicode-case")]
    {
        assert_eq!(
            t("(?:a(?i)a)a"),
            hir_cat(vec![
                hir_group_nocap(hir_cat(vec![hir_lit("a"), a_uni()])),
                hir_lit("a"),
            ])
        );
        assert_eq!(
            t("(?i)(?:a(?-i)a)a"),
            hir_cat(vec![
                hir_group_nocap(hir_cat(vec![a_uni(), hir_lit("a")])),
                a_uni(),
            ])
        );
    }
}
1724 | ||
#[test]
fn escape() {
    // Every escaped meta character translates to the plain character.
    let pattern = r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#";
    let expected = hir_lit(r"\.+*?()|[]{}^$#");
    assert_eq!(t(pattern), expected);
}
1732 | ||
#[test]
fn repetition() {
    let a = || hir_lit("a");
    let b = || hir_lit("b");

    // Each entry pairs a pattern with the HIR it must translate to.
    let cases = vec![
        // Greedy and lazy forms of the basic operators.
        ("a?", hir_quest(true, a())),
        ("a*", hir_star(true, a())),
        ("a+", hir_plus(true, a())),
        ("a??", hir_quest(false, a())),
        ("a*?", hir_star(false, a())),
        ("a+?", hir_plus(false, a())),
        // Greedy and lazy forms of counted repetitions.
        ("a{1}", hir_range(true, hir::RepetitionRange::Exactly(1), a())),
        ("a{1,}", hir_range(true, hir::RepetitionRange::AtLeast(1), a())),
        ("a{1,2}", hir_range(true, hir::RepetitionRange::Bounded(1, 2), a())),
        ("a{1}?", hir_range(false, hir::RepetitionRange::Exactly(1), a())),
        ("a{1,}?", hir_range(false, hir::RepetitionRange::AtLeast(1), a())),
        (
            "a{1,2}?",
            hir_range(false, hir::RepetitionRange::Bounded(1, 2), a()),
        ),
        // Interaction with concatenation, groups and alternation.
        ("ab?", hir_cat(vec![a(), hir_quest(true, b())])),
        ("(ab)?", hir_quest(true, hir_group(1, hir_cat(vec![a(), b()])))),
        ("a|b?", hir_alt(vec![a(), hir_quest(true, b())])),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }
}
1787 | ||
/// Checks translation of concatenations and alternations, both bare and
/// wrapped in (possibly nested) capture groups. The first argument to
/// `hir_group` is the group number expected for that group.
#[test]
fn cat_alt() {
    let cases = vec![
        ("(ab)", hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b")]))),
        ("a|b", hir_alt(vec![hir_lit("a"), hir_lit("b")])),
        (
            "a|b|c",
            hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c")]),
        ),
        (
            "ab|bc|cd",
            hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd")]),
        ),
        ("(a|b)", hir_group(1, hir_alt(vec![hir_lit("a"), hir_lit("b")]))),
        (
            "(a|b|c)",
            hir_group(
                1,
                hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c")]),
            ),
        ),
        (
            "(ab|bc|cd)",
            hir_group(
                1,
                hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd")]),
            ),
        ),
        (
            "(ab|(bc|(cd)))",
            hir_group(
                1,
                hir_alt(vec![
                    hir_lit("ab"),
                    hir_group(
                        2,
                        hir_alt(vec![
                            hir_lit("bc"),
                            hir_group(3, hir_lit("cd")),
                        ]),
                    ),
                ]),
            ),
        ),
    ];

    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }
}
1838 | ||
/// Checks translation of the named ASCII classes (`[[:alnum:]]` and
/// friends): each name maps to the ranges returned by `ascii_class` for
/// the matching `ast::ClassAsciiKind`. Also covers negation, the `i` flag,
/// and the byte-oriented (`(?-u)`) variants, including the `InvalidUtf8`
/// error reported for a negated class under `(?-u)`.
#[test]
fn class_ascii() {
    // Every span asserted here sits on line 1, with the column equal to
    // the offset plus one.
    let span = |start: usize, end: usize| {
        Span::new(
            Position::new(start, 1, start + 1),
            Position::new(end, 1, end + 1),
        )
    };

    let named = vec![
        ("[[:alnum:]]", ast::ClassAsciiKind::Alnum),
        ("[[:alpha:]]", ast::ClassAsciiKind::Alpha),
        ("[[:ascii:]]", ast::ClassAsciiKind::Ascii),
        ("[[:blank:]]", ast::ClassAsciiKind::Blank),
        ("[[:cntrl:]]", ast::ClassAsciiKind::Cntrl),
        ("[[:digit:]]", ast::ClassAsciiKind::Digit),
        ("[[:graph:]]", ast::ClassAsciiKind::Graph),
        ("[[:lower:]]", ast::ClassAsciiKind::Lower),
        ("[[:print:]]", ast::ClassAsciiKind::Print),
        ("[[:punct:]]", ast::ClassAsciiKind::Punct),
        ("[[:space:]]", ast::ClassAsciiKind::Space),
        ("[[:upper:]]", ast::ClassAsciiKind::Upper),
        ("[[:word:]]", ast::ClassAsciiKind::Word),
        ("[[:xdigit:]]", ast::ClassAsciiKind::Xdigit),
    ];
    for (pattern, kind) in named {
        assert_eq!(t(pattern), hir_uclass(ascii_class(&kind)));
    }

    let lower = ast::ClassAsciiKind::Lower;

    assert_eq!(
        t("[[:^lower:]]"),
        hir_negate(hir_uclass(ascii_class(&lower)))
    );
    // With the `i` flag the expected class also lists U+017F and U+212A in
    // addition to both ASCII letter ranges.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)[[:lower:]]"),
        hir_uclass(&[
            ('A', 'Z'),
            ('a', 'z'),
            ('\u{17F}', '\u{17F}'),
            ('\u{212A}', '\u{212A}'),
        ])
    );

    assert_eq!(
        t("(?-u)[[:lower:]]"),
        hir_bclass_from_char(ascii_class(&lower))
    );
    assert_eq!(
        t("(?i-u)[[:lower:]]"),
        hir_case_fold(hir_bclass_from_char(ascii_class(&lower)))
    );

    assert_eq!(
        t_err("(?-u)[[:^lower:]]"),
        TestError { kind: hir::ErrorKind::InvalidUtf8, span: span(6, 16) }
    );
    assert_eq!(
        t_err("(?i-u)[[:^lower:]]"),
        TestError { kind: hir::ErrorKind::InvalidUtf8, span: span(7, 17) }
    );
}
1945 | ||
/// Checks translation of the Perl classes `\d`, `\s`, `\w` and their
/// negations `\D`, `\S`, `\W`. Covers the default mode and `(?-u)` mode,
/// each with and without the `i` flag. Only built when the `unicode-perl`
/// feature is enabled.
#[test]
#[cfg(feature = "unicode-perl")]
fn class_perl() {
    let digit = || hir_uclass_query(ClassQuery::Binary("digit"));
    let space = || hir_uclass_query(ClassQuery::Binary("space"));

    // Unicode
    assert_eq!(t(r"\d"), digit());
    assert_eq!(t(r"\s"), space());
    assert_eq!(t(r"\w"), hir_uclass_perl_word());
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\d"), digit());
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\s"), space());
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());

    // Unicode, negated
    assert_eq!(t(r"\D"), hir_negate(digit()));
    assert_eq!(t(r"\S"), hir_negate(space()));
    assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\D"), hir_negate(digit()));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\S"), hir_negate(space()));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));

    // ASCII only: the expected byte class is the same with or without the
    // `i` flag.
    let ascii = vec![
        (r"(?-u)\d", ast::ClassAsciiKind::Digit),
        (r"(?-u)\s", ast::ClassAsciiKind::Space),
        (r"(?-u)\w", ast::ClassAsciiKind::Word),
        (r"(?i-u)\d", ast::ClassAsciiKind::Digit),
        (r"(?i-u)\s", ast::ClassAsciiKind::Space),
        (r"(?i-u)\w", ast::ClassAsciiKind::Word),
    ];
    for (pattern, kind) in ascii {
        assert_eq!(t(pattern), hir_bclass_from_char(ascii_class(&kind)));
    }

    // ASCII only, negated
    let ascii_negated = vec![
        (r"(?-u)\D", ast::ClassAsciiKind::Digit),
        (r"(?-u)\S", ast::ClassAsciiKind::Space),
        (r"(?-u)\W", ast::ClassAsciiKind::Word),
        (r"(?i-u)\D", ast::ClassAsciiKind::Digit),
        (r"(?i-u)\S", ast::ClassAsciiKind::Space),
        (r"(?i-u)\W", ast::ClassAsciiKind::Word),
    ];
    for (pattern, kind) in ascii_negated {
        assert_eq!(
            t(pattern),
            hir_negate(hir_bclass_from_char(ascii_class(&kind)))
        );
    }
}
2053 | ||
/// Without the `unicode-perl` feature, `\w` must fail to translate with
/// `UnicodePerlClassNotFound`, and the error span must cover the whole
/// two-byte pattern.
#[test]
#[cfg(not(feature = "unicode-perl"))]
fn class_perl_word_disabled() {
    let whole_pattern =
        Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePerlClassNotFound,
        span: whole_pattern,
    };
    assert_eq!(t_err(r"\w"), expected);
}
2068 | ||
/// With both `unicode-perl` and `unicode-bool` disabled, `\s` must fail to
/// translate with `UnicodePerlClassNotFound`, and the error span must
/// cover the whole two-byte pattern.
#[test]
#[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
fn class_perl_space_disabled() {
    let whole_pattern =
        Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePerlClassNotFound,
        span: whole_pattern,
    };
    assert_eq!(t_err(r"\s"), expected);
}
2083 | ||
/// With both `unicode-perl` and `unicode-gencat` disabled, `\d` must fail
/// to translate with `UnicodePerlClassNotFound`, and the error span must
/// cover the whole two-byte pattern.
#[test]
#[cfg(all(
    not(feature = "unicode-perl"),
    not(feature = "unicode-gencat")
))]
fn class_perl_digit_disabled() {
    let whole_pattern =
        Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePerlClassNotFound,
        span: whole_pattern,
    };
    assert_eq!(t_err(r"\d"), expected);
}
2101 | ||
/// Checks translation of `\p{..}` / `\P{..}` general-category classes.
/// Covers the one-letter, long, `gc:`/`gc=`/`gc!=` and loosely spelled
/// forms, plus the special `any`, `assigned` and `ascii` names. Also
/// checks the errors for use under `(?-u)` and for unknown properties or
/// property values. Only built when `unicode-gencat` is enabled.
#[test]
#[cfg(feature = "unicode-gencat")]
fn class_unicode_gencat() {
    // Every span asserted here sits on line 1, with the column equal to
    // the offset plus one.
    let span = |start: usize, end: usize| {
        Span::new(
            Position::new(start, 1, start + 1),
            Position::new(end, 1, end + 1),
        )
    };

    // (pattern, name handed to `ClassQuery::Binary` for the expected class)
    let queries: &[(&str, &str)] = &[
        (r"\pZ", "Z"),
        (r"\pz", "Z"),
        (r"\p{Separator}", "Z"),
        (r"\p{se PaRa ToR}", "Z"),
        (r"\p{gc:Separator}", "Z"),
        (r"\p{gc=Separator}", "Z"),
        (r"\p{Other}", "Other"),
        (r"\pC", "Other"),
        (r"\p{any}", "Any"),
        (r"\p{assigned}", "Assigned"),
        (r"\p{ascii}", "ASCII"),
        (r"\p{gc:any}", "Any"),
        (r"\p{gc:assigned}", "Assigned"),
        (r"\p{gc:ascii}", "ASCII"),
    ];
    for &(pattern, name) in queries {
        assert_eq!(t(pattern), hir_uclass_query(ClassQuery::Binary(name)));
    }

    // Each of these spells the negation of the `Z` category.
    for &pattern in &[r"\PZ", r"\P{separator}", r"\P{gc!=separator}"] {
        assert_eq!(
            t(pattern),
            hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
        );
    }

    // (pattern, expected error kind, span start offset, span end offset)
    let failures = vec![
        (r"(?-u)\pZ", hir::ErrorKind::UnicodeNotAllowed, 5, 8),
        (r"(?-u)\p{Separator}", hir::ErrorKind::UnicodeNotAllowed, 5, 18),
        (r"\pE", hir::ErrorKind::UnicodePropertyNotFound, 0, 3),
        (r"\p{Foo}", hir::ErrorKind::UnicodePropertyNotFound, 0, 7),
        (r"\p{gc:Foo}", hir::ErrorKind::UnicodePropertyValueNotFound, 0, 10),
    ];
    for (pattern, kind, start, end) in failures {
        assert_eq!(
            t_err(pattern),
            TestError { kind, span: span(start, end) }
        );
    }
}
2215 | ||
/// Without the `unicode-gencat` feature, general-category queries must
/// fail with `UnicodePropertyNotFound`, and the error span must cover the
/// whole pattern.
#[test]
#[cfg(not(feature = "unicode-gencat"))]
fn class_unicode_gencat_disabled() {
    // (pattern, end offset of the expected span; it always starts at 0)
    let failures: &[(&str, usize)] =
        &[(r"\p{Separator}", 13), (r"\p{Any}", 7)];

    for &(pattern, end) in failures {
        let expected = TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(end, 1, end + 1),
            ),
        };
        assert_eq!(t_err(pattern), expected);
    }
}
2241 | ||
/// Checks translation of script classes such as `\p{Greek}`, including
/// the `i` flag and negation, and the `UnicodePropertyValueNotFound` error
/// for unknown `sc:` / `scx:` values. Only built when `unicode-script` is
/// enabled.
#[test]
#[cfg(feature = "unicode-script")]
fn class_unicode_script() {
    let greek = || hir_uclass_query(ClassQuery::Binary("Greek"));

    assert_eq!(t(r"\p{Greek}"), greek());
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\p{Greek}"), hir_case_fold(greek()));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\P{Greek}"), hir_negate(hir_case_fold(greek())));

    // (pattern, end offset of the expected span; it always starts at 0)
    let failures: &[(&str, usize)] =
        &[(r"\p{sc:Foo}", 10), (r"\p{scx:Foo}", 11)];
    for &(pattern, end) in failures {
        let expected = TestError {
            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(end, 1, end + 1),
            ),
        };
        assert_eq!(t_err(pattern), expected);
    }
}
2283 | ||
/// Without the `unicode-script` feature, script queries must fail with
/// `UnicodePropertyNotFound`, and the error span must cover the whole
/// pattern.
#[test]
#[cfg(not(feature = "unicode-script"))]
fn class_unicode_script_disabled() {
    // (pattern, end offset of the expected span; it always starts at 0)
    let failures: &[(&str, usize)] =
        &[(r"\p{Greek}", 9), (r"\p{scx:Greek}", 13)];

    for &(pattern, end) in failures {
        let expected = TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(end, 1, end + 1),
            ),
        };
        assert_eq!(t_err(pattern), expected);
    }
}
2309 | ||
/// With the `unicode-age` feature enabled, an unknown age value must fail
/// with `UnicodePropertyValueNotFound`, and the error span must cover the
/// whole pattern.
#[test]
#[cfg(feature = "unicode-age")]
fn class_unicode_age() {
    let whole_pattern =
        Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePropertyValueNotFound,
        span: whole_pattern,
    };
    assert_eq!(t_err(r"\p{age:Foo}"), expected);
}
2324 | ||
5869c6ff XL |
/// The negated `any` class, `\P{any}`, must be rejected with
/// `EmptyClassNotAllowed`, and the error span must cover the whole
/// pattern. Only built when `unicode-gencat` is enabled.
#[test]
#[cfg(feature = "unicode-gencat")]
fn class_unicode_any_empty() {
    let whole_pattern =
        Span::new(Position::new(0, 1, 1), Position::new(7, 1, 8));
    let expected = TestError {
        kind: hir::ErrorKind::EmptyClassNotAllowed,
        span: whole_pattern,
    };
    assert_eq!(t_err(r"\P{any}"), expected);
}
2339 | ||
f9f354fc XL |
/// Without the `unicode-age` feature, an age query must fail with
/// `UnicodePropertyNotFound`, and the error span must cover the whole
/// pattern.
#[test]
#[cfg(not(feature = "unicode-age"))]
fn class_unicode_age_disabled() {
    let whole_pattern =
        Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePropertyNotFound,
        span: whole_pattern,
    };
    assert_eq!(t_err(r"\p{age:3.0}"), expected);
}
2354 | ||
/// Checks translation of bracketed character classes. Covers single
/// characters and ranges, escapes, Perl and Unicode classes nested inside
/// brackets, negation, the `i` flag, and byte classes under `(?-u)`. Also
/// covers punctuation (`[`, `&`, `~`, `-`) used as class members, and the
/// errors for a negated byte class and for a class that matches nothing.
#[test]
fn class_bracketed() {
    // Every span asserted here sits on line 1, with the column equal to
    // the offset plus one.
    let span = |start: usize, end: usize| {
        Span::new(
            Position::new(start, 1, start + 1),
            Position::new(end, 1, end + 1),
        )
    };
    // A Unicode class made of one inclusive range.
    let single = |lo: char, hi: char| hir_uclass(&[(lo, hi)]);
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    let digit = || hir_uclass_query(ClassQuery::Binary("digit"));
    #[cfg(feature = "unicode-gencat")]
    let separator = || hir_uclass_query(ClassQuery::Binary("separator"));
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    let greek_folded =
        || hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")));

    // (pattern, first char of the expected range, last char)
    let simple: &[(&str, char, char)] = &[
        ("[a]", 'a', 'a'),
        ("[a-z]", 'a', 'z'),
        ("[a-fd-h]", 'a', 'h'),
        ("[a-fg-m]", 'a', 'm'),
        (r"[\x00]", '\0', '\0'),
        (r"[\n]", '\n', '\n'),
        ("[\n]", '\n', '\n'),
    ];
    for &(pattern, lo, hi) in simple {
        assert_eq!(t(pattern), single(lo, hi));
    }
    assert_eq!(t("[^[a]]"), hir_negate(single('a', 'a')));

    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(t(r"[\d]"), digit());
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(t(r"[\pZ]"), separator());
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(t(r"[\p{separator}]"), separator());
    // Negating the brackets around an already negated class must yield
    // the same class as the un-negated form.
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(t(r"[^\D]"), digit());
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(t(r"[^\PZ]"), separator());
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(t(r"[^\P{separator}]"), separator());
    #[cfg(all(
        feature = "unicode-case",
        any(feature = "unicode-perl", feature = "unicode-gencat")
    ))]
    assert_eq!(t(r"(?i)[^\D]"), digit());
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(t(r"(?i)[^\P{greek}]"), greek_folded());

    assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
    assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
    assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));

    #[cfg(feature = "unicode-case")]
    assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)[k]"),
        hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}')])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)[β]"),
        hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ')])
    );
    assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k')]));

    assert_eq!(t("[^a]"), hir_negate(single('a', 'a')));
    assert_eq!(t(r"[^\x00]"), hir_negate(single('\0', '\0')));
    assert_eq!(
        t_bytes("(?-u)[^a]"),
        hir_negate(hir_bclass(&[(b'a', b'a')]))
    );
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(t(r"[^\d]"), hir_negate(digit()));
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(t(r"[^\pZ]"), hir_negate(separator()));
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(t(r"[^\p{separator}]"), hir_negate(separator()));
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(t(r"(?i)[^\p{greek}]"), hir_negate(greek_folded()));
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(t(r"(?i)[\P{greek}]"), hir_negate(greek_folded()));

    // Test some weird cases: punctuation that is special elsewhere in the
    // class syntax, used as a plain member or as a range endpoint.
    let weird: &[(&str, char, char)] = &[
        (r"[\[]", '[', '['),
        (r"[&]", '&', '&'),
        (r"[\&]", '&', '&'),
        (r"[\&\&]", '&', '&'),
        (r"[\x00-&]", '\0', '&'),
        (r"[&-\xFF]", '&', '\u{FF}'),
        (r"[~]", '~', '~'),
        (r"[\~]", '~', '~'),
        (r"[\~\~]", '~', '~'),
        (r"[\x00-~]", '\0', '~'),
        (r"[~-\xFF]", '~', '\u{FF}'),
        (r"[-]", '-', '-'),
        (r"[\-]", '-', '-'),
        (r"[\-\-]", '-', '-'),
        (r"[\x00-\-]", '\0', '-'),
        (r"[\--\xFF]", '-', '\u{FF}'),
    ];
    for &(pattern, lo, hi) in weird {
        assert_eq!(t(pattern), single(lo, hi));
    }

    assert_eq!(
        t_err("(?-u)[^a]"),
        TestError { kind: hir::ErrorKind::InvalidUtf8, span: span(5, 9) }
    );
    #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
    assert_eq!(
        t_err(r"[^\s\S]"),
        TestError {
            kind: hir::ErrorKind::EmptyClassNotAllowed,
            span: span(0, 7),
        }
    );
    #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
    assert_eq!(
        t_err(r"(?-u)[^\s\S]"),
        TestError {
            kind: hir::ErrorKind::EmptyClassNotAllowed,
            span: span(5, 12),
        }
    );
}
2511 | ||
/// Checks that the items listed inside one bracketed class (ranges,
/// `\pZ`, `\p{Greek}`, `\p{age:3.0}`, nested brackets) translate to the
/// union of the individual classes, including under negation and the `i`
/// flag. Each assertion is gated on the Unicode features it needs.
#[test]
fn class_bracketed_union() {
    #[cfg(feature = "unicode-gencat")]
    let separator = || hir_uclass_query(ClassQuery::Binary("separator"));
    #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
    let greek_or_separator = || {
        hir_union(
            hir_uclass_query(ClassQuery::Binary("greek")),
            separator(),
        )
    };
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    let age_3_0 = || {
        hir_uclass_query(ClassQuery::ByValue {
            property_name: "age",
            property_value: "3.0",
        })
    };
    // Expected class for `\p{age:3.0}`, `\pZ` and `\p{Greek}` combined.
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    let age_greek_separator = || hir_union(age_3_0(), greek_or_separator());

    assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[a\pZb]"),
        hir_union(hir_uclass(&[('a', 'b')]), separator())
    );
    #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
    assert_eq!(t(r"[\pZ\p{Greek}]"), greek_or_separator());
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(t(r"[\p{age:3.0}\pZ\p{Greek}]"), age_greek_separator());
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
        hir_union(
            age_3_0(),
            hir_union(
                hir_uclass_query(ClassQuery::Binary("cyrillic")),
                greek_or_separator(),
            ),
        )
    );

    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-case",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
        hir_case_fold(age_greek_separator())
    );
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
        hir_negate(age_greek_separator())
    );
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-case",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
        hir_negate(hir_case_fold(age_greek_separator()))
    );
}
2628 | ||
/// Checks bracketed classes that contain a negated nested class such as
/// `[a[^c]]`, with and without outer negation and the `i` flag. Also
/// checks that an outer negation leaving nothing to match is rejected with
/// `EmptyClassNotAllowed`.
#[test]
fn class_bracketed_nested() {
    let only_c = || hir_uclass(&[('c', 'c')]);
    // Every span asserted here sits on line 1, with the column equal to
    // the offset plus one.
    let span = |start: usize, end: usize| {
        Span::new(
            Position::new(start, 1, start + 1),
            Position::new(end, 1, end + 1),
        )
    };

    assert_eq!(t(r"[a[^c]]"), hir_negate(only_c()));
    assert_eq!(t(r"[a-b[^c]]"), hir_negate(only_c()));
    // `a-c` together with "everything but c" is expected to equal the
    // negation of the empty class.
    assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[])));

    assert_eq!(t(r"[^a[^c]]"), only_c());
    assert_eq!(t(r"[^a-b[^c]]"), only_c());

    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)[a[^c]]"), hir_negate(hir_case_fold(only_c())));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)[a-b[^c]]"), hir_negate(hir_case_fold(only_c())));

    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)[^a-b[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));

    assert_eq!(
        t_err(r"[^a-c[^c]]"),
        TestError {
            kind: hir::ErrorKind::EmptyClassNotAllowed,
            span: span(0, 10),
        }
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t_err(r"(?i)[^a-c[^c]]"),
        TestError {
            kind: hir::ErrorKind::EmptyClassNotAllowed,
            span: span(4, 14),
        }
    );
}
2679 | ||
// Exercises the `&&` (intersection) operator inside bracketed classes, in
// Unicode and byte mode, with and without case folding.
#[test]
fn class_bracketed_intersect() {
    // Classes whose intersection collapses to a single ASCII range:
    // (class syntax, first byte of the range, last byte of the range).
    let single_range: [(&str, u8, u8); 6] = [
        ("[abc&&b-c]", b'b', b'c'),
        ("[abc&&[b-c]]", b'b', b'c'),
        ("[[abc]&&[b-c]]", b'b', b'c'),
        ("[a-z&&b-y&&c-x]", b'c', b'x'),
        ("[c-da-b&&a-d]", b'a', b'd'),
        ("[a-d&&c-da-b]", b'a', b'd'),
    ];
    for &(class, lo, hi) in single_range.iter() {
        let unicode = [(char::from(lo), char::from(hi))];
        let bytes = [(lo, hi)];

        // Plain Unicode mode.
        assert_eq!(t(class), hir_uclass(&unicode));
        // Byte mode.
        assert_eq!(t(&format!("(?-u){}", class)), hir_bclass(&bytes));
        // Case-insensitive Unicode mode.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t(&format!("(?i){}", class)),
            hir_case_fold(hir_uclass(&unicode))
        );
        // Case-insensitive byte mode.
        assert_eq!(
            t(&format!("(?i-u){}", class)),
            hir_case_fold(hir_bclass(&bytes))
        );
    }

    // The same intersection, bare, nested and negated (Unicode mode only).
    let a_to_c = hir_uclass(&[('a', 'c')]);
    for &pattern in [r"[a-z&&a-c]", r"[[a-z&&a-c]]"].iter() {
        assert_eq!(t(pattern), a_to_c);
    }
    assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));

    // Punctuation around `&&`: (class syntax, the one character it matches).
    let single_char = [
        // In `[a^]`, `^` does not need to be escaped, so it makes sense that
        // `^` is also allowed to be unescaped after `&&`.
        (r"[\^&&^]", '^'),
        // `]` needs to be escaped after `&&` since it's not at start of class.
        (r"[]&&\]]", ']'),
        (r"[-&&-]", '-'),
        (r"[\&&&&]", '&'),
        (r"[\&&&\&]", '&'),
    ];
    for &(pattern, ch) in single_char.iter() {
        assert_eq!(t(pattern), hir_uclass(&[(ch, ch)]));
    }

    // Test precedence.
    let expected = hir_uclass(&[('a', 'b'), ('h', 'w')]);
    assert_eq!(t(r"[a-w&&[^c-g]z]"), expected);
}
2769 | ||
// Exercises intersections combined with negation (`[^...]`) at different
// nesting levels, first in Unicode mode via `t` and then in byte mode via
// `t_bytes`.
#[test]
fn class_bracketed_intersect_negate() {
    // Builders for the expected classes that recur below.
    #[cfg(feature = "unicode-perl")]
    let unicode_digit = || hir_uclass_query(ClassQuery::Binary("digit"));
    let ascii_digit =
        || hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit));
    let ascii_word =
        || hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word));

    // Unicode mode.
    #[cfg(feature = "unicode-perl")]
    assert_eq!(t(r"[^\w&&\d]"), hir_negate(unicode_digit()));
    assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
    #[cfg(feature = "unicode-perl")]
    assert_eq!(t(r"[^[\w&&\d]]"), hir_negate(unicode_digit()));
    #[cfg(feature = "unicode-perl")]
    assert_eq!(t(r"[^[^\w&&\d]]"), unicode_digit());
    #[cfg(feature = "unicode-perl")]
    assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));

    // Byte mode.
    #[cfg(feature = "unicode-perl")]
    assert_eq!(t_bytes(r"(?-u)[^\w&&\d]"), hir_negate(ascii_digit()));
    assert_eq!(
        t_bytes(r"(?-u)[^[a-z&&a-c]]"),
        hir_negate(hir_bclass(&[(b'a', b'c')]))
    );
    assert_eq!(t_bytes(r"(?-u)[^[\w&&\d]]"), hir_negate(ascii_digit()));
    assert_eq!(t_bytes(r"(?-u)[^[^\w&&\d]]"), ascii_digit());
    assert_eq!(t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"), hir_negate(ascii_word()));
}
2819 | ||
// Exercises the `--` (difference) operator inside bracketed classes.
#[test]
fn class_bracketed_difference() {
    // Unicode mode: the letter class with the ASCII range removed.
    #[cfg(feature = "unicode-gencat")]
    {
        let letters = hir_uclass_query(ClassQuery::Binary("letter"));
        let ascii = hir_uclass(&[('\0', '\x7F')]);
        assert_eq!(t(r"[\pL--[:ascii:]]"), hir_difference(letters, ascii));
    }

    // Byte mode: `[:alpha:]` minus `[:lower:]` is expected to be `A-Z`.
    let upper = hir_bclass(&[(b'A', b'Z')]);
    assert_eq!(t(r"(?-u)[[:alpha:]--[:lower:]]"), upper);
}
2836 | ||
// Exercises the `~~` (symmetric difference) operator inside bracketed
// classes.
#[test]
fn class_bracketed_symmetric_difference() {
    // Expected result of `sc:Greek ~~ scx:Greek`.
    #[cfg(feature = "unicode-script")]
    {
        let expected = hir_uclass(&[
            ('\u{0342}', '\u{0342}'),
            ('\u{0345}', '\u{0345}'),
            ('\u{1DC0}', '\u{1DC1}'),
        ]);
        assert_eq!(t(r"[\p{sc:Greek}~~\p{scx:Greek}]"), expected);
    }

    // `a-g ~~ c-j` keeps only what is in exactly one operand: `a-b` and
    // `h-j`, in both Unicode and byte mode.
    let unicode = hir_uclass(&[('a', 'b'), ('h', 'j')]);
    assert_eq!(t(r"[a-g~~c-j]"), unicode);

    let bytes = hir_bclass(&[(b'a', b'b'), (b'h', b'j')]);
    assert_eq!(t(r"(?-u)[a-g~~c-j]"), bytes);
}
2855 | ||
// Checks translation of patterns written with the `x` flag, where spaces and
// `# ...` comments appear in the middle of escapes, class names and counted
// repetitions. Each case expects the same HIR as the compact spelling.
2856 | #[test] | |
2857 | fn ignore_whitespace() { | |
// Octal escape `\12` followed by `3`: expected literal is "\n3".
2858 | assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3")); | |
// Braced hex escape with padding inside the braces: 0x53 is "S".
2859 | assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S")); | |
f9f354fc XL |
2860 | assert_eq!( |
2861 | t(r"(?x)\x # comment | |
0531ce1d XL |
2862 | { # comment |
2863 | 53 # comment | |
f9f354fc XL |
2864 | } #comment"), |
2865 | hir_lit("S") | |
2866 | ); | |
0531ce1d XL |
2867 | |
// Unbraced hex escape with whitespace/comments between `\x` and its digits.
2868 | assert_eq!(t(r"(?x)\x 53"), hir_lit("S")); | |
f9f354fc XL |
2869 | assert_eq!( |
2870 | t(r"(?x)\x # comment | |
2871 | 53 # comment"), | |
2872 | hir_lit("S") | |
2873 | ); | |
0531ce1d XL |
2874 | assert_eq!(t(r"(?x)\x5 3"), hir_lit("S")); |
2875 | ||
f9f354fc XL |
// Unicode class name spread over several commented lines.
2876 | #[cfg(feature = "unicode-gencat")] |
2877 | assert_eq!( | |
2878 | t(r"(?x)\p # comment | |
0531ce1d XL |
2879 | { # comment |
2880 | Separator # comment | |
f9f354fc XL |
2881 | } # comment"), |
2882 | hir_uclass_query(ClassQuery::Binary("separator")) | |
2883 | ); | |
0531ce1d | 2884 | |
f9f354fc XL |
// Counted repetition `{5,10}` spread over several commented lines.
2885 | assert_eq!( |
2886 | t(r"(?x)a # comment | |
0531ce1d XL |
2887 | { # comment |
2888 | 5 # comment | |
2889 | , # comment | |
2890 | 10 # comment | |
2891 | } # comment"), | |
2892 | hir_range( | |
f9f354fc XL |
2893 | true, |
2894 | hir::RepetitionRange::Bounded(5, 10), | |
2895 | hir_lit("a") | |
2896 | ) | |
2897 | ); | |
0531ce1d XL |
2898 | |
// An escaped space (`\ `) is kept as a literal space: expected "a ".
2899 | assert_eq!(t(r"(?x)a\ # hi there"), hir_lit("a ")); | |
2900 | } | |
2901 | ||
// Checks `is_always_utf8()` on HIRs produced by `t_bytes`.
#[test]
fn analysis_is_always_utf8() {
    // Positive examples.
    let always_utf8 = [
        r"a",
        r"ab",
        r"(?-u)a",
        r"(?-u)ab",
        r"\xFF",
        r"\xFF\xFF",
        r"[^a]",
        r"[^a][^a]",
        r"\b",
        r"\B",
        r"(?-u)\b",
    ];
    for &pattern in always_utf8.iter() {
        assert!(t_bytes(pattern).is_always_utf8(), "pattern: {:?}", pattern);
    }

    // Negative examples.
    let not_always_utf8 = [
        r"(?-u)\xFF",
        r"(?-u)\xFF\xFF",
        r"(?-u)[^a]",
        r"(?-u)[^a][^a]",
        r"(?-u)\B",
    ];
    for &pattern in not_always_utf8.iter() {
        assert!(!t_bytes(pattern).is_always_utf8(), "pattern: {:?}", pattern);
    }
}
2924 | ||
// Checks `is_all_assertions()` on translated patterns.
#[test]
fn analysis_is_all_assertions() {
    // Positive examples.
    let only_assertions = [
        r"\b",
        r"\B",
        r"^",
        r"$",
        r"\A",
        r"\z",
        r"$^\z\A\b\B",
        r"$|^|\z|\A|\b|\B",
        r"^$|$^",
        r"((\b)+())*^",
    ];
    for &pattern in only_assertions.iter() {
        assert!(t(pattern).is_all_assertions(), "pattern: {:?}", pattern);
    }

    // Negative examples.
    for &pattern in [r"^a"].iter() {
        assert!(!t(pattern).is_all_assertions(), "pattern: {:?}", pattern);
    }
}
2942 | ||
// Checks the four anchoring predicates on translated patterns:
// `is_anchored_start`, `is_anchored_end`, `is_line_anchored_start` and
// `is_line_anchored_end`. Positive groups assert the start/end predicate and
// its line-anchored counterpart for each pattern; negative groups assert that
// the predicates are false.
#[test]
fn analysis_is_anchored() {
    // Positive examples.
    assert!(t(r"^").is_anchored_start());
    assert!(t(r"$").is_anchored_end());
    assert!(t(r"^").is_line_anchored_start());
    assert!(t(r"$").is_line_anchored_end());

    assert!(t(r"^^").is_anchored_start());
    assert!(t(r"$$").is_anchored_end());
    assert!(t(r"^^").is_line_anchored_start());
    assert!(t(r"$$").is_line_anchored_end());

    assert!(t(r"^$").is_anchored_start());
    assert!(t(r"^$").is_anchored_end());
    assert!(t(r"^$").is_line_anchored_start());
    assert!(t(r"^$").is_line_anchored_end());

    assert!(t(r"^foo").is_anchored_start());
    assert!(t(r"foo$").is_anchored_end());
    assert!(t(r"^foo").is_line_anchored_start());
    assert!(t(r"foo$").is_line_anchored_end());

    assert!(t(r"^foo|^bar").is_anchored_start());
    assert!(t(r"foo$|bar$").is_anchored_end());
    assert!(t(r"^foo|^bar").is_line_anchored_start());
    assert!(t(r"foo$|bar$").is_line_anchored_end());

    assert!(t(r"^(foo|bar)").is_anchored_start());
    assert!(t(r"(foo|bar)$").is_anchored_end());
    assert!(t(r"^(foo|bar)").is_line_anchored_start());
    assert!(t(r"(foo|bar)$").is_line_anchored_end());

    assert!(t(r"^+").is_anchored_start());
    assert!(t(r"$+").is_anchored_end());
    assert!(t(r"^+").is_line_anchored_start());
    assert!(t(r"$+").is_line_anchored_end());
    assert!(t(r"^++").is_anchored_start());
    assert!(t(r"$++").is_anchored_end());
    assert!(t(r"^++").is_line_anchored_start());
    assert!(t(r"$++").is_line_anchored_end());
    assert!(t(r"(^)+").is_anchored_start());
    assert!(t(r"($)+").is_anchored_end());
    assert!(t(r"(^)+").is_line_anchored_start());
    assert!(t(r"($)+").is_line_anchored_end());

    // `$^` is made only of assertions, so it must satisfy all four
    // predicates. Each one is checked once here; previously the start and
    // line-end checks were duplicated and the end and line-start checks were
    // missing.
    assert!(t(r"$^").is_anchored_start());
    assert!(t(r"$^").is_anchored_end());
    assert!(t(r"$^").is_line_anchored_start());
    assert!(t(r"$^").is_line_anchored_end());
    assert!(t(r"$^|^$").is_anchored_start());
    assert!(t(r"$^|^$").is_anchored_end());
    assert!(t(r"$^|^$").is_line_anchored_start());
    assert!(t(r"$^|^$").is_line_anchored_end());

    assert!(t(r"\b^").is_anchored_start());
    assert!(t(r"$\b").is_anchored_end());
    assert!(t(r"\b^").is_line_anchored_start());
    assert!(t(r"$\b").is_line_anchored_end());
    assert!(t(r"^(?m:^)").is_anchored_start());
    assert!(t(r"(?m:$)$").is_anchored_end());
    assert!(t(r"^(?m:^)").is_line_anchored_start());
    assert!(t(r"(?m:$)$").is_line_anchored_end());
    assert!(t(r"(?m:^)^").is_anchored_start());
    assert!(t(r"$(?m:$)").is_anchored_end());
    assert!(t(r"(?m:^)^").is_line_anchored_start());
    assert!(t(r"$(?m:$)").is_line_anchored_end());

    // Negative examples.
    assert!(!t(r"(?m)^").is_anchored_start());
    assert!(!t(r"(?m)$").is_anchored_end());
    assert!(!t(r"(?m:^$)|$^").is_anchored_start());
    assert!(!t(r"(?m:^$)|$^").is_anchored_end());
    assert!(!t(r"$^|(?m:^$)").is_anchored_start());
    assert!(!t(r"$^|(?m:^$)").is_anchored_end());

    assert!(!t(r"a^").is_anchored_start());
    assert!(!t(r"$a").is_anchored_start());
    assert!(!t(r"a^").is_line_anchored_start());
    assert!(!t(r"$a").is_line_anchored_start());

    assert!(!t(r"a^").is_anchored_end());
    assert!(!t(r"$a").is_anchored_end());
    assert!(!t(r"a^").is_line_anchored_end());
    assert!(!t(r"$a").is_line_anchored_end());

    assert!(!t(r"^foo|bar").is_anchored_start());
    assert!(!t(r"foo|bar$").is_anchored_end());
    assert!(!t(r"^foo|bar").is_line_anchored_start());
    assert!(!t(r"foo|bar$").is_line_anchored_end());

    assert!(!t(r"^*").is_anchored_start());
    assert!(!t(r"$*").is_anchored_end());
    assert!(!t(r"^*").is_line_anchored_start());
    assert!(!t(r"$*").is_line_anchored_end());
    assert!(!t(r"^*+").is_anchored_start());
    assert!(!t(r"$*+").is_anchored_end());
    assert!(!t(r"^*+").is_line_anchored_start());
    assert!(!t(r"$*+").is_line_anchored_end());
    assert!(!t(r"^+*").is_anchored_start());
    assert!(!t(r"$+*").is_anchored_end());
    assert!(!t(r"^+*").is_line_anchored_start());
    assert!(!t(r"$+*").is_line_anchored_end());
    assert!(!t(r"(^)*").is_anchored_start());
    assert!(!t(r"($)*").is_anchored_end());
    assert!(!t(r"(^)*").is_line_anchored_start());
    assert!(!t(r"($)*").is_line_anchored_end());
}
3051 | ||
// Checks `is_line_anchored_start()` / `is_line_anchored_end()` on patterns
// that use multi-line (`m` flag) anchors.
#[test]
fn analysis_is_line_anchored() {
    let line_anchored_start = [
        r"(?m)^(foo|bar)",
        r"(?m)^foo|^bar",
        r"(?m)^",
        r"(?m:^$)|$^",
        r"$^|(?m:^$)",
    ];
    for &pattern in line_anchored_start.iter() {
        assert!(
            t(pattern).is_line_anchored_start(),
            "pattern: {:?}",
            pattern
        );
    }

    let line_anchored_end = [
        r"(?m)(foo|bar)$",
        r"(?m)foo$|bar$",
        r"(?m)$",
        r"(?m:^$)|$^",
        r"$^|(?m:^$)",
    ];
    for &pattern in line_anchored_end.iter() {
        assert!(t(pattern).is_line_anchored_end(), "pattern: {:?}", pattern);
    }
}
3069 | ||
// Checks `is_any_anchored_start()` / `is_any_anchored_end()` on single
// anchor patterns.
#[test]
fn analysis_is_any_anchored() {
    // Positive examples.
    for &pattern in [r"^", r"\A"].iter() {
        assert!(t(pattern).is_any_anchored_start(), "pattern: {:?}", pattern);
    }
    for &pattern in [r"$", r"\z"].iter() {
        assert!(t(pattern).is_any_anchored_end(), "pattern: {:?}", pattern);
    }

    // Negative examples.
    for &pattern in [r"(?m)^", r"$"].iter() {
        assert!(
            !t(pattern).is_any_anchored_start(),
            "pattern: {:?}",
            pattern
        );
    }
    for &pattern in [r"(?m)$", r"^"].iter() {
        assert!(!t(pattern).is_any_anchored_end(), "pattern: {:?}", pattern);
    }
}
3084 | ||
// Checks `is_match_empty()` on translated patterns.
#[test]
fn analysis_is_match_empty() {
    // Positive examples.
    let matches_empty = [
        r"",
        r"()",
        r"()*",
        r"()+",
        r"()?",
        r"a*",
        r"a?",
        r"a{0}",
        r"a{0,}",
        r"a{0,1}",
        r"a{0,10}",
        r"a*|b",
        r"b|a*",
        r"a*a?(abcd)*",
        r"^",
        r"$",
        r"(?m)^",
        r"(?m)$",
        r"\A",
        r"\z",
        r"\B",
    ];
    for &pattern in matches_empty.iter() {
        assert!(t(pattern).is_match_empty(), "pattern: {:?}", pattern);
    }
    // Needs Unicode general category tables, so it is gated separately.
    #[cfg(feature = "unicode-gencat")]
    assert!(t(r"\pL*").is_match_empty());
    // Translated with `t_bytes` rather than `t`.
    assert!(t_bytes(r"(?-u)\B").is_match_empty());

    // Negative examples.
    let never_empty = [
        r"a+",
        r"a{1}",
        r"a{1,}",
        r"a{1,2}",
        r"a{1,10}",
        r"b|a",
        r"a*a+(abcd)*",
        r"\b",
        r"(?-u)\b",
    ];
    for &pattern in never_empty.iter() {
        assert!(!t(pattern).is_match_empty(), "pattern: {:?}", pattern);
    }
}
48663c56 XL |
3124 | |
// Checks `is_literal()` on translated patterns.
#[test]
fn analysis_is_literal() {
    // Positive examples.
    for &pattern in [r"a", r"ab", r"abc", r"(?m)abc"].iter() {
        assert!(t(pattern).is_literal(), "pattern: {:?}", pattern);
    }

    // Negative examples.
    let not_literal = [
        r"",
        r"^",
        r"a|b",
        r"(a)",
        r"a+",
        r"foo(a)",
        r"(a)foo",
        r"[a]",
    ];
    for &pattern in not_literal.iter() {
        assert!(!t(pattern).is_literal(), "pattern: {:?}", pattern);
    }
}
3143 | ||
// Checks `is_alternation_literal()` on translated patterns.
#[test]
fn analysis_is_alternation_literal() {
    // Positive examples.
    let alternation_literal = [
        r"a",
        r"ab",
        r"abc",
        r"(?m)abc",
        r"a|b",
        r"a|b|c",
        r"foo|bar",
        r"foo|bar|baz",
    ];
    for &pattern in alternation_literal.iter() {
        assert!(
            t(pattern).is_alternation_literal(),
            "pattern: {:?}",
            pattern
        );
    }

    // Negative examples.
    let not_alternation_literal = [
        r"",
        r"^",
        r"(a)",
        r"a+",
        r"foo(a)",
        r"(a)foo",
        r"[a]",
        r"[a]|b",
        r"a|[b]",
        r"(a)|b",
        r"a|(b)",
    ];
    for &pattern in not_alternation_literal.iter() {
        assert!(
            !t(pattern).is_alternation_literal(),
            "pattern: {:?}",
            pattern
        );
    }
}
0531ce1d | 3169 | } |