4 dfa
::{dense, regex::Regex, Automaton, OverlappingState}
,
6 HalfMatch
, MatchError
, MatchKind
, MultiMatch
,
9 use crate::util
::{BunkPrefilter, SubstringPrefilter}
;
// Tests that quit bytes in the forward direction work correctly.
#[test]
fn quit_fwd() -> Result<(), Box<dyn Error>> {
    // Build a dense DFA with `x` configured as a quit byte.
    let dfa = dense::Builder::new()
        .configure(dense::Config::new().quit(b'x', true))
        .build("[[:word:]]+$")?;

    // `x` sits at offset 3 of the haystack, so each forward search routine is
    // expected to return a quit error at that position instead of a match.
    assert_eq!(
        dfa.find_earliest_fwd(b"abcxyz"),
        Err(MatchError::Quit { byte: b'x', offset: 3 })
    );
    assert_eq!(
        dfa.find_leftmost_fwd(b"abcxyz"),
        Err(MatchError::Quit { byte: b'x', offset: 3 })
    );
    assert_eq!(
        dfa.find_overlapping_fwd(b"abcxyz", &mut OverlappingState::start()),
        Err(MatchError::Quit { byte: b'x', offset: 3 })
    );

    Ok(())
}
// Tests that quit bytes in the reverse direction work correctly.
#[test]
fn quit_rev() -> Result<(), Box<dyn Error>> {
    // Same quit-byte configuration as the forward test, but the DFA is built
    // from a reversed Thompson NFA so it can be driven by the `*_rev` searches.
    let dfa = dense::Builder::new()
        .configure(dense::Config::new().quit(b'x', true))
        .thompson(thompson::Config::new().reverse(true))
        .build("^[[:word:]]+")?;

    // Both reverse search routines are expected to report the quit byte `x`
    // at offset 3 rather than a match.
    assert_eq!(
        dfa.find_earliest_rev(b"abcxyz"),
        Err(MatchError::Quit { byte: b'x', offset: 3 })
    );
    assert_eq!(
        dfa.find_leftmost_rev(b"abcxyz"),
        Err(MatchError::Quit { byte: b'x', offset: 3 })
    );

    Ok(())
}
// Tests that if we heuristically enable Unicode word boundaries but then
// instruct that a non-ASCII byte should NOT be a quit byte, then the builder
// will panic.
// NOTE(review): the original `fn` header for this statement was not visible;
// the test name and attributes below are a reconstruction — confirm against
// the upstream file.
#[test]
#[should_panic]
fn quit_panics() {
    // Enabling Unicode word boundaries and then un-marking the non-ASCII byte
    // 0xFF as a quit byte is the combination this test expects to panic.
    dense::Config::new().unicode_word_boundary(true).quit(b'\xFF', false);
}
// Tests that if we attempt an overlapping search using a regex without a
// reverse DFA compiled with 'starts_for_each_pattern', then we get a panic.
#[test]
#[should_panic]
fn incorrect_config_overlapping_search_panics() {
    let forward = dense::DFA::new(r"abca").unwrap();
    // The reverse DFA is deliberately built with
    // `starts_for_each_pattern(false)`, which is the misconfiguration the
    // overlapping search below is expected to reject by panicking.
    let reverse = dense::Builder::new()
        .configure(
            dense::Config::new()
                // NOTE(review): `.anchored(true)` is assumed here (one
                // configuration line was missing from the visible source) —
                // confirm against the upstream test.
                .anchored(true)
                .match_kind(MatchKind::All)
                .starts_for_each_pattern(false),
        )
        .thompson(thompson::Config::new().reverse(true))
        // NOTE(review): the reverse pattern is presumed to mirror the forward
        // DFA's pattern; the original `.build(..)` line was not visible.
        .build(r"abca")
        .unwrap();

    let re = Regex::builder().build_from_dfas(forward, reverse);
    let haystack = "bar abcabcabca abca foo".as_bytes();
    // The result is intentionally discarded: the test only passes if this
    // call panics.
    re.find_overlapping(haystack, &mut OverlappingState::start());
}
// This tests an interesting case where even if the Unicode word boundary
// option is disabled, setting all non-ASCII bytes to be quit bytes will cause
// Unicode word boundaries to be enabled.
#[test]
fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> {
    // Mark every non-ASCII byte (0x80 through 0xFF) as a quit byte. Note that
    // `unicode_word_boundary` is never switched on explicitly.
    let mut config = dense::Config::new();
    for b in 0x80..=0xFF {
        config = config.quit(b, true);
    }

    // Building a DFA for `\b` is expected to succeed with this configuration,
    // and the search on " a" should yield `HalfMatch::must(0, 1)`.
    let dfa = dense::Builder::new().configure(config).build(r"\b")?;
    let expected = HalfMatch::must(0, 1);
    assert_eq!(dfa.find_leftmost_fwd(b" a"), Ok(Some(expected)));
    Ok(())
}
// Tests that we can provide a prefilter to a Regex, and the search reports
// correct results.
#[test]
fn prefilter_works() -> Result<(), Box<dyn Error>> {
    // NOTE(review): the line that handled the `Regex::new` result was not
    // visible in the source; `.unwrap()` is assumed — confirm against upstream.
    let re = Regex::new(r"a[0-9]+")
        .unwrap()
        .with_prefilter(SubstringPrefilter::new("a"));
    let text = b"foo abc foo a1a2a3 foo a123 bar aa456";

    // Collect the (start, end) span of every leftmost match in the haystack.
    let matches: Vec<(usize, usize)> =
        re.find_leftmost_iter(text).map(|m| (m.start(), m.end())).collect();

    // The spans found with the prefilter attached must equal the expected
    // list of matches for `a[0-9]+`.
    assert_eq!(
        matches,
        vec![(12, 14), (14, 16), (16, 18), (23, 27), (33, 37),]
    );
    Ok(())
}
// This test confirms that a prefilter is active by using a prefilter that
// reports false negatives.
120 fn prefilter_is_active() -> Result
<(), Box
<dyn Error
>> {
122 let re
= Regex
::new(r
"a[0-9]+")
124 .with_prefilter(SubstringPrefilter
::new("a"));
125 assert_eq
!(re
.find_leftmost(b
"za123"), Some(MultiMatch
::must(0, 1, 5)));
126 assert_eq
!(re
.find_leftmost(b
"a123"), Some(MultiMatch
::must(0, 0, 4)));
127 let re
= re
.with_prefilter(BunkPrefilter
::new());
128 assert_eq
!(re
.find_leftmost(b
"za123"), None
);
129 // This checks that the prefilter is used when first starting the search,
130 // instead of waiting until at least one transition has occurred.
131 assert_eq
!(re
.find_leftmost(b
"a123"), None
);