2 dfa
::{self, dense, regex::Regex, sparse, Automaton}
,
4 MatchKind
, SyntaxConfig
,
6 use regex_syntax
as syntax
;
9 bstr
::{BString, ByteSlice}
,
10 CompiledRegex
, Match
, MatchKind
as TestMatchKind
, RegexTest
, RegexTests
,
11 SearchKind
as TestSearchKind
, TestResult
, TestRunner
,
14 use crate::{suite, Result}
;
16 /// Runs the test suite with the default configuration.
18 fn unminimized_default() -> Result
<()> {
19 let builder
= Regex
::builder();
21 .test_iter(suite()?
.iter(), dense_compiler(builder
))
26 /// Runs the test suite with byte classes disabled.
28 fn unminimized_no_byte_class() -> Result
<()> {
29 let mut builder
= Regex
::builder();
30 builder
.dense(dense
::Config
::new().byte_classes(false));
33 .test_iter(suite()?
.iter(), dense_compiler(builder
))
38 /// Runs the test suite with NFA shrinking disabled.
40 fn unminimized_no_nfa_shrink() -> Result
<()> {
41 let mut builder
= Regex
::builder();
42 builder
.thompson(thompson
::Config
::new().shrink(false));
45 .test_iter(suite()?
.iter(), dense_compiler(builder
))
50 /// Runs the test suite on a minimized DFA with an otherwise default configuration.
53 fn minimized_default() -> Result
<()> {
54 let mut builder
= Regex
::builder();
55 builder
.dense(dense
::Config
::new().minimize(true));
57 // These regexes tend to be too big. Minimization takes... forever.
58 .blacklist("expensive")
59 .test_iter(suite()?
.iter(), dense_compiler(builder
))
64 /// Runs the test suite on a minimized DFA with byte classes disabled.
66 fn minimized_no_byte_class() -> Result
<()> {
67 let mut builder
= Regex
::builder();
68 builder
.dense(dense
::Config
::new().minimize(true).byte_classes(false));
71 // These regexes tend to be too big. Minimization takes... forever.
72 .blacklist("expensive")
73 .test_iter(suite()?
.iter(), dense_compiler(builder
))
78 /// Runs the test suite on a sparse unminimized DFA.
80 fn sparse_unminimized_default() -> Result
<()> {
81 let builder
= Regex
::builder();
83 .test_iter(suite()?
.iter(), sparse_compiler(builder
))
88 /// Another basic sanity test that checks we can serialize and then deserialize
89 /// a regex, and that the resulting regex can be used for searching correctly.
91 fn serialization_unminimized_default() -> Result
<()> {
92 let builder
= Regex
::builder();
93 let my_compiler
= |builder
| {
94 compiler(builder
, |builder
, re
| {
95 let builder
= builder
.clone();
96 let (fwd_bytes
, _
) = re
.forward().to_bytes_native_endian();
97 let (rev_bytes
, _
) = re
.reverse().to_bytes_native_endian();
98 Ok(CompiledRegex
::compiled(move |test
| -> Vec
<TestResult
> {
99 let fwd
: dense
::DFA
<&[u32]> =
100 dense
::DFA
::from_bytes(&fwd_bytes
).unwrap().0;
101 let rev
: dense
::DFA
<&[u32]> =
102 dense
::DFA
::from_bytes(&rev_bytes
).unwrap().0;
103 let re
= builder
.build_from_dfas(fwd
, rev
);
110 .test_iter(suite()?
.iter(), my_compiler(builder
))
115 /// A basic sanity test that checks we can serialize and then deserialize a
116 /// regex using sparse DFAs, and that the resulting regex can be used for
117 /// searching correctly.
119 fn sparse_serialization_unminimized_default() -> Result
<()> {
120 let builder
= Regex
::builder();
121 let my_compiler
= |builder
| {
122 compiler(builder
, |builder
, re
| {
123 let builder
= builder
.clone();
124 let fwd_bytes
= re
.forward().to_sparse()?
.to_bytes_native_endian();
125 let rev_bytes
= re
.reverse().to_sparse()?
.to_bytes_native_endian();
126 Ok(CompiledRegex
::compiled(move |test
| -> Vec
<TestResult
> {
127 let fwd
: sparse
::DFA
<&[u8]> =
128 sparse
::DFA
::from_bytes(&fwd_bytes
).unwrap().0;
129 let rev
: sparse
::DFA
<&[u8]> =
130 sparse
::DFA
::from_bytes(&rev_bytes
).unwrap().0;
131 let re
= builder
.build_from_dfas(fwd
, rev
);
137 .test_iter(suite()?
.iter(), my_compiler(builder
))
143 builder
: dfa
::regex
::Builder
,
144 ) -> impl FnMut(&RegexTest
, &[BString
]) -> Result
<CompiledRegex
> {
145 compiler(builder
, |_
, re
| {
146 Ok(CompiledRegex
::compiled(move |test
| -> Vec
<TestResult
> {
153 builder
: dfa
::regex
::Builder
,
154 ) -> impl FnMut(&RegexTest
, &[BString
]) -> Result
<CompiledRegex
> {
155 compiler(builder
, |builder
, re
| {
156 let fwd
= re
.forward().to_sparse()?
;
157 let rev
= re
.reverse().to_sparse()?
;
158 let re
= builder
.build_from_dfas(fwd
, rev
);
159 Ok(CompiledRegex
::compiled(move |test
| -> Vec
<TestResult
> {
166 mut builder
: dfa
::regex
::Builder
,
167 mut create_matcher
: impl FnMut(
168 &dfa
::regex
::Builder
,
170 ) -> Result
<CompiledRegex
>,
171 ) -> impl FnMut(&RegexTest
, &[BString
]) -> Result
<CompiledRegex
> {
172 move |test
, regexes
| {
173 let regexes
= regexes
175 .map(|r
| r
.to_str().map(|s
| s
.to_string()))
176 .collect
::<std
::result
::Result
<Vec
<String
>, _
>>()?
;
178 // Check if our regex contains things that aren't supported by DFAs.
179 // That is, Unicode word boundaries when searching non-ASCII text.
180 let mut thompson
= thompson
::Builder
::new();
181 thompson
.configure(config_thompson(test
));
182 // TODO: Modify Hir to report facts like this, instead of needing to
183 // build an NFA to do it.
184 if let Ok(nfa
) = thompson
.build_many(&regexes
) {
185 let non_ascii
= test
.input().iter().any(|&b
| !b
.is_ascii());
186 if nfa
.has_word_boundary_unicode() && non_ascii
{
187 return Ok(CompiledRegex
::skip());
190 if !configure_regex_builder(test
, &mut builder
) {
191 return Ok(CompiledRegex
::skip());
193 create_matcher(&builder
, builder
.build_many(&regexes
)?
)
197 fn run_test
<A
: Automaton
>(re
: &Regex
<A
>, test
: &RegexTest
) -> Vec
<TestResult
> {
198 let is_match
= if re
.is_match(test
.input()) {
199 TestResult
::matched()
201 TestResult
::no_match()
203 let is_match
= is_match
.name("is_match");
205 let find_matches
= match test
.search_kind() {
206 TestSearchKind
::Earliest
=> {
208 .find_earliest_iter(test
.input())
209 .take(test
.match_limit().unwrap_or(std
::usize::MAX
))
211 id
: m
.pattern().as_usize(),
215 TestResult
::matches(it
).name("find_earliest_iter")
217 TestSearchKind
::Leftmost
=> {
219 .find_leftmost_iter(test
.input())
220 .take(test
.match_limit().unwrap_or(std
::usize::MAX
))
222 id
: m
.pattern().as_usize(),
226 TestResult
::matches(it
).name("find_leftmost_iter")
228 TestSearchKind
::Overlapping
=> {
230 .find_overlapping_iter(test
.input())
231 .take(test
.match_limit().unwrap_or(std
::usize::MAX
))
233 id
: m
.pattern().as_usize(),
237 TestResult
::matches(it
).name("find_overlapping_iter")
241 vec
![is_match
, find_matches
]
244 /// Configures the given regex builder with all relevant settings on the given regex test.
247 /// If the regex test has a setting that is unsupported, then this returns
248 /// false (implying the test should be skipped).
249 fn configure_regex_builder(
251 builder
: &mut dfa
::regex
::Builder
,
253 let match_kind
= match test
.match_kind() {
254 TestMatchKind
::All
=> MatchKind
::All
,
255 TestMatchKind
::LeftmostFirst
=> MatchKind
::LeftmostFirst
,
256 TestMatchKind
::LeftmostLongest
=> return false,
259 let syntax_config
= SyntaxConfig
::new()
260 .case_insensitive(test
.case_insensitive())
261 .unicode(test
.unicode())
263 let dense_config
= dense
::Config
::new()
264 .anchored(test
.anchored())
265 .match_kind(match_kind
)
266 .unicode_word_boundary(true);
267 let regex_config
= Regex
::config().utf8(test
.utf8());
270 .configure(regex_config
)
271 .syntax(syntax_config
)
272 .thompson(config_thompson(test
))
273 .dense(dense_config
);
277 /// Configuration of a Thompson NFA compiler from a regex test.
278 fn config_thompson(test
: &RegexTest
) -> thompson
::Config
{
279 thompson
::Config
::new().utf8(test
.utf8())