git.proxmox.com Git - rustc.git/blob - vendor/regex-automata-0.2.0/tests/dfa/suite.rs
New upstream version 1.74.1+dfsg1
[rustc.git] / vendor / regex-automata-0.2.0 / tests / dfa / suite.rs
1 use regex_automata::{
2 dfa::{self, dense, regex::Regex, sparse, Automaton},
3 nfa::thompson,
4 MatchKind, SyntaxConfig,
5 };
6 use regex_syntax as syntax;
7
8 use regex_test::{
9 bstr::{BString, ByteSlice},
10 CompiledRegex, Match, MatchKind as TestMatchKind, RegexTest, RegexTests,
11 SearchKind as TestSearchKind, TestResult, TestRunner,
12 };
13
14 use crate::{suite, Result};
15
/// Drives every entry of the suite through dense DFAs produced by an
/// untouched `Regex::builder()`.
#[test]
fn unminimized_default() -> Result<()> {
    let compile = dense_compiler(Regex::builder());
    TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
    Ok(())
}
25
/// Same as the default dense run, except that the dense DFA configuration
/// has `byte_classes(false)` applied.
#[test]
fn unminimized_no_byte_class() -> Result<()> {
    let dense_config = dense::Config::new().byte_classes(false);
    let mut builder = Regex::builder();
    builder.dense(dense_config);
    let compile = dense_compiler(builder);

    TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
    Ok(())
}
37
/// Same as the default dense run, except that the Thompson NFA configuration
/// has `shrink(false)` applied.
#[test]
fn unminimized_no_nfa_shrink() -> Result<()> {
    let nfa_config = thompson::Config::new().shrink(false);
    let mut builder = Regex::builder();
    builder.thompson(nfa_config);
    let compile = dense_compiler(builder);

    TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
    Ok(())
}
49
/// Exercises the suite against dense DFAs built with `minimize(true)`; every
/// other option keeps its default.
#[test]
fn minimized_default() -> Result<()> {
    let dense_config = dense::Config::new().minimize(true);
    let mut builder = Regex::builder();
    builder.dense(dense_config);
    let compile = dense_compiler(builder);

    TestRunner::new()?
        // The "expensive" regexes tend to be too big: minimizing them
        // takes... forever.
        .blacklist("expensive")
        .test_iter(suite()?.iter(), compile)
        .assert();
    Ok(())
}
63
/// Exercises the suite against dense DFAs built with both `minimize(true)`
/// and `byte_classes(false)`.
#[test]
fn minimized_no_byte_class() -> Result<()> {
    let dense_config = dense::Config::new().minimize(true).byte_classes(false);
    let mut builder = Regex::builder();
    builder.dense(dense_config);
    let compile = dense_compiler(builder);

    TestRunner::new()?
        // The "expensive" regexes tend to be too big: minimizing them
        // takes... forever.
        .blacklist("expensive")
        .test_iter(suite()?.iter(), compile)
        .assert();
    Ok(())
}
77
/// Drives the suite through sparse DFAs derived from a default-configured
/// (unminimized) builder.
#[test]
fn sparse_unminimized_default() -> Result<()> {
    let compile = sparse_compiler(Regex::builder());
    TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
    Ok(())
}
87
/// Sanity check for dense DFA serialization: both halves of each compiled
/// regex are written out with `to_bytes_native_endian`, read back with
/// `dense::DFA::from_bytes`, and the reassembled regex is what actually runs
/// the searches.
#[test]
fn serialization_unminimized_default() -> Result<()> {
    let compile = compiler(Regex::builder(), |base, re| {
        let builder = base.clone();
        // Only the byte buffers are kept; the second element of each
        // returned pair is discarded.
        let fwd_bytes = re.forward().to_bytes_native_endian().0;
        let rev_bytes = re.reverse().to_bytes_native_endian().0;
        let matcher = CompiledRegex::compiled(move |test| -> Vec<TestResult> {
            // The deserialized DFAs borrow from the captured buffers, so
            // they are rebuilt each time the matcher is invoked.
            let (fwd, _): (dense::DFA<&[u32]>, _) =
                dense::DFA::from_bytes(&fwd_bytes).unwrap();
            let (rev, _): (dense::DFA<&[u32]>, _) =
                dense::DFA::from_bytes(&rev_bytes).unwrap();
            let restored = builder.build_from_dfas(fwd, rev);

            run_test(&restored, test)
        });
        Ok(matcher)
    });
    TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
    Ok(())
}
114
/// Sanity check for sparse DFA serialization: both halves of each compiled
/// regex are converted to sparse DFAs, written out with
/// `to_bytes_native_endian`, read back with `sparse::DFA::from_bytes`, and the
/// reassembled regex is what actually runs the searches.
#[test]
fn sparse_serialization_unminimized_default() -> Result<()> {
    let compile = compiler(Regex::builder(), |base, re| {
        let builder = base.clone();
        let fwd_sparse = re.forward().to_sparse()?;
        let fwd_bytes = fwd_sparse.to_bytes_native_endian();
        let rev_sparse = re.reverse().to_sparse()?;
        let rev_bytes = rev_sparse.to_bytes_native_endian();
        let matcher = CompiledRegex::compiled(move |test| -> Vec<TestResult> {
            // The deserialized DFAs borrow from the captured buffers, so
            // they are rebuilt each time the matcher is invoked.
            let (fwd, _): (sparse::DFA<&[u8]>, _) =
                sparse::DFA::from_bytes(&fwd_bytes).unwrap();
            let (rev, _): (sparse::DFA<&[u8]>, _) =
                sparse::DFA::from_bytes(&rev_bytes).unwrap();
            let restored = builder.build_from_dfas(fwd, rev);
            run_test(&restored, test)
        });
        Ok(matcher)
    });
    TestRunner::new()?.test_iter(suite()?.iter(), compile).assert();
    Ok(())
}
141
142 fn dense_compiler(
143 builder: dfa::regex::Builder,
144 ) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
145 compiler(builder, |_, re| {
146 Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
147 run_test(&re, test)
148 }))
149 })
150 }
151
152 fn sparse_compiler(
153 builder: dfa::regex::Builder,
154 ) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
155 compiler(builder, |builder, re| {
156 let fwd = re.forward().to_sparse()?;
157 let rev = re.reverse().to_sparse()?;
158 let re = builder.build_from_dfas(fwd, rev);
159 Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
160 run_test(&re, test)
161 }))
162 })
163 }
164
165 fn compiler(
166 mut builder: dfa::regex::Builder,
167 mut create_matcher: impl FnMut(
168 &dfa::regex::Builder,
169 Regex,
170 ) -> Result<CompiledRegex>,
171 ) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
172 move |test, regexes| {
173 let regexes = regexes
174 .iter()
175 .map(|r| r.to_str().map(|s| s.to_string()))
176 .collect::<std::result::Result<Vec<String>, _>>()?;
177
178 // Check if our regex contains things that aren't supported by DFAs.
179 // That is, Unicode word boundaries when searching non-ASCII text.
180 let mut thompson = thompson::Builder::new();
181 thompson.configure(config_thompson(test));
182 // TODO: Modify Hir to report facts like this, instead of needing to
183 // build an NFA to do it.
184 if let Ok(nfa) = thompson.build_many(&regexes) {
185 let non_ascii = test.input().iter().any(|&b| !b.is_ascii());
186 if nfa.has_word_boundary_unicode() && non_ascii {
187 return Ok(CompiledRegex::skip());
188 }
189 }
190 if !configure_regex_builder(test, &mut builder) {
191 return Ok(CompiledRegex::skip());
192 }
193 create_matcher(&builder, builder.build_many(&regexes)?)
194 }
195 }
196
197 fn run_test<A: Automaton>(re: &Regex<A>, test: &RegexTest) -> Vec<TestResult> {
198 let is_match = if re.is_match(test.input()) {
199 TestResult::matched()
200 } else {
201 TestResult::no_match()
202 };
203 let is_match = is_match.name("is_match");
204
205 let find_matches = match test.search_kind() {
206 TestSearchKind::Earliest => {
207 let it = re
208 .find_earliest_iter(test.input())
209 .take(test.match_limit().unwrap_or(std::usize::MAX))
210 .map(|m| Match {
211 id: m.pattern().as_usize(),
212 start: m.start(),
213 end: m.end(),
214 });
215 TestResult::matches(it).name("find_earliest_iter")
216 }
217 TestSearchKind::Leftmost => {
218 let it = re
219 .find_leftmost_iter(test.input())
220 .take(test.match_limit().unwrap_or(std::usize::MAX))
221 .map(|m| Match {
222 id: m.pattern().as_usize(),
223 start: m.start(),
224 end: m.end(),
225 });
226 TestResult::matches(it).name("find_leftmost_iter")
227 }
228 TestSearchKind::Overlapping => {
229 let it = re
230 .find_overlapping_iter(test.input())
231 .take(test.match_limit().unwrap_or(std::usize::MAX))
232 .map(|m| Match {
233 id: m.pattern().as_usize(),
234 start: m.start(),
235 end: m.end(),
236 });
237 TestResult::matches(it).name("find_overlapping_iter")
238 }
239 };
240
241 vec![is_match, find_matches]
242 }
243
244 /// Configures the given regex builder with all relevant settings on the given
245 /// regex test.
246 ///
247 /// If the regex test has a setting that is unsupported, then this returns
248 /// false (implying the test should be skipped).
249 fn configure_regex_builder(
250 test: &RegexTest,
251 builder: &mut dfa::regex::Builder,
252 ) -> bool {
253 let match_kind = match test.match_kind() {
254 TestMatchKind::All => MatchKind::All,
255 TestMatchKind::LeftmostFirst => MatchKind::LeftmostFirst,
256 TestMatchKind::LeftmostLongest => return false,
257 };
258
259 let syntax_config = SyntaxConfig::new()
260 .case_insensitive(test.case_insensitive())
261 .unicode(test.unicode())
262 .utf8(test.utf8());
263 let dense_config = dense::Config::new()
264 .anchored(test.anchored())
265 .match_kind(match_kind)
266 .unicode_word_boundary(true);
267 let regex_config = Regex::config().utf8(test.utf8());
268
269 builder
270 .configure(regex_config)
271 .syntax(syntax_config)
272 .thompson(config_thompson(test))
273 .dense(dense_config);
274 true
275 }
276
277 /// Configuration of a Thompson NFA compiler from a regex test.
278 fn config_thompson(test: &RegexTest) -> thompson::Config {
279 thompson::Config::new().utf8(test.utf8())
280 }