]> git.proxmox.com Git - cargo.git/blob - vendor/aho-corasick/src/packed/tests.rs
New upstream version 0.52.0
[cargo.git] / vendor / aho-corasick / src / packed / tests.rs
1 use std::collections::HashMap;
2 use std::usize;
3
4 use crate::packed::{Config, MatchKind};
5 use crate::Match;
6
/// A description of a single test against a multi-pattern searcher.
///
/// A single test may not necessarily pass on every configuration of a
/// searcher, which is why tests are organized into groups.
#[derive(Clone, Debug, Eq, PartialEq)]
struct SearchTest {
    /// The name of this test, for debugging.
    name: &'static str,
    /// The patterns to search for.
    patterns: &'static [&'static str],
    /// The text to search.
    haystack: &'static str,
    /// Each match is a triple of (pattern_index, start, end), where
    /// pattern_index is an index into `patterns` and `start`/`end` are indices
    /// into `haystack`.
    matches: &'static [(usize, usize, usize)],
}

/// An owned variation of a `SearchTest` whose haystack has been padded with
/// `Z` bytes on one or both sides.
#[derive(Clone, Debug, Eq, PartialEq)]
struct SearchTestOwned {
    /// The number of `Z` bytes added to each padded side of the haystack.
    offset: usize,
    /// The name of the test this variation was derived from.
    name: String,
    /// The patterns to search for.
    patterns: Vec<String>,
    /// The padded text to search.
    haystack: String,
    /// The expected (pattern_index, start, end) triples, where `start`/`end`
    /// are indices into the padded `haystack`.
    matches: Vec<(usize, usize, usize)>,
}

impl SearchTest {
    /// The largest padding length produced by `variations`.
    const MAX_OFFSET: usize = 260;

    /// Returns every padded variation of this test.
    ///
    /// For each padding length from 0 through `MAX_OFFSET` (inclusive), three
    /// variations are produced, in this order: padding in front, padding at
    /// the back, and padding on both sides.
    fn variations(&self) -> Vec<SearchTestOwned> {
        let mut tests = Vec::with_capacity(3 * (Self::MAX_OFFSET + 1));
        for i in 0..=Self::MAX_OFFSET {
            tests.push(self.offset_prefix(i));
            tests.push(self.offset_suffix(i));
            tests.push(self.offset_both(i));
        }
        tests
    }

    /// Pads the haystack with `off` bytes on both sides and shifts every
    /// expected match forward by `off`.
    fn offset_both(&self, off: usize) -> SearchTestOwned {
        self.padded(off, true, true)
    }

    /// Pads the haystack with `off` bytes in front and shifts every expected
    /// match forward by `off`.
    fn offset_prefix(&self, off: usize) -> SearchTestOwned {
        self.padded(off, true, false)
    }

    /// Pads the haystack with `off` bytes at the back. The expected matches
    /// are left untouched since nothing in front of them moved.
    fn offset_suffix(&self, off: usize) -> SearchTestOwned {
        self.padded(off, false, true)
    }

    /// Builds an owned copy of this test whose haystack is padded with `off`
    /// `Z` bytes in front (when `front` is set) and/or at the back (when
    /// `back` is set).
    fn padded(&self, off: usize, front: bool, back: bool) -> SearchTestOwned {
        let padding = "Z".repeat(off);
        let prefix = if front { padding.as_str() } else { "" };
        let suffix = if back { padding.as_str() } else { "" };
        // Only padding placed in front of the haystack moves the matches.
        let shift = if front { off } else { 0 };
        SearchTestOwned {
            offset: off,
            name: self.name.to_string(),
            patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
            haystack: format!("{}{}{}", prefix, self.haystack, suffix),
            matches: self
                .matches
                .iter()
                .map(|&(id, s, e)| (id, s + shift, e + shift))
                .collect(),
        }
    }
}
96
/// Short-hand constructor for SearchTest. We use it a lot below.
///
/// The first argument is an identifier that is stringified into the test's
/// name. The remaining arguments are the patterns, the haystack and the
/// expected matches, in that order. A trailing comma after the last argument
/// is accepted.
macro_rules! t {
    ($name:ident, $patterns:expr, $haystack:expr, $matches:expr $(,)?) => {
        SearchTest {
            name: stringify!($name),
            patterns: $patterns,
            haystack: $haystack,
            matches: $matches,
        }
    };
}
108
109 /// A collection of test groups.
110 type TestCollection = &'static [&'static [SearchTest]];
111
112 // Define several collections corresponding to the different type of match
113 // semantics supported. These collections have some overlap, but each
114 // collection should have some tests that no other collection has.
115
116 /// Tests for leftmost-first match semantics.
117 const PACKED_LEFTMOST_FIRST: TestCollection =
118 &[BASICS, LEFTMOST, LEFTMOST_FIRST, REGRESSION, TEDDY];
119
120 /// Tests for leftmost-longest match semantics.
121 const PACKED_LEFTMOST_LONGEST: TestCollection =
122 &[BASICS, LEFTMOST, LEFTMOST_LONGEST, REGRESSION, TEDDY];
123
124 // Now define the individual tests that make up the collections above.
125
126 /// A collection of tests for the that should always be true regardless of
127 /// match semantics. That is, all combinations of leftmost-{first, longest}
128 /// should produce the same answer.
129 const BASICS: &'static [SearchTest] = &[
130 t!(basic001, &["a"], "", &[]),
131 t!(basic010, &["a"], "a", &[(0, 0, 1)]),
132 t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
133 t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]),
134 t!(basic040, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]),
135 t!(basic050, &["a"], "bba", &[(0, 2, 3)]),
136 t!(basic060, &["a"], "bbb", &[]),
137 t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]),
138 t!(basic100, &["aa"], "", &[]),
139 t!(basic110, &["aa"], "aa", &[(0, 0, 2)]),
140 t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]),
141 t!(basic130, &["aa"], "abbab", &[]),
142 t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]),
143 t!(basic150, &["aaa"], "aaa", &[(0, 0, 3)]),
144 t!(basic200, &["abc"], "abc", &[(0, 0, 3)]),
145 t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]),
146 t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]),
147 t!(basic300, &["a", "b"], "", &[]),
148 t!(basic310, &["a", "b"], "z", &[]),
149 t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]),
150 t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]),
151 t!(
152 basic340,
153 &["a", "b"],
154 "abba",
155 &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),]
156 ),
157 t!(
158 basic350,
159 &["b", "a"],
160 "abba",
161 &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),]
162 ),
163 t!(basic360, &["abc", "bc"], "xbc", &[(1, 1, 3),]),
164 t!(basic400, &["foo", "bar"], "", &[]),
165 t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]),
166 t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]),
167 t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]),
168 t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]),
169 t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]),
170 t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]),
171 t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]),
172 t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]),
173 t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]),
174 t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]),
175 t!(
176 basic720,
177 &["yabcdef", "bcdeyabc", "abcdezghi"],
178 "yabcdezghi",
179 &[(2, 1, 10),]
180 ),
181 t!(basic810, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
182 t!(basic820, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
183 t!(basic830, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]),
184 t!(
185 basic840,
186 &["ab", "ba"],
187 "abababa",
188 &[(0, 0, 2), (0, 2, 4), (0, 4, 6),]
189 ),
190 t!(basic850, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]),
191 ];
192
193 /// Tests for leftmost match semantics. These should pass for both
194 /// leftmost-first and leftmost-longest match kinds. Stated differently, among
195 /// ambiguous matches, the longest match and the match that appeared first when
196 /// constructing the automaton should always be the same.
197 const LEFTMOST: &'static [SearchTest] = &[
198 t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
199 t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
200 t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
201 t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]),
202 t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]),
203 t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]),
204 t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]),
205 t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]),
206 t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
207 t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
208 t!(
209 leftmost360,
210 &["abcdefghi", "hz", "abcdefgh"],
211 "abcdefghz",
212 &[(2, 0, 8),]
213 ),
214 t!(
215 leftmost370,
216 &["abcdefghi", "cde", "hz", "abcdefgh"],
217 "abcdefghz",
218 &[(3, 0, 8),]
219 ),
220 t!(
221 leftmost380,
222 &["abcdefghi", "hz", "abcdefgh", "a"],
223 "abcdefghz",
224 &[(2, 0, 8),]
225 ),
226 t!(
227 leftmost390,
228 &["b", "abcdefghi", "hz", "abcdefgh"],
229 "abcdefghz",
230 &[(3, 0, 8),]
231 ),
232 t!(
233 leftmost400,
234 &["h", "abcdefghi", "hz", "abcdefgh"],
235 "abcdefghz",
236 &[(3, 0, 8),]
237 ),
238 t!(
239 leftmost410,
240 &["z", "abcdefghi", "hz", "abcdefgh"],
241 "abcdefghz",
242 &[(3, 0, 8), (0, 8, 9),]
243 ),
244 ];
245
246 /// Tests for non-overlapping leftmost-first match semantics. These tests
247 /// should generally be specific to leftmost-first, which means they should
248 /// generally fail under leftmost-longest semantics.
249 const LEFTMOST_FIRST: &'static [SearchTest] = &[
250 t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
251 t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
252 t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
253 t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]),
254 t!(leftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]),
255 t!(leftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
256 t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]),
257 t!(
258 leftfirst310,
259 &["abcd", "b", "bce", "ce"],
260 "abce",
261 &[(1, 1, 2), (3, 2, 4),]
262 ),
263 t!(
264 leftfirst320,
265 &["a", "abcdefghi", "hz", "abcdefgh"],
266 "abcdefghz",
267 &[(0, 0, 1), (2, 7, 9),]
268 ),
269 t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]),
270 t!(
271 leftfirst340,
272 &["abcdef", "x", "x", "x", "x", "x", "x", "abcde"],
273 "abcdef",
274 &[(0, 0, 6)]
275 ),
276 ];
277
278 /// Tests for non-overlapping leftmost-longest match semantics. These tests
279 /// should generally be specific to leftmost-longest, which means they should
280 /// generally fail under leftmost-first semantics.
281 const LEFTMOST_LONGEST: &'static [SearchTest] = &[
282 t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
283 t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
284 t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
285 t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
286 t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
287 t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
288 t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]),
289 t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
290 t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]),
291 t!(
292 leftlong310,
293 &["a", "abcdefghi", "hz", "abcdefgh"],
294 "abcdefghz",
295 &[(3, 0, 8),]
296 ),
297 t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
298 t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]),
299 t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]),
300 ];
301
302 /// Regression tests that are applied to all combinations.
303 ///
304 /// If regression tests are needed for specific match semantics, then add them
305 /// to the appropriate group above.
306 const REGRESSION: &'static [SearchTest] = &[
307 t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]),
308 t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]),
309 t!(
310 regression030,
311 &["libcore/", "libstd/"],
312 "libcore/char/methods.rs",
313 &[(0, 0, 8),]
314 ),
315 t!(
316 regression040,
317 &["libstd/", "libcore/"],
318 "libcore/char/methods.rs",
319 &[(1, 0, 8),]
320 ),
321 t!(
322 regression050,
323 &["\x00\x00\x01", "\x00\x00\x00"],
324 "\x00\x00\x00",
325 &[(1, 0, 3),]
326 ),
327 t!(
328 regression060,
329 &["\x00\x00\x00", "\x00\x00\x01"],
330 "\x00\x00\x00",
331 &[(0, 0, 3),]
332 ),
333 ];
334
335 const TEDDY: &'static [SearchTest] = &[
336 t!(
337 teddy010,
338 &["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"],
339 "abcdefghijk",
340 &[
341 (0, 0, 1),
342 (1, 1, 2),
343 (2, 2, 3),
344 (3, 3, 4),
345 (4, 4, 5),
346 (5, 5, 6),
347 (6, 6, 7),
348 (7, 7, 8),
349 (8, 8, 9),
350 (9, 9, 10),
351 (10, 10, 11)
352 ]
353 ),
354 t!(
355 teddy020,
356 &["ab", "bc", "cd", "de", "ef", "fg", "gh", "hi", "ij", "jk", "kl"],
357 "abcdefghijk",
358 &[(0, 0, 2), (2, 2, 4), (4, 4, 6), (6, 6, 8), (8, 8, 10),]
359 ),
360 t!(
361 teddy030,
362 &["abc"],
363 "abcdefghijklmnopqrstuvwxyzabcdefghijk",
364 &[(0, 0, 3), (0, 26, 29)]
365 ),
366 ];
367
// Now define a test for each combination of things above that we want to run.
// Since there are a few different combinations for each collection of tests,
// we define a macro to avoid repetition drudgery. The testconfig macro
// generates a test function that builds a searcher for every test in the
// given collection and runs the search tests one-by-one. Its final argument is
// a closure that allows one to configure the Config with additional
// parameters. Each invocation of testconfig below turns a different knob on
// Config.
376
/// Generates a `#[test]` function called `$name` that runs every test in
/// `$collection` through `run_search_tests`.
///
/// For each test, a fresh `Config` is created and handed to `$with` for
/// customization. A searcher is then built from the test's patterns, and all
/// matches it finds in the test's haystack are collected. A searcher that
/// fails to build makes the generated test panic.
macro_rules! testconfig {
    ($name:ident, $collection:expr, $with:expr) => {
        #[test]
        fn $name() {
            run_search_tests($collection, |case| {
                let mut cfg = Config::new();
                $with(&mut cfg);

                let mut builder = cfg.builder();
                builder.extend(case.patterns.iter().map(|p| p.as_bytes()));
                let searcher = builder.build().unwrap();

                let found: Vec<_> =
                    searcher.find_iter(&case.haystack).collect();
                found
            });
        }
    };
}
395
396 #[cfg(target_arch = "x86_64")]
397 testconfig!(
398 search_default_leftmost_first,
399 PACKED_LEFTMOST_FIRST,
400 |_: &mut Config| {}
401 );
402
403 #[cfg(target_arch = "x86_64")]
404 testconfig!(
405 search_default_leftmost_longest,
406 PACKED_LEFTMOST_LONGEST,
407 |c: &mut Config| {
408 c.match_kind(MatchKind::LeftmostLongest);
409 }
410 );
411
412 #[cfg(target_arch = "x86_64")]
413 testconfig!(
414 search_teddy_leftmost_first,
415 PACKED_LEFTMOST_FIRST,
416 |c: &mut Config| {
417 c.force_teddy(true);
418 }
419 );
420
421 #[cfg(target_arch = "x86_64")]
422 testconfig!(
423 search_teddy_leftmost_longest,
424 PACKED_LEFTMOST_LONGEST,
425 |c: &mut Config| {
426 c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
427 }
428 );
429
430 #[cfg(target_arch = "x86_64")]
431 testconfig!(
432 search_teddy_ssse3_leftmost_first,
433 PACKED_LEFTMOST_FIRST,
434 |c: &mut Config| {
435 c.force_teddy(true);
436 if is_x86_feature_detected!("ssse3") {
437 c.force_avx(Some(false));
438 }
439 }
440 );
441
442 #[cfg(target_arch = "x86_64")]
443 testconfig!(
444 search_teddy_ssse3_leftmost_longest,
445 PACKED_LEFTMOST_LONGEST,
446 |c: &mut Config| {
447 c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
448 if is_x86_feature_detected!("ssse3") {
449 c.force_avx(Some(false));
450 }
451 }
452 );
453
454 #[cfg(target_arch = "x86_64")]
455 testconfig!(
456 search_teddy_avx2_leftmost_first,
457 PACKED_LEFTMOST_FIRST,
458 |c: &mut Config| {
459 c.force_teddy(true);
460 if is_x86_feature_detected!("avx2") {
461 c.force_avx(Some(true));
462 }
463 }
464 );
465
466 #[cfg(target_arch = "x86_64")]
467 testconfig!(
468 search_teddy_avx2_leftmost_longest,
469 PACKED_LEFTMOST_LONGEST,
470 |c: &mut Config| {
471 c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
472 if is_x86_feature_detected!("avx2") {
473 c.force_avx(Some(true));
474 }
475 }
476 );
477
478 #[cfg(target_arch = "x86_64")]
479 testconfig!(
480 search_teddy_fat_leftmost_first,
481 PACKED_LEFTMOST_FIRST,
482 |c: &mut Config| {
483 c.force_teddy(true);
484 if is_x86_feature_detected!("avx2") {
485 c.force_teddy_fat(Some(true));
486 }
487 }
488 );
489
490 #[cfg(target_arch = "x86_64")]
491 testconfig!(
492 search_teddy_fat_leftmost_longest,
493 PACKED_LEFTMOST_LONGEST,
494 |c: &mut Config| {
495 c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
496 if is_x86_feature_detected!("avx2") {
497 c.force_teddy_fat(Some(true));
498 }
499 }
500 );
501
502 testconfig!(
503 search_rabinkarp_leftmost_first,
504 PACKED_LEFTMOST_FIRST,
505 |c: &mut Config| {
506 c.force_rabin_karp(true);
507 }
508 );
509
510 testconfig!(
511 search_rabinkarp_leftmost_longest,
512 PACKED_LEFTMOST_LONGEST,
513 |c: &mut Config| {
514 c.force_rabin_karp(true).match_kind(MatchKind::LeftmostLongest);
515 }
516 );
517
518 #[test]
519 fn search_tests_have_unique_names() {
520 let assert = |constname, tests: &[SearchTest]| {
521 let mut seen = HashMap::new(); // map from test name to position
522 for (i, test) in tests.iter().enumerate() {
523 if !seen.contains_key(test.name) {
524 seen.insert(test.name, i);
525 } else {
526 let last = seen[test.name];
527 panic!(
528 "{} tests have duplicate names at positions {} and {}",
529 constname, last, i
530 );
531 }
532 }
533 };
534 assert("BASICS", BASICS);
535 assert("LEFTMOST", LEFTMOST);
536 assert("LEFTMOST_FIRST", LEFTMOST_FIRST);
537 assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST);
538 assert("REGRESSION", REGRESSION);
539 assert("TEDDY", TEDDY);
540 }
541
542 fn run_search_tests<F: FnMut(&SearchTestOwned) -> Vec<Match>>(
543 which: TestCollection,
544 mut f: F,
545 ) {
546 let get_match_triples =
547 |matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
548 matches
549 .into_iter()
550 .map(|m| (m.pattern(), m.start(), m.end()))
551 .collect()
552 };
553 for &tests in which {
554 for spec in tests {
555 for test in spec.variations() {
556 assert_eq!(
557 test.matches,
558 get_match_triples(f(&test)).as_slice(),
559 "test: {}, patterns: {:?}, haystack: {:?}, offset: {:?}",
560 test.name,
561 test.patterns,
562 test.haystack,
563 test.offset,
564 );
565 }
566 }
567 }
568 }