1 use std
::collections
::HashMap
;
4 use crate::packed
::{Config, MatchKind}
;
7 /// A description of a single test against a multi-pattern searcher.
9 /// A single test may not necessarily pass on every configuration of a
10 /// searcher. The tests are categorized and grouped appropriately below.
11 #[derive(Clone, Debug, Eq, PartialEq)]
13 /// The name of this test, for debugging.
15 /// The patterns to search for.
16 patterns
: &'
static [&'
static str],
17 /// The text to search.
18 haystack
: &'
static str,
19 /// Each match is a triple of (pattern_index, start, end), where
20 /// pattern_index is an index into `patterns` and `start`/`end` are indices
22 matches
: &'
static [(usize, usize, usize)],
25 struct SearchTestOwned
{
28 patterns
: Vec
<String
>,
30 matches
: Vec
<(usize, usize, usize)>,
34 fn variations(&self) -> Vec
<SearchTestOwned
> {
35 let mut tests
= vec
![];
37 tests
.push(self.offset_prefix(i
));
38 tests
.push(self.offset_suffix(i
));
39 tests
.push(self.offset_both(i
));
44 fn offset_both(&self, off
: usize) -> SearchTestOwned
{
47 name
: self.name
.to_string(),
48 patterns
: self.patterns
.iter().map(|s
| s
.to_string()).collect(),
58 .map(|&(id
, s
, e
)| (id
, s
+ off
, e
+ off
))
63 fn offset_prefix(&self, off
: usize) -> SearchTestOwned
{
66 name
: self.name
.to_string(),
67 patterns
: self.patterns
.iter().map(|s
| s
.to_string()).collect(),
68 haystack
: format
!("{}{}", "Z".repeat(off
), self.haystack
),
72 .map(|&(id
, s
, e
)| (id
, s
+ off
, e
+ off
))
77 fn offset_suffix(&self, off
: usize) -> SearchTestOwned
{
80 name
: self.name
.to_string(),
81 patterns
: self.patterns
.iter().map(|s
| s
.to_string()).collect(),
82 haystack
: format
!("{}{}", self.haystack
, "Z".repeat(off
)),
83 matches
: self.matches
.to_vec(),
87 // fn to_owned(&self) -> SearchTestOwned {
89 // name: self.name.to_string(),
90 // patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
91 // haystack: self.haystack.to_string(),
92 // matches: self.matches.iter().cloned().collect(),
97 /// Short-hand constructor for SearchTest. We use it a lot below.
99 ($name
:ident
, $patterns
:expr
, $haystack
:expr
, $matches
:expr
) => {
101 name
: stringify
!($name
),
109 /// A collection of test groups.
110 type TestCollection
= &'
static [&'
static [SearchTest
]];
112 // Define several collections corresponding to the different type of match
113 // semantics supported. These collections have some overlap, but each
114 // collection should have some tests that no other collection has.
116 /// Tests for leftmost-first match semantics.
117 const PACKED_LEFTMOST_FIRST
: TestCollection
=
118 &[BASICS
, LEFTMOST
, LEFTMOST_FIRST
, REGRESSION
, TEDDY
];
120 /// Tests for leftmost-longest match semantics.
121 const PACKED_LEFTMOST_LONGEST
: TestCollection
=
122 &[BASICS
, LEFTMOST
, LEFTMOST_LONGEST
, REGRESSION
, TEDDY
];
124 // Now define the individual tests that make up the collections above.
126 /// A collection of tests for the that should always be true regardless of
127 /// match semantics. That is, all combinations of leftmost-{first, longest}
128 /// should produce the same answer.
129 const BASICS
: &'
static [SearchTest
] = &[
130 t
!(basic001
, &["a"], "", &[]),
131 t
!(basic010
, &["a"], "a", &[(0, 0, 1)]),
132 t
!(basic020
, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
133 t
!(basic030
, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]),
134 t
!(basic040
, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]),
135 t
!(basic050
, &["a"], "bba", &[(0, 2, 3)]),
136 t
!(basic060
, &["a"], "bbb", &[]),
137 t
!(basic070
, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]),
138 t
!(basic100
, &["aa"], "", &[]),
139 t
!(basic110
, &["aa"], "aa", &[(0, 0, 2)]),
140 t
!(basic120
, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]),
141 t
!(basic130
, &["aa"], "abbab", &[]),
142 t
!(basic140
, &["aa"], "abbabaa", &[(0, 5, 7)]),
143 t
!(basic150
, &["aaa"], "aaa", &[(0, 0, 3)]),
144 t
!(basic200
, &["abc"], "abc", &[(0, 0, 3)]),
145 t
!(basic210
, &["abc"], "zazabzabcz", &[(0, 6, 9)]),
146 t
!(basic220
, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]),
147 t
!(basic300
, &["a", "b"], "", &[]),
148 t
!(basic310
, &["a", "b"], "z", &[]),
149 t
!(basic320
, &["a", "b"], "b", &[(1, 0, 1)]),
150 t
!(basic330
, &["a", "b"], "a", &[(0, 0, 1)]),
155 &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),]
161 &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),]
163 t
!(basic360
, &["abc", "bc"], "xbc", &[(1, 1, 3),]),
164 t
!(basic400
, &["foo", "bar"], "", &[]),
165 t
!(basic410
, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]),
166 t
!(basic420
, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]),
167 t
!(basic430
, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]),
168 t
!(basic440
, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]),
169 t
!(basic450
, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]),
170 t
!(basic460
, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]),
171 t
!(basic470
, &["foo", "bar"], "fobabar", &[(1, 4, 7),]),
172 t
!(basic480
, &["bar", "foo"], "fobabar", &[(0, 4, 7),]),
173 t
!(basic700
, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]),
174 t
!(basic710
, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]),
177 &["yabcdef", "bcdeyabc", "abcdezghi"],
181 t
!(basic810
, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
182 t
!(basic820
, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
183 t
!(basic830
, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]),
188 &[(0, 0, 2), (0, 2, 4), (0, 4, 6),]
190 t
!(basic850
, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]),
193 /// Tests for leftmost match semantics. These should pass for both
194 /// leftmost-first and leftmost-longest match kinds. Stated differently, among
195 /// ambiguous matches, the longest match and the match that appeared first when
196 /// constructing the automaton should always be the same.
197 const LEFTMOST
: &'
static [SearchTest
] = &[
198 t
!(leftmost000
, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
199 t
!(leftmost030
, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
200 t
!(leftmost031
, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
201 t
!(leftmost032
, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]),
202 t
!(leftmost300
, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]),
203 t
!(leftmost310
, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]),
204 t
!(leftmost320
, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]),
205 t
!(leftmost330
, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]),
206 t
!(leftmost340
, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
207 t
!(leftmost350
, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
210 &["abcdefghi", "hz", "abcdefgh"],
216 &["abcdefghi", "cde", "hz", "abcdefgh"],
222 &["abcdefghi", "hz", "abcdefgh", "a"],
228 &["b", "abcdefghi", "hz", "abcdefgh"],
234 &["h", "abcdefghi", "hz", "abcdefgh"],
240 &["z", "abcdefghi", "hz", "abcdefgh"],
242 &[(3, 0, 8), (0, 8, 9),]
246 /// Tests for non-overlapping leftmost-first match semantics. These tests
247 /// should generally be specific to leftmost-first, which means they should
248 /// generally fail under leftmost-longest semantics.
249 const LEFTMOST_FIRST
: &'
static [SearchTest
] = &[
250 t
!(leftfirst000
, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
251 t
!(leftfirst020
, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
252 t
!(leftfirst030
, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
253 t
!(leftfirst040
, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]),
254 t
!(leftfirst100
, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]),
255 t
!(leftfirst110
, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
256 t
!(leftfirst300
, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]),
259 &["abcd", "b", "bce", "ce"],
261 &[(1, 1, 2), (3, 2, 4),]
265 &["a", "abcdefghi", "hz", "abcdefgh"],
267 &[(0, 0, 1), (2, 7, 9),]
269 t
!(leftfirst330
, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]),
272 &["abcdef", "x", "x", "x", "x", "x", "x", "abcde"],
278 /// Tests for non-overlapping leftmost-longest match semantics. These tests
279 /// should generally be specific to leftmost-longest, which means they should
280 /// generally fail under leftmost-first semantics.
281 const LEFTMOST_LONGEST
: &'
static [SearchTest
] = &[
282 t
!(leftlong000
, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
283 t
!(leftlong010
, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
284 t
!(leftlong040
, &["a", "ab"], "a", &[(0, 0, 1)]),
285 t
!(leftlong050
, &["a", "ab"], "ab", &[(1, 0, 2)]),
286 t
!(leftlong060
, &["ab", "a"], "a", &[(1, 0, 1)]),
287 t
!(leftlong070
, &["ab", "a"], "ab", &[(0, 0, 2)]),
288 t
!(leftlong100
, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]),
289 t
!(leftlong110
, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
290 t
!(leftlong300
, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]),
293 &["a", "abcdefghi", "hz", "abcdefgh"],
297 t
!(leftlong320
, &["a", "abab"], "abab", &[(1, 0, 4)]),
298 t
!(leftlong330
, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]),
299 t
!(leftlong340
, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]),
302 /// Regression tests that are applied to all combinations.
304 /// If regression tests are needed for specific match semantics, then add them
305 /// to the appropriate group above.
306 const REGRESSION
: &'
static [SearchTest
] = &[
307 t
!(regression010
, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]),
308 t
!(regression020
, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]),
311 &["libcore/", "libstd/"],
312 "libcore/char/methods.rs",
317 &["libstd/", "libcore/"],
318 "libcore/char/methods.rs",
323 &["\x00\x00\x01", "\x00\x00\x00"],
329 &["\x00\x00\x00", "\x00\x00\x01"],
335 const TEDDY
: &'
static [SearchTest
] = &[
338 &["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"],
356 &["ab", "bc", "cd", "de", "ef", "fg", "gh", "hi", "ij", "jk", "kl"],
358 &[(0, 0, 2), (2, 2, 4), (4, 4, 6), (6, 6, 8), (8, 8, 10),]
363 "abcdefghijklmnopqrstuvwxyzabcdefghijk",
364 &[(0, 0, 3), (0, 26, 29)]
368 // Now define a test for each combination of things above that we want to run.
369 // Since there are a few different combinations for each collection of tests,
370 // we define a couple of macros to avoid repetition drudgery. The testconfig
371 // macro constructs the automaton from a given match kind, and runs the search
372 // tests one-by-one over the given collection. The `with` parameter allows one
373 // to configure the config with additional parameters. The testcombo macro
374 // invokes testconfig in precisely this way: it sets up several tests where
375 // each one turns a different knob on Config.
377 macro_rules
! testconfig
{
378 ($name
:ident
, $collection
:expr
, $with
:expr
) => {
381 run_search_tests($collection
, |test
| {
382 let mut config
= Config
::new();
386 .extend(test
.patterns
.iter().map(|p
| p
.as_bytes()))
389 .find_iter(&test
.haystack
)
396 #[cfg(target_arch = "x86_64")]
398 search_default_leftmost_first
,
399 PACKED_LEFTMOST_FIRST
,
403 #[cfg(target_arch = "x86_64")]
405 search_default_leftmost_longest
,
406 PACKED_LEFTMOST_LONGEST
,
408 c
.match_kind(MatchKind
::LeftmostLongest
);
412 #[cfg(target_arch = "x86_64")]
414 search_teddy_leftmost_first
,
415 PACKED_LEFTMOST_FIRST
,
421 #[cfg(target_arch = "x86_64")]
423 search_teddy_leftmost_longest
,
424 PACKED_LEFTMOST_LONGEST
,
426 c
.force_teddy(true).match_kind(MatchKind
::LeftmostLongest
);
430 #[cfg(target_arch = "x86_64")]
432 search_teddy_ssse3_leftmost_first
,
433 PACKED_LEFTMOST_FIRST
,
436 if is_x86_feature_detected
!("ssse3") {
437 c
.force_avx(Some(false));
442 #[cfg(target_arch = "x86_64")]
444 search_teddy_ssse3_leftmost_longest
,
445 PACKED_LEFTMOST_LONGEST
,
447 c
.force_teddy(true).match_kind(MatchKind
::LeftmostLongest
);
448 if is_x86_feature_detected
!("ssse3") {
449 c
.force_avx(Some(false));
454 #[cfg(target_arch = "x86_64")]
456 search_teddy_avx2_leftmost_first
,
457 PACKED_LEFTMOST_FIRST
,
460 if is_x86_feature_detected
!("avx2") {
461 c
.force_avx(Some(true));
466 #[cfg(target_arch = "x86_64")]
468 search_teddy_avx2_leftmost_longest
,
469 PACKED_LEFTMOST_LONGEST
,
471 c
.force_teddy(true).match_kind(MatchKind
::LeftmostLongest
);
472 if is_x86_feature_detected
!("avx2") {
473 c
.force_avx(Some(true));
478 #[cfg(target_arch = "x86_64")]
480 search_teddy_fat_leftmost_first
,
481 PACKED_LEFTMOST_FIRST
,
484 if is_x86_feature_detected
!("avx2") {
485 c
.force_teddy_fat(Some(true));
490 #[cfg(target_arch = "x86_64")]
492 search_teddy_fat_leftmost_longest
,
493 PACKED_LEFTMOST_LONGEST
,
495 c
.force_teddy(true).match_kind(MatchKind
::LeftmostLongest
);
496 if is_x86_feature_detected
!("avx2") {
497 c
.force_teddy_fat(Some(true));
503 search_rabinkarp_leftmost_first
,
504 PACKED_LEFTMOST_FIRST
,
506 c
.force_rabin_karp(true);
511 search_rabinkarp_leftmost_longest
,
512 PACKED_LEFTMOST_LONGEST
,
514 c
.force_rabin_karp(true).match_kind(MatchKind
::LeftmostLongest
);
519 fn search_tests_have_unique_names() {
520 let assert
= |constname
, tests
: &[SearchTest
]| {
521 let mut seen
= HashMap
::new(); // map from test name to position
522 for (i
, test
) in tests
.iter().enumerate() {
523 if !seen
.contains_key(test
.name
) {
524 seen
.insert(test
.name
, i
);
526 let last
= seen
[test
.name
];
528 "{} tests have duplicate names at positions {} and {}",
534 assert("BASICS", BASICS
);
535 assert("LEFTMOST", LEFTMOST
);
536 assert("LEFTMOST_FIRST", LEFTMOST_FIRST
);
537 assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST
);
538 assert("REGRESSION", REGRESSION
);
539 assert("TEDDY", TEDDY
);
542 fn run_search_tests
<F
: FnMut(&SearchTestOwned
) -> Vec
<Match
>>(
543 which
: TestCollection
,
546 let get_match_triples
=
547 |matches
: Vec
<Match
>| -> Vec
<(usize, usize, usize)> {
550 .map(|m
| (m
.pattern(), m
.start(), m
.end()))
553 for &tests
in which
{
555 for test
in spec
.variations() {
558 get_match_triples(f(&test
)).as_slice(),
559 "test: {}, patterns: {:?}, haystack: {:?}, offset: {:?}",