1 use std
::{collections::HashMap, format, string::String, vec::Vec}
;
4 AhoCorasick
, AhoCorasickBuilder
, AhoCorasickKind
, Anchored
, Input
, Match
,
8 /// A description of a single test against an Aho-Corasick automaton.
10 /// A single test may not necessarily pass on every configuration of an
11 /// Aho-Corasick automaton. The tests are categorized and grouped appropriately
13 #[derive(Clone, Debug, Eq, PartialEq)]
15 /// The name of this test, for debugging.
17 /// The patterns to search for.
18 patterns
: &'
static [&'
static str],
19 /// The text to search.
20 haystack
: &'
static str,
21 /// Each match is a triple of (pattern_index, start, end), where
22 /// pattern_index is an index into `patterns` and `start`/`end` are indices
24 matches
: &'
static [(usize, usize, usize)],
27 /// Short-hand constructor for SearchTest. We use it a lot below.
29 ($name
:ident
, $patterns
:expr
, $haystack
:expr
, $matches
:expr
) => {
31 name
: stringify
!($name
),
39 /// A collection of test groups.
40 type TestCollection
= &'
static [&'
static [SearchTest
]];
42 // Define several collections corresponding to the different type of match
43 // semantics supported by Aho-Corasick. These collections have some overlap,
44 // but each collection should have some tests that no other collection has.
46 /// Tests for Aho-Corasick's standard non-overlapping match semantics.
47 const AC_STANDARD_NON_OVERLAPPING
: TestCollection
=
48 &[BASICS
, NON_OVERLAPPING
, STANDARD
, REGRESSION
];
50 /// Tests for Aho-Corasick's anchored standard non-overlapping match semantics.
51 const AC_STANDARD_ANCHORED_NON_OVERLAPPING
: TestCollection
=
52 &[ANCHORED_BASICS
, ANCHORED_NON_OVERLAPPING
, STANDARD_ANCHORED
];
54 /// Tests for Aho-Corasick's standard overlapping match semantics.
55 const AC_STANDARD_OVERLAPPING
: TestCollection
=
56 &[BASICS
, OVERLAPPING
, REGRESSION
];
59 Iterators of anchored overlapping searches were removed from the API in
60 after 0.7, but we leave the tests commented out for posterity.
61 /// Tests for Aho-Corasick's anchored standard overlapping match semantics.
62 const AC_STANDARD_ANCHORED_OVERLAPPING: TestCollection =
63 &[ANCHORED_BASICS, ANCHORED_OVERLAPPING];
66 /// Tests for Aho-Corasick's leftmost-first match semantics.
67 const AC_LEFTMOST_FIRST
: TestCollection
=
68 &[BASICS
, NON_OVERLAPPING
, LEFTMOST
, LEFTMOST_FIRST
, REGRESSION
];
70 /// Tests for Aho-Corasick's anchored leftmost-first match semantics.
71 const AC_LEFTMOST_FIRST_ANCHORED
: TestCollection
= &[
73 ANCHORED_NON_OVERLAPPING
,
75 ANCHORED_LEFTMOST_FIRST
,
78 /// Tests for Aho-Corasick's leftmost-longest match semantics.
79 const AC_LEFTMOST_LONGEST
: TestCollection
=
80 &[BASICS
, NON_OVERLAPPING
, LEFTMOST
, LEFTMOST_LONGEST
, REGRESSION
];
82 /// Tests for Aho-Corasick's anchored leftmost-longest match semantics.
83 const AC_LEFTMOST_LONGEST_ANCHORED
: TestCollection
= &[
85 ANCHORED_NON_OVERLAPPING
,
87 ANCHORED_LEFTMOST_LONGEST
,
90 // Now define the individual tests that make up the collections above.
92 /// A collection of tests for the Aho-Corasick algorithm that should always be
93 /// true regardless of match semantics. That is, all combinations of
94 /// leftmost-{shortest, first, longest} x {overlapping, non-overlapping}
95 /// should produce the same answer.
96 const BASICS
: &'
static [SearchTest
] = &[
97 t
!(basic000
, &[], "", &[]),
98 t
!(basic001
, &[""], "a", &[(0, 0, 0), (0, 1, 1)]),
99 t
!(basic002
, &["a"], "", &[]),
100 t
!(basic010
, &["a"], "a", &[(0, 0, 1)]),
101 t
!(basic020
, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
102 t
!(basic030
, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]),
103 t
!(basic040
, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]),
104 t
!(basic050
, &["a"], "bba", &[(0, 2, 3)]),
105 t
!(basic060
, &["a"], "bbb", &[]),
106 t
!(basic070
, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]),
107 t
!(basic100
, &["aa"], "", &[]),
108 t
!(basic110
, &["aa"], "aa", &[(0, 0, 2)]),
109 t
!(basic120
, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]),
110 t
!(basic130
, &["aa"], "abbab", &[]),
111 t
!(basic140
, &["aa"], "abbabaa", &[(0, 5, 7)]),
112 t
!(basic200
, &["abc"], "abc", &[(0, 0, 3)]),
113 t
!(basic210
, &["abc"], "zazabzabcz", &[(0, 6, 9)]),
114 t
!(basic220
, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]),
115 t
!(basic300
, &["a", "b"], "", &[]),
116 t
!(basic310
, &["a", "b"], "z", &[]),
117 t
!(basic320
, &["a", "b"], "b", &[(1, 0, 1)]),
118 t
!(basic330
, &["a", "b"], "a", &[(0, 0, 1)]),
123 &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),]
129 &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),]
131 t
!(basic360
, &["abc", "bc"], "xbc", &[(1, 1, 3),]),
132 t
!(basic400
, &["foo", "bar"], "", &[]),
133 t
!(basic410
, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]),
134 t
!(basic420
, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]),
135 t
!(basic430
, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]),
136 t
!(basic440
, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]),
137 t
!(basic450
, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]),
138 t
!(basic460
, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]),
139 t
!(basic470
, &["foo", "bar"], "fobabar", &[(1, 4, 7),]),
140 t
!(basic480
, &["bar", "foo"], "fobabar", &[(0, 4, 7),]),
141 t
!(basic600
, &[""], "", &[(0, 0, 0)]),
142 t
!(basic610
, &[""], "a", &[(0, 0, 0), (0, 1, 1)]),
143 t
!(basic620
, &[""], "abc", &[(0, 0, 0), (0, 1, 1), (0, 2, 2), (0, 3, 3)]),
144 t
!(basic700
, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]),
145 t
!(basic710
, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]),
148 &["yabcdef", "bcdeyabc", "abcdezghi"],
154 /// A collection of *anchored* tests for the Aho-Corasick algorithm that should
155 /// always be true regardless of match semantics. That is, all combinations of
156 /// leftmost-{shortest, first, longest} x {overlapping, non-overlapping} should
157 /// produce the same answer.
158 const ANCHORED_BASICS
: &'
static [SearchTest
] = &[
159 t
!(abasic000
, &[], "", &[]),
160 t
!(abasic001
, &[], "a", &[]),
161 t
!(abasic002
, &[], "abc", &[]),
162 t
!(abasic010
, &[""], "", &[(0, 0, 0)]),
163 t
!(abasic020
, &[""], "a", &[(0, 0, 0), (0, 1, 1)]),
164 t
!(abasic030
, &[""], "abc", &[(0, 0, 0), (0, 1, 1), (0, 2, 2), (0, 3, 3)]),
165 t
!(abasic100
, &["a"], "a", &[(0, 0, 1)]),
166 t
!(abasic110
, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
167 t
!(abasic120
, &["a", "b"], "ab", &[(0, 0, 1), (1, 1, 2)]),
168 t
!(abasic130
, &["a", "b"], "ba", &[(1, 0, 1), (0, 1, 2)]),
169 t
!(abasic140
, &["foo", "foofoo"], "foo", &[(0, 0, 3)]),
170 t
!(abasic150
, &["foofoo", "foo"], "foo", &[(1, 0, 3)]),
171 t
!(abasic200
, &["foo"], "foofoo foo", &[(0, 0, 3), (0, 3, 6)]),
174 /// Tests for non-overlapping standard match semantics.
176 /// These tests generally shouldn't pass for leftmost-{first,longest}, although
177 /// some do in order to write clearer tests. For example, standard000 will
178 /// pass with leftmost-first semantics, but standard010 will not. We write
179 /// both to emphasize how the match semantics work.
180 const STANDARD
: &'
static [SearchTest
] = &[
181 t
!(standard000
, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
182 t
!(standard010
, &["abcd", "ab"], "abcd", &[(1, 0, 2)]),
183 t
!(standard020
, &["abcd", "ab", "abc"], "abcd", &[(1, 0, 2)]),
184 t
!(standard030
, &["abcd", "abc", "ab"], "abcd", &[(2, 0, 2)]),
185 t
!(standard040
, &["a", ""], "a", &[(1, 0, 0), (1, 1, 1)]),
188 &["abcd", "bcd", "cd", "b"],
190 &[(3, 1, 2), (2, 2, 4),]
192 t
!(standard410
, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1),]),
193 t
!(standard420
, &["", "a"], "aa", &[(0, 0, 0), (0, 1, 1), (0, 2, 2),]),
194 t
!(standard430
, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
195 t
!(standard440
, &["a", "", ""], "a", &[(1, 0, 0), (1, 1, 1),]),
196 t
!(standard450
, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1),]),
199 /// Like STANDARD, but for anchored searches.
200 const STANDARD_ANCHORED
: &'
static [SearchTest
] = &[
201 t
!(astandard000
, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
202 t
!(astandard010
, &["abcd", "ab"], "abcd", &[(1, 0, 2)]),
203 t
!(astandard020
, &["abcd", "ab", "abc"], "abcd", &[(1, 0, 2)]),
204 t
!(astandard030
, &["abcd", "abc", "ab"], "abcd", &[(2, 0, 2)]),
205 t
!(astandard040
, &["a", ""], "a", &[(1, 0, 0), (1, 1, 1)]),
206 t
!(astandard050
, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]),
207 t
!(astandard410
, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
208 t
!(astandard420
, &["", "a"], "aa", &[(0, 0, 0), (0, 1, 1), (0, 2, 2)]),
209 t
!(astandard430
, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
210 t
!(astandard440
, &["a", "", ""], "a", &[(1, 0, 0), (1, 1, 1)]),
211 t
!(astandard450
, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
214 /// Tests for non-overlapping leftmost match semantics. These should pass for
215 /// both leftmost-first and leftmost-longest match kinds. Stated differently,
216 /// among ambiguous matches, the longest match and the match that appeared
217 /// first when constructing the automaton should always be the same.
218 const LEFTMOST
: &'
static [SearchTest
] = &[
219 t
!(leftmost000
, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
220 t
!(leftmost010
, &["a", ""], "a", &[(0, 0, 1)]),
221 t
!(leftmost011
, &["a", ""], "ab", &[(0, 0, 1), (1, 2, 2)]),
222 t
!(leftmost020
, &["", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
223 t
!(leftmost030
, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
224 t
!(leftmost031
, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
225 t
!(leftmost032
, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]),
226 t
!(leftmost300
, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]),
227 t
!(leftmost310
, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]),
228 t
!(leftmost320
, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]),
229 t
!(leftmost330
, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]),
230 t
!(leftmost340
, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
231 t
!(leftmost350
, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
234 &["abcdefghi", "hz", "abcdefgh"],
240 &["abcdefghi", "cde", "hz", "abcdefgh"],
246 &["abcdefghi", "hz", "abcdefgh", "a"],
252 &["b", "abcdefghi", "hz", "abcdefgh"],
258 &["h", "abcdefghi", "hz", "abcdefgh"],
264 &["z", "abcdefghi", "hz", "abcdefgh"],
266 &[(3, 0, 8), (0, 8, 9),]
270 /// Like LEFTMOST, but for anchored searches.
271 const ANCHORED_LEFTMOST
: &'
static [SearchTest
] = &[
272 t
!(aleftmost000
, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
273 // We shouldn't allow an empty match immediately following a match, right?
274 t
!(aleftmost010
, &["a", ""], "a", &[(0, 0, 1)]),
275 t
!(aleftmost020
, &["", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
276 t
!(aleftmost030
, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
277 t
!(aleftmost031
, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
278 t
!(aleftmost032
, &["ab", "a"], "xayabbbz", &[]),
279 t
!(aleftmost300
, &["abcd", "bce", "b"], "abce", &[]),
280 t
!(aleftmost301
, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]),
281 t
!(aleftmost310
, &["abcd", "ce", "bc"], "abce", &[]),
282 t
!(aleftmost320
, &["abcd", "bce", "ce", "b"], "abce", &[]),
283 t
!(aleftmost330
, &["abcd", "bce", "cz", "bc"], "abcz", &[]),
284 t
!(aleftmost340
, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
285 t
!(aleftmost350
, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
288 &["abcdefghi", "hz", "abcdefgh"],
294 &["abcdefghi", "cde", "hz", "abcdefgh"],
300 &["abcdefghi", "hz", "abcdefgh", "a"],
306 &["b", "abcdefghi", "hz", "abcdefgh"],
312 &["h", "abcdefghi", "hz", "abcdefgh"],
318 &["z", "abcdefghi", "hz", "abcdefgh"],
320 &[(3, 0, 8), (0, 8, 9)]
324 /// Tests for non-overlapping leftmost-first match semantics. These tests
325 /// should generally be specific to leftmost-first, which means they should
326 /// generally fail under leftmost-longest semantics.
327 const LEFTMOST_FIRST
: &'
static [SearchTest
] = &[
328 t
!(leftfirst000
, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
329 t
!(leftfirst010
, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
330 t
!(leftfirst011
, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
331 t
!(leftfirst012
, &["a", "", ""], "a", &[(0, 0, 1)]),
332 t
!(leftfirst013
, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
333 t
!(leftfirst014
, &["a", ""], "a", &[(0, 0, 1)]),
334 t
!(leftfirst015
, &["a", ""], "ab", &[(0, 0, 1), (1, 2, 2)]),
335 t
!(leftfirst020
, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
336 t
!(leftfirst030
, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
337 t
!(leftfirst040
, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]),
338 t
!(leftfirst100
, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]),
339 t
!(leftfirst110
, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
340 t
!(leftfirst300
, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]),
343 &["abcd", "b", "bce", "ce"],
345 &[(1, 1, 2), (3, 2, 4),]
349 &["a", "abcdefghi", "hz", "abcdefgh"],
351 &[(0, 0, 1), (2, 7, 9),]
353 t
!(leftfirst330
, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]),
354 t
!(leftfirst400
, &["amwix", "samwise", "sam"], "Zsamwix", &[(2, 1, 4)]),
357 /// Like LEFTMOST_FIRST, but for anchored searches.
358 const ANCHORED_LEFTMOST_FIRST
: &'
static [SearchTest
] = &[
359 t
!(aleftfirst000
, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
360 t
!(aleftfirst010
, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
361 t
!(aleftfirst011
, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
362 t
!(aleftfirst012
, &["a", "", ""], "a", &[(0, 0, 1)]),
363 t
!(aleftfirst013
, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
364 t
!(aleftfirst020
, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
365 t
!(aleftfirst030
, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
366 t
!(aleftfirst040
, &["a", "ab"], "xayabbbz", &[]),
367 t
!(aleftfirst100
, &["abcdefg", "bcde", "bcdef"], "abcdef", &[]),
368 t
!(aleftfirst110
, &["abcdefg", "bcdef", "bcde"], "abcdef", &[]),
369 t
!(aleftfirst300
, &["abcd", "b", "bce"], "abce", &[]),
370 t
!(aleftfirst310
, &["abcd", "b", "bce", "ce"], "abce", &[]),
373 &["a", "abcdefghi", "hz", "abcdefgh"],
377 t
!(aleftfirst330
, &["a", "abab"], "abab", &[(0, 0, 1)]),
378 t
!(aleftfirst400
, &["wise", "samwise", "sam"], "samwix", &[(2, 0, 3)]),
381 /// Tests for non-overlapping leftmost-longest match semantics. These tests
382 /// should generally be specific to leftmost-longest, which means they should
383 /// generally fail under leftmost-first semantics.
384 const LEFTMOST_LONGEST
: &'
static [SearchTest
] = &[
385 t
!(leftlong000
, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
386 t
!(leftlong010
, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
387 t
!(leftlong020
, &["", "a"], "a", &[(1, 0, 1)]),
388 t
!(leftlong021
, &["", "a", ""], "a", &[(1, 0, 1)]),
389 t
!(leftlong022
, &["a", "", ""], "a", &[(0, 0, 1)]),
390 t
!(leftlong023
, &["", "", "a"], "a", &[(2, 0, 1)]),
391 t
!(leftlong024
, &["", "a"], "ab", &[(1, 0, 1), (0, 2, 2)]),
392 t
!(leftlong030
, &["", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
393 t
!(leftlong040
, &["a", "ab"], "a", &[(0, 0, 1)]),
394 t
!(leftlong050
, &["a", "ab"], "ab", &[(1, 0, 2)]),
395 t
!(leftlong060
, &["ab", "a"], "a", &[(1, 0, 1)]),
396 t
!(leftlong070
, &["ab", "a"], "ab", &[(0, 0, 2)]),
397 t
!(leftlong100
, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]),
398 t
!(leftlong110
, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
399 t
!(leftlong300
, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]),
402 &["a", "abcdefghi", "hz", "abcdefgh"],
406 t
!(leftlong320
, &["a", "abab"], "abab", &[(1, 0, 4)]),
407 t
!(leftlong330
, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]),
408 t
!(leftlong340
, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]),
411 /// Like LEFTMOST_LONGEST, but for anchored searches.
412 const ANCHORED_LEFTMOST_LONGEST
: &'
static [SearchTest
] = &[
413 t
!(aleftlong000
, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
414 t
!(aleftlong010
, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
415 t
!(aleftlong020
, &["", "a"], "a", &[(1, 0, 1)]),
416 t
!(aleftlong021
, &["", "a", ""], "a", &[(1, 0, 1)]),
417 t
!(aleftlong022
, &["a", "", ""], "a", &[(0, 0, 1)]),
418 t
!(aleftlong023
, &["", "", "a"], "a", &[(2, 0, 1)]),
419 t
!(aleftlong030
, &["", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
420 t
!(aleftlong040
, &["a", "ab"], "a", &[(0, 0, 1)]),
421 t
!(aleftlong050
, &["a", "ab"], "ab", &[(1, 0, 2)]),
422 t
!(aleftlong060
, &["ab", "a"], "a", &[(1, 0, 1)]),
423 t
!(aleftlong070
, &["ab", "a"], "ab", &[(0, 0, 2)]),
424 t
!(aleftlong100
, &["abcdefg", "bcde", "bcdef"], "abcdef", &[]),
425 t
!(aleftlong110
, &["abcdefg", "bcdef", "bcde"], "abcdef", &[]),
426 t
!(aleftlong300
, &["abcd", "b", "bce"], "abce", &[]),
429 &["a", "abcdefghi", "hz", "abcdefgh"],
433 t
!(aleftlong320
, &["a", "abab"], "abab", &[(1, 0, 4)]),
434 t
!(aleftlong330
, &["abcd", "b", "ce"], "abce", &[]),
435 t
!(aleftlong340
, &["a", "ab"], "xayabbbz", &[]),
438 /// Tests for non-overlapping match semantics.
440 /// Generally these tests shouldn't pass when using overlapping semantics.
441 /// These should pass for both standard and leftmost match semantics.
442 const NON_OVERLAPPING
: &'
static [SearchTest
] = &[
443 t
!(nover010
, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
444 t
!(nover020
, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
445 t
!(nover030
, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]),
450 &[(0, 0, 2), (0, 2, 4), (0, 4, 6),]
452 t
!(nover200
, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]),
453 t
!(nover300
, &["", ""], "", &[(0, 0, 0),]),
454 t
!(nover310
, &["", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
457 /// Like NON_OVERLAPPING, but for anchored searches.
458 const ANCHORED_NON_OVERLAPPING
: &'
static [SearchTest
] = &[
459 t
!(anover010
, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
460 t
!(anover020
, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
461 t
!(anover030
, &["abc", "bc"], "zazabcz", &[]),
466 &[(0, 0, 2), (0, 2, 4), (0, 4, 6)]
468 t
!(anover200
, &["foo", "foo"], "foobarfoo", &[(0, 0, 3)]),
469 t
!(anover300
, &["", ""], "", &[(0, 0, 0)]),
470 t
!(anover310
, &["", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
473 /// Tests for overlapping match semantics.
475 /// This only supports standard match semantics, since leftmost-{first,longest}
476 /// do not support overlapping matches.
477 const OVERLAPPING
: &'
static [SearchTest
] = &[
480 &["abcd", "bcd", "cd", "b"],
482 &[(3, 1, 2), (0, 0, 4), (1, 1, 4), (2, 2, 4),]
486 &["bcd", "cd", "b", "abcd"],
488 &[(2, 1, 2), (3, 0, 4), (0, 1, 4), (1, 2, 4),]
492 &["abcd", "bcd", "cd"],
494 &[(0, 0, 4), (1, 1, 4), (2, 2, 4),]
498 &["bcd", "abcd", "cd"],
500 &[(1, 0, 4), (0, 1, 4), (2, 2, 4),]
504 &["bcd", "cd", "abcd"],
506 &[(2, 0, 4), (0, 1, 4), (1, 2, 4),]
508 t
!(over050
, &["abc", "bc"], "zazabcz", &[(0, 3, 6), (1, 4, 6),]),
513 &[(0, 0, 2), (1, 1, 3), (0, 2, 4), (1, 3, 5), (0, 4, 6), (1, 5, 7),]
519 &[(0, 0, 3), (1, 0, 3), (0, 6, 9), (1, 6, 9),]
521 t
!(over300
, &["", ""], "", &[(0, 0, 0), (1, 0, 0),]),
526 &[(0, 0, 0), (1, 0, 0), (0, 1, 1), (1, 1, 1),]
528 t
!(over320
, &["", "a"], "a", &[(0, 0, 0), (1, 0, 1), (0, 1, 1),]),
533 &[(0, 0, 0), (2, 0, 0), (1, 0, 1), (0, 1, 1), (2, 1, 1),]
539 &[(1, 0, 0), (2, 0, 0), (0, 0, 1), (1, 1, 1), (2, 1, 1),]
545 &[(0, 0, 0), (1, 0, 0), (2, 0, 1), (0, 1, 1), (1, 1, 1),]
551 &[(0, 0, 3), (1, 0, 6), (0, 3, 6)]
556 Iterators of anchored overlapping searches were removed from the API in
557 after 0.7, but we leave the tests commented out for posterity.
558 /// Like OVERLAPPING, but for anchored searches.
559 const ANCHORED_OVERLAPPING: &'static [SearchTest] = &[
560 t!(aover000, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]),
561 t!(aover010, &["bcd", "cd", "b", "abcd"], "abcd", &[(3, 0, 4)]),
562 t!(aover020, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4)]),
563 t!(aover030, &["bcd", "abcd", "cd"], "abcd", &[(1, 0, 4)]),
564 t!(aover040, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4)]),
565 t!(aover050, &["abc", "bc"], "zazabcz", &[]),
566 t!(aover100, &["ab", "ba"], "abababa", &[(0, 0, 2)]),
567 t!(aover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (1, 0, 3)]),
568 t!(aover300, &["", ""], "", &[(0, 0, 0), (1, 0, 0),]),
569 t!(aover310, &["", ""], "a", &[(0, 0, 0), (1, 0, 0)]),
570 t!(aover320, &["", "a"], "a", &[(0, 0, 0), (1, 0, 1)]),
571 t!(aover330, &["", "a", ""], "a", &[(0, 0, 0), (2, 0, 0), (1, 0, 1)]),
572 t!(aover340, &["a", "", ""], "a", &[(1, 0, 0), (2, 0, 0), (0, 0, 1)]),
573 t!(aover350, &["", "", "a"], "a", &[(0, 0, 0), (1, 0, 0), (2, 0, 1)]),
574 t!(aover360, &["foo", "foofoo"], "foofoo", &[(0, 0, 3), (1, 0, 6)]),
578 /// Tests for ASCII case insensitivity.
580 /// These tests should all have the same behavior regardless of match semantics
581 /// or whether the search is overlapping.
582 const ASCII_CASE_INSENSITIVE
: &'
static [SearchTest
] = &[
583 t
!(acasei000
, &["a"], "A", &[(0, 0, 1)]),
584 t
!(acasei010
, &["Samwise"], "SAMWISE", &[(0, 0, 7)]),
585 t
!(acasei011
, &["Samwise"], "SAMWISE.abcd", &[(0, 0, 7)]),
586 t
!(acasei020
, &["fOoBaR"], "quux foobar baz", &[(0, 5, 11)]),
589 /// Like ASCII_CASE_INSENSITIVE, but specifically for non-overlapping tests.
590 const ASCII_CASE_INSENSITIVE_NON_OVERLAPPING
: &'
static [SearchTest
] = &[
591 t
!(acasei000
, &["foo", "FOO"], "fOo", &[(0, 0, 3)]),
592 t
!(acasei000
, &["FOO", "foo"], "fOo", &[(0, 0, 3)]),
593 t
!(acasei010
, &["abc", "def"], "abcdef", &[(0, 0, 3), (1, 3, 6)]),
596 /// Like ASCII_CASE_INSENSITIVE, but specifically for overlapping tests.
597 const ASCII_CASE_INSENSITIVE_OVERLAPPING
: &'
static [SearchTest
] = &[
598 t
!(acasei000
, &["foo", "FOO"], "fOo", &[(0, 0, 3), (1, 0, 3)]),
599 t
!(acasei001
, &["FOO", "foo"], "fOo", &[(0, 0, 3), (1, 0, 3)]),
600 // This is a regression test from:
601 // https://github.com/BurntSushi/aho-corasick/issues/68
602 // Previously, it was reporting a duplicate (1, 3, 6) match.
605 &["abc", "def", "abcdef"],
607 &[(0, 0, 3), (2, 0, 6), (1, 3, 6)]
611 /// Regression tests that are applied to all Aho-Corasick combinations.
613 /// If regression tests are needed for specific match semantics, then add them
614 /// to the appropriate group above.
615 const REGRESSION
: &'
static [SearchTest
] = &[
616 t
!(regression010
, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]),
617 t
!(regression020
, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]),
620 &["libcore/", "libstd/"],
621 "libcore/char/methods.rs",
626 &["libstd/", "libcore/"],
627 "libcore/char/methods.rs",
632 &["\x00\x00\x01", "\x00\x00\x00"],
638 &["\x00\x00\x00", "\x00\x00\x01"],
644 // Now define a test for each combination of things above that we want to run.
645 // Since there are a few different combinations for each collection of tests,
646 // we define a couple of macros to avoid repetition drudgery. The testconfig
647 // macro constructs the automaton from a given match kind, and runs the search
648 // tests one-by-one over the given collection. The `with` parameter allows one
649 // to configure the builder with additional parameters. The testcombo macro
650 // invokes testconfig in precisely this way: it sets up several tests where
651 // each one turns a different knob on AhoCorasickBuilder.
653 macro_rules
! testconfig
{
654 (anchored
, $name
:ident
, $collection
:expr
, $kind
:ident
, $with
:expr
) => {
657 run_search_tests($collection
, |test
| {
658 let mut builder
= AhoCorasick
::builder();
660 let input
= Input
::new(test
.haystack
).anchored(Anchored
::Yes
);
662 .match_kind(MatchKind
::$kind
)
663 .build(test
.patterns
)
665 .try_find_iter(input
)
671 (overlapping
, $name
:ident
, $collection
:expr
, $kind
:ident
, $with
:expr
) => {
674 run_search_tests($collection
, |test
| {
675 let mut builder
= AhoCorasick
::builder();
678 .match_kind(MatchKind
::$kind
)
679 .build(test
.patterns
)
681 .find_overlapping_iter(test
.haystack
)
686 (stream
, $name
:ident
, $collection
:expr
, $kind
:ident
, $with
:expr
) => {
689 run_stream_search_tests($collection
, |test
| {
690 let buf
= std
::io
::BufReader
::with_capacity(
692 test
.haystack
.as_bytes(),
694 let mut builder
= AhoCorasick
::builder();
697 .match_kind(MatchKind
::$kind
)
698 .build(test
.patterns
)
700 .stream_find_iter(buf
)
701 .map(|result
| result
.unwrap())
706 ($name
:ident
, $collection
:expr
, $kind
:ident
, $with
:expr
) => {
709 run_search_tests($collection
, |test
| {
710 let mut builder
= AhoCorasick
::builder();
713 .match_kind(MatchKind
::$kind
)
714 .build(test
.patterns
)
716 .find_iter(test
.haystack
)
723 macro_rules
! testcombo
{
724 ($name
:ident
, $collection
:expr
, $kind
:ident
) => {
728 testconfig
!(default, $collection
, $kind
, |_
| ());
733 |b
: &mut AhoCorasickBuilder
| {
734 b
.kind(Some(AhoCorasickKind
::NoncontiguousNFA
));
738 nfa_noncontig_no_prefilter
,
741 |b
: &mut AhoCorasickBuilder
| {
742 b
.kind(Some(AhoCorasickKind
::NoncontiguousNFA
))
747 nfa_noncontig_all_sparse
,
750 |b
: &mut AhoCorasickBuilder
| {
751 b
.kind(Some(AhoCorasickKind
::NoncontiguousNFA
))
756 nfa_noncontig_all_dense
,
759 |b
: &mut AhoCorasickBuilder
| {
760 b
.kind(Some(AhoCorasickKind
::NoncontiguousNFA
))
761 .dense_depth(usize::MAX
);
768 |b
: &mut AhoCorasickBuilder
| {
769 b
.kind(Some(AhoCorasickKind
::ContiguousNFA
));
773 nfa_contig_no_prefilter
,
776 |b
: &mut AhoCorasickBuilder
| {
777 b
.kind(Some(AhoCorasickKind
::ContiguousNFA
))
782 nfa_contig_all_sparse
,
785 |b
: &mut AhoCorasickBuilder
| {
786 b
.kind(Some(AhoCorasickKind
::ContiguousNFA
))
791 nfa_contig_all_dense
,
794 |b
: &mut AhoCorasickBuilder
| {
795 b
.kind(Some(AhoCorasickKind
::ContiguousNFA
))
796 .dense_depth(usize::MAX
);
800 nfa_contig_no_byte_class
,
803 |b
: &mut AhoCorasickBuilder
| {
804 b
.kind(Some(AhoCorasickKind
::ContiguousNFA
))
805 .byte_classes(false);
812 |b
: &mut AhoCorasickBuilder
| {
813 b
.kind(Some(AhoCorasickKind
::DFA
));
820 |b
: &mut AhoCorasickBuilder
| {
821 b
.kind(Some(AhoCorasickKind
::DFA
))
822 .start_kind(StartKind
::Both
);
829 |b
: &mut AhoCorasickBuilder
| {
830 b
.kind(Some(AhoCorasickKind
::DFA
)).prefilter(false);
834 dfa_start_both_no_prefilter
,
837 |b
: &mut AhoCorasickBuilder
| {
838 b
.kind(Some(AhoCorasickKind
::DFA
))
839 .start_kind(StartKind
::Both
)
847 |b
: &mut AhoCorasickBuilder
| {
848 b
.kind(Some(AhoCorasickKind
::DFA
)).byte_classes(false);
852 dfa_start_both_no_byte_class
,
855 |b
: &mut AhoCorasickBuilder
| {
856 b
.kind(Some(AhoCorasickKind
::DFA
))
857 .start_kind(StartKind
::Both
)
858 .byte_classes(false);
865 // Write out the various combinations of match semantics given the variety of
866 // configurations tested by 'testcombo!'.
867 testcombo
!(search_leftmost_longest
, AC_LEFTMOST_LONGEST
, LeftmostLongest
);
868 testcombo
!(search_leftmost_first
, AC_LEFTMOST_FIRST
, LeftmostFirst
);
870 search_standard_nonoverlapping
,
871 AC_STANDARD_NON_OVERLAPPING
,
875 // Write out the overlapping combo by hand since there is only one of them.
878 search_standard_overlapping_default
,
879 AC_STANDARD_OVERLAPPING
,
885 search_standard_overlapping_nfa_noncontig_default
,
886 AC_STANDARD_OVERLAPPING
,
888 |b
: &mut AhoCorasickBuilder
| {
889 b
.kind(Some(AhoCorasickKind
::NoncontiguousNFA
));
894 search_standard_overlapping_nfa_noncontig_no_prefilter
,
895 AC_STANDARD_OVERLAPPING
,
897 |b
: &mut AhoCorasickBuilder
| {
898 b
.kind(Some(AhoCorasickKind
::NoncontiguousNFA
)).prefilter(false);
903 search_standard_overlapping_nfa_contig_default
,
904 AC_STANDARD_OVERLAPPING
,
906 |b
: &mut AhoCorasickBuilder
| {
907 b
.kind(Some(AhoCorasickKind
::ContiguousNFA
));
912 search_standard_overlapping_nfa_contig_no_prefilter
,
913 AC_STANDARD_OVERLAPPING
,
915 |b
: &mut AhoCorasickBuilder
| {
916 b
.kind(Some(AhoCorasickKind
::ContiguousNFA
)).prefilter(false);
921 search_standard_overlapping_nfa_contig_all_sparse
,
922 AC_STANDARD_OVERLAPPING
,
924 |b
: &mut AhoCorasickBuilder
| {
925 b
.kind(Some(AhoCorasickKind
::ContiguousNFA
)).dense_depth(0);
930 search_standard_overlapping_nfa_contig_all_dense
,
931 AC_STANDARD_OVERLAPPING
,
933 |b
: &mut AhoCorasickBuilder
| {
934 b
.kind(Some(AhoCorasickKind
::ContiguousNFA
)).dense_depth(usize::MAX
);
939 search_standard_overlapping_dfa_default
,
940 AC_STANDARD_OVERLAPPING
,
942 |b
: &mut AhoCorasickBuilder
| {
943 b
.kind(Some(AhoCorasickKind
::DFA
));
948 search_standard_overlapping_dfa_start_both
,
949 AC_STANDARD_OVERLAPPING
,
951 |b
: &mut AhoCorasickBuilder
| {
952 b
.kind(Some(AhoCorasickKind
::DFA
)).start_kind(StartKind
::Both
);
957 search_standard_overlapping_dfa_no_prefilter
,
958 AC_STANDARD_OVERLAPPING
,
960 |b
: &mut AhoCorasickBuilder
| {
961 b
.kind(Some(AhoCorasickKind
::DFA
)).prefilter(false);
966 search_standard_overlapping_dfa_start_both_no_prefilter
,
967 AC_STANDARD_OVERLAPPING
,
969 |b
: &mut AhoCorasickBuilder
| {
970 b
.kind(Some(AhoCorasickKind
::DFA
))
971 .start_kind(StartKind
::Both
)
977 search_standard_overlapping_dfa_no_byte_class
,
978 AC_STANDARD_OVERLAPPING
,
980 |b
: &mut AhoCorasickBuilder
| {
981 b
.kind(Some(AhoCorasickKind
::DFA
)).byte_classes(false);
986 search_standard_overlapping_dfa_start_both_no_byte_class
,
987 AC_STANDARD_OVERLAPPING
,
989 |b
: &mut AhoCorasickBuilder
| {
990 b
.kind(Some(AhoCorasickKind
::DFA
))
991 .start_kind(StartKind
::Both
)
992 .byte_classes(false);
996 // Also write out tests manually for streams, since we only test the standard
997 // match semantics. We also don't bother testing different automaton
998 // configurations, since those are well covered by tests above.
999 #[cfg(feature = "std")]
1002 search_standard_stream_default
,
1003 AC_STANDARD_NON_OVERLAPPING
,
1007 #[cfg(feature = "std")]
1010 search_standard_stream_nfa_noncontig_default
,
1011 AC_STANDARD_NON_OVERLAPPING
,
1013 |b
: &mut AhoCorasickBuilder
| {
1014 b
.kind(Some(AhoCorasickKind
::NoncontiguousNFA
));
1017 #[cfg(feature = "std")]
1020 search_standard_stream_nfa_contig_default
,
1021 AC_STANDARD_NON_OVERLAPPING
,
1023 |b
: &mut AhoCorasickBuilder
| {
1024 b
.kind(Some(AhoCorasickKind
::ContiguousNFA
));
1027 #[cfg(feature = "std")]
1030 search_standard_stream_dfa_default
,
1031 AC_STANDARD_NON_OVERLAPPING
,
1033 |b
: &mut AhoCorasickBuilder
| {
1034 b
.kind(Some(AhoCorasickKind
::DFA
));
1038 // Same thing for anchored searches. Write them out manually.
1041 search_standard_anchored_default
,
1042 AC_STANDARD_ANCHORED_NON_OVERLAPPING
,
1044 |b
: &mut AhoCorasickBuilder
| {
1045 b
.start_kind(StartKind
::Anchored
);
1050 search_standard_anchored_nfa_noncontig_default
,
1051 AC_STANDARD_ANCHORED_NON_OVERLAPPING
,
1053 |b
: &mut AhoCorasickBuilder
| {
1054 b
.start_kind(StartKind
::Anchored
)
1055 .kind(Some(AhoCorasickKind
::NoncontiguousNFA
));
1060 search_standard_anchored_nfa_contig_default
,
1061 AC_STANDARD_ANCHORED_NON_OVERLAPPING
,
1063 |b
: &mut AhoCorasickBuilder
| {
1064 b
.start_kind(StartKind
::Anchored
)
1065 .kind(Some(AhoCorasickKind
::ContiguousNFA
));
1070 search_standard_anchored_dfa_default
,
1071 AC_STANDARD_ANCHORED_NON_OVERLAPPING
,
1073 |b
: &mut AhoCorasickBuilder
| {
1074 b
.start_kind(StartKind
::Anchored
).kind(Some(AhoCorasickKind
::DFA
));
1079 search_standard_anchored_dfa_start_both
,
1080 AC_STANDARD_ANCHORED_NON_OVERLAPPING
,
1082 |b
: &mut AhoCorasickBuilder
| {
1083 b
.start_kind(StartKind
::Both
).kind(Some(AhoCorasickKind
::DFA
));
1088 search_leftmost_first_anchored_default
,
1089 AC_LEFTMOST_FIRST_ANCHORED
,
1091 |b
: &mut AhoCorasickBuilder
| {
1092 b
.start_kind(StartKind
::Anchored
);
1097 search_leftmost_first_anchored_nfa_noncontig_default
,
1098 AC_LEFTMOST_FIRST_ANCHORED
,
1100 |b
: &mut AhoCorasickBuilder
| {
1101 b
.start_kind(StartKind
::Anchored
)
1102 .kind(Some(AhoCorasickKind
::NoncontiguousNFA
));
1107 search_leftmost_first_anchored_nfa_contig_default
,
1108 AC_LEFTMOST_FIRST_ANCHORED
,
1110 |b
: &mut AhoCorasickBuilder
| {
1111 b
.start_kind(StartKind
::Anchored
)
1112 .kind(Some(AhoCorasickKind
::ContiguousNFA
));
1117 search_leftmost_first_anchored_dfa_default
,
1118 AC_LEFTMOST_FIRST_ANCHORED
,
1120 |b
: &mut AhoCorasickBuilder
| {
1121 b
.start_kind(StartKind
::Anchored
).kind(Some(AhoCorasickKind
::DFA
));
1126 search_leftmost_first_anchored_dfa_start_both
,
1127 AC_LEFTMOST_FIRST_ANCHORED
,
1129 |b
: &mut AhoCorasickBuilder
| {
1130 b
.start_kind(StartKind
::Both
).kind(Some(AhoCorasickKind
::DFA
));
1135 search_leftmost_longest_anchored_default
,
1136 AC_LEFTMOST_LONGEST_ANCHORED
,
1138 |b
: &mut AhoCorasickBuilder
| {
1139 b
.start_kind(StartKind
::Anchored
);
1144 search_leftmost_longest_anchored_nfa_noncontig_default
,
1145 AC_LEFTMOST_LONGEST_ANCHORED
,
1147 |b
: &mut AhoCorasickBuilder
| {
1148 b
.start_kind(StartKind
::Anchored
)
1149 .kind(Some(AhoCorasickKind
::NoncontiguousNFA
));
1154 search_leftmost_longest_anchored_nfa_contig_default
,
1155 AC_LEFTMOST_LONGEST_ANCHORED
,
1157 |b
: &mut AhoCorasickBuilder
| {
1158 b
.start_kind(StartKind
::Anchored
)
1159 .kind(Some(AhoCorasickKind
::ContiguousNFA
));
1164 search_leftmost_longest_anchored_dfa_default
,
1165 AC_LEFTMOST_LONGEST_ANCHORED
,
1167 |b
: &mut AhoCorasickBuilder
| {
1168 b
.start_kind(StartKind
::Anchored
).kind(Some(AhoCorasickKind
::DFA
));
1173 search_leftmost_longest_anchored_dfa_start_both
,
1174 AC_LEFTMOST_LONGEST_ANCHORED
,
1176 |b
: &mut AhoCorasickBuilder
| {
1177 b
.start_kind(StartKind
::Both
).kind(Some(AhoCorasickKind
::DFA
));
1181 // And also write out the test combinations for ASCII case insensitivity.
1183 acasei_standard_default
,
1184 &[ASCII_CASE_INSENSITIVE
],
1186 |b
: &mut AhoCorasickBuilder
| {
1187 b
.prefilter(false).ascii_case_insensitive(true);
1191 acasei_standard_nfa_noncontig_default
,
1192 &[ASCII_CASE_INSENSITIVE
],
1194 |b
: &mut AhoCorasickBuilder
| {
1195 b
.kind(Some(AhoCorasickKind
::NoncontiguousNFA
))
1197 .ascii_case_insensitive(true);
1201 acasei_standard_nfa_contig_default
,
1202 &[ASCII_CASE_INSENSITIVE
],
1204 |b
: &mut AhoCorasickBuilder
| {
1205 b
.kind(Some(AhoCorasickKind
::ContiguousNFA
))
1207 .ascii_case_insensitive(true);
1211 acasei_standard_dfa_default
,
1212 &[ASCII_CASE_INSENSITIVE
, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING
],
1214 |b
: &mut AhoCorasickBuilder
| {
1215 b
.kind(Some(AhoCorasickKind
::DFA
)).ascii_case_insensitive(true);
1220 acasei_standard_overlapping_default
,
1221 &[ASCII_CASE_INSENSITIVE
, ASCII_CASE_INSENSITIVE_OVERLAPPING
],
1223 |b
: &mut AhoCorasickBuilder
| {
1224 b
.ascii_case_insensitive(true);
1229 acasei_standard_overlapping_nfa_noncontig_default
,
1230 &[ASCII_CASE_INSENSITIVE
, ASCII_CASE_INSENSITIVE_OVERLAPPING
],
1232 |b
: &mut AhoCorasickBuilder
| {
1233 b
.kind(Some(AhoCorasickKind
::NoncontiguousNFA
))
1234 .ascii_case_insensitive(true);
1239 acasei_standard_overlapping_nfa_contig_default
,
1240 &[ASCII_CASE_INSENSITIVE
, ASCII_CASE_INSENSITIVE_OVERLAPPING
],
1242 |b
: &mut AhoCorasickBuilder
| {
1243 b
.kind(Some(AhoCorasickKind
::ContiguousNFA
))
1244 .ascii_case_insensitive(true);
1249 acasei_standard_overlapping_dfa_default
,
1250 &[ASCII_CASE_INSENSITIVE
, ASCII_CASE_INSENSITIVE_OVERLAPPING
],
1252 |b
: &mut AhoCorasickBuilder
| {
1253 b
.kind(Some(AhoCorasickKind
::DFA
)).ascii_case_insensitive(true);
1257 acasei_leftmost_first_default
,
1258 &[ASCII_CASE_INSENSITIVE
, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING
],
1260 |b
: &mut AhoCorasickBuilder
| {
1261 b
.ascii_case_insensitive(true);
1265 acasei_leftmost_first_nfa_noncontig_default
,
1266 &[ASCII_CASE_INSENSITIVE
, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING
],
1268 |b
: &mut AhoCorasickBuilder
| {
1269 b
.kind(Some(AhoCorasickKind
::NoncontiguousNFA
))
1270 .ascii_case_insensitive(true);
1274 acasei_leftmost_first_nfa_contig_default
,
1275 &[ASCII_CASE_INSENSITIVE
, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING
],
1277 |b
: &mut AhoCorasickBuilder
| {
1278 b
.kind(Some(AhoCorasickKind
::ContiguousNFA
))
1279 .ascii_case_insensitive(true);
1283 acasei_leftmost_first_dfa_default
,
1284 &[ASCII_CASE_INSENSITIVE
, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING
],
1286 |b
: &mut AhoCorasickBuilder
| {
1287 b
.kind(Some(AhoCorasickKind
::DFA
)).ascii_case_insensitive(true);
1291 acasei_leftmost_longest_default
,
1292 &[ASCII_CASE_INSENSITIVE
, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING
],
1294 |b
: &mut AhoCorasickBuilder
| {
1295 b
.ascii_case_insensitive(true);
1299 acasei_leftmost_longest_nfa_noncontig_default
,
1300 &[ASCII_CASE_INSENSITIVE
, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING
],
1302 |b
: &mut AhoCorasickBuilder
| {
1303 b
.kind(Some(AhoCorasickKind
::NoncontiguousNFA
))
1304 .ascii_case_insensitive(true);
1308 acasei_leftmost_longest_nfa_contig_default
,
1309 &[ASCII_CASE_INSENSITIVE
, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING
],
1311 |b
: &mut AhoCorasickBuilder
| {
1312 b
.kind(Some(AhoCorasickKind
::ContiguousNFA
))
1313 .ascii_case_insensitive(true);
1317 acasei_leftmost_longest_dfa_default
,
1318 &[ASCII_CASE_INSENSITIVE
, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING
],
1320 |b
: &mut AhoCorasickBuilder
| {
1321 b
.kind(Some(AhoCorasickKind
::DFA
)).ascii_case_insensitive(true);
1325 fn run_search_tests
<F
: FnMut(&SearchTest
) -> Vec
<Match
>>(
1326 which
: TestCollection
,
1329 let get_match_triples
=
1330 |matches
: Vec
<Match
>| -> Vec
<(usize, usize, usize)> {
1333 .map(|m
| (m
.pattern().as_usize(), m
.start(), m
.end()))
1336 for &tests
in which
{
1340 get_match_triples(f(&test
)).as_slice(),
1341 "test: {}, patterns: {:?}, haystack: {:?}",
1350 // Like 'run_search_tests', but we skip any tests that contain the empty
1351 // pattern because stream searching doesn't support it.
1352 #[cfg(feature = "std")]
1353 fn run_stream_search_tests
<F
: FnMut(&SearchTest
) -> Vec
<Match
>>(
1354 which
: TestCollection
,
1357 let get_match_triples
=
1358 |matches
: Vec
<Match
>| -> Vec
<(usize, usize, usize)> {
1361 .map(|m
| (m
.pattern().as_usize(), m
.start(), m
.end()))
1364 for &tests
in which
{
1366 if test
.patterns
.iter().any(|p
| p
.is_empty()) {
1371 get_match_triples(f(&test
)).as_slice(),
1372 "test: {}, patterns: {:?}, haystack: {:?}",
1382 fn search_tests_have_unique_names() {
1383 let assert
= |constname
, tests
: &[SearchTest
]| {
1384 let mut seen
= HashMap
::new(); // map from test name to position
1385 for (i
, test
) in tests
.iter().enumerate() {
1386 if !seen
.contains_key(test
.name
) {
1387 seen
.insert(test
.name
, i
);
1389 let last
= seen
[test
.name
];
1391 "{} tests have duplicate names at positions {} and {}",
1397 assert("BASICS", BASICS
);
1398 assert("STANDARD", STANDARD
);
1399 assert("LEFTMOST", LEFTMOST
);
1400 assert("LEFTMOST_FIRST", LEFTMOST_FIRST
);
1401 assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST
);
1402 assert("NON_OVERLAPPING", NON_OVERLAPPING
);
1403 assert("OVERLAPPING", OVERLAPPING
);
1404 assert("REGRESSION", REGRESSION
);
1407 #[cfg(feature = "std")]
1410 fn stream_not_allowed_leftmost_first() {
1411 let fsm
= AhoCorasick
::builder()
1412 .match_kind(MatchKind
::LeftmostFirst
)
1413 .build(None
::<String
>)
1415 assert_eq
!(fsm
.stream_find_iter(&b
""[..]).count(), 0);
1418 #[cfg(feature = "std")]
1421 fn stream_not_allowed_leftmost_longest() {
1422 let fsm
= AhoCorasick
::builder()
1423 .match_kind(MatchKind
::LeftmostLongest
)
1424 .build(None
::<String
>)
1426 assert_eq
!(fsm
.stream_find_iter(&b
""[..]).count(), 0);
1431 fn overlapping_not_allowed_leftmost_first() {
1432 let fsm
= AhoCorasick
::builder()
1433 .match_kind(MatchKind
::LeftmostFirst
)
1434 .build(None
::<String
>)
1436 assert_eq
!(fsm
.find_overlapping_iter("").count(), 0);
1441 fn overlapping_not_allowed_leftmost_longest() {
1442 let fsm
= AhoCorasick
::builder()
1443 .match_kind(MatchKind
::LeftmostLongest
)
1444 .build(None
::<String
>)
1446 assert_eq
!(fsm
.find_overlapping_iter("").count(), 0);
1449 // This tests that if we build an AC matcher with an "unanchored" start kind,
1450 // then we can't run an anchored search even if the underlying searcher
1453 // The key bit here is that both of the NFAs in this crate unconditionally
1454 // support both unanchored and anchored searches, but the DFA does not because
1455 // of the added cost of doing so. To avoid the top-level AC matcher sometimes
1456 // supporting anchored and sometimes not (depending on which searcher it
1457 // chooses to use internally), we ensure that the given 'StartKind' is always
1460 fn anchored_not_allowed_even_if_technically_available() {
1461 let ac
= AhoCorasick
::builder()
1462 .kind(Some(AhoCorasickKind
::NoncontiguousNFA
))
1463 .start_kind(StartKind
::Unanchored
)
1466 assert
!(ac
.try_find(Input
::new("foo").anchored(Anchored
::Yes
)).is_err());
1468 let ac
= AhoCorasick
::builder()
1469 .kind(Some(AhoCorasickKind
::ContiguousNFA
))
1470 .start_kind(StartKind
::Unanchored
)
1473 assert
!(ac
.try_find(Input
::new("foo").anchored(Anchored
::Yes
)).is_err());
1475 // For completeness, check that the DFA returns an error too.
1476 let ac
= AhoCorasick
::builder()
1477 .kind(Some(AhoCorasickKind
::DFA
))
1478 .start_kind(StartKind
::Unanchored
)
1481 assert
!(ac
.try_find(Input
::new("foo").anchored(Anchored
::Yes
)).is_err());
1484 // This is like the test aboved, but with unanchored and anchored flipped. That
1485 // is, we asked for an AC searcher with anchored support and we check that
1486 // unanchored searches return an error even if the underlying searcher would
1487 // technically support it.
1489 fn unanchored_not_allowed_even_if_technically_available() {
1490 let ac
= AhoCorasick
::builder()
1491 .kind(Some(AhoCorasickKind
::NoncontiguousNFA
))
1492 .start_kind(StartKind
::Anchored
)
1495 assert
!(ac
.try_find(Input
::new("foo").anchored(Anchored
::No
)).is_err());
1497 let ac
= AhoCorasick
::builder()
1498 .kind(Some(AhoCorasickKind
::ContiguousNFA
))
1499 .start_kind(StartKind
::Anchored
)
1502 assert
!(ac
.try_find(Input
::new("foo").anchored(Anchored
::No
)).is_err());
1504 // For completeness, check that the DFA returns an error too.
1505 let ac
= AhoCorasick
::builder()
1506 .kind(Some(AhoCorasickKind
::DFA
))
1507 .start_kind(StartKind
::Anchored
)
1510 assert
!(ac
.try_find(Input
::new("foo").anchored(Anchored
::No
)).is_err());
1513 // This tests that a prefilter does not cause a search to report a match
1514 // outside the bounds provided by the caller.
1516 // This is a regression test for a bug I introduced during the rewrite of most
1517 // of the crate after 0.7. It was never released. The tricky part here is
1518 // ensuring we get a prefilter that can report matches on its own (such as the
1519 // packed searcher). Otherwise, prefilters that report false positives might
1520 // have searched past the bounds provided by the caller, but confirming the
1521 // match would subsequently fail.
1523 fn prefilter_stays_in_bounds() {
1524 let ac
= AhoCorasick
::builder()
1525 .match_kind(MatchKind
::LeftmostFirst
)
1526 .build(&["sam", "frodo", "pippin", "merry", "gandalf", "sauron"])
1528 let haystack
= "foo gandalf";
1529 assert_eq
!(None
, ac
.find(Input
::new(haystack
).range(0..10)));
1532 // See: https://github.com/BurntSushi/aho-corasick/issues/44
1534 // In short, this test ensures that enabling ASCII case insensitivity does not
1535 // visit an exponential number of states when filling in failure transitions.
1537 fn regression_ascii_case_insensitive_no_exponential() {
1538 let ac
= AhoCorasick
::builder()
1539 .ascii_case_insensitive(true)
1540 .build(&["Tsubaki House-Triple Shot Vol01校花三姐妹"])
1542 assert
!(ac
.find("").is_none());
1545 // See: https://github.com/BurntSushi/aho-corasick/issues/53
1547 // This test ensures that the rare byte prefilter works in a particular corner
1548 // case. In particular, the shift offset detected for '/' in the patterns below
1549 // was incorrect, leading to a false negative.
1551 fn regression_rare_byte_prefilter() {
1552 use crate::AhoCorasick
;
1554 let ac
= AhoCorasick
::new(&["ab/j/", "x/"]).unwrap();
1555 assert
!(ac
.is_match("ab/j/"));
1559 fn regression_case_insensitive_prefilter() {
1560 for c
in b'a'
..b'z'
{
1561 for c2
in b'a'
..b'z'
{
1563 let c2
= c2
as char;
1564 let needle
= format
!("{}{}", c
, c2
).to_lowercase();
1565 let haystack
= needle
.to_uppercase();
1566 let ac
= AhoCorasick
::builder()
1567 .ascii_case_insensitive(true)
1573 ac
.find_iter(&haystack
).count(),
1574 "failed to find {:?} in {:?}\n\nautomaton:\n{:?}",
1583 // See: https://github.com/BurntSushi/aho-corasick/issues/64
1585 // This occurs when the rare byte prefilter is active.
1586 #[cfg(feature = "std")]
1588 fn regression_stream_rare_byte_prefilter() {
1591 // NOTE: The test only fails if this ends with j.
1592 const MAGIC
: [u8; 5] = *b
"1234j";
1594 // NOTE: The test fails for value in 8188..=8191 These value put the string
1595 // to search accross two call to read because the buffer size is 64KB by
1597 const BEGIN
: usize = 65_535;
1599 /// This is just a structure that implements Reader. The reader
1600 /// implementation will simulate a file filled with 0, except for the MAGIC
1601 /// string at offset BEGIN.
1608 fn read(&mut self, buf
: &mut [u8]) -> std
::io
::Result
<usize> {
1609 if self.read
> 100000 {
1613 if self.read
< BEGIN
{
1614 from
= buf
.len().min(BEGIN
- self.read
);
1620 if self.read
>= BEGIN
&& self.read
<= BEGIN
+ MAGIC
.len() {
1621 let to
= buf
.len().min(BEGIN
+ MAGIC
.len() - self.read
+ from
);
1623 buf
[from
..to
].copy_from_slice(
1625 [self.read
- BEGIN
..self.read
- BEGIN
+ to
- from
],
1627 self.read
+= to
- from
;
1631 for x
in from
..buf
.len() {
1639 fn run() -> std
::io
::Result
<()> {
1640 let aut
= AhoCorasick
::builder()
1641 // Enable byte classes to make debugging the automaton easier. It
1642 // should have no effect on the test result.
1643 .byte_classes(false)
1647 // While reading from a vector, it works:
1648 let mut buf
= alloc
::vec
![];
1649 R
::default().read_to_end(&mut buf
)?
;
1650 let from_whole
= aut
.find_iter(&buf
).next().unwrap().start();
1652 // But using stream_find_iter fails!
1653 let mut file
= std
::io
::BufReader
::new(R
::default());
1655 .stream_find_iter(&mut file
)
1657 .expect("NOT FOUND!!!!")?
// Panic here
1659 assert_eq
!(from_whole
, begin
);