vendor/aho-corasick/src/packed/tests.rs

   1 use std::collections::HashMap;
   2 use std::usize;
   3
   4 use crate::packed::{Config, MatchKind};
   5 use crate::Match;
   6
   7 /// A description of a single test against a multi-pattern searcher.
   8 ///
   9 /// A single test may not necessarily pass on every configuration of a
  10 /// searcher. The tests are categorized and grouped appropriately below.
  11 #[derive(Clone, Debug, Eq, PartialEq)]
  12 struct SearchTest {
  13     /// The name of this test, for debugging.
  14     name: &'static str,
  15     /// The patterns to search for.
  16     patterns: &'static [&'static str],
  17     /// The text to search.
  18     haystack: &'static str,
  19     /// Each match is a triple of (pattern_index, start, end), where
  20     /// pattern_index is an index into `patterns` and `start`/`end` are indices
  21     /// into `haystack`.
  22     matches: &'static [(usize, usize, usize)],
  23 }
  24
  25 struct SearchTestOwned {
  26     offset: usize,
  27     name: String,
  28     patterns: Vec<String>,
  29     haystack: String,
  30     matches: Vec<(usize, usize, usize)>,
  31 }
  32
  33 impl SearchTest {
  34     fn variations(&self) -> Vec<SearchTestOwned> {
  35         let mut tests = vec![];
  36         for i in 0..=260 {
  37             tests.push(self.offset_prefix(i));
  38             tests.push(self.offset_suffix(i));
  39             tests.push(self.offset_both(i));
  40         }
  41         tests
  42     }
  43
  44     fn offset_both(&self, off: usize) -> SearchTestOwned {
  45         SearchTestOwned {
  46             offset: off,
  47             name: self.name.to_string(),
  48             patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
  49             haystack: format!(
  50                 "{}{}{}",
  51                 "Z".repeat(off),
  52                 self.haystack,
  53                 "Z".repeat(off)
  54             ),
  55             matches: self
  56                 .matches
  57                 .iter()
  58                 .map(|&(id, s, e)| (id, s + off, e + off))
  59                 .collect(),
  60         }
  61     }
  62
  63     fn offset_prefix(&self, off: usize) -> SearchTestOwned {
  64         SearchTestOwned {
  65             offset: off,
  66             name: self.name.to_string(),
  67             patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
  68             haystack: format!("{}{}", "Z".repeat(off), self.haystack),
  69             matches: self
  70                 .matches
  71                 .iter()
  72                 .map(|&(id, s, e)| (id, s + off, e + off))
  73                 .collect(),
  74         }
  75     }
  76
  77     fn offset_suffix(&self, off: usize) -> SearchTestOwned {
  78         SearchTestOwned {
  79             offset: off,
  80             name: self.name.to_string(),
  81             patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
  82             haystack: format!("{}{}", self.haystack, "Z".repeat(off)),
  83             matches: self.matches.to_vec(),
  84         }
  85     }
  86
  87     // fn to_owned(&self) -> SearchTestOwned {
  88     // SearchTestOwned {
  89     // name: self.name.to_string(),
  90     // patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
  91     // haystack: self.haystack.to_string(),
  92     // matches: self.matches.iter().cloned().collect(),
  93     // }
  94     // }
  95 }
  96
  97 /// Short-hand constructor for SearchTest. We use it a lot below.
  98 macro_rules! t {
  99     ($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => {
 100         SearchTest {
 101             name: stringify!($name),
 102             patterns: $patterns,
 103             haystack: $haystack,
 104             matches: $matches,
 105         }
 106     };
 107 }
 108
 109 /// A collection of test groups.
 110 type TestCollection = &'static [&'static [SearchTest]];
 111
 112 // Define several collections corresponding to the different type of match
 113 // semantics supported. These collections have some overlap, but each
 114 // collection should have some tests that no other collection has.
 115
 116 /// Tests for leftmost-first match semantics.
 117 const PACKED_LEFTMOST_FIRST: TestCollection =
 118     &[BASICS, LEFTMOST, LEFTMOST_FIRST, REGRESSION, TEDDY];
 119
 120 /// Tests for leftmost-longest match semantics.
 121 const PACKED_LEFTMOST_LONGEST: TestCollection =
 122     &[BASICS, LEFTMOST, LEFTMOST_LONGEST, REGRESSION, TEDDY];
 123
 124 // Now define the individual tests that make up the collections above.
 125
 126 /// A collection of tests for the that should always be true regardless of
 127 /// match semantics. That is, all combinations of leftmost-{first, longest}
 128 /// should produce the same answer.
 129 const BASICS: &'static [SearchTest] = &[
 130     t!(basic001, &["a"], "", &[]),
 131     t!(basic010, &["a"], "a", &[(0, 0, 1)]),
 132     t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
 133     t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]),
 134     t!(basic040, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]),
 135     t!(basic050, &["a"], "bba", &[(0, 2, 3)]),
 136     t!(basic060, &["a"], "bbb", &[]),
 137     t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]),
 138     t!(basic100, &["aa"], "", &[]),
 139     t!(basic110, &["aa"], "aa", &[(0, 0, 2)]),
 140     t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]),
 141     t!(basic130, &["aa"], "abbab", &[]),
 142     t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]),
 143     t!(basic150, &["aaa"], "aaa", &[(0, 0, 3)]),
 144     t!(basic200, &["abc"], "abc", &[(0, 0, 3)]),
 145     t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]),
 146     t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]),
 147     t!(basic300, &["a", "b"], "", &[]),
 148     t!(basic310, &["a", "b"], "z", &[]),
 149     t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]),
 150     t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]),
 151     t!(
 152         basic340,
 153         &["a", "b"],
 154         "abba",
 155         &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),]
 156     ),
 157     t!(
 158         basic350,
 159         &["b", "a"],
 160         "abba",
 161         &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),]
 162     ),
 163     t!(basic360, &["abc", "bc"], "xbc", &[(1, 1, 3),]),
 164     t!(basic400, &["foo", "bar"], "", &[]),
 165     t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]),
 166     t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]),
 167     t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]),
 168     t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]),
 169     t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]),
 170     t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]),
 171     t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]),
 172     t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]),
 173     t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]),
 174     t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]),
 175     t!(
 176         basic720,
 177         &["yabcdef", "bcdeyabc", "abcdezghi"],
 178         "yabcdezghi",
 179         &[(2, 1, 10),]
 180     ),
 181     t!(basic810, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
 182     t!(basic820, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
 183     t!(basic830, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]),
 184     t!(
 185         basic840,
 186         &["ab", "ba"],
 187         "abababa",
 188         &[(0, 0, 2), (0, 2, 4), (0, 4, 6),]
 189     ),
 190     t!(basic850, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]),
 191 ];
 192
 193 /// Tests for leftmost match semantics. These should pass for both
 194 /// leftmost-first and leftmost-longest match kinds. Stated differently, among
 195 /// ambiguous matches, the longest match and the match that appeared first when
 196 /// constructing the automaton should always be the same.
 197 const LEFTMOST: &'static [SearchTest] = &[
 198     t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
 199     t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
 200     t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
 201     t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]),
 202     t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]),
 203     t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]),
 204     t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]),
 205     t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]),
 206     t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
 207     t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
 208     t!(
 209         leftmost360,
 210         &["abcdefghi", "hz", "abcdefgh"],
 211         "abcdefghz",
 212         &[(2, 0, 8),]
 213     ),
 214     t!(
 215         leftmost370,
 216         &["abcdefghi", "cde", "hz", "abcdefgh"],
 217         "abcdefghz",
 218         &[(3, 0, 8),]
 219     ),
 220     t!(
 221         leftmost380,
 222         &["abcdefghi", "hz", "abcdefgh", "a"],
 223         "abcdefghz",
 224         &[(2, 0, 8),]
 225     ),
 226     t!(
 227         leftmost390,
 228         &["b", "abcdefghi", "hz", "abcdefgh"],
 229         "abcdefghz",
 230         &[(3, 0, 8),]
 231     ),
 232     t!(
 233         leftmost400,
 234         &["h", "abcdefghi", "hz", "abcdefgh"],
 235         "abcdefghz",
 236         &[(3, 0, 8),]
 237     ),
 238     t!(
 239         leftmost410,
 240         &["z", "abcdefghi", "hz", "abcdefgh"],
 241         "abcdefghz",
 242         &[(3, 0, 8), (0, 8, 9),]
 243     ),
 244 ];
 245
 246 /// Tests for non-overlapping leftmost-first match semantics. These tests
 247 /// should generally be specific to leftmost-first, which means they should
 248 /// generally fail under leftmost-longest semantics.
 249 const LEFTMOST_FIRST: &'static [SearchTest] = &[
 250     t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
 251     t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
 252     t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
 253     t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]),
 254     t!(leftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]),
 255     t!(leftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
 256     t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]),
 257     t!(
 258         leftfirst310,
 259         &["abcd", "b", "bce", "ce"],
 260         "abce",
 261         &[(1, 1, 2), (3, 2, 4),]
 262     ),
 263     t!(
 264         leftfirst320,
 265         &["a", "abcdefghi", "hz", "abcdefgh"],
 266         "abcdefghz",
 267         &[(0, 0, 1), (2, 7, 9),]
 268     ),
 269     t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]),
 270     t!(
 271         leftfirst340,
 272         &["abcdef", "x", "x", "x", "x", "x", "x", "abcde"],
 273         "abcdef",
 274         &[(0, 0, 6)]
 275     ),
 276 ];
 277
 278 /// Tests for non-overlapping leftmost-longest match semantics. These tests
 279 /// should generally be specific to leftmost-longest, which means they should
 280 /// generally fail under leftmost-first semantics.
 281 const LEFTMOST_LONGEST: &'static [SearchTest] = &[
 282     t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
 283     t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
 284     t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
 285     t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
 286     t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
 287     t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
 288     t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]),
 289     t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
 290     t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]),
 291     t!(
 292         leftlong310,
 293         &["a", "abcdefghi", "hz", "abcdefgh"],
 294         "abcdefghz",
 295         &[(3, 0, 8),]
 296     ),
 297     t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
 298     t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]),
 299     t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]),
 300 ];
 301
 302 /// Regression tests that are applied to all combinations.
 303 ///
 304 /// If regression tests are needed for specific match semantics, then add them
 305 /// to the appropriate group above.
 306 const REGRESSION: &'static [SearchTest] = &[
 307     t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]),
 308     t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]),
 309     t!(
 310         regression030,
 311         &["libcore/", "libstd/"],
 312         "libcore/char/methods.rs",
 313         &[(0, 0, 8),]
 314     ),
 315     t!(
 316         regression040,
 317         &["libstd/", "libcore/"],
 318         "libcore/char/methods.rs",
 319         &[(1, 0, 8),]
 320     ),
 321     t!(
 322         regression050,
 323         &["\x00\x00\x01", "\x00\x00\x00"],
 324         "\x00\x00\x00",
 325         &[(1, 0, 3),]
 326     ),
 327     t!(
 328         regression060,
 329         &["\x00\x00\x00", "\x00\x00\x01"],
 330         "\x00\x00\x00",
 331         &[(0, 0, 3),]
 332     ),
 333 ];
 334
 335 const TEDDY: &'static [SearchTest] = &[
 336     t!(
 337         teddy010,
 338         &["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"],
 339         "abcdefghijk",
 340         &[
 341             (0, 0, 1),
 342             (1, 1, 2),
 343             (2, 2, 3),
 344             (3, 3, 4),
 345             (4, 4, 5),
 346             (5, 5, 6),
 347             (6, 6, 7),
 348             (7, 7, 8),
 349             (8, 8, 9),
 350             (9, 9, 10),
 351             (10, 10, 11)
 352         ]
 353     ),
 354     t!(
 355         teddy020,
 356         &["ab", "bc", "cd", "de", "ef", "fg", "gh", "hi", "ij", "jk", "kl"],
 357         "abcdefghijk",
 358         &[(0, 0, 2), (2, 2, 4), (4, 4, 6), (6, 6, 8), (8, 8, 10),]
 359     ),
 360     t!(
 361         teddy030,
 362         &["abc"],
 363         "abcdefghijklmnopqrstuvwxyzabcdefghijk",
 364         &[(0, 0, 3), (0, 26, 29)]
 365     ),
 366 ];
 367
 368 // Now define a test for each combination of things above that we want to run.
 369 // Since there are a few different combinations for each collection of tests,
 370 // we define a couple of macros to avoid repetition drudgery. The testconfig
 371 // macro constructs the automaton from a given match kind, and runs the search
 372 // tests one-by-one over the given collection. The `with` parameter allows one
 373 // to configure the config with additional parameters. The testcombo macro
 374 // invokes testconfig in precisely this way: it sets up several tests where
 375 // each one turns a different knob on Config.
 376
 377 macro_rules! testconfig {
 378     ($name:ident, $collection:expr, $with:expr) => {
 379         #[test]
 380         fn $name() {
 381             run_search_tests($collection, |test| {
 382                 let mut config = Config::new();
 383                 $with(&mut config);
 384                 config
 385                     .builder()
 386                     .extend(test.patterns.iter().map(|p| p.as_bytes()))
 387                     .build()
 388                     .unwrap()
 389                     .find_iter(&test.haystack)
 390                     .collect()
 391             });
 392         }
 393     };
 394 }
 395
 396 #[cfg(target_arch = "x86_64")]
 397 testconfig!(
 398     search_default_leftmost_first,
 399     PACKED_LEFTMOST_FIRST,
 400     |_: &mut Config| {}
 401 );
 402
 403 #[cfg(target_arch = "x86_64")]
 404 testconfig!(
 405     search_default_leftmost_longest,
 406     PACKED_LEFTMOST_LONGEST,
 407     |c: &mut Config| {
 408         c.match_kind(MatchKind::LeftmostLongest);
 409     }
 410 );
 411
 412 #[cfg(target_arch = "x86_64")]
 413 testconfig!(
 414     search_teddy_leftmost_first,
 415     PACKED_LEFTMOST_FIRST,
 416     |c: &mut Config| {
 417         c.force_teddy(true);
 418     }
 419 );
 420
 421 #[cfg(target_arch = "x86_64")]
 422 testconfig!(
 423     search_teddy_leftmost_longest,
 424     PACKED_LEFTMOST_LONGEST,
 425     |c: &mut Config| {
 426         c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
 427     }
 428 );
 429
 430 #[cfg(target_arch = "x86_64")]
 431 testconfig!(
 432     search_teddy_ssse3_leftmost_first,
 433     PACKED_LEFTMOST_FIRST,
 434     |c: &mut Config| {
 435         c.force_teddy(true);
 436         if is_x86_feature_detected!("ssse3") {
 437             c.force_avx(Some(false));
 438         }
 439     }
 440 );
 441
 442 #[cfg(target_arch = "x86_64")]
 443 testconfig!(
 444     search_teddy_ssse3_leftmost_longest,
 445     PACKED_LEFTMOST_LONGEST,
 446     |c: &mut Config| {
 447         c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
 448         if is_x86_feature_detected!("ssse3") {
 449             c.force_avx(Some(false));
 450         }
 451     }
 452 );
 453
 454 #[cfg(target_arch = "x86_64")]
 455 testconfig!(
 456     search_teddy_avx2_leftmost_first,
 457     PACKED_LEFTMOST_FIRST,
 458     |c: &mut Config| {
 459         c.force_teddy(true);
 460         if is_x86_feature_detected!("avx2") {
 461             c.force_avx(Some(true));
 462         }
 463     }
 464 );
 465
 466 #[cfg(target_arch = "x86_64")]
 467 testconfig!(
 468     search_teddy_avx2_leftmost_longest,
 469     PACKED_LEFTMOST_LONGEST,
 470     |c: &mut Config| {
 471         c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
 472         if is_x86_feature_detected!("avx2") {
 473             c.force_avx(Some(true));
 474         }
 475     }
 476 );
 477
 478 #[cfg(target_arch = "x86_64")]
 479 testconfig!(
 480     search_teddy_fat_leftmost_first,
 481     PACKED_LEFTMOST_FIRST,
 482     |c: &mut Config| {
 483         c.force_teddy(true);
 484         if is_x86_feature_detected!("avx2") {
 485             c.force_teddy_fat(Some(true));
 486         }
 487     }
 488 );
 489
 490 #[cfg(target_arch = "x86_64")]
 491 testconfig!(
 492     search_teddy_fat_leftmost_longest,
 493     PACKED_LEFTMOST_LONGEST,
 494     |c: &mut Config| {
 495         c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
 496         if is_x86_feature_detected!("avx2") {
 497             c.force_teddy_fat(Some(true));
 498         }
 499     }
 500 );
 501
 502 testconfig!(
 503     search_rabinkarp_leftmost_first,
 504     PACKED_LEFTMOST_FIRST,
 505     |c: &mut Config| {
 506         c.force_rabin_karp(true);
 507     }
 508 );
 509
 510 testconfig!(
 511     search_rabinkarp_leftmost_longest,
 512     PACKED_LEFTMOST_LONGEST,
 513     |c: &mut Config| {
 514         c.force_rabin_karp(true).match_kind(MatchKind::LeftmostLongest);
 515     }
 516 );
 517
 518 #[test]
 519 fn search_tests_have_unique_names() {
 520     let assert = |constname, tests: &[SearchTest]| {
 521         let mut seen = HashMap::new(); // map from test name to position
 522         for (i, test) in tests.iter().enumerate() {
 523             if !seen.contains_key(test.name) {
 524                 seen.insert(test.name, i);
 525             } else {
 526                 let last = seen[test.name];
 527                 panic!(
 528                     "{} tests have duplicate names at positions {} and {}",
 529                     constname, last, i
 530                 );
 531             }
 532         }
 533     };
 534     assert("BASICS", BASICS);
 535     assert("LEFTMOST", LEFTMOST);
 536     assert("LEFTMOST_FIRST", LEFTMOST_FIRST);
 537     assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST);
 538     assert("REGRESSION", REGRESSION);
 539     assert("TEDDY", TEDDY);
 540 }
 541
 542 fn run_search_tests<F: FnMut(&SearchTestOwned) -> Vec<Match>>(
 543     which: TestCollection,
 544     mut f: F,
 545 ) {
 546     let get_match_triples =
 547         |matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
 548             matches
 549                 .into_iter()
 550                 .map(|m| (m.pattern(), m.start(), m.end()))
 551                 .collect()
 552         };
 553     for &tests in which {
 554         for spec in tests {
 555             for test in spec.variations() {
 556                 assert_eq!(
 557                     test.matches,
 558                     get_match_triples(f(&test)).as_slice(),
 559                     "test: {}, patterns: {:?}, haystack: {:?}, offset: {:?}",
 560                     test.name,
 561                     test.patterns,
 562                     test.haystack,
 563                     test.offset,
 564                 );
 565             }
 566         }
 567     }
 568 }