-use std::iter::repeat;
-
-mod iter;
mod memchr;
-#[cfg(target_endian = "little")]
+// For debugging, particularly in CI, print out the byte order of the current
+// target.
+#[cfg(all(feature = "std", target_endian = "little"))]
#[test]
fn byte_order() {
eprintln!("LITTLE ENDIAN");
}
-#[cfg(target_endian = "big")]
+#[cfg(all(feature = "std", target_endian = "big"))]
#[test]
fn byte_order() {
eprintln!("BIG ENDIAN");
}
-
-/// Create a sequence of tests that should be run by memchr implementations.
-fn memchr_tests() -> Vec<MemchrTest> {
- let mut tests = Vec::new();
- for statict in MEMCHR_TESTS {
- assert!(!statict.corpus.contains("%"), "% is not allowed in corpora");
- assert!(!statict.corpus.contains("#"), "# is not allowed in corpora");
- assert!(!statict.needles.contains(&b'%'), "% is an invalid needle");
- assert!(!statict.needles.contains(&b'#'), "# is an invalid needle");
-
- let t = MemchrTest {
- corpus: statict.corpus.to_string(),
- needles: statict.needles.to_vec(),
- positions: statict.positions.to_vec(),
- };
- tests.push(t.clone());
- tests.extend(t.expand());
- }
- tests
-}
-
-/// A set of tests for memchr-like functions.
-///
-/// These tests mostly try to cover the short string cases. We cover the longer
-/// string cases via the benchmarks (which are tests themselves), via
-/// quickcheck tests and via automatic expansion of each test case (by
-/// increasing the corpus size). Finally, we cover different alignment cases
-/// in the tests by varying the starting point of the slice.
-const MEMCHR_TESTS: &[MemchrTestStatic] = &[
- // one needle (applied to memchr + memchr2 + memchr3)
- MemchrTestStatic { corpus: "a", needles: &[b'a'], positions: &[0] },
- MemchrTestStatic { corpus: "aa", needles: &[b'a'], positions: &[0, 1] },
- MemchrTestStatic {
- corpus: "aaa",
- needles: &[b'a'],
- positions: &[0, 1, 2],
- },
- MemchrTestStatic { corpus: "", needles: &[b'a'], positions: &[] },
- MemchrTestStatic { corpus: "z", needles: &[b'a'], positions: &[] },
- MemchrTestStatic { corpus: "zz", needles: &[b'a'], positions: &[] },
- MemchrTestStatic { corpus: "zza", needles: &[b'a'], positions: &[2] },
- MemchrTestStatic { corpus: "zaza", needles: &[b'a'], positions: &[1, 3] },
- MemchrTestStatic { corpus: "zzza", needles: &[b'a'], positions: &[3] },
- MemchrTestStatic { corpus: "\x00a", needles: &[b'a'], positions: &[1] },
- MemchrTestStatic { corpus: "\x00", needles: &[b'\x00'], positions: &[0] },
- MemchrTestStatic {
- corpus: "\x00\x00",
- needles: &[b'\x00'],
- positions: &[0, 1],
- },
- MemchrTestStatic {
- corpus: "\x00a\x00",
- needles: &[b'\x00'],
- positions: &[0, 2],
- },
- MemchrTestStatic {
- corpus: "zzzzzzzzzzzzzzzza",
- needles: &[b'a'],
- positions: &[16],
- },
- MemchrTestStatic {
- corpus: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza",
- needles: &[b'a'],
- positions: &[32],
- },
- // two needles (applied to memchr2 + memchr3)
- MemchrTestStatic {
- corpus: "az",
- needles: &[b'a', b'z'],
- positions: &[0, 1],
- },
- MemchrTestStatic {
- corpus: "az",
- needles: &[b'a', b'z'],
- positions: &[0, 1],
- },
- MemchrTestStatic { corpus: "az", needles: &[b'x', b'y'], positions: &[] },
- MemchrTestStatic { corpus: "az", needles: &[b'a', b'y'], positions: &[0] },
- MemchrTestStatic { corpus: "az", needles: &[b'x', b'z'], positions: &[1] },
- MemchrTestStatic {
- corpus: "yyyyaz",
- needles: &[b'a', b'z'],
- positions: &[4, 5],
- },
- MemchrTestStatic {
- corpus: "yyyyaz",
- needles: &[b'z', b'a'],
- positions: &[4, 5],
- },
- // three needles (applied to memchr3)
- MemchrTestStatic {
- corpus: "xyz",
- needles: &[b'x', b'y', b'z'],
- positions: &[0, 1, 2],
- },
- MemchrTestStatic {
- corpus: "zxy",
- needles: &[b'x', b'y', b'z'],
- positions: &[0, 1, 2],
- },
- MemchrTestStatic {
- corpus: "zxy",
- needles: &[b'x', b'a', b'z'],
- positions: &[0, 1],
- },
- MemchrTestStatic {
- corpus: "zxy",
- needles: &[b't', b'a', b'z'],
- positions: &[0],
- },
- MemchrTestStatic {
- corpus: "yxz",
- needles: &[b't', b'a', b'z'],
- positions: &[2],
- },
-];
-
-/// A description of a test on a memchr like function.
-#[derive(Clone, Debug)]
-struct MemchrTest {
- /// The thing to search. We use `&str` instead of `&[u8]` because they
- /// are nicer to write in tests, and we don't miss much since memchr
- /// doesn't care about UTF-8.
- ///
- /// Corpora cannot contain either '%' or '#'. We use these bytes when
- /// expanding test cases into many test cases, and we assume they are not
- /// used. If they are used, `memchr_tests` will panic.
- corpus: String,
- /// The needles to search for. This is intended to be an "alternation" of
- /// needles. The number of needles may cause this test to be skipped for
- /// some memchr variants. For example, a test with 2 needles cannot be used
- /// to test `memchr`, but can be used to test `memchr2` and `memchr3`.
- /// However, a test with only 1 needle can be used to test all of `memchr`,
- /// `memchr2` and `memchr3`. We achieve this by filling in the needles with
- /// bytes that we never used in the corpus (such as '#').
- needles: Vec<u8>,
- /// The positions expected to match for all of the needles.
- positions: Vec<usize>,
-}
-
-/// Like MemchrTest, but easier to define as a constant.
-#[derive(Clone, Debug)]
-struct MemchrTestStatic {
- corpus: &'static str,
- needles: &'static [u8],
- positions: &'static [usize],
-}
-
-impl MemchrTest {
- fn one<F: Fn(u8, &[u8]) -> Option<usize>>(&self, reverse: bool, f: F) {
- let needles = match self.needles(1) {
- None => return,
- Some(needles) => needles,
- };
- // We test different alignments here. Since some implementations use
- // AVX2, which can read 32 bytes at a time, we test at least that.
- // Moreover, with loop unrolling, we sometimes process 64 (sse2) or 128
- // (avx) bytes at a time, so we include that in our offsets as well.
- //
- // You might think this would cause most needles to not be found, but
- // we actually expand our tests to include corpus sizes all the way up
- // to >500 bytes, so we should exericse most branches.
- for align in 0..130 {
- let corpus = self.corpus(align);
- assert_eq!(
- self.positions(align, reverse).get(0).cloned(),
- f(needles[0], corpus.as_bytes()),
- "search for {:?} failed in: {:?} (len: {}, alignment: {})",
- needles[0] as char,
- corpus,
- corpus.len(),
- align
- );
- }
- }
-
- fn two<F: Fn(u8, u8, &[u8]) -> Option<usize>>(&self, reverse: bool, f: F) {
- let needles = match self.needles(2) {
- None => return,
- Some(needles) => needles,
- };
- for align in 0..130 {
- let corpus = self.corpus(align);
- assert_eq!(
- self.positions(align, reverse).get(0).cloned(),
- f(needles[0], needles[1], corpus.as_bytes()),
- "search for {:?}|{:?} failed in: {:?} \
- (len: {}, alignment: {})",
- needles[0] as char,
- needles[1] as char,
- corpus,
- corpus.len(),
- align
- );
- }
- }
-
- fn three<F: Fn(u8, u8, u8, &[u8]) -> Option<usize>>(
- &self,
- reverse: bool,
- f: F,
- ) {
- let needles = match self.needles(3) {
- None => return,
- Some(needles) => needles,
- };
- for align in 0..130 {
- let corpus = self.corpus(align);
- assert_eq!(
- self.positions(align, reverse).get(0).cloned(),
- f(needles[0], needles[1], needles[2], corpus.as_bytes()),
- "search for {:?}|{:?}|{:?} failed in: {:?} \
- (len: {}, alignment: {})",
- needles[0] as char,
- needles[1] as char,
- needles[2] as char,
- corpus,
- corpus.len(),
- align
- );
- }
- }
-
- fn iter_one<'a, I, F>(&'a self, reverse: bool, f: F)
- where
- F: FnOnce(u8, &'a [u8]) -> I,
- I: Iterator<Item = usize>,
- {
- if let Some(ns) = self.needles(1) {
- self.iter(reverse, f(ns[0], self.corpus.as_bytes()));
- }
- }
-
- fn iter_two<'a, I, F>(&'a self, reverse: bool, f: F)
- where
- F: FnOnce(u8, u8, &'a [u8]) -> I,
- I: Iterator<Item = usize>,
- {
- if let Some(ns) = self.needles(2) {
- self.iter(reverse, f(ns[0], ns[1], self.corpus.as_bytes()));
- }
- }
-
- fn iter_three<'a, I, F>(&'a self, reverse: bool, f: F)
- where
- F: FnOnce(u8, u8, u8, &'a [u8]) -> I,
- I: Iterator<Item = usize>,
- {
- if let Some(ns) = self.needles(3) {
- self.iter(reverse, f(ns[0], ns[1], ns[2], self.corpus.as_bytes()));
- }
- }
-
- /// Test that the positions yielded by the given iterator match the
- /// positions in this test. If reverse is true, then reverse the positions
- /// before comparing them.
- fn iter<I: Iterator<Item = usize>>(&self, reverse: bool, it: I) {
- assert_eq!(
- self.positions(0, reverse),
- it.collect::<Vec<usize>>(),
- r"search for {:?} failed in: {:?}",
- self.needles.iter().map(|&b| b as char).collect::<Vec<char>>(),
- self.corpus
- );
- }
-
- /// Expand this test into many variations of the same test.
- ///
- /// In particular, this will generate more tests with larger corpus sizes.
- /// The expected positions are updated to maintain the integrity of the
- /// test.
- ///
- /// This is important in testing a memchr implementation, because there are
- /// often different cases depending on the length of the corpus.
- ///
- /// Note that we extend the corpus by adding `%` bytes, which we
- /// don't otherwise use as a needle.
- fn expand(&self) -> Vec<MemchrTest> {
- let mut more = Vec::new();
-
- // Add bytes to the start of the corpus.
- for i in 1..515 {
- let mut t = self.clone();
- let mut new_corpus: String = repeat('%').take(i).collect();
- new_corpus.push_str(&t.corpus);
- t.corpus = new_corpus;
- t.positions = t.positions.into_iter().map(|p| p + i).collect();
- more.push(t);
- }
- // Add bytes to the end of the corpus.
- for i in 1..515 {
- let mut t = self.clone();
- let padding: String = repeat('%').take(i).collect();
- t.corpus.push_str(&padding);
- more.push(t);
- }
-
- more
- }
-
- /// Return the corpus at the given alignment.
- ///
- /// If the alignment exceeds the length of the corpus, then this returns
- /// an empty slice.
- fn corpus(&self, align: usize) -> &str {
- self.corpus.get(align..).unwrap_or("")
- }
-
- /// Return exactly `count` needles from this test. If this test has less
- /// than `count` needles, then add `#` until the number of needles
- /// matches `count`. If this test has more than `count` needles, then
- /// return `None` (because there is no way to use this test data for a
- /// search using fewer needles).
- fn needles(&self, count: usize) -> Option<Vec<u8>> {
- if self.needles.len() > count {
- return None;
- }
-
- let mut needles = self.needles.to_vec();
- for _ in needles.len()..count {
- // we assume # is never used in tests.
- needles.push(b'#');
- }
- Some(needles)
- }
-
- /// Return the positions in this test, reversed if `reverse` is true.
- ///
- /// If alignment is given, then all positions greater than or equal to that
- /// alignment are offset by the alignment. Positions less than the
- /// alignment are dropped.
- fn positions(&self, align: usize, reverse: bool) -> Vec<usize> {
- let positions = if reverse {
- let mut positions = self.positions.to_vec();
- positions.reverse();
- positions
- } else {
- self.positions.to_vec()
- };
- positions
- .into_iter()
- .filter(|&p| p >= align)
- .map(|p| p - align)
- .collect()
- }
-}