use core::cmp;
-use bstr::BStr;
-use cow::CowBStr;
+use cow::CowBytes;
+use ext_slice::ByteSlice;
use search::prefilter::{Freqy, PrefilterState};
/// An implementation of the TwoWay substring search algorithm, with heuristics
#[derive(Clone, Debug)]
pub struct TwoWay<'b> {
/// The needle that we're looking for.
- needle: CowBStr<'b>,
+ needle: CowBytes<'b>,
/// An implementation of a fast skip loop based on hard-coded frequency
/// data. This is only used when conditions are deemed favorable.
freqy: Freqy,
impl<'b> TwoWay<'b> {
/// Create a searcher that uses the Two-Way algorithm by searching forwards
/// through any haystack.
- pub fn forward(needle: &'b BStr) -> TwoWay<'b> {
+ pub fn forward(needle: &'b [u8]) -> TwoWay<'b> {
let freqy = Freqy::forward(needle);
if needle.is_empty() {
return TwoWay {
- needle: CowBStr::new(needle),
+ needle: CowBytes::new(needle),
freqy,
critical_pos: 0,
- shift: Shift::Large { shift: 0 },
+ shift: Shift::Large { shift: 0 },
};
}
(max_suffix.period, max_suffix.pos)
};
let shift = Shift::forward(needle, period_lower_bound, critical_pos);
- let needle = CowBStr::new(needle);
+ let needle = CowBytes::new(needle);
TwoWay { needle, freqy, critical_pos, shift }
}
/// Create a searcher that uses the Two-Way algorithm by searching in
/// reverse through any haystack.
- pub fn reverse(needle: &'b BStr) -> TwoWay<'b> {
+ pub fn reverse(needle: &'b [u8]) -> TwoWay<'b> {
let freqy = Freqy::reverse(needle);
if needle.is_empty() {
return TwoWay {
- needle: CowBStr::new(needle),
+ needle: CowBytes::new(needle),
freqy,
critical_pos: 0,
- shift: Shift::Large { shift: 0 },
+ shift: Shift::Large { shift: 0 },
};
}
(max_suffix.period, max_suffix.pos)
};
let shift = Shift::reverse(needle, period_lower_bound, critical_pos);
- let needle = CowBStr::new(needle);
+ let needle = CowBytes::new(needle);
TwoWay { needle, freqy, critical_pos, shift }
}
}
/// Return the needle used by this searcher.
- pub fn needle(&self) -> &BStr {
- self.needle.as_bstr()
+ pub fn needle(&self) -> &[u8] {
+ self.needle.as_slice()
}
/// Convert this searched into an owned version, where the needle is
///
/// This will automatically initialize prefilter state. This should only
/// be used for one-off searches.
- pub fn find(&self, haystack: &BStr) -> Option<usize> {
+ pub fn find(&self, haystack: &[u8]) -> Option<usize> {
self.find_with(&mut self.prefilter_state(), haystack)
}
///
/// This will automatically initialize prefilter state. This should only
/// be used for one-off searches.
- pub fn rfind(&self, haystack: &BStr) -> Option<usize> {
+ pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
self.rfind_with(&mut self.prefilter_state(), haystack)
}
pub fn find_with(
&self,
prestate: &mut PrefilterState,
- haystack: &BStr,
+ haystack: &[u8],
) -> Option<usize> {
if self.needle.is_empty() {
return Some(0);
pub fn rfind_with(
&self,
prestate: &mut PrefilterState,
- haystack: &BStr,
+ haystack: &[u8],
) -> Option<usize> {
if self.needle.is_empty() {
return Some(haystack.len());
fn find_small(
&self,
prestate: &mut PrefilterState,
- haystack: &BStr,
+ haystack: &[u8],
period: usize,
) -> Option<usize> {
if prestate.is_effective() {
&self,
prestate: &mut PrefilterState,
prefilter: bool,
- haystack: &BStr,
+ haystack: &[u8],
period: usize,
) -> Option<usize> {
- let needle = self.needle.as_bstr();
+ let needle = self.needle.as_slice();
let mut pos = 0;
let mut shift = 0;
while pos + needle.len() <= haystack.len() {
fn find_large(
&self,
prestate: &mut PrefilterState,
- haystack: &BStr,
+ haystack: &[u8],
shift: usize,
) -> Option<usize> {
if prestate.is_effective() {
&self,
prestate: &mut PrefilterState,
prefilter: bool,
- haystack: &BStr,
+ haystack: &[u8],
shift: usize,
) -> Option<usize> {
- let needle = self.needle.as_bstr();
+ let needle = self.needle.as_slice();
let mut pos = 0;
while pos + needle.len() <= haystack.len() {
let mut i = self.critical_pos;
fn rfind_small(
&self,
prestate: &mut PrefilterState,
- haystack: &BStr,
+ haystack: &[u8],
period: usize,
) -> Option<usize> {
if prestate.is_effective() {
&self,
prestate: &mut PrefilterState,
prefilter: bool,
- haystack: &BStr,
+ haystack: &[u8],
period: usize,
) -> Option<usize> {
let needle = &*self.needle;
fn rfind_large(
&self,
prestate: &mut PrefilterState,
- haystack: &BStr,
+ haystack: &[u8],
shift: usize,
) -> Option<usize> {
if prestate.is_effective() {
&self,
prestate: &mut PrefilterState,
prefilter: bool,
- haystack: &BStr,
+ haystack: &[u8],
shift: usize,
) -> Option<usize> {
let needle = &*self.needle;
/// lexicographic suffixes, and choosing the right-most starting position.
/// The lower bound on the period is then the period of the chosen suffix.
fn forward(
- needle: &BStr,
+ needle: &[u8],
period_lower_bound: usize,
critical_pos: usize,
) -> Shift {
/// lexicographic suffixes, and choosing the left-most starting position.
/// The lower bound on the period is then the period of the chosen suffix.
fn reverse(
- needle: &BStr,
+ needle: &[u8],
period_lower_bound: usize,
critical_pos: usize,
) -> Shift {
}
impl Suffix {
- fn forward(needle: &BStr, kind: SuffixKind) -> Suffix {
+ fn forward(needle: &[u8], kind: SuffixKind) -> Suffix {
debug_assert!(!needle.is_empty());
// suffix represents our maximal (or minimal) suffix, along with
suffix
}
- fn reverse(needle: &BStr, kind: SuffixKind) -> Suffix {
+ fn reverse(needle: &[u8], kind: SuffixKind) -> Suffix {
debug_assert!(!needle.is_empty());
// See the comments in `forward` for how this works.
// N.B. There are more holistic tests in src/search/tests.rs.
#[cfg(test)]
mod tests {
- use bstr::{B, BStr};
- use bstring::BString;
-
use super::*;
+ use ext_slice::B;
/// Convenience wrapper for computing the suffix as a byte string.
- fn get_suffix_forward(needle: &BStr, kind: SuffixKind) -> (&BStr, usize) {
+ fn get_suffix_forward(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) {
let s = Suffix::forward(needle, kind);
(&needle[s.pos..], s.period)
}
/// Convenience wrapper for computing the reverse suffix as a byte string.
- fn get_suffix_reverse(needle: &BStr, kind: SuffixKind) -> (&BStr, usize) {
+ fn get_suffix_reverse(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) {
let s = Suffix::reverse(needle, kind);
(&needle[..s.pos], s.period)
}
/// Return all of the non-empty suffixes in the given byte string.
- fn suffixes(bytes: &BStr) -> Vec<&BStr> {
+ fn suffixes(bytes: &[u8]) -> Vec<&[u8]> {
(0..bytes.len()).map(|i| &bytes[i..]).collect()
}
/// Return the lexicographically maximal suffix of the given byte string.
- fn naive_maximal_suffix_forward(needle: &BStr) -> &BStr {
+ fn naive_maximal_suffix_forward(needle: &[u8]) -> &[u8] {
let mut sufs = suffixes(needle);
sufs.sort();
sufs.pop().unwrap()
/// Return the lexicographically maximal suffix of the reverse of the given
/// byte string.
- fn naive_maximal_suffix_reverse(needle: &BStr) -> BString {
- let mut reversed = needle.to_bstring();
- reversed.reverse_bytes();
- let mut got = naive_maximal_suffix_forward(&reversed).to_bstring();
- got.reverse_bytes();
+ fn naive_maximal_suffix_reverse(needle: &[u8]) -> Vec<u8> {
+ let mut reversed = needle.to_vec();
+ reversed.reverse();
+ let mut got = naive_maximal_suffix_forward(&reversed).to_vec();
+ got.reverse();
got
}
fn suffix_forward() {
macro_rules! assert_suffix_min {
($given:expr, $expected:expr, $period:expr) => {
- let (got_suffix, got_period) = get_suffix_forward(
- B($given),
- SuffixKind::Minimal,
- );
+ let (got_suffix, got_period) =
+ get_suffix_forward($given.as_bytes(), SuffixKind::Minimal);
assert_eq!((B($expected), $period), (got_suffix, got_period));
};
}
macro_rules! assert_suffix_max {
($given:expr, $expected:expr, $period:expr) => {
- let (got_suffix, got_period) = get_suffix_forward(
- B($given),
- SuffixKind::Maximal,
- );
+ let (got_suffix, got_period) =
+ get_suffix_forward($given.as_bytes(), SuffixKind::Maximal);
assert_eq!((B($expected), $period), (got_suffix, got_period));
};
}
fn suffix_reverse() {
macro_rules! assert_suffix_min {
($given:expr, $expected:expr, $period:expr) => {
- let (got_suffix, got_period) = get_suffix_reverse(
- B($given),
- SuffixKind::Minimal,
- );
+ let (got_suffix, got_period) =
+ get_suffix_reverse($given.as_bytes(), SuffixKind::Minimal);
assert_eq!((B($expected), $period), (got_suffix, got_period));
};
}
macro_rules! assert_suffix_max {
($given:expr, $expected:expr, $period:expr) => {
- let (got_suffix, got_period) = get_suffix_reverse(
- B($given),
- SuffixKind::Maximal,
- );
+ let (got_suffix, got_period) =
+ get_suffix_reverse($given.as_bytes(), SuffixKind::Maximal);
assert_eq!((B($expected), $period), (got_suffix, got_period));
};
}
quickcheck! {
fn qc_suffix_forward_maximal(bytes: Vec<u8>) -> bool {
- let bytes = BString::from(bytes);
if bytes.is_empty() {
return true;
}
}
fn qc_suffix_reverse_maximal(bytes: Vec<u8>) -> bool {
- let bytes = BString::from(bytes);
if bytes.is_empty() {
return true;
}
let (got, _) = get_suffix_reverse(&bytes, SuffixKind::Maximal);
let expected = naive_maximal_suffix_reverse(&bytes);
- got == expected
+ expected == got
}
}
}