2 Copyright (c) Marshall Clow 2010-2012.
4 Distributed under the Boost Software License, Version 1.0. (See accompanying
5 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 For more information, see http://www.boost.org
10 #ifndef BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP
11 #define BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP
14 #include <iterator> // for std::iterator_traits
16 #include <boost/assert.hpp>
17 #include <boost/static_assert.hpp>
19 #include <boost/range/begin.hpp>
20 #include <boost/range/end.hpp>
22 #include <boost/utility/enable_if.hpp>
23 #include <boost/type_traits/is_same.hpp>
25 #include <boost/algorithm/searching/detail/debugging.hpp>
27 // #define BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_DEBUG
29 namespace boost { namespace algorithm {
/*
    A templated version of the Knuth-Morris-Pratt searching algorithm.

    Requirements:
        * Random-access iterators
        * The pattern and corpus iterator types (patIter and corpusIter) must have
          the same value_type.

    References:
        http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
        http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm
*/
// Searcher object for the Knuth-Morris-Pratt algorithm. It is constructed from a
// pattern range [first, last) and then applied to a corpus through operator(),
// which returns a pair of corpus iterators.
//
// The object stores the two pattern iterators (not a copy of the pattern) and
// reads the pattern through pat_first while searching, so the pattern range has
// to stay valid for as long as the object is used.
//
// NOTE(review): this listing appears to be missing lines (access specifiers,
// closing braces, some preprocessor directives and some statements are not
// visible). The comments below describe only what the visible lines show.
44 template <typename patIter>
45 class knuth_morris_pratt {
// Signed distance type of the pattern iterator; used for lengths, offsets and
// the entries of the skip table.
46 typedef typename std::iterator_traits<patIter>::difference_type difference_type;
// Stores the pattern iterators, records the pattern length and sizes skip_ to
// k_pattern_length + 1 entries, then fills the table.
// The initializers follow the member declaration order (pat_first, pat_last,
// k_pattern_length, skip_), which is what allows skip_ to be sized from
// k_pattern_length.
48 knuth_morris_pratt ( patIter first, patIter last )
49 : pat_first ( first ), pat_last ( last ),
50 k_pattern_length ( std::distance ( pat_first, pat_last )),
51 skip_ ( k_pattern_length + 1 ) {
// NOTE(review): both table builders are called back to back in the visible
// lines. Whether they are alternatives selected by a conditional that is not
// shown here needs to be confirmed against the complete file.
53 preKmp ( pat_first, pat_last );
55 init_skip_table ( pat_first, pat_last );
// Only when the debug macro is defined: hand the whole table to
// detail::PrintTable. NOTE(review): the matching #endif is not visible here.
57 #ifdef BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_DEBUG
58 detail::PrintTable ( skip_.begin (), skip_.end ());
// Empty destructor body; the class performs no explicit cleanup.
62 ~knuth_morris_pratt () {}
64 /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last )
65 /// \brief Searches the corpus for the pattern that was passed into the constructor
67 /// \param corpus_first The start of the data to search (Random Access Iterator)
68 /// \param corpus_last One past the end of the data to search
69 /// \return A pair of corpus iterators delimiting the match.
///         (corpus_last, corpus_last) is returned when the corpus is empty, when
///         the pattern is longer than the corpus, or when no match is found;
///         (corpus_first, corpus_first) is returned for an empty pattern searched
///         in a non-empty corpus.
/// \note No predicate is taken; the visible search code compares elements with
///       operator== / operator!=.
71 template <typename corpusIter>
72 std::pair<corpusIter, corpusIter>
73 operator () ( corpusIter corpus_first, corpusIter corpus_last ) const {
// Compile-time check: pattern and corpus iterators must have the same value_type.
74 BOOST_STATIC_ASSERT (( boost::is_same<
75 typename std::iterator_traits<patIter>::value_type,
76 typename std::iterator_traits<corpusIter>::value_type>::value ));
// The empty-corpus test comes first, so an empty pattern in an empty corpus
// yields (corpus_last, corpus_last).
78 if ( corpus_first == corpus_last ) return std::make_pair(corpus_last, corpus_last); // if nothing to search, we didn't find it!
79 if ( pat_first == pat_last ) return std::make_pair(corpus_first, corpus_first); // empty pattern matches at start
81 const difference_type k_corpus_length = std::distance ( corpus_first, corpus_last );
82 // If the pattern is larger than the corpus, we can't find it!
83 if ( k_corpus_length < k_pattern_length )
84 return std::make_pair(corpus_last, corpus_last);
// All trivial cases are handled; do_search may now rely on
// 0 < k_pattern_length <= k_corpus_length.
86 return do_search ( corpus_first, corpus_last, k_corpus_length );
// Range convenience overload: searches all of r by forwarding boost::begin(r)
// and boost::end(r) to the iterator overload above.
89 template <typename Range>
90 std::pair<typename boost::range_iterator<Range>::type, typename boost::range_iterator<Range>::type>
91 operator () ( Range &r ) const {
92 return (*this) (boost::begin(r), boost::end(r));
96 /// \cond DOXYGEN_HIDE
// Iterators delimiting the pattern that was passed to the constructor.
97 patIter pat_first, pat_last;
// Number of elements in the pattern, computed once in the constructor.
98 const difference_type k_pattern_length;
// Skip table with k_pattern_length + 1 entries. do_search reads entry idx after
// a mismatch at pattern position idx to decide how far to advance.
99 std::vector <difference_type> skip_;
101 /// \fn do_search ( corpusIter corpus_first, corpusIter corpus_last, difference_type k_corpus_length )
102 /// \brief Performs the actual search; called by operator() once the trivial cases are handled
104 /// \param corpus_first The start of the data to search (Random Access Iterator)
105 /// \param corpus_last One past the end of the data to search
106 /// \param k_corpus_length The number of elements in [corpus_first, corpus_last), as computed by the caller
/// \return (match begin, match end) for the first match found scanning from
///         corpus_first, or (corpus_last, corpus_last) if there is none.
108 template <typename corpusIter>
109 std::pair<corpusIter, corpusIter>
110 do_search ( corpusIter corpus_first, corpusIter corpus_last,
111 difference_type k_corpus_length ) const {
112 difference_type match_start = 0; // position in the corpus that we're matching
// NOTE(review): the loop below uses patternIdx, which has no visible
// declaration, and its return statement yields a single iterator although this
// function returns a pair. It looks like an alternative implementation whose
// guarding conditional is not visible in this listing; confirm against the
// complete file before relying on it.
116 while ( match_start < k_corpus_length ) {
117 while ( patternIdx > -1 && pat_first[patternIdx] != corpus_first [match_start] )
118 patternIdx = skip_ [patternIdx]; //<--- Shifting the pattern on mismatch
121 match_start++; //<--- corpus is always increased by 1
123 if ( patternIdx >= (int) k_pattern_length )
124 return corpus_first + match_start - patternIdx;
128 // At this point, we know:
129 // k_pattern_length <= k_corpus_length
130 // for all elements of skip, it holds -1 .. k_pattern_length
132 // In the loop, we have the following invariants
133 // idx is in the range 0 .. k_pattern_length
134 // match_start is in the range 0 .. k_corpus_length - k_pattern_length + 1
// Largest corpus offset at which the whole pattern still fits.
136 const difference_type last_match = k_corpus_length - k_pattern_length;
137 difference_type idx = 0; // position in the pattern we're comparing
// Outer loop: one candidate alignment of the pattern per iteration.
// Inner loop: extend the current partial match; a full match returns the
// half-open range [match_start, match_start + k_pattern_length) in the corpus.
139 while ( match_start <= last_match ) {
140 while ( pat_first [ idx ] == corpus_first [ match_start + idx ] ) {
141 if ( ++idx == k_pattern_length )
142 return std::make_pair(corpus_first + match_start, corpus_first + match_start + k_pattern_length);
144 // Figure out where to start searching again
145 // assert ( idx - skip_ [ idx ] > 0 ); // we're always moving forward
// Mismatch at pattern position idx: move the alignment forward by
// idx - skip_[idx] and resume comparing at pattern position skip_[idx]
// (or at 0 when the table entry is negative).
146 match_start += idx - skip_ [ idx ];
147 idx = skip_ [ idx ] >= 0 ? skip_ [ idx ] : 0;
148 // assert ( idx >= 0 && idx < k_pattern_length );
152 // We didn't find anything
153 return std::make_pair(corpus_last, corpus_last);
// Table builder over the pattern [first, last), called from the constructor.
// NOTE(review): only fragments of this function are visible (the declarations of
// i and j and every statement that writes skip_ are not shown), so its exact
// behavior cannot be documented from this listing.
157 void preKmp ( patIter first, patIter last ) {
// The pattern length is held in an int here; the commented-out alternative
// type is std::size_t.
158 const /*std::size_t*/ int count = std::distance ( first, last );
165 while (j > -1 && first[i] != first[j])
169 if (first[i] == first[j])
// Table builder over the pattern [first, last), called from the constructor.
// NOTE(review): only fragments are visible: a loop over i = 1 .. count and a
// comparison of first[j] with first[i - 1]. The declaration of j and the
// statements that write skip_ are not shown.
177 void init_skip_table ( patIter first, patIter last ) {
178 const difference_type count = std::distance ( first, last );
182 for ( int i = 1; i <= count; ++i ) {
185 if ( first [ j ] == first [ i - 1 ] )
/*  Two ranges as inputs give us four overloads, taking 2, 3, 3 and 4 parameters.
    Use a bit of TMP to disambiguate the two 3-argument templates. */
199 /// \fn knuth_morris_pratt_search ( corpusIter corpus_first, corpusIter corpus_last,
200 /// patIter pat_first, patIter pat_last )
201 /// \brief Searches the corpus for the pattern.
203 /// \param corpus_first The start of the data to search (Random Access Iterator)
204 /// \param corpus_last One past the end of the data to search
205 /// \param pat_first The start of the pattern to search for (Random Access Iterator)
206 /// \param pat_last One past the end of the data to search for
208 template <typename patIter, typename corpusIter>
209 std::pair<corpusIter, corpusIter> knuth_morris_pratt_search (
210 corpusIter corpus_first, corpusIter corpus_last,
211 patIter pat_first, patIter pat_last )
213 knuth_morris_pratt<patIter> kmp ( pat_first, pat_last );
214 return kmp ( corpus_first, corpus_last );
217 template <typename PatternRange, typename corpusIter>
218 std::pair<corpusIter, corpusIter> knuth_morris_pratt_search (
219 corpusIter corpus_first, corpusIter corpus_last, const PatternRange &pattern )
221 typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
222 knuth_morris_pratt<pattern_iterator> kmp ( boost::begin(pattern), boost::end (pattern));
223 return kmp ( corpus_first, corpus_last );
226 template <typename patIter, typename CorpusRange>
227 typename boost::disable_if_c<
228 boost::is_same<CorpusRange, patIter>::value,
229 std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type> >
231 knuth_morris_pratt_search ( CorpusRange &corpus, patIter pat_first, patIter pat_last )
233 knuth_morris_pratt<patIter> kmp ( pat_first, pat_last );
234 return kmp (boost::begin (corpus), boost::end (corpus));
237 template <typename PatternRange, typename CorpusRange>
238 std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type>
239 knuth_morris_pratt_search ( CorpusRange &corpus, const PatternRange &pattern )
241 typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
242 knuth_morris_pratt<pattern_iterator> kmp ( boost::begin(pattern), boost::end (pattern));
243 return kmp (boost::begin (corpus), boost::end (corpus));
247 // Creator functions -- take a pattern range, return an object
248 template <typename Range>
249 boost::algorithm::knuth_morris_pratt<typename boost::range_iterator<const Range>::type>
250 make_knuth_morris_pratt ( const Range &r ) {
251 return boost::algorithm::knuth_morris_pratt
252 <typename boost::range_iterator<const Range>::type> (boost::begin(r), boost::end(r));
255 template <typename Range>
256 boost::algorithm::knuth_morris_pratt<typename boost::range_iterator<Range>::type>
257 make_knuth_morris_pratt ( Range &r ) {
258 return boost::algorithm::knuth_morris_pratt
259 <typename boost::range_iterator<Range>::type> (boost::begin(r), boost::end(r));
263 #endif // BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP