]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/algorithm/include/boost/algorithm/searching/knuth_morris_pratt.hpp
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / boost / libs / algorithm / include / boost / algorithm / searching / knuth_morris_pratt.hpp
1 /*
2 Copyright (c) Marshall Clow 2010-2012.
3
4 Distributed under the Boost Software License, Version 1.0. (See accompanying
5 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
7 For more information, see http://www.boost.org
8 */
9
10 #ifndef BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP
11 #define BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP
12
13 #include <vector>
14 #include <iterator> // for std::iterator_traits
15
16 #include <boost/assert.hpp>
17 #include <boost/static_assert.hpp>
18
19 #include <boost/range/begin.hpp>
20 #include <boost/range/end.hpp>
21
22 #include <boost/utility/enable_if.hpp>
23 #include <boost/type_traits/is_same.hpp>
24
25 #include <boost/algorithm/searching/detail/debugging.hpp>
26
27 // #define BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_DEBUG
28
29 namespace boost { namespace algorithm {
30
31 // #define NEW_KMP
32
33 /*
34 A templated version of the Knuth-Morris-Pratt searching algorithm.
35
36 Requirements:
37 * Random-access iterators
38 * The two iterator types (I1 and I2) must "point to" the same underlying type.
39
40 http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
41 http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm
42 */
43
44 template <typename patIter>
45 class knuth_morris_pratt {
46 typedef typename std::iterator_traits<patIter>::difference_type difference_type;
47 public:
48 knuth_morris_pratt ( patIter first, patIter last )
49 : pat_first ( first ), pat_last ( last ),
50 k_pattern_length ( std::distance ( pat_first, pat_last )),
51 skip_ ( k_pattern_length + 1 ) {
52 #ifdef NEW_KMP
53 preKmp ( pat_first, pat_last );
54 #else
55 init_skip_table ( pat_first, pat_last );
56 #endif
57 #ifdef BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_DEBUG
58 detail::PrintTable ( skip_.begin (), skip_.end ());
59 #endif
60 }
61
62 ~knuth_morris_pratt () {}
63
64 /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last, Pred p )
65 /// \brief Searches the corpus for the pattern that was passed into the constructor
66 ///
67 /// \param corpus_first The start of the data to search (Random Access Iterator)
68 /// \param corpus_last One past the end of the data to search
69 /// \param p A predicate used for the search comparisons.
70 ///
71 template <typename corpusIter>
72 std::pair<corpusIter, corpusIter>
73 operator () ( corpusIter corpus_first, corpusIter corpus_last ) const {
74 BOOST_STATIC_ASSERT (( boost::is_same<
75 typename std::iterator_traits<patIter>::value_type,
76 typename std::iterator_traits<corpusIter>::value_type>::value ));
77
78 if ( corpus_first == corpus_last ) return std::make_pair(corpus_last, corpus_last); // if nothing to search, we didn't find it!
79 if ( pat_first == pat_last ) return std::make_pair(corpus_first, corpus_first); // empty pattern matches at start
80
81 const difference_type k_corpus_length = std::distance ( corpus_first, corpus_last );
82 // If the pattern is larger than the corpus, we can't find it!
83 if ( k_corpus_length < k_pattern_length )
84 return std::make_pair(corpus_last, corpus_last);
85
86 return do_search ( corpus_first, corpus_last, k_corpus_length );
87 }
88
89 template <typename Range>
90 std::pair<typename boost::range_iterator<Range>::type, typename boost::range_iterator<Range>::type>
91 operator () ( Range &r ) const {
92 return (*this) (boost::begin(r), boost::end(r));
93 }
94
95 private:
96 /// \cond DOXYGEN_HIDE
97 patIter pat_first, pat_last;
98 const difference_type k_pattern_length;
99 std::vector <difference_type> skip_;
100
101 /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last, Pred p )
102 /// \brief Searches the corpus for the pattern that was passed into the constructor
103 ///
104 /// \param corpus_first The start of the data to search (Random Access Iterator)
105 /// \param corpus_last One past the end of the data to search
106 /// \param p A predicate used for the search comparisons.
107 ///
108 template <typename corpusIter>
109 std::pair<corpusIter, corpusIter>
110 do_search ( corpusIter corpus_first, corpusIter corpus_last,
111 difference_type k_corpus_length ) const {
112 difference_type match_start = 0; // position in the corpus that we're matching
113
114 #ifdef NEW_KMP
115 int patternIdx = 0;
116 while ( match_start < k_corpus_length ) {
117 while ( patternIdx > -1 && pat_first[patternIdx] != corpus_first [match_start] )
118 patternIdx = skip_ [patternIdx]; //<--- Shifting the pattern on mismatch
119
120 patternIdx++;
121 match_start++; //<--- corpus is always increased by 1
122
123 if ( patternIdx >= (int) k_pattern_length )
124 return corpus_first + match_start - patternIdx;
125 }
126
127 #else
128 // At this point, we know:
129 // k_pattern_length <= k_corpus_length
130 // for all elements of skip, it holds -1 .. k_pattern_length
131 //
132 // In the loop, we have the following invariants
133 // idx is in the range 0 .. k_pattern_length
134 // match_start is in the range 0 .. k_corpus_length - k_pattern_length + 1
135
136 const difference_type last_match = k_corpus_length - k_pattern_length;
137 difference_type idx = 0; // position in the pattern we're comparing
138
139 while ( match_start <= last_match ) {
140 while ( pat_first [ idx ] == corpus_first [ match_start + idx ] ) {
141 if ( ++idx == k_pattern_length )
142 return std::make_pair(corpus_first + match_start, corpus_first + match_start + k_pattern_length);
143 }
144 // Figure out where to start searching again
145 // assert ( idx - skip_ [ idx ] > 0 ); // we're always moving forward
146 match_start += idx - skip_ [ idx ];
147 idx = skip_ [ idx ] >= 0 ? skip_ [ idx ] : 0;
148 // assert ( idx >= 0 && idx < k_pattern_length );
149 }
150 #endif
151
152 // We didn't find anything
153 return std::make_pair(corpus_last, corpus_last);
154 }
155
156
157 void preKmp ( patIter first, patIter last ) {
158 const /*std::size_t*/ int count = std::distance ( first, last );
159
160 int i, j;
161
162 i = 0;
163 j = skip_[0] = -1;
164 while (i < count) {
165 while (j > -1 && first[i] != first[j])
166 j = skip_[j];
167 i++;
168 j++;
169 if (first[i] == first[j])
170 skip_[i] = skip_[j];
171 else
172 skip_[i] = j;
173 }
174 }
175
176
177 void init_skip_table ( patIter first, patIter last ) {
178 const difference_type count = std::distance ( first, last );
179
180 int j;
181 skip_ [ 0 ] = -1;
182 for ( int i = 1; i <= count; ++i ) {
183 j = skip_ [ i - 1 ];
184 while ( j >= 0 ) {
185 if ( first [ j ] == first [ i - 1 ] )
186 break;
187 j = skip_ [ j ];
188 }
189 skip_ [ i ] = j + 1;
190 }
191 }
192 // \endcond
193 };
194
195
/*  Two ranges as inputs give us four possibilities, taking 2, 3, 3, and 4 parameters.
    Use a bit of TMP to disambiguate the 3-argument templates */
198
199 /// \fn knuth_morris_pratt_search ( corpusIter corpus_first, corpusIter corpus_last,
200 /// patIter pat_first, patIter pat_last )
201 /// \brief Searches the corpus for the pattern.
202 ///
203 /// \param corpus_first The start of the data to search (Random Access Iterator)
204 /// \param corpus_last One past the end of the data to search
205 /// \param pat_first The start of the pattern to search for (Random Access Iterator)
206 /// \param pat_last One past the end of the data to search for
207 ///
208 template <typename patIter, typename corpusIter>
209 std::pair<corpusIter, corpusIter> knuth_morris_pratt_search (
210 corpusIter corpus_first, corpusIter corpus_last,
211 patIter pat_first, patIter pat_last )
212 {
213 knuth_morris_pratt<patIter> kmp ( pat_first, pat_last );
214 return kmp ( corpus_first, corpus_last );
215 }
216
217 template <typename PatternRange, typename corpusIter>
218 std::pair<corpusIter, corpusIter> knuth_morris_pratt_search (
219 corpusIter corpus_first, corpusIter corpus_last, const PatternRange &pattern )
220 {
221 typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
222 knuth_morris_pratt<pattern_iterator> kmp ( boost::begin(pattern), boost::end (pattern));
223 return kmp ( corpus_first, corpus_last );
224 }
225
226 template <typename patIter, typename CorpusRange>
227 typename boost::disable_if_c<
228 boost::is_same<CorpusRange, patIter>::value,
229 std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type> >
230 ::type
231 knuth_morris_pratt_search ( CorpusRange &corpus, patIter pat_first, patIter pat_last )
232 {
233 knuth_morris_pratt<patIter> kmp ( pat_first, pat_last );
234 return kmp (boost::begin (corpus), boost::end (corpus));
235 }
236
237 template <typename PatternRange, typename CorpusRange>
238 std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type>
239 knuth_morris_pratt_search ( CorpusRange &corpus, const PatternRange &pattern )
240 {
241 typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
242 knuth_morris_pratt<pattern_iterator> kmp ( boost::begin(pattern), boost::end (pattern));
243 return kmp (boost::begin (corpus), boost::end (corpus));
244 }
245
246
247 // Creator functions -- take a pattern range, return an object
248 template <typename Range>
249 boost::algorithm::knuth_morris_pratt<typename boost::range_iterator<const Range>::type>
250 make_knuth_morris_pratt ( const Range &r ) {
251 return boost::algorithm::knuth_morris_pratt
252 <typename boost::range_iterator<const Range>::type> (boost::begin(r), boost::end(r));
253 }
254
255 template <typename Range>
256 boost::algorithm::knuth_morris_pratt<typename boost::range_iterator<Range>::type>
257 make_knuth_morris_pratt ( Range &r ) {
258 return boost::algorithm::knuth_morris_pratt
259 <typename boost::range_iterator<Range>::type> (boost::begin(r), boost::end(r));
260 }
261 }}
262
263 #endif // BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP