2 Copyright (c) Marshall Clow 2010-2012.
4 Distributed under the Boost Software License, Version 1.0. (See accompanying
5 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 For more information, see http://www.boost.org
10 #include <boost/algorithm/searching/boyer_moore.hpp>
11 #include <boost/algorithm/searching/boyer_moore_horspool.hpp>
12 #include <boost/algorithm/searching/knuth_morris_pratt.hpp>
14 #define BOOST_TEST_MAIN
15 #include <boost/test/unit_test.hpp>
22 namespace ba
= boost::algorithm
;
24 template <typename Iter
>
25 std::string
make_str ( Iter first
, std::size_t len
) {
26 std::string
retVal ( len
+ 2, '\'' );
27 std::copy ( first
, first
+len
, retVal
.begin () + 1);
33 // Check using iterators
34 template<typename Container
>
35 void check_one_iter ( const Container
&haystack
, const std::string
&needle
, int expected
) {
36 typedef typename
Container::const_iterator iter_type
;
37 typedef typename
std::pair
<iter_type
, iter_type
> ret_type
;
38 typedef std::string::const_iterator pattern_type
;
40 iter_type hBeg
= haystack
.begin ();
41 iter_type hEnd
= haystack
.end ();
42 pattern_type nBeg
= needle
.begin ();
43 pattern_type nEnd
= needle
.end ();
45 // iter_type ret0 = std::search (hBeg, hEnd, nBeg, nEnd);
46 ret_type ret1
= ba::boyer_moore_search (hBeg
, hEnd
, nBeg
, nEnd
);
47 ret_type ret1r
= ba::boyer_moore_search (haystack
, nBeg
, nEnd
);
48 ret_type ret2
= ba::boyer_moore_horspool_search (hBeg
, hEnd
, nBeg
, nEnd
);
49 ret_type ret3
= ba::knuth_morris_pratt_search (hBeg
, hEnd
, nBeg
, nEnd
);
51 iter_type it0
= std::search (hBeg
, hEnd
, nBeg
, nEnd
);
52 // iter_type it1 = ret1.first;
53 // iter_type it1r = ret1r.first;
54 // iter_type it2 = ret2.first;
55 // iter_type it3 = ret3.first;
56 const int dist
= ret1
.first
== hEnd
? -1 : std::distance ( hBeg
, ret1
.first
);
58 std::cout
<< "(Iterators) Pattern is " << needle
.length () << ", haysstack is " << haystack
.length () << " chars long; " << std::endl
;
60 if ( it0
!= ret1
.first
) {
61 throw std::runtime_error (
62 std::string ( "results mismatch between std::search and boyer-moore search" ));
65 if ( ret1
.first
!= ret1r
.first
|| ret1
.second
!= ret1r
.second
) {
66 throw std::runtime_error (
67 std::string ( "results mismatch between iterator and range boyer_moore search" ));
70 if ( ret1
.first
!= ret2
.first
|| ret1
.second
!= ret2
.second
) {
71 throw std::runtime_error (
72 std::string ( "results mismatch between boyer-moore and boyer-moore-horspool search" ));
75 if ( ret1
.first
!= ret3
.first
|| ret1
.second
!= ret3
.second
) {
76 throw std::runtime_error (
77 std::string ( "results mismatch between boyer-moore and knuth-morris-pratt search" ));
83 std::cout
<< "Searching for: " << needle
<< std::endl
;
84 std::cout
<< "Expected: " << expected
<< "\n";
85 std::cout
<< " std: " << std::distance ( hBeg
, it0
) << "\n";
86 std::cout
<< " bm: " << std::distance ( hBeg
, ret1
.first
) << "\n";
87 std::cout
<< " bm(r): " << std::distance ( hBeg
, ret1r
.first
) << "\n";
88 std::cout
<< " bmh: " << std::distance ( hBeg
, ret2
.first
) << "\n";
89 std::cout
<< " kpm: " << std::distance ( hBeg
, ret3
.first
)<< "\n";
90 std::cout
<< std::flush
;
94 BOOST_CHECK_EQUAL ( dist
, expected
);
97 // Check using pointers
98 // We're assuming that the container implements contiguous storage here.
99 template<typename Container
>
100 void check_one_pointer ( const Container
&haystack
, const std::string
&needle
, int expected
) {
101 typedef const typename
Container::value_type
*ptr_type
;
102 typedef typename
std::pair
<ptr_type
, ptr_type
> ret_type
;
104 ptr_type hBeg
= haystack
.size () == 0 ? NULL
: &*haystack
.begin ();
105 ptr_type hEnd
= hBeg
+ haystack
.size ();
106 ptr_type nBeg
= needle
.size () == 0 ? NULL
: &*needle
.begin ();
107 ptr_type nEnd
= nBeg
+ needle
.size ();
109 ptr_type it0
= std::search (hBeg
, hEnd
, nBeg
, nEnd
);
110 ret_type ret1
= ba::boyer_moore_search (hBeg
, hEnd
, nBeg
, nEnd
);
111 ret_type ret2
= ba::boyer_moore_horspool_search (hBeg
, hEnd
, nBeg
, nEnd
);
112 ret_type ret3
= ba::knuth_morris_pratt_search (hBeg
, hEnd
, nBeg
, nEnd
);
113 const int dist
= ret1
.first
== hEnd
? -1 : std::distance ( hBeg
, ret1
.first
);
115 std::cout
<< "(Pointers) Pattern is " << needle
.length () << ", haysstack is " << haystack
.length () << " chars long; " << std::endl
;
117 if ( it0
!= ret1
.first
) {
118 throw std::runtime_error (
119 std::string ( "results mismatch between std::search and boyer-moore search" ));
122 if ( ret1
.first
!= ret2
.first
|| ret1
.second
!= ret2
.second
) {
123 throw std::runtime_error (
124 std::string ( "results mismatch between boyer-moore and boyer-moore-horspool search" ));
127 if ( ret1
.first
!= ret3
.first
|| ret1
.second
!= ret3
.second
) {
128 throw std::runtime_error (
129 std::string ( "results mismatch between boyer-moore and knuth-morris-pratt search" ));
135 std::cout
<< "Searching for: " << needle
<< std::endl
;
136 std::cout
<< "Expected: " << expected
<< "\n";
137 std::cout
<< " std: " << std::distance ( hBeg
, it0
) << "\n";
138 std::cout
<< " bm: " << std::distance ( hBeg
, ret1
.first
) << "\n";
139 std::cout
<< " bmh: " << std::distance ( hBeg
, ret2
.first
) << "\n";
140 std::cout
<< " kpm: " << std::distance ( hBeg
, ret3
.first
)<< "\n";
141 std::cout
<< std::flush
;
145 BOOST_CHECK_EQUAL ( dist
, expected
);
148 // Check using objects
149 template<typename Container
>
150 void check_one_object ( const Container
&haystack
, const std::string
&needle
, int expected
) {
151 typedef typename
Container::const_iterator iter_type
;
152 typedef typename
std::pair
<iter_type
, iter_type
> ret_type
;
153 typedef std::string::const_iterator pattern_type
;
155 iter_type hBeg
= haystack
.begin ();
156 iter_type hEnd
= haystack
.end ();
157 pattern_type nBeg
= needle
.begin ();
158 pattern_type nEnd
= needle
.end ();
160 ba::boyer_moore
<pattern_type
> bm_r
= ba::make_boyer_moore ( needle
);
161 ba::boyer_moore
<pattern_type
> bm ( nBeg
, nEnd
);
162 ba::boyer_moore_horspool
<pattern_type
> bmh ( nBeg
, nEnd
);
163 ba::knuth_morris_pratt
<pattern_type
> kmp ( nBeg
, nEnd
);
165 iter_type it0
= std::search (hBeg
, hEnd
, nBeg
, nEnd
);
166 ret_type ret1
= bm (hBeg
, hEnd
);
167 ret_type ret1r
= bm (haystack
);
168 ret_type retr1
= bm_r (hBeg
, hEnd
);
169 ret_type retr1r
= bm_r (haystack
);
170 ret_type ret2
= bmh (hBeg
, hEnd
);
171 ret_type ret3
= kmp (hBeg
, hEnd
);
172 const int dist
= ret1
.first
== hEnd
? -1 : std::distance ( hBeg
, ret1
.first
);
174 std::cout
<< "(Objects) Pattern is " << needle
.length () << ", haysstack is " << haystack
.length () << " chars long; " << std::endl
;
176 if ( it0
!= ret1
.first
) {
177 throw std::runtime_error (
178 std::string ( "results mismatch between std::search and boyer-moore search" ));
181 if ( ret1
.first
!= ret1r
.first
|| ret1
.second
!= ret1r
.second
) {
182 throw std::runtime_error (
183 std::string ( "results mismatch between iterator and range boyer_moore search(1)" ));
186 if ( ret1
.first
!= retr1
.first
|| ret1
.second
!= retr1
.second
) {
187 throw std::runtime_error (
188 std::string ( "results mismatch between iterator and range boyer_moore search(2)" ));
191 if ( ret1
.first
!= retr1r
.first
|| ret1
.second
!= retr1r
.second
) {
192 throw std::runtime_error (
193 std::string ( "results mismatch between iterator and range boyer_moore search(3)" ));
196 if ( ret1
.first
!= ret2
.first
|| ret1
.second
!= ret2
.second
) {
197 throw std::runtime_error (
198 std::string ( "results mismatch between boyer-moore and boyer-moore-horspool search" ));
201 if ( ret1
.first
!= ret3
.first
|| ret1
.second
!= ret3
.second
) {
202 throw std::runtime_error (
203 std::string ( "results mismatch between boyer-moore and knuth-morris-pratt search" ));
209 std::cout
<< "Searching for: " << needle
<< std::endl
;
210 std::cout
<< "Expected: " << expected
<< "\n";
211 std::cout
<< " std: " << std::distance ( hBeg
, it0
) << "\n";
212 std::cout
<< " bm: " << std::distance ( hBeg
, ret1
.first
) << "\n";
213 std::cout
<< " bm(r1): " << std::distance ( hBeg
, ret1r
.first
) << "\n";
214 std::cout
<< " bm(r2): " << std::distance ( hBeg
, retr1
.first
) << "\n";
215 std::cout
<< " bm(r3): " << std::distance ( hBeg
, retr1r
.first
) << "\n";
216 std::cout
<< " bmh: " << std::distance ( hBeg
, ret2
.first
) << "\n";
217 std::cout
<< " kpm: " << std::distance ( hBeg
, ret3
.first
)<< "\n";
218 std::cout
<< std::flush
;
222 BOOST_CHECK_EQUAL ( dist
, expected
);
226 template<typename Container
>
227 void check_one ( const Container
&haystack
, const std::string
&needle
, int expected
) {
228 check_one_iter ( haystack
, needle
, expected
);
229 check_one_pointer ( haystack
, needle
, expected
);
230 check_one_object ( haystack
, needle
, expected
);
235 BOOST_AUTO_TEST_CASE( test_main
)
237 std::string
haystack1 ( "NOW AN FOWE\220ER ANNMAN THE ANPANMANEND" );
238 std::string
needle1 ( "ANPANMAN" );
239 std::string
needle2 ( "MAN THE" );
240 std::string
needle3 ( "WE\220ER" );
241 std::string
needle4 ( "NOW " ); // At the beginning
242 std::string
needle5 ( "NEND" ); // At the end
243 std::string
needle6 ( "NOT FOUND" ); // Nowhere
244 std::string
needle7 ( "NOT FO\340ND" ); // Nowhere
246 std::string
haystack2 ( "ABC ABCDAB ABCDABCDABDE" );
247 std::string
needle11 ( "ABCDABD" );
249 std::string
haystack3 ( "abra abracad abracadabra" );
250 std::string
needle12 ( "abracadabra" );
252 std::string
needle13 ( "" );
253 std::string
haystack4 ( "" );
255 check_one ( haystack1
, needle1
, 26 );
256 check_one ( haystack1
, needle2
, 18 );
257 check_one ( haystack1
, needle3
, 9 );
258 check_one ( haystack1
, needle4
, 0 );
259 check_one ( haystack1
, needle5
, 33 );
260 check_one ( haystack1
, needle6
, -1 );
261 check_one ( haystack1
, needle7
, -1 );
263 check_one ( needle1
, haystack1
, -1 ); // cant find long pattern in short corpus
264 check_one ( haystack1
, haystack1
, 0 ); // find something in itself
265 check_one ( haystack2
, haystack2
, 0 ); // find something in itself
267 check_one ( haystack2
, needle11
, 15 );
268 check_one ( haystack3
, needle12
, 13 );
270 check_one ( haystack1
, needle13
, 0 ); // find the empty string
271 check_one ( haystack4
, needle1
, -1 ); // can't find in an empty haystack
273 // Mikhail Levin <svarneticist@gmail.com> found a problem, and this was the test
274 // that triggered it.
276 const std::string mikhail_pattern
=
277 "GATACACCTACCTTCACCAGTTACTCTATGCACTAGGTGCGCCAGGCCCATGCACAAGGGCTTGAGTGGATGGGAAGGA"
278 "TGTGCCCTAGTGATGGCAGCATAAGCTACGCAGAGAAGTTCCAGGGCAGAGTCACCATGACCAGGGACACATCCACGAG"
279 "CACAGCCTACATGGAGCTGAGCAGCCTGAGATCTGAAGACACGGCCATGTATTACTGTGGGAGAGATGTCTGGAGTGGT"
280 "TATTATTGCCCCGGTAATATTACTACTACTACTACTACATGGACGTCTGGGGCAAAGGGACCACG"
282 const std::string mikhail_corpus
= std::string (8, 'a') + mikhail_pattern
;
284 check_one ( mikhail_corpus
, mikhail_pattern
, 8 );