]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /////////////////////////////////////////////////////////////////////////////// |
2 | /// \file boyer_moore.hpp | |
3 | /// Contains the boyer-moore implementation. Note: this is *not* a general- | |
4 | /// purpose boyer-moore implementation. It truncates the search string at | |
5 | /// 256 characters, but it is sufficient for the needs of xpressive. | |
6 | // | |
7 | // Copyright 2008 Eric Niebler. Distributed under the Boost | |
8 | // Software License, Version 1.0. (See accompanying file | |
9 | // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
10 | ||
11 | #ifndef BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005 | |
12 | #define BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005 | |
13 | ||
14 | // MS compatible compilers support #pragma once | |
15 | #if defined(_MSC_VER) | |
16 | # pragma once | |
17 | # pragma warning(push) | |
18 | # pragma warning(disable : 4100) // unreferenced formal parameter | |
19 | #endif | |
20 | ||
21 | #include <climits> // for UCHAR_MAX | |
22 | #include <cstddef> // for std::ptrdiff_t | |
23 | #include <utility> // for std::max | |
24 | #include <vector> | |
25 | #include <boost/mpl/bool.hpp> | |
26 | #include <boost/noncopyable.hpp> | |
27 | #include <boost/iterator/iterator_traits.hpp> | |
28 | #include <boost/type_traits/is_convertible.hpp> | |
29 | #include <boost/xpressive/detail/detail_fwd.hpp> | |
30 | ||
31 | namespace boost { namespace xpressive { namespace detail | |
32 | { | |
33 | ||
34 | /////////////////////////////////////////////////////////////////////////////// | |
35 | // boyer_moore | |
36 | // | |
37 | template<typename BidiIter, typename Traits> | |
38 | struct boyer_moore | |
39 | : noncopyable | |
40 | { | |
41 | typedef typename iterator_value<BidiIter>::type char_type; | |
42 | typedef Traits traits_type; | |
43 | typedef has_fold_case<Traits> case_fold; | |
44 | typedef typename Traits::string_type string_type; | |
45 | ||
46 | // initialize the Boyer-Moore search data structure, using the | |
47 | // search sub-sequence to prime the pump. | |
48 | boyer_moore(char_type const *begin, char_type const *end, Traits const &tr, bool icase) | |
49 | : begin_(begin) | |
50 | , last_(begin) | |
51 | , fold_() | |
52 | , find_fun_( | |
53 | icase | |
54 | ? (case_fold() ? &boyer_moore::find_nocase_fold_ : &boyer_moore::find_nocase_) | |
55 | : &boyer_moore::find_ | |
56 | ) | |
57 | { | |
58 | std::ptrdiff_t const uchar_max = UCHAR_MAX; | |
59 | std::ptrdiff_t diff = std::distance(begin, end); | |
60 | this->length_ = static_cast<unsigned char>((std::min)(diff, uchar_max)); | |
61 | std::fill_n(static_cast<unsigned char *>(this->offsets_), uchar_max + 1, this->length_); | |
62 | --this->length_; | |
63 | ||
64 | icase ? this->init_(tr, case_fold()) : this->init_(tr, mpl::false_()); | |
65 | } | |
66 | ||
67 | BidiIter find(BidiIter begin, BidiIter end, Traits const &tr) const | |
68 | { | |
69 | return (this->*this->find_fun_)(begin, end, tr); | |
70 | } | |
71 | ||
72 | private: | |
73 | ||
74 | void init_(Traits const &tr, mpl::false_) | |
75 | { | |
76 | for(unsigned char offset = this->length_; offset; --offset, ++this->last_) | |
77 | { | |
78 | this->offsets_[tr.hash(*this->last_)] = offset; | |
79 | } | |
80 | } | |
81 | ||
82 | void init_(Traits const &tr, mpl::true_) | |
83 | { | |
84 | this->fold_.reserve(this->length_ + 1); | |
85 | for(unsigned char offset = this->length_; offset; --offset, ++this->last_) | |
86 | { | |
87 | this->fold_.push_back(tr.fold_case(*this->last_)); | |
88 | for(typename string_type::const_iterator beg = this->fold_.back().begin(), end = this->fold_.back().end(); | |
89 | beg != end; ++beg) | |
90 | { | |
91 | this->offsets_[tr.hash(*beg)] = offset; | |
92 | } | |
93 | } | |
94 | this->fold_.push_back(tr.fold_case(*this->last_)); | |
95 | } | |
96 | ||
97 | // case-sensitive Boyer-Moore search | |
98 | BidiIter find_(BidiIter begin, BidiIter end, Traits const &tr) const | |
99 | { | |
100 | typedef typename boost::iterator_difference<BidiIter>::type diff_type; | |
101 | diff_type const endpos = std::distance(begin, end); | |
102 | diff_type offset = static_cast<diff_type>(this->length_); | |
103 | ||
104 | for(diff_type curpos = offset; curpos < endpos; curpos += offset) | |
105 | { | |
106 | std::advance(begin, offset); | |
107 | ||
108 | char_type const *pat_tmp = this->last_; | |
109 | BidiIter str_tmp = begin; | |
110 | ||
111 | for(; tr.translate(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp) | |
112 | { | |
113 | if(pat_tmp == this->begin_) | |
114 | { | |
115 | return str_tmp; | |
116 | } | |
117 | } | |
118 | ||
119 | offset = this->offsets_[tr.hash(tr.translate(*begin))]; | |
120 | } | |
121 | ||
122 | return end; | |
123 | } | |
124 | ||
125 | // case-insensitive Boyer-Moore search | |
126 | BidiIter find_nocase_(BidiIter begin, BidiIter end, Traits const &tr) const | |
127 | { | |
128 | typedef typename boost::iterator_difference<BidiIter>::type diff_type; | |
129 | diff_type const endpos = std::distance(begin, end); | |
130 | diff_type offset = static_cast<diff_type>(this->length_); | |
131 | ||
132 | for(diff_type curpos = offset; curpos < endpos; curpos += offset) | |
133 | { | |
134 | std::advance(begin, offset); | |
135 | ||
136 | char_type const *pat_tmp = this->last_; | |
137 | BidiIter str_tmp = begin; | |
138 | ||
139 | for(; tr.translate_nocase(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp) | |
140 | { | |
141 | if(pat_tmp == this->begin_) | |
142 | { | |
143 | return str_tmp; | |
144 | } | |
145 | } | |
146 | ||
147 | offset = this->offsets_[tr.hash(tr.translate_nocase(*begin))]; | |
148 | } | |
149 | ||
150 | return end; | |
151 | } | |
152 | ||
153 | // case-insensitive Boyer-Moore search with case-folding | |
154 | BidiIter find_nocase_fold_(BidiIter begin, BidiIter end, Traits const &tr) const | |
155 | { | |
156 | typedef typename boost::iterator_difference<BidiIter>::type diff_type; | |
157 | diff_type const endpos = std::distance(begin, end); | |
158 | diff_type offset = static_cast<diff_type>(this->length_); | |
159 | ||
160 | for(diff_type curpos = offset; curpos < endpos; curpos += offset) | |
161 | { | |
162 | std::advance(begin, offset); | |
163 | ||
164 | string_type const *pat_tmp = &this->fold_.back(); | |
165 | BidiIter str_tmp = begin; | |
166 | ||
167 | for(; pat_tmp->end() != std::find(pat_tmp->begin(), pat_tmp->end(), *str_tmp); | |
168 | --pat_tmp, --str_tmp) | |
169 | { | |
170 | if(pat_tmp == &this->fold_.front()) | |
171 | { | |
172 | return str_tmp; | |
173 | } | |
174 | } | |
175 | ||
176 | offset = this->offsets_[tr.hash(*begin)]; | |
177 | } | |
178 | ||
179 | return end; | |
180 | } | |
181 | ||
182 | private: | |
183 | ||
184 | char_type const *begin_; | |
185 | char_type const *last_; | |
186 | std::vector<string_type> fold_; | |
187 | BidiIter (boyer_moore::*const find_fun_)(BidiIter, BidiIter, Traits const &) const; | |
188 | unsigned char length_; | |
189 | unsigned char offsets_[UCHAR_MAX + 1]; | |
190 | }; | |
191 | ||
192 | }}} // namespace boost::xpressive::detail | |
193 | ||
194 | #if defined(_MSC_VER) | |
195 | # pragma warning(pop) | |
196 | #endif | |
197 | ||
198 | #endif |