]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /////////////////////////////////////////////////////////////////////////////// |
2 | /// \file symbols.hpp | |
3 | /// Contains the Ternary Search Trie implementation. | |
4 | /// Based on the following papers: | |
5 | /// J. Bentley and R. Sedgewick. (1998) Ternary search trees. Dr. Dobbs Journal | |
6 | /// G. Badr and B.J. Oommen. (2005) Self-Adjusting of Ternary Search Tries Using | |
7 | /// Conditional Rotations and Randomized Heuristics | |
8 | // | |
9 | // Copyright 2007 David Jenkins. | |
10 | // Copyright 2007 Eric Niebler. | |
11 | // | |
12 | // Distributed under the Boost Software License, Version 1.0. (See | |
13 | // accompanying file LICENSE_1_0.txt or copy at | |
14 | // http://www.boost.org/LICENSE_1_0.txt) | |
15 | ||
16 | #ifndef BOOST_XPRESSIVE_DETAIL_SYMBOLS_HPP_DRJ_06_11_2007 | |
17 | #define BOOST_XPRESSIVE_DETAIL_SYMBOLS_HPP_DRJ_06_11_2007 | |
18 | ||
19 | // MS compatible compilers support #pragma once | |
20 | #if defined(_MSC_VER) | |
21 | # pragma once | |
22 | #endif | |
23 | ||
24 | #include <boost/noncopyable.hpp> | |
25 | #include <boost/range/begin.hpp> | |
26 | #include <boost/range/end.hpp> | |
27 | #include <boost/range/value_type.hpp> | |
28 | #include <boost/range/const_iterator.hpp> | |
29 | #include <boost/shared_ptr.hpp> | |
30 | ||
31 | namespace boost { namespace xpressive { namespace detail | |
32 | { | |
33 | ||
34 | /////////////////////////////////////////////////////////////////////////////// | |
35 | // symbols (using a ternary search trie) | |
36 | // | |
37 | template<typename Map> | |
38 | struct symbols | |
39 | { | |
40 | typedef typename range_value<Map>::type::first_type key_type; | |
41 | typedef typename range_value<Map>::type::second_type value_type; | |
42 | typedef typename range_value<key_type>::type char_type; | |
43 | typedef typename range_const_iterator<Map>::type iterator; | |
44 | typedef typename range_const_iterator<key_type>::type key_iterator; | |
45 | typedef value_type const *result_type; | |
46 | ||
47 | // copies of this symbol table share the TST | |
48 | ||
49 | template<typename Trans> | |
50 | void load(Map const &map, Trans trans) | |
51 | { | |
52 | iterator begin = boost::begin(map); | |
53 | iterator end = boost::end(map); | |
54 | node* root_p = this->root.get(); | |
55 | for(; begin != end; ++begin) | |
56 | { | |
57 | key_iterator kbegin = boost::begin(begin->first); | |
58 | key_iterator kend = boost::end(begin->first); | |
59 | root_p = this->insert(root_p, kbegin, kend, &begin->second, trans); | |
60 | } | |
61 | this->root.reset(root_p); | |
62 | } | |
63 | ||
64 | template<typename BidiIter, typename Trans> | |
65 | result_type operator ()(BidiIter &begin, BidiIter end, Trans trans) const | |
66 | { | |
67 | return this->search(begin, end, trans, this->root.get()); | |
68 | } | |
69 | ||
70 | template<typename Sink> | |
71 | void peek(Sink const &sink) const | |
72 | { | |
73 | this->peek_(this->root.get(), sink); | |
74 | } | |
75 | ||
76 | private: | |
77 | /////////////////////////////////////////////////////////////////////////////// | |
78 | // struct node : a node in the TST. | |
79 | // The "eq" field stores the result pointer when ch is zero. | |
80 | // | |
81 | struct node | |
82 | : boost::noncopyable | |
83 | { | |
84 | node(char_type c) | |
85 | : ch(c) | |
86 | , lo(0) | |
87 | , eq(0) | |
88 | , hi(0) | |
89 | #ifdef BOOST_DISABLE_THREADS | |
90 | , tau(0) | |
91 | #endif | |
92 | {} | |
93 | ||
94 | ~node() | |
95 | { | |
96 | delete lo; | |
97 | if (ch) | |
98 | delete eq; | |
99 | delete hi; | |
100 | } | |
101 | ||
102 | void swap(node& that) | |
103 | { | |
104 | std::swap(ch, that.ch); | |
105 | std::swap(lo, that.lo); | |
106 | std::swap(eq, that.eq); | |
107 | std::swap(hi, that.hi); | |
108 | #ifdef BOOST_DISABLE_THREADS | |
109 | std::swap(tau, that.tau); | |
110 | #endif | |
111 | } | |
112 | ||
113 | char_type ch; | |
114 | node* lo; | |
115 | union | |
116 | { | |
117 | node* eq; | |
118 | result_type result; | |
119 | }; | |
120 | node* hi; | |
121 | #ifdef BOOST_DISABLE_THREADS | |
122 | long tau; | |
123 | #endif | |
124 | }; | |
125 | ||
126 | /////////////////////////////////////////////////////////////////////////////// | |
127 | // insert : insert a string into the TST | |
128 | // | |
129 | template<typename Trans> | |
130 | node* insert(node* p, key_iterator &begin, key_iterator end, result_type r, Trans trans) const | |
131 | { | |
132 | char_type c1 = 0; | |
133 | ||
134 | if(begin != end) | |
135 | { | |
136 | c1 = trans(*begin); | |
137 | } | |
138 | ||
139 | if(!p) | |
140 | { | |
141 | p = new node(c1); | |
142 | } | |
143 | ||
144 | if(c1 < p->ch) | |
145 | { | |
146 | p->lo = this->insert(p->lo, begin, end, r, trans); | |
147 | } | |
148 | else if(c1 == p->ch) | |
149 | { | |
150 | if(0 == c1) | |
151 | { | |
152 | p->result = r; | |
153 | } | |
154 | else | |
155 | { | |
156 | p->eq = this->insert(p->eq, ++begin, end, r, trans); | |
157 | } | |
158 | } | |
159 | else | |
160 | { | |
161 | p->hi = this->insert(p->hi, begin, end, r, trans); | |
162 | } | |
163 | ||
164 | return p; | |
165 | } | |
166 | ||
167 | #ifdef BOOST_DISABLE_THREADS | |
168 | /////////////////////////////////////////////////////////////////////////////// | |
169 | // conditional rotation : the goal is to minimize the overall | |
170 | // weighted path length of each binary search tree | |
171 | // | |
172 | bool cond_rotation(bool left, node* const i, node* const j) const | |
173 | { | |
174 | // don't rotate top node in binary search tree | |
175 | if (i == j) | |
176 | return false; | |
177 | // calculate psi (the rotation condition) | |
178 | node* const k = (left ? i->hi : i->lo); | |
179 | long psi = 2*i->tau - j->tau - (k ? k->tau : 0); | |
180 | if (psi <= 0) | |
181 | return false; | |
182 | ||
183 | // recalculate the tau values | |
184 | j->tau += -i->tau + (k ? k->tau : 0); | |
185 | i->tau += j->tau - (k ? k->tau : 0); | |
186 | // fixup links and swap nodes | |
187 | if (left) | |
188 | { | |
189 | j->lo = k; | |
190 | i->hi = i; | |
191 | } | |
192 | else | |
193 | { | |
194 | j->hi = k; | |
195 | i->lo = i; | |
196 | } | |
197 | (*i).swap(*j); | |
198 | return true; | |
199 | } | |
200 | #endif | |
201 | ||
202 | /////////////////////////////////////////////////////////////////////////////// | |
203 | // search : find a string in the TST | |
204 | // | |
205 | template<typename BidiIter, typename Trans> | |
206 | result_type search(BidiIter &begin, BidiIter end, Trans trans, node* p) const | |
207 | { | |
208 | result_type r = 0; | |
209 | #ifdef BOOST_DISABLE_THREADS | |
210 | node* p2 = p; | |
211 | bool left = false; | |
212 | #endif | |
213 | char_type c1 = (begin != end ? trans(*begin) : 0); | |
214 | while(p) | |
215 | { | |
216 | #ifdef BOOST_DISABLE_THREADS | |
217 | ++p->tau; | |
218 | #endif | |
219 | if(c1 == p->ch) | |
220 | { | |
221 | // conditional rotation test | |
222 | #ifdef BOOST_DISABLE_THREADS | |
223 | if (this->cond_rotation(left, p, p2)) | |
224 | p = p2; | |
225 | #endif | |
226 | if (0 == p->ch) | |
227 | { | |
228 | // it's a match! | |
229 | r = p->result; | |
230 | } | |
231 | if(begin == end) | |
232 | break; | |
233 | ++begin; | |
234 | p = p->eq; | |
235 | // search for the longest match first | |
236 | r = search(begin,end,trans,p); | |
237 | if (0 == r) | |
238 | { | |
239 | // search for a match ending here | |
240 | r = search(end,end,trans,p); | |
241 | if (0 == r) | |
242 | { | |
243 | --begin; | |
244 | } | |
245 | } | |
246 | break; | |
247 | } | |
248 | else if(c1 < p->ch) | |
249 | { | |
250 | #ifdef BOOST_DISABLE_THREADS | |
251 | left = true; | |
252 | p2 = p; | |
253 | #endif | |
254 | p = p->lo; | |
255 | } | |
256 | else // (c1 > p->ch) | |
257 | { | |
258 | #ifdef BOOST_DISABLE_THREADS | |
259 | left = false; | |
260 | p2 = p; | |
261 | #endif | |
262 | p = p->hi; | |
263 | } | |
264 | } | |
265 | return r; | |
266 | } | |
267 | ||
268 | template<typename Sink> | |
269 | void peek_(node const *const &p, Sink const &sink) const | |
270 | { | |
271 | if(p) | |
272 | { | |
273 | sink(p->ch); | |
274 | this->peek_(p->lo, sink); | |
275 | this->peek_(p->hi, sink); | |
276 | } | |
277 | } | |
278 | ||
279 | boost::shared_ptr<node> root; | |
280 | }; | |
281 | ||
282 | }}} // namespace boost::xpressive::detail | |
283 | ||
284 | #endif |