]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*============================================================================= |
2 | Copyright (c) 2001-2003 Joel de Guzman | |
3 | Copyright (c) 2011 Daniel James | |
4 | http://spirit.sourceforge.net/ | |
5 | ||
6 | Use, modification and distribution is subject to the Boost Software | |
7 | License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at | |
8 | http://www.boost.org/LICENSE_1_0.txt) | |
9 | =============================================================================*/ | |
10 | #ifndef QUICKBOOK_SYMBOLS_IPP | |
11 | #define QUICKBOOK_SYMBOLS_IPP | |
12 | ||
13 | /////////////////////////////////////////////////////////////////////////////// | |
14 | ||
7c673cae FG |
15 | #include <boost/intrusive_ptr.hpp> |
16 | #include <boost/scoped_ptr.hpp> | |
11fdf7f2 | 17 | #include <boost/spirit/home/classic/symbols.hpp> |
7c673cae FG |
18 | |
19 | /////////////////////////////////////////////////////////////////////////////// | |
20 | namespace quickbook | |
21 | { | |
22 | ||
11fdf7f2 TL |
23 | /////////////////////////////////////////////////////////////////////////////// |
24 | // | |
25 | // tst class | |
26 | // | |
27 | // This it the Ternary Search Tree from | |
28 | // <boost/spirit/home/classic/symbols/impl/tst.ipp> adapted to be cheap | |
29 | // to copy. | |
30 | // | |
31 | // Ternary Search Tree implementation. The data structure is faster | |
32 | // than | |
33 | // hashing for many typical search problems especially when the search | |
34 | // interface is iterator based. Searching for a string of length k in a | |
35 | // ternary search tree with n strings will require at most O(log n+k) | |
36 | // character comparisons. TSTs are many times faster than hash tables | |
37 | // for unsuccessful searches since mismatches are discovered earlier | |
38 | // after examining only a few characters. Hash tables always examine an | |
39 | // entire key when searching. | |
40 | // | |
41 | // For details see http://www.cs.princeton.edu/~rs/strings/. | |
42 | // | |
43 | // *** This is a low level class and is | |
44 | // not meant for public consumption *** | |
45 | // | |
46 | /////////////////////////////////////////////////////////////////////////////// | |
47 | ||
48 | template <typename T, typename CharT> struct tst_node | |
7c673cae FG |
49 | { |
50 | tst_node(CharT value_) | |
11fdf7f2 TL |
51 | : reference_count(0) |
52 | , left() | |
53 | , middle() | |
54 | , right() | |
55 | , data() | |
56 | , value(value_) | |
7c673cae FG |
57 | { |
58 | } | |
11fdf7f2 | 59 | |
7c673cae | 60 | tst_node(tst_node const& other) |
11fdf7f2 TL |
61 | : reference_count(0) |
62 | , left(other.left) | |
63 | , middle(other.middle) | |
64 | , right(other.right) | |
65 | , data(other.data ? new T(*other.data) : 0) | |
66 | , value(other.value) | |
7c673cae FG |
67 | { |
68 | } | |
69 | ||
70 | // If you fancy a slight improvement in memory use, | |
71 | // reference_count + value could probably be packed | |
72 | // in the space for a single int. | |
73 | int reference_count; | |
74 | boost::intrusive_ptr<tst_node> left; | |
75 | boost::intrusive_ptr<tst_node> middle; | |
76 | boost::intrusive_ptr<tst_node> right; | |
77 | boost::scoped_ptr<T> data; | |
78 | CharT value; | |
11fdf7f2 TL |
79 | |
80 | private: | |
7c673cae FG |
81 | tst_node& operator=(tst_node const&); |
82 | }; | |
83 | ||
84 | template <typename T, typename CharT> | |
85 | void intrusive_ptr_add_ref(tst_node<T, CharT>* ptr) | |
11fdf7f2 TL |
86 | { |
87 | ++ptr->reference_count; | |
88 | } | |
7c673cae FG |
89 | |
90 | template <typename T, typename CharT> | |
91 | void intrusive_ptr_release(tst_node<T, CharT>* ptr) | |
11fdf7f2 TL |
92 | { |
93 | if (--ptr->reference_count == 0) delete ptr; | |
94 | } | |
7c673cae FG |
95 | |
96 | /////////////////////////////////////////////////////////////////////////// | |
11fdf7f2 | 97 | template <typename T, typename CharT> class tst |
7c673cae FG |
98 | { |
99 | typedef tst_node<T, CharT> node_t; | |
100 | typedef boost::intrusive_ptr<node_t> node_ptr; | |
101 | node_ptr root; | |
102 | ||
11fdf7f2 | 103 | public: |
7c673cae FG |
104 | struct search_info |
105 | { | |
11fdf7f2 | 106 | T* data; |
7c673cae FG |
107 | std::size_t length; |
108 | }; | |
109 | ||
11fdf7f2 | 110 | void swap(tst& other) { root.swap(other.root); } |
7c673cae FG |
111 | |
112 | // Adds symbol to ternary search tree. | |
113 | // If it already exists, then replace it with new value. | |
114 | // | |
115 | // pre: first != last | |
116 | template <typename IteratorT> | |
117 | T* add(IteratorT first, IteratorT const& last, T const& data) | |
118 | { | |
11fdf7f2 | 119 | assert(first != last); |
7c673cae FG |
120 | |
121 | node_ptr* np = &root; | |
122 | CharT ch = *first; | |
123 | ||
11fdf7f2 TL |
124 | for (;;) { |
125 | if (!*np) { | |
7c673cae FG |
126 | *np = new node_t(ch); |
127 | } | |
11fdf7f2 | 128 | else if ((*np)->reference_count > 1) { |
7c673cae FG |
129 | *np = new node_t(**np); |
130 | } | |
131 | ||
11fdf7f2 | 132 | if (ch < (*np)->value) { |
7c673cae FG |
133 | np = &(*np)->left; |
134 | } | |
11fdf7f2 | 135 | else if (ch == (*np)->value) { |
7c673cae FG |
136 | ++first; |
137 | if (first == last) break; | |
138 | ch = *first; | |
139 | np = &(*np)->middle; | |
140 | } | |
11fdf7f2 | 141 | else { |
7c673cae FG |
142 | np = &(*np)->right; |
143 | } | |
144 | } | |
145 | ||
146 | boost::scoped_ptr<T> new_data(new T(data)); | |
147 | boost::swap((*np)->data, new_data); | |
148 | return (*np)->data.get(); | |
149 | } | |
11fdf7f2 | 150 | |
7c673cae FG |
151 | template <typename ScannerT> |
152 | search_info find(ScannerT const& scan) const | |
153 | { | |
11fdf7f2 | 154 | search_info result = {0, 0}; |
7c673cae FG |
155 | if (scan.at_end()) { |
156 | return result; | |
157 | } | |
158 | ||
159 | typedef typename ScannerT::iterator_t iterator_t; | |
11fdf7f2 TL |
160 | node_ptr np = root; |
161 | CharT ch = *scan; | |
162 | iterator_t latest = scan.first; | |
7c673cae FG |
163 | std::size_t length = 0; |
164 | ||
11fdf7f2 | 165 | while (np) { |
7c673cae FG |
166 | if (ch < np->value) // => go left! |
167 | { | |
168 | np = np->left; | |
169 | } | |
170 | else if (ch == np->value) // => go middle! | |
171 | { | |
172 | ++scan; | |
173 | ++length; | |
174 | ||
175 | // Found a potential match. | |
11fdf7f2 | 176 | if (np->data.get()) { |
7c673cae FG |
177 | result.data = np->data.get(); |
178 | result.length = length; | |
179 | latest = scan.first; | |
180 | } | |
181 | ||
182 | if (scan.at_end()) break; | |
183 | ch = *scan; | |
184 | np = np->middle; | |
185 | } | |
186 | else // (ch > np->value) => go right! | |
187 | { | |
188 | np = np->right; | |
189 | } | |
190 | } | |
191 | ||
192 | scan.first = latest; | |
193 | return result; | |
194 | } | |
195 | }; | |
196 | ||
11fdf7f2 TL |
197 | typedef boost::spirit::classic:: |
198 | symbols<std::string, char, quickbook::tst<std::string, char> > | |
199 | string_symbols; | |
7c673cae FG |
200 | } // namespace quickbook |
201 | ||
202 | #endif |