]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | #ifndef BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__ |
2 | #define BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__ | |
3 | ||
4 | /* Copyright (c) 2004-2005 CrystalClear Software, Inc. | |
5 | * Use, modification and distribution is subject to the | |
6 | * Boost Software License, Version 1.0. (See accompanying | |
7 | * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) | |
8 | * Author: Jeff Garland, Bart Garst | |
9 | * $Date$ | |
10 | */ | |
11 | ||
12 | ||
13 | #include "boost/lexical_cast.hpp" //error without? | |
14 | #include "boost/algorithm/string/case_conv.hpp" | |
15 | #include <map> | |
16 | #include <string> | |
17 | #include <vector> | |
18 | #include <algorithm> | |
19 | ||
20 | namespace boost { namespace date_time { | |
21 | ||
22 | ||
23 | template<typename charT> | |
24 | struct parse_match_result | |
25 | { | |
26 | parse_match_result() : | |
27 | match_depth(0), | |
28 | current_match(-1)// -1 is match_not-found value | |
29 | {} | |
30 | typedef std::basic_string<charT> string_type; | |
31 | string_type remaining() const | |
32 | { | |
33 | if (match_depth == cache.size()) { | |
34 | return string_type(); | |
35 | } | |
36 | if (current_match == -1) { | |
37 | return cache; | |
38 | } | |
39 | //some of the cache was used return the rest | |
40 | return string_type(cache, match_depth); | |
41 | } | |
42 | charT last_char() const | |
43 | { | |
44 | return cache[cache.size()-1]; | |
45 | } | |
46 | //! Returns true if more characters were parsed than was necessary | |
47 | /*! Should be used in conjunction with last_char() | |
48 | * to get the remaining character. | |
49 | */ | |
50 | bool has_remaining() const | |
51 | { | |
52 | return (cache.size() > match_depth); | |
53 | } | |
54 | ||
55 | // cache will hold characters that have been read from the stream | |
56 | string_type cache; | |
57 | unsigned short match_depth; | |
58 | short current_match; | |
59 | enum PARSE_STATE { PARSE_ERROR= -1 }; | |
60 | }; | |
61 | ||
62 | //for debug -- really only char streams... | |
63 | template<typename charT> | |
64 | std::basic_ostream<charT>& | |
65 | operator<<(std::basic_ostream<charT>& os, parse_match_result<charT>& mr) | |
66 | { | |
67 | os << "cm: " << mr.current_match | |
68 | << " C: '" << mr.cache | |
69 | << "' md: " << mr.match_depth | |
70 | << " R: " << mr.remaining(); | |
71 | return os; | |
72 | } | |
73 | ||
74 | ||
75 | ||
76 | //! Recursive data structure to allow efficient parsing of various strings | |
77 | /*! This class provides a quick lookup by building what amounts to a | |
78 | * tree data structure. It also features a match function which can | |
79 | * can handle nasty input interators by caching values as it recurses | |
80 | * the tree so that it can backtrack as needed. | |
81 | */ | |
82 | template<typename charT> | |
83 | struct string_parse_tree | |
84 | { | |
85 | #if BOOST_WORKAROUND( __BORLANDC__, BOOST_TESTED_AT(0x581) ) | |
86 | typedef std::multimap<charT, string_parse_tree< charT> > ptree_coll; | |
87 | #else | |
88 | typedef std::multimap<charT, string_parse_tree > ptree_coll; | |
89 | #endif | |
90 | typedef typename ptree_coll::value_type value_type; | |
91 | typedef typename ptree_coll::iterator iterator; | |
92 | typedef typename ptree_coll::const_iterator const_iterator; | |
93 | typedef std::basic_string<charT> string_type; | |
94 | typedef std::vector<std::basic_string<charT> > collection_type; | |
95 | typedef parse_match_result<charT> parse_match_result_type; | |
96 | ||
97 | /*! Parameter "starting_point" designates where the numbering begins. | |
98 | * A starting_point of zero will start the numbering at zero | |
99 | * (Sun=0, Mon=1, ...) were a starting_point of one starts the | |
100 | * numbering at one (Jan=1, Feb=2, ...). The default is zero, | |
101 | * negative vaules are not allowed */ | |
102 | string_parse_tree(collection_type names, unsigned int starting_point=0) | |
103 | { | |
104 | // iterate thru all the elements and build the tree | |
105 | unsigned short index = 0; | |
106 | while (index != names.size() ) { | |
107 | string_type s = boost::algorithm::to_lower_copy(names[index]); | |
108 | insert(s, static_cast<unsigned short>(index + starting_point)); | |
109 | index++; | |
110 | } | |
111 | //set the last tree node = index+1 indicating a value | |
112 | index++; | |
113 | } | |
114 | ||
115 | ||
116 | string_parse_tree(short value = -1) : | |
117 | m_value(value) | |
118 | {} | |
119 | ptree_coll m_next_chars; | |
120 | short m_value; | |
121 | ||
122 | void insert(const string_type& s, unsigned short value) | |
123 | { | |
124 | unsigned int i = 0; | |
125 | iterator ti; | |
126 | while(i < s.size()) { | |
127 | if (i==0) { | |
128 | if (i == (s.size()-1)) { | |
129 | ti = m_next_chars.insert(value_type(s[i], | |
130 | string_parse_tree<charT>(value))); | |
131 | } | |
132 | else { | |
133 | ti = m_next_chars.insert(value_type(s[i], | |
134 | string_parse_tree<charT>())); | |
135 | } | |
136 | } | |
137 | else { | |
138 | if (i == (s.size()-1)) { | |
139 | ti = ti->second.m_next_chars.insert(value_type(s[i], | |
140 | string_parse_tree<charT>(value))); | |
141 | } | |
142 | ||
143 | else { | |
144 | ti = ti->second.m_next_chars.insert(value_type(s[i], | |
145 | string_parse_tree<charT>())); | |
146 | } | |
147 | ||
148 | } | |
149 | i++; | |
150 | } | |
151 | } | |
152 | ||
153 | ||
154 | //! Recursive function that finds a matching string in the tree. | |
155 | /*! Must check match_results::has_remaining() after match() is | |
156 | * called. This is required so the user can determine if | |
157 | * stream iterator is already pointing to the expected | |
158 | * character or not (match() might advance sitr to next char in stream). | |
159 | * | |
160 | * A parse_match_result that has been returned from a failed match | |
161 | * attempt can be sent in to the match function of a different | |
162 | * string_parse_tree to attempt a match there. Use the iterators | |
163 | * for the partially consumed stream, the parse_match_result object, | |
164 | * and '0' for the level parameter. */ | |
165 | short | |
166 | match(std::istreambuf_iterator<charT>& sitr, | |
167 | std::istreambuf_iterator<charT>& stream_end, | |
168 | parse_match_result_type& result, | |
169 | unsigned int& level) const | |
170 | { | |
171 | ||
172 | level++; | |
173 | charT c; | |
174 | // if we conditionally advance sitr, we won't have | |
175 | // to consume the next character past the input | |
176 | bool adv_itr = true; | |
177 | if (level > result.cache.size()) { | |
178 | if (sitr == stream_end) return 0; //bail - input exhausted | |
179 | c = static_cast<charT>(std::tolower(*sitr)); | |
180 | //result.cache += c; | |
181 | //sitr++; | |
182 | } | |
183 | else { | |
184 | // if we're looking for characters from the cache, | |
185 | // we don't want to increment sitr | |
186 | adv_itr = false; | |
187 | c = static_cast<charT>(std::tolower(result.cache[level-1])); | |
188 | } | |
189 | const_iterator litr = m_next_chars.lower_bound(c); | |
190 | const_iterator uitr = m_next_chars.upper_bound(c); | |
191 | while (litr != uitr) { // equal if not found | |
192 | if(adv_itr) { | |
193 | sitr++; | |
194 | result.cache += c; | |
195 | } | |
196 | if (litr->second.m_value != -1) { // -1 is default value | |
197 | if (result.match_depth < level) { | |
198 | result.current_match = litr->second.m_value; | |
199 | result.match_depth = static_cast<unsigned short>(level); | |
200 | } | |
201 | litr->second.match(sitr, stream_end, | |
202 | result, level); | |
203 | level--; | |
204 | } | |
205 | else { | |
206 | litr->second.match(sitr, stream_end, | |
207 | result, level); | |
208 | level--; | |
209 | } | |
210 | ||
211 | if(level <= result.cache.size()) { | |
212 | adv_itr = false; | |
213 | } | |
214 | ||
215 | litr++; | |
216 | } | |
217 | return result.current_match; | |
218 | ||
219 | } | |
220 | ||
221 | /*! Must check match_results::has_remaining() after match() is | |
222 | * called. This is required so the user can determine if | |
223 | * stream iterator is already pointing to the expected | |
224 | * character or not (match() might advance sitr to next char in stream). | |
225 | */ | |
226 | parse_match_result_type | |
227 | match(std::istreambuf_iterator<charT>& sitr, | |
228 | std::istreambuf_iterator<charT>& stream_end) const | |
229 | { | |
230 | // lookup to_lower of char in tree. | |
231 | unsigned int level = 0; | |
232 | // string_type cache; | |
233 | parse_match_result_type result; | |
234 | match(sitr, stream_end, result, level); | |
235 | return result; | |
236 | } | |
237 | ||
238 | void printme(std::ostream& os, int& level) | |
239 | { | |
240 | level++; | |
241 | iterator itr = m_next_chars.begin(); | |
242 | iterator end = m_next_chars.end(); | |
243 | // os << "starting level: " << level << std::endl; | |
244 | while (itr != end) { | |
245 | os << "level: " << level | |
246 | << " node: " << itr->first | |
247 | << " value: " << itr->second.m_value | |
248 | << std::endl; | |
249 | itr->second.printme(os, level); | |
250 | itr++; | |
251 | } | |
252 | level--; | |
253 | } | |
254 | ||
255 | void print(std::ostream& os) | |
256 | { | |
257 | int level = 0; | |
258 | printme(os, level); | |
259 | } | |
260 | ||
261 | void printmatch(std::ostream& os, charT c) | |
262 | { | |
263 | iterator litr = m_next_chars.lower_bound(c); | |
264 | iterator uitr = m_next_chars.upper_bound(c); | |
265 | os << "matches for: " << c << std::endl; | |
266 | while (litr != uitr) { | |
267 | os << " node: " << litr->first | |
268 | << " value: " << litr->second.m_value | |
269 | << std::endl; | |
270 | litr++; | |
271 | } | |
272 | } | |
273 | ||
274 | }; | |
275 | ||
276 | ||
277 | } } //namespace | |
278 | #endif |