]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // state_machine.hpp |
2 | // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) | |
3 | // | |
4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
6 | #ifndef BOOST_LEXER_STATE_MACHINE_HPP | |
7 | #define BOOST_LEXER_STATE_MACHINE_HPP | |
8 | ||
9 | #include <algorithm> | |
10 | #include "conversion/char_state_machine.hpp" | |
11 | #include "consts.hpp" | |
12 | #include <deque> | |
13 | #include "internals.hpp" | |
14 | #include <map> | |
15 | #include "containers/ptr_vector.hpp" | |
16 | #include "size_t.hpp" | |
17 | #include <string> | |
18 | ||
19 | namespace boost | |
20 | { | |
21 | namespace lexer | |
22 | { | |
23 | template<typename CharT> | |
24 | class basic_state_machine | |
25 | { | |
26 | public: | |
27 | typedef CharT char_type; | |
28 | ||
29 | class iterator | |
30 | { | |
31 | public: | |
32 | friend class basic_state_machine; | |
33 | ||
34 | struct data | |
35 | { | |
36 | // Current iterator info | |
37 | std::size_t dfa; | |
38 | std::size_t states; | |
39 | std::size_t state; | |
40 | std::size_t transitions; | |
41 | std::size_t transition; | |
42 | ||
43 | // Current state info | |
44 | bool end_state; | |
45 | std::size_t id; | |
46 | std::size_t unique_id; | |
47 | std::size_t goto_dfa; | |
48 | std::size_t bol_index; | |
49 | std::size_t eol_index; | |
50 | ||
51 | // Current transition info | |
52 | basic_string_token<CharT> token; | |
53 | std::size_t goto_state; | |
54 | ||
55 | data () : | |
56 | dfa (npos), | |
57 | states (0), | |
58 | state (npos), | |
59 | transitions (0), | |
60 | transition (npos), | |
61 | end_state (false), | |
62 | id (npos), | |
63 | unique_id (npos), | |
64 | goto_dfa (npos), | |
65 | bol_index (npos), | |
66 | eol_index (npos), | |
67 | goto_state (npos) | |
68 | { | |
69 | } | |
70 | ||
71 | bool operator == (const data &rhs_) const | |
72 | { | |
73 | return dfa == rhs_.dfa && | |
74 | states == rhs_.states && | |
75 | state == rhs_.state && | |
76 | transitions == rhs_.transitions && | |
77 | transition == rhs_.transition && | |
78 | end_state == rhs_.end_state && | |
79 | id == rhs_.id && | |
80 | unique_id == rhs_.unique_id && | |
81 | goto_dfa == rhs_.goto_dfa && | |
82 | bol_index == rhs_.bol_index && | |
83 | eol_index == rhs_.eol_index && | |
84 | token == rhs_.token && | |
85 | goto_state == rhs_.goto_state; | |
86 | } | |
87 | }; | |
88 | ||
89 | iterator () : | |
90 | _sm (0), | |
91 | _dfas (0), | |
92 | _dfa (npos), | |
93 | _states (0), | |
94 | _state (npos), | |
95 | _transitions (0), | |
96 | _transition (npos) | |
97 | { | |
98 | } | |
99 | ||
100 | bool operator == (const iterator &rhs_) const | |
101 | { | |
102 | return _dfas == rhs_._dfas && _dfa == rhs_._dfa && | |
103 | _states == rhs_._states && _state == rhs_._state && | |
104 | _transitions == rhs_._transitions && | |
105 | _transition == rhs_._transition; | |
106 | } | |
107 | ||
108 | bool operator != (const iterator &rhs_) const | |
109 | { | |
110 | return !(*this == rhs_); | |
111 | } | |
112 | ||
113 | data &operator * () | |
114 | { | |
115 | return _data; | |
116 | } | |
117 | ||
118 | data *operator -> () | |
119 | { | |
120 | return &_data; | |
121 | } | |
122 | ||
123 | // Let compiler generate operator = (). | |
124 | ||
125 | // prefix version | |
126 | iterator &operator ++ () | |
127 | { | |
128 | next (); | |
129 | return *this; | |
130 | } | |
131 | ||
132 | // postfix version | |
133 | iterator operator ++ (int) | |
134 | { | |
135 | iterator iter_ = *this; | |
136 | ||
137 | next (); | |
138 | return iter_; | |
139 | } | |
140 | ||
141 | void clear () | |
142 | { | |
143 | _dfas = _states = _transitions = 0; | |
144 | _dfa = _state = _transition = npos; | |
145 | } | |
146 | ||
147 | private: | |
148 | basic_state_machine *_sm; | |
149 | data _data; | |
150 | std::size_t _dfas; | |
151 | std::size_t _dfa; | |
152 | std::size_t _states; | |
153 | std::size_t _state; | |
154 | std::size_t _transitions; | |
155 | std::size_t _transition; | |
156 | typename detail::basic_char_state_machine<CharT>::state:: | |
157 | size_t_string_token_map::const_iterator _token_iter; | |
158 | typename detail::basic_char_state_machine<CharT>::state:: | |
159 | size_t_string_token_map::const_iterator _token_end; | |
160 | ||
161 | void next () | |
162 | { | |
163 | bool reset_state_ = false; | |
164 | ||
165 | if (_transition >= _transitions) | |
166 | { | |
167 | _transition = _data.transition = 0; | |
168 | _data.state = ++_state; | |
169 | reset_state_ = true; | |
170 | ||
171 | if (_state >= _states) | |
172 | { | |
173 | ++_dfa; | |
174 | ||
175 | if (_dfa >= _dfas) | |
176 | { | |
177 | clear (); | |
178 | reset_state_ = false; | |
179 | } | |
180 | else | |
181 | { | |
182 | _states = _data.states = | |
183 | _sm->_csm._sm_vector[_dfa].size (); | |
184 | _state = _data.state = 0; | |
185 | } | |
186 | } | |
187 | } | |
188 | else | |
189 | { | |
190 | _data.transition = _transition; | |
191 | } | |
192 | ||
193 | if (reset_state_) | |
194 | { | |
195 | const typename detail::basic_char_state_machine<CharT>:: | |
196 | state *ptr_ = &_sm->_csm._sm_vector[_dfa][_state]; | |
197 | ||
198 | _transitions = _data.transitions = ptr_->_transitions.size (); | |
199 | _data.end_state = ptr_->_end_state; | |
200 | _data.id = ptr_->_id; | |
201 | _data.unique_id = ptr_->_unique_id; | |
202 | _data.goto_dfa = ptr_->_state; | |
203 | _data.bol_index = ptr_->_bol_index; | |
204 | _data.eol_index = ptr_->_eol_index; | |
205 | _token_iter = ptr_->_transitions.begin (); | |
206 | _token_end = ptr_->_transitions.end (); | |
207 | } | |
208 | ||
209 | if (_token_iter != _token_end) | |
210 | { | |
211 | _data.token = _token_iter->second; | |
212 | _data.goto_state = _token_iter->first; | |
213 | ++_token_iter; | |
214 | ++_transition; | |
215 | } | |
216 | else | |
217 | { | |
218 | _data.token.clear (); | |
219 | _data.goto_state = npos; | |
220 | } | |
221 | } | |
222 | }; | |
223 | ||
224 | friend class iterator; | |
225 | ||
226 | basic_state_machine () | |
227 | { | |
228 | } | |
229 | ||
230 | void clear () | |
231 | { | |
232 | _internals.clear (); | |
233 | _csm.clear (); | |
234 | } | |
235 | ||
236 | bool empty () const | |
237 | { | |
238 | // Don't include _csm in this test, as irrelevant to state. | |
239 | return _internals._lookup->empty () && | |
240 | _internals._dfa_alphabet.empty () && | |
241 | _internals._dfa->empty (); | |
242 | } | |
243 | ||
244 | std::size_t size () const | |
245 | { | |
246 | return _internals._dfa->size (); | |
247 | } | |
248 | ||
249 | bool operator == (const basic_state_machine &rhs_) const | |
250 | { | |
251 | // Don't include _csm in this test, as irrelevant to state. | |
252 | return _internals._lookup == rhs_._internals._lookup && | |
253 | _internals._dfa_alphabet == rhs_._internals._dfa_alphabet && | |
254 | _internals._dfa == rhs_._internals._dfa && | |
255 | _internals._seen_BOL_assertion == | |
256 | rhs_._internals._seen_BOL_assertion && | |
257 | _internals._seen_EOL_assertion == | |
258 | rhs_._internals._seen_EOL_assertion; | |
259 | } | |
260 | ||
261 | iterator begin () const | |
262 | { | |
263 | iterator iter_; | |
264 | ||
265 | iter_._sm = const_cast<basic_state_machine *>(this); | |
266 | check_for_csm (); | |
267 | ||
268 | if (!_csm.empty ()) | |
269 | { | |
270 | const typename detail::basic_char_state_machine<CharT>:: | |
271 | state_vector *ptr_ = &_csm._sm_vector.front (); | |
272 | ||
273 | iter_._dfas = _csm._sm_vector.size (); | |
274 | iter_._states = iter_._data.states = ptr_->size (); | |
275 | iter_._transitions = iter_._data.transitions = | |
276 | ptr_->front ()._transitions.size (); | |
277 | iter_._dfa = iter_._data.dfa = 0; | |
278 | iter_._state = iter_._data.state = 0; | |
279 | iter_._transition = 0; | |
280 | iter_._data.end_state = ptr_->front ()._end_state; | |
281 | iter_._data.id = ptr_->front ()._id; | |
282 | iter_._data.unique_id = ptr_->front ()._unique_id; | |
283 | iter_._data.goto_dfa = ptr_->front ()._state; | |
284 | iter_._data.bol_index = ptr_->front ()._bol_index; | |
285 | iter_._data.eol_index = ptr_->front ()._eol_index; | |
286 | iter_._token_iter = ptr_->front ()._transitions.begin (); | |
287 | iter_._token_end = ptr_->front ()._transitions.end (); | |
288 | ||
289 | // Deal with case where there is only a bol or eol | |
290 | // but no other transitions. | |
291 | if (iter_._transitions) | |
292 | { | |
293 | ++iter_; | |
294 | } | |
295 | } | |
296 | ||
297 | return iter_; | |
298 | } | |
299 | ||
300 | iterator end () const | |
301 | { | |
302 | iterator iter_; | |
303 | ||
304 | iter_._sm = const_cast<basic_state_machine *>(this); | |
305 | return iter_; | |
306 | } | |
307 | ||
308 | void swap (basic_state_machine &sm_) | |
309 | { | |
310 | _internals.swap (sm_._internals); | |
311 | _csm.swap (sm_._csm); | |
312 | } | |
313 | ||
314 | const detail::internals &data () const | |
315 | { | |
316 | return _internals; | |
317 | } | |
318 | ||
319 | private: | |
320 | detail::internals _internals; | |
321 | mutable detail::basic_char_state_machine<CharT> _csm; | |
322 | ||
323 | void check_for_csm () const | |
324 | { | |
325 | if (_csm.empty ()) | |
326 | { | |
327 | human_readable (_csm); | |
328 | } | |
329 | } | |
330 | ||
331 | void human_readable (detail::basic_char_state_machine<CharT> &sm_) const | |
332 | { | |
333 | const std::size_t max_ = sizeof (CharT) == 1 ? | |
334 | num_chars : num_wchar_ts; | |
335 | const std::size_t start_states_ = _internals._dfa->size (); | |
336 | ||
337 | sm_.clear (); | |
338 | sm_._sm_vector.resize (start_states_); | |
339 | ||
340 | for (std::size_t start_state_index_ = 0; | |
341 | start_state_index_ < start_states_; ++start_state_index_) | |
342 | { | |
343 | const detail::internals::size_t_vector *lu_ = | |
344 | _internals._lookup[start_state_index_]; | |
345 | const std::size_t alphabet_ = | |
346 | _internals._dfa_alphabet[start_state_index_] - dfa_offset; | |
347 | std::vector<std::basic_string<CharT> > chars_ (alphabet_); | |
348 | const std::size_t states_ = _internals._dfa[start_state_index_]-> | |
349 | size () / (alphabet_ + dfa_offset); | |
350 | const std::size_t *read_ptr_ = &_internals. | |
351 | _dfa[start_state_index_]->front () + alphabet_ + dfa_offset; | |
352 | ||
353 | sm_._sm_vector[start_state_index_].resize (states_ - 1); | |
354 | ||
355 | for (std::size_t alpha_index_ = 0; alpha_index_ < max_; | |
356 | ++alpha_index_) | |
357 | { | |
358 | const std::size_t col_ = lu_->at (alpha_index_); | |
359 | ||
360 | if (col_ != static_cast<std::size_t>(dead_state_index)) | |
361 | { | |
362 | chars_[col_ - dfa_offset] += static_cast<CharT> | |
363 | (alpha_index_); | |
364 | } | |
365 | } | |
366 | ||
367 | for (std::size_t state_index_ = 1; state_index_ < states_; | |
368 | ++state_index_) | |
369 | { | |
370 | typename detail::basic_char_state_machine<CharT>::state | |
371 | *state_ = &sm_._sm_vector[start_state_index_] | |
372 | [state_index_ - 1]; | |
373 | ||
374 | state_->_end_state = *read_ptr_ != 0; | |
375 | state_->_id = *(read_ptr_ + id_index); | |
376 | state_->_unique_id = *(read_ptr_ + unique_id_index); | |
377 | state_->_state = *(read_ptr_ + state_index); | |
378 | state_->_bol_index = *(read_ptr_ + bol_index) - 1; | |
379 | state_->_eol_index = *(read_ptr_ + eol_index) - 1; | |
380 | read_ptr_ += dfa_offset; | |
381 | ||
382 | for (std::size_t col_index_ = 0; col_index_ < alphabet_; | |
383 | ++col_index_, ++read_ptr_) | |
384 | { | |
385 | const std::size_t transition_ = *read_ptr_; | |
386 | ||
387 | if (transition_ != 0) | |
388 | { | |
389 | const std::size_t i_ = transition_ - 1; | |
390 | typename detail::basic_char_state_machine<CharT>:: | |
391 | state::size_t_string_token_map::iterator iter_ = | |
392 | state_->_transitions.find (i_); | |
393 | ||
394 | if (iter_ == state_->_transitions.end ()) | |
395 | { | |
396 | basic_string_token<CharT> token_ | |
397 | (false, chars_[col_index_]); | |
398 | typename detail::basic_char_state_machine<CharT>:: | |
399 | state::size_t_string_token_pair pair_ | |
400 | (i_, token_); | |
401 | ||
402 | state_->_transitions.insert (pair_); | |
403 | } | |
404 | else | |
405 | { | |
406 | iter_->second._charset += chars_[col_index_]; | |
407 | } | |
408 | } | |
409 | } | |
410 | ||
411 | for (typename detail::basic_char_state_machine<CharT>::state:: | |
412 | size_t_string_token_map::iterator iter_ = | |
413 | state_->_transitions.begin (), | |
414 | end_ = state_->_transitions.end (); | |
415 | iter_ != end_; ++iter_) | |
416 | { | |
417 | std::sort (iter_->second._charset.begin (), | |
418 | iter_->second._charset.end ()); | |
419 | iter_->second.normalise (); | |
420 | } | |
421 | } | |
422 | } | |
423 | } | |
424 | }; | |
425 | ||
426 | typedef basic_state_machine<char> state_machine; | |
427 | typedef basic_state_machine<wchar_t> wstate_machine; | |
428 | } | |
429 | } | |
430 | ||
431 | #endif |