2 // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 #ifndef BOOST_LEXER_STATE_MACHINE_HPP
7 #define BOOST_LEXER_STATE_MACHINE_HPP
10 #include "conversion/char_state_machine.hpp"
13 #include "internals.hpp"
15 #include "containers/ptr_vector.hpp"
23 template<typename CharT>
24 class basic_state_machine
27 typedef CharT char_type;
32 friend class basic_state_machine;
36 // Current iterator info
40 std::size_t transitions;
41 std::size_t transition;
46 std::size_t unique_id;
48 std::size_t bol_index;
49 std::size_t eol_index;
51 // Current transition info
52 basic_string_token<CharT> token;
53 std::size_t goto_state;
71 bool operator == (const data &rhs_) const
73 return dfa == rhs_.dfa &&
74 states == rhs_.states &&
75 state == rhs_.state &&
76 transitions == rhs_.transitions &&
77 transition == rhs_.transition &&
78 end_state == rhs_.end_state &&
80 unique_id == rhs_.unique_id &&
81 goto_dfa == rhs_.goto_dfa &&
82 bol_index == rhs_.bol_index &&
83 eol_index == rhs_.eol_index &&
84 token == rhs_.token &&
85 goto_state == rhs_.goto_state;
100 bool operator == (const iterator &rhs_) const
102 return _dfas == rhs_._dfas && _dfa == rhs_._dfa &&
103 _states == rhs_._states && _state == rhs_._state &&
104 _transitions == rhs_._transitions &&
105 _transition == rhs_._transition;
108 bool operator != (const iterator &rhs_) const
110 return !(*this == rhs_);
123 // Let compiler generate operator = ().
126 iterator &operator ++ ()
133 iterator operator ++ (int)
135 iterator iter_ = *this;
143 _dfas = _states = _transitions = 0;
144 _dfa = _state = _transition = npos;
148 basic_state_machine *_sm;
154 std::size_t _transitions;
155 std::size_t _transition;
156 typename detail::basic_char_state_machine<CharT>::state::
157 size_t_string_token_map::const_iterator _token_iter;
158 typename detail::basic_char_state_machine<CharT>::state::
159 size_t_string_token_map::const_iterator _token_end;
163 bool reset_state_ = false;
165 if (_transition >= _transitions)
167 _transition = _data.transition = 0;
168 _data.state = ++_state;
171 if (_state >= _states)
178 reset_state_ = false;
182 _states = _data.states =
183 _sm->_csm._sm_vector[_dfa].size ();
184 _state = _data.state = 0;
190 _data.transition = _transition;
195 const typename detail::basic_char_state_machine<CharT>::
196 state *ptr_ = &_sm->_csm._sm_vector[_dfa][_state];
198 _transitions = _data.transitions = ptr_->_transitions.size ();
199 _data.end_state = ptr_->_end_state;
200 _data.id = ptr_->_id;
201 _data.unique_id = ptr_->_unique_id;
202 _data.goto_dfa = ptr_->_state;
203 _data.bol_index = ptr_->_bol_index;
204 _data.eol_index = ptr_->_eol_index;
205 _token_iter = ptr_->_transitions.begin ();
206 _token_end = ptr_->_transitions.end ();
209 if (_token_iter != _token_end)
211 _data.token = _token_iter->second;
212 _data.goto_state = _token_iter->first;
218 _data.token.clear ();
219 _data.goto_state = npos;
224 friend class iterator;
226 basic_state_machine ()
238 // Don't include _csm in this test, as irrelevant to state.
239 return _internals._lookup->empty () &&
240 _internals._dfa_alphabet.empty () &&
241 _internals._dfa->empty ();
244 std::size_t size () const
246 return _internals._dfa->size ();
249 bool operator == (const basic_state_machine &rhs_) const
251 // Don't include _csm in this test, as irrelevant to state.
252 return _internals._lookup == rhs_._internals._lookup &&
253 _internals._dfa_alphabet == rhs_._internals._dfa_alphabet &&
254 _internals._dfa == rhs_._internals._dfa &&
255 _internals._seen_BOL_assertion ==
256 rhs_._internals._seen_BOL_assertion &&
257 _internals._seen_EOL_assertion ==
258 rhs_._internals._seen_EOL_assertion;
261 iterator begin () const
265 iter_._sm = const_cast<basic_state_machine *>(this);
270 const typename detail::basic_char_state_machine<CharT>::
271 state_vector *ptr_ = &_csm._sm_vector.front ();
273 iter_._dfas = _csm._sm_vector.size ();
274 iter_._states = iter_._data.states = ptr_->size ();
275 iter_._transitions = iter_._data.transitions =
276 ptr_->front ()._transitions.size ();
277 iter_._dfa = iter_._data.dfa = 0;
278 iter_._state = iter_._data.state = 0;
279 iter_._transition = 0;
280 iter_._data.end_state = ptr_->front ()._end_state;
281 iter_._data.id = ptr_->front ()._id;
282 iter_._data.unique_id = ptr_->front ()._unique_id;
283 iter_._data.goto_dfa = ptr_->front ()._state;
284 iter_._data.bol_index = ptr_->front ()._bol_index;
285 iter_._data.eol_index = ptr_->front ()._eol_index;
286 iter_._token_iter = ptr_->front ()._transitions.begin ();
287 iter_._token_end = ptr_->front ()._transitions.end ();
289 // Deal with case where there is only a bol or eol
290 // but no other transitions.
291 if (iter_._transitions)
300 iterator end () const
304 iter_._sm = const_cast<basic_state_machine *>(this);
308 void swap (basic_state_machine &sm_)
310 _internals.swap (sm_._internals);
311 _csm.swap (sm_._csm);
314 const detail::internals &data () const
320 detail::internals _internals;
321 mutable detail::basic_char_state_machine<CharT> _csm;
323 void check_for_csm () const
327 human_readable (_csm);
331 void human_readable (detail::basic_char_state_machine<CharT> &sm_) const
333 const std::size_t max_ = sizeof (CharT) == 1 ?
334 num_chars : num_wchar_ts;
335 const std::size_t start_states_ = _internals._dfa->size ();
338 sm_._sm_vector.resize (start_states_);
340 for (std::size_t start_state_index_ = 0;
341 start_state_index_ < start_states_; ++start_state_index_)
343 const detail::internals::size_t_vector *lu_ =
344 _internals._lookup[start_state_index_];
345 const std::size_t alphabet_ =
346 _internals._dfa_alphabet[start_state_index_] - dfa_offset;
347 std::vector<std::basic_string<CharT> > chars_ (alphabet_);
348 const std::size_t states_ = _internals._dfa[start_state_index_]->
349 size () / (alphabet_ + dfa_offset);
350 const std::size_t *read_ptr_ = &_internals.
351 _dfa[start_state_index_]->front () + alphabet_ + dfa_offset;
353 sm_._sm_vector[start_state_index_].resize (states_ - 1);
355 for (std::size_t alpha_index_ = 0; alpha_index_ < max_;
358 const std::size_t col_ = lu_->at (alpha_index_);
360 if (col_ != static_cast<std::size_t>(dead_state_index))
362 chars_[col_ - dfa_offset] += static_cast<CharT>
367 for (std::size_t state_index_ = 1; state_index_ < states_;
370 typename detail::basic_char_state_machine<CharT>::state
371 *state_ = &sm_._sm_vector[start_state_index_]
374 state_->_end_state = *read_ptr_ != 0;
375 state_->_id = *(read_ptr_ + id_index);
376 state_->_unique_id = *(read_ptr_ + unique_id_index);
377 state_->_state = *(read_ptr_ + state_index);
378 state_->_bol_index = *(read_ptr_ + bol_index) - 1;
379 state_->_eol_index = *(read_ptr_ + eol_index) - 1;
380 read_ptr_ += dfa_offset;
382 for (std::size_t col_index_ = 0; col_index_ < alphabet_;
383 ++col_index_, ++read_ptr_)
385 const std::size_t transition_ = *read_ptr_;
387 if (transition_ != 0)
389 const std::size_t i_ = transition_ - 1;
390 typename detail::basic_char_state_machine<CharT>::
391 state::size_t_string_token_map::iterator iter_ =
392 state_->_transitions.find (i_);
394 if (iter_ == state_->_transitions.end ())
396 basic_string_token<CharT> token_
397 (false, chars_[col_index_]);
398 typename detail::basic_char_state_machine<CharT>::
399 state::size_t_string_token_pair pair_
402 state_->_transitions.insert (pair_);
406 iter_->second._charset += chars_[col_index_];
411 for (typename detail::basic_char_state_machine<CharT>::state::
412 size_t_string_token_map::iterator iter_ =
413 state_->_transitions.begin (),
414 end_ = state_->_transitions.end ();
415 iter_ != end_; ++iter_)
417 std::sort (iter_->second._charset.begin (),
418 iter_->second._charset.end ());
419 iter_->second.normalise ();
426 typedef basic_state_machine<char> state_machine;
427 typedef basic_state_machine<wchar_t> wstate_machine;