2 // Copyright (c) 2008-2009 Ben Hanson (http://www.benhanson.net/)
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_INPUT_HPP
7 #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_INPUT_HPP
9 #include "char_traits.hpp"
11 #include "state_machine.hpp"
12 #include <iterator> // for std::iterator_traits
// Template parameters:
//   FwdIter - iterator type of the input (the constructor takes a begin_ and
//             an end_ of this type).
//   Traits  - character traits; defaults to char_traits instantiated on
//             FwdIter's value_type as reported by std::iterator_traits.
18 template<typename FwdIter, typename Traits =
19 char_traits<typename std::iterator_traits<FwdIter>::value_type> >
26 friend class basic_input;
31 std::size_t unique_id;
37 // Construct in end() state.
// Member-wise equality: true only when id, unique_id, start, end, bol and
// state all compare equal to those of rhs_.
46 bool operator == (const data &rhs_) const
48 return id == rhs_.id && unique_id == rhs_.unique_id &&
49 start == rhs_.start && end == rhs_.end &&
50 bol == rhs_.bol && state == rhs_.state;
// Two iterators are equal when their _data members compare equal.
59 bool operator == (const iterator &rhs_) const
61 return _data == rhs_._data;
// Inequality, defined as the negation of operator ==.
64 bool operator != (const iterator &rhs_) const
66 return !(*this == rhs_);
79 // Let compiler generate operator = ().
// Pre-increment: returns a reference to this iterator.
82 iterator &operator ++ ()
// Post-increment: returns an iterator by value. A copy of the current
// iterator is taken first.
// NOTE(review): presumably that copy is what gets returned after advancing -
// confirm against the full body.
89 iterator operator ++ (int)
91 iterator iter_ = *this;
98 // Not owner (obviously!)
99 const basic_input *_input;
// Fetch the state machine's internal tables through the (non-owned) input.
104 const detail::internals &internals_ =
105 _input->_state_machine->data ();
// The new token begins where the previous one ended.
107 _data.start = _data.end;
// When _dfa holds exactly one entry, the overloads of next () that take the
// first lookup table, first alphabet size and first DFA table directly are
// used.
109 if (internals_._dfa->size () == 1)
// If a BOL or EOL assertion was seen, call the overload that also takes
// _data.bol.
111 if (internals_._seen_BOL_assertion ||
112 internals_._seen_EOL_assertion)
115 (&internals_._lookup->front ()->front (),
116 internals_._dfa_alphabet.front (),
117 &internals_._dfa->front ()->front (),
118 _data.bol, _data.end, _input->_end, _data.unique_id);
// Same tables, but the overload without the bol argument.
122 _data.id = next (&internals_._lookup->front ()->front (),
123 internals_._dfa_alphabet.front (), &internals_.
124 _dfa->front ()->front (), _data.end, _input->_end,
// The calls below use the overloads that take the whole internals_ together
// with _data.state; again the bol-aware one is chosen when a BOL or EOL
// assertion was seen.
130 if (internals_._seen_BOL_assertion ||
131 internals_._seen_EOL_assertion)
133 _data.id = next (internals_, _data.state,
134 _data.bol, _data.end, _input->_end, _data.unique_id);
138 _data.id = next (internals_, _data.state,
139 _data.end, _input->_end, _data.unique_id);
// An empty token located at the end of the input is handled specially.
143 if (_data.end == _input->_end && _data.start == _data.end)
145 // Ensure current state matches that returned by end().
// Scans forward from start_token_ using the tables selected by start_state_,
// honouring beginning-of-line (BOL) and end-of-line (EOL) transitions. The
// calls in this file assign the result to _data.id.
//   internals_   - state machine tables (_lookup, _dfa_alphabet, _dfa).
//   start_state_ - in/out: index of the tables to scan with; overwritten with
//                  the state recorded for the longest match.
//   bol_         - true when scanning starts at the beginning of a line.
//   start_token_ - in/out: scan start; overwritten with the recorded end
//                  position of the longest match.
//   end_         - end of the input.
//   unique_id_   - NOTE(review): presumably receives the unique id of the
//                  match - confirm against the full body.
150 std::size_t next (const detail::internals &internals_,
151 std::size_t &start_state_, bool bol_,
152 FwdIter &start_token_, const FwdIter &end_,
153 std::size_t &unique_id_)
// Check for an already exhausted input.
155 if (start_token_ == end_)
// Select the lookup table, alphabet size and DFA table for start_state_.
// ptr_ starts dfa_alphabet_ entries past the beginning of the DFA table.
162 const std::size_t * lookup_ = &internals_._lookup[start_state_]->
164 std::size_t dfa_alphabet_ = internals_._dfa_alphabet[start_state_];
165 const std::size_t *dfa_ = &internals_._dfa[start_state_]->front ();
166 const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
167 FwdIter curr_ = start_token_;
// A non-zero first entry in the current row marks an end state.
168 bool end_state_ = *ptr_ != 0;
// Candidate values for the match, initialised from the starting row, the
// incoming state, the incoming bol flag and the starting position.
169 std::size_t id_ = *(ptr_ + id_index);
170 std::size_t uid_ = *(ptr_ + unique_id_index);
171 std::size_t end_start_state_ = start_state_;
172 bool end_bol_ = bol_;
173 FwdIter end_token_ = start_token_;
175 while (curr_ != end_)
// BOL and EOL transitions stored in the current row (0 is treated as none).
177 const std::size_t BOL_state_ = ptr_[bol_index];
178 const std::size_t EOL_state_ = ptr_[eol_index];
// Take the BOL transition when at the beginning of a line.
180 if (BOL_state_ && bol_)
182 ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
// Take the EOL transition when the current character is a newline; the
// character is only inspected here, not consumed.
184 else if (EOL_state_ && *curr_ == '\n')
186 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
// Consume one character and record whether it was a newline.
190 typename Traits::char_type prev_char_ = *curr_++;
192 bol_ = prev_char_ == '\n';
// Map the character through lookup_ to a column of the current row to obtain
// the next state, then move ptr_ to that state's row.
194 const std::size_t state_ =
195 ptr_[lookup_[static_cast<typename Traits::index_type>
203 ptr_ = &dfa_[state_ * dfa_alphabet_];
// Refresh the candidate id, unique id and state from the current row.
209 id_ = *(ptr_ + id_index);
210 uid_ = *(ptr_ + unique_id_index);
211 end_start_state_ = *(ptr_ + state_index);
// The EOL transition is also taken when the input is exhausted.
217 const std::size_t EOL_state_ = ptr_[eol_index];
219 if (EOL_state_ && curr_ == end_)
221 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
226 id_ = *(ptr_ + id_index);
227 uid_ = *(ptr_ + unique_id_index);
228 end_start_state_ = *(ptr_ + state_index);
236 // return longest match
237 start_state_ = end_start_state_;
238 start_token_ = end_token_;
// Store the bol flag recorded for the match in the iterator's data.
247 _data.bol = end_bol_;
252 // No match causes char to be skipped
// bol reflects whether the character at start_token_ is a newline.
253 _data.bol = *start_token_ == '\n';
// Scans forward from start_token_ using the tables selected by start_state_,
// without any BOL/EOL handling. The calls in this file assign the result to
// _data.id.
//   internals_   - state machine tables (_lookup, _dfa_alphabet, _dfa).
//   start_state_ - in/out: index of the tables to scan with; overwritten with
//                  the state recorded for the longest match.
//   start_token_ - in/out: scan start; overwritten with the recorded end
//                  position of the longest match.
//   end_         - end of the input.
//   unique_id_   - NOTE(review): presumably receives the unique id of the
//                  match - confirm against the full body.
263 std::size_t next (const detail::internals &internals_,
264 std::size_t &start_state_, FwdIter &start_token_,
265 FwdIter const &end_, std::size_t &unique_id_)
// Check for an already exhausted input.
267 if (start_token_ == end_)
// Select the lookup table, alphabet size and DFA table for start_state_.
// ptr_ starts dfa_alphabet_ entries past the beginning of the DFA table.
274 const std::size_t * lookup_ = &internals_._lookup[start_state_]->
276 std::size_t dfa_alphabet_ = internals_._dfa_alphabet[start_state_];
277 const std::size_t *dfa_ = &internals_._dfa[start_state_]->front ();
278 const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
279 FwdIter curr_ = start_token_;
// A non-zero first entry in the current row marks an end state.
280 bool end_state_ = *ptr_ != 0;
// Candidate values for the match, initialised from the starting row, the
// incoming state and the starting position.
281 std::size_t id_ = *(ptr_ + id_index);
282 std::size_t uid_ = *(ptr_ + unique_id_index);
283 std::size_t end_start_state_ = start_state_;
284 FwdIter end_token_ = start_token_;
286 while (curr_ != end_)
// Consume one character, map it through lookup_ to a column of the current
// row to obtain the next state, then move ptr_ to that state's row.
288 const std::size_t state_ = ptr_[lookup_[static_cast
289 <typename Traits::index_type>(*curr_++)]];
296 ptr_ = &dfa_[state_ * dfa_alphabet_];
// Refresh the candidate id, unique id and state from the current row.
301 id_ = *(ptr_ + id_index);
302 uid_ = *(ptr_ + unique_id_index);
303 end_start_state_ = *(ptr_ + state_index);
310 // return longest match
311 start_state_ = end_start_state_;
312 start_token_ = end_token_;
// An id of 0 sends control back to the 'again' label to scan again using the
// updated start_state_ and start_token_.
314 if (id_ == 0) goto again;
318 // No match causes char to be skipped
// Scans forward from start_token_ using explicitly supplied tables,
// honouring beginning-of-line (BOL) and end-of-line (EOL) transitions.
//   lookup_       - table mapping a character index to a column of a DFA row.
//   dfa_alphabet_ - number of entries per DFA row.
//   dfa_          - the DFA table itself.
//   bol_          - true when scanning starts at the beginning of a line.
//   start_token_  - in/out: scan start; overwritten with the recorded end
//                   position of the longest match.
//   end_          - end of the input.
//   unique_id_    - NOTE(review): presumably receives the unique id of the
//                   match - confirm against the full body.
328 std::size_t next (const std::size_t * const lookup_,
329 const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
330 bool bol_, FwdIter &start_token_, FwdIter const &end_,
331 std::size_t &unique_id_)
// Check for an already exhausted input.
333 if (start_token_ == end_)
// ptr_ starts dfa_alphabet_ entries past the beginning of the DFA table.
339 const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
340 FwdIter curr_ = start_token_;
// A non-zero first entry in the current row marks an end state.
341 bool end_state_ = *ptr_ != 0;
// Candidate values for the match, initialised from the starting row, the
// incoming bol flag and the starting position.
342 std::size_t id_ = *(ptr_ + id_index);
343 std::size_t uid_ = *(ptr_ + unique_id_index);
344 bool end_bol_ = bol_;
345 FwdIter end_token_ = start_token_;
347 while (curr_ != end_)
// BOL and EOL transitions stored in the current row (0 is treated as none).
349 const std::size_t BOL_state_ = ptr_[bol_index];
350 const std::size_t EOL_state_ = ptr_[eol_index];
// Take the BOL transition when at the beginning of a line.
352 if (BOL_state_ && bol_)
354 ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
// Take the EOL transition when the current character is a newline; the
// character is only inspected here, not consumed.
356 else if (EOL_state_ && *curr_ == '\n')
358 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
// Consume one character and record whether it was a newline.
362 typename Traits::char_type prev_char_ = *curr_++;
364 bol_ = prev_char_ == '\n';
// Map the character through lookup_ to a column of the current row to obtain
// the next state, then move ptr_ to that state's row.
366 const std::size_t state_ =
367 ptr_[lookup_[static_cast<typename Traits::index_type>
375 ptr_ = &dfa_[state_ * dfa_alphabet_];
// Refresh the candidate id and unique id from the current row.
381 id_ = *(ptr_ + id_index);
382 uid_ = *(ptr_ + unique_id_index);
// The EOL transition is also taken when the input is exhausted.
388 const std::size_t EOL_state_ = ptr_[eol_index];
390 if (EOL_state_ && curr_ == end_)
392 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
397 id_ = *(ptr_ + id_index);
398 uid_ = *(ptr_ + unique_id_index);
406 // return longest match
// Store the bol flag recorded for the match in the iterator's data.
407 _data.bol = end_bol_;
408 start_token_ = end_token_;
412 // No match causes char to be skipped
// bol reflects whether the character at start_token_ is a newline.
413 _data.bol = *start_token_ == '\n';
// Scans forward from start_token_ using explicitly supplied tables, without
// any BOL/EOL handling.
//   lookup_       - table mapping a character index to a column of a DFA row.
//   dfa_alphabet_ - number of entries per DFA row.
//   dfa_          - the DFA table itself.
//   start_token_  - in/out: scan start; overwritten with the recorded end
//                   position of the longest match.
//   end_          - end of the input.
//   unique_id_    - NOTE(review): presumably receives the unique id of the
//                   match - confirm against the full body.
423 std::size_t next (const std::size_t * const lookup_,
424 const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
425 FwdIter &start_token_, FwdIter const &end_,
426 std::size_t &unique_id_)
// Check for an already exhausted input.
428 if (start_token_ == end_)
// ptr_ starts dfa_alphabet_ entries past the beginning of the DFA table.
434 const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
435 FwdIter curr_ = start_token_;
// A non-zero first entry in the current row marks an end state.
436 bool end_state_ = *ptr_ != 0;
// Candidate values for the match, initialised from the starting row and the
// starting position.
437 std::size_t id_ = *(ptr_ + id_index);
438 std::size_t uid_ = *(ptr_ + unique_id_index);
439 FwdIter end_token_ = start_token_;
441 while (curr_ != end_)
// Consume one character, map it through lookup_ to a column of the current
// row to obtain the next state, then move ptr_ to that state's row.
443 const std::size_t state_ = ptr_[lookup_[static_cast
444 <typename Traits::index_type>(*curr_++)]];
451 ptr_ = &dfa_[state_ * dfa_alphabet_];
// Refresh the candidate id and unique id from the current row.
456 id_ = *(ptr_ + id_index);
457 uid_ = *(ptr_ + unique_id_index);
464 // return longest match
465 start_token_ = end_token_;
469 // No match causes char to be skipped
480 friend class iterator;
// Constructor.
//   state_machine_ - pointer to the state machine to scan with; only the
//                    pointer is stored, so the pointee must outlive this
//                    object.
//   begin_, end_   - iterators of type FwdIter.
// NOTE(review): begin_/end_ presumably initialise _begin/_end, delimiting the
// input - confirm against the full initialiser list.
482 // Make it explicit that we are NOT taking a copy of state_machine_!
483 basic_input (const basic_state_machine<typename Traits::char_type>
484 *state_machine_, const FwdIter &begin_, const FwdIter &end_) :
485 _state_machine (state_machine_),
// Builds an iterator for the start of the input: its token range is the
// empty range at _begin and its state is 0.
491 iterator begin () const
496 // Over-ride default of 0 (EOI)
497 iter_._data.id = npos;
498 iter_._data.start = _begin;
499 iter_._data.end = _begin;
// bol starts out true only if the state machine saw a BOL assertion.
500 iter_._data.bol = _state_machine->data ()._seen_BOL_assertion;
501 iter_._data.state = 0;
// Builds the past-the-end iterator: its token range is the empty range at
// _end.
506 iterator end () const
511 iter_._data.start = _end;
512 iter_._data.end = _end;
517 const basic_state_machine<typename Traits::char_type> *_state_machine;
// Convenience typedefs of basic_input for common iterator types, each using
// the default Traits: std::string iterators, std::basic_string<wchar_t>
// iterators, and raw const char * / const wchar_t * pointers.
522 typedef basic_input<std::string::iterator> iter_input;
523 typedef basic_input<std::basic_string<wchar_t>::iterator> iter_winput;
524 typedef basic_input<const char *> ptr_input;
525 typedef basic_input<const wchar_t *> ptr_winput;