2 // Copyright (c) 2008-2009 Ben Hanson (http://www.benhanson.net/)
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 #ifndef BOOST_LEXER_FILE_INPUT
7 #define BOOST_LEXER_FILE_INPUT
9 #include "char_traits.hpp"
14 #include "state_machine.hpp"
20 template<typename CharT, typename Traits = char_traits<CharT> >
21 class basic_file_input
27 friend class basic_file_input;
32 std::size_t unique_id;
37 // Construct in end() state.
// Equality for token data: the comparisons of id, unique_id, start and end
// against the corresponding fields of rhs_ are joined with &&.
45 bool operator == (const data &rhs_) const
47 return id == rhs_.id && unique_id == rhs_.unique_id &&
48 start == rhs_.start && end == rhs_.end &&
// Two iterators compare equal when their _data members compare equal.
58 bool operator == (const iterator &rhs_) const
60 return _data == rhs_._data;
// Inequality is defined as the negation of operator ==.
63 bool operator != (const iterator &rhs_) const
65 return !(*this == rhs_);
78 // Let compiler generate operator = ().
// Prefix increment; returns an iterator by reference.
81 iterator &operator ++ ()
// Postfix increment; returns an iterator by value. iter_ is a copy of
// *this taken on entry.
88 iterator operator ++ (int)
90 iterator iter_ = *this;
98 const detail::internals &internals_ =
99 _input->_state_machine->data ();
101 _data.start = _data.end;
103 if (internals_._dfa->size () == 1)
105 _data.id = _input->next (&internals_._lookup->front ()->
106 front (), internals_._dfa_alphabet.front (),
107 &internals_._dfa->front ()->front (), _data.start,
108 _data.end, _data.unique_id);
112 _data.id = _input->next (internals_, _data.state, _data.start,
113 _data.end, _data.unique_id);
120 // Ensure current state matches that returned by end().
126 // Not owner (obviously!)
127 basic_file_input *_input;
131 friend class iterator;
133 // Make it explicit that we are NOT taking a copy of state_machine_!
// Only the state_machine_ pointer is stored, in _state_machine.
// buffer_size_ (default 4096) is the initial element count of _buffer;
// buffer_increment_ (default 1024) is kept in _buffer_increment, which
// reload_buffer() adds to the buffer size when it resizes.
134 basic_file_input (const basic_state_machine<CharT> *state_machine_,
135 std::basic_ifstream<CharT> *is_,
136 const std::streamsize buffer_size_ = 4096,
137 const std::streamsize buffer_increment_ = 1024) :
138 _state_machine (state_machine_),
140 _buffer_size (buffer_size_),
141 _buffer_increment (buffer_increment_),
// The buffer starts out as _buffer_size copies of '!'.
142 _buffer (_buffer_size, '!')
// Cache raw pointers to the first buffer element and to one past the last.
144 _start_buffer = &_buffer.front ();
145 _end_buffer = _start_buffer + _buffer.size ();
// Both token pointers begin at the end of the buffer.
146 _start_token = _end_buffer;
147 _end_token = _end_buffer;
155 // Override the default id of 0 (EOF)
156 iter_._data.id = npos;
157 iter_._data.start = 0;
159 iter_._data.state = 0;
169 iter_._data.start = 0;
176 // This temporary is mandatory, otherwise the
177 // pointer calculations won't work!
178 const CharT *temp_ = _end_buffer;
180 _start_token = _end_token = _end_buffer;
181 reload_buffer (temp_, true, _end_token);
185 typedef std::basic_istream<CharT> istream;
186 typedef std::vector<CharT> buffer;
188 const basic_state_machine<CharT> *_state_machine;
189 const std::streamsize _buffer_size;
190 const std::streamsize _buffer_increment;
193 CharT *_start_buffer;
195 const CharT *_start_token;
196 const CharT *_end_token;
// Multi-state overload: scans one token from the buffered input using the
// lookup table, alphabet size and DFA that internals_ holds for start_state_.
// The iterator stores the returned value as the token id.
//   start_state_ - selects the tables on entry; overwritten with the value
//                  read from the current DFA row at state_index.
//   start_       - receives the token's starting position (_start_token).
//   end_         - compared against curr_ in the end-of-line check below.
//   unique_id_   - passed by non-const reference; presumably receives uid_
//                  (TODO confirm).
199 std::size_t next (const detail::internals &internals_,
200 std::size_t &start_state_, const CharT * &start_, const CharT * &end_,
201 std::size_t &unique_id_)
// The new token begins where the previous one ended.
203 _start_token = _end_token;
// Pick the per-state lookup table, alphabet size and DFA.
206 const std::size_t * lookup_ = &internals_._lookup[start_state_]->
208 std::size_t dfa_alphabet_ = internals_._dfa_alphabet[start_state_];
209 const std::size_t *dfa_ = &internals_._dfa[start_state_]->front ();
// Rows are addressed as dfa_[state * dfa_alphabet_] below, so this is the
// row for state index 1.
210 const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
211 const CharT *curr_ = _start_token;
// A non-zero first entry in the row marks it as an end state.
212 bool end_state_ = *ptr_ != 0;
// Row entries at id_index / unique_id_index hold the token ids.
213 std::size_t id_ = *(ptr_ + id_index);
214 std::size_t uid_ = *(ptr_ + unique_id_index);
215 const CharT *end_token_ = curr_;
// The read position has reached the end of the buffered data: ask
// reload_buffer() for more.
219 if (curr_ >= _end_buffer)
221 if (!reload_buffer (curr_, end_state_, end_token_))
// Row entries at bol_index / eol_index give the states to enter for
// beginning-of-line and end-of-line; zero means there is none.
228 const std::size_t BOL_state_ = ptr_[bol_index];
229 const std::size_t EOL_state_ = ptr_[eol_index];
// Token starts at the very beginning of the buffer or right after '\n':
// move to the beginning-of-line state.
231 if (BOL_state_ && (_start_token == _start_buffer ||
232 *(_start_token - 1) == '\n'))
234 ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
// Current character is '\n': move to the end-of-line state.
236 else if (EOL_state_ && *curr_ == '\n')
238 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
// Ordinary transition: the next state is read from the current row at a
// column obtained through lookup_.
242 const std::size_t state_ =
243 ptr_[lookup_[static_cast<typename Traits::index_type>
251 ptr_ = &dfa_[state_ * dfa_alphabet_];
// Capture the ids and the follow-on lexer state from the current row.
257 id_ = *(ptr_ + id_index);
258 uid_ = *(ptr_ + unique_id_index);
259 start_state_ = *(ptr_ + state_index);
// True when the token start has reached the end of the buffered data.
264 if (_start_token >= _end_buffer)
271 const std::size_t EOL_state_ = ptr_[eol_index];
// NOTE(review): end_ is the caller's reference parameter (the iterator
// passes its previous token end), not _end_buffer - confirm this is the
// intended bound for the end-of-line test.
273 if (EOL_state_ && curr_ == end_)
275 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
280 id_ = *(ptr_ + id_index);
281 uid_ = *(ptr_ + unique_id_index);
282 start_state_ = *(ptr_ + state_index);
289 // return longest match
290 _end_token = end_token_;
// An id of 0 here jumps back to the 'again' label instead of falling
// through.
292 if (id_ == 0) goto again;
296 // No match causes char to be skipped
297 _end_token = _start_token + 1;
// Report where the token starts.
302 start_ = _start_token;
// Single-DFA overload: the caller passes the lookup table, alphabet size and
// DFA directly, and there is no start-state parameter. The iterator uses
// this overload when the state machine holds exactly one DFA and stores the
// returned value as the token id.
//   start_     - receives the token's starting position (_start_token).
//   end_       - compared against curr_ in the end-of-line check below.
//   unique_id_ - passed by non-const reference; presumably receives uid_
//                (TODO confirm).
308 std::size_t next (const std::size_t * const lookup_,
309 const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
310 const CharT * &start_, const CharT * &end_, std::size_t &unique_id_)
// The new token begins where the previous one ended.
312 _start_token = _end_token;
// Rows are addressed as dfa_[state * dfa_alphabet_] below, so this is the
// row for state index 1.
314 const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
315 const CharT *curr_ = _start_token;
// A non-zero first entry in the row marks it as an end state.
316 bool end_state_ = *ptr_ != 0;
// Row entries at id_index / unique_id_index hold the token ids.
317 std::size_t id_ = *(ptr_ + id_index);
318 std::size_t uid_ = *(ptr_ + unique_id_index);
319 const CharT *end_token_ = curr_;
// The read position has reached the end of the buffered data: ask
// reload_buffer() for more.
323 if (curr_ >= _end_buffer)
325 if (!reload_buffer (curr_, end_state_, end_token_))
// Row entries at bol_index / eol_index give the states to enter for
// beginning-of-line and end-of-line; zero means there is none.
332 const std::size_t BOL_state_ = ptr_[bol_index];
333 const std::size_t EOL_state_ = ptr_[eol_index];
// Token starts at the very beginning of the buffer or right after '\n':
// move to the beginning-of-line state.
335 if (BOL_state_ && (_start_token == _start_buffer ||
336 *(_start_token - 1) == '\n'))
338 ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
// Current character is '\n': move to the end-of-line state.
340 else if (EOL_state_ && *curr_ == '\n')
342 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
// Ordinary transition: the next state is read from the current row at a
// column obtained through lookup_.
346 const std::size_t state_ =
347 ptr_[lookup_[static_cast<typename Traits::index_type>
355 ptr_ = &dfa_[state_ * dfa_alphabet_];
// Capture the ids from the current row.
361 id_ = *(ptr_ + id_index);
362 uid_ = *(ptr_ + unique_id_index);
// True when the token start has reached the end of the buffered data.
367 if (_start_token >= _end_buffer)
374 const std::size_t EOL_state_ = ptr_[eol_index];
// NOTE(review): end_ is the caller's reference parameter (the iterator
// passes its previous token end), not _end_buffer - confirm this is the
// intended bound for the end-of-line test.
376 if (EOL_state_ && curr_ == end_)
378 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
383 id_ = *(ptr_ + id_index);
384 uid_ = *(ptr_ + unique_id_index);
391 // return longest match
392 _end_token = end_token_;
396 // No match causes char to be skipped
397 _end_token = _start_token + 1;
// Report where the token starts.
402 start_ = _start_token;
// Reads more data from _stream into the buffer while keeping the token that
// is currently being scanned, then rebases the caller's pointers onto the
// token's new position.
//   curr_      - caller's read position; rewritten relative to the new
//                _start_token.
//   end_state_ - NOTE(review): presumably gates the end_token_ rebasing
//                below - confirm.
//   end_token_ - caller's longest-match end; rewritten relative to the new
//                _start_token.
// success_ starts out as "the stream had not already reported EOF".
408 bool reload_buffer (const CharT * &curr_, const bool end_state_,
409 const CharT * &end_token_)
411 bool success_ = !_stream->eof ();
// Remember the old token start so the pointers can be rebased afterwards.
415 const CharT *old_start_token_ = _start_token;
416 std::size_t old_size_ = _buffer.size ();
417 std::size_t count_ = 0;
// The token already starts at buffer index 1, which is where the shifting
// branch below would place it, so shifting cannot free any room.
419 if (_start_token - 1 == _start_buffer)
421 // Run out of buffer space, so increase.
422 _buffer.resize (old_size_ + _buffer_increment, '!');
// resize() may reallocate, so the cached buffer pointers are re-derived.
423 _start_buffer = &_buffer.front ();
424 _start_token = _start_buffer + 1;
// New data goes into the region added by the resize.
425 _stream->read (_start_buffer + old_size_,
427 count_ = _stream->gcount ();
428 _end_buffer = _start_buffer + old_size_ + count_;
// The token starts before the end of the buffered data: slide it, together
// with the one character before it, to the front of the buffer. The extra
// character is presumably kept for the '\n' look-behind in next().
430 else if (_start_token < _end_buffer)
432 const std::size_t len_ = _end_buffer - _start_token;
433 // Some systems have memcpy in namespace std.
// NOTE(review): source and destination can overlap when the retained span
// is longer than the shift distance; memcpy is undefined for overlapping
// ranges (memmove is the overlap-safe call) - confirm.
436 memcpy (_start_buffer, _start_token - 1, (len_ + 1) *
// Fill the remainder of the buffer after the retained data.
438 _stream->read (_start_buffer + len_ + 1,
439 static_cast<std::streamsize> (_buffer.size () - len_ - 1));
440 count_ = _stream->gcount ();
441 _start_token = _start_buffer + 1;
442 _end_buffer = _start_buffer + len_ + 1 + count_;
// Remaining case: read from the start of the buffer; the token then starts
// at the buffer start.
446 _stream->read (_start_buffer, static_cast<std::streamsize>
448 count_ = _stream->gcount ();
449 _start_token = _start_buffer;
450 _end_buffer = _start_buffer + count_;
// Rebase the caller's pointers: keep each one's offset from the old token
// start, applied to the new token start.
455 end_token_ = _start_token +
456 (end_token_ - old_start_token_);
459 curr_ = _start_token + (curr_ - old_start_token_);
465 // Disallow copying of buffer
466 basic_file_input (const basic_file_input &);
467 const basic_file_input &operator = (const basic_file_input &);
470 typedef basic_file_input<char> file_input;
471 typedef basic_file_input<wchar_t> wfile_input;