]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright (c) 2001-2011 Hartmut Kaiser |
2 | // | |
3 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
4 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
5 | ||
6 | #if !defined(BOOST_SPIRIT_LEXERTL_ITERATOR_TOKENISER_MARCH_22_2007_0859AM) | |
7 | #define BOOST_SPIRIT_LEXERTL_ITERATOR_TOKENISER_MARCH_22_2007_0859AM | |
8 | ||
9 | #if defined(_MSC_VER) | |
10 | #pragma once | |
11 | #endif | |
12 | ||
13 | #include <boost/detail/iterator.hpp> | |
14 | #include <boost/spirit/home/support/detail/lexer/state_machine.hpp> | |
15 | #include <boost/spirit/home/support/detail/lexer/consts.hpp> | |
16 | #include <boost/spirit/home/support/detail/lexer/size_t.hpp> | |
17 | #include <boost/spirit/home/support/detail/lexer/char_traits.hpp> | |
18 | #include <vector> | |
19 | ||
20 | namespace boost { namespace spirit { namespace lex { namespace lexertl | |
21 | { | |
22 | /////////////////////////////////////////////////////////////////////////// | |
23 | template<typename Iterator> | |
24 | class basic_iterator_tokeniser | |
25 | { | |
26 | public: | |
27 | typedef std::vector<std::size_t> size_t_vector; | |
28 | typedef typename boost::detail::iterator_traits<Iterator>::value_type | |
29 | char_type; | |
30 | ||
31 | static std::size_t next ( | |
32 | boost::lexer::basic_state_machine<char_type> const& state_machine_ | |
33 | , std::size_t &dfa_state_, bool& bol_, Iterator &start_token_ | |
34 | , Iterator const& end_, std::size_t& unique_id_) | |
35 | { | |
36 | if (start_token_ == end_) | |
37 | { | |
38 | unique_id_ = boost::lexer::npos; | |
39 | return 0; | |
40 | } | |
41 | ||
42 | bool bol = bol_; | |
43 | boost::lexer::detail::internals const& internals_ = | |
44 | state_machine_.data(); | |
45 | ||
46 | again: | |
47 | std::size_t const* lookup_ = &internals_._lookup[dfa_state_]-> | |
48 | front (); | |
49 | std::size_t dfa_alphabet_ = internals_._dfa_alphabet[dfa_state_]; | |
50 | std::size_t const* dfa_ = &internals_._dfa[dfa_state_]->front (); | |
51 | ||
52 | std::size_t const* ptr_ = dfa_ + dfa_alphabet_; | |
53 | Iterator curr_ = start_token_; | |
54 | bool end_state_ = *ptr_ != 0; | |
55 | std::size_t id_ = *(ptr_ + boost::lexer::id_index); | |
56 | std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index); | |
57 | std::size_t end_start_state_ = dfa_state_; | |
58 | bool end_bol_ = bol_; | |
59 | Iterator end_token_ = start_token_; | |
60 | ||
61 | while (curr_ != end_) | |
62 | { | |
63 | std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index]; | |
64 | std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index]; | |
65 | ||
66 | if (BOL_state_ && bol) | |
67 | { | |
68 | ptr_ = &dfa_[BOL_state_ * dfa_alphabet_]; | |
69 | } | |
70 | else if (EOL_state_ && *curr_ == '\n') | |
71 | { | |
72 | ptr_ = &dfa_[EOL_state_ * dfa_alphabet_]; | |
73 | } | |
74 | else | |
75 | { | |
76 | typedef typename | |
77 | boost::detail::iterator_traits<Iterator>::value_type | |
78 | value_type; | |
79 | typedef typename | |
80 | boost::lexer::char_traits<value_type>::index_type | |
81 | index_type; | |
82 | ||
83 | index_type index = | |
84 | boost::lexer::char_traits<value_type>::call(*curr_++); | |
85 | bol = (index == '\n') ? true : false; | |
86 | std::size_t const state_ = ptr_[ | |
87 | lookup_[static_cast<std::size_t>(index)]]; | |
88 | ||
89 | if (state_ == 0) | |
90 | { | |
91 | break; | |
92 | } | |
93 | ||
94 | ptr_ = &dfa_[state_ * dfa_alphabet_]; | |
95 | } | |
96 | ||
97 | if (*ptr_) | |
98 | { | |
99 | end_state_ = true; | |
100 | id_ = *(ptr_ + boost::lexer::id_index); | |
101 | uid_ = *(ptr_ + boost::lexer::unique_id_index); | |
102 | end_start_state_ = *(ptr_ + boost::lexer::state_index); | |
103 | end_bol_ = bol; | |
104 | end_token_ = curr_; | |
105 | } | |
106 | } | |
107 | ||
108 | std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index]; | |
109 | ||
110 | if (EOL_state_ && curr_ == end_) | |
111 | { | |
112 | ptr_ = &dfa_[EOL_state_ * dfa_alphabet_]; | |
113 | ||
114 | if (*ptr_) | |
115 | { | |
116 | end_state_ = true; | |
117 | id_ = *(ptr_ + boost::lexer::id_index); | |
118 | uid_ = *(ptr_ + boost::lexer::unique_id_index); | |
119 | end_start_state_ = *(ptr_ + boost::lexer::state_index); | |
120 | end_bol_ = bol; | |
121 | end_token_ = curr_; | |
122 | } | |
123 | } | |
124 | ||
125 | if (end_state_) { | |
126 | // return longest match | |
127 | dfa_state_ = end_start_state_; | |
128 | start_token_ = end_token_; | |
129 | ||
130 | if (id_ == 0) | |
131 | { | |
132 | bol = end_bol_; | |
133 | goto again; | |
134 | } | |
135 | else | |
136 | { | |
137 | bol_ = end_bol_; | |
138 | } | |
139 | } | |
140 | else { | |
141 | bol_ = (*start_token_ == '\n') ? true : false; | |
142 | id_ = boost::lexer::npos; | |
143 | uid_ = boost::lexer::npos; | |
144 | } | |
145 | ||
146 | unique_id_ = uid_; | |
147 | return id_; | |
148 | } | |
149 | ||
150 | /////////////////////////////////////////////////////////////////////// | |
151 | static std::size_t next ( | |
152 | boost::lexer::basic_state_machine<char_type> const& state_machine_ | |
153 | , bool& bol_, Iterator &start_token_, Iterator const& end_ | |
154 | , std::size_t& unique_id_) | |
155 | { | |
156 | if (start_token_ == end_) | |
157 | { | |
158 | unique_id_ = boost::lexer::npos; | |
159 | return 0; | |
160 | } | |
161 | ||
162 | bool bol = bol_; | |
163 | std::size_t const* lookup_ = &state_machine_.data()._lookup[0]->front(); | |
164 | std::size_t dfa_alphabet_ = state_machine_.data()._dfa_alphabet[0]; | |
165 | std::size_t const* dfa_ = &state_machine_.data()._dfa[0]->front (); | |
166 | std::size_t const* ptr_ = dfa_ + dfa_alphabet_; | |
167 | ||
168 | Iterator curr_ = start_token_; | |
169 | bool end_state_ = *ptr_ != 0; | |
170 | std::size_t id_ = *(ptr_ + boost::lexer::id_index); | |
171 | std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index); | |
172 | bool end_bol_ = bol_; | |
173 | Iterator end_token_ = start_token_; | |
174 | ||
175 | while (curr_ != end_) | |
176 | { | |
177 | std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index]; | |
178 | std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index]; | |
179 | ||
180 | if (BOL_state_ && bol) | |
181 | { | |
182 | ptr_ = &dfa_[BOL_state_ * dfa_alphabet_]; | |
183 | } | |
184 | else if (EOL_state_ && *curr_ == '\n') | |
185 | { | |
186 | ptr_ = &dfa_[EOL_state_ * dfa_alphabet_]; | |
187 | } | |
188 | else | |
189 | { | |
190 | typedef typename | |
191 | boost::detail::iterator_traits<Iterator>::value_type | |
192 | value_type; | |
193 | typedef typename | |
194 | boost::lexer::char_traits<value_type>::index_type | |
195 | index_type; | |
196 | ||
197 | index_type index = | |
198 | boost::lexer::char_traits<value_type>::call(*curr_++); | |
199 | bol = (index == '\n') ? true : false; | |
200 | std::size_t const state_ = ptr_[ | |
201 | lookup_[static_cast<std::size_t>(index)]]; | |
202 | ||
203 | if (state_ == 0) | |
204 | { | |
205 | break; | |
206 | } | |
207 | ||
208 | ptr_ = &dfa_[state_ * dfa_alphabet_]; | |
209 | } | |
210 | ||
211 | if (*ptr_) | |
212 | { | |
213 | end_state_ = true; | |
214 | id_ = *(ptr_ + boost::lexer::id_index); | |
215 | uid_ = *(ptr_ + boost::lexer::unique_id_index); | |
216 | end_bol_ = bol; | |
217 | end_token_ = curr_; | |
218 | } | |
219 | } | |
220 | ||
221 | std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index]; | |
222 | ||
223 | if (EOL_state_ && curr_ == end_) | |
224 | { | |
225 | ptr_ = &dfa_[EOL_state_ * dfa_alphabet_]; | |
226 | ||
227 | if (*ptr_) | |
228 | { | |
229 | end_state_ = true; | |
230 | id_ = *(ptr_ + boost::lexer::id_index); | |
231 | uid_ = *(ptr_ + boost::lexer::unique_id_index); | |
232 | end_bol_ = bol; | |
233 | end_token_ = curr_; | |
234 | } | |
235 | } | |
236 | ||
237 | if (end_state_) { | |
238 | // return longest match | |
239 | bol_ = end_bol_; | |
240 | start_token_ = end_token_; | |
241 | } | |
242 | else { | |
243 | bol_ = *start_token_ == '\n'; | |
244 | id_ = boost::lexer::npos; | |
245 | uid_ = boost::lexer::npos; | |
246 | } | |
247 | ||
248 | unique_id_ = uid_; | |
249 | return id_; | |
250 | } | |
251 | }; | |
252 | ||
253 | }}}} | |
254 | ||
255 | #endif |