]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright (c) 2008-2009 Ben Hanson |
2 | // Copyright (c) 2008-2011 Hartmut Kaiser | |
3 | // | |
4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
5 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
6 | ||
7 | #if !defined(BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM) | |
8 | #define BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM | |
9 | ||
10 | #if defined(_MSC_VER) | |
11 | #pragma once | |
12 | #endif | |
13 | ||
14 | #include <boost/spirit/home/support/detail/lexer/char_traits.hpp> | |
15 | #include <boost/spirit/home/support/detail/lexer/consts.hpp> | |
16 | #include <boost/spirit/home/support/detail/lexer/rules.hpp> | |
17 | #include <boost/spirit/home/support/detail/lexer/size_t.hpp> | |
18 | #include <boost/spirit/home/support/detail/lexer/state_machine.hpp> | |
19 | #include <boost/spirit/home/support/detail/lexer/debug.hpp> | |
20 | #include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp> | |
21 | #include <boost/algorithm/string.hpp> | |
22 | #include <boost/lexical_cast.hpp> | |
23 | #include <boost/scoped_array.hpp> | |
24 | ||
25 | /////////////////////////////////////////////////////////////////////////////// | |
26 | namespace boost { namespace spirit { namespace lex { namespace lexertl | |
27 | { | |
28 | namespace detail | |
29 | { | |
30 | ||
31 | /////////////////////////////////////////////////////////////////////////// | |
// Maps narrow characters and narrow C strings onto the character type CharT.
// Only the explicit specializations are defined.
template <typename CharT>
struct string_lit;

// Narrow specialization: nothing has to be converted.
template <>
struct string_lit<char>
{
    // Hand back the character unchanged.
    static char get(char c)
    {
        return c;
    }

    // Copy the null-terminated string (empty by default) into a std::string.
    static std::string get(char const* str = "")
    {
        return std::string(str);
    }
};
41 | ||
42 | template <> | |
43 | struct string_lit<wchar_t> | |
44 | { | |
45 | static wchar_t get(char c) | |
46 | { | |
47 | typedef std::ctype<wchar_t> ctype_t; | |
48 | return std::use_facet<ctype_t>(std::locale()).widen(c); | |
49 | } | |
50 | static std::basic_string<wchar_t> get(char const* source = "") | |
51 | { | |
52 | using namespace std; // some systems have size_t in ns std | |
53 | size_t len = strlen(source); | |
54 | boost::scoped_array<wchar_t> result (new wchar_t[len+1]); | |
55 | result.get()[len] = '\0'; | |
56 | ||
57 | // working with wide character streams is supported only if the | |
58 | // platform provides the std::ctype<wchar_t> facet | |
59 | BOOST_ASSERT(std::has_facet<std::ctype<wchar_t> >(std::locale())); | |
60 | ||
61 | std::use_facet<std::ctype<wchar_t> >(std::locale()) | |
62 | .widen(source, source + len, result.get()); | |
63 | return result.get(); | |
64 | } | |
65 | }; | |
66 | ||
67 | template <typename Char> | |
68 | inline Char L(char c) | |
69 | { | |
70 | return string_lit<Char>::get(c); | |
71 | } | |
72 | ||
73 | template <typename Char> | |
74 | inline std::basic_string<Char> L(char const* c = "") | |
75 | { | |
76 | return string_lit<Char>::get(c); | |
77 | } | |
78 | ||
79 | /////////////////////////////////////////////////////////////////////////// | |
// Write a separator line made of 80 '/' characters followed by a newline.
// Returns the state of the stream (os_.good()) after writing.
template <typename Char>
inline bool
generate_delimiter(std::basic_ostream<Char> &os_)
{
    std::basic_string<Char> const rule(80, '/');
    os_ << rule;
    os_ << "\n";
    return os_.good();
}
87 | ||
88 | /////////////////////////////////////////////////////////////////////////// | |
89 | // Generate a table of the names of the used lexer states, which is a bit | |
90 | // tricky, because the table stored with the rules is sorted based on the | |
91 | // names, but we need it sorted using the state ids. | |
92 | template <typename Char> | |
93 | inline bool | |
94 | generate_cpp_state_info (boost::lexer::basic_rules<Char> const& rules_ | |
95 | , std::basic_ostream<Char> &os_, Char const* name_suffix) | |
96 | { | |
97 | // we need to re-sort the state names in ascending order of the state | |
98 | // ids, filling possible gaps in between later | |
99 | typedef typename | |
100 | boost::lexer::basic_rules<Char>::string_size_t_map::const_iterator | |
101 | state_iterator; | |
102 | typedef std::map<std::size_t, Char const*> reverse_state_map_type; | |
103 | ||
104 | reverse_state_map_type reverse_state_map; | |
105 | state_iterator send = rules_.statemap().end(); | |
106 | for (state_iterator sit = rules_.statemap().begin(); sit != send; ++sit) | |
107 | { | |
108 | typedef typename reverse_state_map_type::value_type value_type; | |
109 | reverse_state_map.insert(value_type((*sit).second, (*sit).first.c_str())); | |
110 | } | |
111 | ||
112 | generate_delimiter(os_); | |
113 | os_ << "// this table defines the names of the lexer states\n"; | |
114 | os_ << boost::lexer::detail::strings<Char>::char_name() | |
115 | << " const* const lexer_state_names" | |
116 | << (name_suffix[0] ? "_" : "") << name_suffix | |
117 | << "[" << rules_.statemap().size() << "] = \n{\n"; | |
118 | ||
119 | typedef typename reverse_state_map_type::iterator iterator; | |
120 | iterator rend = reverse_state_map.end(); | |
121 | std::size_t last_id = 0; | |
122 | for (iterator rit = reverse_state_map.begin(); rit != rend; ++last_id) | |
123 | { | |
124 | for (/**/; last_id < (*rit).first; ++last_id) | |
125 | { | |
126 | os_ << " 0, // \"<undefined state>\"\n"; | |
127 | } | |
128 | os_ << " " | |
129 | << boost::lexer::detail::strings<Char>::char_prefix() | |
130 | << "\"" << (*rit).second << "\""; | |
131 | if (++rit != rend) | |
132 | os_ << ",\n"; | |
133 | else | |
134 | os_ << "\n"; // don't generate the final comma | |
135 | } | |
136 | os_ << "};\n\n"; | |
137 | ||
138 | generate_delimiter(os_); | |
139 | os_ << "// this variable defines the number of lexer states\n"; | |
140 | os_ << "std::size_t const lexer_state_count" | |
141 | << (name_suffix[0] ? "_" : "") << name_suffix | |
142 | << " = " << rules_.statemap().size() << ";\n\n"; | |
143 | return os_.good(); | |
144 | } | |
145 | ||
146 | template <typename Char> | |
147 | inline bool | |
148 | generate_cpp_state_table (std::basic_ostream<Char> &os_ | |
149 | , Char const* name_suffix, bool bol, bool eol) | |
150 | { | |
151 | std::basic_string<Char> suffix(L<Char>(name_suffix[0] ? "_" : "")); | |
152 | suffix += name_suffix; | |
153 | ||
154 | generate_delimiter(os_); | |
155 | os_ << "// this defines a generic accessors for the information above\n"; | |
156 | os_ << "struct lexer" << suffix << "\n{\n"; | |
157 | os_ << " // version number and feature-set of compatible static lexer engine\n"; | |
158 | os_ << " enum\n"; | |
159 | os_ << " {\n static_version = " | |
160 | << boost::lexical_cast<std::basic_string<Char> >(SPIRIT_STATIC_LEXER_VERSION) | |
161 | << ",\n"; | |
162 | os_ << " supports_bol = " << std::boolalpha << bol << ",\n"; | |
163 | os_ << " supports_eol = " << std::boolalpha << eol << "\n"; | |
164 | os_ << " };\n\n"; | |
165 | os_ << " // return the number of lexer states\n"; | |
166 | os_ << " static std::size_t state_count()\n"; | |
167 | os_ << " {\n return lexer_state_count" << suffix << "; \n }\n\n"; | |
168 | os_ << " // return the name of the lexer state as given by 'idx'\n"; | |
169 | os_ << " static " << boost::lexer::detail::strings<Char>::char_name() | |
170 | << " const* state_name(std::size_t idx)\n"; | |
171 | os_ << " {\n return lexer_state_names" << suffix << "[idx]; \n }\n\n"; | |
172 | os_ << " // return the next matched token\n"; | |
173 | os_ << " template<typename Iterator>\n"; | |
174 | os_ << " static std::size_t next(std::size_t &start_state_, bool& bol_\n"; | |
175 | os_ << " , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n"; | |
176 | os_ << " {\n return next_token" << suffix | |
177 | << "(start_state_, bol_, start_token_, end_, unique_id_);\n }\n"; | |
178 | os_ << "};\n\n"; | |
179 | return os_.good(); | |
180 | } | |
181 | ||
182 | /////////////////////////////////////////////////////////////////////////// | |
183 | // generate function body based on traversing the DFA tables | |
184 | template <typename Char> | |
185 | bool generate_function_body_dfa(std::basic_ostream<Char>& os_ | |
186 | , boost::lexer::basic_state_machine<Char> const &sm_) | |
187 | { | |
188 | std::size_t const dfas_ = sm_.data()._dfa->size(); | |
189 | std::size_t const lookups_ = sm_.data()._lookup->front()->size(); | |
190 | ||
191 | os_ << " enum {end_state_index, id_index, unique_id_index, " | |
192 | "state_index, bol_index,\n"; | |
193 | os_ << " eol_index, dead_state_index, dfa_offset};\n\n"; | |
194 | os_ << " static std::size_t const npos = " | |
195 | "static_cast<std::size_t>(~0);\n"; | |
196 | ||
197 | if (dfas_ > 1) | |
198 | { | |
199 | for (std::size_t state_ = 0; state_ < dfas_; ++state_) | |
200 | { | |
201 | std::size_t i_ = 0; | |
202 | std::size_t j_ = 1; | |
203 | std::size_t count_ = lookups_ / 8; | |
204 | std::size_t const* lookup_ = &sm_.data()._lookup[state_]->front(); | |
205 | std::size_t const* dfa_ = &sm_.data()._dfa[state_]->front(); | |
206 | ||
207 | os_ << " static std::size_t const lookup" << state_ | |
208 | << "_[" << lookups_ << "] = {\n "; | |
209 | for (/**/; i_ < count_; ++i_) | |
210 | { | |
211 | std::size_t const index_ = i_ * 8; | |
212 | os_ << lookup_[index_]; | |
213 | for (/**/; j_ < 8; ++j_) | |
214 | { | |
215 | os_ << ", " << lookup_[index_ + j_]; | |
216 | } | |
217 | if (i_ < count_ - 1) | |
218 | { | |
219 | os_ << ",\n "; | |
220 | } | |
221 | j_ = 1; | |
222 | } | |
223 | os_ << " };\n"; | |
224 | ||
225 | count_ = sm_.data()._dfa[state_]->size (); | |
226 | os_ << " static const std::size_t dfa" << state_ << "_[" | |
227 | << count_ << "] = {\n "; | |
228 | count_ /= 8; | |
229 | for (i_ = 0; i_ < count_; ++i_) | |
230 | { | |
231 | std::size_t const index_ = i_ * 8; | |
232 | os_ << dfa_[index_]; | |
233 | for (j_ = 1; j_ < 8; ++j_) | |
234 | { | |
235 | os_ << ", " << dfa_[index_ + j_]; | |
236 | } | |
237 | if (i_ < count_ - 1) | |
238 | { | |
239 | os_ << ",\n "; | |
240 | } | |
241 | } | |
242 | ||
243 | std::size_t const mod_ = sm_.data()._dfa[state_]->size () % 8; | |
244 | if (mod_) | |
245 | { | |
246 | std::size_t const index_ = count_ * 8; | |
247 | if (count_) | |
248 | { | |
249 | os_ << ",\n "; | |
250 | } | |
251 | os_ << dfa_[index_]; | |
252 | for (j_ = 1; j_ < mod_; ++j_) | |
253 | { | |
254 | os_ << ", " << dfa_[index_ + j_]; | |
255 | } | |
256 | } | |
257 | os_ << " };\n"; | |
258 | } | |
259 | ||
260 | std::size_t count_ = sm_.data()._dfa_alphabet.size(); | |
261 | std::size_t i_ = 1; | |
262 | ||
263 | os_ << " static std::size_t const* lookup_arr_[" << count_ | |
264 | << "] = { lookup0_"; | |
265 | for (i_ = 1; i_ < count_; ++i_) | |
266 | { | |
267 | os_ << ", " << "lookup" << i_ << "_"; | |
268 | } | |
269 | os_ << " };\n"; | |
270 | ||
271 | os_ << " static std::size_t const dfa_alphabet_arr_[" | |
272 | << count_ << "] = { "; | |
273 | os_ << sm_.data()._dfa_alphabet.front (); | |
274 | for (i_ = 1; i_ < count_; ++i_) | |
275 | { | |
276 | os_ << ", " << sm_.data()._dfa_alphabet[i_]; | |
277 | } | |
278 | os_ << " };\n"; | |
279 | ||
280 | os_ << " static std::size_t const* dfa_arr_[" << count_ | |
281 | << "] = { "; | |
282 | os_ << "dfa0_"; | |
283 | for (i_ = 1; i_ < count_; ++i_) | |
284 | { | |
285 | os_ << ", " << "dfa" << i_ << "_"; | |
286 | } | |
287 | os_ << " };\n"; | |
288 | } | |
289 | else | |
290 | { | |
291 | std::size_t const* lookup_ = &sm_.data()._lookup[0]->front(); | |
292 | std::size_t const* dfa_ = &sm_.data()._dfa[0]->front(); | |
293 | std::size_t i_ = 0; | |
294 | std::size_t j_ = 1; | |
295 | std::size_t count_ = lookups_ / 8; | |
296 | ||
297 | os_ << " static std::size_t const lookup_["; | |
298 | os_ << sm_.data()._lookup[0]->size() << "] = {\n "; | |
299 | for (/**/; i_ < count_; ++i_) | |
300 | { | |
301 | const std::size_t index_ = i_ * 8; | |
302 | os_ << lookup_[index_]; | |
303 | for (/**/; j_ < 8; ++j_) | |
304 | { | |
305 | os_ << ", " << lookup_[index_ + j_]; | |
306 | } | |
307 | if (i_ < count_ - 1) | |
308 | { | |
309 | os_ << ",\n "; | |
310 | } | |
311 | j_ = 1; | |
312 | } | |
313 | os_ << " };\n"; | |
314 | ||
315 | os_ << " static std::size_t const dfa_alphabet_ = " | |
316 | << sm_.data()._dfa_alphabet.front () << ";\n"; | |
317 | os_ << " static std::size_t const dfa_[" | |
318 | << sm_.data()._dfa[0]->size () << "] = {\n "; | |
319 | count_ = sm_.data()._dfa[0]->size () / 8; | |
320 | for (i_ = 0; i_ < count_; ++i_) | |
321 | { | |
322 | const std::size_t index_ = i_ * 8; | |
323 | os_ << dfa_[index_]; | |
324 | for (j_ = 1; j_ < 8; ++j_) | |
325 | { | |
326 | os_ << ", " << dfa_[index_ + j_]; | |
327 | } | |
328 | if (i_ < count_ - 1) | |
329 | { | |
330 | os_ << ",\n "; | |
331 | } | |
332 | } | |
333 | ||
334 | const std::size_t mod_ = sm_.data()._dfa[0]->size () % 8; | |
335 | if (mod_) | |
336 | { | |
337 | const std::size_t index_ = count_ * 8; | |
338 | if (count_) | |
339 | { | |
340 | os_ << ",\n "; | |
341 | } | |
342 | os_ << dfa_[index_]; | |
343 | for (j_ = 1; j_ < mod_; ++j_) | |
344 | { | |
345 | os_ << ", " << dfa_[index_ + j_]; | |
346 | } | |
347 | } | |
348 | os_ << " };\n"; | |
349 | } | |
350 | ||
351 | os_ << "\n if (start_token_ == end_)\n"; | |
352 | os_ << " {\n"; | |
353 | os_ << " unique_id_ = npos;\n"; | |
354 | os_ << " return 0;\n"; | |
355 | os_ << " }\n\n"; | |
356 | if (sm_.data()._seen_BOL_assertion) | |
357 | { | |
358 | os_ << " bool bol = bol_;\n\n"; | |
359 | } | |
360 | ||
361 | if (dfas_ > 1) | |
362 | { | |
363 | os_ << "again:\n"; | |
364 | os_ << " std::size_t const* lookup_ = lookup_arr_[start_state_];\n"; | |
365 | os_ << " std::size_t dfa_alphabet_ = dfa_alphabet_arr_[start_state_];\n"; | |
366 | os_ << " std::size_t const*dfa_ = dfa_arr_[start_state_];\n"; | |
367 | } | |
368 | ||
369 | os_ << " std::size_t const* ptr_ = dfa_ + dfa_alphabet_;\n"; | |
370 | os_ << " Iterator curr_ = start_token_;\n"; | |
371 | os_ << " bool end_state_ = *ptr_ != 0;\n"; | |
372 | os_ << " std::size_t id_ = *(ptr_ + id_index);\n"; | |
373 | os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n"; | |
374 | if (dfas_ > 1) | |
375 | { | |
376 | os_ << " std::size_t end_start_state_ = start_state_;\n"; | |
377 | } | |
378 | if (sm_.data()._seen_BOL_assertion) | |
379 | { | |
380 | os_ << " bool end_bol_ = bol_;\n"; | |
381 | } | |
382 | os_ << " Iterator end_token_ = start_token_;\n\n"; | |
383 | ||
384 | os_ << " while (curr_ != end_)\n"; | |
385 | os_ << " {\n"; | |
386 | ||
387 | if (sm_.data()._seen_BOL_assertion) | |
388 | { | |
389 | os_ << " std::size_t const BOL_state_ = ptr_[bol_index];\n\n"; | |
390 | } | |
391 | ||
392 | if (sm_.data()._seen_EOL_assertion) | |
393 | { | |
394 | os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n"; | |
395 | } | |
396 | ||
397 | if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion) | |
398 | { | |
399 | os_ << " if (BOL_state_ && bol)\n"; | |
400 | os_ << " {\n"; | |
401 | os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n"; | |
402 | os_ << " }\n"; | |
403 | os_ << " else if (EOL_state_ && *curr_ == '\\n')\n"; | |
404 | os_ << " {\n"; | |
405 | os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n"; | |
406 | os_ << " }\n"; | |
407 | os_ << " else\n"; | |
408 | os_ << " {\n"; | |
409 | if (lookups_ == 256) | |
410 | { | |
411 | os_ << " unsigned char index = \n"; | |
412 | os_ << " static_cast<unsigned char>(*curr_++);\n"; | |
413 | } | |
414 | else | |
415 | { | |
416 | os_ << " std::size_t index = *curr_++\n"; | |
417 | } | |
418 | os_ << " bol = (index == '\n') ? true : false;\n"; | |
419 | os_ << " std::size_t const state_ = ptr_[\n"; | |
420 | os_ << " lookup_[static_cast<std::size_t>(index)]];\n"; | |
421 | ||
422 | os_ << '\n'; | |
423 | os_ << " if (state_ == 0) break;\n"; | |
424 | os_ << '\n'; | |
425 | os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n"; | |
426 | os_ << " }\n\n"; | |
427 | } | |
428 | else if (sm_.data()._seen_BOL_assertion) | |
429 | { | |
430 | os_ << " if (BOL_state_ && bol)\n"; | |
431 | os_ << " {\n"; | |
432 | os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n"; | |
433 | os_ << " }\n"; | |
434 | os_ << " else\n"; | |
435 | os_ << " {\n"; | |
436 | if (lookups_ == 256) | |
437 | { | |
438 | os_ << " unsigned char index = \n"; | |
439 | os_ << " static_cast<unsigned char>(*curr_++);\n"; | |
440 | } | |
441 | else | |
442 | { | |
443 | os_ << " std::size_t index = *curr_++\n"; | |
444 | } | |
445 | os_ << " bol = (index == '\n') ? true : false;\n"; | |
446 | os_ << " std::size_t const state_ = ptr_[\n"; | |
447 | os_ << " lookup_[static_cast<std::size_t>(index)]];\n"; | |
448 | ||
449 | os_ << '\n'; | |
450 | os_ << " if (state_ == 0) break;\n"; | |
451 | os_ << '\n'; | |
452 | os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n"; | |
453 | os_ << " }\n\n"; | |
454 | } | |
455 | else if (sm_.data()._seen_EOL_assertion) | |
456 | { | |
457 | os_ << " if (EOL_state_ && *curr_ == '\\n')\n"; | |
458 | os_ << " {\n"; | |
459 | os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n"; | |
460 | os_ << " }\n"; | |
461 | os_ << " else\n"; | |
462 | os_ << " {\n"; | |
463 | if (lookups_ == 256) | |
464 | { | |
465 | os_ << " unsigned char index = \n"; | |
466 | os_ << " static_cast<unsigned char>(*curr_++);\n"; | |
467 | } | |
468 | else | |
469 | { | |
470 | os_ << " std::size_t index = *curr_++\n"; | |
471 | } | |
472 | os_ << " bol = (index == '\n') ? true : false;\n"; | |
473 | os_ << " std::size_t const state_ = ptr_[\n"; | |
474 | os_ << " lookup_[static_cast<std::size_t>(index)]];\n"; | |
475 | ||
476 | os_ << '\n'; | |
477 | os_ << " if (state_ == 0) break;\n"; | |
478 | os_ << '\n'; | |
479 | os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n"; | |
480 | os_ << " }\n\n"; | |
481 | } | |
482 | else | |
483 | { | |
484 | os_ << " std::size_t const state_ =\n"; | |
485 | ||
486 | if (lookups_ == 256) | |
487 | { | |
488 | os_ << " ptr_[lookup_[" | |
489 | "static_cast<unsigned char>(*curr_++)]];\n"; | |
490 | } | |
491 | else | |
492 | { | |
493 | os_ << " ptr_[lookup_[*curr_++]];\n"; | |
494 | } | |
495 | ||
496 | os_ << '\n'; | |
497 | os_ << " if (state_ == 0) break;\n"; | |
498 | os_ << '\n'; | |
499 | os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n\n"; | |
500 | } | |
501 | ||
502 | os_ << " if (*ptr_)\n"; | |
503 | os_ << " {\n"; | |
504 | os_ << " end_state_ = true;\n"; | |
505 | os_ << " id_ = *(ptr_ + id_index);\n"; | |
506 | os_ << " uid_ = *(ptr_ + unique_id_index);\n"; | |
507 | if (dfas_ > 1) | |
508 | { | |
509 | os_ << " end_start_state_ = *(ptr_ + state_index);\n"; | |
510 | } | |
511 | if (sm_.data()._seen_BOL_assertion) | |
512 | { | |
513 | os_ << " end_bol_ = bol;\n"; | |
514 | } | |
515 | os_ << " end_token_ = curr_;\n"; | |
516 | os_ << " }\n"; | |
517 | os_ << " }\n\n"; | |
518 | ||
519 | if (sm_.data()._seen_EOL_assertion) | |
520 | { | |
521 | os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n"; | |
522 | ||
523 | os_ << " if (EOL_state_ && curr_ == end_)\n"; | |
524 | os_ << " {\n"; | |
525 | os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n\n"; | |
526 | ||
527 | os_ << " if (*ptr_)\n"; | |
528 | os_ << " {\n"; | |
529 | os_ << " end_state_ = true;\n"; | |
530 | os_ << " id_ = *(ptr_ + id_index);\n"; | |
531 | os_ << " uid_ = *(ptr_ + unique_id_index);\n"; | |
532 | if (dfas_ > 1) | |
533 | { | |
534 | os_ << " end_start_state_ = *(ptr_ + state_index);\n"; | |
535 | } | |
536 | if (sm_.data()._seen_BOL_assertion) | |
537 | { | |
538 | os_ << " end_bol_ = bol;\n"; | |
539 | } | |
540 | os_ << " end_token_ = curr_;\n"; | |
541 | os_ << " }\n"; | |
542 | os_ << " }\n\n"; | |
543 | } | |
544 | ||
545 | os_ << " if (end_state_)\n"; | |
546 | os_ << " {\n"; | |
547 | os_ << " // return longest match\n"; | |
548 | os_ << " start_token_ = end_token_;\n"; | |
549 | ||
550 | if (dfas_ > 1) | |
551 | { | |
552 | os_ << " start_state_ = end_start_state_;\n"; | |
553 | os_ << " if (id_ == 0)\n"; | |
554 | os_ << " {\n"; | |
555 | if (sm_.data()._seen_BOL_assertion) | |
556 | { | |
557 | os_ << " bol = end_bol_;\n"; | |
558 | } | |
559 | os_ << " goto again;\n"; | |
560 | os_ << " }\n"; | |
561 | if (sm_.data()._seen_BOL_assertion) | |
562 | { | |
563 | os_ << " else\n"; | |
564 | os_ << " {\n"; | |
565 | os_ << " bol_ = end_bol_;\n"; | |
566 | os_ << " }\n"; | |
567 | } | |
568 | } | |
569 | else if (sm_.data()._seen_BOL_assertion) | |
570 | { | |
571 | os_ << " bol_ = end_bol_;\n"; | |
572 | } | |
573 | ||
574 | os_ << " }\n"; | |
575 | os_ << " else\n"; | |
576 | os_ << " {\n"; | |
577 | ||
578 | if (sm_.data()._seen_BOL_assertion) | |
579 | { | |
580 | os_ << " bol_ = (*start_token_ == '\n') ? true : false;\n"; | |
581 | } | |
582 | ||
583 | os_ << " id_ = npos;\n"; | |
584 | os_ << " uid_ = npos;\n"; | |
585 | os_ << " }\n\n"; | |
586 | ||
587 | os_ << " unique_id_ = uid_;\n"; | |
588 | os_ << " return id_;\n"; | |
589 | return os_.good(); | |
590 | } | |
591 | ||
592 | /////////////////////////////////////////////////////////////////////////// | |
593 | template <typename Char> | |
594 | inline std::basic_string<Char> get_charlit(Char ch) | |
595 | { | |
596 | std::basic_string<Char> result; | |
597 | boost::lexer::basic_string_token<Char>::escape_char(ch, result); | |
598 | return result; | |
599 | } | |
600 | ||
601 | // check whether state0_0 is referenced from any of the other states | |
602 | template <typename Char> | |
603 | bool need_label0_0(boost::lexer::basic_state_machine<Char> const &sm_) | |
604 | { | |
605 | typedef typename boost::lexer::basic_state_machine<Char>::iterator | |
606 | iterator_type; | |
607 | iterator_type iter_ = sm_.begin(); | |
608 | std::size_t const states_ = iter_->states; | |
609 | ||
610 | for (std::size_t state_ = 0; state_ < states_; ++state_) | |
611 | { | |
612 | if (0 == iter_->bol_index || 0 == iter_->eol_index) | |
613 | { | |
614 | return true; | |
615 | } | |
616 | ||
617 | std::size_t const transitions_ = iter_->transitions; | |
618 | for (std::size_t t_ = 0; t_ < transitions_; ++t_) | |
619 | { | |
620 | if (0 == iter_->goto_state) | |
621 | { | |
622 | return true; | |
623 | } | |
624 | ++iter_; | |
625 | } | |
626 | if (transitions_ == 0) ++iter_; | |
627 | } | |
628 | return false; | |
629 | } | |
630 | ||
631 | /////////////////////////////////////////////////////////////////////////// | |
632 | template <typename Char> | |
633 | bool generate_function_body_switch(std::basic_ostream<Char> & os_ | |
634 | , boost::lexer::basic_state_machine<Char> const &sm_) | |
635 | { | |
636 | typedef typename boost::lexer::basic_state_machine<Char>::iterator | |
637 | iterator_type; | |
638 | ||
639 | std::size_t const lookups_ = sm_.data()._lookup->front ()->size (); | |
640 | iterator_type iter_ = sm_.begin(); | |
641 | iterator_type labeliter_ = iter_; | |
642 | iterator_type end_ = sm_.end(); | |
643 | std::size_t const dfas_ = sm_.data()._dfa->size (); | |
644 | ||
645 | os_ << " static std::size_t const npos = " | |
646 | "static_cast<std::size_t>(~0);\n"; | |
647 | ||
648 | os_ << "\n if (start_token_ == end_)\n"; | |
649 | os_ << " {\n"; | |
650 | os_ << " unique_id_ = npos;\n"; | |
651 | os_ << " return 0;\n"; | |
652 | os_ << " }\n\n"; | |
653 | ||
654 | if (sm_.data()._seen_BOL_assertion) | |
655 | { | |
656 | os_ << " bool bol = bol_;\n"; | |
657 | } | |
658 | ||
659 | if (dfas_ > 1) | |
660 | { | |
661 | os_ << "again:\n"; | |
662 | } | |
663 | ||
664 | os_ << " Iterator curr_ = start_token_;\n"; | |
665 | os_ << " bool end_state_ = false;\n"; | |
666 | os_ << " std::size_t id_ = npos;\n"; | |
667 | os_ << " std::size_t uid_ = npos;\n"; | |
668 | ||
669 | if (dfas_ > 1) | |
670 | { | |
671 | os_ << " std::size_t end_start_state_ = start_state_;\n"; | |
672 | } | |
673 | ||
674 | if (sm_.data()._seen_BOL_assertion) | |
675 | { | |
676 | os_ << " bool end_bol_ = bol_;\n"; | |
677 | } | |
678 | ||
679 | os_ << " Iterator end_token_ = start_token_;\n"; | |
680 | os_ << '\n'; | |
681 | ||
682 | os_ << " " << ((lookups_ == 256) ? "char" : "wchar_t") | |
683 | << " ch_ = 0;\n\n"; | |
684 | ||
685 | if (dfas_ > 1) | |
686 | { | |
687 | os_ << " switch (start_state_)\n"; | |
688 | os_ << " {\n"; | |
689 | ||
690 | for (std::size_t i_ = 0; i_ < dfas_; ++i_) | |
691 | { | |
692 | os_ << " case " << i_ << ":\n"; | |
693 | os_ << " goto state" << i_ << "_0;\n"; | |
694 | os_ << " break;\n"; | |
695 | } | |
696 | ||
697 | os_ << " default:\n"; | |
698 | os_ << " goto end;\n"; | |
699 | os_ << " break;\n"; | |
700 | os_ << " }\n"; | |
701 | } | |
702 | ||
703 | bool need_state0_0_label = need_label0_0(sm_); | |
704 | ||
705 | for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_) | |
706 | { | |
707 | std::size_t const states_ = iter_->states; | |
708 | for (std::size_t state_ = 0; state_ < states_; ++state_) | |
709 | { | |
710 | std::size_t const transitions_ = iter_->transitions; | |
711 | std::size_t t_ = 0; | |
712 | ||
713 | if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label) | |
714 | { | |
715 | os_ << "\nstate" << dfa_ << '_' << state_ << ":\n"; | |
716 | } | |
717 | ||
718 | if (iter_->end_state) | |
719 | { | |
720 | os_ << " end_state_ = true;\n"; | |
721 | os_ << " id_ = " << iter_->id << ";\n"; | |
722 | os_ << " uid_ = " << iter_->unique_id << ";\n"; | |
723 | os_ << " end_token_ = curr_;\n"; | |
724 | ||
725 | if (dfas_ > 1) | |
726 | { | |
727 | os_ << " end_start_state_ = " << iter_->goto_dfa << | |
728 | ";\n"; | |
729 | } | |
730 | ||
731 | if (sm_.data()._seen_BOL_assertion) | |
732 | { | |
733 | os_ << " end_bol_ = bol;\n"; | |
734 | } | |
735 | ||
736 | if (transitions_) os_ << '\n'; | |
737 | } | |
738 | ||
739 | if (t_ < transitions_ || | |
740 | iter_->bol_index != boost::lexer::npos || | |
741 | iter_->eol_index != boost::lexer::npos) | |
742 | { | |
743 | os_ << " if (curr_ == end_) goto end;\n"; | |
744 | os_ << " ch_ = *curr_;\n"; | |
745 | if (iter_->bol_index != boost::lexer::npos) | |
746 | { | |
747 | os_ << "\n if (bol) goto state" << dfa_ << '_' | |
748 | << iter_->bol_index << ";\n"; | |
749 | } | |
750 | if (iter_->eol_index != boost::lexer::npos) | |
751 | { | |
752 | os_ << "\n if (ch_ == '\n') goto state" << dfa_ | |
753 | << '_' << iter_->eol_index << ";\n"; | |
754 | } | |
755 | os_ << " ++curr_;\n"; | |
756 | } | |
757 | ||
758 | for (/**/; t_ < transitions_; ++t_) | |
759 | { | |
760 | Char const *ptr_ = iter_->token._charset.c_str(); | |
761 | Char const *end_ = ptr_ + iter_->token._charset.size(); | |
762 | Char start_char_ = 0; | |
763 | Char curr_char_ = 0; | |
764 | bool range_ = false; | |
765 | bool first_char_ = true; | |
766 | ||
767 | os_ << "\n if ("; | |
768 | ||
769 | while (ptr_ != end_) | |
770 | { | |
771 | curr_char_ = *ptr_++; | |
772 | ||
773 | if (*ptr_ == curr_char_ + 1) | |
774 | { | |
775 | if (!range_) | |
776 | { | |
777 | start_char_ = curr_char_; | |
778 | } | |
779 | range_ = true; | |
780 | } | |
781 | else | |
782 | { | |
783 | if (!first_char_) | |
784 | { | |
785 | os_ << ((iter_->token._negated) ? " && " : " || "); | |
786 | } | |
787 | else | |
788 | { | |
789 | first_char_ = false; | |
790 | } | |
791 | if (range_) | |
792 | { | |
793 | if (iter_->token._negated) | |
794 | { | |
795 | os_ << "!"; | |
796 | } | |
797 | os_ << "(ch_ >= '" << get_charlit(start_char_) | |
798 | << "' && ch_ <= '" | |
799 | << get_charlit(curr_char_) << "')"; | |
800 | range_ = false; | |
801 | } | |
802 | else | |
803 | { | |
804 | os_ << "ch_ " | |
805 | << ((iter_->token._negated) ? "!=" : "==") | |
806 | << " '" << get_charlit(curr_char_) << "'"; | |
807 | } | |
808 | } | |
809 | } | |
810 | ||
811 | os_ << ") goto state" << dfa_ << '_' << iter_->goto_state | |
812 | << ";\n"; | |
813 | ++iter_; | |
814 | } | |
815 | ||
816 | if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1)) | |
817 | { | |
818 | os_ << " goto end;\n"; | |
819 | } | |
820 | ||
821 | if (transitions_ == 0) ++iter_; | |
822 | } | |
823 | } | |
824 | ||
825 | os_ << "\nend:\n"; | |
826 | os_ << " if (end_state_)\n"; | |
827 | os_ << " {\n"; | |
828 | os_ << " // return longest match\n"; | |
829 | os_ << " start_token_ = end_token_;\n"; | |
830 | ||
831 | if (dfas_ > 1) | |
832 | { | |
833 | os_ << " start_state_ = end_start_state_;\n"; | |
834 | os_ << "\n if (id_ == 0)\n"; | |
835 | os_ << " {\n"; | |
836 | ||
837 | if (sm_.data()._seen_BOL_assertion) | |
838 | { | |
839 | os_ << " bol = end_bol_;\n"; | |
840 | } | |
841 | ||
842 | os_ << " goto again;\n"; | |
843 | os_ << " }\n"; | |
844 | ||
845 | if (sm_.data()._seen_BOL_assertion) | |
846 | { | |
847 | os_ << " else\n"; | |
848 | os_ << " {\n"; | |
849 | os_ << " bol_ = end_bol_;\n"; | |
850 | os_ << " }\n"; | |
851 | } | |
852 | } | |
853 | else if (sm_.data()._seen_BOL_assertion) | |
854 | { | |
855 | os_ << " bol_ = end_bol_;\n"; | |
856 | } | |
857 | ||
858 | os_ << " }\n"; | |
859 | os_ << " else\n"; | |
860 | os_ << " {\n"; | |
861 | ||
862 | if (sm_.data()._seen_BOL_assertion) | |
863 | { | |
864 | os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n"; | |
865 | } | |
866 | os_ << " id_ = npos;\n"; | |
867 | os_ << " uid_ = npos;\n"; | |
868 | os_ << " }\n\n"; | |
869 | ||
870 | os_ << " unique_id_ = uid_;\n"; | |
871 | os_ << " return id_;\n"; | |
872 | return os_.good(); | |
873 | } | |
874 | ||
875 | /////////////////////////////////////////////////////////////////////////// | |
876 | // Generate a tokenizer for the given state machine. | |
877 | template <typename Char, typename F> | |
878 | inline bool | |
879 | generate_cpp (boost::lexer::basic_state_machine<Char> const& sm_ | |
880 | , boost::lexer::basic_rules<Char> const& rules_ | |
881 | , std::basic_ostream<Char> &os_, Char const* name_suffix | |
882 | , F generate_function_body) | |
883 | { | |
884 | if (sm_.data()._lookup->empty()) | |
885 | return false; | |
886 | ||
887 | std::size_t const dfas_ = sm_.data()._dfa->size(); | |
888 | // std::size_t const lookups_ = sm_.data()._lookup->front()->size(); | |
889 | ||
890 | os_ << "// Copyright (c) 2008-2009 Ben Hanson\n"; | |
891 | os_ << "// Copyright (c) 2008-2011 Hartmut Kaiser\n"; | |
892 | os_ << "//\n"; | |
893 | os_ << "// Distributed under the Boost Software License, " | |
894 | "Version 1.0. (See accompanying\n"; | |
895 | os_ << "// file licence_1_0.txt or copy at " | |
896 | "http://www.boost.org/LICENSE_1_0.txt)\n\n"; | |
897 | os_ << "// Auto-generated by boost::lexer, do not edit\n\n"; | |
898 | ||
899 | std::basic_string<Char> guard(name_suffix); | |
900 | guard += L<Char>(name_suffix[0] ? "_" : ""); | |
901 | guard += L<Char>(__DATE__ "_" __TIME__); | |
902 | typename std::basic_string<Char>::size_type p = | |
903 | guard.find_first_of(L<Char>(": ")); | |
904 | while (std::string::npos != p) | |
905 | { | |
906 | guard.replace(p, 1, L<Char>("_")); | |
907 | p = guard.find_first_of(L<Char>(": "), p); | |
908 | } | |
909 | boost::to_upper(guard); | |
910 | ||
911 | os_ << "#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << ")\n"; | |
912 | os_ << "#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << "\n\n"; | |
913 | ||
914 | os_ << "#include <boost/detail/iterator.hpp>\n"; | |
915 | os_ << "#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>\n\n"; | |
916 | ||
917 | generate_delimiter(os_); | |
918 | os_ << "// the generated table of state names and the tokenizer have to be\n" | |
919 | "// defined in the boost::spirit::lex::lexertl::static_ namespace\n"; | |
920 | os_ << "namespace boost { namespace spirit { namespace lex { " | |
921 | "namespace lexertl { namespace static_ {\n\n"; | |
922 | ||
923 | // generate the lexer state information variables | |
924 | if (!generate_cpp_state_info(rules_, os_, name_suffix)) | |
925 | return false; | |
926 | ||
927 | generate_delimiter(os_); | |
928 | os_ << "// this function returns the next matched token\n"; | |
929 | os_ << "template<typename Iterator>\n"; | |
930 | os_ << "std::size_t next_token" << (name_suffix[0] ? "_" : "") | |
931 | << name_suffix << " ("; | |
932 | ||
933 | if (dfas_ > 1) | |
934 | { | |
935 | os_ << "std::size_t& start_state_, "; | |
936 | } | |
937 | else | |
938 | { | |
939 | os_ << "std::size_t& /*start_state_*/, "; | |
940 | } | |
941 | if (sm_.data()._seen_BOL_assertion) | |
942 | { | |
943 | os_ << "bool& bol_, "; | |
944 | } | |
945 | else | |
946 | { | |
947 | os_ << "bool& /*bol_*/, "; | |
948 | } | |
949 | os_ << "\n "; | |
950 | ||
951 | os_ << "Iterator &start_token_, Iterator const& end_, "; | |
952 | os_ << "std::size_t& unique_id_)\n"; | |
953 | os_ << "{\n"; | |
954 | if (!generate_function_body(os_, sm_)) | |
955 | return false; | |
956 | os_ << "}\n\n"; | |
957 | ||
958 | if (!generate_cpp_state_table<Char>(os_, name_suffix | |
959 | , sm_.data()._seen_BOL_assertion, sm_.data()._seen_EOL_assertion)) | |
960 | { | |
961 | return false; | |
962 | } | |
963 | ||
964 | os_ << "}}}}} // namespace boost::spirit::lex::lexertl::static_\n\n"; | |
965 | ||
966 | os_ << "#endif\n"; | |
967 | ||
968 | return os_.good(); | |
969 | } | |
970 | ||
971 | } // namespace detail | |
972 | ||
973 | /////////////////////////////////////////////////////////////////////////// | |
974 | template <typename Lexer, typename F> | |
975 | inline bool | |
976 | generate_static(Lexer const& lexer | |
977 | , std::basic_ostream<typename Lexer::char_type>& os | |
978 | , typename Lexer::char_type const* name_suffix, F f) | |
979 | { | |
980 | if (!lexer.init_dfa(true)) // always minimize DFA for static lexers | |
981 | return false; | |
982 | return detail::generate_cpp(lexer.state_machine_, lexer.rules_, os | |
983 | , name_suffix, f); | |
984 | } | |
985 | ||
986 | /////////////////////////////////////////////////////////////////////////// | |
987 | // deprecated function, will be removed in the future (this has been | |
988 | // replaced by the function generate_static_dfa - see below). | |
989 | template <typename Lexer> | |
990 | inline bool | |
991 | generate_static(Lexer const& lexer | |
992 | , std::basic_ostream<typename Lexer::char_type>& os | |
993 | , typename Lexer::char_type const* name_suffix = | |
994 | detail::L<typename Lexer::char_type>()) | |
995 | { | |
996 | return generate_static(lexer, os, name_suffix | |
997 | , &detail::generate_function_body_dfa<typename Lexer::char_type>); | |
998 | } | |
999 | ||
1000 | /////////////////////////////////////////////////////////////////////////// | |
1001 | template <typename Lexer> | |
1002 | inline bool | |
1003 | generate_static_dfa(Lexer const& lexer | |
1004 | , std::basic_ostream<typename Lexer::char_type>& os | |
1005 | , typename Lexer::char_type const* name_suffix = | |
1006 | detail::L<typename Lexer::char_type>()) | |
1007 | { | |
1008 | return generate_static(lexer, os, name_suffix | |
1009 | , &detail::generate_function_body_dfa<typename Lexer::char_type>); | |
1010 | } | |
1011 | ||
1012 | /////////////////////////////////////////////////////////////////////////// | |
1013 | template <typename Lexer> | |
1014 | inline bool | |
1015 | generate_static_switch(Lexer const& lexer | |
1016 | , std::basic_ostream<typename Lexer::char_type>& os | |
1017 | , typename Lexer::char_type const* name_suffix = | |
1018 | detail::L<typename Lexer::char_type>()) | |
1019 | { | |
1020 | return generate_static(lexer, os, name_suffix | |
1021 | , &detail::generate_function_body_switch<typename Lexer::char_type>); | |
1022 | } | |
1023 | ||
1024 | /////////////////////////////////////////////////////////////////////////////// | |
1025 | }}}} | |
1026 | ||
1027 | #endif |