]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright (c) 2008-2009 Ben Hanson |
2 | // Copyright (c) 2008-2011 Hartmut Kaiser | |
3 | // | |
4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
5 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
6 | ||
7 | #if !defined(BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM) | |
8 | #define BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM | |
9 | ||
10 | #if defined(_MSC_VER) | |
11 | #pragma once | |
12 | #endif | |
13 | ||
14 | #include <boost/spirit/home/support/detail/lexer/char_traits.hpp> | |
15 | #include <boost/spirit/home/support/detail/lexer/consts.hpp> | |
16 | #include <boost/spirit/home/support/detail/lexer/rules.hpp> | |
17 | #include <boost/spirit/home/support/detail/lexer/size_t.hpp> | |
18 | #include <boost/spirit/home/support/detail/lexer/state_machine.hpp> | |
19 | #include <boost/spirit/home/support/detail/lexer/debug.hpp> | |
20 | #include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp> | |
7c673cae | 21 | #include <boost/scoped_array.hpp> |
1e59de90 | 22 | #include <cstring> |
f67539c2 | 23 | #include <locale> |
7c673cae FG |
24 | |
25 | /////////////////////////////////////////////////////////////////////////////// | |
26 | namespace boost { namespace spirit { namespace lex { namespace lexertl | |
27 | { | |
28 | namespace detail | |
29 | { | |
30 | ||
31 | /////////////////////////////////////////////////////////////////////////// | |
// Maps narrow character and string literals to the character type CharT.
// Only the specializations are defined.
template <typename CharT>
struct string_lit;

// Narrow characters: both overloads hand back their argument unchanged
// (the string overload as a std::string copy).
template <>
struct string_lit<char>
{
    static char get(char c)
    {
        return c;
    }

    static std::string get(char const* str = "")
    {
        return std::string(str);
    }
};
41 | ||
42 | template <> | |
43 | struct string_lit<wchar_t> | |
44 | { | |
45 | static wchar_t get(char c) | |
46 | { | |
47 | typedef std::ctype<wchar_t> ctype_t; | |
48 | return std::use_facet<ctype_t>(std::locale()).widen(c); | |
49 | } | |
50 | static std::basic_string<wchar_t> get(char const* source = "") | |
51 | { | |
52 | using namespace std; // some systems have size_t in ns std | |
53 | size_t len = strlen(source); | |
54 | boost::scoped_array<wchar_t> result (new wchar_t[len+1]); | |
55 | result.get()[len] = '\0'; | |
56 | ||
57 | // working with wide character streams is supported only if the | |
58 | // platform provides the std::ctype<wchar_t> facet | |
59 | BOOST_ASSERT(std::has_facet<std::ctype<wchar_t> >(std::locale())); | |
60 | ||
61 | std::use_facet<std::ctype<wchar_t> >(std::locale()) | |
62 | .widen(source, source + len, result.get()); | |
63 | return result.get(); | |
64 | } | |
65 | }; | |
66 | ||
67 | template <typename Char> | |
68 | inline Char L(char c) | |
69 | { | |
70 | return string_lit<Char>::get(c); | |
71 | } | |
72 | ||
73 | template <typename Char> | |
74 | inline std::basic_string<Char> L(char const* c = "") | |
75 | { | |
76 | return string_lit<Char>::get(c); | |
77 | } | |
78 | ||
79 | /////////////////////////////////////////////////////////////////////////// | |
// Writes a line consisting of 80 '/' characters followed by a newline to
// os_. Returns os_.good() afterwards.
template <typename Char>
inline bool
generate_delimiter(std::basic_ostream<Char> &os_)
{
    std::basic_string<Char> const rule(80, '/');
    os_ << rule;
    os_ << "\n";
    return os_.good();
}
87 | ||
88 | /////////////////////////////////////////////////////////////////////////// | |
89 | // Generate a table of the names of the used lexer states, which is a bit | |
90 | // tricky, because the table stored with the rules is sorted based on the | |
91 | // names, but we need it sorted using the state ids. | |
92 | template <typename Char> | |
93 | inline bool | |
94 | generate_cpp_state_info (boost::lexer::basic_rules<Char> const& rules_ | |
95 | , std::basic_ostream<Char> &os_, Char const* name_suffix) | |
96 | { | |
97 | // we need to re-sort the state names in ascending order of the state | |
98 | // ids, filling possible gaps in between later | |
99 | typedef typename | |
100 | boost::lexer::basic_rules<Char>::string_size_t_map::const_iterator | |
101 | state_iterator; | |
102 | typedef std::map<std::size_t, Char const*> reverse_state_map_type; | |
103 | ||
104 | reverse_state_map_type reverse_state_map; | |
105 | state_iterator send = rules_.statemap().end(); | |
106 | for (state_iterator sit = rules_.statemap().begin(); sit != send; ++sit) | |
107 | { | |
108 | typedef typename reverse_state_map_type::value_type value_type; | |
109 | reverse_state_map.insert(value_type((*sit).second, (*sit).first.c_str())); | |
110 | } | |
111 | ||
112 | generate_delimiter(os_); | |
113 | os_ << "// this table defines the names of the lexer states\n"; | |
114 | os_ << boost::lexer::detail::strings<Char>::char_name() | |
115 | << " const* const lexer_state_names" | |
116 | << (name_suffix[0] ? "_" : "") << name_suffix | |
117 | << "[" << rules_.statemap().size() << "] = \n{\n"; | |
118 | ||
119 | typedef typename reverse_state_map_type::iterator iterator; | |
120 | iterator rend = reverse_state_map.end(); | |
121 | std::size_t last_id = 0; | |
122 | for (iterator rit = reverse_state_map.begin(); rit != rend; ++last_id) | |
123 | { | |
124 | for (/**/; last_id < (*rit).first; ++last_id) | |
125 | { | |
126 | os_ << " 0, // \"<undefined state>\"\n"; | |
127 | } | |
128 | os_ << " " | |
129 | << boost::lexer::detail::strings<Char>::char_prefix() | |
130 | << "\"" << (*rit).second << "\""; | |
131 | if (++rit != rend) | |
132 | os_ << ",\n"; | |
133 | else | |
134 | os_ << "\n"; // don't generate the final comma | |
135 | } | |
136 | os_ << "};\n\n"; | |
137 | ||
138 | generate_delimiter(os_); | |
139 | os_ << "// this variable defines the number of lexer states\n"; | |
140 | os_ << "std::size_t const lexer_state_count" | |
141 | << (name_suffix[0] ? "_" : "") << name_suffix | |
142 | << " = " << rules_.statemap().size() << ";\n\n"; | |
143 | return os_.good(); | |
144 | } | |
145 | ||
146 | template <typename Char> | |
147 | inline bool | |
148 | generate_cpp_state_table (std::basic_ostream<Char> &os_ | |
149 | , Char const* name_suffix, bool bol, bool eol) | |
150 | { | |
151 | std::basic_string<Char> suffix(L<Char>(name_suffix[0] ? "_" : "")); | |
152 | suffix += name_suffix; | |
153 | ||
154 | generate_delimiter(os_); | |
155 | os_ << "// this defines a generic accessors for the information above\n"; | |
156 | os_ << "struct lexer" << suffix << "\n{\n"; | |
157 | os_ << " // version number and feature-set of compatible static lexer engine\n"; | |
158 | os_ << " enum\n"; | |
92f5a8d4 | 159 | os_ << " {\n static_version = " << SPIRIT_STATIC_LEXER_VERSION << ",\n"; |
7c673cae FG |
160 | os_ << " supports_bol = " << std::boolalpha << bol << ",\n"; |
161 | os_ << " supports_eol = " << std::boolalpha << eol << "\n"; | |
162 | os_ << " };\n\n"; | |
163 | os_ << " // return the number of lexer states\n"; | |
164 | os_ << " static std::size_t state_count()\n"; | |
165 | os_ << " {\n return lexer_state_count" << suffix << "; \n }\n\n"; | |
166 | os_ << " // return the name of the lexer state as given by 'idx'\n"; | |
167 | os_ << " static " << boost::lexer::detail::strings<Char>::char_name() | |
168 | << " const* state_name(std::size_t idx)\n"; | |
169 | os_ << " {\n return lexer_state_names" << suffix << "[idx]; \n }\n\n"; | |
170 | os_ << " // return the next matched token\n"; | |
171 | os_ << " template<typename Iterator>\n"; | |
172 | os_ << " static std::size_t next(std::size_t &start_state_, bool& bol_\n"; | |
173 | os_ << " , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n"; | |
174 | os_ << " {\n return next_token" << suffix | |
175 | << "(start_state_, bol_, start_token_, end_, unique_id_);\n }\n"; | |
176 | os_ << "};\n\n"; | |
177 | return os_.good(); | |
178 | } | |
179 | ||
180 | /////////////////////////////////////////////////////////////////////////// | |
181 | // generate function body based on traversing the DFA tables | |
182 | template <typename Char> | |
183 | bool generate_function_body_dfa(std::basic_ostream<Char>& os_ | |
184 | , boost::lexer::basic_state_machine<Char> const &sm_) | |
185 | { | |
186 | std::size_t const dfas_ = sm_.data()._dfa->size(); | |
187 | std::size_t const lookups_ = sm_.data()._lookup->front()->size(); | |
188 | ||
189 | os_ << " enum {end_state_index, id_index, unique_id_index, " | |
190 | "state_index, bol_index,\n"; | |
191 | os_ << " eol_index, dead_state_index, dfa_offset};\n\n"; | |
192 | os_ << " static std::size_t const npos = " | |
193 | "static_cast<std::size_t>(~0);\n"; | |
194 | ||
195 | if (dfas_ > 1) | |
196 | { | |
197 | for (std::size_t state_ = 0; state_ < dfas_; ++state_) | |
198 | { | |
199 | std::size_t i_ = 0; | |
200 | std::size_t j_ = 1; | |
201 | std::size_t count_ = lookups_ / 8; | |
202 | std::size_t const* lookup_ = &sm_.data()._lookup[state_]->front(); | |
203 | std::size_t const* dfa_ = &sm_.data()._dfa[state_]->front(); | |
204 | ||
205 | os_ << " static std::size_t const lookup" << state_ | |
206 | << "_[" << lookups_ << "] = {\n "; | |
207 | for (/**/; i_ < count_; ++i_) | |
208 | { | |
209 | std::size_t const index_ = i_ * 8; | |
210 | os_ << lookup_[index_]; | |
211 | for (/**/; j_ < 8; ++j_) | |
212 | { | |
213 | os_ << ", " << lookup_[index_ + j_]; | |
214 | } | |
215 | if (i_ < count_ - 1) | |
216 | { | |
217 | os_ << ",\n "; | |
218 | } | |
219 | j_ = 1; | |
220 | } | |
221 | os_ << " };\n"; | |
222 | ||
223 | count_ = sm_.data()._dfa[state_]->size (); | |
224 | os_ << " static const std::size_t dfa" << state_ << "_[" | |
225 | << count_ << "] = {\n "; | |
226 | count_ /= 8; | |
227 | for (i_ = 0; i_ < count_; ++i_) | |
228 | { | |
229 | std::size_t const index_ = i_ * 8; | |
230 | os_ << dfa_[index_]; | |
231 | for (j_ = 1; j_ < 8; ++j_) | |
232 | { | |
233 | os_ << ", " << dfa_[index_ + j_]; | |
234 | } | |
235 | if (i_ < count_ - 1) | |
236 | { | |
237 | os_ << ",\n "; | |
238 | } | |
239 | } | |
240 | ||
241 | std::size_t const mod_ = sm_.data()._dfa[state_]->size () % 8; | |
242 | if (mod_) | |
243 | { | |
244 | std::size_t const index_ = count_ * 8; | |
245 | if (count_) | |
246 | { | |
247 | os_ << ",\n "; | |
248 | } | |
249 | os_ << dfa_[index_]; | |
250 | for (j_ = 1; j_ < mod_; ++j_) | |
251 | { | |
252 | os_ << ", " << dfa_[index_ + j_]; | |
253 | } | |
254 | } | |
255 | os_ << " };\n"; | |
256 | } | |
257 | ||
258 | std::size_t count_ = sm_.data()._dfa_alphabet.size(); | |
259 | std::size_t i_ = 1; | |
260 | ||
261 | os_ << " static std::size_t const* lookup_arr_[" << count_ | |
262 | << "] = { lookup0_"; | |
263 | for (i_ = 1; i_ < count_; ++i_) | |
264 | { | |
265 | os_ << ", " << "lookup" << i_ << "_"; | |
266 | } | |
267 | os_ << " };\n"; | |
268 | ||
269 | os_ << " static std::size_t const dfa_alphabet_arr_[" | |
270 | << count_ << "] = { "; | |
271 | os_ << sm_.data()._dfa_alphabet.front (); | |
272 | for (i_ = 1; i_ < count_; ++i_) | |
273 | { | |
274 | os_ << ", " << sm_.data()._dfa_alphabet[i_]; | |
275 | } | |
276 | os_ << " };\n"; | |
277 | ||
278 | os_ << " static std::size_t const* dfa_arr_[" << count_ | |
279 | << "] = { "; | |
280 | os_ << "dfa0_"; | |
281 | for (i_ = 1; i_ < count_; ++i_) | |
282 | { | |
283 | os_ << ", " << "dfa" << i_ << "_"; | |
284 | } | |
285 | os_ << " };\n"; | |
286 | } | |
287 | else | |
288 | { | |
289 | std::size_t const* lookup_ = &sm_.data()._lookup[0]->front(); | |
290 | std::size_t const* dfa_ = &sm_.data()._dfa[0]->front(); | |
291 | std::size_t i_ = 0; | |
292 | std::size_t j_ = 1; | |
293 | std::size_t count_ = lookups_ / 8; | |
294 | ||
295 | os_ << " static std::size_t const lookup_["; | |
296 | os_ << sm_.data()._lookup[0]->size() << "] = {\n "; | |
297 | for (/**/; i_ < count_; ++i_) | |
298 | { | |
299 | const std::size_t index_ = i_ * 8; | |
300 | os_ << lookup_[index_]; | |
301 | for (/**/; j_ < 8; ++j_) | |
302 | { | |
303 | os_ << ", " << lookup_[index_ + j_]; | |
304 | } | |
305 | if (i_ < count_ - 1) | |
306 | { | |
307 | os_ << ",\n "; | |
308 | } | |
309 | j_ = 1; | |
310 | } | |
311 | os_ << " };\n"; | |
312 | ||
313 | os_ << " static std::size_t const dfa_alphabet_ = " | |
314 | << sm_.data()._dfa_alphabet.front () << ";\n"; | |
315 | os_ << " static std::size_t const dfa_[" | |
316 | << sm_.data()._dfa[0]->size () << "] = {\n "; | |
317 | count_ = sm_.data()._dfa[0]->size () / 8; | |
318 | for (i_ = 0; i_ < count_; ++i_) | |
319 | { | |
320 | const std::size_t index_ = i_ * 8; | |
321 | os_ << dfa_[index_]; | |
322 | for (j_ = 1; j_ < 8; ++j_) | |
323 | { | |
324 | os_ << ", " << dfa_[index_ + j_]; | |
325 | } | |
326 | if (i_ < count_ - 1) | |
327 | { | |
328 | os_ << ",\n "; | |
329 | } | |
330 | } | |
331 | ||
332 | const std::size_t mod_ = sm_.data()._dfa[0]->size () % 8; | |
333 | if (mod_) | |
334 | { | |
335 | const std::size_t index_ = count_ * 8; | |
336 | if (count_) | |
337 | { | |
338 | os_ << ",\n "; | |
339 | } | |
340 | os_ << dfa_[index_]; | |
341 | for (j_ = 1; j_ < mod_; ++j_) | |
342 | { | |
343 | os_ << ", " << dfa_[index_ + j_]; | |
344 | } | |
345 | } | |
346 | os_ << " };\n"; | |
347 | } | |
348 | ||
349 | os_ << "\n if (start_token_ == end_)\n"; | |
350 | os_ << " {\n"; | |
351 | os_ << " unique_id_ = npos;\n"; | |
352 | os_ << " return 0;\n"; | |
353 | os_ << " }\n\n"; | |
354 | if (sm_.data()._seen_BOL_assertion) | |
355 | { | |
356 | os_ << " bool bol = bol_;\n\n"; | |
357 | } | |
358 | ||
359 | if (dfas_ > 1) | |
360 | { | |
361 | os_ << "again:\n"; | |
362 | os_ << " std::size_t const* lookup_ = lookup_arr_[start_state_];\n"; | |
363 | os_ << " std::size_t dfa_alphabet_ = dfa_alphabet_arr_[start_state_];\n"; | |
364 | os_ << " std::size_t const*dfa_ = dfa_arr_[start_state_];\n"; | |
365 | } | |
366 | ||
367 | os_ << " std::size_t const* ptr_ = dfa_ + dfa_alphabet_;\n"; | |
368 | os_ << " Iterator curr_ = start_token_;\n"; | |
369 | os_ << " bool end_state_ = *ptr_ != 0;\n"; | |
370 | os_ << " std::size_t id_ = *(ptr_ + id_index);\n"; | |
371 | os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n"; | |
372 | if (dfas_ > 1) | |
373 | { | |
374 | os_ << " std::size_t end_start_state_ = start_state_;\n"; | |
375 | } | |
376 | if (sm_.data()._seen_BOL_assertion) | |
377 | { | |
378 | os_ << " bool end_bol_ = bol_;\n"; | |
379 | } | |
380 | os_ << " Iterator end_token_ = start_token_;\n\n"; | |
381 | ||
382 | os_ << " while (curr_ != end_)\n"; | |
383 | os_ << " {\n"; | |
384 | ||
385 | if (sm_.data()._seen_BOL_assertion) | |
386 | { | |
387 | os_ << " std::size_t const BOL_state_ = ptr_[bol_index];\n\n"; | |
388 | } | |
389 | ||
390 | if (sm_.data()._seen_EOL_assertion) | |
391 | { | |
392 | os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n"; | |
393 | } | |
394 | ||
395 | if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion) | |
396 | { | |
397 | os_ << " if (BOL_state_ && bol)\n"; | |
398 | os_ << " {\n"; | |
399 | os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n"; | |
400 | os_ << " }\n"; | |
401 | os_ << " else if (EOL_state_ && *curr_ == '\\n')\n"; | |
402 | os_ << " {\n"; | |
403 | os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n"; | |
404 | os_ << " }\n"; | |
405 | os_ << " else\n"; | |
406 | os_ << " {\n"; | |
407 | if (lookups_ == 256) | |
408 | { | |
409 | os_ << " unsigned char index = \n"; | |
410 | os_ << " static_cast<unsigned char>(*curr_++);\n"; | |
411 | } | |
412 | else | |
413 | { | |
414 | os_ << " std::size_t index = *curr_++\n"; | |
415 | } | |
11fdf7f2 | 416 | os_ << " bol = (index == '\\n') ? true : false;\n"; |
7c673cae FG |
417 | os_ << " std::size_t const state_ = ptr_[\n"; |
418 | os_ << " lookup_[static_cast<std::size_t>(index)]];\n"; | |
419 | ||
420 | os_ << '\n'; | |
421 | os_ << " if (state_ == 0) break;\n"; | |
422 | os_ << '\n'; | |
423 | os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n"; | |
424 | os_ << " }\n\n"; | |
425 | } | |
426 | else if (sm_.data()._seen_BOL_assertion) | |
427 | { | |
428 | os_ << " if (BOL_state_ && bol)\n"; | |
429 | os_ << " {\n"; | |
430 | os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n"; | |
431 | os_ << " }\n"; | |
432 | os_ << " else\n"; | |
433 | os_ << " {\n"; | |
434 | if (lookups_ == 256) | |
435 | { | |
436 | os_ << " unsigned char index = \n"; | |
437 | os_ << " static_cast<unsigned char>(*curr_++);\n"; | |
438 | } | |
439 | else | |
440 | { | |
441 | os_ << " std::size_t index = *curr_++\n"; | |
442 | } | |
11fdf7f2 | 443 | os_ << " bol = (index == '\\n') ? true : false;\n"; |
7c673cae FG |
444 | os_ << " std::size_t const state_ = ptr_[\n"; |
445 | os_ << " lookup_[static_cast<std::size_t>(index)]];\n"; | |
446 | ||
447 | os_ << '\n'; | |
448 | os_ << " if (state_ == 0) break;\n"; | |
449 | os_ << '\n'; | |
450 | os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n"; | |
451 | os_ << " }\n\n"; | |
452 | } | |
453 | else if (sm_.data()._seen_EOL_assertion) | |
454 | { | |
455 | os_ << " if (EOL_state_ && *curr_ == '\\n')\n"; | |
456 | os_ << " {\n"; | |
457 | os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n"; | |
458 | os_ << " }\n"; | |
459 | os_ << " else\n"; | |
460 | os_ << " {\n"; | |
461 | if (lookups_ == 256) | |
462 | { | |
463 | os_ << " unsigned char index = \n"; | |
464 | os_ << " static_cast<unsigned char>(*curr_++);\n"; | |
465 | } | |
466 | else | |
467 | { | |
468 | os_ << " std::size_t index = *curr_++\n"; | |
469 | } | |
11fdf7f2 | 470 | os_ << " bol = (index == '\\n') ? true : false;\n"; |
7c673cae FG |
471 | os_ << " std::size_t const state_ = ptr_[\n"; |
472 | os_ << " lookup_[static_cast<std::size_t>(index)]];\n"; | |
473 | ||
474 | os_ << '\n'; | |
475 | os_ << " if (state_ == 0) break;\n"; | |
476 | os_ << '\n'; | |
477 | os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n"; | |
478 | os_ << " }\n\n"; | |
479 | } | |
480 | else | |
481 | { | |
482 | os_ << " std::size_t const state_ =\n"; | |
483 | ||
484 | if (lookups_ == 256) | |
485 | { | |
486 | os_ << " ptr_[lookup_[" | |
487 | "static_cast<unsigned char>(*curr_++)]];\n"; | |
488 | } | |
489 | else | |
490 | { | |
491 | os_ << " ptr_[lookup_[*curr_++]];\n"; | |
492 | } | |
493 | ||
494 | os_ << '\n'; | |
495 | os_ << " if (state_ == 0) break;\n"; | |
496 | os_ << '\n'; | |
497 | os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n\n"; | |
498 | } | |
499 | ||
500 | os_ << " if (*ptr_)\n"; | |
501 | os_ << " {\n"; | |
502 | os_ << " end_state_ = true;\n"; | |
503 | os_ << " id_ = *(ptr_ + id_index);\n"; | |
504 | os_ << " uid_ = *(ptr_ + unique_id_index);\n"; | |
505 | if (dfas_ > 1) | |
506 | { | |
507 | os_ << " end_start_state_ = *(ptr_ + state_index);\n"; | |
508 | } | |
509 | if (sm_.data()._seen_BOL_assertion) | |
510 | { | |
511 | os_ << " end_bol_ = bol;\n"; | |
512 | } | |
513 | os_ << " end_token_ = curr_;\n"; | |
514 | os_ << " }\n"; | |
515 | os_ << " }\n\n"; | |
516 | ||
517 | if (sm_.data()._seen_EOL_assertion) | |
518 | { | |
519 | os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n"; | |
520 | ||
521 | os_ << " if (EOL_state_ && curr_ == end_)\n"; | |
522 | os_ << " {\n"; | |
523 | os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n\n"; | |
524 | ||
525 | os_ << " if (*ptr_)\n"; | |
526 | os_ << " {\n"; | |
527 | os_ << " end_state_ = true;\n"; | |
528 | os_ << " id_ = *(ptr_ + id_index);\n"; | |
529 | os_ << " uid_ = *(ptr_ + unique_id_index);\n"; | |
530 | if (dfas_ > 1) | |
531 | { | |
532 | os_ << " end_start_state_ = *(ptr_ + state_index);\n"; | |
533 | } | |
534 | if (sm_.data()._seen_BOL_assertion) | |
535 | { | |
536 | os_ << " end_bol_ = bol;\n"; | |
537 | } | |
538 | os_ << " end_token_ = curr_;\n"; | |
539 | os_ << " }\n"; | |
540 | os_ << " }\n\n"; | |
541 | } | |
542 | ||
543 | os_ << " if (end_state_)\n"; | |
544 | os_ << " {\n"; | |
545 | os_ << " // return longest match\n"; | |
546 | os_ << " start_token_ = end_token_;\n"; | |
547 | ||
548 | if (dfas_ > 1) | |
549 | { | |
550 | os_ << " start_state_ = end_start_state_;\n"; | |
551 | os_ << " if (id_ == 0)\n"; | |
552 | os_ << " {\n"; | |
553 | if (sm_.data()._seen_BOL_assertion) | |
554 | { | |
555 | os_ << " bol = end_bol_;\n"; | |
556 | } | |
557 | os_ << " goto again;\n"; | |
558 | os_ << " }\n"; | |
559 | if (sm_.data()._seen_BOL_assertion) | |
560 | { | |
561 | os_ << " else\n"; | |
562 | os_ << " {\n"; | |
563 | os_ << " bol_ = end_bol_;\n"; | |
564 | os_ << " }\n"; | |
565 | } | |
566 | } | |
567 | else if (sm_.data()._seen_BOL_assertion) | |
568 | { | |
569 | os_ << " bol_ = end_bol_;\n"; | |
570 | } | |
571 | ||
572 | os_ << " }\n"; | |
573 | os_ << " else\n"; | |
574 | os_ << " {\n"; | |
575 | ||
576 | if (sm_.data()._seen_BOL_assertion) | |
577 | { | |
11fdf7f2 | 578 | os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n"; |
7c673cae FG |
579 | } |
580 | ||
581 | os_ << " id_ = npos;\n"; | |
582 | os_ << " uid_ = npos;\n"; | |
583 | os_ << " }\n\n"; | |
584 | ||
585 | os_ << " unique_id_ = uid_;\n"; | |
586 | os_ << " return id_;\n"; | |
587 | return os_.good(); | |
588 | } | |
589 | ||
590 | /////////////////////////////////////////////////////////////////////////// | |
591 | template <typename Char> | |
592 | inline std::basic_string<Char> get_charlit(Char ch) | |
593 | { | |
594 | std::basic_string<Char> result; | |
595 | boost::lexer::basic_string_token<Char>::escape_char(ch, result); | |
596 | return result; | |
597 | } | |
598 | ||
599 | // check whether state0_0 is referenced from any of the other states | |
600 | template <typename Char> | |
601 | bool need_label0_0(boost::lexer::basic_state_machine<Char> const &sm_) | |
602 | { | |
603 | typedef typename boost::lexer::basic_state_machine<Char>::iterator | |
604 | iterator_type; | |
605 | iterator_type iter_ = sm_.begin(); | |
606 | std::size_t const states_ = iter_->states; | |
607 | ||
608 | for (std::size_t state_ = 0; state_ < states_; ++state_) | |
609 | { | |
610 | if (0 == iter_->bol_index || 0 == iter_->eol_index) | |
611 | { | |
612 | return true; | |
613 | } | |
614 | ||
615 | std::size_t const transitions_ = iter_->transitions; | |
616 | for (std::size_t t_ = 0; t_ < transitions_; ++t_) | |
617 | { | |
618 | if (0 == iter_->goto_state) | |
619 | { | |
620 | return true; | |
621 | } | |
622 | ++iter_; | |
623 | } | |
624 | if (transitions_ == 0) ++iter_; | |
625 | } | |
626 | return false; | |
627 | } | |
628 | ||
629 | /////////////////////////////////////////////////////////////////////////// | |
630 | template <typename Char> | |
631 | bool generate_function_body_switch(std::basic_ostream<Char> & os_ | |
632 | , boost::lexer::basic_state_machine<Char> const &sm_) | |
633 | { | |
634 | typedef typename boost::lexer::basic_state_machine<Char>::iterator | |
635 | iterator_type; | |
636 | ||
637 | std::size_t const lookups_ = sm_.data()._lookup->front ()->size (); | |
638 | iterator_type iter_ = sm_.begin(); | |
639 | iterator_type labeliter_ = iter_; | |
640 | iterator_type end_ = sm_.end(); | |
641 | std::size_t const dfas_ = sm_.data()._dfa->size (); | |
642 | ||
643 | os_ << " static std::size_t const npos = " | |
644 | "static_cast<std::size_t>(~0);\n"; | |
645 | ||
646 | os_ << "\n if (start_token_ == end_)\n"; | |
647 | os_ << " {\n"; | |
648 | os_ << " unique_id_ = npos;\n"; | |
649 | os_ << " return 0;\n"; | |
650 | os_ << " }\n\n"; | |
651 | ||
652 | if (sm_.data()._seen_BOL_assertion) | |
653 | { | |
654 | os_ << " bool bol = bol_;\n"; | |
655 | } | |
656 | ||
657 | if (dfas_ > 1) | |
658 | { | |
659 | os_ << "again:\n"; | |
660 | } | |
661 | ||
662 | os_ << " Iterator curr_ = start_token_;\n"; | |
663 | os_ << " bool end_state_ = false;\n"; | |
664 | os_ << " std::size_t id_ = npos;\n"; | |
665 | os_ << " std::size_t uid_ = npos;\n"; | |
666 | ||
667 | if (dfas_ > 1) | |
668 | { | |
669 | os_ << " std::size_t end_start_state_ = start_state_;\n"; | |
670 | } | |
671 | ||
672 | if (sm_.data()._seen_BOL_assertion) | |
673 | { | |
674 | os_ << " bool end_bol_ = bol_;\n"; | |
675 | } | |
676 | ||
677 | os_ << " Iterator end_token_ = start_token_;\n"; | |
678 | os_ << '\n'; | |
679 | ||
680 | os_ << " " << ((lookups_ == 256) ? "char" : "wchar_t") | |
681 | << " ch_ = 0;\n\n"; | |
682 | ||
683 | if (dfas_ > 1) | |
684 | { | |
685 | os_ << " switch (start_state_)\n"; | |
686 | os_ << " {\n"; | |
687 | ||
688 | for (std::size_t i_ = 0; i_ < dfas_; ++i_) | |
689 | { | |
690 | os_ << " case " << i_ << ":\n"; | |
691 | os_ << " goto state" << i_ << "_0;\n"; | |
692 | os_ << " break;\n"; | |
693 | } | |
694 | ||
695 | os_ << " default:\n"; | |
696 | os_ << " goto end;\n"; | |
697 | os_ << " break;\n"; | |
698 | os_ << " }\n"; | |
699 | } | |
700 | ||
701 | bool need_state0_0_label = need_label0_0(sm_); | |
702 | ||
703 | for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_) | |
704 | { | |
705 | std::size_t const states_ = iter_->states; | |
706 | for (std::size_t state_ = 0; state_ < states_; ++state_) | |
707 | { | |
708 | std::size_t const transitions_ = iter_->transitions; | |
709 | std::size_t t_ = 0; | |
710 | ||
711 | if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label) | |
712 | { | |
713 | os_ << "\nstate" << dfa_ << '_' << state_ << ":\n"; | |
714 | } | |
715 | ||
716 | if (iter_->end_state) | |
717 | { | |
718 | os_ << " end_state_ = true;\n"; | |
719 | os_ << " id_ = " << iter_->id << ";\n"; | |
720 | os_ << " uid_ = " << iter_->unique_id << ";\n"; | |
721 | os_ << " end_token_ = curr_;\n"; | |
722 | ||
723 | if (dfas_ > 1) | |
724 | { | |
725 | os_ << " end_start_state_ = " << iter_->goto_dfa << | |
726 | ";\n"; | |
727 | } | |
728 | ||
729 | if (sm_.data()._seen_BOL_assertion) | |
730 | { | |
731 | os_ << " end_bol_ = bol;\n"; | |
732 | } | |
733 | ||
734 | if (transitions_) os_ << '\n'; | |
735 | } | |
736 | ||
737 | if (t_ < transitions_ || | |
738 | iter_->bol_index != boost::lexer::npos || | |
739 | iter_->eol_index != boost::lexer::npos) | |
740 | { | |
741 | os_ << " if (curr_ == end_) goto end;\n"; | |
742 | os_ << " ch_ = *curr_;\n"; | |
743 | if (iter_->bol_index != boost::lexer::npos) | |
744 | { | |
745 | os_ << "\n if (bol) goto state" << dfa_ << '_' | |
746 | << iter_->bol_index << ";\n"; | |
747 | } | |
748 | if (iter_->eol_index != boost::lexer::npos) | |
749 | { | |
11fdf7f2 | 750 | os_ << "\n if (ch_ == '\\n') goto state" << dfa_ |
7c673cae FG |
751 | << '_' << iter_->eol_index << ";\n"; |
752 | } | |
753 | os_ << " ++curr_;\n"; | |
754 | } | |
755 | ||
756 | for (/**/; t_ < transitions_; ++t_) | |
757 | { | |
758 | Char const *ptr_ = iter_->token._charset.c_str(); | |
1e59de90 | 759 | Char const *end2_ = ptr_ + iter_->token._charset.size(); |
7c673cae FG |
760 | Char start_char_ = 0; |
761 | Char curr_char_ = 0; | |
762 | bool range_ = false; | |
763 | bool first_char_ = true; | |
764 | ||
765 | os_ << "\n if ("; | |
766 | ||
1e59de90 | 767 | while (ptr_ != end2_) |
7c673cae FG |
768 | { |
769 | curr_char_ = *ptr_++; | |
770 | ||
771 | if (*ptr_ == curr_char_ + 1) | |
772 | { | |
773 | if (!range_) | |
774 | { | |
775 | start_char_ = curr_char_; | |
776 | } | |
777 | range_ = true; | |
778 | } | |
779 | else | |
780 | { | |
781 | if (!first_char_) | |
782 | { | |
783 | os_ << ((iter_->token._negated) ? " && " : " || "); | |
784 | } | |
785 | else | |
786 | { | |
787 | first_char_ = false; | |
788 | } | |
789 | if (range_) | |
790 | { | |
791 | if (iter_->token._negated) | |
792 | { | |
793 | os_ << "!"; | |
794 | } | |
795 | os_ << "(ch_ >= '" << get_charlit(start_char_) | |
796 | << "' && ch_ <= '" | |
797 | << get_charlit(curr_char_) << "')"; | |
798 | range_ = false; | |
799 | } | |
800 | else | |
801 | { | |
802 | os_ << "ch_ " | |
803 | << ((iter_->token._negated) ? "!=" : "==") | |
804 | << " '" << get_charlit(curr_char_) << "'"; | |
805 | } | |
806 | } | |
807 | } | |
808 | ||
809 | os_ << ") goto state" << dfa_ << '_' << iter_->goto_state | |
810 | << ";\n"; | |
811 | ++iter_; | |
812 | } | |
813 | ||
814 | if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1)) | |
815 | { | |
816 | os_ << " goto end;\n"; | |
817 | } | |
818 | ||
819 | if (transitions_ == 0) ++iter_; | |
820 | } | |
821 | } | |
822 | ||
823 | os_ << "\nend:\n"; | |
824 | os_ << " if (end_state_)\n"; | |
825 | os_ << " {\n"; | |
826 | os_ << " // return longest match\n"; | |
827 | os_ << " start_token_ = end_token_;\n"; | |
828 | ||
829 | if (dfas_ > 1) | |
830 | { | |
831 | os_ << " start_state_ = end_start_state_;\n"; | |
832 | os_ << "\n if (id_ == 0)\n"; | |
833 | os_ << " {\n"; | |
834 | ||
835 | if (sm_.data()._seen_BOL_assertion) | |
836 | { | |
837 | os_ << " bol = end_bol_;\n"; | |
838 | } | |
839 | ||
840 | os_ << " goto again;\n"; | |
841 | os_ << " }\n"; | |
842 | ||
843 | if (sm_.data()._seen_BOL_assertion) | |
844 | { | |
845 | os_ << " else\n"; | |
846 | os_ << " {\n"; | |
847 | os_ << " bol_ = end_bol_;\n"; | |
848 | os_ << " }\n"; | |
849 | } | |
850 | } | |
851 | else if (sm_.data()._seen_BOL_assertion) | |
852 | { | |
853 | os_ << " bol_ = end_bol_;\n"; | |
854 | } | |
855 | ||
856 | os_ << " }\n"; | |
857 | os_ << " else\n"; | |
858 | os_ << " {\n"; | |
859 | ||
860 | if (sm_.data()._seen_BOL_assertion) | |
861 | { | |
862 | os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n"; | |
863 | } | |
864 | os_ << " id_ = npos;\n"; | |
865 | os_ << " uid_ = npos;\n"; | |
866 | os_ << " }\n\n"; | |
867 | ||
868 | os_ << " unique_id_ = uid_;\n"; | |
869 | os_ << " return id_;\n"; | |
870 | return os_.good(); | |
871 | } | |
872 | ||
873 | /////////////////////////////////////////////////////////////////////////// | |
874 | // Generate a tokenizer for the given state machine. | |
875 | template <typename Char, typename F> | |
876 | inline bool | |
877 | generate_cpp (boost::lexer::basic_state_machine<Char> const& sm_ | |
878 | , boost::lexer::basic_rules<Char> const& rules_ | |
879 | , std::basic_ostream<Char> &os_, Char const* name_suffix | |
880 | , F generate_function_body) | |
881 | { | |
882 | if (sm_.data()._lookup->empty()) | |
883 | return false; | |
884 | ||
885 | std::size_t const dfas_ = sm_.data()._dfa->size(); | |
886 | // std::size_t const lookups_ = sm_.data()._lookup->front()->size(); | |
887 | ||
888 | os_ << "// Copyright (c) 2008-2009 Ben Hanson\n"; | |
889 | os_ << "// Copyright (c) 2008-2011 Hartmut Kaiser\n"; | |
890 | os_ << "//\n"; | |
891 | os_ << "// Distributed under the Boost Software License, " | |
892 | "Version 1.0. (See accompanying\n"; | |
893 | os_ << "// file licence_1_0.txt or copy at " | |
894 | "http://www.boost.org/LICENSE_1_0.txt)\n\n"; | |
895 | os_ << "// Auto-generated by boost::lexer, do not edit\n\n"; | |
896 | ||
897 | std::basic_string<Char> guard(name_suffix); | |
898 | guard += L<Char>(name_suffix[0] ? "_" : ""); | |
899 | guard += L<Char>(__DATE__ "_" __TIME__); | |
900 | typename std::basic_string<Char>::size_type p = | |
901 | guard.find_first_of(L<Char>(": ")); | |
902 | while (std::string::npos != p) | |
903 | { | |
904 | guard.replace(p, 1, L<Char>("_")); | |
905 | p = guard.find_first_of(L<Char>(": "), p); | |
906 | } | |
f67539c2 TL |
907 | { // to_upper(guard) |
908 | typedef std::ctype<Char> facet_t; | |
909 | facet_t const& facet = std::use_facet<facet_t>(std::locale()); | |
910 | typedef typename std::basic_string<Char>::iterator iter_t; | |
911 | for (iter_t iter = guard.begin(), | |
912 | last = guard.end(); iter != last; ++iter) | |
913 | *iter = facet.toupper(*iter); | |
914 | } | |
7c673cae FG |
915 | |
916 | os_ << "#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << ")\n"; | |
917 | os_ << "#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << "\n\n"; | |
918 | ||
7c673cae FG |
919 | os_ << "#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>\n\n"; |
920 | ||
921 | generate_delimiter(os_); | |
922 | os_ << "// the generated table of state names and the tokenizer have to be\n" | |
923 | "// defined in the boost::spirit::lex::lexertl::static_ namespace\n"; | |
924 | os_ << "namespace boost { namespace spirit { namespace lex { " | |
925 | "namespace lexertl { namespace static_ {\n\n"; | |
926 | ||
927 | // generate the lexer state information variables | |
928 | if (!generate_cpp_state_info(rules_, os_, name_suffix)) | |
929 | return false; | |
930 | ||
931 | generate_delimiter(os_); | |
932 | os_ << "// this function returns the next matched token\n"; | |
933 | os_ << "template<typename Iterator>\n"; | |
934 | os_ << "std::size_t next_token" << (name_suffix[0] ? "_" : "") | |
935 | << name_suffix << " ("; | |
936 | ||
937 | if (dfas_ > 1) | |
938 | { | |
939 | os_ << "std::size_t& start_state_, "; | |
940 | } | |
941 | else | |
942 | { | |
943 | os_ << "std::size_t& /*start_state_*/, "; | |
944 | } | |
945 | if (sm_.data()._seen_BOL_assertion) | |
946 | { | |
947 | os_ << "bool& bol_, "; | |
948 | } | |
949 | else | |
950 | { | |
951 | os_ << "bool& /*bol_*/, "; | |
952 | } | |
953 | os_ << "\n "; | |
954 | ||
955 | os_ << "Iterator &start_token_, Iterator const& end_, "; | |
956 | os_ << "std::size_t& unique_id_)\n"; | |
957 | os_ << "{\n"; | |
958 | if (!generate_function_body(os_, sm_)) | |
959 | return false; | |
960 | os_ << "}\n\n"; | |
961 | ||
962 | if (!generate_cpp_state_table<Char>(os_, name_suffix | |
963 | , sm_.data()._seen_BOL_assertion, sm_.data()._seen_EOL_assertion)) | |
964 | { | |
965 | return false; | |
966 | } | |
967 | ||
968 | os_ << "}}}}} // namespace boost::spirit::lex::lexertl::static_\n\n"; | |
969 | ||
970 | os_ << "#endif\n"; | |
971 | ||
972 | return os_.good(); | |
973 | } | |
974 | ||
975 | } // namespace detail | |
976 | ||
977 | /////////////////////////////////////////////////////////////////////////// | |
978 | template <typename Lexer, typename F> | |
979 | inline bool | |
980 | generate_static(Lexer const& lexer | |
981 | , std::basic_ostream<typename Lexer::char_type>& os | |
982 | , typename Lexer::char_type const* name_suffix, F f) | |
983 | { | |
984 | if (!lexer.init_dfa(true)) // always minimize DFA for static lexers | |
985 | return false; | |
986 | return detail::generate_cpp(lexer.state_machine_, lexer.rules_, os | |
987 | , name_suffix, f); | |
988 | } | |
989 | ||
990 | /////////////////////////////////////////////////////////////////////////// | |
991 | // deprecated function, will be removed in the future (this has been | |
992 | // replaced by the function generate_static_dfa - see below). | |
993 | template <typename Lexer> | |
994 | inline bool | |
995 | generate_static(Lexer const& lexer | |
996 | , std::basic_ostream<typename Lexer::char_type>& os | |
997 | , typename Lexer::char_type const* name_suffix = | |
998 | detail::L<typename Lexer::char_type>()) | |
999 | { | |
1000 | return generate_static(lexer, os, name_suffix | |
1001 | , &detail::generate_function_body_dfa<typename Lexer::char_type>); | |
1002 | } | |
1003 | ||
1004 | /////////////////////////////////////////////////////////////////////////// | |
1005 | template <typename Lexer> | |
1006 | inline bool | |
1007 | generate_static_dfa(Lexer const& lexer | |
1008 | , std::basic_ostream<typename Lexer::char_type>& os | |
1009 | , typename Lexer::char_type const* name_suffix = | |
1010 | detail::L<typename Lexer::char_type>()) | |
1011 | { | |
1012 | return generate_static(lexer, os, name_suffix | |
1013 | , &detail::generate_function_body_dfa<typename Lexer::char_type>); | |
1014 | } | |
1015 | ||
1016 | /////////////////////////////////////////////////////////////////////////// | |
1017 | template <typename Lexer> | |
1018 | inline bool | |
1019 | generate_static_switch(Lexer const& lexer | |
1020 | , std::basic_ostream<typename Lexer::char_type>& os | |
1021 | , typename Lexer::char_type const* name_suffix = | |
1022 | detail::L<typename Lexer::char_type>()) | |
1023 | { | |
1024 | return generate_static(lexer, os, name_suffix | |
1025 | , &detail::generate_function_body_switch<typename Lexer::char_type>); | |
1026 | } | |
1027 | ||
1028 | /////////////////////////////////////////////////////////////////////////////// | |
1029 | }}}} | |
1030 | ||
1031 | #endif |