]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright (c) 2008-2009 Ben Hanson |
2 | // Copyright (c) 2008-2011 Hartmut Kaiser | |
3 | // | |
4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
5 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
6 | ||
7 | #if !defined(BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM) | |
8 | #define BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM | |
9 | ||
10 | #if defined(_MSC_VER) | |
11 | #pragma once | |
12 | #endif | |
13 | ||
14 | #include <boost/spirit/home/support/detail/lexer/char_traits.hpp> | |
15 | #include <boost/spirit/home/support/detail/lexer/consts.hpp> | |
16 | #include <boost/spirit/home/support/detail/lexer/rules.hpp> | |
17 | #include <boost/spirit/home/support/detail/lexer/size_t.hpp> | |
18 | #include <boost/spirit/home/support/detail/lexer/state_machine.hpp> | |
19 | #include <boost/spirit/home/support/detail/lexer/debug.hpp> | |
20 | #include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp> | |
21 | #include <boost/algorithm/string.hpp> | |
7c673cae FG |
22 | #include <boost/scoped_array.hpp> |
23 | ||
24 | /////////////////////////////////////////////////////////////////////////////// | |
25 | namespace boost { namespace spirit { namespace lex { namespace lexertl | |
26 | { | |
27 | namespace detail | |
28 | { | |
29 | ||
30 | /////////////////////////////////////////////////////////////////////////// | |
31 | template <typename CharT> | |
32 | struct string_lit; | |
33 | ||
34 | template <> | |
35 | struct string_lit<char> | |
36 | { | |
37 | static char get(char c) { return c; } | |
38 | static std::string get(char const* str = "") { return str; } | |
39 | }; | |
40 | ||
41 | template <> | |
42 | struct string_lit<wchar_t> | |
43 | { | |
44 | static wchar_t get(char c) | |
45 | { | |
46 | typedef std::ctype<wchar_t> ctype_t; | |
47 | return std::use_facet<ctype_t>(std::locale()).widen(c); | |
48 | } | |
49 | static std::basic_string<wchar_t> get(char const* source = "") | |
50 | { | |
51 | using namespace std; // some systems have size_t in ns std | |
52 | size_t len = strlen(source); | |
53 | boost::scoped_array<wchar_t> result (new wchar_t[len+1]); | |
54 | result.get()[len] = '\0'; | |
55 | ||
56 | // working with wide character streams is supported only if the | |
57 | // platform provides the std::ctype<wchar_t> facet | |
58 | BOOST_ASSERT(std::has_facet<std::ctype<wchar_t> >(std::locale())); | |
59 | ||
60 | std::use_facet<std::ctype<wchar_t> >(std::locale()) | |
61 | .widen(source, source + len, result.get()); | |
62 | return result.get(); | |
63 | } | |
64 | }; | |
65 | ||
66 | template <typename Char> | |
67 | inline Char L(char c) | |
68 | { | |
69 | return string_lit<Char>::get(c); | |
70 | } | |
71 | ||
72 | template <typename Char> | |
73 | inline std::basic_string<Char> L(char const* c = "") | |
74 | { | |
75 | return string_lit<Char>::get(c); | |
76 | } | |
77 | ||
78 | /////////////////////////////////////////////////////////////////////////// | |
// Write a separator line consisting of 80 '/' characters followed by a
// newline. Returns whether the stream is still in a good state.
template <typename Char>
inline bool
generate_delimiter(std::basic_ostream<Char> &os_)
{
    std::basic_string<Char> const rule_(80, '/');
    os_ << rule_;
    os_ << "\n";
    return os_.good();
}
86 | ||
87 | /////////////////////////////////////////////////////////////////////////// | |
88 | // Generate a table of the names of the used lexer states, which is a bit | |
89 | // tricky, because the table stored with the rules is sorted based on the | |
90 | // names, but we need it sorted using the state ids. | |
91 | template <typename Char> | |
92 | inline bool | |
93 | generate_cpp_state_info (boost::lexer::basic_rules<Char> const& rules_ | |
94 | , std::basic_ostream<Char> &os_, Char const* name_suffix) | |
95 | { | |
96 | // we need to re-sort the state names in ascending order of the state | |
97 | // ids, filling possible gaps in between later | |
98 | typedef typename | |
99 | boost::lexer::basic_rules<Char>::string_size_t_map::const_iterator | |
100 | state_iterator; | |
101 | typedef std::map<std::size_t, Char const*> reverse_state_map_type; | |
102 | ||
103 | reverse_state_map_type reverse_state_map; | |
104 | state_iterator send = rules_.statemap().end(); | |
105 | for (state_iterator sit = rules_.statemap().begin(); sit != send; ++sit) | |
106 | { | |
107 | typedef typename reverse_state_map_type::value_type value_type; | |
108 | reverse_state_map.insert(value_type((*sit).second, (*sit).first.c_str())); | |
109 | } | |
110 | ||
111 | generate_delimiter(os_); | |
112 | os_ << "// this table defines the names of the lexer states\n"; | |
113 | os_ << boost::lexer::detail::strings<Char>::char_name() | |
114 | << " const* const lexer_state_names" | |
115 | << (name_suffix[0] ? "_" : "") << name_suffix | |
116 | << "[" << rules_.statemap().size() << "] = \n{\n"; | |
117 | ||
118 | typedef typename reverse_state_map_type::iterator iterator; | |
119 | iterator rend = reverse_state_map.end(); | |
120 | std::size_t last_id = 0; | |
121 | for (iterator rit = reverse_state_map.begin(); rit != rend; ++last_id) | |
122 | { | |
123 | for (/**/; last_id < (*rit).first; ++last_id) | |
124 | { | |
125 | os_ << " 0, // \"<undefined state>\"\n"; | |
126 | } | |
127 | os_ << " " | |
128 | << boost::lexer::detail::strings<Char>::char_prefix() | |
129 | << "\"" << (*rit).second << "\""; | |
130 | if (++rit != rend) | |
131 | os_ << ",\n"; | |
132 | else | |
133 | os_ << "\n"; // don't generate the final comma | |
134 | } | |
135 | os_ << "};\n\n"; | |
136 | ||
137 | generate_delimiter(os_); | |
138 | os_ << "// this variable defines the number of lexer states\n"; | |
139 | os_ << "std::size_t const lexer_state_count" | |
140 | << (name_suffix[0] ? "_" : "") << name_suffix | |
141 | << " = " << rules_.statemap().size() << ";\n\n"; | |
142 | return os_.good(); | |
143 | } | |
144 | ||
145 | template <typename Char> | |
146 | inline bool | |
147 | generate_cpp_state_table (std::basic_ostream<Char> &os_ | |
148 | , Char const* name_suffix, bool bol, bool eol) | |
149 | { | |
150 | std::basic_string<Char> suffix(L<Char>(name_suffix[0] ? "_" : "")); | |
151 | suffix += name_suffix; | |
152 | ||
153 | generate_delimiter(os_); | |
154 | os_ << "// this defines a generic accessors for the information above\n"; | |
155 | os_ << "struct lexer" << suffix << "\n{\n"; | |
156 | os_ << " // version number and feature-set of compatible static lexer engine\n"; | |
157 | os_ << " enum\n"; | |
92f5a8d4 | 158 | os_ << " {\n static_version = " << SPIRIT_STATIC_LEXER_VERSION << ",\n"; |
7c673cae FG |
159 | os_ << " supports_bol = " << std::boolalpha << bol << ",\n"; |
160 | os_ << " supports_eol = " << std::boolalpha << eol << "\n"; | |
161 | os_ << " };\n\n"; | |
162 | os_ << " // return the number of lexer states\n"; | |
163 | os_ << " static std::size_t state_count()\n"; | |
164 | os_ << " {\n return lexer_state_count" << suffix << "; \n }\n\n"; | |
165 | os_ << " // return the name of the lexer state as given by 'idx'\n"; | |
166 | os_ << " static " << boost::lexer::detail::strings<Char>::char_name() | |
167 | << " const* state_name(std::size_t idx)\n"; | |
168 | os_ << " {\n return lexer_state_names" << suffix << "[idx]; \n }\n\n"; | |
169 | os_ << " // return the next matched token\n"; | |
170 | os_ << " template<typename Iterator>\n"; | |
171 | os_ << " static std::size_t next(std::size_t &start_state_, bool& bol_\n"; | |
172 | os_ << " , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n"; | |
173 | os_ << " {\n return next_token" << suffix | |
174 | << "(start_state_, bol_, start_token_, end_, unique_id_);\n }\n"; | |
175 | os_ << "};\n\n"; | |
176 | return os_.good(); | |
177 | } | |
178 | ||
179 | /////////////////////////////////////////////////////////////////////////// | |
180 | // generate function body based on traversing the DFA tables | |
181 | template <typename Char> | |
182 | bool generate_function_body_dfa(std::basic_ostream<Char>& os_ | |
183 | , boost::lexer::basic_state_machine<Char> const &sm_) | |
184 | { | |
185 | std::size_t const dfas_ = sm_.data()._dfa->size(); | |
186 | std::size_t const lookups_ = sm_.data()._lookup->front()->size(); | |
187 | ||
188 | os_ << " enum {end_state_index, id_index, unique_id_index, " | |
189 | "state_index, bol_index,\n"; | |
190 | os_ << " eol_index, dead_state_index, dfa_offset};\n\n"; | |
191 | os_ << " static std::size_t const npos = " | |
192 | "static_cast<std::size_t>(~0);\n"; | |
193 | ||
194 | if (dfas_ > 1) | |
195 | { | |
196 | for (std::size_t state_ = 0; state_ < dfas_; ++state_) | |
197 | { | |
198 | std::size_t i_ = 0; | |
199 | std::size_t j_ = 1; | |
200 | std::size_t count_ = lookups_ / 8; | |
201 | std::size_t const* lookup_ = &sm_.data()._lookup[state_]->front(); | |
202 | std::size_t const* dfa_ = &sm_.data()._dfa[state_]->front(); | |
203 | ||
204 | os_ << " static std::size_t const lookup" << state_ | |
205 | << "_[" << lookups_ << "] = {\n "; | |
206 | for (/**/; i_ < count_; ++i_) | |
207 | { | |
208 | std::size_t const index_ = i_ * 8; | |
209 | os_ << lookup_[index_]; | |
210 | for (/**/; j_ < 8; ++j_) | |
211 | { | |
212 | os_ << ", " << lookup_[index_ + j_]; | |
213 | } | |
214 | if (i_ < count_ - 1) | |
215 | { | |
216 | os_ << ",\n "; | |
217 | } | |
218 | j_ = 1; | |
219 | } | |
220 | os_ << " };\n"; | |
221 | ||
222 | count_ = sm_.data()._dfa[state_]->size (); | |
223 | os_ << " static const std::size_t dfa" << state_ << "_[" | |
224 | << count_ << "] = {\n "; | |
225 | count_ /= 8; | |
226 | for (i_ = 0; i_ < count_; ++i_) | |
227 | { | |
228 | std::size_t const index_ = i_ * 8; | |
229 | os_ << dfa_[index_]; | |
230 | for (j_ = 1; j_ < 8; ++j_) | |
231 | { | |
232 | os_ << ", " << dfa_[index_ + j_]; | |
233 | } | |
234 | if (i_ < count_ - 1) | |
235 | { | |
236 | os_ << ",\n "; | |
237 | } | |
238 | } | |
239 | ||
240 | std::size_t const mod_ = sm_.data()._dfa[state_]->size () % 8; | |
241 | if (mod_) | |
242 | { | |
243 | std::size_t const index_ = count_ * 8; | |
244 | if (count_) | |
245 | { | |
246 | os_ << ",\n "; | |
247 | } | |
248 | os_ << dfa_[index_]; | |
249 | for (j_ = 1; j_ < mod_; ++j_) | |
250 | { | |
251 | os_ << ", " << dfa_[index_ + j_]; | |
252 | } | |
253 | } | |
254 | os_ << " };\n"; | |
255 | } | |
256 | ||
257 | std::size_t count_ = sm_.data()._dfa_alphabet.size(); | |
258 | std::size_t i_ = 1; | |
259 | ||
260 | os_ << " static std::size_t const* lookup_arr_[" << count_ | |
261 | << "] = { lookup0_"; | |
262 | for (i_ = 1; i_ < count_; ++i_) | |
263 | { | |
264 | os_ << ", " << "lookup" << i_ << "_"; | |
265 | } | |
266 | os_ << " };\n"; | |
267 | ||
268 | os_ << " static std::size_t const dfa_alphabet_arr_[" | |
269 | << count_ << "] = { "; | |
270 | os_ << sm_.data()._dfa_alphabet.front (); | |
271 | for (i_ = 1; i_ < count_; ++i_) | |
272 | { | |
273 | os_ << ", " << sm_.data()._dfa_alphabet[i_]; | |
274 | } | |
275 | os_ << " };\n"; | |
276 | ||
277 | os_ << " static std::size_t const* dfa_arr_[" << count_ | |
278 | << "] = { "; | |
279 | os_ << "dfa0_"; | |
280 | for (i_ = 1; i_ < count_; ++i_) | |
281 | { | |
282 | os_ << ", " << "dfa" << i_ << "_"; | |
283 | } | |
284 | os_ << " };\n"; | |
285 | } | |
286 | else | |
287 | { | |
288 | std::size_t const* lookup_ = &sm_.data()._lookup[0]->front(); | |
289 | std::size_t const* dfa_ = &sm_.data()._dfa[0]->front(); | |
290 | std::size_t i_ = 0; | |
291 | std::size_t j_ = 1; | |
292 | std::size_t count_ = lookups_ / 8; | |
293 | ||
294 | os_ << " static std::size_t const lookup_["; | |
295 | os_ << sm_.data()._lookup[0]->size() << "] = {\n "; | |
296 | for (/**/; i_ < count_; ++i_) | |
297 | { | |
298 | const std::size_t index_ = i_ * 8; | |
299 | os_ << lookup_[index_]; | |
300 | for (/**/; j_ < 8; ++j_) | |
301 | { | |
302 | os_ << ", " << lookup_[index_ + j_]; | |
303 | } | |
304 | if (i_ < count_ - 1) | |
305 | { | |
306 | os_ << ",\n "; | |
307 | } | |
308 | j_ = 1; | |
309 | } | |
310 | os_ << " };\n"; | |
311 | ||
312 | os_ << " static std::size_t const dfa_alphabet_ = " | |
313 | << sm_.data()._dfa_alphabet.front () << ";\n"; | |
314 | os_ << " static std::size_t const dfa_[" | |
315 | << sm_.data()._dfa[0]->size () << "] = {\n "; | |
316 | count_ = sm_.data()._dfa[0]->size () / 8; | |
317 | for (i_ = 0; i_ < count_; ++i_) | |
318 | { | |
319 | const std::size_t index_ = i_ * 8; | |
320 | os_ << dfa_[index_]; | |
321 | for (j_ = 1; j_ < 8; ++j_) | |
322 | { | |
323 | os_ << ", " << dfa_[index_ + j_]; | |
324 | } | |
325 | if (i_ < count_ - 1) | |
326 | { | |
327 | os_ << ",\n "; | |
328 | } | |
329 | } | |
330 | ||
331 | const std::size_t mod_ = sm_.data()._dfa[0]->size () % 8; | |
332 | if (mod_) | |
333 | { | |
334 | const std::size_t index_ = count_ * 8; | |
335 | if (count_) | |
336 | { | |
337 | os_ << ",\n "; | |
338 | } | |
339 | os_ << dfa_[index_]; | |
340 | for (j_ = 1; j_ < mod_; ++j_) | |
341 | { | |
342 | os_ << ", " << dfa_[index_ + j_]; | |
343 | } | |
344 | } | |
345 | os_ << " };\n"; | |
346 | } | |
347 | ||
348 | os_ << "\n if (start_token_ == end_)\n"; | |
349 | os_ << " {\n"; | |
350 | os_ << " unique_id_ = npos;\n"; | |
351 | os_ << " return 0;\n"; | |
352 | os_ << " }\n\n"; | |
353 | if (sm_.data()._seen_BOL_assertion) | |
354 | { | |
355 | os_ << " bool bol = bol_;\n\n"; | |
356 | } | |
357 | ||
358 | if (dfas_ > 1) | |
359 | { | |
360 | os_ << "again:\n"; | |
361 | os_ << " std::size_t const* lookup_ = lookup_arr_[start_state_];\n"; | |
362 | os_ << " std::size_t dfa_alphabet_ = dfa_alphabet_arr_[start_state_];\n"; | |
363 | os_ << " std::size_t const*dfa_ = dfa_arr_[start_state_];\n"; | |
364 | } | |
365 | ||
366 | os_ << " std::size_t const* ptr_ = dfa_ + dfa_alphabet_;\n"; | |
367 | os_ << " Iterator curr_ = start_token_;\n"; | |
368 | os_ << " bool end_state_ = *ptr_ != 0;\n"; | |
369 | os_ << " std::size_t id_ = *(ptr_ + id_index);\n"; | |
370 | os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n"; | |
371 | if (dfas_ > 1) | |
372 | { | |
373 | os_ << " std::size_t end_start_state_ = start_state_;\n"; | |
374 | } | |
375 | if (sm_.data()._seen_BOL_assertion) | |
376 | { | |
377 | os_ << " bool end_bol_ = bol_;\n"; | |
378 | } | |
379 | os_ << " Iterator end_token_ = start_token_;\n\n"; | |
380 | ||
381 | os_ << " while (curr_ != end_)\n"; | |
382 | os_ << " {\n"; | |
383 | ||
384 | if (sm_.data()._seen_BOL_assertion) | |
385 | { | |
386 | os_ << " std::size_t const BOL_state_ = ptr_[bol_index];\n\n"; | |
387 | } | |
388 | ||
389 | if (sm_.data()._seen_EOL_assertion) | |
390 | { | |
391 | os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n"; | |
392 | } | |
393 | ||
394 | if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion) | |
395 | { | |
396 | os_ << " if (BOL_state_ && bol)\n"; | |
397 | os_ << " {\n"; | |
398 | os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n"; | |
399 | os_ << " }\n"; | |
400 | os_ << " else if (EOL_state_ && *curr_ == '\\n')\n"; | |
401 | os_ << " {\n"; | |
402 | os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n"; | |
403 | os_ << " }\n"; | |
404 | os_ << " else\n"; | |
405 | os_ << " {\n"; | |
406 | if (lookups_ == 256) | |
407 | { | |
408 | os_ << " unsigned char index = \n"; | |
409 | os_ << " static_cast<unsigned char>(*curr_++);\n"; | |
410 | } | |
411 | else | |
412 | { | |
413 | os_ << " std::size_t index = *curr_++\n"; | |
414 | } | |
11fdf7f2 | 415 | os_ << " bol = (index == '\\n') ? true : false;\n"; |
7c673cae FG |
416 | os_ << " std::size_t const state_ = ptr_[\n"; |
417 | os_ << " lookup_[static_cast<std::size_t>(index)]];\n"; | |
418 | ||
419 | os_ << '\n'; | |
420 | os_ << " if (state_ == 0) break;\n"; | |
421 | os_ << '\n'; | |
422 | os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n"; | |
423 | os_ << " }\n\n"; | |
424 | } | |
425 | else if (sm_.data()._seen_BOL_assertion) | |
426 | { | |
427 | os_ << " if (BOL_state_ && bol)\n"; | |
428 | os_ << " {\n"; | |
429 | os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n"; | |
430 | os_ << " }\n"; | |
431 | os_ << " else\n"; | |
432 | os_ << " {\n"; | |
433 | if (lookups_ == 256) | |
434 | { | |
435 | os_ << " unsigned char index = \n"; | |
436 | os_ << " static_cast<unsigned char>(*curr_++);\n"; | |
437 | } | |
438 | else | |
439 | { | |
440 | os_ << " std::size_t index = *curr_++\n"; | |
441 | } | |
11fdf7f2 | 442 | os_ << " bol = (index == '\\n') ? true : false;\n"; |
7c673cae FG |
443 | os_ << " std::size_t const state_ = ptr_[\n"; |
444 | os_ << " lookup_[static_cast<std::size_t>(index)]];\n"; | |
445 | ||
446 | os_ << '\n'; | |
447 | os_ << " if (state_ == 0) break;\n"; | |
448 | os_ << '\n'; | |
449 | os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n"; | |
450 | os_ << " }\n\n"; | |
451 | } | |
452 | else if (sm_.data()._seen_EOL_assertion) | |
453 | { | |
454 | os_ << " if (EOL_state_ && *curr_ == '\\n')\n"; | |
455 | os_ << " {\n"; | |
456 | os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n"; | |
457 | os_ << " }\n"; | |
458 | os_ << " else\n"; | |
459 | os_ << " {\n"; | |
460 | if (lookups_ == 256) | |
461 | { | |
462 | os_ << " unsigned char index = \n"; | |
463 | os_ << " static_cast<unsigned char>(*curr_++);\n"; | |
464 | } | |
465 | else | |
466 | { | |
467 | os_ << " std::size_t index = *curr_++\n"; | |
468 | } | |
11fdf7f2 | 469 | os_ << " bol = (index == '\\n') ? true : false;\n"; |
7c673cae FG |
470 | os_ << " std::size_t const state_ = ptr_[\n"; |
471 | os_ << " lookup_[static_cast<std::size_t>(index)]];\n"; | |
472 | ||
473 | os_ << '\n'; | |
474 | os_ << " if (state_ == 0) break;\n"; | |
475 | os_ << '\n'; | |
476 | os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n"; | |
477 | os_ << " }\n\n"; | |
478 | } | |
479 | else | |
480 | { | |
481 | os_ << " std::size_t const state_ =\n"; | |
482 | ||
483 | if (lookups_ == 256) | |
484 | { | |
485 | os_ << " ptr_[lookup_[" | |
486 | "static_cast<unsigned char>(*curr_++)]];\n"; | |
487 | } | |
488 | else | |
489 | { | |
490 | os_ << " ptr_[lookup_[*curr_++]];\n"; | |
491 | } | |
492 | ||
493 | os_ << '\n'; | |
494 | os_ << " if (state_ == 0) break;\n"; | |
495 | os_ << '\n'; | |
496 | os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n\n"; | |
497 | } | |
498 | ||
499 | os_ << " if (*ptr_)\n"; | |
500 | os_ << " {\n"; | |
501 | os_ << " end_state_ = true;\n"; | |
502 | os_ << " id_ = *(ptr_ + id_index);\n"; | |
503 | os_ << " uid_ = *(ptr_ + unique_id_index);\n"; | |
504 | if (dfas_ > 1) | |
505 | { | |
506 | os_ << " end_start_state_ = *(ptr_ + state_index);\n"; | |
507 | } | |
508 | if (sm_.data()._seen_BOL_assertion) | |
509 | { | |
510 | os_ << " end_bol_ = bol;\n"; | |
511 | } | |
512 | os_ << " end_token_ = curr_;\n"; | |
513 | os_ << " }\n"; | |
514 | os_ << " }\n\n"; | |
515 | ||
516 | if (sm_.data()._seen_EOL_assertion) | |
517 | { | |
518 | os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n"; | |
519 | ||
520 | os_ << " if (EOL_state_ && curr_ == end_)\n"; | |
521 | os_ << " {\n"; | |
522 | os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n\n"; | |
523 | ||
524 | os_ << " if (*ptr_)\n"; | |
525 | os_ << " {\n"; | |
526 | os_ << " end_state_ = true;\n"; | |
527 | os_ << " id_ = *(ptr_ + id_index);\n"; | |
528 | os_ << " uid_ = *(ptr_ + unique_id_index);\n"; | |
529 | if (dfas_ > 1) | |
530 | { | |
531 | os_ << " end_start_state_ = *(ptr_ + state_index);\n"; | |
532 | } | |
533 | if (sm_.data()._seen_BOL_assertion) | |
534 | { | |
535 | os_ << " end_bol_ = bol;\n"; | |
536 | } | |
537 | os_ << " end_token_ = curr_;\n"; | |
538 | os_ << " }\n"; | |
539 | os_ << " }\n\n"; | |
540 | } | |
541 | ||
542 | os_ << " if (end_state_)\n"; | |
543 | os_ << " {\n"; | |
544 | os_ << " // return longest match\n"; | |
545 | os_ << " start_token_ = end_token_;\n"; | |
546 | ||
547 | if (dfas_ > 1) | |
548 | { | |
549 | os_ << " start_state_ = end_start_state_;\n"; | |
550 | os_ << " if (id_ == 0)\n"; | |
551 | os_ << " {\n"; | |
552 | if (sm_.data()._seen_BOL_assertion) | |
553 | { | |
554 | os_ << " bol = end_bol_;\n"; | |
555 | } | |
556 | os_ << " goto again;\n"; | |
557 | os_ << " }\n"; | |
558 | if (sm_.data()._seen_BOL_assertion) | |
559 | { | |
560 | os_ << " else\n"; | |
561 | os_ << " {\n"; | |
562 | os_ << " bol_ = end_bol_;\n"; | |
563 | os_ << " }\n"; | |
564 | } | |
565 | } | |
566 | else if (sm_.data()._seen_BOL_assertion) | |
567 | { | |
568 | os_ << " bol_ = end_bol_;\n"; | |
569 | } | |
570 | ||
571 | os_ << " }\n"; | |
572 | os_ << " else\n"; | |
573 | os_ << " {\n"; | |
574 | ||
575 | if (sm_.data()._seen_BOL_assertion) | |
576 | { | |
11fdf7f2 | 577 | os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n"; |
7c673cae FG |
578 | } |
579 | ||
580 | os_ << " id_ = npos;\n"; | |
581 | os_ << " uid_ = npos;\n"; | |
582 | os_ << " }\n\n"; | |
583 | ||
584 | os_ << " unique_id_ = uid_;\n"; | |
585 | os_ << " return id_;\n"; | |
586 | return os_.good(); | |
587 | } | |
588 | ||
589 | /////////////////////////////////////////////////////////////////////////// | |
590 | template <typename Char> | |
591 | inline std::basic_string<Char> get_charlit(Char ch) | |
592 | { | |
593 | std::basic_string<Char> result; | |
594 | boost::lexer::basic_string_token<Char>::escape_char(ch, result); | |
595 | return result; | |
596 | } | |
597 | ||
598 | // check whether state0_0 is referenced from any of the other states | |
599 | template <typename Char> | |
600 | bool need_label0_0(boost::lexer::basic_state_machine<Char> const &sm_) | |
601 | { | |
602 | typedef typename boost::lexer::basic_state_machine<Char>::iterator | |
603 | iterator_type; | |
604 | iterator_type iter_ = sm_.begin(); | |
605 | std::size_t const states_ = iter_->states; | |
606 | ||
607 | for (std::size_t state_ = 0; state_ < states_; ++state_) | |
608 | { | |
609 | if (0 == iter_->bol_index || 0 == iter_->eol_index) | |
610 | { | |
611 | return true; | |
612 | } | |
613 | ||
614 | std::size_t const transitions_ = iter_->transitions; | |
615 | for (std::size_t t_ = 0; t_ < transitions_; ++t_) | |
616 | { | |
617 | if (0 == iter_->goto_state) | |
618 | { | |
619 | return true; | |
620 | } | |
621 | ++iter_; | |
622 | } | |
623 | if (transitions_ == 0) ++iter_; | |
624 | } | |
625 | return false; | |
626 | } | |
627 | ||
628 | /////////////////////////////////////////////////////////////////////////// | |
629 | template <typename Char> | |
630 | bool generate_function_body_switch(std::basic_ostream<Char> & os_ | |
631 | , boost::lexer::basic_state_machine<Char> const &sm_) | |
632 | { | |
633 | typedef typename boost::lexer::basic_state_machine<Char>::iterator | |
634 | iterator_type; | |
635 | ||
636 | std::size_t const lookups_ = sm_.data()._lookup->front ()->size (); | |
637 | iterator_type iter_ = sm_.begin(); | |
638 | iterator_type labeliter_ = iter_; | |
639 | iterator_type end_ = sm_.end(); | |
640 | std::size_t const dfas_ = sm_.data()._dfa->size (); | |
641 | ||
642 | os_ << " static std::size_t const npos = " | |
643 | "static_cast<std::size_t>(~0);\n"; | |
644 | ||
645 | os_ << "\n if (start_token_ == end_)\n"; | |
646 | os_ << " {\n"; | |
647 | os_ << " unique_id_ = npos;\n"; | |
648 | os_ << " return 0;\n"; | |
649 | os_ << " }\n\n"; | |
650 | ||
651 | if (sm_.data()._seen_BOL_assertion) | |
652 | { | |
653 | os_ << " bool bol = bol_;\n"; | |
654 | } | |
655 | ||
656 | if (dfas_ > 1) | |
657 | { | |
658 | os_ << "again:\n"; | |
659 | } | |
660 | ||
661 | os_ << " Iterator curr_ = start_token_;\n"; | |
662 | os_ << " bool end_state_ = false;\n"; | |
663 | os_ << " std::size_t id_ = npos;\n"; | |
664 | os_ << " std::size_t uid_ = npos;\n"; | |
665 | ||
666 | if (dfas_ > 1) | |
667 | { | |
668 | os_ << " std::size_t end_start_state_ = start_state_;\n"; | |
669 | } | |
670 | ||
671 | if (sm_.data()._seen_BOL_assertion) | |
672 | { | |
673 | os_ << " bool end_bol_ = bol_;\n"; | |
674 | } | |
675 | ||
676 | os_ << " Iterator end_token_ = start_token_;\n"; | |
677 | os_ << '\n'; | |
678 | ||
679 | os_ << " " << ((lookups_ == 256) ? "char" : "wchar_t") | |
680 | << " ch_ = 0;\n\n"; | |
681 | ||
682 | if (dfas_ > 1) | |
683 | { | |
684 | os_ << " switch (start_state_)\n"; | |
685 | os_ << " {\n"; | |
686 | ||
687 | for (std::size_t i_ = 0; i_ < dfas_; ++i_) | |
688 | { | |
689 | os_ << " case " << i_ << ":\n"; | |
690 | os_ << " goto state" << i_ << "_0;\n"; | |
691 | os_ << " break;\n"; | |
692 | } | |
693 | ||
694 | os_ << " default:\n"; | |
695 | os_ << " goto end;\n"; | |
696 | os_ << " break;\n"; | |
697 | os_ << " }\n"; | |
698 | } | |
699 | ||
700 | bool need_state0_0_label = need_label0_0(sm_); | |
701 | ||
702 | for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_) | |
703 | { | |
704 | std::size_t const states_ = iter_->states; | |
705 | for (std::size_t state_ = 0; state_ < states_; ++state_) | |
706 | { | |
707 | std::size_t const transitions_ = iter_->transitions; | |
708 | std::size_t t_ = 0; | |
709 | ||
710 | if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label) | |
711 | { | |
712 | os_ << "\nstate" << dfa_ << '_' << state_ << ":\n"; | |
713 | } | |
714 | ||
715 | if (iter_->end_state) | |
716 | { | |
717 | os_ << " end_state_ = true;\n"; | |
718 | os_ << " id_ = " << iter_->id << ";\n"; | |
719 | os_ << " uid_ = " << iter_->unique_id << ";\n"; | |
720 | os_ << " end_token_ = curr_;\n"; | |
721 | ||
722 | if (dfas_ > 1) | |
723 | { | |
724 | os_ << " end_start_state_ = " << iter_->goto_dfa << | |
725 | ";\n"; | |
726 | } | |
727 | ||
728 | if (sm_.data()._seen_BOL_assertion) | |
729 | { | |
730 | os_ << " end_bol_ = bol;\n"; | |
731 | } | |
732 | ||
733 | if (transitions_) os_ << '\n'; | |
734 | } | |
735 | ||
736 | if (t_ < transitions_ || | |
737 | iter_->bol_index != boost::lexer::npos || | |
738 | iter_->eol_index != boost::lexer::npos) | |
739 | { | |
740 | os_ << " if (curr_ == end_) goto end;\n"; | |
741 | os_ << " ch_ = *curr_;\n"; | |
742 | if (iter_->bol_index != boost::lexer::npos) | |
743 | { | |
744 | os_ << "\n if (bol) goto state" << dfa_ << '_' | |
745 | << iter_->bol_index << ";\n"; | |
746 | } | |
747 | if (iter_->eol_index != boost::lexer::npos) | |
748 | { | |
11fdf7f2 | 749 | os_ << "\n if (ch_ == '\\n') goto state" << dfa_ |
7c673cae FG |
750 | << '_' << iter_->eol_index << ";\n"; |
751 | } | |
752 | os_ << " ++curr_;\n"; | |
753 | } | |
754 | ||
755 | for (/**/; t_ < transitions_; ++t_) | |
756 | { | |
757 | Char const *ptr_ = iter_->token._charset.c_str(); | |
758 | Char const *end_ = ptr_ + iter_->token._charset.size(); | |
759 | Char start_char_ = 0; | |
760 | Char curr_char_ = 0; | |
761 | bool range_ = false; | |
762 | bool first_char_ = true; | |
763 | ||
764 | os_ << "\n if ("; | |
765 | ||
766 | while (ptr_ != end_) | |
767 | { | |
768 | curr_char_ = *ptr_++; | |
769 | ||
770 | if (*ptr_ == curr_char_ + 1) | |
771 | { | |
772 | if (!range_) | |
773 | { | |
774 | start_char_ = curr_char_; | |
775 | } | |
776 | range_ = true; | |
777 | } | |
778 | else | |
779 | { | |
780 | if (!first_char_) | |
781 | { | |
782 | os_ << ((iter_->token._negated) ? " && " : " || "); | |
783 | } | |
784 | else | |
785 | { | |
786 | first_char_ = false; | |
787 | } | |
788 | if (range_) | |
789 | { | |
790 | if (iter_->token._negated) | |
791 | { | |
792 | os_ << "!"; | |
793 | } | |
794 | os_ << "(ch_ >= '" << get_charlit(start_char_) | |
795 | << "' && ch_ <= '" | |
796 | << get_charlit(curr_char_) << "')"; | |
797 | range_ = false; | |
798 | } | |
799 | else | |
800 | { | |
801 | os_ << "ch_ " | |
802 | << ((iter_->token._negated) ? "!=" : "==") | |
803 | << " '" << get_charlit(curr_char_) << "'"; | |
804 | } | |
805 | } | |
806 | } | |
807 | ||
808 | os_ << ") goto state" << dfa_ << '_' << iter_->goto_state | |
809 | << ";\n"; | |
810 | ++iter_; | |
811 | } | |
812 | ||
813 | if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1)) | |
814 | { | |
815 | os_ << " goto end;\n"; | |
816 | } | |
817 | ||
818 | if (transitions_ == 0) ++iter_; | |
819 | } | |
820 | } | |
821 | ||
822 | os_ << "\nend:\n"; | |
823 | os_ << " if (end_state_)\n"; | |
824 | os_ << " {\n"; | |
825 | os_ << " // return longest match\n"; | |
826 | os_ << " start_token_ = end_token_;\n"; | |
827 | ||
828 | if (dfas_ > 1) | |
829 | { | |
830 | os_ << " start_state_ = end_start_state_;\n"; | |
831 | os_ << "\n if (id_ == 0)\n"; | |
832 | os_ << " {\n"; | |
833 | ||
834 | if (sm_.data()._seen_BOL_assertion) | |
835 | { | |
836 | os_ << " bol = end_bol_;\n"; | |
837 | } | |
838 | ||
839 | os_ << " goto again;\n"; | |
840 | os_ << " }\n"; | |
841 | ||
842 | if (sm_.data()._seen_BOL_assertion) | |
843 | { | |
844 | os_ << " else\n"; | |
845 | os_ << " {\n"; | |
846 | os_ << " bol_ = end_bol_;\n"; | |
847 | os_ << " }\n"; | |
848 | } | |
849 | } | |
850 | else if (sm_.data()._seen_BOL_assertion) | |
851 | { | |
852 | os_ << " bol_ = end_bol_;\n"; | |
853 | } | |
854 | ||
855 | os_ << " }\n"; | |
856 | os_ << " else\n"; | |
857 | os_ << " {\n"; | |
858 | ||
859 | if (sm_.data()._seen_BOL_assertion) | |
860 | { | |
861 | os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n"; | |
862 | } | |
863 | os_ << " id_ = npos;\n"; | |
864 | os_ << " uid_ = npos;\n"; | |
865 | os_ << " }\n\n"; | |
866 | ||
867 | os_ << " unique_id_ = uid_;\n"; | |
868 | os_ << " return id_;\n"; | |
869 | return os_.good(); | |
870 | } | |
871 | ||
872 | /////////////////////////////////////////////////////////////////////////// | |
873 | // Generate a tokenizer for the given state machine. | |
874 | template <typename Char, typename F> | |
875 | inline bool | |
876 | generate_cpp (boost::lexer::basic_state_machine<Char> const& sm_ | |
877 | , boost::lexer::basic_rules<Char> const& rules_ | |
878 | , std::basic_ostream<Char> &os_, Char const* name_suffix | |
879 | , F generate_function_body) | |
880 | { | |
881 | if (sm_.data()._lookup->empty()) | |
882 | return false; | |
883 | ||
884 | std::size_t const dfas_ = sm_.data()._dfa->size(); | |
885 | // std::size_t const lookups_ = sm_.data()._lookup->front()->size(); | |
886 | ||
887 | os_ << "// Copyright (c) 2008-2009 Ben Hanson\n"; | |
888 | os_ << "// Copyright (c) 2008-2011 Hartmut Kaiser\n"; | |
889 | os_ << "//\n"; | |
890 | os_ << "// Distributed under the Boost Software License, " | |
891 | "Version 1.0. (See accompanying\n"; | |
892 | os_ << "// file licence_1_0.txt or copy at " | |
893 | "http://www.boost.org/LICENSE_1_0.txt)\n\n"; | |
894 | os_ << "// Auto-generated by boost::lexer, do not edit\n\n"; | |
895 | ||
896 | std::basic_string<Char> guard(name_suffix); | |
897 | guard += L<Char>(name_suffix[0] ? "_" : ""); | |
898 | guard += L<Char>(__DATE__ "_" __TIME__); | |
899 | typename std::basic_string<Char>::size_type p = | |
900 | guard.find_first_of(L<Char>(": ")); | |
901 | while (std::string::npos != p) | |
902 | { | |
903 | guard.replace(p, 1, L<Char>("_")); | |
904 | p = guard.find_first_of(L<Char>(": "), p); | |
905 | } | |
906 | boost::to_upper(guard); | |
907 | ||
908 | os_ << "#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << ")\n"; | |
909 | os_ << "#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << "\n\n"; | |
910 | ||
7c673cae FG |
911 | os_ << "#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>\n\n"; |
912 | ||
913 | generate_delimiter(os_); | |
914 | os_ << "// the generated table of state names and the tokenizer have to be\n" | |
915 | "// defined in the boost::spirit::lex::lexertl::static_ namespace\n"; | |
916 | os_ << "namespace boost { namespace spirit { namespace lex { " | |
917 | "namespace lexertl { namespace static_ {\n\n"; | |
918 | ||
919 | // generate the lexer state information variables | |
920 | if (!generate_cpp_state_info(rules_, os_, name_suffix)) | |
921 | return false; | |
922 | ||
923 | generate_delimiter(os_); | |
924 | os_ << "// this function returns the next matched token\n"; | |
925 | os_ << "template<typename Iterator>\n"; | |
926 | os_ << "std::size_t next_token" << (name_suffix[0] ? "_" : "") | |
927 | << name_suffix << " ("; | |
928 | ||
929 | if (dfas_ > 1) | |
930 | { | |
931 | os_ << "std::size_t& start_state_, "; | |
932 | } | |
933 | else | |
934 | { | |
935 | os_ << "std::size_t& /*start_state_*/, "; | |
936 | } | |
937 | if (sm_.data()._seen_BOL_assertion) | |
938 | { | |
939 | os_ << "bool& bol_, "; | |
940 | } | |
941 | else | |
942 | { | |
943 | os_ << "bool& /*bol_*/, "; | |
944 | } | |
945 | os_ << "\n "; | |
946 | ||
947 | os_ << "Iterator &start_token_, Iterator const& end_, "; | |
948 | os_ << "std::size_t& unique_id_)\n"; | |
949 | os_ << "{\n"; | |
950 | if (!generate_function_body(os_, sm_)) | |
951 | return false; | |
952 | os_ << "}\n\n"; | |
953 | ||
954 | if (!generate_cpp_state_table<Char>(os_, name_suffix | |
955 | , sm_.data()._seen_BOL_assertion, sm_.data()._seen_EOL_assertion)) | |
956 | { | |
957 | return false; | |
958 | } | |
959 | ||
960 | os_ << "}}}}} // namespace boost::spirit::lex::lexertl::static_\n\n"; | |
961 | ||
962 | os_ << "#endif\n"; | |
963 | ||
964 | return os_.good(); | |
965 | } | |
966 | ||
967 | } // namespace detail | |
968 | ||
969 | /////////////////////////////////////////////////////////////////////////// | |
970 | template <typename Lexer, typename F> | |
971 | inline bool | |
972 | generate_static(Lexer const& lexer | |
973 | , std::basic_ostream<typename Lexer::char_type>& os | |
974 | , typename Lexer::char_type const* name_suffix, F f) | |
975 | { | |
976 | if (!lexer.init_dfa(true)) // always minimize DFA for static lexers | |
977 | return false; | |
978 | return detail::generate_cpp(lexer.state_machine_, lexer.rules_, os | |
979 | , name_suffix, f); | |
980 | } | |
981 | ||
982 | /////////////////////////////////////////////////////////////////////////// | |
983 | // deprecated function, will be removed in the future (this has been | |
984 | // replaced by the function generate_static_dfa - see below). | |
985 | template <typename Lexer> | |
986 | inline bool | |
987 | generate_static(Lexer const& lexer | |
988 | , std::basic_ostream<typename Lexer::char_type>& os | |
989 | , typename Lexer::char_type const* name_suffix = | |
990 | detail::L<typename Lexer::char_type>()) | |
991 | { | |
992 | return generate_static(lexer, os, name_suffix | |
993 | , &detail::generate_function_body_dfa<typename Lexer::char_type>); | |
994 | } | |
995 | ||
996 | /////////////////////////////////////////////////////////////////////////// | |
997 | template <typename Lexer> | |
998 | inline bool | |
999 | generate_static_dfa(Lexer const& lexer | |
1000 | , std::basic_ostream<typename Lexer::char_type>& os | |
1001 | , typename Lexer::char_type const* name_suffix = | |
1002 | detail::L<typename Lexer::char_type>()) | |
1003 | { | |
1004 | return generate_static(lexer, os, name_suffix | |
1005 | , &detail::generate_function_body_dfa<typename Lexer::char_type>); | |
1006 | } | |
1007 | ||
1008 | /////////////////////////////////////////////////////////////////////////// | |
1009 | template <typename Lexer> | |
1010 | inline bool | |
1011 | generate_static_switch(Lexer const& lexer | |
1012 | , std::basic_ostream<typename Lexer::char_type>& os | |
1013 | , typename Lexer::char_type const* name_suffix = | |
1014 | detail::L<typename Lexer::char_type>()) | |
1015 | { | |
1016 | return generate_static(lexer, os, name_suffix | |
1017 | , &detail::generate_function_body_switch<typename Lexer::char_type>); | |
1018 | } | |
1019 | ||
1020 | /////////////////////////////////////////////////////////////////////////////// | |
1021 | }}}} | |
1022 | ||
1023 | #endif |