]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp
update sources to v12.2.3
[ceph.git] / ceph / src / boost / boost / spirit / home / lex / lexer / lexertl / generate_static.hpp
1 // Copyright (c) 2008-2009 Ben Hanson
2 // Copyright (c) 2008-2011 Hartmut Kaiser
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
7 #if !defined(BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM)
8 #define BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM
9
10 #if defined(_MSC_VER)
11 #pragma once
12 #endif
13
14 #include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
15 #include <boost/spirit/home/support/detail/lexer/consts.hpp>
16 #include <boost/spirit/home/support/detail/lexer/rules.hpp>
17 #include <boost/spirit/home/support/detail/lexer/size_t.hpp>
18 #include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
19 #include <boost/spirit/home/support/detail/lexer/debug.hpp>
20 #include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp>
21 #include <boost/algorithm/string.hpp>
22 #include <boost/lexical_cast.hpp>
23 #include <boost/scoped_array.hpp>
24
25 ///////////////////////////////////////////////////////////////////////////////
26 namespace boost { namespace spirit { namespace lex { namespace lexertl
27 {
28 namespace detail
29 {
30
31 ///////////////////////////////////////////////////////////////////////////
// Converts narrow (char) literals into the character type Char. Only the
// explicit specializations below provide an implementation.
template <typename CharT>
struct string_lit;

// Narrow-character case: literals are handed back without conversion.
template <>
struct string_lit<char>
{
    // Returns the given character unchanged.
    static char get(char c)
    {
        return c;
    }

    // Returns a std::string holding a copy of 'str' (empty by default).
    static std::string get(char const* str = "")
    {
        return std::string(str);
    }
};
41
42 template <>
43 struct string_lit<wchar_t>
44 {
45 static wchar_t get(char c)
46 {
47 typedef std::ctype<wchar_t> ctype_t;
48 return std::use_facet<ctype_t>(std::locale()).widen(c);
49 }
50 static std::basic_string<wchar_t> get(char const* source = "")
51 {
52 using namespace std; // some systems have size_t in ns std
53 size_t len = strlen(source);
54 boost::scoped_array<wchar_t> result (new wchar_t[len+1]);
55 result.get()[len] = '\0';
56
57 // working with wide character streams is supported only if the
58 // platform provides the std::ctype<wchar_t> facet
59 BOOST_ASSERT(std::has_facet<std::ctype<wchar_t> >(std::locale()));
60
61 std::use_facet<std::ctype<wchar_t> >(std::locale())
62 .widen(source, source + len, result.get());
63 return result.get();
64 }
65 };
66
67 template <typename Char>
68 inline Char L(char c)
69 {
70 return string_lit<Char>::get(c);
71 }
72
73 template <typename Char>
74 inline std::basic_string<Char> L(char const* c = "")
75 {
76 return string_lit<Char>::get(c);
77 }
78
79 ///////////////////////////////////////////////////////////////////////////
// Writes a separator line made of 80 '/' characters followed by a newline.
// Returns the state of the stream (os_.good()) after writing.
template <typename Char>
inline bool
generate_delimiter(std::basic_ostream<Char> &os_)
{
    std::basic_string<Char> const rule(80, '/');
    os_ << rule << "\n";
    return os_.good();
}
87
88 ///////////////////////////////////////////////////////////////////////////
89 // Generate a table of the names of the used lexer states, which is a bit
90 // tricky, because the table stored with the rules is sorted based on the
91 // names, but we need it sorted using the state ids.
92 template <typename Char>
93 inline bool
94 generate_cpp_state_info (boost::lexer::basic_rules<Char> const& rules_
95 , std::basic_ostream<Char> &os_, Char const* name_suffix)
96 {
97 // we need to re-sort the state names in ascending order of the state
98 // ids, filling possible gaps in between later
99 typedef typename
100 boost::lexer::basic_rules<Char>::string_size_t_map::const_iterator
101 state_iterator;
102 typedef std::map<std::size_t, Char const*> reverse_state_map_type;
103
104 reverse_state_map_type reverse_state_map;
105 state_iterator send = rules_.statemap().end();
106 for (state_iterator sit = rules_.statemap().begin(); sit != send; ++sit)
107 {
108 typedef typename reverse_state_map_type::value_type value_type;
109 reverse_state_map.insert(value_type((*sit).second, (*sit).first.c_str()));
110 }
111
112 generate_delimiter(os_);
113 os_ << "// this table defines the names of the lexer states\n";
114 os_ << boost::lexer::detail::strings<Char>::char_name()
115 << " const* const lexer_state_names"
116 << (name_suffix[0] ? "_" : "") << name_suffix
117 << "[" << rules_.statemap().size() << "] = \n{\n";
118
119 typedef typename reverse_state_map_type::iterator iterator;
120 iterator rend = reverse_state_map.end();
121 std::size_t last_id = 0;
122 for (iterator rit = reverse_state_map.begin(); rit != rend; ++last_id)
123 {
124 for (/**/; last_id < (*rit).first; ++last_id)
125 {
126 os_ << " 0, // \"<undefined state>\"\n";
127 }
128 os_ << " "
129 << boost::lexer::detail::strings<Char>::char_prefix()
130 << "\"" << (*rit).second << "\"";
131 if (++rit != rend)
132 os_ << ",\n";
133 else
134 os_ << "\n"; // don't generate the final comma
135 }
136 os_ << "};\n\n";
137
138 generate_delimiter(os_);
139 os_ << "// this variable defines the number of lexer states\n";
140 os_ << "std::size_t const lexer_state_count"
141 << (name_suffix[0] ? "_" : "") << name_suffix
142 << " = " << rules_.statemap().size() << ";\n\n";
143 return os_.good();
144 }
145
146 template <typename Char>
147 inline bool
148 generate_cpp_state_table (std::basic_ostream<Char> &os_
149 , Char const* name_suffix, bool bol, bool eol)
150 {
151 std::basic_string<Char> suffix(L<Char>(name_suffix[0] ? "_" : ""));
152 suffix += name_suffix;
153
154 generate_delimiter(os_);
155 os_ << "// this defines a generic accessors for the information above\n";
156 os_ << "struct lexer" << suffix << "\n{\n";
157 os_ << " // version number and feature-set of compatible static lexer engine\n";
158 os_ << " enum\n";
159 os_ << " {\n static_version = "
160 << boost::lexical_cast<std::basic_string<Char> >(SPIRIT_STATIC_LEXER_VERSION)
161 << ",\n";
162 os_ << " supports_bol = " << std::boolalpha << bol << ",\n";
163 os_ << " supports_eol = " << std::boolalpha << eol << "\n";
164 os_ << " };\n\n";
165 os_ << " // return the number of lexer states\n";
166 os_ << " static std::size_t state_count()\n";
167 os_ << " {\n return lexer_state_count" << suffix << "; \n }\n\n";
168 os_ << " // return the name of the lexer state as given by 'idx'\n";
169 os_ << " static " << boost::lexer::detail::strings<Char>::char_name()
170 << " const* state_name(std::size_t idx)\n";
171 os_ << " {\n return lexer_state_names" << suffix << "[idx]; \n }\n\n";
172 os_ << " // return the next matched token\n";
173 os_ << " template<typename Iterator>\n";
174 os_ << " static std::size_t next(std::size_t &start_state_, bool& bol_\n";
175 os_ << " , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n";
176 os_ << " {\n return next_token" << suffix
177 << "(start_state_, bol_, start_token_, end_, unique_id_);\n }\n";
178 os_ << "};\n\n";
179 return os_.good();
180 }
181
182 ///////////////////////////////////////////////////////////////////////////
183 // generate function body based on traversing the DFA tables
184 template <typename Char>
185 bool generate_function_body_dfa(std::basic_ostream<Char>& os_
186 , boost::lexer::basic_state_machine<Char> const &sm_)
187 {
188 std::size_t const dfas_ = sm_.data()._dfa->size();
189 std::size_t const lookups_ = sm_.data()._lookup->front()->size();
190
191 os_ << " enum {end_state_index, id_index, unique_id_index, "
192 "state_index, bol_index,\n";
193 os_ << " eol_index, dead_state_index, dfa_offset};\n\n";
194 os_ << " static std::size_t const npos = "
195 "static_cast<std::size_t>(~0);\n";
196
197 if (dfas_ > 1)
198 {
199 for (std::size_t state_ = 0; state_ < dfas_; ++state_)
200 {
201 std::size_t i_ = 0;
202 std::size_t j_ = 1;
203 std::size_t count_ = lookups_ / 8;
204 std::size_t const* lookup_ = &sm_.data()._lookup[state_]->front();
205 std::size_t const* dfa_ = &sm_.data()._dfa[state_]->front();
206
207 os_ << " static std::size_t const lookup" << state_
208 << "_[" << lookups_ << "] = {\n ";
209 for (/**/; i_ < count_; ++i_)
210 {
211 std::size_t const index_ = i_ * 8;
212 os_ << lookup_[index_];
213 for (/**/; j_ < 8; ++j_)
214 {
215 os_ << ", " << lookup_[index_ + j_];
216 }
217 if (i_ < count_ - 1)
218 {
219 os_ << ",\n ";
220 }
221 j_ = 1;
222 }
223 os_ << " };\n";
224
225 count_ = sm_.data()._dfa[state_]->size ();
226 os_ << " static const std::size_t dfa" << state_ << "_["
227 << count_ << "] = {\n ";
228 count_ /= 8;
229 for (i_ = 0; i_ < count_; ++i_)
230 {
231 std::size_t const index_ = i_ * 8;
232 os_ << dfa_[index_];
233 for (j_ = 1; j_ < 8; ++j_)
234 {
235 os_ << ", " << dfa_[index_ + j_];
236 }
237 if (i_ < count_ - 1)
238 {
239 os_ << ",\n ";
240 }
241 }
242
243 std::size_t const mod_ = sm_.data()._dfa[state_]->size () % 8;
244 if (mod_)
245 {
246 std::size_t const index_ = count_ * 8;
247 if (count_)
248 {
249 os_ << ",\n ";
250 }
251 os_ << dfa_[index_];
252 for (j_ = 1; j_ < mod_; ++j_)
253 {
254 os_ << ", " << dfa_[index_ + j_];
255 }
256 }
257 os_ << " };\n";
258 }
259
260 std::size_t count_ = sm_.data()._dfa_alphabet.size();
261 std::size_t i_ = 1;
262
263 os_ << " static std::size_t const* lookup_arr_[" << count_
264 << "] = { lookup0_";
265 for (i_ = 1; i_ < count_; ++i_)
266 {
267 os_ << ", " << "lookup" << i_ << "_";
268 }
269 os_ << " };\n";
270
271 os_ << " static std::size_t const dfa_alphabet_arr_["
272 << count_ << "] = { ";
273 os_ << sm_.data()._dfa_alphabet.front ();
274 for (i_ = 1; i_ < count_; ++i_)
275 {
276 os_ << ", " << sm_.data()._dfa_alphabet[i_];
277 }
278 os_ << " };\n";
279
280 os_ << " static std::size_t const* dfa_arr_[" << count_
281 << "] = { ";
282 os_ << "dfa0_";
283 for (i_ = 1; i_ < count_; ++i_)
284 {
285 os_ << ", " << "dfa" << i_ << "_";
286 }
287 os_ << " };\n";
288 }
289 else
290 {
291 std::size_t const* lookup_ = &sm_.data()._lookup[0]->front();
292 std::size_t const* dfa_ = &sm_.data()._dfa[0]->front();
293 std::size_t i_ = 0;
294 std::size_t j_ = 1;
295 std::size_t count_ = lookups_ / 8;
296
297 os_ << " static std::size_t const lookup_[";
298 os_ << sm_.data()._lookup[0]->size() << "] = {\n ";
299 for (/**/; i_ < count_; ++i_)
300 {
301 const std::size_t index_ = i_ * 8;
302 os_ << lookup_[index_];
303 for (/**/; j_ < 8; ++j_)
304 {
305 os_ << ", " << lookup_[index_ + j_];
306 }
307 if (i_ < count_ - 1)
308 {
309 os_ << ",\n ";
310 }
311 j_ = 1;
312 }
313 os_ << " };\n";
314
315 os_ << " static std::size_t const dfa_alphabet_ = "
316 << sm_.data()._dfa_alphabet.front () << ";\n";
317 os_ << " static std::size_t const dfa_["
318 << sm_.data()._dfa[0]->size () << "] = {\n ";
319 count_ = sm_.data()._dfa[0]->size () / 8;
320 for (i_ = 0; i_ < count_; ++i_)
321 {
322 const std::size_t index_ = i_ * 8;
323 os_ << dfa_[index_];
324 for (j_ = 1; j_ < 8; ++j_)
325 {
326 os_ << ", " << dfa_[index_ + j_];
327 }
328 if (i_ < count_ - 1)
329 {
330 os_ << ",\n ";
331 }
332 }
333
334 const std::size_t mod_ = sm_.data()._dfa[0]->size () % 8;
335 if (mod_)
336 {
337 const std::size_t index_ = count_ * 8;
338 if (count_)
339 {
340 os_ << ",\n ";
341 }
342 os_ << dfa_[index_];
343 for (j_ = 1; j_ < mod_; ++j_)
344 {
345 os_ << ", " << dfa_[index_ + j_];
346 }
347 }
348 os_ << " };\n";
349 }
350
351 os_ << "\n if (start_token_ == end_)\n";
352 os_ << " {\n";
353 os_ << " unique_id_ = npos;\n";
354 os_ << " return 0;\n";
355 os_ << " }\n\n";
356 if (sm_.data()._seen_BOL_assertion)
357 {
358 os_ << " bool bol = bol_;\n\n";
359 }
360
361 if (dfas_ > 1)
362 {
363 os_ << "again:\n";
364 os_ << " std::size_t const* lookup_ = lookup_arr_[start_state_];\n";
365 os_ << " std::size_t dfa_alphabet_ = dfa_alphabet_arr_[start_state_];\n";
366 os_ << " std::size_t const*dfa_ = dfa_arr_[start_state_];\n";
367 }
368
369 os_ << " std::size_t const* ptr_ = dfa_ + dfa_alphabet_;\n";
370 os_ << " Iterator curr_ = start_token_;\n";
371 os_ << " bool end_state_ = *ptr_ != 0;\n";
372 os_ << " std::size_t id_ = *(ptr_ + id_index);\n";
373 os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n";
374 if (dfas_ > 1)
375 {
376 os_ << " std::size_t end_start_state_ = start_state_;\n";
377 }
378 if (sm_.data()._seen_BOL_assertion)
379 {
380 os_ << " bool end_bol_ = bol_;\n";
381 }
382 os_ << " Iterator end_token_ = start_token_;\n\n";
383
384 os_ << " while (curr_ != end_)\n";
385 os_ << " {\n";
386
387 if (sm_.data()._seen_BOL_assertion)
388 {
389 os_ << " std::size_t const BOL_state_ = ptr_[bol_index];\n\n";
390 }
391
392 if (sm_.data()._seen_EOL_assertion)
393 {
394 os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
395 }
396
397 if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion)
398 {
399 os_ << " if (BOL_state_ && bol)\n";
400 os_ << " {\n";
401 os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
402 os_ << " }\n";
403 os_ << " else if (EOL_state_ && *curr_ == '\\n')\n";
404 os_ << " {\n";
405 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
406 os_ << " }\n";
407 os_ << " else\n";
408 os_ << " {\n";
409 if (lookups_ == 256)
410 {
411 os_ << " unsigned char index = \n";
412 os_ << " static_cast<unsigned char>(*curr_++);\n";
413 }
414 else
415 {
416 os_ << " std::size_t index = *curr_++\n";
417 }
418 os_ << " bol = (index == '\n') ? true : false;\n";
419 os_ << " std::size_t const state_ = ptr_[\n";
420 os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
421
422 os_ << '\n';
423 os_ << " if (state_ == 0) break;\n";
424 os_ << '\n';
425 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
426 os_ << " }\n\n";
427 }
428 else if (sm_.data()._seen_BOL_assertion)
429 {
430 os_ << " if (BOL_state_ && bol)\n";
431 os_ << " {\n";
432 os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
433 os_ << " }\n";
434 os_ << " else\n";
435 os_ << " {\n";
436 if (lookups_ == 256)
437 {
438 os_ << " unsigned char index = \n";
439 os_ << " static_cast<unsigned char>(*curr_++);\n";
440 }
441 else
442 {
443 os_ << " std::size_t index = *curr_++\n";
444 }
445 os_ << " bol = (index == '\n') ? true : false;\n";
446 os_ << " std::size_t const state_ = ptr_[\n";
447 os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
448
449 os_ << '\n';
450 os_ << " if (state_ == 0) break;\n";
451 os_ << '\n';
452 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
453 os_ << " }\n\n";
454 }
455 else if (sm_.data()._seen_EOL_assertion)
456 {
457 os_ << " if (EOL_state_ && *curr_ == '\\n')\n";
458 os_ << " {\n";
459 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
460 os_ << " }\n";
461 os_ << " else\n";
462 os_ << " {\n";
463 if (lookups_ == 256)
464 {
465 os_ << " unsigned char index = \n";
466 os_ << " static_cast<unsigned char>(*curr_++);\n";
467 }
468 else
469 {
470 os_ << " std::size_t index = *curr_++\n";
471 }
472 os_ << " bol = (index == '\n') ? true : false;\n";
473 os_ << " std::size_t const state_ = ptr_[\n";
474 os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
475
476 os_ << '\n';
477 os_ << " if (state_ == 0) break;\n";
478 os_ << '\n';
479 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
480 os_ << " }\n\n";
481 }
482 else
483 {
484 os_ << " std::size_t const state_ =\n";
485
486 if (lookups_ == 256)
487 {
488 os_ << " ptr_[lookup_["
489 "static_cast<unsigned char>(*curr_++)]];\n";
490 }
491 else
492 {
493 os_ << " ptr_[lookup_[*curr_++]];\n";
494 }
495
496 os_ << '\n';
497 os_ << " if (state_ == 0) break;\n";
498 os_ << '\n';
499 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n\n";
500 }
501
502 os_ << " if (*ptr_)\n";
503 os_ << " {\n";
504 os_ << " end_state_ = true;\n";
505 os_ << " id_ = *(ptr_ + id_index);\n";
506 os_ << " uid_ = *(ptr_ + unique_id_index);\n";
507 if (dfas_ > 1)
508 {
509 os_ << " end_start_state_ = *(ptr_ + state_index);\n";
510 }
511 if (sm_.data()._seen_BOL_assertion)
512 {
513 os_ << " end_bol_ = bol;\n";
514 }
515 os_ << " end_token_ = curr_;\n";
516 os_ << " }\n";
517 os_ << " }\n\n";
518
519 if (sm_.data()._seen_EOL_assertion)
520 {
521 os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
522
523 os_ << " if (EOL_state_ && curr_ == end_)\n";
524 os_ << " {\n";
525 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n\n";
526
527 os_ << " if (*ptr_)\n";
528 os_ << " {\n";
529 os_ << " end_state_ = true;\n";
530 os_ << " id_ = *(ptr_ + id_index);\n";
531 os_ << " uid_ = *(ptr_ + unique_id_index);\n";
532 if (dfas_ > 1)
533 {
534 os_ << " end_start_state_ = *(ptr_ + state_index);\n";
535 }
536 if (sm_.data()._seen_BOL_assertion)
537 {
538 os_ << " end_bol_ = bol;\n";
539 }
540 os_ << " end_token_ = curr_;\n";
541 os_ << " }\n";
542 os_ << " }\n\n";
543 }
544
545 os_ << " if (end_state_)\n";
546 os_ << " {\n";
547 os_ << " // return longest match\n";
548 os_ << " start_token_ = end_token_;\n";
549
550 if (dfas_ > 1)
551 {
552 os_ << " start_state_ = end_start_state_;\n";
553 os_ << " if (id_ == 0)\n";
554 os_ << " {\n";
555 if (sm_.data()._seen_BOL_assertion)
556 {
557 os_ << " bol = end_bol_;\n";
558 }
559 os_ << " goto again;\n";
560 os_ << " }\n";
561 if (sm_.data()._seen_BOL_assertion)
562 {
563 os_ << " else\n";
564 os_ << " {\n";
565 os_ << " bol_ = end_bol_;\n";
566 os_ << " }\n";
567 }
568 }
569 else if (sm_.data()._seen_BOL_assertion)
570 {
571 os_ << " bol_ = end_bol_;\n";
572 }
573
574 os_ << " }\n";
575 os_ << " else\n";
576 os_ << " {\n";
577
578 if (sm_.data()._seen_BOL_assertion)
579 {
580 os_ << " bol_ = (*start_token_ == '\n') ? true : false;\n";
581 }
582
583 os_ << " id_ = npos;\n";
584 os_ << " uid_ = npos;\n";
585 os_ << " }\n\n";
586
587 os_ << " unique_id_ = uid_;\n";
588 os_ << " return id_;\n";
589 return os_.good();
590 }
591
592 ///////////////////////////////////////////////////////////////////////////
593 template <typename Char>
594 inline std::basic_string<Char> get_charlit(Char ch)
595 {
596 std::basic_string<Char> result;
597 boost::lexer::basic_string_token<Char>::escape_char(ch, result);
598 return result;
599 }
600
601 // check whether state0_0 is referenced from any of the other states
602 template <typename Char>
603 bool need_label0_0(boost::lexer::basic_state_machine<Char> const &sm_)
604 {
605 typedef typename boost::lexer::basic_state_machine<Char>::iterator
606 iterator_type;
607 iterator_type iter_ = sm_.begin();
608 std::size_t const states_ = iter_->states;
609
610 for (std::size_t state_ = 0; state_ < states_; ++state_)
611 {
612 if (0 == iter_->bol_index || 0 == iter_->eol_index)
613 {
614 return true;
615 }
616
617 std::size_t const transitions_ = iter_->transitions;
618 for (std::size_t t_ = 0; t_ < transitions_; ++t_)
619 {
620 if (0 == iter_->goto_state)
621 {
622 return true;
623 }
624 ++iter_;
625 }
626 if (transitions_ == 0) ++iter_;
627 }
628 return false;
629 }
630
631 ///////////////////////////////////////////////////////////////////////////
632 template <typename Char>
633 bool generate_function_body_switch(std::basic_ostream<Char> & os_
634 , boost::lexer::basic_state_machine<Char> const &sm_)
635 {
636 typedef typename boost::lexer::basic_state_machine<Char>::iterator
637 iterator_type;
638
639 std::size_t const lookups_ = sm_.data()._lookup->front ()->size ();
640 iterator_type iter_ = sm_.begin();
641 iterator_type labeliter_ = iter_;
642 iterator_type end_ = sm_.end();
643 std::size_t const dfas_ = sm_.data()._dfa->size ();
644
645 os_ << " static std::size_t const npos = "
646 "static_cast<std::size_t>(~0);\n";
647
648 os_ << "\n if (start_token_ == end_)\n";
649 os_ << " {\n";
650 os_ << " unique_id_ = npos;\n";
651 os_ << " return 0;\n";
652 os_ << " }\n\n";
653
654 if (sm_.data()._seen_BOL_assertion)
655 {
656 os_ << " bool bol = bol_;\n";
657 }
658
659 if (dfas_ > 1)
660 {
661 os_ << "again:\n";
662 }
663
664 os_ << " Iterator curr_ = start_token_;\n";
665 os_ << " bool end_state_ = false;\n";
666 os_ << " std::size_t id_ = npos;\n";
667 os_ << " std::size_t uid_ = npos;\n";
668
669 if (dfas_ > 1)
670 {
671 os_ << " std::size_t end_start_state_ = start_state_;\n";
672 }
673
674 if (sm_.data()._seen_BOL_assertion)
675 {
676 os_ << " bool end_bol_ = bol_;\n";
677 }
678
679 os_ << " Iterator end_token_ = start_token_;\n";
680 os_ << '\n';
681
682 os_ << " " << ((lookups_ == 256) ? "char" : "wchar_t")
683 << " ch_ = 0;\n\n";
684
685 if (dfas_ > 1)
686 {
687 os_ << " switch (start_state_)\n";
688 os_ << " {\n";
689
690 for (std::size_t i_ = 0; i_ < dfas_; ++i_)
691 {
692 os_ << " case " << i_ << ":\n";
693 os_ << " goto state" << i_ << "_0;\n";
694 os_ << " break;\n";
695 }
696
697 os_ << " default:\n";
698 os_ << " goto end;\n";
699 os_ << " break;\n";
700 os_ << " }\n";
701 }
702
703 bool need_state0_0_label = need_label0_0(sm_);
704
705 for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
706 {
707 std::size_t const states_ = iter_->states;
708 for (std::size_t state_ = 0; state_ < states_; ++state_)
709 {
710 std::size_t const transitions_ = iter_->transitions;
711 std::size_t t_ = 0;
712
713 if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label)
714 {
715 os_ << "\nstate" << dfa_ << '_' << state_ << ":\n";
716 }
717
718 if (iter_->end_state)
719 {
720 os_ << " end_state_ = true;\n";
721 os_ << " id_ = " << iter_->id << ";\n";
722 os_ << " uid_ = " << iter_->unique_id << ";\n";
723 os_ << " end_token_ = curr_;\n";
724
725 if (dfas_ > 1)
726 {
727 os_ << " end_start_state_ = " << iter_->goto_dfa <<
728 ";\n";
729 }
730
731 if (sm_.data()._seen_BOL_assertion)
732 {
733 os_ << " end_bol_ = bol;\n";
734 }
735
736 if (transitions_) os_ << '\n';
737 }
738
739 if (t_ < transitions_ ||
740 iter_->bol_index != boost::lexer::npos ||
741 iter_->eol_index != boost::lexer::npos)
742 {
743 os_ << " if (curr_ == end_) goto end;\n";
744 os_ << " ch_ = *curr_;\n";
745 if (iter_->bol_index != boost::lexer::npos)
746 {
747 os_ << "\n if (bol) goto state" << dfa_ << '_'
748 << iter_->bol_index << ";\n";
749 }
750 if (iter_->eol_index != boost::lexer::npos)
751 {
752 os_ << "\n if (ch_ == '\n') goto state" << dfa_
753 << '_' << iter_->eol_index << ";\n";
754 }
755 os_ << " ++curr_;\n";
756 }
757
758 for (/**/; t_ < transitions_; ++t_)
759 {
760 Char const *ptr_ = iter_->token._charset.c_str();
761 Char const *end_ = ptr_ + iter_->token._charset.size();
762 Char start_char_ = 0;
763 Char curr_char_ = 0;
764 bool range_ = false;
765 bool first_char_ = true;
766
767 os_ << "\n if (";
768
769 while (ptr_ != end_)
770 {
771 curr_char_ = *ptr_++;
772
773 if (*ptr_ == curr_char_ + 1)
774 {
775 if (!range_)
776 {
777 start_char_ = curr_char_;
778 }
779 range_ = true;
780 }
781 else
782 {
783 if (!first_char_)
784 {
785 os_ << ((iter_->token._negated) ? " && " : " || ");
786 }
787 else
788 {
789 first_char_ = false;
790 }
791 if (range_)
792 {
793 if (iter_->token._negated)
794 {
795 os_ << "!";
796 }
797 os_ << "(ch_ >= '" << get_charlit(start_char_)
798 << "' && ch_ <= '"
799 << get_charlit(curr_char_) << "')";
800 range_ = false;
801 }
802 else
803 {
804 os_ << "ch_ "
805 << ((iter_->token._negated) ? "!=" : "==")
806 << " '" << get_charlit(curr_char_) << "'";
807 }
808 }
809 }
810
811 os_ << ") goto state" << dfa_ << '_' << iter_->goto_state
812 << ";\n";
813 ++iter_;
814 }
815
816 if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1))
817 {
818 os_ << " goto end;\n";
819 }
820
821 if (transitions_ == 0) ++iter_;
822 }
823 }
824
825 os_ << "\nend:\n";
826 os_ << " if (end_state_)\n";
827 os_ << " {\n";
828 os_ << " // return longest match\n";
829 os_ << " start_token_ = end_token_;\n";
830
831 if (dfas_ > 1)
832 {
833 os_ << " start_state_ = end_start_state_;\n";
834 os_ << "\n if (id_ == 0)\n";
835 os_ << " {\n";
836
837 if (sm_.data()._seen_BOL_assertion)
838 {
839 os_ << " bol = end_bol_;\n";
840 }
841
842 os_ << " goto again;\n";
843 os_ << " }\n";
844
845 if (sm_.data()._seen_BOL_assertion)
846 {
847 os_ << " else\n";
848 os_ << " {\n";
849 os_ << " bol_ = end_bol_;\n";
850 os_ << " }\n";
851 }
852 }
853 else if (sm_.data()._seen_BOL_assertion)
854 {
855 os_ << " bol_ = end_bol_;\n";
856 }
857
858 os_ << " }\n";
859 os_ << " else\n";
860 os_ << " {\n";
861
862 if (sm_.data()._seen_BOL_assertion)
863 {
864 os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n";
865 }
866 os_ << " id_ = npos;\n";
867 os_ << " uid_ = npos;\n";
868 os_ << " }\n\n";
869
870 os_ << " unique_id_ = uid_;\n";
871 os_ << " return id_;\n";
872 return os_.good();
873 }
874
875 ///////////////////////////////////////////////////////////////////////////
876 // Generate a tokenizer for the given state machine.
877 template <typename Char, typename F>
878 inline bool
879 generate_cpp (boost::lexer::basic_state_machine<Char> const& sm_
880 , boost::lexer::basic_rules<Char> const& rules_
881 , std::basic_ostream<Char> &os_, Char const* name_suffix
882 , F generate_function_body)
883 {
884 if (sm_.data()._lookup->empty())
885 return false;
886
887 std::size_t const dfas_ = sm_.data()._dfa->size();
888 // std::size_t const lookups_ = sm_.data()._lookup->front()->size();
889
890 os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
891 os_ << "// Copyright (c) 2008-2011 Hartmut Kaiser\n";
892 os_ << "//\n";
893 os_ << "// Distributed under the Boost Software License, "
894 "Version 1.0. (See accompanying\n";
895 os_ << "// file licence_1_0.txt or copy at "
896 "http://www.boost.org/LICENSE_1_0.txt)\n\n";
897 os_ << "// Auto-generated by boost::lexer, do not edit\n\n";
898
899 std::basic_string<Char> guard(name_suffix);
900 guard += L<Char>(name_suffix[0] ? "_" : "");
901 guard += L<Char>(__DATE__ "_" __TIME__);
902 typename std::basic_string<Char>::size_type p =
903 guard.find_first_of(L<Char>(": "));
904 while (std::string::npos != p)
905 {
906 guard.replace(p, 1, L<Char>("_"));
907 p = guard.find_first_of(L<Char>(": "), p);
908 }
909 boost::to_upper(guard);
910
911 os_ << "#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << ")\n";
912 os_ << "#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << "\n\n";
913
914 os_ << "#include <boost/detail/iterator.hpp>\n";
915 os_ << "#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>\n\n";
916
917 generate_delimiter(os_);
918 os_ << "// the generated table of state names and the tokenizer have to be\n"
919 "// defined in the boost::spirit::lex::lexertl::static_ namespace\n";
920 os_ << "namespace boost { namespace spirit { namespace lex { "
921 "namespace lexertl { namespace static_ {\n\n";
922
923 // generate the lexer state information variables
924 if (!generate_cpp_state_info(rules_, os_, name_suffix))
925 return false;
926
927 generate_delimiter(os_);
928 os_ << "// this function returns the next matched token\n";
929 os_ << "template<typename Iterator>\n";
930 os_ << "std::size_t next_token" << (name_suffix[0] ? "_" : "")
931 << name_suffix << " (";
932
933 if (dfas_ > 1)
934 {
935 os_ << "std::size_t& start_state_, ";
936 }
937 else
938 {
939 os_ << "std::size_t& /*start_state_*/, ";
940 }
941 if (sm_.data()._seen_BOL_assertion)
942 {
943 os_ << "bool& bol_, ";
944 }
945 else
946 {
947 os_ << "bool& /*bol_*/, ";
948 }
949 os_ << "\n ";
950
951 os_ << "Iterator &start_token_, Iterator const& end_, ";
952 os_ << "std::size_t& unique_id_)\n";
953 os_ << "{\n";
954 if (!generate_function_body(os_, sm_))
955 return false;
956 os_ << "}\n\n";
957
958 if (!generate_cpp_state_table<Char>(os_, name_suffix
959 , sm_.data()._seen_BOL_assertion, sm_.data()._seen_EOL_assertion))
960 {
961 return false;
962 }
963
964 os_ << "}}}}} // namespace boost::spirit::lex::lexertl::static_\n\n";
965
966 os_ << "#endif\n";
967
968 return os_.good();
969 }
970
971 } // namespace detail
972
973 ///////////////////////////////////////////////////////////////////////////
974 template <typename Lexer, typename F>
975 inline bool
976 generate_static(Lexer const& lexer
977 , std::basic_ostream<typename Lexer::char_type>& os
978 , typename Lexer::char_type const* name_suffix, F f)
979 {
980 if (!lexer.init_dfa(true)) // always minimize DFA for static lexers
981 return false;
982 return detail::generate_cpp(lexer.state_machine_, lexer.rules_, os
983 , name_suffix, f);
984 }
985
986 ///////////////////////////////////////////////////////////////////////////
987 // deprecated function, will be removed in the future (this has been
988 // replaced by the function generate_static_dfa - see below).
989 template <typename Lexer>
990 inline bool
991 generate_static(Lexer const& lexer
992 , std::basic_ostream<typename Lexer::char_type>& os
993 , typename Lexer::char_type const* name_suffix =
994 detail::L<typename Lexer::char_type>())
995 {
996 return generate_static(lexer, os, name_suffix
997 , &detail::generate_function_body_dfa<typename Lexer::char_type>);
998 }
999
1000 ///////////////////////////////////////////////////////////////////////////
1001 template <typename Lexer>
1002 inline bool
1003 generate_static_dfa(Lexer const& lexer
1004 , std::basic_ostream<typename Lexer::char_type>& os
1005 , typename Lexer::char_type const* name_suffix =
1006 detail::L<typename Lexer::char_type>())
1007 {
1008 return generate_static(lexer, os, name_suffix
1009 , &detail::generate_function_body_dfa<typename Lexer::char_type>);
1010 }
1011
1012 ///////////////////////////////////////////////////////////////////////////
1013 template <typename Lexer>
1014 inline bool
1015 generate_static_switch(Lexer const& lexer
1016 , std::basic_ostream<typename Lexer::char_type>& os
1017 , typename Lexer::char_type const* name_suffix =
1018 detail::L<typename Lexer::char_type>())
1019 {
1020 return generate_static(lexer, os, name_suffix
1021 , &detail::generate_function_body_switch<typename Lexer::char_type>);
1022 }
1023
1024 ///////////////////////////////////////////////////////////////////////////////
1025 }}}}
1026
1027 #endif