]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / boost / boost / spirit / home / lex / lexer / lexertl / generate_static.hpp
CommitLineData
7c673cae
FG
1// Copyright (c) 2008-2009 Ben Hanson
2// Copyright (c) 2008-2011 Hartmut Kaiser
3//
4// Distributed under the Boost Software License, Version 1.0. (See accompanying
5// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
7#if !defined(BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM)
8#define BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM
9
10#if defined(_MSC_VER)
11#pragma once
12#endif
13
14#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
15#include <boost/spirit/home/support/detail/lexer/consts.hpp>
16#include <boost/spirit/home/support/detail/lexer/rules.hpp>
17#include <boost/spirit/home/support/detail/lexer/size_t.hpp>
18#include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
19#include <boost/spirit/home/support/detail/lexer/debug.hpp>
20#include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp>
21#include <boost/algorithm/string.hpp>
7c673cae
FG
22#include <boost/scoped_array.hpp>
23
24///////////////////////////////////////////////////////////////////////////////
25namespace boost { namespace spirit { namespace lex { namespace lexertl
26{
27 namespace detail
28 {
29
30 ///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// string_lit converts narrow character/string literals into the character
// type used by the generated lexer; only the specializations are defined.
template <typename CharT>
struct string_lit;

// narrow characters: nothing to convert, values are passed through
template <>
struct string_lit<char>
{
    // return the character unchanged
    static char get(char ch)
    {
        return ch;
    }

    // return a std::string holding a copy of the given C string
    static std::string get(char const* text = "")
    {
        return std::string(text);
    }
};
40
41 template <>
42 struct string_lit<wchar_t>
43 {
44 static wchar_t get(char c)
45 {
46 typedef std::ctype<wchar_t> ctype_t;
47 return std::use_facet<ctype_t>(std::locale()).widen(c);
48 }
49 static std::basic_string<wchar_t> get(char const* source = "")
50 {
51 using namespace std; // some systems have size_t in ns std
52 size_t len = strlen(source);
53 boost::scoped_array<wchar_t> result (new wchar_t[len+1]);
54 result.get()[len] = '\0';
55
56 // working with wide character streams is supported only if the
57 // platform provides the std::ctype<wchar_t> facet
58 BOOST_ASSERT(std::has_facet<std::ctype<wchar_t> >(std::locale()));
59
60 std::use_facet<std::ctype<wchar_t> >(std::locale())
61 .widen(source, source + len, result.get());
62 return result.get();
63 }
64 };
65
66 template <typename Char>
67 inline Char L(char c)
68 {
69 return string_lit<Char>::get(c);
70 }
71
72 template <typename Char>
73 inline std::basic_string<Char> L(char const* c = "")
74 {
75 return string_lit<Char>::get(c);
76 }
77
78 ///////////////////////////////////////////////////////////////////////////
// Write a separator line consisting of 80 '/' characters followed by a
// newline; returns whether the stream is still in a good state afterwards.
template <typename Char>
inline bool
generate_delimiter(std::basic_ostream<Char> &os_)
{
    std::basic_string<Char> const slashes(80, '/');
    os_ << slashes << "\n";
    return os_.good();
}
86
87 ///////////////////////////////////////////////////////////////////////////
88 // Generate a table of the names of the used lexer states, which is a bit
89 // tricky, because the table stored with the rules is sorted based on the
90 // names, but we need it sorted using the state ids.
91 template <typename Char>
92 inline bool
93 generate_cpp_state_info (boost::lexer::basic_rules<Char> const& rules_
94 , std::basic_ostream<Char> &os_, Char const* name_suffix)
95 {
96 // we need to re-sort the state names in ascending order of the state
97 // ids, filling possible gaps in between later
98 typedef typename
99 boost::lexer::basic_rules<Char>::string_size_t_map::const_iterator
100 state_iterator;
101 typedef std::map<std::size_t, Char const*> reverse_state_map_type;
102
103 reverse_state_map_type reverse_state_map;
104 state_iterator send = rules_.statemap().end();
105 for (state_iterator sit = rules_.statemap().begin(); sit != send; ++sit)
106 {
107 typedef typename reverse_state_map_type::value_type value_type;
108 reverse_state_map.insert(value_type((*sit).second, (*sit).first.c_str()));
109 }
110
111 generate_delimiter(os_);
112 os_ << "// this table defines the names of the lexer states\n";
113 os_ << boost::lexer::detail::strings<Char>::char_name()
114 << " const* const lexer_state_names"
115 << (name_suffix[0] ? "_" : "") << name_suffix
116 << "[" << rules_.statemap().size() << "] = \n{\n";
117
118 typedef typename reverse_state_map_type::iterator iterator;
119 iterator rend = reverse_state_map.end();
120 std::size_t last_id = 0;
121 for (iterator rit = reverse_state_map.begin(); rit != rend; ++last_id)
122 {
123 for (/**/; last_id < (*rit).first; ++last_id)
124 {
125 os_ << " 0, // \"<undefined state>\"\n";
126 }
127 os_ << " "
128 << boost::lexer::detail::strings<Char>::char_prefix()
129 << "\"" << (*rit).second << "\"";
130 if (++rit != rend)
131 os_ << ",\n";
132 else
133 os_ << "\n"; // don't generate the final comma
134 }
135 os_ << "};\n\n";
136
137 generate_delimiter(os_);
138 os_ << "// this variable defines the number of lexer states\n";
139 os_ << "std::size_t const lexer_state_count"
140 << (name_suffix[0] ? "_" : "") << name_suffix
141 << " = " << rules_.statemap().size() << ";\n\n";
142 return os_.good();
143 }
144
145 template <typename Char>
146 inline bool
147 generate_cpp_state_table (std::basic_ostream<Char> &os_
148 , Char const* name_suffix, bool bol, bool eol)
149 {
150 std::basic_string<Char> suffix(L<Char>(name_suffix[0] ? "_" : ""));
151 suffix += name_suffix;
152
153 generate_delimiter(os_);
154 os_ << "// this defines a generic accessors for the information above\n";
155 os_ << "struct lexer" << suffix << "\n{\n";
156 os_ << " // version number and feature-set of compatible static lexer engine\n";
157 os_ << " enum\n";
92f5a8d4 158 os_ << " {\n static_version = " << SPIRIT_STATIC_LEXER_VERSION << ",\n";
7c673cae
FG
159 os_ << " supports_bol = " << std::boolalpha << bol << ",\n";
160 os_ << " supports_eol = " << std::boolalpha << eol << "\n";
161 os_ << " };\n\n";
162 os_ << " // return the number of lexer states\n";
163 os_ << " static std::size_t state_count()\n";
164 os_ << " {\n return lexer_state_count" << suffix << "; \n }\n\n";
165 os_ << " // return the name of the lexer state as given by 'idx'\n";
166 os_ << " static " << boost::lexer::detail::strings<Char>::char_name()
167 << " const* state_name(std::size_t idx)\n";
168 os_ << " {\n return lexer_state_names" << suffix << "[idx]; \n }\n\n";
169 os_ << " // return the next matched token\n";
170 os_ << " template<typename Iterator>\n";
171 os_ << " static std::size_t next(std::size_t &start_state_, bool& bol_\n";
172 os_ << " , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n";
173 os_ << " {\n return next_token" << suffix
174 << "(start_state_, bol_, start_token_, end_, unique_id_);\n }\n";
175 os_ << "};\n\n";
176 return os_.good();
177 }
178
179 ///////////////////////////////////////////////////////////////////////////
180 // generate function body based on traversing the DFA tables
181 template <typename Char>
182 bool generate_function_body_dfa(std::basic_ostream<Char>& os_
183 , boost::lexer::basic_state_machine<Char> const &sm_)
184 {
185 std::size_t const dfas_ = sm_.data()._dfa->size();
186 std::size_t const lookups_ = sm_.data()._lookup->front()->size();
187
188 os_ << " enum {end_state_index, id_index, unique_id_index, "
189 "state_index, bol_index,\n";
190 os_ << " eol_index, dead_state_index, dfa_offset};\n\n";
191 os_ << " static std::size_t const npos = "
192 "static_cast<std::size_t>(~0);\n";
193
194 if (dfas_ > 1)
195 {
196 for (std::size_t state_ = 0; state_ < dfas_; ++state_)
197 {
198 std::size_t i_ = 0;
199 std::size_t j_ = 1;
200 std::size_t count_ = lookups_ / 8;
201 std::size_t const* lookup_ = &sm_.data()._lookup[state_]->front();
202 std::size_t const* dfa_ = &sm_.data()._dfa[state_]->front();
203
204 os_ << " static std::size_t const lookup" << state_
205 << "_[" << lookups_ << "] = {\n ";
206 for (/**/; i_ < count_; ++i_)
207 {
208 std::size_t const index_ = i_ * 8;
209 os_ << lookup_[index_];
210 for (/**/; j_ < 8; ++j_)
211 {
212 os_ << ", " << lookup_[index_ + j_];
213 }
214 if (i_ < count_ - 1)
215 {
216 os_ << ",\n ";
217 }
218 j_ = 1;
219 }
220 os_ << " };\n";
221
222 count_ = sm_.data()._dfa[state_]->size ();
223 os_ << " static const std::size_t dfa" << state_ << "_["
224 << count_ << "] = {\n ";
225 count_ /= 8;
226 for (i_ = 0; i_ < count_; ++i_)
227 {
228 std::size_t const index_ = i_ * 8;
229 os_ << dfa_[index_];
230 for (j_ = 1; j_ < 8; ++j_)
231 {
232 os_ << ", " << dfa_[index_ + j_];
233 }
234 if (i_ < count_ - 1)
235 {
236 os_ << ",\n ";
237 }
238 }
239
240 std::size_t const mod_ = sm_.data()._dfa[state_]->size () % 8;
241 if (mod_)
242 {
243 std::size_t const index_ = count_ * 8;
244 if (count_)
245 {
246 os_ << ",\n ";
247 }
248 os_ << dfa_[index_];
249 for (j_ = 1; j_ < mod_; ++j_)
250 {
251 os_ << ", " << dfa_[index_ + j_];
252 }
253 }
254 os_ << " };\n";
255 }
256
257 std::size_t count_ = sm_.data()._dfa_alphabet.size();
258 std::size_t i_ = 1;
259
260 os_ << " static std::size_t const* lookup_arr_[" << count_
261 << "] = { lookup0_";
262 for (i_ = 1; i_ < count_; ++i_)
263 {
264 os_ << ", " << "lookup" << i_ << "_";
265 }
266 os_ << " };\n";
267
268 os_ << " static std::size_t const dfa_alphabet_arr_["
269 << count_ << "] = { ";
270 os_ << sm_.data()._dfa_alphabet.front ();
271 for (i_ = 1; i_ < count_; ++i_)
272 {
273 os_ << ", " << sm_.data()._dfa_alphabet[i_];
274 }
275 os_ << " };\n";
276
277 os_ << " static std::size_t const* dfa_arr_[" << count_
278 << "] = { ";
279 os_ << "dfa0_";
280 for (i_ = 1; i_ < count_; ++i_)
281 {
282 os_ << ", " << "dfa" << i_ << "_";
283 }
284 os_ << " };\n";
285 }
286 else
287 {
288 std::size_t const* lookup_ = &sm_.data()._lookup[0]->front();
289 std::size_t const* dfa_ = &sm_.data()._dfa[0]->front();
290 std::size_t i_ = 0;
291 std::size_t j_ = 1;
292 std::size_t count_ = lookups_ / 8;
293
294 os_ << " static std::size_t const lookup_[";
295 os_ << sm_.data()._lookup[0]->size() << "] = {\n ";
296 for (/**/; i_ < count_; ++i_)
297 {
298 const std::size_t index_ = i_ * 8;
299 os_ << lookup_[index_];
300 for (/**/; j_ < 8; ++j_)
301 {
302 os_ << ", " << lookup_[index_ + j_];
303 }
304 if (i_ < count_ - 1)
305 {
306 os_ << ",\n ";
307 }
308 j_ = 1;
309 }
310 os_ << " };\n";
311
312 os_ << " static std::size_t const dfa_alphabet_ = "
313 << sm_.data()._dfa_alphabet.front () << ";\n";
314 os_ << " static std::size_t const dfa_["
315 << sm_.data()._dfa[0]->size () << "] = {\n ";
316 count_ = sm_.data()._dfa[0]->size () / 8;
317 for (i_ = 0; i_ < count_; ++i_)
318 {
319 const std::size_t index_ = i_ * 8;
320 os_ << dfa_[index_];
321 for (j_ = 1; j_ < 8; ++j_)
322 {
323 os_ << ", " << dfa_[index_ + j_];
324 }
325 if (i_ < count_ - 1)
326 {
327 os_ << ",\n ";
328 }
329 }
330
331 const std::size_t mod_ = sm_.data()._dfa[0]->size () % 8;
332 if (mod_)
333 {
334 const std::size_t index_ = count_ * 8;
335 if (count_)
336 {
337 os_ << ",\n ";
338 }
339 os_ << dfa_[index_];
340 for (j_ = 1; j_ < mod_; ++j_)
341 {
342 os_ << ", " << dfa_[index_ + j_];
343 }
344 }
345 os_ << " };\n";
346 }
347
348 os_ << "\n if (start_token_ == end_)\n";
349 os_ << " {\n";
350 os_ << " unique_id_ = npos;\n";
351 os_ << " return 0;\n";
352 os_ << " }\n\n";
353 if (sm_.data()._seen_BOL_assertion)
354 {
355 os_ << " bool bol = bol_;\n\n";
356 }
357
358 if (dfas_ > 1)
359 {
360 os_ << "again:\n";
361 os_ << " std::size_t const* lookup_ = lookup_arr_[start_state_];\n";
362 os_ << " std::size_t dfa_alphabet_ = dfa_alphabet_arr_[start_state_];\n";
363 os_ << " std::size_t const*dfa_ = dfa_arr_[start_state_];\n";
364 }
365
366 os_ << " std::size_t const* ptr_ = dfa_ + dfa_alphabet_;\n";
367 os_ << " Iterator curr_ = start_token_;\n";
368 os_ << " bool end_state_ = *ptr_ != 0;\n";
369 os_ << " std::size_t id_ = *(ptr_ + id_index);\n";
370 os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n";
371 if (dfas_ > 1)
372 {
373 os_ << " std::size_t end_start_state_ = start_state_;\n";
374 }
375 if (sm_.data()._seen_BOL_assertion)
376 {
377 os_ << " bool end_bol_ = bol_;\n";
378 }
379 os_ << " Iterator end_token_ = start_token_;\n\n";
380
381 os_ << " while (curr_ != end_)\n";
382 os_ << " {\n";
383
384 if (sm_.data()._seen_BOL_assertion)
385 {
386 os_ << " std::size_t const BOL_state_ = ptr_[bol_index];\n\n";
387 }
388
389 if (sm_.data()._seen_EOL_assertion)
390 {
391 os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
392 }
393
394 if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion)
395 {
396 os_ << " if (BOL_state_ && bol)\n";
397 os_ << " {\n";
398 os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
399 os_ << " }\n";
400 os_ << " else if (EOL_state_ && *curr_ == '\\n')\n";
401 os_ << " {\n";
402 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
403 os_ << " }\n";
404 os_ << " else\n";
405 os_ << " {\n";
406 if (lookups_ == 256)
407 {
408 os_ << " unsigned char index = \n";
409 os_ << " static_cast<unsigned char>(*curr_++);\n";
410 }
411 else
412 {
413 os_ << " std::size_t index = *curr_++\n";
414 }
11fdf7f2 415 os_ << " bol = (index == '\\n') ? true : false;\n";
7c673cae
FG
416 os_ << " std::size_t const state_ = ptr_[\n";
417 os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
418
419 os_ << '\n';
420 os_ << " if (state_ == 0) break;\n";
421 os_ << '\n';
422 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
423 os_ << " }\n\n";
424 }
425 else if (sm_.data()._seen_BOL_assertion)
426 {
427 os_ << " if (BOL_state_ && bol)\n";
428 os_ << " {\n";
429 os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
430 os_ << " }\n";
431 os_ << " else\n";
432 os_ << " {\n";
433 if (lookups_ == 256)
434 {
435 os_ << " unsigned char index = \n";
436 os_ << " static_cast<unsigned char>(*curr_++);\n";
437 }
438 else
439 {
440 os_ << " std::size_t index = *curr_++\n";
441 }
11fdf7f2 442 os_ << " bol = (index == '\\n') ? true : false;\n";
7c673cae
FG
443 os_ << " std::size_t const state_ = ptr_[\n";
444 os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
445
446 os_ << '\n';
447 os_ << " if (state_ == 0) break;\n";
448 os_ << '\n';
449 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
450 os_ << " }\n\n";
451 }
452 else if (sm_.data()._seen_EOL_assertion)
453 {
454 os_ << " if (EOL_state_ && *curr_ == '\\n')\n";
455 os_ << " {\n";
456 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
457 os_ << " }\n";
458 os_ << " else\n";
459 os_ << " {\n";
460 if (lookups_ == 256)
461 {
462 os_ << " unsigned char index = \n";
463 os_ << " static_cast<unsigned char>(*curr_++);\n";
464 }
465 else
466 {
467 os_ << " std::size_t index = *curr_++\n";
468 }
11fdf7f2 469 os_ << " bol = (index == '\\n') ? true : false;\n";
7c673cae
FG
470 os_ << " std::size_t const state_ = ptr_[\n";
471 os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
472
473 os_ << '\n';
474 os_ << " if (state_ == 0) break;\n";
475 os_ << '\n';
476 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
477 os_ << " }\n\n";
478 }
479 else
480 {
481 os_ << " std::size_t const state_ =\n";
482
483 if (lookups_ == 256)
484 {
485 os_ << " ptr_[lookup_["
486 "static_cast<unsigned char>(*curr_++)]];\n";
487 }
488 else
489 {
490 os_ << " ptr_[lookup_[*curr_++]];\n";
491 }
492
493 os_ << '\n';
494 os_ << " if (state_ == 0) break;\n";
495 os_ << '\n';
496 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n\n";
497 }
498
499 os_ << " if (*ptr_)\n";
500 os_ << " {\n";
501 os_ << " end_state_ = true;\n";
502 os_ << " id_ = *(ptr_ + id_index);\n";
503 os_ << " uid_ = *(ptr_ + unique_id_index);\n";
504 if (dfas_ > 1)
505 {
506 os_ << " end_start_state_ = *(ptr_ + state_index);\n";
507 }
508 if (sm_.data()._seen_BOL_assertion)
509 {
510 os_ << " end_bol_ = bol;\n";
511 }
512 os_ << " end_token_ = curr_;\n";
513 os_ << " }\n";
514 os_ << " }\n\n";
515
516 if (sm_.data()._seen_EOL_assertion)
517 {
518 os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
519
520 os_ << " if (EOL_state_ && curr_ == end_)\n";
521 os_ << " {\n";
522 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n\n";
523
524 os_ << " if (*ptr_)\n";
525 os_ << " {\n";
526 os_ << " end_state_ = true;\n";
527 os_ << " id_ = *(ptr_ + id_index);\n";
528 os_ << " uid_ = *(ptr_ + unique_id_index);\n";
529 if (dfas_ > 1)
530 {
531 os_ << " end_start_state_ = *(ptr_ + state_index);\n";
532 }
533 if (sm_.data()._seen_BOL_assertion)
534 {
535 os_ << " end_bol_ = bol;\n";
536 }
537 os_ << " end_token_ = curr_;\n";
538 os_ << " }\n";
539 os_ << " }\n\n";
540 }
541
542 os_ << " if (end_state_)\n";
543 os_ << " {\n";
544 os_ << " // return longest match\n";
545 os_ << " start_token_ = end_token_;\n";
546
547 if (dfas_ > 1)
548 {
549 os_ << " start_state_ = end_start_state_;\n";
550 os_ << " if (id_ == 0)\n";
551 os_ << " {\n";
552 if (sm_.data()._seen_BOL_assertion)
553 {
554 os_ << " bol = end_bol_;\n";
555 }
556 os_ << " goto again;\n";
557 os_ << " }\n";
558 if (sm_.data()._seen_BOL_assertion)
559 {
560 os_ << " else\n";
561 os_ << " {\n";
562 os_ << " bol_ = end_bol_;\n";
563 os_ << " }\n";
564 }
565 }
566 else if (sm_.data()._seen_BOL_assertion)
567 {
568 os_ << " bol_ = end_bol_;\n";
569 }
570
571 os_ << " }\n";
572 os_ << " else\n";
573 os_ << " {\n";
574
575 if (sm_.data()._seen_BOL_assertion)
576 {
11fdf7f2 577 os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n";
7c673cae
FG
578 }
579
580 os_ << " id_ = npos;\n";
581 os_ << " uid_ = npos;\n";
582 os_ << " }\n\n";
583
584 os_ << " unique_id_ = uid_;\n";
585 os_ << " return id_;\n";
586 return os_.good();
587 }
588
589 ///////////////////////////////////////////////////////////////////////////
590 template <typename Char>
591 inline std::basic_string<Char> get_charlit(Char ch)
592 {
593 std::basic_string<Char> result;
594 boost::lexer::basic_string_token<Char>::escape_char(ch, result);
595 return result;
596 }
597
598 // check whether state0_0 is referenced from any of the other states
599 template <typename Char>
600 bool need_label0_0(boost::lexer::basic_state_machine<Char> const &sm_)
601 {
602 typedef typename boost::lexer::basic_state_machine<Char>::iterator
603 iterator_type;
604 iterator_type iter_ = sm_.begin();
605 std::size_t const states_ = iter_->states;
606
607 for (std::size_t state_ = 0; state_ < states_; ++state_)
608 {
609 if (0 == iter_->bol_index || 0 == iter_->eol_index)
610 {
611 return true;
612 }
613
614 std::size_t const transitions_ = iter_->transitions;
615 for (std::size_t t_ = 0; t_ < transitions_; ++t_)
616 {
617 if (0 == iter_->goto_state)
618 {
619 return true;
620 }
621 ++iter_;
622 }
623 if (transitions_ == 0) ++iter_;
624 }
625 return false;
626 }
627
628 ///////////////////////////////////////////////////////////////////////////
629 template <typename Char>
630 bool generate_function_body_switch(std::basic_ostream<Char> & os_
631 , boost::lexer::basic_state_machine<Char> const &sm_)
632 {
633 typedef typename boost::lexer::basic_state_machine<Char>::iterator
634 iterator_type;
635
636 std::size_t const lookups_ = sm_.data()._lookup->front ()->size ();
637 iterator_type iter_ = sm_.begin();
638 iterator_type labeliter_ = iter_;
639 iterator_type end_ = sm_.end();
640 std::size_t const dfas_ = sm_.data()._dfa->size ();
641
642 os_ << " static std::size_t const npos = "
643 "static_cast<std::size_t>(~0);\n";
644
645 os_ << "\n if (start_token_ == end_)\n";
646 os_ << " {\n";
647 os_ << " unique_id_ = npos;\n";
648 os_ << " return 0;\n";
649 os_ << " }\n\n";
650
651 if (sm_.data()._seen_BOL_assertion)
652 {
653 os_ << " bool bol = bol_;\n";
654 }
655
656 if (dfas_ > 1)
657 {
658 os_ << "again:\n";
659 }
660
661 os_ << " Iterator curr_ = start_token_;\n";
662 os_ << " bool end_state_ = false;\n";
663 os_ << " std::size_t id_ = npos;\n";
664 os_ << " std::size_t uid_ = npos;\n";
665
666 if (dfas_ > 1)
667 {
668 os_ << " std::size_t end_start_state_ = start_state_;\n";
669 }
670
671 if (sm_.data()._seen_BOL_assertion)
672 {
673 os_ << " bool end_bol_ = bol_;\n";
674 }
675
676 os_ << " Iterator end_token_ = start_token_;\n";
677 os_ << '\n';
678
679 os_ << " " << ((lookups_ == 256) ? "char" : "wchar_t")
680 << " ch_ = 0;\n\n";
681
682 if (dfas_ > 1)
683 {
684 os_ << " switch (start_state_)\n";
685 os_ << " {\n";
686
687 for (std::size_t i_ = 0; i_ < dfas_; ++i_)
688 {
689 os_ << " case " << i_ << ":\n";
690 os_ << " goto state" << i_ << "_0;\n";
691 os_ << " break;\n";
692 }
693
694 os_ << " default:\n";
695 os_ << " goto end;\n";
696 os_ << " break;\n";
697 os_ << " }\n";
698 }
699
700 bool need_state0_0_label = need_label0_0(sm_);
701
702 for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
703 {
704 std::size_t const states_ = iter_->states;
705 for (std::size_t state_ = 0; state_ < states_; ++state_)
706 {
707 std::size_t const transitions_ = iter_->transitions;
708 std::size_t t_ = 0;
709
710 if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label)
711 {
712 os_ << "\nstate" << dfa_ << '_' << state_ << ":\n";
713 }
714
715 if (iter_->end_state)
716 {
717 os_ << " end_state_ = true;\n";
718 os_ << " id_ = " << iter_->id << ";\n";
719 os_ << " uid_ = " << iter_->unique_id << ";\n";
720 os_ << " end_token_ = curr_;\n";
721
722 if (dfas_ > 1)
723 {
724 os_ << " end_start_state_ = " << iter_->goto_dfa <<
725 ";\n";
726 }
727
728 if (sm_.data()._seen_BOL_assertion)
729 {
730 os_ << " end_bol_ = bol;\n";
731 }
732
733 if (transitions_) os_ << '\n';
734 }
735
736 if (t_ < transitions_ ||
737 iter_->bol_index != boost::lexer::npos ||
738 iter_->eol_index != boost::lexer::npos)
739 {
740 os_ << " if (curr_ == end_) goto end;\n";
741 os_ << " ch_ = *curr_;\n";
742 if (iter_->bol_index != boost::lexer::npos)
743 {
744 os_ << "\n if (bol) goto state" << dfa_ << '_'
745 << iter_->bol_index << ";\n";
746 }
747 if (iter_->eol_index != boost::lexer::npos)
748 {
11fdf7f2 749 os_ << "\n if (ch_ == '\\n') goto state" << dfa_
7c673cae
FG
750 << '_' << iter_->eol_index << ";\n";
751 }
752 os_ << " ++curr_;\n";
753 }
754
755 for (/**/; t_ < transitions_; ++t_)
756 {
757 Char const *ptr_ = iter_->token._charset.c_str();
758 Char const *end_ = ptr_ + iter_->token._charset.size();
759 Char start_char_ = 0;
760 Char curr_char_ = 0;
761 bool range_ = false;
762 bool first_char_ = true;
763
764 os_ << "\n if (";
765
766 while (ptr_ != end_)
767 {
768 curr_char_ = *ptr_++;
769
770 if (*ptr_ == curr_char_ + 1)
771 {
772 if (!range_)
773 {
774 start_char_ = curr_char_;
775 }
776 range_ = true;
777 }
778 else
779 {
780 if (!first_char_)
781 {
782 os_ << ((iter_->token._negated) ? " && " : " || ");
783 }
784 else
785 {
786 first_char_ = false;
787 }
788 if (range_)
789 {
790 if (iter_->token._negated)
791 {
792 os_ << "!";
793 }
794 os_ << "(ch_ >= '" << get_charlit(start_char_)
795 << "' && ch_ <= '"
796 << get_charlit(curr_char_) << "')";
797 range_ = false;
798 }
799 else
800 {
801 os_ << "ch_ "
802 << ((iter_->token._negated) ? "!=" : "==")
803 << " '" << get_charlit(curr_char_) << "'";
804 }
805 }
806 }
807
808 os_ << ") goto state" << dfa_ << '_' << iter_->goto_state
809 << ";\n";
810 ++iter_;
811 }
812
813 if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1))
814 {
815 os_ << " goto end;\n";
816 }
817
818 if (transitions_ == 0) ++iter_;
819 }
820 }
821
822 os_ << "\nend:\n";
823 os_ << " if (end_state_)\n";
824 os_ << " {\n";
825 os_ << " // return longest match\n";
826 os_ << " start_token_ = end_token_;\n";
827
828 if (dfas_ > 1)
829 {
830 os_ << " start_state_ = end_start_state_;\n";
831 os_ << "\n if (id_ == 0)\n";
832 os_ << " {\n";
833
834 if (sm_.data()._seen_BOL_assertion)
835 {
836 os_ << " bol = end_bol_;\n";
837 }
838
839 os_ << " goto again;\n";
840 os_ << " }\n";
841
842 if (sm_.data()._seen_BOL_assertion)
843 {
844 os_ << " else\n";
845 os_ << " {\n";
846 os_ << " bol_ = end_bol_;\n";
847 os_ << " }\n";
848 }
849 }
850 else if (sm_.data()._seen_BOL_assertion)
851 {
852 os_ << " bol_ = end_bol_;\n";
853 }
854
855 os_ << " }\n";
856 os_ << " else\n";
857 os_ << " {\n";
858
859 if (sm_.data()._seen_BOL_assertion)
860 {
861 os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n";
862 }
863 os_ << " id_ = npos;\n";
864 os_ << " uid_ = npos;\n";
865 os_ << " }\n\n";
866
867 os_ << " unique_id_ = uid_;\n";
868 os_ << " return id_;\n";
869 return os_.good();
870 }
871
872 ///////////////////////////////////////////////////////////////////////////
873 // Generate a tokenizer for the given state machine.
874 template <typename Char, typename F>
875 inline bool
876 generate_cpp (boost::lexer::basic_state_machine<Char> const& sm_
877 , boost::lexer::basic_rules<Char> const& rules_
878 , std::basic_ostream<Char> &os_, Char const* name_suffix
879 , F generate_function_body)
880 {
881 if (sm_.data()._lookup->empty())
882 return false;
883
884 std::size_t const dfas_ = sm_.data()._dfa->size();
885// std::size_t const lookups_ = sm_.data()._lookup->front()->size();
886
887 os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
888 os_ << "// Copyright (c) 2008-2011 Hartmut Kaiser\n";
889 os_ << "//\n";
890 os_ << "// Distributed under the Boost Software License, "
891 "Version 1.0. (See accompanying\n";
892 os_ << "// file licence_1_0.txt or copy at "
893 "http://www.boost.org/LICENSE_1_0.txt)\n\n";
894 os_ << "// Auto-generated by boost::lexer, do not edit\n\n";
895
896 std::basic_string<Char> guard(name_suffix);
897 guard += L<Char>(name_suffix[0] ? "_" : "");
898 guard += L<Char>(__DATE__ "_" __TIME__);
899 typename std::basic_string<Char>::size_type p =
900 guard.find_first_of(L<Char>(": "));
901 while (std::string::npos != p)
902 {
903 guard.replace(p, 1, L<Char>("_"));
904 p = guard.find_first_of(L<Char>(": "), p);
905 }
906 boost::to_upper(guard);
907
908 os_ << "#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << ")\n";
909 os_ << "#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << "\n\n";
910
7c673cae
FG
911 os_ << "#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>\n\n";
912
913 generate_delimiter(os_);
914 os_ << "// the generated table of state names and the tokenizer have to be\n"
915 "// defined in the boost::spirit::lex::lexertl::static_ namespace\n";
916 os_ << "namespace boost { namespace spirit { namespace lex { "
917 "namespace lexertl { namespace static_ {\n\n";
918
919 // generate the lexer state information variables
920 if (!generate_cpp_state_info(rules_, os_, name_suffix))
921 return false;
922
923 generate_delimiter(os_);
924 os_ << "// this function returns the next matched token\n";
925 os_ << "template<typename Iterator>\n";
926 os_ << "std::size_t next_token" << (name_suffix[0] ? "_" : "")
927 << name_suffix << " (";
928
929 if (dfas_ > 1)
930 {
931 os_ << "std::size_t& start_state_, ";
932 }
933 else
934 {
935 os_ << "std::size_t& /*start_state_*/, ";
936 }
937 if (sm_.data()._seen_BOL_assertion)
938 {
939 os_ << "bool& bol_, ";
940 }
941 else
942 {
943 os_ << "bool& /*bol_*/, ";
944 }
945 os_ << "\n ";
946
947 os_ << "Iterator &start_token_, Iterator const& end_, ";
948 os_ << "std::size_t& unique_id_)\n";
949 os_ << "{\n";
950 if (!generate_function_body(os_, sm_))
951 return false;
952 os_ << "}\n\n";
953
954 if (!generate_cpp_state_table<Char>(os_, name_suffix
955 , sm_.data()._seen_BOL_assertion, sm_.data()._seen_EOL_assertion))
956 {
957 return false;
958 }
959
960 os_ << "}}}}} // namespace boost::spirit::lex::lexertl::static_\n\n";
961
962 os_ << "#endif\n";
963
964 return os_.good();
965 }
966
967 } // namespace detail
968
969 ///////////////////////////////////////////////////////////////////////////
970 template <typename Lexer, typename F>
971 inline bool
972 generate_static(Lexer const& lexer
973 , std::basic_ostream<typename Lexer::char_type>& os
974 , typename Lexer::char_type const* name_suffix, F f)
975 {
976 if (!lexer.init_dfa(true)) // always minimize DFA for static lexers
977 return false;
978 return detail::generate_cpp(lexer.state_machine_, lexer.rules_, os
979 , name_suffix, f);
980 }
981
982 ///////////////////////////////////////////////////////////////////////////
983 // deprecated function, will be removed in the future (this has been
984 // replaced by the function generate_static_dfa - see below).
985 template <typename Lexer>
986 inline bool
987 generate_static(Lexer const& lexer
988 , std::basic_ostream<typename Lexer::char_type>& os
989 , typename Lexer::char_type const* name_suffix =
990 detail::L<typename Lexer::char_type>())
991 {
992 return generate_static(lexer, os, name_suffix
993 , &detail::generate_function_body_dfa<typename Lexer::char_type>);
994 }
995
996 ///////////////////////////////////////////////////////////////////////////
997 template <typename Lexer>
998 inline bool
999 generate_static_dfa(Lexer const& lexer
1000 , std::basic_ostream<typename Lexer::char_type>& os
1001 , typename Lexer::char_type const* name_suffix =
1002 detail::L<typename Lexer::char_type>())
1003 {
1004 return generate_static(lexer, os, name_suffix
1005 , &detail::generate_function_body_dfa<typename Lexer::char_type>);
1006 }
1007
1008 ///////////////////////////////////////////////////////////////////////////
1009 template <typename Lexer>
1010 inline bool
1011 generate_static_switch(Lexer const& lexer
1012 , std::basic_ostream<typename Lexer::char_type>& os
1013 , typename Lexer::char_type const* name_suffix =
1014 detail::L<typename Lexer::char_type>())
1015 {
1016 return generate_static(lexer, os, name_suffix
1017 , &detail::generate_function_body_switch<typename Lexer::char_type>);
1018 }
1019
1020///////////////////////////////////////////////////////////////////////////////
1021}}}}
1022
1023#endif