]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / boost / boost / spirit / home / lex / lexer / lexertl / generate_static.hpp
CommitLineData
7c673cae
FG
1// Copyright (c) 2008-2009 Ben Hanson
2// Copyright (c) 2008-2011 Hartmut Kaiser
3//
4// Distributed under the Boost Software License, Version 1.0. (See accompanying
5// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
7#if !defined(BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM)
8#define BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM
9
10#if defined(_MSC_VER)
11#pragma once
12#endif
13
14#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
15#include <boost/spirit/home/support/detail/lexer/consts.hpp>
16#include <boost/spirit/home/support/detail/lexer/rules.hpp>
17#include <boost/spirit/home/support/detail/lexer/size_t.hpp>
18#include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
19#include <boost/spirit/home/support/detail/lexer/debug.hpp>
20#include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp>
7c673cae 21#include <boost/scoped_array.hpp>
1e59de90 22#include <cstring>
f67539c2 23#include <locale>
7c673cae
FG
24
25///////////////////////////////////////////////////////////////////////////////
26namespace boost { namespace spirit { namespace lex { namespace lexertl
27{
28 namespace detail
29 {
30
31 ///////////////////////////////////////////////////////////////////////////
// Maps narrow literals written in this file to the character type of the
// stream the generated code is written to.
template <typename CharT>
struct string_lit;

// Narrow streams need no conversion at all: everything is passed through.
template <>
struct string_lit<char>
{
    // A single character is returned unchanged.
    static char get(char c)
    {
        return c;
    }

    // A NUL-terminated string (empty by default) is copied into a std::string.
    static std::string get(char const* str = "")
    {
        return std::string(str);
    }
};
41
42 template <>
43 struct string_lit<wchar_t>
44 {
45 static wchar_t get(char c)
46 {
47 typedef std::ctype<wchar_t> ctype_t;
48 return std::use_facet<ctype_t>(std::locale()).widen(c);
49 }
50 static std::basic_string<wchar_t> get(char const* source = "")
51 {
52 using namespace std; // some systems have size_t in ns std
53 size_t len = strlen(source);
54 boost::scoped_array<wchar_t> result (new wchar_t[len+1]);
55 result.get()[len] = '\0';
56
57 // working with wide character streams is supported only if the
58 // platform provides the std::ctype<wchar_t> facet
59 BOOST_ASSERT(std::has_facet<std::ctype<wchar_t> >(std::locale()));
60
61 std::use_facet<std::ctype<wchar_t> >(std::locale())
62 .widen(source, source + len, result.get());
63 return result.get();
64 }
65 };
66
67 template <typename Char>
68 inline Char L(char c)
69 {
70 return string_lit<Char>::get(c);
71 }
72
73 template <typename Char>
74 inline std::basic_string<Char> L(char const* c = "")
75 {
76 return string_lit<Char>::get(c);
77 }
78
79 ///////////////////////////////////////////////////////////////////////////
// Write a separator line made of 80 '/' characters followed by a newline.
// Returns whether the stream is still in a good state afterwards.
template <typename Char>
inline bool
generate_delimiter(std::basic_ostream<Char> &os_)
{
    std::basic_string<Char> const rule_(80, '/');
    os_ << rule_;
    os_ << "\n";
    return os_.good();
}
87
88 ///////////////////////////////////////////////////////////////////////////
89 // Generate a table of the names of the used lexer states, which is a bit
90 // tricky, because the table stored with the rules is sorted based on the
91 // names, but we need it sorted using the state ids.
92 template <typename Char>
93 inline bool
94 generate_cpp_state_info (boost::lexer::basic_rules<Char> const& rules_
95 , std::basic_ostream<Char> &os_, Char const* name_suffix)
96 {
97 // we need to re-sort the state names in ascending order of the state
98 // ids, filling possible gaps in between later
99 typedef typename
100 boost::lexer::basic_rules<Char>::string_size_t_map::const_iterator
101 state_iterator;
102 typedef std::map<std::size_t, Char const*> reverse_state_map_type;
103
104 reverse_state_map_type reverse_state_map;
105 state_iterator send = rules_.statemap().end();
106 for (state_iterator sit = rules_.statemap().begin(); sit != send; ++sit)
107 {
108 typedef typename reverse_state_map_type::value_type value_type;
109 reverse_state_map.insert(value_type((*sit).second, (*sit).first.c_str()));
110 }
111
112 generate_delimiter(os_);
113 os_ << "// this table defines the names of the lexer states\n";
114 os_ << boost::lexer::detail::strings<Char>::char_name()
115 << " const* const lexer_state_names"
116 << (name_suffix[0] ? "_" : "") << name_suffix
117 << "[" << rules_.statemap().size() << "] = \n{\n";
118
119 typedef typename reverse_state_map_type::iterator iterator;
120 iterator rend = reverse_state_map.end();
121 std::size_t last_id = 0;
122 for (iterator rit = reverse_state_map.begin(); rit != rend; ++last_id)
123 {
124 for (/**/; last_id < (*rit).first; ++last_id)
125 {
126 os_ << " 0, // \"<undefined state>\"\n";
127 }
128 os_ << " "
129 << boost::lexer::detail::strings<Char>::char_prefix()
130 << "\"" << (*rit).second << "\"";
131 if (++rit != rend)
132 os_ << ",\n";
133 else
134 os_ << "\n"; // don't generate the final comma
135 }
136 os_ << "};\n\n";
137
138 generate_delimiter(os_);
139 os_ << "// this variable defines the number of lexer states\n";
140 os_ << "std::size_t const lexer_state_count"
141 << (name_suffix[0] ? "_" : "") << name_suffix
142 << " = " << rules_.statemap().size() << ";\n\n";
143 return os_.good();
144 }
145
146 template <typename Char>
147 inline bool
148 generate_cpp_state_table (std::basic_ostream<Char> &os_
149 , Char const* name_suffix, bool bol, bool eol)
150 {
151 std::basic_string<Char> suffix(L<Char>(name_suffix[0] ? "_" : ""));
152 suffix += name_suffix;
153
154 generate_delimiter(os_);
155 os_ << "// this defines a generic accessors for the information above\n";
156 os_ << "struct lexer" << suffix << "\n{\n";
157 os_ << " // version number and feature-set of compatible static lexer engine\n";
158 os_ << " enum\n";
92f5a8d4 159 os_ << " {\n static_version = " << SPIRIT_STATIC_LEXER_VERSION << ",\n";
7c673cae
FG
160 os_ << " supports_bol = " << std::boolalpha << bol << ",\n";
161 os_ << " supports_eol = " << std::boolalpha << eol << "\n";
162 os_ << " };\n\n";
163 os_ << " // return the number of lexer states\n";
164 os_ << " static std::size_t state_count()\n";
165 os_ << " {\n return lexer_state_count" << suffix << "; \n }\n\n";
166 os_ << " // return the name of the lexer state as given by 'idx'\n";
167 os_ << " static " << boost::lexer::detail::strings<Char>::char_name()
168 << " const* state_name(std::size_t idx)\n";
169 os_ << " {\n return lexer_state_names" << suffix << "[idx]; \n }\n\n";
170 os_ << " // return the next matched token\n";
171 os_ << " template<typename Iterator>\n";
172 os_ << " static std::size_t next(std::size_t &start_state_, bool& bol_\n";
173 os_ << " , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n";
174 os_ << " {\n return next_token" << suffix
175 << "(start_state_, bol_, start_token_, end_, unique_id_);\n }\n";
176 os_ << "};\n\n";
177 return os_.good();
178 }
179
180 ///////////////////////////////////////////////////////////////////////////
181 // generate function body based on traversing the DFA tables
182 template <typename Char>
183 bool generate_function_body_dfa(std::basic_ostream<Char>& os_
184 , boost::lexer::basic_state_machine<Char> const &sm_)
185 {
186 std::size_t const dfas_ = sm_.data()._dfa->size();
187 std::size_t const lookups_ = sm_.data()._lookup->front()->size();
188
189 os_ << " enum {end_state_index, id_index, unique_id_index, "
190 "state_index, bol_index,\n";
191 os_ << " eol_index, dead_state_index, dfa_offset};\n\n";
192 os_ << " static std::size_t const npos = "
193 "static_cast<std::size_t>(~0);\n";
194
195 if (dfas_ > 1)
196 {
197 for (std::size_t state_ = 0; state_ < dfas_; ++state_)
198 {
199 std::size_t i_ = 0;
200 std::size_t j_ = 1;
201 std::size_t count_ = lookups_ / 8;
202 std::size_t const* lookup_ = &sm_.data()._lookup[state_]->front();
203 std::size_t const* dfa_ = &sm_.data()._dfa[state_]->front();
204
205 os_ << " static std::size_t const lookup" << state_
206 << "_[" << lookups_ << "] = {\n ";
207 for (/**/; i_ < count_; ++i_)
208 {
209 std::size_t const index_ = i_ * 8;
210 os_ << lookup_[index_];
211 for (/**/; j_ < 8; ++j_)
212 {
213 os_ << ", " << lookup_[index_ + j_];
214 }
215 if (i_ < count_ - 1)
216 {
217 os_ << ",\n ";
218 }
219 j_ = 1;
220 }
221 os_ << " };\n";
222
223 count_ = sm_.data()._dfa[state_]->size ();
224 os_ << " static const std::size_t dfa" << state_ << "_["
225 << count_ << "] = {\n ";
226 count_ /= 8;
227 for (i_ = 0; i_ < count_; ++i_)
228 {
229 std::size_t const index_ = i_ * 8;
230 os_ << dfa_[index_];
231 for (j_ = 1; j_ < 8; ++j_)
232 {
233 os_ << ", " << dfa_[index_ + j_];
234 }
235 if (i_ < count_ - 1)
236 {
237 os_ << ",\n ";
238 }
239 }
240
241 std::size_t const mod_ = sm_.data()._dfa[state_]->size () % 8;
242 if (mod_)
243 {
244 std::size_t const index_ = count_ * 8;
245 if (count_)
246 {
247 os_ << ",\n ";
248 }
249 os_ << dfa_[index_];
250 for (j_ = 1; j_ < mod_; ++j_)
251 {
252 os_ << ", " << dfa_[index_ + j_];
253 }
254 }
255 os_ << " };\n";
256 }
257
258 std::size_t count_ = sm_.data()._dfa_alphabet.size();
259 std::size_t i_ = 1;
260
261 os_ << " static std::size_t const* lookup_arr_[" << count_
262 << "] = { lookup0_";
263 for (i_ = 1; i_ < count_; ++i_)
264 {
265 os_ << ", " << "lookup" << i_ << "_";
266 }
267 os_ << " };\n";
268
269 os_ << " static std::size_t const dfa_alphabet_arr_["
270 << count_ << "] = { ";
271 os_ << sm_.data()._dfa_alphabet.front ();
272 for (i_ = 1; i_ < count_; ++i_)
273 {
274 os_ << ", " << sm_.data()._dfa_alphabet[i_];
275 }
276 os_ << " };\n";
277
278 os_ << " static std::size_t const* dfa_arr_[" << count_
279 << "] = { ";
280 os_ << "dfa0_";
281 for (i_ = 1; i_ < count_; ++i_)
282 {
283 os_ << ", " << "dfa" << i_ << "_";
284 }
285 os_ << " };\n";
286 }
287 else
288 {
289 std::size_t const* lookup_ = &sm_.data()._lookup[0]->front();
290 std::size_t const* dfa_ = &sm_.data()._dfa[0]->front();
291 std::size_t i_ = 0;
292 std::size_t j_ = 1;
293 std::size_t count_ = lookups_ / 8;
294
295 os_ << " static std::size_t const lookup_[";
296 os_ << sm_.data()._lookup[0]->size() << "] = {\n ";
297 for (/**/; i_ < count_; ++i_)
298 {
299 const std::size_t index_ = i_ * 8;
300 os_ << lookup_[index_];
301 for (/**/; j_ < 8; ++j_)
302 {
303 os_ << ", " << lookup_[index_ + j_];
304 }
305 if (i_ < count_ - 1)
306 {
307 os_ << ",\n ";
308 }
309 j_ = 1;
310 }
311 os_ << " };\n";
312
313 os_ << " static std::size_t const dfa_alphabet_ = "
314 << sm_.data()._dfa_alphabet.front () << ";\n";
315 os_ << " static std::size_t const dfa_["
316 << sm_.data()._dfa[0]->size () << "] = {\n ";
317 count_ = sm_.data()._dfa[0]->size () / 8;
318 for (i_ = 0; i_ < count_; ++i_)
319 {
320 const std::size_t index_ = i_ * 8;
321 os_ << dfa_[index_];
322 for (j_ = 1; j_ < 8; ++j_)
323 {
324 os_ << ", " << dfa_[index_ + j_];
325 }
326 if (i_ < count_ - 1)
327 {
328 os_ << ",\n ";
329 }
330 }
331
332 const std::size_t mod_ = sm_.data()._dfa[0]->size () % 8;
333 if (mod_)
334 {
335 const std::size_t index_ = count_ * 8;
336 if (count_)
337 {
338 os_ << ",\n ";
339 }
340 os_ << dfa_[index_];
341 for (j_ = 1; j_ < mod_; ++j_)
342 {
343 os_ << ", " << dfa_[index_ + j_];
344 }
345 }
346 os_ << " };\n";
347 }
348
349 os_ << "\n if (start_token_ == end_)\n";
350 os_ << " {\n";
351 os_ << " unique_id_ = npos;\n";
352 os_ << " return 0;\n";
353 os_ << " }\n\n";
354 if (sm_.data()._seen_BOL_assertion)
355 {
356 os_ << " bool bol = bol_;\n\n";
357 }
358
359 if (dfas_ > 1)
360 {
361 os_ << "again:\n";
362 os_ << " std::size_t const* lookup_ = lookup_arr_[start_state_];\n";
363 os_ << " std::size_t dfa_alphabet_ = dfa_alphabet_arr_[start_state_];\n";
364 os_ << " std::size_t const*dfa_ = dfa_arr_[start_state_];\n";
365 }
366
367 os_ << " std::size_t const* ptr_ = dfa_ + dfa_alphabet_;\n";
368 os_ << " Iterator curr_ = start_token_;\n";
369 os_ << " bool end_state_ = *ptr_ != 0;\n";
370 os_ << " std::size_t id_ = *(ptr_ + id_index);\n";
371 os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n";
372 if (dfas_ > 1)
373 {
374 os_ << " std::size_t end_start_state_ = start_state_;\n";
375 }
376 if (sm_.data()._seen_BOL_assertion)
377 {
378 os_ << " bool end_bol_ = bol_;\n";
379 }
380 os_ << " Iterator end_token_ = start_token_;\n\n";
381
382 os_ << " while (curr_ != end_)\n";
383 os_ << " {\n";
384
385 if (sm_.data()._seen_BOL_assertion)
386 {
387 os_ << " std::size_t const BOL_state_ = ptr_[bol_index];\n\n";
388 }
389
390 if (sm_.data()._seen_EOL_assertion)
391 {
392 os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
393 }
394
395 if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion)
396 {
397 os_ << " if (BOL_state_ && bol)\n";
398 os_ << " {\n";
399 os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
400 os_ << " }\n";
401 os_ << " else if (EOL_state_ && *curr_ == '\\n')\n";
402 os_ << " {\n";
403 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
404 os_ << " }\n";
405 os_ << " else\n";
406 os_ << " {\n";
407 if (lookups_ == 256)
408 {
409 os_ << " unsigned char index = \n";
410 os_ << " static_cast<unsigned char>(*curr_++);\n";
411 }
412 else
413 {
414 os_ << " std::size_t index = *curr_++\n";
415 }
11fdf7f2 416 os_ << " bol = (index == '\\n') ? true : false;\n";
7c673cae
FG
417 os_ << " std::size_t const state_ = ptr_[\n";
418 os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
419
420 os_ << '\n';
421 os_ << " if (state_ == 0) break;\n";
422 os_ << '\n';
423 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
424 os_ << " }\n\n";
425 }
426 else if (sm_.data()._seen_BOL_assertion)
427 {
428 os_ << " if (BOL_state_ && bol)\n";
429 os_ << " {\n";
430 os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
431 os_ << " }\n";
432 os_ << " else\n";
433 os_ << " {\n";
434 if (lookups_ == 256)
435 {
436 os_ << " unsigned char index = \n";
437 os_ << " static_cast<unsigned char>(*curr_++);\n";
438 }
439 else
440 {
441 os_ << " std::size_t index = *curr_++\n";
442 }
11fdf7f2 443 os_ << " bol = (index == '\\n') ? true : false;\n";
7c673cae
FG
444 os_ << " std::size_t const state_ = ptr_[\n";
445 os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
446
447 os_ << '\n';
448 os_ << " if (state_ == 0) break;\n";
449 os_ << '\n';
450 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
451 os_ << " }\n\n";
452 }
453 else if (sm_.data()._seen_EOL_assertion)
454 {
455 os_ << " if (EOL_state_ && *curr_ == '\\n')\n";
456 os_ << " {\n";
457 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
458 os_ << " }\n";
459 os_ << " else\n";
460 os_ << " {\n";
461 if (lookups_ == 256)
462 {
463 os_ << " unsigned char index = \n";
464 os_ << " static_cast<unsigned char>(*curr_++);\n";
465 }
466 else
467 {
468 os_ << " std::size_t index = *curr_++\n";
469 }
11fdf7f2 470 os_ << " bol = (index == '\\n') ? true : false;\n";
7c673cae
FG
471 os_ << " std::size_t const state_ = ptr_[\n";
472 os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
473
474 os_ << '\n';
475 os_ << " if (state_ == 0) break;\n";
476 os_ << '\n';
477 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
478 os_ << " }\n\n";
479 }
480 else
481 {
482 os_ << " std::size_t const state_ =\n";
483
484 if (lookups_ == 256)
485 {
486 os_ << " ptr_[lookup_["
487 "static_cast<unsigned char>(*curr_++)]];\n";
488 }
489 else
490 {
491 os_ << " ptr_[lookup_[*curr_++]];\n";
492 }
493
494 os_ << '\n';
495 os_ << " if (state_ == 0) break;\n";
496 os_ << '\n';
497 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n\n";
498 }
499
500 os_ << " if (*ptr_)\n";
501 os_ << " {\n";
502 os_ << " end_state_ = true;\n";
503 os_ << " id_ = *(ptr_ + id_index);\n";
504 os_ << " uid_ = *(ptr_ + unique_id_index);\n";
505 if (dfas_ > 1)
506 {
507 os_ << " end_start_state_ = *(ptr_ + state_index);\n";
508 }
509 if (sm_.data()._seen_BOL_assertion)
510 {
511 os_ << " end_bol_ = bol;\n";
512 }
513 os_ << " end_token_ = curr_;\n";
514 os_ << " }\n";
515 os_ << " }\n\n";
516
517 if (sm_.data()._seen_EOL_assertion)
518 {
519 os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
520
521 os_ << " if (EOL_state_ && curr_ == end_)\n";
522 os_ << " {\n";
523 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n\n";
524
525 os_ << " if (*ptr_)\n";
526 os_ << " {\n";
527 os_ << " end_state_ = true;\n";
528 os_ << " id_ = *(ptr_ + id_index);\n";
529 os_ << " uid_ = *(ptr_ + unique_id_index);\n";
530 if (dfas_ > 1)
531 {
532 os_ << " end_start_state_ = *(ptr_ + state_index);\n";
533 }
534 if (sm_.data()._seen_BOL_assertion)
535 {
536 os_ << " end_bol_ = bol;\n";
537 }
538 os_ << " end_token_ = curr_;\n";
539 os_ << " }\n";
540 os_ << " }\n\n";
541 }
542
543 os_ << " if (end_state_)\n";
544 os_ << " {\n";
545 os_ << " // return longest match\n";
546 os_ << " start_token_ = end_token_;\n";
547
548 if (dfas_ > 1)
549 {
550 os_ << " start_state_ = end_start_state_;\n";
551 os_ << " if (id_ == 0)\n";
552 os_ << " {\n";
553 if (sm_.data()._seen_BOL_assertion)
554 {
555 os_ << " bol = end_bol_;\n";
556 }
557 os_ << " goto again;\n";
558 os_ << " }\n";
559 if (sm_.data()._seen_BOL_assertion)
560 {
561 os_ << " else\n";
562 os_ << " {\n";
563 os_ << " bol_ = end_bol_;\n";
564 os_ << " }\n";
565 }
566 }
567 else if (sm_.data()._seen_BOL_assertion)
568 {
569 os_ << " bol_ = end_bol_;\n";
570 }
571
572 os_ << " }\n";
573 os_ << " else\n";
574 os_ << " {\n";
575
576 if (sm_.data()._seen_BOL_assertion)
577 {
11fdf7f2 578 os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n";
7c673cae
FG
579 }
580
581 os_ << " id_ = npos;\n";
582 os_ << " uid_ = npos;\n";
583 os_ << " }\n\n";
584
585 os_ << " unique_id_ = uid_;\n";
586 os_ << " return id_;\n";
587 return os_.good();
588 }
589
590 ///////////////////////////////////////////////////////////////////////////
591 template <typename Char>
592 inline std::basic_string<Char> get_charlit(Char ch)
593 {
594 std::basic_string<Char> result;
595 boost::lexer::basic_string_token<Char>::escape_char(ch, result);
596 return result;
597 }
598
599 // check whether state0_0 is referenced from any of the other states
600 template <typename Char>
601 bool need_label0_0(boost::lexer::basic_state_machine<Char> const &sm_)
602 {
603 typedef typename boost::lexer::basic_state_machine<Char>::iterator
604 iterator_type;
605 iterator_type iter_ = sm_.begin();
606 std::size_t const states_ = iter_->states;
607
608 for (std::size_t state_ = 0; state_ < states_; ++state_)
609 {
610 if (0 == iter_->bol_index || 0 == iter_->eol_index)
611 {
612 return true;
613 }
614
615 std::size_t const transitions_ = iter_->transitions;
616 for (std::size_t t_ = 0; t_ < transitions_; ++t_)
617 {
618 if (0 == iter_->goto_state)
619 {
620 return true;
621 }
622 ++iter_;
623 }
624 if (transitions_ == 0) ++iter_;
625 }
626 return false;
627 }
628
629 ///////////////////////////////////////////////////////////////////////////
630 template <typename Char>
631 bool generate_function_body_switch(std::basic_ostream<Char> & os_
632 , boost::lexer::basic_state_machine<Char> const &sm_)
633 {
634 typedef typename boost::lexer::basic_state_machine<Char>::iterator
635 iterator_type;
636
637 std::size_t const lookups_ = sm_.data()._lookup->front ()->size ();
638 iterator_type iter_ = sm_.begin();
639 iterator_type labeliter_ = iter_;
640 iterator_type end_ = sm_.end();
641 std::size_t const dfas_ = sm_.data()._dfa->size ();
642
643 os_ << " static std::size_t const npos = "
644 "static_cast<std::size_t>(~0);\n";
645
646 os_ << "\n if (start_token_ == end_)\n";
647 os_ << " {\n";
648 os_ << " unique_id_ = npos;\n";
649 os_ << " return 0;\n";
650 os_ << " }\n\n";
651
652 if (sm_.data()._seen_BOL_assertion)
653 {
654 os_ << " bool bol = bol_;\n";
655 }
656
657 if (dfas_ > 1)
658 {
659 os_ << "again:\n";
660 }
661
662 os_ << " Iterator curr_ = start_token_;\n";
663 os_ << " bool end_state_ = false;\n";
664 os_ << " std::size_t id_ = npos;\n";
665 os_ << " std::size_t uid_ = npos;\n";
666
667 if (dfas_ > 1)
668 {
669 os_ << " std::size_t end_start_state_ = start_state_;\n";
670 }
671
672 if (sm_.data()._seen_BOL_assertion)
673 {
674 os_ << " bool end_bol_ = bol_;\n";
675 }
676
677 os_ << " Iterator end_token_ = start_token_;\n";
678 os_ << '\n';
679
680 os_ << " " << ((lookups_ == 256) ? "char" : "wchar_t")
681 << " ch_ = 0;\n\n";
682
683 if (dfas_ > 1)
684 {
685 os_ << " switch (start_state_)\n";
686 os_ << " {\n";
687
688 for (std::size_t i_ = 0; i_ < dfas_; ++i_)
689 {
690 os_ << " case " << i_ << ":\n";
691 os_ << " goto state" << i_ << "_0;\n";
692 os_ << " break;\n";
693 }
694
695 os_ << " default:\n";
696 os_ << " goto end;\n";
697 os_ << " break;\n";
698 os_ << " }\n";
699 }
700
701 bool need_state0_0_label = need_label0_0(sm_);
702
703 for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
704 {
705 std::size_t const states_ = iter_->states;
706 for (std::size_t state_ = 0; state_ < states_; ++state_)
707 {
708 std::size_t const transitions_ = iter_->transitions;
709 std::size_t t_ = 0;
710
711 if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label)
712 {
713 os_ << "\nstate" << dfa_ << '_' << state_ << ":\n";
714 }
715
716 if (iter_->end_state)
717 {
718 os_ << " end_state_ = true;\n";
719 os_ << " id_ = " << iter_->id << ";\n";
720 os_ << " uid_ = " << iter_->unique_id << ";\n";
721 os_ << " end_token_ = curr_;\n";
722
723 if (dfas_ > 1)
724 {
725 os_ << " end_start_state_ = " << iter_->goto_dfa <<
726 ";\n";
727 }
728
729 if (sm_.data()._seen_BOL_assertion)
730 {
731 os_ << " end_bol_ = bol;\n";
732 }
733
734 if (transitions_) os_ << '\n';
735 }
736
737 if (t_ < transitions_ ||
738 iter_->bol_index != boost::lexer::npos ||
739 iter_->eol_index != boost::lexer::npos)
740 {
741 os_ << " if (curr_ == end_) goto end;\n";
742 os_ << " ch_ = *curr_;\n";
743 if (iter_->bol_index != boost::lexer::npos)
744 {
745 os_ << "\n if (bol) goto state" << dfa_ << '_'
746 << iter_->bol_index << ";\n";
747 }
748 if (iter_->eol_index != boost::lexer::npos)
749 {
11fdf7f2 750 os_ << "\n if (ch_ == '\\n') goto state" << dfa_
7c673cae
FG
751 << '_' << iter_->eol_index << ";\n";
752 }
753 os_ << " ++curr_;\n";
754 }
755
756 for (/**/; t_ < transitions_; ++t_)
757 {
758 Char const *ptr_ = iter_->token._charset.c_str();
1e59de90 759 Char const *end2_ = ptr_ + iter_->token._charset.size();
7c673cae
FG
760 Char start_char_ = 0;
761 Char curr_char_ = 0;
762 bool range_ = false;
763 bool first_char_ = true;
764
765 os_ << "\n if (";
766
1e59de90 767 while (ptr_ != end2_)
7c673cae
FG
768 {
769 curr_char_ = *ptr_++;
770
771 if (*ptr_ == curr_char_ + 1)
772 {
773 if (!range_)
774 {
775 start_char_ = curr_char_;
776 }
777 range_ = true;
778 }
779 else
780 {
781 if (!first_char_)
782 {
783 os_ << ((iter_->token._negated) ? " && " : " || ");
784 }
785 else
786 {
787 first_char_ = false;
788 }
789 if (range_)
790 {
791 if (iter_->token._negated)
792 {
793 os_ << "!";
794 }
795 os_ << "(ch_ >= '" << get_charlit(start_char_)
796 << "' && ch_ <= '"
797 << get_charlit(curr_char_) << "')";
798 range_ = false;
799 }
800 else
801 {
802 os_ << "ch_ "
803 << ((iter_->token._negated) ? "!=" : "==")
804 << " '" << get_charlit(curr_char_) << "'";
805 }
806 }
807 }
808
809 os_ << ") goto state" << dfa_ << '_' << iter_->goto_state
810 << ";\n";
811 ++iter_;
812 }
813
814 if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1))
815 {
816 os_ << " goto end;\n";
817 }
818
819 if (transitions_ == 0) ++iter_;
820 }
821 }
822
823 os_ << "\nend:\n";
824 os_ << " if (end_state_)\n";
825 os_ << " {\n";
826 os_ << " // return longest match\n";
827 os_ << " start_token_ = end_token_;\n";
828
829 if (dfas_ > 1)
830 {
831 os_ << " start_state_ = end_start_state_;\n";
832 os_ << "\n if (id_ == 0)\n";
833 os_ << " {\n";
834
835 if (sm_.data()._seen_BOL_assertion)
836 {
837 os_ << " bol = end_bol_;\n";
838 }
839
840 os_ << " goto again;\n";
841 os_ << " }\n";
842
843 if (sm_.data()._seen_BOL_assertion)
844 {
845 os_ << " else\n";
846 os_ << " {\n";
847 os_ << " bol_ = end_bol_;\n";
848 os_ << " }\n";
849 }
850 }
851 else if (sm_.data()._seen_BOL_assertion)
852 {
853 os_ << " bol_ = end_bol_;\n";
854 }
855
856 os_ << " }\n";
857 os_ << " else\n";
858 os_ << " {\n";
859
860 if (sm_.data()._seen_BOL_assertion)
861 {
862 os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n";
863 }
864 os_ << " id_ = npos;\n";
865 os_ << " uid_ = npos;\n";
866 os_ << " }\n\n";
867
868 os_ << " unique_id_ = uid_;\n";
869 os_ << " return id_;\n";
870 return os_.good();
871 }
872
873 ///////////////////////////////////////////////////////////////////////////
874 // Generate a tokenizer for the given state machine.
875 template <typename Char, typename F>
876 inline bool
877 generate_cpp (boost::lexer::basic_state_machine<Char> const& sm_
878 , boost::lexer::basic_rules<Char> const& rules_
879 , std::basic_ostream<Char> &os_, Char const* name_suffix
880 , F generate_function_body)
881 {
882 if (sm_.data()._lookup->empty())
883 return false;
884
885 std::size_t const dfas_ = sm_.data()._dfa->size();
886// std::size_t const lookups_ = sm_.data()._lookup->front()->size();
887
888 os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
889 os_ << "// Copyright (c) 2008-2011 Hartmut Kaiser\n";
890 os_ << "//\n";
891 os_ << "// Distributed under the Boost Software License, "
892 "Version 1.0. (See accompanying\n";
893 os_ << "// file licence_1_0.txt or copy at "
894 "http://www.boost.org/LICENSE_1_0.txt)\n\n";
895 os_ << "// Auto-generated by boost::lexer, do not edit\n\n";
896
897 std::basic_string<Char> guard(name_suffix);
898 guard += L<Char>(name_suffix[0] ? "_" : "");
899 guard += L<Char>(__DATE__ "_" __TIME__);
900 typename std::basic_string<Char>::size_type p =
901 guard.find_first_of(L<Char>(": "));
902 while (std::string::npos != p)
903 {
904 guard.replace(p, 1, L<Char>("_"));
905 p = guard.find_first_of(L<Char>(": "), p);
906 }
f67539c2
TL
907 { // to_upper(guard)
908 typedef std::ctype<Char> facet_t;
909 facet_t const& facet = std::use_facet<facet_t>(std::locale());
910 typedef typename std::basic_string<Char>::iterator iter_t;
911 for (iter_t iter = guard.begin(),
912 last = guard.end(); iter != last; ++iter)
913 *iter = facet.toupper(*iter);
914 }
7c673cae
FG
915
916 os_ << "#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << ")\n";
917 os_ << "#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << "\n\n";
918
7c673cae
FG
919 os_ << "#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>\n\n";
920
921 generate_delimiter(os_);
922 os_ << "// the generated table of state names and the tokenizer have to be\n"
923 "// defined in the boost::spirit::lex::lexertl::static_ namespace\n";
924 os_ << "namespace boost { namespace spirit { namespace lex { "
925 "namespace lexertl { namespace static_ {\n\n";
926
927 // generate the lexer state information variables
928 if (!generate_cpp_state_info(rules_, os_, name_suffix))
929 return false;
930
931 generate_delimiter(os_);
932 os_ << "// this function returns the next matched token\n";
933 os_ << "template<typename Iterator>\n";
934 os_ << "std::size_t next_token" << (name_suffix[0] ? "_" : "")
935 << name_suffix << " (";
936
937 if (dfas_ > 1)
938 {
939 os_ << "std::size_t& start_state_, ";
940 }
941 else
942 {
943 os_ << "std::size_t& /*start_state_*/, ";
944 }
945 if (sm_.data()._seen_BOL_assertion)
946 {
947 os_ << "bool& bol_, ";
948 }
949 else
950 {
951 os_ << "bool& /*bol_*/, ";
952 }
953 os_ << "\n ";
954
955 os_ << "Iterator &start_token_, Iterator const& end_, ";
956 os_ << "std::size_t& unique_id_)\n";
957 os_ << "{\n";
958 if (!generate_function_body(os_, sm_))
959 return false;
960 os_ << "}\n\n";
961
962 if (!generate_cpp_state_table<Char>(os_, name_suffix
963 , sm_.data()._seen_BOL_assertion, sm_.data()._seen_EOL_assertion))
964 {
965 return false;
966 }
967
968 os_ << "}}}}} // namespace boost::spirit::lex::lexertl::static_\n\n";
969
970 os_ << "#endif\n";
971
972 return os_.good();
973 }
974
975 } // namespace detail
976
977 ///////////////////////////////////////////////////////////////////////////
978 template <typename Lexer, typename F>
979 inline bool
980 generate_static(Lexer const& lexer
981 , std::basic_ostream<typename Lexer::char_type>& os
982 , typename Lexer::char_type const* name_suffix, F f)
983 {
984 if (!lexer.init_dfa(true)) // always minimize DFA for static lexers
985 return false;
986 return detail::generate_cpp(lexer.state_machine_, lexer.rules_, os
987 , name_suffix, f);
988 }
989
990 ///////////////////////////////////////////////////////////////////////////
991 // deprecated function, will be removed in the future (this has been
992 // replaced by the function generate_static_dfa - see below).
993 template <typename Lexer>
994 inline bool
995 generate_static(Lexer const& lexer
996 , std::basic_ostream<typename Lexer::char_type>& os
997 , typename Lexer::char_type const* name_suffix =
998 detail::L<typename Lexer::char_type>())
999 {
1000 return generate_static(lexer, os, name_suffix
1001 , &detail::generate_function_body_dfa<typename Lexer::char_type>);
1002 }
1003
1004 ///////////////////////////////////////////////////////////////////////////
1005 template <typename Lexer>
1006 inline bool
1007 generate_static_dfa(Lexer const& lexer
1008 , std::basic_ostream<typename Lexer::char_type>& os
1009 , typename Lexer::char_type const* name_suffix =
1010 detail::L<typename Lexer::char_type>())
1011 {
1012 return generate_static(lexer, os, name_suffix
1013 , &detail::generate_function_body_dfa<typename Lexer::char_type>);
1014 }
1015
1016 ///////////////////////////////////////////////////////////////////////////
1017 template <typename Lexer>
1018 inline bool
1019 generate_static_switch(Lexer const& lexer
1020 , std::basic_ostream<typename Lexer::char_type>& os
1021 , typename Lexer::char_type const* name_suffix =
1022 detail::L<typename Lexer::char_type>())
1023 {
1024 return generate_static(lexer, os, name_suffix
1025 , &detail::generate_function_body_switch<typename Lexer::char_type>);
1026 }
1027
1028///////////////////////////////////////////////////////////////////////////////
1029}}}}
1030
1031#endif