]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // rules.hpp |
2 | // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) | |
3 | // | |
4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
5 | // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
f67539c2 TL |
6 | #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_RULES_HPP |
7 | #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_RULES_HPP | |
7c673cae FG |
8 | |
9 | #include "consts.hpp" | |
10 | #include <deque> | |
11 | #include <locale> | |
12 | #include <map> | |
13 | #include "runtime_error.hpp" | |
14 | #include <set> | |
15 | #include "size_t.hpp" | |
16 | #include <sstream> | |
17 | #include <string> | |
18 | #include <vector> | |
19 | ||
20 | namespace boost | |
21 | { | |
22 | namespace lexer | |
23 | { | |
24 | namespace detail | |
25 | { | |
26 | // return name of initial state | |
27 | template <typename CharT> | |
28 | struct strings; | |
29 | ||
30 | template <> | |
31 | struct strings<char> | |
32 | { | |
33 | static const char *initial () | |
34 | { | |
35 | return "INITIAL"; | |
36 | } | |
37 | ||
38 | static const char *dot () | |
39 | { | |
40 | return "."; | |
41 | } | |
42 | ||
43 | static const char *all_states () | |
44 | { | |
45 | return "*"; | |
46 | } | |
47 | ||
48 | static const char *char_name () | |
49 | { | |
50 | return "char"; | |
51 | } | |
52 | ||
53 | static const char *char_prefix () | |
54 | { | |
55 | return ""; | |
56 | } | |
57 | }; | |
58 | ||
59 | template <> | |
60 | struct strings<wchar_t> | |
61 | { | |
62 | static const wchar_t *initial () | |
63 | { | |
64 | return L"INITIAL"; | |
65 | } | |
66 | ||
67 | static const wchar_t *dot () | |
68 | { | |
69 | return L"."; | |
70 | } | |
71 | ||
72 | static const wchar_t *all_states () | |
73 | { | |
74 | return L"*"; | |
75 | } | |
76 | ||
77 | static const char *char_name () | |
78 | { | |
79 | return "wchar_t"; | |
80 | } | |
81 | ||
82 | static const char *char_prefix () | |
83 | { | |
84 | return "L"; | |
85 | } | |
86 | }; | |
87 | } | |
88 | ||
89 | template<typename CharT> | |
90 | class basic_rules | |
91 | { | |
92 | public: | |
93 | typedef std::vector<std::size_t> id_vector; | |
94 | typedef std::deque<id_vector> id_vector_deque; | |
95 | typedef std::basic_string<CharT> string; | |
96 | typedef std::deque<string> string_deque; | |
97 | typedef std::deque<string_deque> string_deque_deque; | |
98 | typedef std::set<string> string_set; | |
99 | typedef std::pair<string, string> string_pair; | |
100 | typedef std::deque<string_pair> string_pair_deque; | |
101 | typedef std::map<string, std::size_t> string_size_t_map; | |
102 | typedef std::pair<string, std::size_t> string_size_t_pair; | |
103 | ||
104 | basic_rules (const regex_flags flags_ = dot_not_newline, | |
105 | std::size_t (*counter_ptr_) () = 0) : | |
106 | _flags (flags_), | |
107 | _counter (0), | |
108 | _counter_ptr (counter_ptr_) | |
109 | { | |
110 | add_state (initial ()); | |
111 | } | |
112 | ||
113 | void clear () | |
114 | { | |
115 | _statemap.clear (); | |
116 | _macrodeque.clear (); | |
117 | _macroset.clear (); | |
118 | _regexes.clear (); | |
119 | _ids.clear (); | |
120 | _unique_ids.clear (); | |
121 | _states.clear (); | |
122 | _flags = dot_not_newline; | |
123 | _locale = std::locale (); | |
124 | add_state (initial ()); | |
125 | } | |
126 | ||
127 | void clear (const CharT *state_name_) | |
128 | { | |
129 | std::size_t state_ = state (state_name_); | |
130 | ||
131 | if (state_ != npos) | |
132 | { | |
133 | _regexes[state_].clear (); | |
134 | _ids[state_].clear (); | |
135 | _unique_ids[state_].clear (); | |
136 | _states[state_].clear (); | |
137 | } | |
138 | } | |
139 | ||
140 | void flags (const regex_flags flags_) | |
141 | { | |
142 | _flags = flags_; | |
143 | } | |
144 | ||
145 | regex_flags flags () const | |
146 | { | |
147 | return _flags; | |
148 | } | |
149 | ||
150 | std::size_t next_unique_id () | |
151 | { | |
152 | return _counter_ptr ? _counter_ptr () : _counter++; | |
153 | } | |
154 | ||
155 | std::locale imbue (std::locale &locale_) | |
156 | { | |
157 | std::locale loc_ = _locale; | |
158 | ||
159 | _locale = locale_; | |
160 | return loc_; | |
161 | } | |
162 | ||
163 | const std::locale &locale () const | |
164 | { | |
165 | return _locale; | |
166 | } | |
167 | ||
168 | std::size_t state (const CharT *name_) const | |
169 | { | |
170 | std::size_t state_ = npos; | |
171 | typename string_size_t_map::const_iterator iter_ = | |
172 | _statemap.find (name_); | |
173 | ||
174 | if (iter_ != _statemap.end ()) | |
175 | { | |
176 | state_ = iter_->second; | |
177 | } | |
178 | ||
179 | return state_; | |
180 | } | |
181 | ||
182 | const CharT *state (const std::size_t index_) const | |
183 | { | |
184 | if (index_ == 0) | |
185 | { | |
186 | return initial (); | |
187 | } | |
188 | else | |
189 | { | |
190 | const std::size_t vec_index_ = index_ - 1; | |
191 | ||
192 | if (vec_index_ > _lexer_state_names.size () - 1) | |
193 | { | |
194 | return 0; | |
195 | } | |
196 | else | |
197 | { | |
198 | return _lexer_state_names[vec_index_].c_str (); | |
199 | } | |
200 | } | |
201 | } | |
202 | ||
203 | std::size_t add_state (const CharT *name_) | |
204 | { | |
205 | validate (name_); | |
206 | ||
207 | if (_statemap.insert (string_size_t_pair (name_, | |
208 | _statemap.size ())).second) | |
209 | { | |
210 | _regexes.push_back (string_deque ()); | |
211 | _ids.push_back (id_vector ()); | |
212 | _unique_ids.push_back (id_vector ()); | |
213 | _states.push_back (id_vector ()); | |
214 | ||
215 | if (string (name_) != initial ()) | |
216 | { | |
217 | _lexer_state_names.push_back (name_); | |
218 | } | |
219 | } | |
220 | ||
221 | // Initial is not stored, so no need to - 1. | |
222 | return _lexer_state_names.size (); | |
223 | } | |
224 | ||
225 | void add_macro (const CharT *name_, const CharT *regex_) | |
226 | { | |
227 | add_macro (name_, string (regex_)); | |
228 | } | |
229 | ||
230 | void add_macro (const CharT *name_, const CharT *regex_start_, | |
231 | const CharT *regex_end_) | |
232 | { | |
233 | add_macro (name_, string (regex_start_, regex_end_)); | |
234 | } | |
235 | ||
236 | void add_macro (const CharT *name_, const string ®ex_) | |
237 | { | |
238 | validate (name_); | |
239 | ||
240 | typename string_set::const_iterator iter_ = _macroset.find (name_); | |
241 | ||
242 | if (iter_ == _macroset.end ()) | |
243 | { | |
244 | _macrodeque.push_back (string_pair (name_, regex_)); | |
245 | _macroset.insert (name_); | |
246 | } | |
247 | else | |
248 | { | |
249 | std::basic_stringstream<CharT> ss_; | |
250 | std::ostringstream os_; | |
251 | ||
252 | os_ << "Attempt to redefine MACRO '"; | |
253 | ||
254 | while (*name_) | |
255 | { | |
256 | os_ << ss_.narrow (*name_++, static_cast<CharT> (' ')); | |
257 | } | |
258 | ||
259 | os_ << "'."; | |
260 | throw runtime_error (os_.str ()); | |
261 | } | |
262 | } | |
263 | ||
264 | void add_macros (const basic_rules<CharT> &rules_) | |
265 | { | |
266 | const string_pair_deque ¯os_ = rules_.macrodeque (); | |
267 | typename string_pair_deque::const_iterator macro_iter_ = | |
268 | macros_.begin (); | |
269 | typename string_pair_deque::const_iterator macro_end_ = | |
270 | macros_.end (); | |
271 | ||
272 | for (; macro_iter_ != macro_end_; ++macro_iter_) | |
273 | { | |
274 | add_macro (macro_iter_->first.c_str (), | |
275 | macro_iter_->second.c_str ()); | |
276 | } | |
277 | } | |
278 | ||
279 | void merge_macros (const basic_rules<CharT> &rules_) | |
280 | { | |
281 | const string_pair_deque ¯os_ = rules_.macrodeque (); | |
282 | typename string_pair_deque::const_iterator macro_iter_ = | |
283 | macros_.begin (); | |
284 | typename string_pair_deque::const_iterator macro_end_ = | |
285 | macros_.end (); | |
286 | typename string_set::const_iterator macro_dest_iter_; | |
287 | typename string_set::const_iterator macro_dest_end_ = _macroset.end (); | |
288 | ||
289 | for (; macro_iter_ != macro_end_; ++macro_iter_) | |
290 | { | |
291 | macro_dest_iter_ = _macroset.find (macro_iter_->first); | |
292 | ||
293 | if (macro_dest_iter_ == macro_dest_end_) | |
294 | { | |
295 | add_macro (macro_iter_->first.c_str (), | |
296 | macro_iter_->second.c_str ()); | |
297 | } | |
298 | } | |
299 | } | |
300 | ||
301 | std::size_t add (const CharT *regex_, const std::size_t id_) | |
302 | { | |
303 | return add (string (regex_), id_); | |
304 | } | |
305 | ||
306 | std::size_t add (const CharT *regex_start_, const CharT *regex_end_, | |
307 | const std::size_t id_) | |
308 | { | |
309 | return add (string (regex_start_, regex_end_), id_); | |
310 | } | |
311 | ||
312 | std::size_t add (const string ®ex_, const std::size_t id_) | |
313 | { | |
314 | const std::size_t counter_ = next_unique_id (); | |
315 | ||
316 | check_for_invalid_id (id_); | |
317 | _regexes.front ().push_back (regex_); | |
318 | _ids.front ().push_back (id_); | |
319 | _unique_ids.front ().push_back (counter_); | |
320 | _states.front ().push_back (0); | |
321 | return counter_; | |
322 | } | |
323 | ||
324 | std::size_t add (const CharT *curr_state_, const CharT *regex_, | |
325 | const CharT *new_state_) | |
326 | { | |
327 | return add (curr_state_, string (regex_), new_state_); | |
328 | } | |
329 | ||
330 | std::size_t add (const CharT *curr_state_, const CharT *regex_start_, | |
331 | const CharT *regex_end_, const CharT *new_state_) | |
332 | { | |
333 | return add (curr_state_, string (regex_start_, regex_end_), | |
334 | new_state_); | |
335 | } | |
336 | ||
337 | std::size_t add (const CharT *curr_state_, const string ®ex_, | |
338 | const CharT *new_state_) | |
339 | { | |
340 | return add (curr_state_, regex_, 0, new_state_, false); | |
341 | } | |
342 | ||
343 | std::size_t add (const CharT *curr_state_, const CharT *regex_, | |
344 | const std::size_t id_, const CharT *new_state_) | |
345 | { | |
346 | return add (curr_state_, string (regex_), id_, new_state_); | |
347 | } | |
348 | ||
349 | std::size_t add (const CharT *curr_state_, const CharT *regex_start_, | |
350 | const CharT *regex_end_, const std::size_t id_, | |
351 | const CharT *new_state_) | |
352 | { | |
353 | return add (curr_state_, string (regex_start_, regex_end_), id_, | |
354 | new_state_); | |
355 | } | |
356 | ||
357 | std::size_t add (const CharT *curr_state_, const string ®ex_, | |
358 | const std::size_t id_, const CharT *new_state_) | |
359 | { | |
360 | return add (curr_state_, regex_, id_, new_state_, true); | |
361 | } | |
362 | ||
363 | void add (const CharT *source_, const basic_rules<CharT> &rules_, | |
364 | const CharT *dest_, const CharT *to_ = detail::strings<CharT>::dot ()) | |
365 | { | |
366 | const bool star_ = *source_ == '*' && *(source_ + 1) == 0; | |
367 | const bool dest_dot_ = *dest_ == '.' && *(dest_ + 1) == 0; | |
368 | const bool to_dot_ = *to_ == '.' && *(to_ + 1) == 0; | |
369 | std::size_t state_ = 0; | |
370 | const string_deque_deque &all_regexes_ = rules_.regexes (); | |
371 | const id_vector_deque &all_ids_ = rules_.ids (); | |
372 | const id_vector_deque &all_unique_ids_ = rules_.unique_ids (); | |
373 | const id_vector_deque &all_states_ = rules_.states (); | |
374 | typename string_deque::const_iterator regex_iter_; | |
375 | typename string_deque::const_iterator regex_end_; | |
376 | typename id_vector::const_iterator id_iter_; | |
377 | typename id_vector::const_iterator uid_iter_; | |
378 | typename id_vector::const_iterator state_iter_; | |
379 | ||
380 | if (star_) | |
381 | { | |
382 | typename string_deque_deque::const_iterator all_regexes_iter_ = | |
383 | all_regexes_.begin (); | |
384 | typename string_deque_deque::const_iterator all_regexes_end_ = | |
385 | all_regexes_.end (); | |
386 | typename id_vector_deque::const_iterator all_ids_iter_ = | |
387 | all_ids_.begin (); | |
388 | typename id_vector_deque::const_iterator all_uids_iter_ = | |
389 | all_unique_ids_.begin (); | |
390 | typename id_vector_deque::const_iterator all_states_iter_ = | |
391 | all_states_.begin (); | |
392 | ||
393 | for (; all_regexes_iter_ != all_regexes_end_; | |
394 | ++state_, ++all_regexes_iter_, ++all_ids_iter_, | |
395 | ++all_uids_iter_, ++all_states_iter_) | |
396 | { | |
397 | regex_iter_ = all_regexes_iter_->begin (); | |
398 | regex_end_ = all_regexes_iter_->end (); | |
399 | id_iter_ = all_ids_iter_->begin (); | |
400 | uid_iter_ = all_uids_iter_->begin (); | |
401 | state_iter_ = all_states_iter_->begin (); | |
402 | ||
403 | for (; regex_iter_ != regex_end_; ++regex_iter_, ++id_iter_, | |
404 | ++uid_iter_, ++state_iter_) | |
405 | { | |
406 | // If ..._dot_ then lookup state name from rules_; otherwise | |
407 | // pass name through. | |
408 | add (dest_dot_ ? rules_.state (state_) : dest_, *regex_iter_, | |
409 | *id_iter_, to_dot_ ? rules_.state (*state_iter_) : to_, true, | |
410 | *uid_iter_); | |
411 | } | |
412 | } | |
413 | } | |
414 | else | |
415 | { | |
416 | const CharT *start_ = source_; | |
417 | string state_name_; | |
418 | ||
419 | while (*source_) | |
420 | { | |
421 | while (*source_ && *source_ != ',') | |
422 | { | |
423 | ++source_; | |
424 | } | |
425 | ||
426 | state_name_.assign (start_, source_); | |
427 | ||
428 | if (*source_) | |
429 | { | |
430 | ++source_; | |
431 | start_ = source_; | |
432 | } | |
433 | ||
434 | state_ = rules_.state (state_name_.c_str ()); | |
435 | ||
436 | if (state_ == npos) | |
437 | { | |
438 | std::basic_stringstream<CharT> ss_; | |
439 | std::ostringstream os_; | |
440 | ||
441 | os_ << "Unknown state name '"; | |
442 | source_ = state_name_.c_str (); | |
443 | ||
444 | while (*source_) | |
445 | { | |
446 | os_ << ss_.narrow (*source_++, ' '); | |
447 | } | |
448 | ||
449 | os_ << "'."; | |
450 | throw runtime_error (os_.str ()); | |
451 | } | |
452 | ||
453 | regex_iter_ = all_regexes_[state_].begin (); | |
454 | regex_end_ = all_regexes_[state_].end (); | |
455 | id_iter_ = all_ids_[state_].begin (); | |
456 | uid_iter_ = all_unique_ids_[state_].begin (); | |
457 | state_iter_ = all_states_[state_].begin (); | |
458 | ||
459 | for (; regex_iter_ != regex_end_; ++regex_iter_, ++id_iter_, | |
460 | ++uid_iter_, ++state_iter_) | |
461 | { | |
462 | // If ..._dot_ then lookup state name from rules_; otherwise | |
463 | // pass name through. | |
464 | add (dest_dot_ ? state_name_.c_str () : dest_, *regex_iter_, | |
465 | *id_iter_, to_dot_ ? rules_.state (*state_iter_) : to_, true, | |
466 | *uid_iter_); | |
467 | } | |
468 | } | |
469 | } | |
470 | } | |
471 | /* | |
472 | void add (const CharT *curr_state_, const basic_rules<CharT> &rules_) | |
473 | { | |
474 | const string_deque_deque ®exes_ = rules_.regexes (); | |
475 | const id_vector_deque &ids_ = rules_.ids (); | |
476 | const id_vector_deque &unique_ids_ = rules_.unique_ids (); | |
477 | typename string_deque_deque::const_iterator state_regex_iter_ = | |
478 | regexes_.begin (); | |
479 | typename string_deque_deque::const_iterator state_regex_end_ = | |
480 | regexes_.end (); | |
481 | typename id_vector_deque::const_iterator state_id_iter_ = | |
482 | ids_.begin (); | |
483 | typename id_vector_deque::const_iterator state_uid_iter_ = | |
484 | unique_ids_.begin (); | |
485 | typename string_deque::const_iterator regex_iter_; | |
486 | typename string_deque::const_iterator regex_end_; | |
487 | typename id_vector::const_iterator id_iter_; | |
488 | typename id_vector::const_iterator uid_iter_; | |
489 | ||
490 | for (; state_regex_iter_ != state_regex_end_; ++state_regex_iter_) | |
491 | { | |
492 | regex_iter_ = state_regex_iter_->begin (); | |
493 | regex_end_ = state_regex_iter_->end (); | |
494 | id_iter_ = state_id_iter_->begin (); | |
495 | uid_iter_ = state_uid_iter_->begin (); | |
496 | ||
497 | for (; regex_iter_ != regex_end_; ++regex_iter_, ++id_iter_, | |
498 | ++uid_iter_) | |
499 | { | |
500 | add (curr_state_, *regex_iter_, *id_iter_, curr_state_, true, | |
501 | *uid_iter_); | |
502 | } | |
503 | } | |
504 | } | |
505 | */ | |
506 | const string_size_t_map &statemap () const | |
507 | { | |
508 | return _statemap; | |
509 | } | |
510 | ||
511 | const string_pair_deque ¯odeque () const | |
512 | { | |
513 | return _macrodeque; | |
514 | } | |
515 | ||
516 | const string_deque_deque ®exes () const | |
517 | { | |
518 | return _regexes; | |
519 | } | |
520 | ||
521 | const id_vector_deque &ids () const | |
522 | { | |
523 | return _ids; | |
524 | } | |
525 | ||
526 | const id_vector_deque &unique_ids () const | |
527 | { | |
528 | return _unique_ids; | |
529 | } | |
530 | ||
531 | const id_vector_deque &states () const | |
532 | { | |
533 | return _states; | |
534 | } | |
535 | ||
536 | bool empty () const | |
537 | { | |
538 | typename string_deque_deque::const_iterator iter_ = _regexes.begin (); | |
539 | typename string_deque_deque::const_iterator end_ = _regexes.end (); | |
540 | bool empty_ = true; | |
541 | ||
542 | for (; iter_ != end_; ++iter_) | |
543 | { | |
544 | if (!iter_->empty ()) | |
545 | { | |
546 | empty_ = false; | |
547 | break; | |
548 | } | |
549 | } | |
550 | ||
551 | return empty_; | |
552 | } | |
553 | ||
554 | static const CharT *initial () | |
555 | { | |
556 | return detail::strings<CharT>::initial (); | |
557 | } | |
558 | ||
559 | static const CharT *all_states () | |
560 | { | |
561 | return detail::strings<CharT>::all_states (); | |
562 | } | |
563 | ||
564 | static const CharT *dot () | |
565 | { | |
566 | return detail::strings<CharT>::dot (); | |
567 | } | |
568 | ||
569 | private: | |
570 | string_size_t_map _statemap; | |
571 | string_pair_deque _macrodeque; | |
572 | string_set _macroset; | |
573 | string_deque_deque _regexes; | |
574 | id_vector_deque _ids; | |
575 | id_vector_deque _unique_ids; | |
576 | id_vector_deque _states; | |
577 | regex_flags _flags; | |
578 | std::size_t _counter; | |
579 | std::size_t (*_counter_ptr) (); | |
580 | std::locale _locale; | |
581 | string_deque _lexer_state_names; | |
582 | ||
583 | std::size_t add (const CharT *curr_state_, const string ®ex_, | |
584 | const std::size_t id_, const CharT *new_state_, const bool check_, | |
585 | const std::size_t uid_ = npos) | |
586 | { | |
587 | const bool star_ = *curr_state_ == '*' && *(curr_state_ + 1) == 0; | |
588 | const bool dot_ = *new_state_ == '.' && *(new_state_ + 1) == 0; | |
589 | ||
590 | if (check_) | |
591 | { | |
592 | check_for_invalid_id (id_); | |
593 | } | |
594 | ||
595 | if (!dot_) | |
596 | { | |
597 | validate (new_state_); | |
598 | } | |
599 | ||
600 | std::size_t new_ = string::npos; | |
601 | typename string_size_t_map::const_iterator iter_; | |
602 | typename string_size_t_map::const_iterator end_ = _statemap.end (); | |
603 | id_vector states_; | |
604 | ||
605 | if (!dot_) | |
606 | { | |
607 | iter_ = _statemap.find (new_state_); | |
608 | ||
609 | if (iter_ == end_) | |
610 | { | |
611 | std::basic_stringstream<CharT> ss_; | |
612 | std::ostringstream os_; | |
613 | ||
614 | os_ << "Unknown state name '"; | |
615 | ||
616 | while (*new_state_) | |
617 | { | |
618 | os_ << ss_.narrow (*new_state_++, ' '); | |
619 | } | |
620 | ||
621 | os_ << "'."; | |
622 | throw runtime_error (os_.str ()); | |
623 | } | |
624 | ||
625 | new_ = iter_->second; | |
626 | } | |
627 | ||
628 | if (star_) | |
629 | { | |
630 | const std::size_t size_ = _statemap.size (); | |
631 | ||
632 | for (std::size_t i_ = 0; i_ < size_; ++i_) | |
633 | { | |
634 | states_.push_back (i_); | |
635 | } | |
636 | } | |
637 | else | |
638 | { | |
639 | const CharT *start_ = curr_state_; | |
640 | string state_; | |
641 | ||
642 | while (*curr_state_) | |
643 | { | |
644 | while (*curr_state_ && *curr_state_ != ',') | |
645 | { | |
646 | ++curr_state_; | |
647 | } | |
648 | ||
649 | state_.assign (start_, curr_state_); | |
650 | ||
651 | if (*curr_state_) | |
652 | { | |
653 | ++curr_state_; | |
654 | start_ = curr_state_; | |
655 | } | |
656 | ||
657 | validate (state_.c_str ()); | |
658 | iter_ = _statemap.find (state_.c_str ()); | |
659 | ||
660 | if (iter_ == end_) | |
661 | { | |
662 | std::basic_stringstream<CharT> ss_; | |
663 | std::ostringstream os_; | |
664 | ||
665 | os_ << "Unknown state name '"; | |
666 | curr_state_ = state_.c_str (); | |
667 | ||
668 | while (*curr_state_) | |
669 | { | |
670 | os_ << ss_.narrow (*curr_state_++, ' '); | |
671 | } | |
672 | ||
673 | os_ << "'."; | |
674 | throw runtime_error (os_.str ()); | |
675 | } | |
676 | ||
677 | states_.push_back (iter_->second); | |
678 | } | |
679 | } | |
680 | ||
681 | std::size_t first_counter_ = npos; | |
682 | ||
683 | for (std::size_t i_ = 0, size_ = states_.size (); i_ < size_; ++i_) | |
684 | { | |
685 | const std::size_t curr_ = states_[i_]; | |
686 | ||
687 | _regexes[curr_].push_back (regex_); | |
688 | _ids[curr_].push_back (id_); | |
689 | ||
690 | if (uid_ == npos) | |
691 | { | |
692 | const std::size_t counter_ = next_unique_id (); | |
693 | ||
694 | if (first_counter_ == npos) | |
695 | { | |
696 | first_counter_ = counter_; | |
697 | } | |
698 | ||
699 | _unique_ids[curr_].push_back (counter_); | |
700 | } | |
701 | else | |
702 | { | |
703 | if (first_counter_ == npos) | |
704 | { | |
705 | first_counter_ = uid_; | |
706 | } | |
707 | ||
708 | _unique_ids[curr_].push_back (uid_); | |
709 | } | |
710 | ||
711 | _states[curr_].push_back (dot_ ? curr_ : new_); | |
712 | } | |
713 | ||
714 | return first_counter_; | |
715 | } | |
716 | ||
717 | void validate (const CharT *name_) const | |
718 | { | |
719 | const CharT *start_ = name_; | |
720 | ||
721 | if (*name_ != '_' && !(*name_ >= 'A' && *name_ <= 'Z') && | |
722 | !(*name_ >= 'a' && *name_ <= 'z')) | |
723 | { | |
724 | std::basic_stringstream<CharT> ss_; | |
725 | std::ostringstream os_; | |
726 | ||
727 | os_ << "Invalid name '"; | |
728 | ||
729 | while (*name_) | |
730 | { | |
731 | os_ << ss_.narrow (*name_++, ' '); | |
732 | } | |
733 | ||
734 | os_ << "'."; | |
735 | throw runtime_error (os_.str ()); | |
736 | } | |
737 | else if (*name_) | |
738 | { | |
739 | ++name_; | |
740 | } | |
741 | ||
742 | while (*name_) | |
743 | { | |
744 | if (*name_ != '_' && *name_ != '-' && | |
745 | !(*name_ >= 'A' && *name_ <= 'Z') && | |
746 | !(*name_ >= 'a' && *name_ <= 'z') && | |
747 | !(*name_ >= '0' && *name_ <= '9')) | |
748 | { | |
749 | std::basic_stringstream<CharT> ss_; | |
750 | std::ostringstream os_; | |
751 | ||
752 | os_ << "Invalid name '"; | |
753 | name_ = start_; | |
754 | ||
755 | while (*name_) | |
756 | { | |
757 | os_ << ss_.narrow (*name_++, ' '); | |
758 | } | |
759 | ||
760 | os_ << "'."; | |
761 | throw runtime_error (os_.str ()); | |
762 | } | |
763 | ||
764 | ++name_; | |
765 | } | |
766 | ||
767 | if (name_ - start_ > static_cast<std::ptrdiff_t>(max_macro_len)) | |
768 | { | |
769 | std::basic_stringstream<CharT> ss_; | |
770 | std::ostringstream os_; | |
771 | ||
772 | os_ << "Name '"; | |
773 | name_ = start_; | |
774 | ||
775 | while (*name_) | |
776 | { | |
777 | os_ << ss_.narrow (*name_++, ' '); | |
778 | } | |
779 | ||
780 | os_ << "' too long."; | |
781 | throw runtime_error (os_.str ()); | |
782 | } | |
783 | } | |
784 | ||
785 | void check_for_invalid_id (const std::size_t id_) const | |
786 | { | |
787 | switch (id_) | |
788 | { | |
789 | case 0: | |
790 | throw runtime_error ("id 0 is reserved for EOF."); | |
791 | case npos: | |
792 | throw runtime_error ("id npos is reserved for the " | |
793 | "UNKNOWN token."); | |
794 | default: | |
795 | // OK | |
796 | break; | |
797 | } | |
798 | } | |
799 | }; | |
800 | ||
801 | typedef basic_rules<char> rules; | |
802 | typedef basic_rules<wchar_t> wrules; | |
803 | } | |
804 | } | |
805 | ||
806 | #endif |