]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/boost/regex/v5/basic_regex_creator.hpp
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / boost / boost / regex / v5 / basic_regex_creator.hpp
1 /*
2 *
3 * Copyright (c) 2004
4 * John Maddock
5 *
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
12 /*
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE basic_regex_creator.cpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Declares template class basic_regex_creator which fills in
17 * the data members of a regex_data object.
18 */
19
20 #ifndef BOOST_REGEX_V5_BASIC_REGEX_CREATOR_HPP
21 #define BOOST_REGEX_V5_BASIC_REGEX_CREATOR_HPP
22
23 #ifdef BOOST_REGEX_MSVC
24 # pragma warning(push)
25 #pragma warning(disable:4459)
26 #if BOOST_REGEX_MSVC < 1910
27 #pragma warning(disable:4800)
28 #endif
29 #endif
30
31 #include <set>
32
33 namespace boost{
34
35 namespace BOOST_REGEX_DETAIL_NS{
36
37 template <class charT>
38 struct digraph : public std::pair<charT, charT>
39 {
40 digraph() : std::pair<charT, charT>(charT(0), charT(0)){}
41 digraph(charT c1) : std::pair<charT, charT>(c1, charT(0)){}
42 digraph(charT c1, charT c2) : std::pair<charT, charT>(c1, c2)
43 {}
44 digraph(const digraph<charT>& d) : std::pair<charT, charT>(d.first, d.second){}
45 digraph<charT>& operator=(const digraph<charT>&) = default;
46 template <class Seq>
47 digraph(const Seq& s) : std::pair<charT, charT>()
48 {
49 BOOST_REGEX_ASSERT(s.size() <= 2);
50 BOOST_REGEX_ASSERT(s.size());
51 this->first = s[0];
52 this->second = (s.size() > 1) ? s[1] : 0;
53 }
54 };
55
56 template <class charT, class traits>
57 class basic_char_set
58 {
59 public:
60 typedef digraph<charT> digraph_type;
61 typedef typename traits::string_type string_type;
62 typedef typename traits::char_class_type m_type;
63
64 basic_char_set()
65 {
66 m_negate = false;
67 m_has_digraphs = false;
68 m_classes = 0;
69 m_negated_classes = 0;
70 m_empty = true;
71 }
72
73 void add_single(const digraph_type& s)
74 {
75 m_singles.insert(s);
76 if(s.second)
77 m_has_digraphs = true;
78 m_empty = false;
79 }
80 void add_range(const digraph_type& first, const digraph_type& end)
81 {
82 m_ranges.push_back(first);
83 m_ranges.push_back(end);
84 if(first.second)
85 {
86 m_has_digraphs = true;
87 add_single(first);
88 }
89 if(end.second)
90 {
91 m_has_digraphs = true;
92 add_single(end);
93 }
94 m_empty = false;
95 }
96 void add_class(m_type m)
97 {
98 m_classes |= m;
99 m_empty = false;
100 }
101 void add_negated_class(m_type m)
102 {
103 m_negated_classes |= m;
104 m_empty = false;
105 }
106 void add_equivalent(const digraph_type& s)
107 {
108 m_equivalents.insert(s);
109 if(s.second)
110 {
111 m_has_digraphs = true;
112 add_single(s);
113 }
114 m_empty = false;
115 }
116 void negate()
117 {
118 m_negate = true;
119 //m_empty = false;
120 }
121
122 //
123 // accessor functions:
124 //
125 bool has_digraphs()const
126 {
127 return m_has_digraphs;
128 }
129 bool is_negated()const
130 {
131 return m_negate;
132 }
133 typedef typename std::vector<digraph_type>::const_iterator list_iterator;
134 typedef typename std::set<digraph_type>::const_iterator set_iterator;
135 set_iterator singles_begin()const
136 {
137 return m_singles.begin();
138 }
139 set_iterator singles_end()const
140 {
141 return m_singles.end();
142 }
143 list_iterator ranges_begin()const
144 {
145 return m_ranges.begin();
146 }
147 list_iterator ranges_end()const
148 {
149 return m_ranges.end();
150 }
151 set_iterator equivalents_begin()const
152 {
153 return m_equivalents.begin();
154 }
155 set_iterator equivalents_end()const
156 {
157 return m_equivalents.end();
158 }
159 m_type classes()const
160 {
161 return m_classes;
162 }
163 m_type negated_classes()const
164 {
165 return m_negated_classes;
166 }
167 bool empty()const
168 {
169 return m_empty;
170 }
171 private:
172 std::set<digraph_type> m_singles; // a list of single characters to match
173 std::vector<digraph_type> m_ranges; // a list of end points of our ranges
174 bool m_negate; // true if the set is to be negated
175 bool m_has_digraphs; // true if we have digraphs present
176 m_type m_classes; // character classes to match
177 m_type m_negated_classes; // negated character classes to match
178 bool m_empty; // whether we've added anything yet
179 std::set<digraph_type> m_equivalents; // a list of equivalence classes
180 };
181
182 template <class charT, class traits>
183 class basic_regex_creator
184 {
185 public:
186 basic_regex_creator(regex_data<charT, traits>* data);
187 std::ptrdiff_t getoffset(void* addr)
188 {
189 return getoffset(addr, m_pdata->m_data.data());
190 }
191 std::ptrdiff_t getoffset(const void* addr, const void* base)
192 {
193 return static_cast<const char*>(addr) - static_cast<const char*>(base);
194 }
195 re_syntax_base* getaddress(std::ptrdiff_t off)
196 {
197 return getaddress(off, m_pdata->m_data.data());
198 }
199 re_syntax_base* getaddress(std::ptrdiff_t off, void* base)
200 {
201 return static_cast<re_syntax_base*>(static_cast<void*>(static_cast<char*>(base) + off));
202 }
203 void init(unsigned l_flags)
204 {
205 m_pdata->m_flags = l_flags;
206 m_icase = l_flags & regex_constants::icase;
207 }
208 regbase::flag_type flags()
209 {
210 return m_pdata->m_flags;
211 }
212 void flags(regbase::flag_type f)
213 {
214 m_pdata->m_flags = f;
215 if(m_icase != static_cast<bool>(f & regbase::icase))
216 {
217 m_icase = static_cast<bool>(f & regbase::icase);
218 }
219 }
220 re_syntax_base* append_state(syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
221 re_syntax_base* insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
222 re_literal* append_literal(charT c);
223 re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set);
224 re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, std::integral_constant<bool, false>*);
225 re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, std::integral_constant<bool, true>*);
226 void finalize(const charT* p1, const charT* p2);
227 protected:
228 regex_data<charT, traits>* m_pdata; // pointer to the basic_regex_data struct we are filling in
229 const ::boost::regex_traits_wrapper<traits>&
230 m_traits; // convenience reference to traits class
231 re_syntax_base* m_last_state; // the last state we added
232 bool m_icase; // true for case insensitive matches
233 unsigned m_repeater_id; // the state_id of the next repeater
234 bool m_has_backrefs; // true if there are actually any backrefs
235 std::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for;
236 bool m_has_recursions; // set when we have recursive expressions to fixup
237 std::vector<unsigned char> m_recursion_checks; // notes which recursions we've followed while analysing this expression
238 typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
239 typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character
240 typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character
241 typename traits::char_class_type m_upper_mask; // mask used to determine if a character is an uppercase character
242 typename traits::char_class_type m_alpha_mask; // mask used to determine if a character is an alphabetic character
243 private:
244 basic_regex_creator& operator=(const basic_regex_creator&);
245 basic_regex_creator(const basic_regex_creator&);
246
247 void fixup_pointers(re_syntax_base* state);
248 void fixup_recursions(re_syntax_base* state);
249 void create_startmaps(re_syntax_base* state);
250 int calculate_backstep(re_syntax_base* state);
251 void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask);
252 unsigned get_restart_type(re_syntax_base* state);
253 void set_all_masks(unsigned char* bits, unsigned char);
254 bool is_bad_repeat(re_syntax_base* pt);
255 void set_bad_repeat(re_syntax_base* pt);
256 syntax_element_type get_repeat_type(re_syntax_base* state);
257 void probe_leading_repeat(re_syntax_base* state);
258 };
259
260 template <class charT, class traits>
261 basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits>* data)
262 : m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_icase(false), m_repeater_id(0),
263 m_has_backrefs(false), m_bad_repeats(0), m_has_recursions(false), m_word_mask(0), m_mask_space(0), m_lower_mask(0), m_upper_mask(0), m_alpha_mask(0)
264 {
265 m_pdata->m_data.clear();
266 m_pdata->m_status = ::boost::regex_constants::error_ok;
267 static const charT w = 'w';
268 static const charT s = 's';
269 static const charT l[5] = { 'l', 'o', 'w', 'e', 'r', };
270 static const charT u[5] = { 'u', 'p', 'p', 'e', 'r', };
271 static const charT a[5] = { 'a', 'l', 'p', 'h', 'a', };
272 m_word_mask = m_traits.lookup_classname(&w, &w +1);
273 m_mask_space = m_traits.lookup_classname(&s, &s +1);
274 m_lower_mask = m_traits.lookup_classname(l, l + 5);
275 m_upper_mask = m_traits.lookup_classname(u, u + 5);
276 m_alpha_mask = m_traits.lookup_classname(a, a + 5);
277 m_pdata->m_word_mask = m_word_mask;
278 BOOST_REGEX_ASSERT(m_word_mask != 0);
279 BOOST_REGEX_ASSERT(m_mask_space != 0);
280 BOOST_REGEX_ASSERT(m_lower_mask != 0);
281 BOOST_REGEX_ASSERT(m_upper_mask != 0);
282 BOOST_REGEX_ASSERT(m_alpha_mask != 0);
283 }
284
285 template <class charT, class traits>
286 re_syntax_base* basic_regex_creator<charT, traits>::append_state(syntax_element_type t, std::size_t s)
287 {
288 // if the state is a backref then make a note of it:
289 if(t == syntax_element_backref)
290 this->m_has_backrefs = true;
291 // append a new state, start by aligning our last one:
292 m_pdata->m_data.align();
293 // set the offset to the next state in our last one:
294 if(m_last_state)
295 m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state);
296 // now actually extend our data:
297 m_last_state = static_cast<re_syntax_base*>(m_pdata->m_data.extend(s));
298 // fill in boilerplate options in the new state:
299 m_last_state->next.i = 0;
300 m_last_state->type = t;
301 return m_last_state;
302 }
303
304 template <class charT, class traits>
305 re_syntax_base* basic_regex_creator<charT, traits>::insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s)
306 {
307 // append a new state, start by aligning our last one:
308 m_pdata->m_data.align();
309 // set the offset to the next state in our last one:
310 if(m_last_state)
311 m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state);
312 // remember the last state position:
313 std::ptrdiff_t off = getoffset(m_last_state) + s;
314 // now actually insert our data:
315 re_syntax_base* new_state = static_cast<re_syntax_base*>(m_pdata->m_data.insert(pos, s));
316 // fill in boilerplate options in the new state:
317 new_state->next.i = s;
318 new_state->type = t;
319 m_last_state = getaddress(off);
320 return new_state;
321 }
322
323 template <class charT, class traits>
324 re_literal* basic_regex_creator<charT, traits>::append_literal(charT c)
325 {
326 re_literal* result;
327 // start by seeing if we have an existing re_literal we can extend:
328 if((0 == m_last_state) || (m_last_state->type != syntax_element_literal))
329 {
330 // no existing re_literal, create a new one:
331 result = static_cast<re_literal*>(append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
332 result->length = 1;
333 *static_cast<charT*>(static_cast<void*>(result+1)) = m_traits.translate(c, m_icase);
334 }
335 else
336 {
337 // we have an existing re_literal, extend it:
338 std::ptrdiff_t off = getoffset(m_last_state);
339 m_pdata->m_data.extend(sizeof(charT));
340 m_last_state = result = static_cast<re_literal*>(getaddress(off));
341 charT* characters = static_cast<charT*>(static_cast<void*>(result+1));
342 characters[result->length] = m_traits.translate(c, m_icase);
343 result->length += 1;
344 }
345 return result;
346 }
347
348 template <class charT, class traits>
349 inline re_syntax_base* basic_regex_creator<charT, traits>::append_set(
350 const basic_char_set<charT, traits>& char_set)
351 {
352 typedef std::integral_constant<bool, (sizeof(charT) == 1) > truth_type;
353 return char_set.has_digraphs()
354 ? append_set(char_set, static_cast<std::integral_constant<bool, false>*>(0))
355 : append_set(char_set, static_cast<truth_type*>(0));
356 }
357
358 template <class charT, class traits>
359 re_syntax_base* basic_regex_creator<charT, traits>::append_set(
360 const basic_char_set<charT, traits>& char_set, std::integral_constant<bool, false>*)
361 {
362 typedef typename traits::string_type string_type;
363 typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
364 typedef typename basic_char_set<charT, traits>::set_iterator set_iterator;
365 typedef typename traits::char_class_type m_type;
366
367 re_set_long<m_type>* result = static_cast<re_set_long<m_type>*>(append_state(syntax_element_long_set, sizeof(re_set_long<m_type>)));
368 //
369 // fill in the basics:
370 //
371 result->csingles = static_cast<unsigned int>(std::distance(char_set.singles_begin(), char_set.singles_end()));
372 result->cranges = static_cast<unsigned int>(std::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2;
373 result->cequivalents = static_cast<unsigned int>(std::distance(char_set.equivalents_begin(), char_set.equivalents_end()));
374 result->cclasses = char_set.classes();
375 result->cnclasses = char_set.negated_classes();
376 if(flags() & regbase::icase)
377 {
378 // adjust classes as needed:
379 if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask))
380 result->cclasses |= m_alpha_mask;
381 if(((result->cnclasses & m_lower_mask) == m_lower_mask) || ((result->cnclasses & m_upper_mask) == m_upper_mask))
382 result->cnclasses |= m_alpha_mask;
383 }
384
385 result->isnot = char_set.is_negated();
386 result->singleton = !char_set.has_digraphs();
387 //
388 // remember where the state is for later:
389 //
390 std::ptrdiff_t offset = getoffset(result);
391 //
392 // now extend with all the singles:
393 //
394 item_iterator first, last;
395 set_iterator sfirst, slast;
396 sfirst = char_set.singles_begin();
397 slast = char_set.singles_end();
398 while(sfirst != slast)
399 {
400 charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (sfirst->first == static_cast<charT>(0) ? 1 : sfirst->second ? 3 : 2)));
401 p[0] = m_traits.translate(sfirst->first, m_icase);
402 if(sfirst->first == static_cast<charT>(0))
403 {
404 p[0] = 0;
405 }
406 else if(sfirst->second)
407 {
408 p[1] = m_traits.translate(sfirst->second, m_icase);
409 p[2] = 0;
410 }
411 else
412 p[1] = 0;
413 ++sfirst;
414 }
415 //
416 // now extend with all the ranges:
417 //
418 first = char_set.ranges_begin();
419 last = char_set.ranges_end();
420 while(first != last)
421 {
422 // first grab the endpoints of the range:
423 digraph<charT> c1 = *first;
424 c1.first = this->m_traits.translate(c1.first, this->m_icase);
425 c1.second = this->m_traits.translate(c1.second, this->m_icase);
426 ++first;
427 digraph<charT> c2 = *first;
428 c2.first = this->m_traits.translate(c2.first, this->m_icase);
429 c2.second = this->m_traits.translate(c2.second, this->m_icase);
430 ++first;
431 string_type s1, s2;
432 // different actions now depending upon whether collation is turned on:
433 if(flags() & regex_constants::collate)
434 {
435 // we need to transform our range into sort keys:
436 charT a1[3] = { c1.first, c1.second, charT(0), };
437 charT a2[3] = { c2.first, c2.second, charT(0), };
438 s1 = this->m_traits.transform(a1, (a1[1] ? a1+2 : a1+1));
439 s2 = this->m_traits.transform(a2, (a2[1] ? a2+2 : a2+1));
440 if(s1.empty())
441 s1 = string_type(1, charT(0));
442 if(s2.empty())
443 s2 = string_type(1, charT(0));
444 }
445 else
446 {
447 if(c1.second)
448 {
449 s1.insert(s1.end(), c1.first);
450 s1.insert(s1.end(), c1.second);
451 }
452 else
453 s1 = string_type(1, c1.first);
454 if(c2.second)
455 {
456 s2.insert(s2.end(), c2.first);
457 s2.insert(s2.end(), c2.second);
458 }
459 else
460 s2.insert(s2.end(), c2.first);
461 }
462 if(s1 > s2)
463 {
464 // Oops error:
465 return 0;
466 }
467 charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s1.size() + s2.size() + 2) ) );
468 BOOST_REGEX_DETAIL_NS::copy(s1.begin(), s1.end(), p);
469 p[s1.size()] = charT(0);
470 p += s1.size() + 1;
471 BOOST_REGEX_DETAIL_NS::copy(s2.begin(), s2.end(), p);
472 p[s2.size()] = charT(0);
473 }
474 //
475 // now process the equivalence classes:
476 //
477 sfirst = char_set.equivalents_begin();
478 slast = char_set.equivalents_end();
479 while(sfirst != slast)
480 {
481 string_type s;
482 if(sfirst->second)
483 {
484 charT cs[3] = { sfirst->first, sfirst->second, charT(0), };
485 s = m_traits.transform_primary(cs, cs+2);
486 }
487 else
488 s = m_traits.transform_primary(&sfirst->first, &sfirst->first+1);
489 if(s.empty())
490 return 0; // invalid or unsupported equivalence class
491 charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) );
492 BOOST_REGEX_DETAIL_NS::copy(s.begin(), s.end(), p);
493 p[s.size()] = charT(0);
494 ++sfirst;
495 }
496 //
497 // finally reset the address of our last state:
498 //
499 m_last_state = result = static_cast<re_set_long<m_type>*>(getaddress(offset));
500 return result;
501 }
502
503 template<class T>
504 inline bool char_less(T t1, T t2)
505 {
506 return t1 < t2;
507 }
508 inline bool char_less(char t1, char t2)
509 {
510 return static_cast<unsigned char>(t1) < static_cast<unsigned char>(t2);
511 }
512 inline bool char_less(signed char t1, signed char t2)
513 {
514 return static_cast<unsigned char>(t1) < static_cast<unsigned char>(t2);
515 }
516
517 template <class charT, class traits>
518 re_syntax_base* basic_regex_creator<charT, traits>::append_set(
519 const basic_char_set<charT, traits>& char_set, std::integral_constant<bool, true>*)
520 {
521 typedef typename traits::string_type string_type;
522 typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
523 typedef typename basic_char_set<charT, traits>::set_iterator set_iterator;
524
525 re_set* result = static_cast<re_set*>(append_state(syntax_element_set, sizeof(re_set)));
526 bool negate = char_set.is_negated();
527 std::memset(result->_map, 0, sizeof(result->_map));
528 //
529 // handle singles first:
530 //
531 item_iterator first, last;
532 set_iterator sfirst, slast;
533 sfirst = char_set.singles_begin();
534 slast = char_set.singles_end();
535 while(sfirst != slast)
536 {
537 for(unsigned int i = 0; i < (1 << CHAR_BIT); ++i)
538 {
539 if(this->m_traits.translate(static_cast<charT>(i), this->m_icase)
540 == this->m_traits.translate(sfirst->first, this->m_icase))
541 result->_map[i] = true;
542 }
543 ++sfirst;
544 }
545 //
546 // OK now handle ranges:
547 //
548 first = char_set.ranges_begin();
549 last = char_set.ranges_end();
550 while(first != last)
551 {
552 // first grab the endpoints of the range:
553 charT c1 = this->m_traits.translate(first->first, this->m_icase);
554 ++first;
555 charT c2 = this->m_traits.translate(first->first, this->m_icase);
556 ++first;
557 // different actions now depending upon whether collation is turned on:
558 if(flags() & regex_constants::collate)
559 {
560 // we need to transform our range into sort keys:
561 charT c3[2] = { c1, charT(0), };
562 string_type s1 = this->m_traits.transform(c3, c3+1);
563 c3[0] = c2;
564 string_type s2 = this->m_traits.transform(c3, c3+1);
565 if(s1 > s2)
566 {
567 // Oops error:
568 return 0;
569 }
570 BOOST_REGEX_ASSERT(c3[1] == charT(0));
571 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
572 {
573 c3[0] = static_cast<charT>(i);
574 string_type s3 = this->m_traits.transform(c3, c3 +1);
575 if((s1 <= s3) && (s3 <= s2))
576 result->_map[i] = true;
577 }
578 }
579 else
580 {
581 if(char_less(c2, c1))
582 {
583 // Oops error:
584 return 0;
585 }
586 // everything in range matches:
587 std::memset(result->_map + static_cast<unsigned char>(c1), true, static_cast<unsigned char>(1u) + static_cast<unsigned char>(static_cast<unsigned char>(c2) - static_cast<unsigned char>(c1)));
588 }
589 }
590 //
591 // and now the classes:
592 //
593 typedef typename traits::char_class_type m_type;
594 m_type m = char_set.classes();
595 if(flags() & regbase::icase)
596 {
597 // adjust m as needed:
598 if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
599 m |= m_alpha_mask;
600 }
601 if(m != 0)
602 {
603 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
604 {
605 if(this->m_traits.isctype(static_cast<charT>(i), m))
606 result->_map[i] = true;
607 }
608 }
609 //
610 // and now the negated classes:
611 //
612 m = char_set.negated_classes();
613 if(flags() & regbase::icase)
614 {
615 // adjust m as needed:
616 if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
617 m |= m_alpha_mask;
618 }
619 if(m != 0)
620 {
621 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
622 {
623 if(0 == this->m_traits.isctype(static_cast<charT>(i), m))
624 result->_map[i] = true;
625 }
626 }
627 //
628 // now process the equivalence classes:
629 //
630 sfirst = char_set.equivalents_begin();
631 slast = char_set.equivalents_end();
632 while(sfirst != slast)
633 {
634 string_type s;
635 BOOST_REGEX_ASSERT(static_cast<charT>(0) == sfirst->second);
636 s = m_traits.transform_primary(&sfirst->first, &sfirst->first+1);
637 if(s.empty())
638 return 0; // invalid or unsupported equivalence class
639 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
640 {
641 charT c[2] = { (static_cast<charT>(i)), charT(0), };
642 string_type s2 = this->m_traits.transform_primary(c, c+1);
643 if(s == s2)
644 result->_map[i] = true;
645 }
646 ++sfirst;
647 }
648 if(negate)
649 {
650 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
651 {
652 result->_map[i] = !(result->_map[i]);
653 }
654 }
655 return result;
656 }
657
658 template <class charT, class traits>
659 void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* p2)
660 {
661 if(this->m_pdata->m_status)
662 return;
663 // we've added all the states we need, now finish things off.
664 // start by adding a terminating state:
665 append_state(syntax_element_match);
666 // extend storage to store original expression:
667 std::ptrdiff_t len = p2 - p1;
668 m_pdata->m_expression_len = len;
669 charT* ps = static_cast<charT*>(m_pdata->m_data.extend(sizeof(charT) * (1 + (p2 - p1))));
670 m_pdata->m_expression = ps;
671 BOOST_REGEX_DETAIL_NS::copy(p1, p2, ps);
672 ps[p2 - p1] = 0;
673 // fill in our other data...
674 // successful parsing implies a zero status:
675 m_pdata->m_status = 0;
676 // get the first state of the machine:
677 m_pdata->m_first_state = static_cast<re_syntax_base*>(m_pdata->m_data.data());
678 // fixup pointers in the machine:
679 fixup_pointers(m_pdata->m_first_state);
680 if(m_has_recursions)
681 {
682 m_pdata->m_has_recursions = true;
683 fixup_recursions(m_pdata->m_first_state);
684 if(this->m_pdata->m_status)
685 return;
686 }
687 else
688 m_pdata->m_has_recursions = false;
689 // create nested startmaps:
690 create_startmaps(m_pdata->m_first_state);
691 // create main startmap:
692 std::memset(m_pdata->m_startmap, 0, sizeof(m_pdata->m_startmap));
693 m_pdata->m_can_be_null = 0;
694
695 m_bad_repeats = 0;
696 if(m_has_recursions)
697 m_recursion_checks.assign(1 + m_pdata->m_mark_count, 0u);
698 create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all);
699 // get the restart type:
700 m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state);
701 // optimise a leading repeat if there is one:
702 probe_leading_repeat(m_pdata->m_first_state);
703 }
704
705 template <class charT, class traits>
706 void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state)
707 {
708 while(state)
709 {
710 switch(state->type)
711 {
712 case syntax_element_recurse:
713 m_has_recursions = true;
714 if(state->next.i)
715 state->next.p = getaddress(state->next.i, state);
716 else
717 state->next.p = 0;
718 break;
719 case syntax_element_rep:
720 case syntax_element_dot_rep:
721 case syntax_element_char_rep:
722 case syntax_element_short_set_rep:
723 case syntax_element_long_set_rep:
724 // set the state_id of this repeat:
725 static_cast<re_repeat*>(state)->state_id = m_repeater_id++;
726 BOOST_REGEX_FALLTHROUGH;
727 case syntax_element_alt:
728 std::memset(static_cast<re_alt*>(state)->_map, 0, sizeof(static_cast<re_alt*>(state)->_map));
729 static_cast<re_alt*>(state)->can_be_null = 0;
730 BOOST_REGEX_FALLTHROUGH;
731 case syntax_element_jump:
732 static_cast<re_jump*>(state)->alt.p = getaddress(static_cast<re_jump*>(state)->alt.i, state);
733 BOOST_REGEX_FALLTHROUGH;
734 default:
735 if(state->next.i)
736 state->next.p = getaddress(state->next.i, state);
737 else
738 state->next.p = 0;
739 }
740 state = state->next.p;
741 }
742 }
743
744 template <class charT, class traits>
745 void basic_regex_creator<charT, traits>::fixup_recursions(re_syntax_base* state)
746 {
747 re_syntax_base* base = state;
748 while(state)
749 {
750 switch(state->type)
751 {
752 case syntax_element_assert_backref:
753 {
754 // just check that the index is valid:
755 int idx = static_cast<const re_brace*>(state)->index;
756 if(idx < 0)
757 {
758 idx = -idx-1;
759 if(idx >= hash_value_mask)
760 {
761 idx = m_pdata->get_id(idx);
762 if(idx <= 0)
763 {
764 // check of sub-expression that doesn't exist:
765 if(0 == this->m_pdata->m_status) // update the error code if not already set
766 this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
767 //
768 // clear the expression, we should be empty:
769 //
770 this->m_pdata->m_expression = 0;
771 this->m_pdata->m_expression_len = 0;
772 //
773 // and throw if required:
774 //
775 if(0 == (this->flags() & regex_constants::no_except))
776 {
777 std::string message = "Encountered a forward reference to a marked sub-expression that does not exist.";
778 boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
779 e.raise();
780 }
781 }
782 }
783 }
784 }
785 break;
786 case syntax_element_recurse:
787 {
788 bool ok = false;
789 re_syntax_base* p = base;
790 std::ptrdiff_t idx = static_cast<re_jump*>(state)->alt.i;
791 if(idx >= hash_value_mask)
792 {
793 //
794 // There may be more than one capture group with this hash, just do what Perl
795 // does and recurse to the leftmost:
796 //
797 idx = m_pdata->get_id(static_cast<int>(idx));
798 }
799 if(idx < 0)
800 {
801 ok = false;
802 }
803 else
804 {
805 while(p)
806 {
807 if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == idx))
808 {
809 //
810 // We've found the target of the recursion, set the jump target:
811 //
812 static_cast<re_jump*>(state)->alt.p = p;
813 ok = true;
814 //
815 // Now scan the target for nested repeats:
816 //
817 p = p->next.p;
818 int next_rep_id = 0;
819 while(p)
820 {
821 switch(p->type)
822 {
823 case syntax_element_rep:
824 case syntax_element_dot_rep:
825 case syntax_element_char_rep:
826 case syntax_element_short_set_rep:
827 case syntax_element_long_set_rep:
828 next_rep_id = static_cast<re_repeat*>(p)->state_id;
829 break;
830 case syntax_element_endmark:
831 if(static_cast<const re_brace*>(p)->index == idx)
832 next_rep_id = -1;
833 break;
834 default:
835 break;
836 }
837 if(next_rep_id)
838 break;
839 p = p->next.p;
840 }
841 if(next_rep_id > 0)
842 {
843 static_cast<re_recurse*>(state)->state_id = next_rep_id - 1;
844 }
845
846 break;
847 }
848 p = p->next.p;
849 }
850 }
851 if(!ok)
852 {
853 // recursion to sub-expression that doesn't exist:
854 if(0 == this->m_pdata->m_status) // update the error code if not already set
855 this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
856 //
857 // clear the expression, we should be empty:
858 //
859 this->m_pdata->m_expression = 0;
860 this->m_pdata->m_expression_len = 0;
861 //
862 // and throw if required:
863 //
864 if(0 == (this->flags() & regex_constants::no_except))
865 {
866 std::string message = "Encountered a forward reference to a recursive sub-expression that does not exist.";
867 boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
868 e.raise();
869 }
870 }
871 }
872 break;
873 default:
874 break;
875 }
876 state = state->next.p;
877 }
878 }
879
880 template <class charT, class traits>
881 void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
882 {
883 // non-recursive implementation:
884 // create the last map in the machine first, so that earlier maps
885 // can make use of the result...
886 //
887 // This was originally a recursive implementation, but that caused stack
888 // overflows with complex expressions on small stacks (think COM+).
889
890 // start by saving the case setting:
891 bool l_icase = m_icase;
892 std::vector<std::pair<bool, re_syntax_base*> > v;
893
894 while(state)
895 {
896 switch(state->type)
897 {
898 case syntax_element_toggle_case:
899 // we need to track case changes here:
900 m_icase = static_cast<re_case*>(state)->icase;
901 state = state->next.p;
902 continue;
903 case syntax_element_alt:
904 case syntax_element_rep:
905 case syntax_element_dot_rep:
906 case syntax_element_char_rep:
907 case syntax_element_short_set_rep:
908 case syntax_element_long_set_rep:
909 // just push the state onto our stack for now:
910 v.push_back(std::pair<bool, re_syntax_base*>(m_icase, state));
911 state = state->next.p;
912 break;
913 case syntax_element_backstep:
914 // we need to calculate how big the backstep is:
915 static_cast<re_brace*>(state)->index
916 = this->calculate_backstep(state->next.p);
917 if(static_cast<re_brace*>(state)->index < 0)
918 {
919 // Oops error:
920 if(0 == this->m_pdata->m_status) // update the error code if not already set
921 this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
922 //
923 // clear the expression, we should be empty:
924 //
925 this->m_pdata->m_expression = 0;
926 this->m_pdata->m_expression_len = 0;
927 //
928 // and throw if required:
929 //
930 if(0 == (this->flags() & regex_constants::no_except))
931 {
932 std::string message = "Invalid lookbehind assertion encountered in the regular expression.";
933 boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
934 e.raise();
935 }
936 }
937 BOOST_REGEX_FALLTHROUGH;
938 default:
939 state = state->next.p;
940 }
941 }
942
943 // now work through our list, building all the maps as we go:
944 while(!v.empty())
945 {
946 // Initialize m_recursion_checks if we need it:
947 if(m_has_recursions)
948 m_recursion_checks.assign(1 + m_pdata->m_mark_count, 0u);
949
950 const std::pair<bool, re_syntax_base*>& p = v.back();
951 m_icase = p.first;
952 state = p.second;
953 v.pop_back();
954
955 // Build maps:
956 m_bad_repeats = 0;
957 create_startmap(state->next.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_take);
958 m_bad_repeats = 0;
959
960 if(m_has_recursions)
961 m_recursion_checks.assign(1 + m_pdata->m_mark_count, 0u);
962 create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip);
963 // adjust the type of the state to allow for faster matching:
964 state->type = this->get_repeat_type(state);
965 }
966 // restore case sensitivity:
967 m_icase = l_icase;
968 }
969
970 template <class charT, class traits>
971 int basic_regex_creator<charT, traits>::calculate_backstep(re_syntax_base* state)
972 {
973 typedef typename traits::char_class_type m_type;
974 int result = 0;
975 while(state)
976 {
977 switch(state->type)
978 {
979 case syntax_element_startmark:
980 if((static_cast<re_brace*>(state)->index == -1)
981 || (static_cast<re_brace*>(state)->index == -2))
982 {
983 state = static_cast<re_jump*>(state->next.p)->alt.p->next.p;
984 continue;
985 }
986 else if(static_cast<re_brace*>(state)->index == -3)
987 {
988 state = state->next.p->next.p;
989 continue;
990 }
991 break;
992 case syntax_element_endmark:
993 if((static_cast<re_brace*>(state)->index == -1)
994 || (static_cast<re_brace*>(state)->index == -2))
995 return result;
996 break;
997 case syntax_element_literal:
998 result += static_cast<re_literal*>(state)->length;
999 break;
1000 case syntax_element_wild:
1001 case syntax_element_set:
1002 result += 1;
1003 break;
1004 case syntax_element_dot_rep:
1005 case syntax_element_char_rep:
1006 case syntax_element_short_set_rep:
1007 case syntax_element_backref:
1008 case syntax_element_rep:
1009 case syntax_element_combining:
1010 case syntax_element_long_set_rep:
1011 case syntax_element_backstep:
1012 {
1013 re_repeat* rep = static_cast<re_repeat *>(state);
1014 // adjust the type of the state to allow for faster matching:
1015 state->type = this->get_repeat_type(state);
1016 if((state->type == syntax_element_dot_rep)
1017 || (state->type == syntax_element_char_rep)
1018 || (state->type == syntax_element_short_set_rep))
1019 {
1020 if(rep->max != rep->min)
1021 return -1;
1022 result += static_cast<int>(rep->min);
1023 state = rep->alt.p;
1024 continue;
1025 }
1026 else if(state->type == syntax_element_long_set_rep)
1027 {
1028 BOOST_REGEX_ASSERT(rep->next.p->type == syntax_element_long_set);
1029 if(static_cast<re_set_long<m_type>*>(rep->next.p)->singleton == 0)
1030 return -1;
1031 if(rep->max != rep->min)
1032 return -1;
1033 result += static_cast<int>(rep->min);
1034 state = rep->alt.p;
1035 continue;
1036 }
1037 }
1038 return -1;
1039 case syntax_element_long_set:
1040 if(static_cast<re_set_long<m_type>*>(state)->singleton == 0)
1041 return -1;
1042 result += 1;
1043 break;
1044 case syntax_element_jump:
1045 state = static_cast<re_jump*>(state)->alt.p;
1046 continue;
1047 case syntax_element_alt:
1048 {
1049 int r1 = calculate_backstep(state->next.p);
1050 int r2 = calculate_backstep(static_cast<re_alt*>(state)->alt.p);
1051 if((r1 < 0) || (r1 != r2))
1052 return -1;
1053 return result + r1;
1054 }
1055 default:
1056 break;
1057 }
1058 state = state->next.p;
1059 }
1060 return -1;
1061 }
1062
1063 struct recursion_saver
1064 {
1065 std::vector<unsigned char> saved_state;
1066 std::vector<unsigned char>* state;
1067 recursion_saver(std::vector<unsigned char>* p) : saved_state(*p), state(p) {}
1068 ~recursion_saver()
1069 {
1070 state->swap(saved_state);
1071 }
1072 };
1073
1074 template <class charT, class traits>
1075 void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask)
1076 {
1077 recursion_saver saved_recursions(&m_recursion_checks);
1078 int not_last_jump = 1;
1079 re_syntax_base* recursion_start = 0;
1080 int recursion_sub = 0;
1081 re_syntax_base* recursion_restart = 0;
1082
1083 // track case sensitivity:
1084 bool l_icase = m_icase;
1085
1086 while(state)
1087 {
1088 switch(state->type)
1089 {
1090 case syntax_element_toggle_case:
1091 l_icase = static_cast<re_case*>(state)->icase;
1092 state = state->next.p;
1093 break;
1094 case syntax_element_literal:
1095 {
1096 // don't set anything in *pnull, set each element in l_map
1097 // that could match the first character in the literal:
1098 if(l_map)
1099 {
1100 l_map[0] |= mask_init;
1101 charT first_char = *static_cast<charT*>(static_cast<void*>(static_cast<re_literal*>(state) + 1));
1102 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1103 {
1104 if(m_traits.translate(static_cast<charT>(i), l_icase) == first_char)
1105 l_map[i] |= mask;
1106 }
1107 }
1108 return;
1109 }
1110 case syntax_element_end_line:
1111 {
1112 // next character must be a line separator (if there is one):
1113 if(l_map)
1114 {
1115 l_map[0] |= mask_init;
1116 l_map[static_cast<unsigned>('\n')] |= mask;
1117 l_map[static_cast<unsigned>('\r')] |= mask;
1118 l_map[static_cast<unsigned>('\f')] |= mask;
1119 l_map[0x85] |= mask;
1120 }
1121 // now figure out if we can match a NULL string at this point:
1122 if(pnull)
1123 create_startmap(state->next.p, 0, pnull, mask);
1124 return;
1125 }
1126 case syntax_element_recurse:
1127 {
1128 BOOST_REGEX_ASSERT(static_cast<const re_jump*>(state)->alt.p->type == syntax_element_startmark);
1129 recursion_sub = static_cast<re_brace*>(static_cast<const re_jump*>(state)->alt.p)->index;
1130 if(m_recursion_checks[recursion_sub] & 1u)
1131 {
1132 // Infinite recursion!!
1133 if(0 == this->m_pdata->m_status) // update the error code if not already set
1134 this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
1135 //
1136 // clear the expression, we should be empty:
1137 //
1138 this->m_pdata->m_expression = 0;
1139 this->m_pdata->m_expression_len = 0;
1140 //
1141 // and throw if required:
1142 //
1143 if(0 == (this->flags() & regex_constants::no_except))
1144 {
1145 std::string message = "Encountered an infinite recursion.";
1146 boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
1147 e.raise();
1148 }
1149 }
1150 else if(recursion_start == 0)
1151 {
1152 recursion_start = state;
1153 recursion_restart = state->next.p;
1154 state = static_cast<re_jump*>(state)->alt.p;
1155 m_recursion_checks[recursion_sub] |= 1u;
1156 break;
1157 }
1158 m_recursion_checks[recursion_sub] |= 1u;
1159 // can't handle nested recursion here...
1160 BOOST_REGEX_FALLTHROUGH;
1161 }
1162 case syntax_element_backref:
1163 // can be null, and any character can match:
1164 if(pnull)
1165 *pnull |= mask;
1166 BOOST_REGEX_FALLTHROUGH;
1167 case syntax_element_wild:
1168 {
1169 // can't be null, any character can match:
1170 set_all_masks(l_map, mask);
1171 return;
1172 }
1173 case syntax_element_accept:
1174 case syntax_element_match:
1175 {
1176 // must be null, any character can match:
1177 set_all_masks(l_map, mask);
1178 if(pnull)
1179 *pnull |= mask;
1180 return;
1181 }
1182 case syntax_element_word_start:
1183 {
1184 // recurse, then AND with all the word characters:
1185 create_startmap(state->next.p, l_map, pnull, mask);
1186 if(l_map)
1187 {
1188 l_map[0] |= mask_init;
1189 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1190 {
1191 if(!m_traits.isctype(static_cast<charT>(i), m_word_mask))
1192 l_map[i] &= static_cast<unsigned char>(~mask);
1193 }
1194 }
1195 return;
1196 }
1197 case syntax_element_word_end:
1198 {
1199 // recurse, then AND with all the word characters:
1200 create_startmap(state->next.p, l_map, pnull, mask);
1201 if(l_map)
1202 {
1203 l_map[0] |= mask_init;
1204 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1205 {
1206 if(m_traits.isctype(static_cast<charT>(i), m_word_mask))
1207 l_map[i] &= static_cast<unsigned char>(~mask);
1208 }
1209 }
1210 return;
1211 }
1212 case syntax_element_buffer_end:
1213 {
1214 // we *must be null* :
1215 if(pnull)
1216 *pnull |= mask;
1217 return;
1218 }
1219 case syntax_element_long_set:
1220 if(l_map)
1221 {
1222 typedef typename traits::char_class_type m_type;
1223 if(static_cast<re_set_long<m_type>*>(state)->singleton)
1224 {
1225 l_map[0] |= mask_init;
1226 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1227 {
1228 charT c = static_cast<charT>(i);
1229 if(&c != re_is_set_member(&c, &c + 1, static_cast<re_set_long<m_type>*>(state), *m_pdata, l_icase))
1230 l_map[i] |= mask;
1231 }
1232 }
1233 else
1234 set_all_masks(l_map, mask);
1235 }
1236 return;
1237 case syntax_element_set:
1238 if(l_map)
1239 {
1240 l_map[0] |= mask_init;
1241 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1242 {
1243 if(static_cast<re_set*>(state)->_map[
1244 static_cast<unsigned char>(m_traits.translate(static_cast<charT>(i), l_icase))])
1245 l_map[i] |= mask;
1246 }
1247 }
1248 return;
1249 case syntax_element_jump:
1250 // take the jump:
1251 state = static_cast<re_alt*>(state)->alt.p;
1252 not_last_jump = -1;
1253 break;
1254 case syntax_element_alt:
1255 case syntax_element_rep:
1256 case syntax_element_dot_rep:
1257 case syntax_element_char_rep:
1258 case syntax_element_short_set_rep:
1259 case syntax_element_long_set_rep:
1260 {
1261 re_alt* rep = static_cast<re_alt*>(state);
1262 if(rep->_map[0] & mask_init)
1263 {
1264 if(l_map)
1265 {
1266 // copy previous results:
1267 l_map[0] |= mask_init;
1268 for(unsigned int i = 0; i <= UCHAR_MAX; ++i)
1269 {
1270 if(rep->_map[i] & mask_any)
1271 l_map[i] |= mask;
1272 }
1273 }
1274 if(pnull)
1275 {
1276 if(rep->can_be_null & mask_any)
1277 *pnull |= mask;
1278 }
1279 }
1280 else
1281 {
1282 // we haven't created a startmap for this alternative yet
1283 // so take the union of the two options:
1284 if(is_bad_repeat(state))
1285 {
1286 set_all_masks(l_map, mask);
1287 if(pnull)
1288 *pnull |= mask;
1289 return;
1290 }
1291 set_bad_repeat(state);
1292 create_startmap(state->next.p, l_map, pnull, mask);
1293 if((state->type == syntax_element_alt)
1294 || (static_cast<re_repeat*>(state)->min == 0)
1295 || (not_last_jump == 0))
1296 create_startmap(rep->alt.p, l_map, pnull, mask);
1297 }
1298 }
1299 return;
1300 case syntax_element_soft_buffer_end:
1301 // match newline or null:
1302 if(l_map)
1303 {
1304 l_map[0] |= mask_init;
1305 l_map[static_cast<unsigned>('\n')] |= mask;
1306 l_map[static_cast<unsigned>('\r')] |= mask;
1307 }
1308 if(pnull)
1309 *pnull |= mask;
1310 return;
1311 case syntax_element_endmark:
1312 // need to handle independent subs as a special case:
1313 if(static_cast<re_brace*>(state)->index < 0)
1314 {
1315 // can be null, any character can match:
1316 set_all_masks(l_map, mask);
1317 if(pnull)
1318 *pnull |= mask;
1319 return;
1320 }
1321 else if(recursion_start && (recursion_sub != 0) && (recursion_sub == static_cast<re_brace*>(state)->index))
1322 {
1323 // recursion termination:
1324 recursion_start = 0;
1325 state = recursion_restart;
1326 break;
1327 }
1328
1329 //
1330 // Normally we just go to the next state... but if this sub-expression is
1331 // the target of a recursion, then we might be ending a recursion, in which
1332 // case we should check whatever follows that recursion, as well as whatever
1333 // follows this state:
1334 //
1335 if(m_pdata->m_has_recursions && static_cast<re_brace*>(state)->index)
1336 {
1337 bool ok = false;
1338 re_syntax_base* p = m_pdata->m_first_state;
1339 while(p)
1340 {
1341 if(p->type == syntax_element_recurse)
1342 {
1343 re_brace* p2 = static_cast<re_brace*>(static_cast<re_jump*>(p)->alt.p);
1344 if((p2->type == syntax_element_startmark) && (p2->index == static_cast<re_brace*>(state)->index))
1345 {
1346 ok = true;
1347 break;
1348 }
1349 }
1350 p = p->next.p;
1351 }
1352 if(ok && ((m_recursion_checks[static_cast<re_brace*>(state)->index] & 2u) == 0))
1353 {
1354 m_recursion_checks[static_cast<re_brace*>(state)->index] |= 2u;
1355 create_startmap(p->next.p, l_map, pnull, mask);
1356 }
1357 }
1358 state = state->next.p;
1359 break;
1360
1361 case syntax_element_commit:
1362 set_all_masks(l_map, mask);
1363 // Continue scanning so we can figure out whether we can be null:
1364 state = state->next.p;
1365 break;
1366 case syntax_element_startmark:
1367 // need to handle independent subs as a special case:
1368 if(static_cast<re_brace*>(state)->index == -3)
1369 {
1370 state = state->next.p->next.p;
1371 break;
1372 }
1373 BOOST_REGEX_FALLTHROUGH;
1374 default:
1375 state = state->next.p;
1376 }
1377 ++not_last_jump;
1378 }
1379 }
1380
1381 template <class charT, class traits>
1382 unsigned basic_regex_creator<charT, traits>::get_restart_type(re_syntax_base* state)
1383 {
1384 //
1385 // find out how the machine starts, so we can optimise the search:
1386 //
1387 while(state)
1388 {
1389 switch(state->type)
1390 {
1391 case syntax_element_startmark:
1392 case syntax_element_endmark:
1393 state = state->next.p;
1394 continue;
1395 case syntax_element_start_line:
1396 return regbase::restart_line;
1397 case syntax_element_word_start:
1398 return regbase::restart_word;
1399 case syntax_element_buffer_start:
1400 return regbase::restart_buf;
1401 case syntax_element_restart_continue:
1402 return regbase::restart_continue;
1403 default:
1404 state = 0;
1405 continue;
1406 }
1407 }
1408 return regbase::restart_any;
1409 }
1410
1411 template <class charT, class traits>
1412 void basic_regex_creator<charT, traits>::set_all_masks(unsigned char* bits, unsigned char mask)
1413 {
1414 //
1415 // set mask in all of bits elements,
1416 // if bits[0] has mask_init not set then we can
1417 // optimise this to a call to memset:
1418 //
1419 if(bits)
1420 {
1421 if(bits[0] == 0)
1422 (std::memset)(bits, mask, 1u << CHAR_BIT);
1423 else
1424 {
1425 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
1426 bits[i] |= mask;
1427 }
1428 bits[0] |= mask_init;
1429 }
1430 }
1431
1432 template <class charT, class traits>
1433 bool basic_regex_creator<charT, traits>::is_bad_repeat(re_syntax_base* pt)
1434 {
1435 switch(pt->type)
1436 {
1437 case syntax_element_rep:
1438 case syntax_element_dot_rep:
1439 case syntax_element_char_rep:
1440 case syntax_element_short_set_rep:
1441 case syntax_element_long_set_rep:
1442 {
1443 unsigned state_id = static_cast<re_repeat*>(pt)->state_id;
1444 if(state_id >= sizeof(m_bad_repeats) * CHAR_BIT)
1445 return true; // run out of bits, assume we can't traverse this one.
1446 static const std::uintmax_t one = 1uL;
1447 return m_bad_repeats & (one << state_id);
1448 }
1449 default:
1450 return false;
1451 }
1452 }
1453
1454 template <class charT, class traits>
1455 void basic_regex_creator<charT, traits>::set_bad_repeat(re_syntax_base* pt)
1456 {
1457 switch(pt->type)
1458 {
1459 case syntax_element_rep:
1460 case syntax_element_dot_rep:
1461 case syntax_element_char_rep:
1462 case syntax_element_short_set_rep:
1463 case syntax_element_long_set_rep:
1464 {
1465 unsigned state_id = static_cast<re_repeat*>(pt)->state_id;
1466 static const std::uintmax_t one = 1uL;
1467 if(state_id <= sizeof(m_bad_repeats) * CHAR_BIT)
1468 m_bad_repeats |= (one << state_id);
1469 }
1470 break;
1471 default:
1472 break;
1473 }
1474 }
1475
1476 template <class charT, class traits>
1477 syntax_element_type basic_regex_creator<charT, traits>::get_repeat_type(re_syntax_base* state)
1478 {
1479 typedef typename traits::char_class_type m_type;
1480 if(state->type == syntax_element_rep)
1481 {
1482 // check to see if we are repeating a single state:
1483 if(state->next.p->next.p->next.p == static_cast<re_alt*>(state)->alt.p)
1484 {
1485 switch(state->next.p->type)
1486 {
1487 case BOOST_REGEX_DETAIL_NS::syntax_element_wild:
1488 return BOOST_REGEX_DETAIL_NS::syntax_element_dot_rep;
1489 case BOOST_REGEX_DETAIL_NS::syntax_element_literal:
1490 return BOOST_REGEX_DETAIL_NS::syntax_element_char_rep;
1491 case BOOST_REGEX_DETAIL_NS::syntax_element_set:
1492 return BOOST_REGEX_DETAIL_NS::syntax_element_short_set_rep;
1493 case BOOST_REGEX_DETAIL_NS::syntax_element_long_set:
1494 if(static_cast<BOOST_REGEX_DETAIL_NS::re_set_long<m_type>*>(state->next.p)->singleton)
1495 return BOOST_REGEX_DETAIL_NS::syntax_element_long_set_rep;
1496 break;
1497 default:
1498 break;
1499 }
1500 }
1501 }
1502 return state->type;
1503 }
1504
1505 template <class charT, class traits>
1506 void basic_regex_creator<charT, traits>::probe_leading_repeat(re_syntax_base* state)
1507 {
1508 // enumerate our states, and see if we have a leading repeat
1509 // for which failed search restarts can be optimized;
1510 do
1511 {
1512 switch(state->type)
1513 {
1514 case syntax_element_startmark:
1515 if(static_cast<re_brace*>(state)->index >= 0)
1516 {
1517 state = state->next.p;
1518 continue;
1519 }
1520 #ifdef BOOST_REGEX_MSVC
1521 # pragma warning(push)
1522 #pragma warning(disable:6011)
1523 #endif
1524 if((static_cast<re_brace*>(state)->index == -1)
1525 || (static_cast<re_brace*>(state)->index == -2))
1526 {
1527 // skip past the zero width assertion:
1528 state = static_cast<const re_jump*>(state->next.p)->alt.p->next.p;
1529 continue;
1530 }
1531 #ifdef BOOST_REGEX_MSVC
1532 # pragma warning(pop)
1533 #endif
1534 if(static_cast<re_brace*>(state)->index == -3)
1535 {
1536 // Have to skip the leading jump state:
1537 state = state->next.p->next.p;
1538 continue;
1539 }
1540 return;
1541 case syntax_element_endmark:
1542 case syntax_element_start_line:
1543 case syntax_element_end_line:
1544 case syntax_element_word_boundary:
1545 case syntax_element_within_word:
1546 case syntax_element_word_start:
1547 case syntax_element_word_end:
1548 case syntax_element_buffer_start:
1549 case syntax_element_buffer_end:
1550 case syntax_element_restart_continue:
1551 state = state->next.p;
1552 break;
1553 case syntax_element_dot_rep:
1554 case syntax_element_char_rep:
1555 case syntax_element_short_set_rep:
1556 case syntax_element_long_set_rep:
1557 if(this->m_has_backrefs == 0)
1558 static_cast<re_repeat*>(state)->leading = true;
1559 BOOST_REGEX_FALLTHROUGH;
1560 default:
1561 return;
1562 }
1563 }while(state);
1564 }
1565
1566 } // namespace BOOST_REGEX_DETAIL_NS
1567
1568 } // namespace boost
1569
1570 #ifdef BOOST_REGEX_MSVC
1571 # pragma warning(pop)
1572 #endif
1573
1574 #endif