3 * Copyright (c) 1998-2004 John Maddock
4 * Copyright 2011 Garmin Ltd. or its subsidiaries
6 * Distributed under the Boost Software License, Version 1.0.
7 * (See accompanying file LICENSE_1_0.txt or copy at
8 * http://www.boost.org/LICENSE_1_0.txt)
13 * LOCATION: see http://www.boost.org/ for most recent version.
14 * FILE basic_regex.hpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Declares template class basic_regex.
19 #ifndef BOOST_REGEX_V4_BASIC_REGEX_HPP
20 #define BOOST_REGEX_V4_BASIC_REGEX_HPP
22 #include <boost/type_traits/is_same.hpp>
23 #include <boost/container_hash/hash.hpp>
27 #pragma warning(disable: 4103)
29 #ifdef BOOST_HAS_ABI_HEADERS
30 # include BOOST_ABI_PREFIX
39 #pragma warning(disable : 4251)
41 # pragma warning(disable : 4231)
44 #pragma warning(disable : 4660)
47 #pragma warning(disable:4800)
51 namespace BOOST_REGEX_DETAIL_NS{
//
// Forward declaration of the expression parser; it is only named here and
// is used later by basic_regex_implementation::assign.
//
template <class charT, class traits>
class basic_regex_parser;
//
// bubble_down_one:
// Moves the last element of [first, last) towards the front, one position
// at a time, for as long as it compares less than its predecessor.
//
// I must support `last - 1` / `next - 1` and decrement, and its value type
// must provide operator< and a member swap().  An empty range is a no-op.
//
template <class I>
void bubble_down_one(I first, I last)
{
   if(first != last)
   {
      I next = last - 1;
      while((next != first) && (*next < *(next-1)))
      {
         (next-1)->swap(*next);
         --next;   // follow the element to its new position
      }
   }
}
73 static const int hash_value_mask = 1 << (std::numeric_limits<int>::digits - 1);
75 template <class Iterator>
76 inline int hash_value_from_capture_name(Iterator i, Iterator j)
78 std::size_t r = boost::hash_range(i, j);
79 r %= ((std::numeric_limits<int>::max)());
80 return static_cast<int>(r) | hash_value_mask;
83 class named_subexpressions
88 template <class charT>
89 name(const charT* i, const charT* j, int idx)
92 hash = hash_value_from_capture_name(i, j);
100 bool operator < (const name& other)const
102 return hash < other.hash;
104 bool operator == (const name& other)const
106 return hash == other.hash;
108 void swap(name& other)
110 std::swap(index, other.index);
111 std::swap(hash, other.hash);
115 typedef std::vector<name>::const_iterator const_iterator;
116 typedef std::pair<const_iterator, const_iterator> range_type;
118 named_subexpressions(){}
120 template <class charT>
121 void set_name(const charT* i, const charT* j, int index)
123 m_sub_names.push_back(name(i, j, index));
124 bubble_down_one(m_sub_names.begin(), m_sub_names.end());
126 template <class charT>
127 int get_id(const charT* i, const charT* j)const
130 typename std::vector<name>::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t);
131 if((pos != m_sub_names.end()) && (*pos == t))
137 template <class charT>
138 range_type equal_range(const charT* i, const charT* j)const
141 return std::equal_range(m_sub_names.begin(), m_sub_names.end(), t);
143 int get_id(int h)const
146 std::vector<name>::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t);
147 if((pos != m_sub_names.end()) && (*pos == t))
153 range_type equal_range(int h)const
156 return std::equal_range(m_sub_names.begin(), m_sub_names.end(), t);
159 std::vector<name> m_sub_names;
164 // represents the data we wish to expose to the matching algorithms.
166 template <class charT, class traits>
167 struct regex_data : public named_subexpressions
169 typedef regex_constants::syntax_option_type flag_type;
170 typedef std::size_t size_type;
172 regex_data(const ::boost::shared_ptr<
173 ::boost::regex_traits_wrapper<traits> >& t)
174 : m_ptraits(t), m_flags(0), m_status(0), m_expression(0), m_expression_len(0),
175 m_mark_count(0), m_first_state(0), m_restart_type(0),
176 #if !defined(BOOST_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX) && !(defined(BOOST_MSVC) && (BOOST_MSVC < 1900))
179 m_can_be_null(0), m_word_mask(0), m_has_recursions(false), m_disable_match_any(false) {}
181 : m_ptraits(new ::boost::regex_traits_wrapper<traits>()), m_flags(0), m_status(0), m_expression(0), m_expression_len(0),
182 m_mark_count(0), m_first_state(0), m_restart_type(0),
183 #if !defined(BOOST_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX) && !(defined(BOOST_MSVC) && (BOOST_MSVC < 1900))
186 m_can_be_null(0), m_word_mask(0), m_has_recursions(false), m_disable_match_any(false) {}
189 ::boost::regex_traits_wrapper<traits>
190 > m_ptraits; // traits class instance
191 flag_type m_flags; // flags with which we were compiled
192 int m_status; // error code (0 implies OK).
193 const charT* m_expression; // the original expression
194 std::ptrdiff_t m_expression_len; // the length of the original expression
195 size_type m_mark_count; // the number of marked sub-expressions
196 BOOST_REGEX_DETAIL_NS::re_syntax_base* m_first_state; // the first state of the machine
197 unsigned m_restart_type; // search optimisation type
198 unsigned char m_startmap[1 << CHAR_BIT]; // which characters can start a match
199 unsigned int m_can_be_null; // whether we can match a null string
200 BOOST_REGEX_DETAIL_NS::raw_storage m_data; // the buffer in which our states are constructed
201 typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
204 std::size_t, std::size_t> > m_subs; // Position of sub-expressions within the *string*.
205 bool m_has_recursions; // whether we have recursive expressions;
206 bool m_disable_match_any; // when set we need to disable the match_any flag as it causes different/buggy behaviour.
209 // class basic_regex_implementation
210 // pimpl implementation class for basic_regex.
212 template <class charT, class traits>
213 class basic_regex_implementation
214 : public regex_data<charT, traits>
217 typedef regex_constants::syntax_option_type flag_type;
218 typedef std::ptrdiff_t difference_type;
219 typedef std::size_t size_type;
220 typedef typename traits::locale_type locale_type;
221 typedef const charT* const_iterator;
223 basic_regex_implementation(){}
224 basic_regex_implementation(const ::boost::shared_ptr<
225 ::boost::regex_traits_wrapper<traits> >& t)
226 : regex_data<charT, traits>(t) {}
227 void assign(const charT* arg_first,
228 const charT* arg_last,
231 regex_data<charT, traits>* pdat = this;
232 basic_regex_parser<charT, traits> parser(pdat);
233 parser.parse(arg_first, arg_last, f);
236 locale_type BOOST_REGEX_CALL imbue(locale_type l)
238 return this->m_ptraits->imbue(l);
240 locale_type BOOST_REGEX_CALL getloc()const
242 return this->m_ptraits->getloc();
244 std::basic_string<charT> BOOST_REGEX_CALL str()const
246 std::basic_string<charT> result;
247 if(this->m_status == 0)
248 result = std::basic_string<charT>(this->m_expression, this->m_expression_len);
251 const_iterator BOOST_REGEX_CALL expression()const
253 return this->m_expression;
255 std::pair<const_iterator, const_iterator> BOOST_REGEX_CALL subexpression(std::size_t n)const
257 const std::pair<std::size_t, std::size_t>& pi = this->m_subs.at(n);
258 std::pair<const_iterator, const_iterator> p(expression() + pi.first, expression() + pi.second);
263 const_iterator BOOST_REGEX_CALL begin()const
265 return (this->m_status ? 0 : this->m_expression);
267 const_iterator BOOST_REGEX_CALL end()const
269 return (this->m_status ? 0 : this->m_expression + this->m_expression_len);
271 flag_type BOOST_REGEX_CALL flags()const
273 return this->m_flags;
275 size_type BOOST_REGEX_CALL size()const
277 return this->m_expression_len;
279 int BOOST_REGEX_CALL status()const
281 return this->m_status;
283 size_type BOOST_REGEX_CALL mark_count()const
285 return this->m_mark_count - 1;
287 const BOOST_REGEX_DETAIL_NS::re_syntax_base* get_first_state()const
289 return this->m_first_state;
291 unsigned get_restart_type()const
293 return this->m_restart_type;
295 const unsigned char* get_map()const
297 return this->m_startmap;
299 const ::boost::regex_traits_wrapper<traits>& get_traits()const
301 return *(this->m_ptraits);
303 bool can_be_null()const
305 return this->m_can_be_null;
307 const regex_data<charT, traits>& get_data()const
309 basic_regex_implementation<charT, traits> const* p = this;
310 return *static_cast<const regex_data<charT, traits>*>(p);
314 } // namespace BOOST_REGEX_DETAIL_NS
316 // class basic_regex:
317 // represents the compiled
318 // regular expression:
321 #ifdef BOOST_REGEX_NO_FWD
322 template <class charT, class traits = regex_traits<charT> >
324 template <class charT, class traits >
326 class basic_regex : public regbase
330 typedef std::size_t traits_size_type;
331 typedef typename traits::string_type traits_string_type;
332 typedef charT char_type;
333 typedef traits traits_type;
335 typedef charT value_type;
336 typedef charT& reference;
337 typedef const charT& const_reference;
338 typedef const charT* const_iterator;
339 typedef const_iterator iterator;
340 typedef std::ptrdiff_t difference_type;
341 typedef std::size_t size_type;
342 typedef regex_constants::syntax_option_type flag_type;
344 // placeholder for actual locale type used by the
345 // traits class to localise *this.
346 typedef typename traits::locale_type locale_type;
349 explicit basic_regex(){}
350 explicit basic_regex(const charT* p, flag_type f = regex_constants::normal)
354 basic_regex(const charT* p1, const charT* p2, flag_type f = regex_constants::normal)
358 basic_regex(const charT* p, size_type len, flag_type f)
362 basic_regex(const basic_regex& that)
363 : m_pimpl(that.m_pimpl) {}
365 basic_regex& BOOST_REGEX_CALL operator=(const basic_regex& that)
369 basic_regex& BOOST_REGEX_CALL operator=(const charT* ptr)
376 basic_regex& assign(const basic_regex& that)
378 m_pimpl = that.m_pimpl;
381 basic_regex& assign(const charT* p, flag_type f = regex_constants::normal)
383 return assign(p, p + traits::length(p), f);
385 basic_regex& assign(const charT* p, size_type len, flag_type f)
387 return assign(p, p + len, f);
390 basic_regex& do_assign(const charT* p1,
394 basic_regex& assign(const charT* p1,
396 flag_type f = regex_constants::normal)
398 return do_assign(p1, p2, f);
400 #if !defined(BOOST_NO_MEMBER_TEMPLATES)
402 template <class ST, class SA>
403 unsigned int BOOST_REGEX_CALL set_expression(const std::basic_string<charT, ST, SA>& p, flag_type f = regex_constants::normal)
405 return set_expression(p.data(), p.data() + p.size(), f);
408 template <class ST, class SA>
409 explicit basic_regex(const std::basic_string<charT, ST, SA>& p, flag_type f = regex_constants::normal)
414 template <class InputIterator>
415 basic_regex(InputIterator arg_first, InputIterator arg_last, flag_type f = regex_constants::normal)
417 typedef typename traits::string_type seq_type;
418 seq_type a(arg_first, arg_last);
420 assign(static_cast<const charT*>(&*a.begin()), static_cast<const charT*>(&*a.begin() + a.size()), f);
422 assign(static_cast<const charT*>(0), static_cast<const charT*>(0), f);
425 template <class ST, class SA>
426 basic_regex& BOOST_REGEX_CALL operator=(const std::basic_string<charT, ST, SA>& p)
428 return assign(p.data(), p.data() + p.size(), regex_constants::normal);
431 template <class string_traits, class A>
432 basic_regex& BOOST_REGEX_CALL assign(
433 const std::basic_string<charT, string_traits, A>& s,
434 flag_type f = regex_constants::normal)
436 return assign(s.data(), s.data() + s.size(), f);
439 template <class InputIterator>
440 basic_regex& BOOST_REGEX_CALL assign(InputIterator arg_first,
441 InputIterator arg_last,
442 flag_type f = regex_constants::normal)
444 typedef typename traits::string_type seq_type;
445 seq_type a(arg_first, arg_last);
448 const charT* p1 = &*a.begin();
449 const charT* p2 = &*a.begin() + a.size();
450 return assign(p1, p2, f);
452 return assign(static_cast<const charT*>(0), static_cast<const charT*>(0), f);
455 unsigned int BOOST_REGEX_CALL set_expression(const std::basic_string<charT>& p, flag_type f = regex_constants::normal)
457 return set_expression(p.data(), p.data() + p.size(), f);
460 basic_regex(const std::basic_string<charT>& p, flag_type f = regex_constants::normal)
465 basic_regex& BOOST_REGEX_CALL operator=(const std::basic_string<charT>& p)
467 return assign(p.data(), p.data() + p.size(), regex_constants::normal);
470 basic_regex& BOOST_REGEX_CALL assign(
471 const std::basic_string<charT>& s,
472 flag_type f = regex_constants::normal)
474 return assign(s.data(), s.data() + s.size(), f);
481 locale_type BOOST_REGEX_CALL imbue(locale_type l);
482 locale_type BOOST_REGEX_CALL getloc()const
484 return m_pimpl.get() ? m_pimpl->getloc() : locale_type();
488 // retained for backwards compatibility only, "flags"
489 // is now the preferred name:
490 flag_type BOOST_REGEX_CALL getflags()const
494 flag_type BOOST_REGEX_CALL flags()const
496 return m_pimpl.get() ? m_pimpl->flags() : 0;
500 std::basic_string<charT> BOOST_REGEX_CALL str()const
502 return m_pimpl.get() ? m_pimpl->str() : std::basic_string<charT>();
505 // begin, end, subexpression:
506 std::pair<const_iterator, const_iterator> BOOST_REGEX_CALL subexpression(std::size_t n)const
509 boost::throw_exception(std::logic_error("Can't access subexpressions in an invalid regex."));
510 return m_pimpl->subexpression(n);
512 const_iterator BOOST_REGEX_CALL begin()const
514 return (m_pimpl.get() ? m_pimpl->begin() : 0);
516 const_iterator BOOST_REGEX_CALL end()const
518 return (m_pimpl.get() ? m_pimpl->end() : 0);
522 void BOOST_REGEX_CALL swap(basic_regex& that)throw()
524 m_pimpl.swap(that.m_pimpl);
528 size_type BOOST_REGEX_CALL size()const
530 return (m_pimpl.get() ? m_pimpl->size() : 0);
534 size_type BOOST_REGEX_CALL max_size()const
540 bool BOOST_REGEX_CALL empty()const
542 return (m_pimpl.get() ? 0 != m_pimpl->status() : true);
545 size_type BOOST_REGEX_CALL mark_count()const
547 return (m_pimpl.get() ? m_pimpl->mark_count() : 0);
552 return (m_pimpl.get() ? m_pimpl->status() : regex_constants::error_empty);
555 int BOOST_REGEX_CALL compare(const basic_regex& that) const
557 if(m_pimpl.get() == that.m_pimpl.get())
561 if(!that.m_pimpl.get())
563 if(status() != that.status())
564 return status() - that.status();
565 if(flags() != that.flags())
566 return flags() - that.flags();
567 return str().compare(that.str());
569 bool BOOST_REGEX_CALL operator==(const basic_regex& e)const
571 return compare(e) == 0;
573 bool BOOST_REGEX_CALL operator != (const basic_regex& e)const
575 return compare(e) != 0;
577 bool BOOST_REGEX_CALL operator<(const basic_regex& e)const
579 return compare(e) < 0;
581 bool BOOST_REGEX_CALL operator>(const basic_regex& e)const
583 return compare(e) > 0;
585 bool BOOST_REGEX_CALL operator<=(const basic_regex& e)const
587 return compare(e) <= 0;
589 bool BOOST_REGEX_CALL operator>=(const basic_regex& e)const
591 return compare(e) >= 0;
595 // The following are deprecated as public interfaces
596 // but are available for compatibility with earlier versions.
597 const charT* BOOST_REGEX_CALL expression()const
599 return (m_pimpl.get() && !m_pimpl->status() ? m_pimpl->expression() : 0);
601 unsigned int BOOST_REGEX_CALL set_expression(const charT* p1, const charT* p2, flag_type f = regex_constants::normal)
603 assign(p1, p2, f | regex_constants::no_except);
606 unsigned int BOOST_REGEX_CALL set_expression(const charT* p, flag_type f = regex_constants::normal)
608 assign(p, f | regex_constants::no_except);
611 unsigned int BOOST_REGEX_CALL error_code()const
616 // private access methods:
618 const BOOST_REGEX_DETAIL_NS::re_syntax_base* get_first_state()const
620 BOOST_REGEX_ASSERT(0 != m_pimpl.get());
621 return m_pimpl->get_first_state();
623 unsigned get_restart_type()const
625 BOOST_REGEX_ASSERT(0 != m_pimpl.get());
626 return m_pimpl->get_restart_type();
628 const unsigned char* get_map()const
630 BOOST_REGEX_ASSERT(0 != m_pimpl.get());
631 return m_pimpl->get_map();
633 const ::boost::regex_traits_wrapper<traits>& get_traits()const
635 BOOST_REGEX_ASSERT(0 != m_pimpl.get());
636 return m_pimpl->get_traits();
638 bool can_be_null()const
640 BOOST_REGEX_ASSERT(0 != m_pimpl.get());
641 return m_pimpl->can_be_null();
643 const BOOST_REGEX_DETAIL_NS::regex_data<charT, traits>& get_data()const
645 BOOST_REGEX_ASSERT(0 != m_pimpl.get());
646 return m_pimpl->get_data();
648 boost::shared_ptr<BOOST_REGEX_DETAIL_NS::named_subexpressions > get_named_subs()const
654 shared_ptr<BOOST_REGEX_DETAIL_NS::basic_regex_implementation<charT, traits> > m_pimpl;
658 // out of line members;
659 // these are the only members that mutate the basic_regex object,
660 // and are designed to provide the strong exception guarantee
661 // (in the event of a throw, the state of the object remains unchanged).
663 template <class charT, class traits>
664 basic_regex<charT, traits>& basic_regex<charT, traits>::do_assign(const charT* p1,
668 shared_ptr<BOOST_REGEX_DETAIL_NS::basic_regex_implementation<charT, traits> > temp;
671 temp = shared_ptr<BOOST_REGEX_DETAIL_NS::basic_regex_implementation<charT, traits> >(new BOOST_REGEX_DETAIL_NS::basic_regex_implementation<charT, traits>());
675 temp = shared_ptr<BOOST_REGEX_DETAIL_NS::basic_regex_implementation<charT, traits> >(new BOOST_REGEX_DETAIL_NS::basic_regex_implementation<charT, traits>(m_pimpl->m_ptraits));
677 temp->assign(p1, p2, f);
682 template <class charT, class traits>
683 typename basic_regex<charT, traits>::locale_type BOOST_REGEX_CALL basic_regex<charT, traits>::imbue(locale_type l)
685 shared_ptr<BOOST_REGEX_DETAIL_NS::basic_regex_implementation<charT, traits> > temp(new BOOST_REGEX_DETAIL_NS::basic_regex_implementation<charT, traits>());
686 locale_type result = temp->imbue(l);
694 template <class charT, class traits>
695 void swap(basic_regex<charT, traits>& e1, basic_regex<charT, traits>& e2)
700 #ifndef BOOST_NO_STD_LOCALE
701 template <class charT, class traits, class traits2>
702 std::basic_ostream<charT, traits>&
703 operator << (std::basic_ostream<charT, traits>& os,
704 const basic_regex<charT, traits2>& e)
706 return (os << e.str());
709 template <class traits>
710 std::ostream& operator << (std::ostream& os, const basic_regex<char, traits>& e)
712 return (os << e.str());
717 // class reg_expression:
718 // this is provided for backwards compatibility only,
719 // it is deprecated, do not use!
721 #ifdef BOOST_REGEX_NO_FWD
722 template <class charT, class traits = regex_traits<charT> >
724 template <class charT, class traits >
726 class reg_expression : public basic_regex<charT, traits>
729 typedef typename basic_regex<charT, traits>::flag_type flag_type;
730 typedef typename basic_regex<charT, traits>::size_type size_type;
731 explicit reg_expression(){}
732 explicit reg_expression(const charT* p, flag_type f = regex_constants::normal)
733 : basic_regex<charT, traits>(p, f){}
734 reg_expression(const charT* p1, const charT* p2, flag_type f = regex_constants::normal)
735 : basic_regex<charT, traits>(p1, p2, f){}
736 reg_expression(const charT* p, size_type len, flag_type f)
737 : basic_regex<charT, traits>(p, len, f){}
738 reg_expression(const reg_expression& that)
739 : basic_regex<charT, traits>(that) {}
741 reg_expression& BOOST_REGEX_CALL operator=(const reg_expression& that)
743 return this->assign(that);
746 #if !defined(BOOST_NO_MEMBER_TEMPLATES)
747 template <class ST, class SA>
748 explicit reg_expression(const std::basic_string<charT, ST, SA>& p, flag_type f = regex_constants::normal)
749 : basic_regex<charT, traits>(p, f)
753 template <class InputIterator>
754 reg_expression(InputIterator arg_first, InputIterator arg_last, flag_type f = regex_constants::normal)
755 : basic_regex<charT, traits>(arg_first, arg_last, f)
759 template <class ST, class SA>
760 reg_expression& BOOST_REGEX_CALL operator=(const std::basic_string<charT, ST, SA>& p)
766 explicit reg_expression(const std::basic_string<charT>& p, flag_type f = regex_constants::normal)
767 : basic_regex<charT, traits>(p, f)
771 reg_expression& BOOST_REGEX_CALL operator=(const std::basic_string<charT>& p)
781 #pragma warning (pop)
787 #pragma warning(push)
788 #pragma warning(disable: 4103)
790 #ifdef BOOST_HAS_ABI_HEADERS
791 # include BOOST_ABI_SUFFIX