2 // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 #ifndef BOOST_LEXER_STRING_TOKEN_HPP
7 #define BOOST_LEXER_STRING_TOKEN_HPP
11 #include "consts.hpp" // num_chars, num_wchar_ts
19 template<typename CharT>
20 struct basic_string_token
22 typedef std::basic_string<CharT> string;
27 basic_string_token () :
32 basic_string_token (const bool negated_, const string &charset_) :
38 void remove_duplicates ()
40 const CharT *start_ = _charset.c_str ();
41 const CharT *end_ = start_ + _charset.size ();
43 // Optimisation for very large charsets:
44 // sorting via pointers is much quicker than
46 std::sort (const_cast<CharT *> (start_), const_cast<CharT *> (end_));
47 _charset.erase (std::unique (_charset.begin (), _charset.end ()),
53 const std::size_t max_chars_ = sizeof (CharT) == 1 ?
54 num_chars : num_wchar_ts;
56 if (_charset.length () == max_chars_)
61 else if (_charset.length () > max_chars_ / 2)
69 const std::size_t max_chars_ = sizeof (CharT) == 1 ?
70 num_chars : num_wchar_ts;
71 CharT curr_char_ = (std::numeric_limits<CharT>::min)();
73 const CharT *curr_ = _charset.c_str ();
74 const CharT *chars_end_ = curr_ + _charset.size ();
77 temp_.resize (max_chars_ - _charset.size ());
79 CharT *ptr_ = const_cast<CharT *> (temp_.c_str ());
82 while (curr_ < chars_end_)
84 while (*curr_ > curr_char_)
97 for (; i_ < max_chars_; ++i_)
107 bool operator < (const basic_string_token &rhs_) const
109 return _negated < rhs_._negated ||
110 (_negated == rhs_._negated && _charset < rhs_._charset);
115 return _charset.empty () && !_negated;
120 return _charset.empty () && _negated;
129 void intersect (basic_string_token &rhs_, basic_string_token &overlap_)
131 if ((any () && rhs_.any ()) || (_negated == rhs_._negated &&
132 !any () && !rhs_.any ()))
134 intersect_same_types (rhs_, overlap_);
138 intersect_diff_types (rhs_, overlap_);
142 static void escape_char (const CharT ch_, string &out_)
198 if (ch_ < 32 && ch_ >= 0)
200 std::basic_stringstream<CharT> ss_;
205 static_cast<std::size_t> (ch_);
219 void intersect_same_types (basic_string_token &rhs_,
220 basic_string_token &overlap_)
225 overlap_._negated = true;
230 typename string::iterator iter_ = _charset.begin ();
231 typename string::iterator end_ = _charset.end ();
232 typename string::iterator rhs_iter_ = rhs_._charset.begin ();
233 typename string::iterator rhs_end_ = rhs_._charset.end ();
235 overlap_._negated = _negated;
237 while (iter_ != end_ && rhs_iter_ != rhs_end_)
239 if (*iter_ < *rhs_iter_)
243 else if (*iter_ > *rhs_iter_)
249 overlap_._charset += *iter_;
250 iter_ = _charset.erase (iter_);
251 end_ = _charset.end ();
252 rhs_iter_ = rhs_._charset.erase (rhs_iter_);
253 rhs_end_ = rhs_._charset.end ();
259 // duplicates already merged, so safe to merge
263 merge (_charset, overlap_._charset);
264 // duplicates already merged, so safe to merge
268 merge (rhs_._charset, overlap_._charset);
270 rhs_._negated = false;
271 std::swap (_charset, rhs_._charset);
273 overlap_.normalise ();
276 else if (!overlap_._charset.empty ())
279 overlap_.normalise ();
285 void intersect_diff_types (basic_string_token &rhs_,
286 basic_string_token &overlap_)
290 intersect_any (rhs_, overlap_);
294 intersect_negated (rhs_, overlap_);
296 else // _negated == false
298 intersect_charset (rhs_, overlap_);
302 void intersect_any (basic_string_token &rhs_, basic_string_token &overlap_)
306 rhs_.intersect_negated (*this, overlap_);
308 else // rhs._negated == false
310 rhs_.intersect_charset (*this, overlap_);
314 void intersect_negated (basic_string_token &rhs_,
315 basic_string_token &overlap_)
319 overlap_._negated = true;
320 overlap_._charset = _charset;
321 rhs_._negated = false;
322 rhs_._charset = _charset;
325 else // rhs._negated == false
327 rhs_.intersect_charset (*this, overlap_);
331 void intersect_charset (basic_string_token &rhs_,
332 basic_string_token &overlap_)
336 overlap_._charset = _charset;
337 rhs_._negated = true;
338 rhs_._charset = _charset;
341 else // rhs_._negated == true
343 typename string::iterator iter_ = _charset.begin ();
344 typename string::iterator end_ = _charset.end ();
345 typename string::iterator rhs_iter_ = rhs_._charset.begin ();
346 typename string::iterator rhs_end_ = rhs_._charset.end ();
348 while (iter_ != end_ && rhs_iter_ != rhs_end_)
350 if (*iter_ < *rhs_iter_)
352 overlap_._charset += *iter_;
353 rhs_iter_ = rhs_._charset.insert (rhs_iter_, *iter_);
355 rhs_end_ = rhs_._charset.end ();
356 iter_ = _charset.erase (iter_);
357 end_ = _charset.end ();
359 else if (*iter_ > *rhs_iter_)
372 // nothing bigger in rhs_ than iter_,
373 // so safe to merge using std lib.
374 string temp_ (iter_, end_);
377 merge (temp_, overlap_._charset);
378 _charset.erase (iter_, end_);
381 if (!overlap_._charset.empty ())
383 merge (overlap_._charset, rhs_._charset);
384 // possible duplicates, so check for any and erase.
385 rhs_._charset.erase (std::unique (rhs_._charset.begin (),
386 rhs_._charset.end ()), rhs_._charset.end ());
388 overlap_.normalise ();
394 void merge (string &src_, string &dest_)
396 string tmp_ (src_.size () + dest_.size (), 0);
398 std::merge (src_.begin (), src_.end (), dest_.begin (), dest_.end (),