ceph/src/boost/boost/token_functions.hpp

   1 // Boost token_functions.hpp  ------------------------------------------------//
   2
   3 // Copyright John R. Bandela 2001.
   4
   5 // Distributed under the Boost Software License, Version 1.0. (See
   6 // accompanying file LICENSE_1_0.txt or copy at
   7 // http://www.boost.org/LICENSE_1_0.txt)
   8
   9 // See http://www.boost.org/libs/tokenizer/ for documentation.
  10
  11 // Revision History:
  12 // 01 Oct 2004   Joaquin M Lopez Munoz
  13 //      Workaround for a problem with string::assign in msvc-stlport
  14 // 06 Apr 2004   John Bandela
  15 //      Fixed a bug involving using char_delimiter with a true input iterator
  16 // 28 Nov 2003   Robert Zeh and John Bandela
  17 //      Converted into "fast" functions that avoid using += when
  18 //      the supplied iterator isn't an input_iterator; based on
  19 //      some work done at Archelon and a version that was checked into
  20 //      the boost CVS for a short period of time.
  21 // 20 Feb 2002   John Maddock
  22 //      Removed using namespace std declarations and added
  23 //      workaround for BOOST_NO_STDC_NAMESPACE (the library
  24 //      can be safely mixed with regex).
  25 // 06 Feb 2002   Jeremy Siek
  26 //      Added char_separator.
  27 // 02 Feb 2002   Jeremy Siek
  28 //      Removed tabs and a little cleanup.
  29
  30
  31 #ifndef BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_
  32 #define BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_
  33
  34 #include <vector>
  35 #include <stdexcept>
  36 #include <string>
  37 #include <cctype>
  38 #include <algorithm> // for find_if
  39 #include <boost/config.hpp>
  40 #include <boost/assert.hpp>
  41 #include <boost/type_traits/is_pointer.hpp>
  42 #include <boost/detail/workaround.hpp>
  43 #include <boost/mpl/if.hpp>
  44 #include <boost/throw_exception.hpp>
  45 #if !defined(BOOST_NO_CWCTYPE)
  46 #include <cwctype>
  47 #endif
  48
  49 //
  50 // the following must not be macros if we are to prefix them
  51 // with std:: (they shouldn't be macros anyway...)
  52 //
  53 #ifdef ispunct
  54 #  undef ispunct
  55 #endif
  56 #ifdef iswpunct
  57 #  undef iswpunct
  58 #endif
  59 #ifdef isspace
  60 #  undef isspace
  61 #endif
  62 #ifdef iswspace
  63 #  undef iswspace
  64 #endif
  65 //
  66 // fix namespace problems:
  67 //
  68 #ifdef BOOST_NO_STDC_NAMESPACE
  69 namespace std{
  70  using ::ispunct;
  71  using ::isspace;
  72 #if !defined(BOOST_NO_CWCTYPE)
  73  using ::iswpunct;
  74  using ::iswspace;
  75 #endif
  76 }
  77 #endif
  78
  79 namespace boost{
  80   //===========================================================================
  81   // The escaped_list_separator class. Which is a model of TokenizerFunction
  82   // An escaped list is a super-set of what is commonly known as a comma
  83   // separated value (csv) list.It is separated into fields by a comma or
  84   // other character. If the delimiting character is inside quotes, then it is
  85   // counted as a regular character.To allow for embedded quotes in a field,
  86   // there can be escape sequences using the \ much like C.
  87   // The role of the comma, the quotation mark, and the escape
  88   // character (backslash \), can be assigned to other characters.
  89
  // Exception type used by escaped_list_separator to report a malformed
  // escape sequence. The message is forwarded unchanged to std::runtime_error.
  struct escaped_list_error : public std::runtime_error
  {
    escaped_list_error(const std::string& what_arg)
      : std::runtime_error(what_arg)
    {
    }
  };
  93
  94
  95 // The out of the box GCC 2.95 on cygwin does not have a char_traits class.
  96 // MSVC does not like the following typename
  97   template <class Char,
  98     class Traits = BOOST_DEDUCED_TYPENAME std::basic_string<Char>::traits_type >
  99   class escaped_list_separator {
 100
 101   private:
 102     typedef std::basic_string<Char,Traits> string_type;
 103     struct char_eq {
 104       Char e_;
 105       char_eq(Char e):e_(e) { }
 106       bool operator()(Char c) {
 107         return Traits::eq(e_,c);
 108       }
 109     };
 110     string_type  escape_;
 111     string_type  c_;
 112     string_type  quote_;
 113     bool last_;
 114
 115     bool is_escape(Char e) {
 116       char_eq f(e);
 117       return std::find_if(escape_.begin(),escape_.end(),f)!=escape_.end();
 118     }
 119     bool is_c(Char e) {
 120       char_eq f(e);
 121       return std::find_if(c_.begin(),c_.end(),f)!=c_.end();
 122     }
 123     bool is_quote(Char e) {
 124       char_eq f(e);
 125       return std::find_if(quote_.begin(),quote_.end(),f)!=quote_.end();
 126     }
 127     template <typename iterator, typename Token>
 128     void do_escape(iterator& next,iterator end,Token& tok) {
 129       if (++next == end)
 130         BOOST_THROW_EXCEPTION(escaped_list_error(std::string("cannot end with escape")));
 131       if (Traits::eq(*next,'n')) {
 132         tok+='\n';
 133         return;
 134       }
 135       else if (is_quote(*next)) {
 136         tok+=*next;
 137         return;
 138       }
 139       else if (is_c(*next)) {
 140         tok+=*next;
 141         return;
 142       }
 143       else if (is_escape(*next)) {
 144         tok+=*next;
 145         return;
 146       }
 147       else
 148         BOOST_THROW_EXCEPTION(escaped_list_error(std::string("unknown escape sequence")));
 149     }
 150
 151     public:
 152
 153     explicit escaped_list_separator(Char  e = '\\',
 154                                     Char c = ',',Char  q = '\"')
 155       : escape_(1,e), c_(1,c), quote_(1,q), last_(false) { }
 156
 157     escaped_list_separator(string_type e, string_type c, string_type q)
 158       : escape_(e), c_(c), quote_(q), last_(false) { }
 159
 160     void reset() {last_=false;}
 161
 162     template <typename InputIterator, typename Token>
 163     bool operator()(InputIterator& next,InputIterator end,Token& tok) {
 164       bool bInQuote = false;
 165       tok = Token();
 166
 167       if (next == end) {
 168         if (last_) {
 169           last_ = false;
 170           return true;
 171         }
 172         else
 173           return false;
 174       }
 175       last_ = false;
 176       for (;next != end;++next) {
 177         if (is_escape(*next)) {
 178           do_escape(next,end,tok);
 179         }
 180         else if (is_c(*next)) {
 181           if (!bInQuote) {
 182             // If we are not in quote, then we are done
 183             ++next;
 184             // The last character was a c, that means there is
 185             // 1 more blank field
 186             last_ = true;
 187             return true;
 188           }
 189           else tok+=*next;
 190         }
 191         else if (is_quote(*next)) {
 192           bInQuote=!bInQuote;
 193         }
 194         else {
 195           tok += *next;
 196         }
 197       }
 198       return true;
 199     }
 200   };
 201
 202   //===========================================================================
 203   // The classes here are used by offset_separator and char_separator to implement
 204   // faster assigning of tokens using assign instead of +=
 205
 206   namespace tokenizer_detail {
 207   //===========================================================================
 208   // Tokenizer was broken for wide character separators, at least on Windows, since
 209   // CRT functions isspace etc only expect values in [0, 0xFF]. Debug build asserts
 210   // if higher values are passed in. The traits extension class should take care of this.
 211   // Assuming that the conditional will always get optimized out in the function
 212   // implementations, argument types are not a problem since both forms of character classifiers
 213   // expect an int.
 214
 215 #if !defined(BOOST_NO_CWCTYPE)
 216   template<typename traits, int N>
 217   struct traits_extension_details : public traits {
 218     typedef typename traits::char_type char_type;
 219     static bool isspace(char_type c)
 220     {
 221        return std::iswspace(c) != 0;
 222     }
 223     static bool ispunct(char_type c)
 224     {
 225        return std::iswpunct(c) != 0;
 226     }
 227   };
 228
 229   template<typename traits>
 230   struct traits_extension_details<traits, 1> : public traits {
 231     typedef typename traits::char_type char_type;
 232     static bool isspace(char_type c)
 233     {
 234        return std::isspace(c) != 0;
 235     }
 236     static bool ispunct(char_type c)
 237     {
 238        return std::ispunct(c) != 0;
 239     }
 240   };
 241 #endif
 242
 243
 244   // In case there is no cwctype header, we implement the checks manually.
 245   // We make use of the fact that the tested categories should fit in ASCII.
 246   template<typename traits>
 247   struct traits_extension : public traits {
 248     typedef typename traits::char_type char_type;
 249     static bool isspace(char_type c)
 250     {
 251 #if !defined(BOOST_NO_CWCTYPE)
 252       return traits_extension_details<traits, sizeof(char_type)>::isspace(c);
 253 #else
 254       return static_cast< unsigned >(c) <= 255 && std::isspace(c) != 0;
 255 #endif
 256     }
 257
 258     static bool ispunct(char_type c)
 259     {
 260 #if !defined(BOOST_NO_CWCTYPE)
 261       return traits_extension_details<traits, sizeof(char_type)>::ispunct(c);
 262 #else
 263       return static_cast< unsigned >(c) <= 255 && std::ispunct(c) != 0;
 264 #endif
 265     }
 266   };
 267
 268   // The assign_or_plus_equal struct contains functions that implement
 269   // assign, +=, and clearing based on the iterator type.  The
 270   // generic case does nothing for plus_equal and clearing, while
 271   // passing through the call for assign.
 272   //
 273   // When an input iterator is being used, the situation is reversed.
 274   // The assign method does nothing, plus_equal invokes operator +=,
 275   // and the clearing method sets the supplied token to the default
 276   // token constructor's result.
 277   //
 278
 279   template<class IteratorTag>
 280   struct assign_or_plus_equal {
 281     template<class Iterator, class Token>
 282     static void assign(Iterator b, Iterator e, Token &t) {
 283       t.assign(b, e);
 284     }
 285
 286     template<class Token, class Value>
 287     static void plus_equal(Token &, const Value &) { }
 288
 289     // If we are doing an assign, there is no need for the
 290     // the clear.
 291     //
 292     template<class Token>
 293     static void clear(Token &) { }
 294   };
 295
 296   template <>
 297   struct assign_or_plus_equal<std::input_iterator_tag> {
 298     template<class Iterator, class Token>
 299     static void assign(Iterator , Iterator , Token &) { }
 300     template<class Token, class Value>
 301     static void plus_equal(Token &t, const Value &v) {
 302       t += v;
 303     }
 304     template<class Token>
 305     static void clear(Token &t) {
 306       t = Token();
 307     }
 308   };
 309
 310
 311   template<class Iterator>
 312   struct pointer_iterator_category{
 313     typedef std::random_access_iterator_tag type;
 314   };
 315
 316
 317   template<class Iterator>
 318   struct class_iterator_category{
 319     typedef typename Iterator::iterator_category type;
 320   };
 321
 322
 323
 324   // This portably gets the iterator_tag without partial template specialization
 325   template<class Iterator>
 326     struct get_iterator_category{
 327     typedef typename mpl::if_<is_pointer<Iterator>,
 328       pointer_iterator_category<Iterator>,
 329       class_iterator_category<Iterator>
 330     >::type cat;
 331
 332     typedef typename cat::type iterator_category;
 333   };
 334
 335
 336   } // namespace tokenizer_detail
 337
 338
 339   //===========================================================================
 340   // The offset_separator class, which is a model of TokenizerFunction.
 341   // Offset breaks a string into tokens based on a range of offsets
 342
 343   class offset_separator {
 344   private:
 345
 346     std::vector<int> offsets_;
 347     unsigned int current_offset_;
 348     bool wrap_offsets_;
 349     bool return_partial_last_;
 350
 351   public:
 352     template <typename Iter>
 353     offset_separator(Iter begin, Iter end, bool wrap_offsets = true,
 354                      bool return_partial_last = true)
 355       : offsets_(begin,end), current_offset_(0),
 356         wrap_offsets_(wrap_offsets),
 357         return_partial_last_(return_partial_last) { }
 358
 359     offset_separator()
 360       : offsets_(1,1), current_offset_(),
 361         wrap_offsets_(true), return_partial_last_(true) { }
 362
 363     void reset() {
 364       current_offset_ = 0;
 365     }
 366
 367     template <typename InputIterator, typename Token>
 368     bool operator()(InputIterator& next, InputIterator end, Token& tok)
 369     {
 370       typedef tokenizer_detail::assign_or_plus_equal<
 371         BOOST_DEDUCED_TYPENAME tokenizer_detail::get_iterator_category<
 372           InputIterator
 373         >::iterator_category
 374       > assigner;
 375
 376       BOOST_ASSERT(!offsets_.empty());
 377
 378       assigner::clear(tok);
 379       InputIterator start(next);
 380
 381       if (next == end)
 382         return false;
 383
 384       if (current_offset_ == offsets_.size())
 385       {
 386         if (wrap_offsets_)
 387           current_offset_=0;
 388         else
 389           return false;
 390       }
 391
 392       int c = offsets_[current_offset_];
 393       int i = 0;
 394       for (; i < c; ++i) {
 395         if (next == end)break;
 396         assigner::plus_equal(tok,*next++);
 397       }
 398       assigner::assign(start,next,tok);
 399
 400       if (!return_partial_last_)
 401         if (i < (c-1) )
 402           return false;
 403
 404       ++current_offset_;
 405       return true;
 406     }
 407   };
 408
 409
 410   //===========================================================================
 411   // The char_separator class breaks a sequence of characters into
 412   // tokens based on the character delimiters (very much like bad old
 413   // strtok). A delimiter character can either be kept or dropped. A
 414   // kept delimiter shows up as an output token, whereas a dropped
 415   // delimiter does not.
 416
 417   // This class replaces the char_delimiters_separator class. The
 418   // constructor for the char_delimiters_separator class was too
 419   // confusing and needed to be deprecated. However, because of the
 420   // default arguments to the constructor, adding the new constructor
 421   // would cause ambiguity, so instead I deprecated the whole class.
 422   // The implementation of the class was also simplified considerably.
 423
 424   enum empty_token_policy { drop_empty_tokens, keep_empty_tokens };
 425
 426   // The out of the box GCC 2.95 on cygwin does not have a char_traits class.
 427   template <typename Char,
 428     typename Tr = BOOST_DEDUCED_TYPENAME std::basic_string<Char>::traits_type >
 429   class char_separator
 430   {
 431     typedef tokenizer_detail::traits_extension<Tr> Traits;
 432     typedef std::basic_string<Char,Tr> string_type;
 433   public:
 434     explicit
 435     char_separator(const Char* dropped_delims,
 436                    const Char* kept_delims = 0,
 437                    empty_token_policy empty_tokens = drop_empty_tokens)
 438       : m_dropped_delims(dropped_delims),
 439         m_use_ispunct(false),
 440         m_use_isspace(false),
 441         m_empty_tokens(empty_tokens),
 442         m_output_done(false)
 443     {
 444       // Borland workaround
 445       if (kept_delims)
 446         m_kept_delims = kept_delims;
 447     }
 448
 449                 // use ispunct() for kept delimiters and isspace for dropped.
 450     explicit
 451     char_separator()
 452       : m_use_ispunct(true),
 453         m_use_isspace(true),
 454         m_empty_tokens(drop_empty_tokens),
 455         m_output_done(false) { }
 456
 457     void reset() { }
 458
 459     template <typename InputIterator, typename Token>
 460     bool operator()(InputIterator& next, InputIterator end, Token& tok)
 461     {
 462       typedef tokenizer_detail::assign_or_plus_equal<
 463         BOOST_DEDUCED_TYPENAME tokenizer_detail::get_iterator_category<
 464           InputIterator
 465         >::iterator_category
 466       > assigner;
 467
 468       assigner::clear(tok);
 469
 470       // skip past all dropped_delims
 471       if (m_empty_tokens == drop_empty_tokens)
 472         for (; next != end  && is_dropped(*next); ++next)
 473           { }
 474
 475       InputIterator start(next);
 476
 477       if (m_empty_tokens == drop_empty_tokens) {
 478
 479         if (next == end)
 480           return false;
 481
 482
 483         // if we are on a kept_delims move past it and stop
 484         if (is_kept(*next)) {
 485           assigner::plus_equal(tok,*next);
 486           ++next;
 487         } else
 488           // append all the non delim characters
 489           for (; next != end && !is_dropped(*next) && !is_kept(*next); ++next)
 490             assigner::plus_equal(tok,*next);
 491       }
 492       else { // m_empty_tokens == keep_empty_tokens
 493
 494         // Handle empty token at the end
 495         if (next == end)
 496         {
 497           if (m_output_done == false)
 498           {
 499             m_output_done = true;
 500             assigner::assign(start,next,tok);
 501             return true;
 502           }
 503           else
 504             return false;
 505         }
 506
 507         if (is_kept(*next)) {
 508           if (m_output_done == false)
 509             m_output_done = true;
 510           else {
 511             assigner::plus_equal(tok,*next);
 512             ++next;
 513             m_output_done = false;
 514           }
 515         }
 516         else if (m_output_done == false && is_dropped(*next)) {
 517           m_output_done = true;
 518         }
 519         else {
 520           if (is_dropped(*next))
 521             start=++next;
 522           for (; next != end && !is_dropped(*next) && !is_kept(*next); ++next)
 523             assigner::plus_equal(tok,*next);
 524           m_output_done = true;
 525         }
 526       }
 527       assigner::assign(start,next,tok);
 528       return true;
 529     }
 530
 531   private:
 532     string_type m_kept_delims;
 533     string_type m_dropped_delims;
 534     bool m_use_ispunct;
 535     bool m_use_isspace;
 536     empty_token_policy m_empty_tokens;
 537     bool m_output_done;
 538
 539     bool is_kept(Char E) const
 540     {
 541       if (m_kept_delims.length())
 542         return m_kept_delims.find(E) != string_type::npos;
 543       else if (m_use_ispunct) {
 544         return Traits::ispunct(E) != 0;
 545       } else
 546         return false;
 547     }
 548     bool is_dropped(Char E) const
 549     {
 550       if (m_dropped_delims.length())
 551         return m_dropped_delims.find(E) != string_type::npos;
 552       else if (m_use_isspace) {
 553         return Traits::isspace(E) != 0;
 554       } else
 555         return false;
 556     }
 557   };
 558
 559   //===========================================================================
 560   // The following class is DEPRECATED, use class char_separators instead.
 561   //
 562   // The char_delimiters_separator class, which is a model of
 563   // TokenizerFunction.  char_delimiters_separator breaks a string
 564   // into tokens based on character delimiters. There are 2 types of
 565   // delimiters. returnable delimiters can be returned as
 566   // tokens. These are often punctuation. nonreturnable delimiters
 567   // cannot be returned as tokens. These are often whitespace
 568
 569   // The out of the box GCC 2.95 on cygwin does not have a char_traits class.
 570   template <class Char,
 571     class Tr = BOOST_DEDUCED_TYPENAME std::basic_string<Char>::traits_type >
 572   class char_delimiters_separator {
 573   private:
 574
 575     typedef tokenizer_detail::traits_extension<Tr> Traits;
 576     typedef std::basic_string<Char,Tr> string_type;
 577     string_type returnable_;
 578     string_type nonreturnable_;
 579     bool return_delims_;
 580     bool no_ispunct_;
 581     bool no_isspace_;
 582
 583     bool is_ret(Char E)const
 584     {
 585       if (returnable_.length())
 586         return  returnable_.find(E) != string_type::npos;
 587       else{
 588         if (no_ispunct_) {return false;}
 589         else{
 590           int r = Traits::ispunct(E);
 591           return r != 0;
 592         }
 593       }
 594     }
 595     bool is_nonret(Char E)const
 596     {
 597       if (nonreturnable_.length())
 598         return  nonreturnable_.find(E) != string_type::npos;
 599       else{
 600         if (no_isspace_) {return false;}
 601         else{
 602           int r = Traits::isspace(E);
 603           return r != 0;
 604         }
 605       }
 606     }
 607
 608   public:
 609     explicit char_delimiters_separator(bool return_delims = false,
 610                                        const Char* returnable = 0,
 611                                        const Char* nonreturnable = 0)
 612       : returnable_(returnable ? returnable : string_type().c_str()),
 613         nonreturnable_(nonreturnable ? nonreturnable:string_type().c_str()),
 614         return_delims_(return_delims), no_ispunct_(returnable!=0),
 615         no_isspace_(nonreturnable!=0) { }
 616
 617     void reset() { }
 618
 619   public:
 620
 621      template <typename InputIterator, typename Token>
 622      bool operator()(InputIterator& next, InputIterator end,Token& tok) {
 623      tok = Token();
 624
 625      // skip past all nonreturnable delims
 626      // skip past the returnable only if we are not returning delims
 627      for (;next!=end && ( is_nonret(*next) || (is_ret(*next)
 628        && !return_delims_ ) );++next) { }
 629
 630      if (next == end) {
 631        return false;
 632      }
 633
 634      // if we are to return delims and we are one a returnable one
 635      // move past it and stop
 636      if (is_ret(*next) && return_delims_) {
 637        tok+=*next;
 638        ++next;
 639      }
 640      else
 641        // append all the non delim characters
 642        for (;next!=end && !is_nonret(*next) && !is_ret(*next);++next)
 643          tok+=*next;
 644
 645
 646      return true;
 647    }
 648   };
 649
 650
 651 } //namespace boost
 652
 653 #endif