ceph/src/boost/libs/spirit/example/lex/strip_comments.cpp

   1 //  Copyright (c) 2001-2010 Hartmut Kaiser
   2 //
   3 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
   4 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
   5
   6 //  This example is equivalent to the following lex program:
   7 //
   8 //       %{
   9 //       /* INITIAL is the default start state.  COMMENT is our new  */
  10 //       /* state where we remove comments.                          */
  11 //       %}
  12 //
  13 //       %s COMMENT
  14 //       %%
  15 //       <INITIAL>"//".*    ;
  16 //       <INITIAL>"/*"      BEGIN COMMENT;
  17 //       <INITIAL>.         ECHO;
  18 //       <INITIAL>[\n]      ECHO;
  19 //       <COMMENT>"*/"      BEGIN INITIAL;
  20 //       <COMMENT>.         ;
  21 //       <COMMENT>[\n]      ;
  22 //       %%
  23 //
  24 //       main()
  25 //       {
  26 //         yylex();
  27 //       }
  28 //
  29 //  Its purpose is to strip comments out of C code.
  30 //
  31 //  Additionally this example demonstrates the use of lexer states to structure
  32 //  the lexer definition.
  33
  34 // #define BOOST_SPIRIT_LEXERTL_DEBUG
  35
  36 #include <boost/config/warning_disable.hpp>
  37 #include <boost/spirit/include/qi.hpp>
  38 #include <boost/spirit/include/lex_lexertl.hpp>
  39 #include <boost/spirit/include/phoenix_operator.hpp>
  40 #include <boost/spirit/include/phoenix_container.hpp>
  41
  42 #include <iostream>
  43 #include <string>
  44
  45 #include "example.hpp"
  46
  47 using namespace boost::spirit;
  48
  49 ///////////////////////////////////////////////////////////////////////////////
  50 //  Token definition: We use the lexertl based lexer engine as the underlying
  51 //                    lexer type.
  52 ///////////////////////////////////////////////////////////////////////////////
  53 enum tokenids
  54 {
  55     IDANY = lex::min_token_id + 10
  56 };
  57
  58 template <typename Lexer>
  59 struct strip_comments_tokens : lex::lexer<Lexer>
  60 {
  61     strip_comments_tokens()
  62       : strip_comments_tokens::base_type(lex::match_flags::match_default)
  63     {
  64         // define tokens and associate them with the lexer
  65         cppcomment = "\"//\"[^\n]*";    // '//[^\n]*'
  66         ccomment = "\"/*\"";            // '/*'
  67         endcomment = "\"*/\"";          // '*/'
  68
  69         // The following tokens are associated with the default lexer state
  70         // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
  71         // strictly optional.
  72         this->self.add
  73             (cppcomment)    // no explicit token id is associated
  74             (ccomment)
  75             (".", IDANY)    // IDANY is the token id associated with this token
  76                             // definition
  77         ;
  78
  79         // The following tokens are associated with the lexer state "COMMENT".
  80         // We switch lexer states from inside the parsing process using the
  81         // in_state("COMMENT")[] parser component as shown below.
  82         this->self("COMMENT").add
  83             (endcomment)
  84             (".", IDANY)
  85         ;
  86     }
  87
  88     lex::token_def<> cppcomment, ccomment, endcomment;
  89 };
  90
  91 ///////////////////////////////////////////////////////////////////////////////
  92 //  Grammar definition
  93 ///////////////////////////////////////////////////////////////////////////////
  94 template <typename Iterator>
  95 struct strip_comments_grammar : qi::grammar<Iterator>
  96 {
  97     template <typename TokenDef>
  98     strip_comments_grammar(TokenDef const& tok)
  99       : strip_comments_grammar::base_type(start)
 100     {
 101         // The in_state("COMMENT")[...] parser component switches the lexer
 102         // state to be 'COMMENT' during the matching of the embedded parser.
 103         start =  *(   tok.ccomment
 104                       >>  qi::in_state("COMMENT")
 105                           [
 106                               // the lexer is in the 'COMMENT' state during
 107                               // matching of the following parser components
 108                               *token(IDANY) >> tok.endcomment
 109                           ]
 110                   |   tok.cppcomment
 111                   |   qi::token(IDANY)   [ std::cout << _1 ]
 112                   )
 113               ;
 114     }
 115
 116     qi::rule<Iterator> start;
 117 };
 118
 119 ///////////////////////////////////////////////////////////////////////////////
 120 int main(int argc, char* argv[])
 121 {
 122     // iterator type used to expose the underlying input stream
 123     typedef std::string::iterator base_iterator_type;
 124
 125     // lexer type
 126     typedef
 127         lex::lexertl::lexer<lex::lexertl::token<base_iterator_type> >
 128     lexer_type;
 129
 130     // iterator type exposed by the lexer
 131     typedef strip_comments_tokens<lexer_type>::iterator_type iterator_type;
 132
 133     // now we use the types defined above to create the lexer and grammar
 134     // object instances needed to invoke the parsing process
 135     strip_comments_tokens<lexer_type> strip_comments;           // Our lexer
 136     strip_comments_grammar<iterator_type> g (strip_comments);   // Our parser
 137
 138     // Parsing is done based on the token stream, not the character
 139     // stream read from the input.
 140     std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1]));
 141     base_iterator_type first = str.begin();
 142
 143     bool r = lex::tokenize_and_parse(first, str.end(), strip_comments, g);
 144
 145     if (r) {
 146         std::cout << "-------------------------\n";
 147         std::cout << "Parsing succeeded\n";
 148         std::cout << "-------------------------\n";
 149     }
 150     else {
 151         std::string rest(first, str.end());
 152         std::cout << "-------------------------\n";
 153         std::cout << "Parsing failed\n";
 154         std::cout << "stopped at: \"" << rest << "\"\n";
 155         std::cout << "-------------------------\n";
 156     }
 157
 158     std::cout << "Bye... :-) \n\n";
 159     return 0;
 160 }
 161
 162
 163