//  Copyright (c) 2001-2010 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example shows how to create a simple lexer recognizing a couple of
//  different tokens and how to use this with a grammar. This example has a
//  heavily backtracking grammar, which makes it a candidate for lexer-based
//  parsing (all tokens are scanned and generated only once, even if
//  backtracking is required), which speeds up the overall parsing process
//  considerably, outweighing the overhead needed for setting up the lexer.
//
//  Additionally, this example demonstrates how to define a token set usable
//  as the skip parser during parsing, allowing several tokens to be ignored
//  while matching the grammar.
//
//  This example recognizes couplets, which are sequences of numbers enclosed
//  in matching pairs of parentheses. See the comments below for details
//  and examples.
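//
//  For instance, an input such as
//      (1) ((1 2)(3 4)) ... (((1 2 (3) 4)))
//  would be accepted (the "..." matches the ellipses token defined below),
//  with whitespace and C-style comments skipped in between.
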
// #define BOOST_SPIRIT_LEXERTL_DEBUG
// #define BOOST_SPIRIT_DEBUG

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>

#include <iostream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;

///////////////////////////////////////////////////////////////////////////////
//  Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example3_tokens : lex::lexer<Lexer>
{
    example3_tokens()
    {
        // define the tokens to match
        ellipses = "\\.\\.\\.";
        number = "[0-9]+";

        // associate the tokens and the token set with the lexer
        this->self = ellipses | '(' | ')' | number;

        // define the whitespace to ignore (spaces, tabs, newlines and C-style
        // comments)
        this->self("WS")
            =   lex::token_def<>("[ \\t\\n]+")          // whitespace
            |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"   // C style comments
            ;
    }

    // these tokens expose the iterator_range of the matched input sequence
    lex::token_def<> ellipses, identifier, number;
};

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example3_grammar
  : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
{
    template <typename TokenDef>
    example3_grammar(TokenDef const& tok)
      : example3_grammar::base_type(start)
    {
        start
            =  +(couplet | tok.ellipses)
            ;

        //  A couplet matches nested left and right parentheses.
        //  For example:
        //      (1) (1 2) (1 2 3) ...
        //      ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ...
        couplet
            =   tok.number
            |   '(' >> +couplet >> ')'
            ;

        BOOST_SPIRIT_DEBUG_NODE(start);
        BOOST_SPIRIT_DEBUG_NODE(couplet);
    }

    qi::rule<Iterator, qi::in_state_skipper<Lexer> > start, couplet;
};
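
// Note: qi::in_state_skipper<Lexer> is the skipper type corresponding to the
// qi::in_state("WS")[...] skip parser passed to phrase_parse() in main(); the
// grammar and its rules have to use the same skipper type.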

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the token type to return from the lexer iterator
    typedef lex::lexertl::token<base_iterator_type> token_type;

    // This is the lexer type to use to tokenize the input.
    // Here we use the lexertl based lexer engine.
    typedef lex::lexertl::lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example3_tokens<lexer_type> example3_tokens;

    // this is the iterator type exposed by the lexer
    typedef example3_tokens::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example3_grammar<iterator_type, example3_tokens::lexer_def> example3_grammar;
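
    // The second template parameter, example3_tokens::lexer_def, is the lexer
    // type the grammar's in_state_skipper is parameterized with (see the
    // grammar definition above).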

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example3_tokens tokens;                         // Our lexer
    example3_grammar calc(tokens);                  // Our parser

    // read in the file into memory
    std::string str (read_from_file("example3.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = tokens.begin(it, str.end());
    iterator_type end = tokens.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the lexer defined above as the skip parser.
    bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]);
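
    // qi::in_state("WS")[tokens.self] switches the lexer into the "WS" state
    // while skipping, so the whitespace and comment tokens defined in that
    // state are consumed silently between the grammar's tokens.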

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}