]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright (c) 2001-2010 Hartmut Kaiser |
2 | // | |
3 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
4 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
5 | ||
6 | // This example shows how to create a simple lexer recognizing a couple of | |
7 | // different tokens aimed at a simple language and how to use this lexer with | |
8 | // a grammar. It shows how to associate attributes to tokens and how to access | |
9 | // the token attributes from inside the grammar. | |
10 | // | |
11 | // We use explicit token attribute types, making the corresponding token instances | |
12 | // convert the matched input into an instance of that type. The token |
13 | // attribute is exposed as the parser attribute if this token is used as a | |
14 | // parser component somewhere in a grammar. | |
15 | // | |
16 | // Additionally, this example demonstrates, how to define a token set usable | |
17 | // as the skip parser during parsing, allowing to define several tokens to be | |
18 | // ignored. | |
19 | // | |
20 | // This example recognizes a very simple programming language having | |
21 | // assignment statements and if and while control structures. Look at the file | |
22 | // example4.input for an example. | |
23 | ||
7c673cae FG |
24 | #include <boost/spirit/include/qi.hpp> |
25 | #include <boost/spirit/include/lex_lexertl.hpp> | |
1e59de90 | 26 | #include <boost/phoenix/operator.hpp> |
7c673cae FG |
27 | |
28 | #include <iostream> | |
29 | #include <fstream> | |
30 | #include <string> | |
31 | ||
32 | #include "example.hpp" | |
33 | ||
34 | using namespace boost::spirit; | |
35 | using boost::phoenix::val; | |
36 | ||
37 | /////////////////////////////////////////////////////////////////////////////// | |
38 | // Token definition | |
39 | /////////////////////////////////////////////////////////////////////////////// | |
///////////////////////////////////////////////////////////////////////////////
//  Token definition
//
//  Defines the tokens of the toy language: keywords (if, else, while),
//  identifiers, integer constants, single-character operators/punctuation,
//  and a "WS" lexer state holding the tokens to be skipped (whitespace and
//  C-style comments).
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example4_tokens : lex::lexer<Lexer>
{
    example4_tokens()
    {
        // define the patterns each token matches
        identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
        constant = "[0-9]+";
        if_ = "if";
        else_ = "else";
        while_ = "while";

        // associate the tokens and the token set with the lexer; the
        // single characters are exposed as char_token_type tokens
        this->self = lex::token_def<>('(') | ')' | '{' | '}' | '=' | ';' | constant;

        // NOTE: the keyword tokens are added before 'identifier' —
        // definition order determines match priority, so "if"/"else"/"while"
        // are recognized as keywords rather than as plain identifiers
        this->self += if_ | else_ | while_ | identifier;

        // define the tokens to ignore while parsing in the "WS" lexer state:
        // spaces, tabs, newlines, and C-style /* ... */ comments
        this->self("WS")
            =   lex::token_def<>("[ \\t\\n]+")
            |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
            ;
    }

//[example4_token_def
    // these tokens have no attribute type: they expose only the
    // iterator_range of the matched input sequence
    lex::token_def<> if_, else_, while_;

    // The following two tokens have an associated attribute type: 'identifier'
    // carries a string (the identifier name) and 'constant' carries the
    // matched integer value.
    //
    // Note: any token attribute type explicitly specified in a token_def<>
    //       declaration needs to be listed during token type definition as
    //       well (see the typedef for the token_type in main() below).
    //
    // The conversion of the matched input to an instance of this type occurs
    // once (on first access), which makes token attributes as efficient as
    // possible. Moreover, token instances are constructed once by the lexer
    // library. From this point on tokens are passed by reference only,
    // avoiding them being copied around.
    lex::token_def<std::string> identifier;
    lex::token_def<unsigned int> constant;
//]
};
85 | ||
86 | /////////////////////////////////////////////////////////////////////////////// | |
87 | // Grammar definition | |
88 | /////////////////////////////////////////////////////////////////////////////// | |
///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
//
//  Parses the token stream produced by example4_tokens: a program is one or
//  more '{ ... }' blocks containing assignment, if, and while statements.
//  The skipper is qi::in_state_skipper<Lexer>, i.e. the tokens defined in
//  the lexer's "WS" state are ignored between the tokens matched here.
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example4_grammar
  : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
{
    // 'tok' gives the grammar access to the token_def instances defined in
    // example4_tokens, so the tokens can be used as parser components
    template <typename TokenDef>
    example4_grammar(TokenDef const& tok)
      : example4_grammar::base_type(program)
    {
        using boost::spirit::_val;

        program
            =  +block                   // one or more blocks
            ;

        block
            =   '{' >> *statement >> '}'
            ;

        statement
            =   assignment
            |   if_stmt
            |   while_stmt
            ;

        // the semantic action prints the identifier carried by the first
        // component of the sequence (_1, a std::string — see the lexer)
        assignment
            =   (tok.identifier >> '=' >> expression >> ';')
                [
                    std::cout << val("assignment statement to: ") << _1 << "\n"
                ]
            ;

        // _2 is the attribute of the parenthesized condition (the
        // expression's variant value); the else-branch is optional
        if_stmt
            =   (   tok.if_ >> '(' >> expression >> ')' >> block
                    >> -(tok.else_ >> block)
                )
                [
                    std::cout << val("if expression: ") << _2 << "\n"
                ]
            ;

        while_stmt
            =   (tok.while_ >> '(' >> expression >> ')' >> block)
                [
                    std::cout << val("while expression: ") << _2 << "\n"
                ]
            ;

        // since expression has a variant return type accommodating for
        // std::string and unsigned integer, both possible values may be
        // returned to the calling rule
        expression
            =   tok.identifier [ _val = _1 ]
            |   tok.constant   [ _val = _1 ]
            ;
    }

    // attribute type of 'expression': either a constant or an identifier name
    typedef boost::variant<unsigned int, std::string> expression_type;

    qi::rule<Iterator, qi::in_state_skipper<Lexer> > program, block, statement;
    qi::rule<Iterator, qi::in_state_skipper<Lexer> > assignment, if_stmt;
    qi::rule<Iterator, qi::in_state_skipper<Lexer> > while_stmt;

    // the expression is the only rule having a return value
    qi::rule<Iterator, expression_type(), qi::in_state_skipper<Lexer> > expression;
};
154 | ||
155 | /////////////////////////////////////////////////////////////////////////////// | |
156 | int main() | |
157 | { | |
158 | // iterator type used to expose the underlying input stream | |
159 | typedef std::string::iterator base_iterator_type; | |
160 | ||
161 | //[example4_token | |
162 | // This is the lexer token type to use. The second template parameter lists | |
163 | // all attribute types used for token_def's during token definition (see | |
164 | // calculator_tokens<> above). Here we use the predefined lexertl token | |
165 | // type, but any compatible token type may be used instead. | |
166 | // | |
167 | // If you don't list any token attribute types in the following declaration | |
168 | // (or just use the default token type: lexertl_token<base_iterator_type>) | |
169 | // it will compile and work just fine, just a bit less efficient. This is | |
170 | // because the token attribute will be generated from the matched input | |
171 | // sequence every time it is requested. But as soon as you specify at | |
172 | // least one token attribute type you'll have to list all attribute types | |
173 | // used for token_def<> declarations in the token definition class above, | |
174 | // otherwise compilation errors will occur. | |
175 | typedef lex::lexertl::token< | |
176 | base_iterator_type, boost::mpl::vector<unsigned int, std::string> | |
177 | > token_type; | |
178 | //] | |
179 | // Here we use the lexertl based lexer engine. | |
180 | typedef lex::lexertl::lexer<token_type> lexer_type; | |
181 | ||
182 | // This is the token definition type (derived from the given lexer type). | |
183 | typedef example4_tokens<lexer_type> example4_tokens; | |
184 | ||
185 | // this is the iterator type exposed by the lexer | |
186 | typedef example4_tokens::iterator_type iterator_type; | |
187 | ||
188 | // this is the type of the grammar to parse | |
189 | typedef example4_grammar<iterator_type, example4_tokens::lexer_def> example4_grammar; | |
190 | ||
191 | // now we use the types defined above to create the lexer and grammar | |
192 | // object instances needed to invoke the parsing process | |
193 | example4_tokens tokens; // Our lexer | |
194 | example4_grammar calc(tokens); // Our parser | |
195 | ||
196 | std::string str (read_from_file("example4.input")); | |
197 | ||
198 | // At this point we generate the iterator pair used to expose the | |
199 | // tokenized input stream. | |
200 | std::string::iterator it = str.begin(); | |
201 | iterator_type iter = tokens.begin(it, str.end()); | |
202 | iterator_type end = tokens.end(); | |
203 | ||
204 | // Parsing is done based on the token stream, not the character | |
205 | // stream read from the input. | |
206 | // Note how we use the lexer defined above as the skip parser. It must | |
207 | // be explicitly wrapped inside a state directive, switching the lexer | |
208 | // state for the duration of skipping whitespace. | |
209 | bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]); | |
210 | ||
211 | if (r && iter == end) | |
212 | { | |
213 | std::cout << "-------------------------\n"; | |
214 | std::cout << "Parsing succeeded\n"; | |
215 | std::cout << "-------------------------\n"; | |
216 | } | |
217 | else | |
218 | { | |
219 | std::cout << "-------------------------\n"; | |
220 | std::cout << "Parsing failed\n"; | |
221 | std::cout << "-------------------------\n"; | |
222 | } | |
223 | ||
224 | std::cout << "Bye... :-) \n\n"; | |
225 | return 0; | |
226 | } |