[ceph.git] / ceph / src / boost / libs / spirit / example / lex / example2.cpp

//  Copyright (c) 2001-2010 Hartmut Kaiser
// 
//  Distributed under the Boost Software License, Version 1.0. (See accompanying 
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example shows how to create a simple lexer recognizing a couple of 
//  different tokens and how to use this with a grammar. This example has a 
//  heavily backtracking grammar which makes it a candidate for lexer based 
//  parsing (all tokens are scanned and generated only once, even if 
//  backtracking is required) which speeds up the overall parsing process 
//  considerably, outweighing the overhead needed for setting up the lexer.
//  Additionally it demonstrates how to use one of the defined tokens as a 
//  parser component in the grammar.
//
//  The grammar recognizes a simple input structure: any number of simple 
//  English sentences (statements, questions and commands) are recognized and
//  counted separately.

// #define BOOST_SPIRIT_DEBUG 
// #define BOOST_SPIRIT_LEXERTL_DEBUG

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/phoenix/operator.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::ascii;
using boost::phoenix::ref;

///////////////////////////////////////////////////////////////////////////////
//  Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example2_tokens : lex::lexer<Lexer>
{
    // Token matching a single word: a run of letters, optionally followed 
    // by exactly one apostrophe and a further run of letters. Contractions 
    // such as "I'm" and "doesn't" are therefore lexed as one word, while an 
    // apostrophe at the very start or end is not part of a word.
    lex::token_def<> word;

    // Sets up the word pattern and registers every token this lexer knows.
    example2_tokens()
      : word("[a-zA-Z]+('[a-zA-Z]+)?")
    {
        // Register the punctuation, whitespace and word tokens with the 
        // lexer. Token definitions made from a single character are always 
        // taken literally (never as regex meta characters) and get their 
        // character code as token id, which is what allows the Qi grammar 
        // to refer to them with plain character literals.
        this->self 
            =   lex::token_def<>(',') 
            |   '!' 
            |   '.' 
            |   '?' 
            |   ' ' 
            |   '\n' 
            |   word
            ;
    }
};

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct example2_grammar : qi::grammar<Iterator>
{
    // Builds the grammar on top of the token definitions in 'tok'; only 
    // tok.word is referenced directly, all other tokens are matched through 
    // their character literals. 'story' is the start rule and all counters 
    // start out at zero.
    template <typename TokenDef>
    example2_grammar(TokenDef const& tok)
      : example2_grammar::base_type(story)
      , paragraphs(0), commands(0), questions(0), statements(0)
    {
        // The three kinds of sentence share the same body (one or more 
        // words, blanks or commas) and differ only in the punctuation 
        // character which terminates them.
        statement 
            =  +(tok.word | ' ' | ',') >> '.' 
            ;

        question 
            =  +(tok.word | ' ' | ',') >> '?' 
            ;

        command 
            =  +(tok.word | ' ' | ',') >> '!' 
            ;

        // A paragraph is one or more sentences, followed by any number of 
        // blanks and at least one newline. Whenever a sentence rule matches, 
        // the counter belonging to it is incremented; a matched paragraph 
        // increments the paragraph counter.
        paragraph
            =   (  +(   command [ ++ref(commands) ] 
                    |   question [ ++ref(questions) ]
                    |   statement [ ++ref(statements) ]
                    ) 
                    >> *char_(' ') >> +char_('\n')
                ) 
                [ ++ref(paragraphs) ]
            ;

        // The whole input consists of at least one paragraph.
        story 
            =  +paragraph
            ;

        // Debug hooks for the rules (see the commented-out 
        // BOOST_SPIRIT_DEBUG define near the top of this file).
        BOOST_SPIRIT_DEBUG_NODE(story);
        BOOST_SPIRIT_DEBUG_NODE(paragraph);
        BOOST_SPIRIT_DEBUG_NODE(command);
        BOOST_SPIRIT_DEBUG_NODE(question);
        BOOST_SPIRIT_DEBUG_NODE(statement);
    }

    // The rules making up the grammar, the start rule first.
    qi::rule<Iterator> story;
    qi::rule<Iterator> paragraph;
    qi::rule<Iterator> command;
    qi::rule<Iterator> question;
    qi::rule<Iterator> statement;

    // Counters updated by the semantic actions attached to the rules above.
    int paragraphs;
    int commands;
    int questions;
    int statements;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the token type to return from the lexer iterator
    typedef lex::lexertl::token<base_iterator_type> token_type;

    // This is the lexer type to use to tokenize the input.
    // Here we use the lexertl based lexer engine.
    typedef lex::lexertl::lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example2_tokens<lexer_type> example2_tokens;

    // this is the iterator type exposed by the lexer 
    typedef example2_tokens::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example2_grammar<iterator_type> example2_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example2_tokens tokens;                         // Our lexer
    example2_grammar calc(tokens);                  // Our parser 

    std::string str (read_from_file("example2.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = tokens.begin(it, str.end());
    iterator_type end = tokens.end();

    // Parsing is done based on the token stream, not the character 
    // stream read from the input.
    bool r = qi::parse(iter, end, calc);

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "There were " 
                  << calc.commands << " commands, " 
                  << calc.questions << " questions, and " 
                  << calc.statements << " statements.\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
Commit	Line	Data
7c673cae FG	1	// Copyright (c) 2001-2010 Hartmut Kaiser
	2	//
	3	// Distributed under the Boost Software License, Version 1.0. (See accompanying
	4	// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
	5
	6	// This example shows how to create a simple lexer recognizing a couple of
	7	// different tokens and how to use this with a grammar. This example has a
	8	// heavily backtracking grammar which makes it a candidate for lexer based
	9	// parsing (all tokens are scanned and generated only once, even if
	10	// backtracking is required) which speeds up the overall parsing process
	11	// considerably, out-weighting the overhead needed for setting up the lexer.
	12	// Additionally it demonstrates how to use one of the defined tokens as a
	13	// parser component in the grammar.
	14	//
	15	// The grammar recognizes a simple input structure: any number of English
	16	// simple sentences (statements, questions and commands) are recognized and
	17	// are being counted separately.
	18
	19	// #define BOOST_SPIRIT_DEBUG
	20	// #define BOOST_SPIRIT_LEXERTL_DEBUG
	21
7c673cae FG	22	#include <boost/spirit/include/qi.hpp>
7c673cae FG	23	#include <boost/spirit/include/lex_lexertl.hpp>
1e59de90	24	#include <boost/phoenix/operator.hpp>
7c673cae FG	25
	26	#include <iostream>
	27	#include <fstream>
	28	#include <string>
	29
	30	#include "example.hpp"
	31
	32	using namespace boost::spirit;
	33	using namespace boost::spirit::ascii;
	34	using boost::phoenix::ref;
	35
	36	///////////////////////////////////////////////////////////////////////////////
	37	// Token definition
	38	///////////////////////////////////////////////////////////////////////////////
	39	template <typename Lexer>
	40	struct example2_tokens : lex::lexer<Lexer>
	41	{
	42	example2_tokens()
	43	{
	44	// A 'word' is comprised of one or more letters and an optional
	45	// apostrophe. If it contains an apostrophe, there may only be one and
	46	// the apostrophe must be preceded and succeeded by at least 1 letter.
	47	// For example, "I'm" and "doesn't" meet the definition of 'word' we
	48	// define below.
	49	word = "[a-zA-Z]+('[a-zA-Z]+)?";
	50
	51	// Associate the tokens and the token set with the lexer. Note that
	52	// single character token definitions as used below always get
	53	// interpreted literally and never as special regex characters. This is
	54	// done to be able to assign single characters the id of their character
	55	// code value, allowing to reference those as literals in Qi grammars.
	56	this->self = lex::token_def<>(',') \| '!' \| '.' \| '?' \| ' ' \| '\n' \| word;
	57	}
	58
	59	lex::token_def<> word;
	60	};
	61
	62	///////////////////////////////////////////////////////////////////////////////
	63	// Grammar definition
	64	///////////////////////////////////////////////////////////////////////////////
	65	template <typename Iterator>
	66	struct example2_grammar : qi::grammar<Iterator>
	67	{
	68	template <typename TokenDef>
	69	example2_grammar(TokenDef const& tok)
	70	: example2_grammar::base_type(story)
	71	, paragraphs(0), commands(0), questions(0), statements(0)
	72	{
	73	story
	74	= +paragraph
	75	;
	76
	77	paragraph
	78	= ( +( command [ ++ref(commands) ]
	79	\| question [ ++ref(questions) ]
	80	\| statement [ ++ref(statements) ]
	81	)
	82	>> *char_(' ') >> +char_('\n')
	83	)
	84	[ ++ref(paragraphs) ]
	85	;
	86
	87	command
	88	= +(tok.word \| ' ' \| ',') >> '!'
89	;
90
91	question
92	= +(tok.word \| ' ' \| ',') >> '?'
93	;
94
95	statement
96	= +(tok.word \| ' ' \| ',') >> '.'
97	;
98
99	BOOST_SPIRIT_DEBUG_NODE(story);
100	BOOST_SPIRIT_DEBUG_NODE(paragraph);
101	BOOST_SPIRIT_DEBUG_NODE(command);
102	BOOST_SPIRIT_DEBUG_NODE(question);
103	BOOST_SPIRIT_DEBUG_NODE(statement);
104	}
105
106	qi::rule<Iterator> story, paragraph, command, question, statement;
107	int paragraphs, commands, questions, statements;
108	};
109
110	///////////////////////////////////////////////////////////////////////////////
111	int main()
112	{
113	// iterator type used to expose the underlying input stream
114	typedef std::string::iterator base_iterator_type;
115
116	// This is the token type to return from the lexer iterator
117	typedef lex::lexertl::token<base_iterator_type> token_type;
118
119	// This is the lexer type to use to tokenize the input.
120	// Here we use the lexertl based lexer engine.
121	typedef lex::lexertl::lexer<token_type> lexer_type;
122
123	// This is the token definition type (derived from the given lexer type).
124	typedef example2_tokens<lexer_type> example2_tokens;
125
126	// this is the iterator type exposed by the lexer
127	typedef example2_tokens::iterator_type iterator_type;
128
129	// this is the type of the grammar to parse
130	typedef example2_grammar<iterator_type> example2_grammar;
131
132	// now we use the types defined above to create the lexer and grammar
133	// object instances needed to invoke the parsing process
134	example2_tokens tokens; // Our lexer
135	example2_grammar calc(tokens); // Our parser
136
137	std::string str (read_from_file("example2.input"));
138
139	// At this point we generate the iterator pair used to expose the
140	// tokenized input stream.
141	std::string::iterator it = str.begin();
142	iterator_type iter = tokens.begin(it, str.end());
143	iterator_type end = tokens.end();
144
145	// Parsing is done based on the token stream, not the character
146	// stream read from the input.
147	bool r = qi::parse(iter, end, calc);
148
149	if (r && iter == end)
150	{
151	std::cout << "-------------------------\n";
152	std::cout << "Parsing succeeded\n";
153	std::cout << "There were "
154	<< calc.commands << " commands, "
155	<< calc.questions << " questions, and "
156	<< calc.statements << " statements.\n";
157	std::cout << "-------------------------\n";
158	}
159	else
160	{
161	std::cout << "-------------------------\n";
162	std::cout << "Parsing failed\n";
163	std::cout << "-------------------------\n";
164	}
165
166	std::cout << "Bye... :-) \n\n";
167	return 0;
168	}