[ceph.git] / ceph / src / boost / libs / spirit / example / lex / strip_comments_lexer.cpp

//  Copyright (c) 2001-2010 Hartmut Kaiser
// 
//  Distributed under the Boost Software License, Version 1.0. (See accompanying 
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example is equivalent to the following lex program:
//
//       %{
//       /* INITIAL is the default start state.  COMMENT is our new  */
//       /* state where we remove comments.                          */
//       %}
// 
//       %s COMMENT
//       %%
//       <INITIAL>"//".*    ;
//       <INITIAL>"/*"      BEGIN COMMENT; 
//       <INITIAL>.         ECHO;
//       <INITIAL>[\n]      ECHO;
//       <COMMENT>"*/"      BEGIN INITIAL;
//       <COMMENT>.         ;
//       <COMMENT>[\n]      ;
//       %%
// 
//       main() 
//       {
//         yylex();
//       }
//
//  Its purpose is to strip comments out of C code.
//
//  Additionally this example demonstrates the use of lexer states to structure
//  the lexer definition.

// #define BOOST_SPIRIT_LEXERTL_DEBUG

#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/phoenix/operator.hpp>
#include <boost/phoenix/statement.hpp>
#include <boost/phoenix/core.hpp>

#include <iostream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;  

///////////////////////////////////////////////////////////////////////////////
//  Token definition: We use the lexertl based lexer engine as the underlying 
//                    lexer type.
///////////////////////////////////////////////////////////////////////////////
// Token ids for this example, offset from lex::min_token_id.
// IDEOL is defined relative to IDANY so the two ids stay adjacent.
enum tokenids 
{
    IDANY = lex::min_token_id + 10,
    IDEOL = IDANY + 1               // i.e. lex::min_token_id + 11
};

///////////////////////////////////////////////////////////////////////////////
// Simple custom semantic action function object used to print the matched
// input sequence for a particular token
template <typename Char, typename Traits>
struct echo_input_functor
{
    // Bind the functor to the stream it should write to. Only a reference 
    // is stored, so the stream has to outlive the functor.
    echo_input_functor (std::basic_ostream<Char, Traits>& target)
      : os(target) {}

    // Semantic action entry point, called during lexing. The matched input 
    // is the range [first, last); it is copied into a std::string and 
    // written to the bound stream. The remaining arguments are not used.
    template <typename Iterator, typename Context>
    void operator()(Iterator const& first, Iterator const& last
      , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&
      , std::size_t&, Context&) const
    {
        std::string const matched(first, last);
        os << matched;
    }

    // Output stream receiving the echoed text.
    std::basic_ostream<Char, Traits>& os;
};

// Convenience factory: builds an echo_input_functor for the given stream so
// that Char and Traits are deduced from the argument instead of spelled out.
template <typename Char, typename Traits>
inline echo_input_functor<Char, Traits> 
echo_input(std::basic_ostream<Char, Traits>& os)
{
    echo_input_functor<Char, Traits> printer(os);
    return printer;
}

///////////////////////////////////////////////////////////////////////////////
// Another simple custom semantic action function object used to switch the 
// state of the lexer 
struct set_lexer_state
{
    // Keep a private copy of the name of the state to switch to.
    set_lexer_state(char const* target_state)
      : state(target_state) {}

    // Semantic action entry point, called during lexing. The matched range
    // and the other arguments are ignored; only the lexer context is used, 
    // to request the stored state by name.
    template <typename Iterator, typename Context>
    void operator()(Iterator const&, Iterator const&
      , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&
      , std::size_t&, Context& context) const
    {
        char const* const name = state.c_str();
        context.set_state_name(name);
    }

    // Name of the lexer state this action switches to.
    std::string state;
};

///////////////////////////////////////////////////////////////////////////////
// Lexer definition of the comment stripper. Two lexer states are used:
//   - the default ("INITIAL") state echoes line ends and single characters 
//     to std::cout, matches '//' comments without any action, and switches 
//     to "COMMENT" when a '/*' is matched;
//   - the "COMMENT" state matches input without any output action and 
//     switches back to "INITIAL" when '*/' is matched.
template <typename Lexer>
struct strip_comments_tokens : lex::lexer<Lexer>
{
    strip_comments_tokens()
      : strip_comments_tokens::base_type(lex::match_flags::match_default)
    {
        // define tokens and associate them with the lexer
        // The escaped double quotes are part of the pattern text itself; 
        // they mirror the quoted literals of the lex program shown in the 
        // file header. The trailing comments show the intended pattern.
        cppcomment = "\"//\"[^\n]*";    // '//[^\n]*'
        ccomment = "\"/*\"";            // '/*'
        endcomment = "\"*/\"";          // '*/'
        any = std::string(".");
        eol = "\n";

        // The following tokens are associated with the default lexer state 
        // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is 
        // strictly optional.
        // cppcomment carries no semantic action, so nothing is printed for
        // it; ccomment only switches the state; eol and any are echoed.
        this->self 
            =   cppcomment
            |   ccomment    [ set_lexer_state("COMMENT") ]
            |   eol         [ echo_input(std::cout) ]
            |   any         [ echo_input(std::cout) ]
            ;

        // The following tokens are associated with the lexer state 'COMMENT'.
        // Only endcomment has an action (switching back to "INITIAL"); the 
        // two inline patterns have none, so nothing is echoed in this state.
        this->self("COMMENT") 
            =   endcomment  [ set_lexer_state("INITIAL") ]
            |   "\n"
            |   std::string(".") 
            ;
    }

    // Named token definitions referenced by the lexer states above.
    lex::token_def<> cppcomment, ccomment, endcomment, any, eol;
};

  ///////////////////////////////////////////////////////////////////////////////
// Program entry point. Reads the input file (argv[1] if given, otherwise
// "strip_comments.input"), runs the comment-stripping lexer over it and 
// returns 0 on success or 1 if lexical analysis failed.
int main(int argc, char* argv[])
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // lexer type
    typedef 
        lex::lexertl::actor_lexer<lex::lexertl::token<base_iterator_type> > 
    lexer_type;

    // now we use the types defined above to create the lexer object 
    // instance needed to invoke the tokenization process
    strip_comments_tokens<lexer_type> strip_comments;             // Our lexer

    // Only touch argv[1] when it is actually present; this also covers the 
    // unusual argc == 0 case, which previously fell through to argv[1].
    char const* const filename = argc > 1 ? argv[1] : "strip_comments.input";

    // No parsing is done at all, everything happens in the lexer semantic
    // actions.
    std::string str (read_from_file(filename));
    base_iterator_type first = str.begin();
    bool r = lex::tokenize(first, str.end(), strip_comments);

    if (!r) {
        // 'first' is used as the position at which lexing stopped; print 
        // the unconsumed remainder of the input.
        std::string rest(first, str.end());
        std::cerr << "Lexical analysis failed\n" << "stopped at: \"" 
                  << rest << "\"\n";

        // Report the failure to the caller instead of exiting with success.
        return 1;
    }
    return 0;
}
Commit	Line	Data
7c673cae FG	1	// Copyright (c) 2001-2010 Hartmut Kaiser
	2	//
	3	// Distributed under the Boost Software License, Version 1.0. (See accompanying
	4	// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
	5
	6	// This example is the equivalent to the following lex program:
	7	//
	8	// %{
	9	// /* INITIAL is the default start state. COMMENT is our new */
	10	// /* state where we remove comments. */
	11	// %}
	12	//
	13	// %s COMMENT
	14	// %%
	15	// <INITIAL>"//".* ;
	16	// <INITIAL>"/*" BEGIN COMMENT;
	17	// <INITIAL>. ECHO;
	18	// <INITIAL>[\n] ECHO;
	19	// <COMMENT>"*/" BEGIN INITIAL;
	20	// <COMMENT>. ;
	21	// <COMMENT>[\n] ;
	22	// %%
	23	//
	24	// main()
	25	// {
	26	// yylex();
	27	// }
	28	//
	29	// Its purpose is to strip comments out of C code.
	30	//
	31	// Additionally this example demonstrates the use of lexer states to structure
	32	// the lexer definition.
	33
	34	// #define BOOST_SPIRIT_LEXERTL_DEBUG
	35
7c673cae	36	#include <boost/spirit/include/lex_lexertl.hpp>
1e59de90 TL	37	#include <boost/phoenix/operator.hpp>
	38	#include <boost/phoenix/statement.hpp>
	39	#include <boost/phoenix/core.hpp>
7c673cae FG	40
	41	#include <iostream>
	42	#include <string>
	43
	44	#include "example.hpp"
	45
	46	using namespace boost::spirit;
	47
	48	///////////////////////////////////////////////////////////////////////////////
	49	// Token definition: We use the lexertl based lexer engine as the underlying
	50	// lexer type.
	51	///////////////////////////////////////////////////////////////////////////////
	52	enum tokenids
	53	{
	54	IDANY = lex::min_token_id + 10,
	55	IDEOL = lex::min_token_id + 11
	56	};
	57
	58	///////////////////////////////////////////////////////////////////////////////
	59	// Simple custom semantic action function object used to print the matched
	60	// input sequence for a particular token
	61	template <typename Char, typename Traits>
	62	struct echo_input_functor
	63	{
	64	echo_input_functor (std::basic_ostream<Char, Traits>& os_)
	65	: os(os_) {}
	66
	67	// This is called by the semantic action handling code during the lexing
	68	template <typename Iterator, typename Context>
	69	void operator()(Iterator const& b, Iterator const& e
	70	, BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&
	71	, std::size_t&, Context&) const
	72	{
	73	os << std::string(b, e);
	74	}
	75
	76	std::basic_ostream<Char, Traits>& os;
	77	};
	78
	79	template <typename Char, typename Traits>
	80	inline echo_input_functor<Char, Traits>
	81	echo_input(std::basic_ostream<Char, Traits>& os)
	82	{
	83	return echo_input_functor<Char, Traits>(os);
	84	}
	85
	86	///////////////////////////////////////////////////////////////////////////////
	87	// Another simple custom semantic action function object used to switch the
	88	// state of the lexer
	89	struct set_lexer_state
	90	{
	91	set_lexer_state(char const* state_)
	92	: state(state_) {}
	93
	94	// This is called by the semantic action handling code during the lexing
	95	template <typename Iterator, typename Context>
	96	void operator()(Iterator const&, Iterator const&
	97	, BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&
	98	, std::size_t&, Context& ctx) const
	99	{
	100	ctx.set_state_name(state.c_str());
	101	}
	102
	103	std::string state;
104	};
105
106	///////////////////////////////////////////////////////////////////////////////
107	template <typename Lexer>
108	struct strip_comments_tokens : lex::lexer<Lexer>
109	{
110	strip_comments_tokens()
111	: strip_comments_tokens::base_type(lex::match_flags::match_default)
112	{
113	// define tokens and associate them with the lexer
114	cppcomment = "\"//\"[^\n]*"; // '//[^\n]*'
115	ccomment = "\"/*\""; // '/*'
116	endcomment = "\"*/\""; // '*/'
117	any = std::string(".");
118	eol = "\n";
119
120	// The following tokens are associated with the default lexer state
121	// (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
122	// strictly optional.
123	this->self
124	= cppcomment
125	| ccomment [ set_lexer_state("COMMENT") ]
126	| eol [ echo_input(std::cout) ]
127	| any [ echo_input(std::cout) ]
128	;
129
130	// The following tokens are associated with the lexer state 'COMMENT'.
131	this->self("COMMENT")
132	= endcomment [ set_lexer_state("INITIAL") ]
133	| "\n"
134	| std::string(".")
135	;
136	}
137
138	lex::token_def<> cppcomment, ccomment, endcomment, any, eol;
139	};
140
141	///////////////////////////////////////////////////////////////////////////////
142	int main(int argc, char* argv[])
143	{
144	// iterator type used to expose the underlying input stream
145	typedef std::string::iterator base_iterator_type;
146
147	// lexer type
148	typedef
149	lex::lexertl::actor_lexer<lex::lexertl::token<base_iterator_type> >
150	lexer_type;
151
152	// now we use the types defined above to create the lexer and grammar
153	// object instances needed to invoke the parsing process
154	strip_comments_tokens<lexer_type> strip_comments; // Our lexer
155
f67539c2	156	// No parsing is done altogether, everything happens in the lexer semantic
7c673cae FG	157	// actions.
	158	std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1]));
	159	base_iterator_type first = str.begin();
	160	bool r = lex::tokenize(first, str.end(), strip_comments);
	161
	162	if (!r) {
	163	std::string rest(first, str.end());
	164	std::cerr << "Lexical analysis failed\n" << "stopped at: \""
	165	<< rest << "\"\n";
	166	}
	167	return 0;
	168	}
	169
	170
	171