// History: commits 7c673cae (FG), 1e59de90 (TL) — blame-table extraction residue removed.
//  Copyright (c) 2001-2010 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example is the equivalent to the following lex program:
/*
//[wcl_flex_version
    %{
        int c = 0, w = 0, l = 0;
    %}
    %%
    [^ \t\n]+ { ++w; c += yyleng; }
    \n        { ++c; ++l; }
    .         { ++c; }
    %%
    main()
    {
        yylex();
        printf("%d %d %d\n", l, w, c);
    }
//]
*/
//  Its purpose is to do the word count function of the wc command in UNIX. It
//  prints the number of lines, words and characters in a file.
//
//  This example shows how to use semantic actions associated with token
//  definitions to directly attach actions to tokens. These get executed
//  whenever the corresponding token got matched in the input sequence. Note,
//  how this example implements all functionality directly in the lexer
//  definition without any need for a parser.

// #define BOOST_SPIRIT_LEXERTL_DEBUG
//[wcl_includes
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/phoenix/operator.hpp>
#include <boost/phoenix/statement.hpp>
#include <boost/phoenix/stl/algorithm.hpp>
#include <boost/phoenix/core.hpp>
//]

#include <iostream>
#include <iterator>
#include <string>

#include "example.hpp"

//[wcl_namespaces
// Shorthand namespace alias for Boost.Spirit.Lex, used throughout this file.
namespace lex = boost::spirit::lex;
//]

///////////////////////////////////////////////////////////////////////////////
//  Token definition: We use the lexertl based lexer engine as the underlying
//  lexer type.
//
//  Note, the token definition type is derived from the 'lexertl_actor_lexer'
//  template, which is necessary to be able to use lexer semantic actions.
///////////////////////////////////////////////////////////////////////////////
59 | struct distance_func | |
60 | { | |
61 | template <typename Iterator1, typename Iterator2> | |
62 | struct result : boost::iterator_difference<Iterator1> {}; | |
63 | ||
64 | template <typename Iterator1, typename Iterator2> | |
65 | typename result<Iterator1, Iterator2>::type | |
66 | operator()(Iterator1 const& begin, Iterator2 const& end) const | |
67 | { | |
68 | return std::distance(begin, end); | |
69 | } | |
70 | }; | |
71 | boost::phoenix::function<distance_func> const distance = distance_func(); | |
72 | ||
73 | //[wcl_token_definition | |
74 | template <typename Lexer> | |
75 | struct word_count_tokens : lex::lexer<Lexer> | |
76 | { | |
77 | word_count_tokens() | |
78 | : c(0), w(0), l(0) | |
79 | , word("[^ \t\n]+") // define tokens | |
80 | , eol("\n") | |
81 | , any(".") | |
82 | { | |
83 | using boost::spirit::lex::_start; | |
84 | using boost::spirit::lex::_end; | |
85 | using boost::phoenix::ref; | |
86 | ||
87 | // associate tokens with the lexer | |
88 | this->self | |
89 | = word [++ref(w), ref(c) += distance(_start, _end)] | |
90 | | eol [++ref(c), ++ref(l)] | |
91 | | any [++ref(c)] | |
92 | ; | |
93 | } | |
94 | ||
95 | std::size_t c, w, l; | |
96 | lex::token_def<> word, eol, any; | |
97 | }; | |
98 | //] | |
99 | ||
100 | /////////////////////////////////////////////////////////////////////////////// | |
101 | //[wcl_main | |
102 | int main(int argc, char* argv[]) | |
103 | { | |
104 | ||
105 | /*< Specifying `omit` as the token attribute type generates a token class | |
106 | not holding any token attribute at all (not even the iterator range of the | |
107 | matched input sequence), therefore optimizing the token, the lexer, and | |
108 | possibly the parser implementation as much as possible. Specifying | |
109 | `mpl::false_` as the 3rd template parameter generates a token | |
110 | type and an iterator, both holding no lexer state, allowing for even more | |
111 | aggressive optimizations. As a result the token instances contain the token | |
112 | ids as the only data member. | |
113 | >*/ typedef | |
114 | lex::lexertl::token<char const*, lex::omit, boost::mpl::false_> | |
115 | token_type; | |
116 | ||
117 | /*< This defines the lexer type to use | |
118 | >*/ typedef lex::lexertl::actor_lexer<token_type> lexer_type; | |
119 | ||
120 | /*< Create the lexer object instance needed to invoke the lexical analysis | |
121 | >*/ word_count_tokens<lexer_type> word_count_lexer; | |
122 | ||
123 | /*< Read input from the given file, tokenize all the input, while discarding | |
124 | all generated tokens | |
125 | >*/ std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1])); | |
126 | char const* first = str.c_str(); | |
127 | char const* last = &first[str.size()]; | |
128 | ||
129 | /*< Create a pair of iterators returning the sequence of generated tokens | |
130 | >*/ lexer_type::iterator_type iter = word_count_lexer.begin(first, last); | |
131 | lexer_type::iterator_type end = word_count_lexer.end(); | |
132 | ||
133 | /*< Here we simply iterate over all tokens, making sure to break the loop | |
134 | if an invalid token gets returned from the lexer | |
135 | >*/ while (iter != end && token_is_valid(*iter)) | |
136 | ++iter; | |
137 | ||
138 | if (iter == end) { | |
139 | std::cout << "lines: " << word_count_lexer.l | |
140 | << ", words: " << word_count_lexer.w | |
141 | << ", characters: " << word_count_lexer.c | |
142 | << "\n"; | |
143 | } | |
144 | else { | |
145 | std::string rest(first, last); | |
146 | std::cout << "Lexical analysis failed\n" << "stopped at: \"" | |
147 | << rest << "\"\n"; | |
148 | } | |
149 | return 0; | |
150 | } | |
151 | //] |