// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example is the equivalent to the following lex program:
//
//     /* INITIAL is the default start state. COMMENT is our new */
//     /* state where we remove comments.                        */
//
//     <INITIAL>"/*"  BEGIN COMMENT;
//     <INITIAL>[\n]  ECHO;
//     <COMMENT>"*/"  BEGIN INITIAL;
//
// Its purpose is to strip comments out of C code.
//
// Additionally this example demonstrates the use of lexer states to structure
// the lexer definition.

// #define BOOST_SPIRIT_LEXERTL_DEBUG
36 #include <boost/config/warning_disable.hpp>
37 #include <boost/spirit/include/lex_lexertl.hpp>
38 #include <boost/spirit/include/phoenix_operator.hpp>
39 #include <boost/spirit/include/phoenix_statement.hpp>
40 #include <boost/spirit/include/phoenix_core.hpp>
45 #include "example.hpp"
47 using namespace boost::spirit
;
///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
// lexer type.
///////////////////////////////////////////////////////////////////////////////
55 IDANY
= lex::min_token_id
+ 10,
56 IDEOL
= lex::min_token_id
+ 11
59 ///////////////////////////////////////////////////////////////////////////////
60 // Simple custom semantic action function object used to print the matched
61 // input sequence for a particular token
62 template <typename Char
, typename Traits
>
63 struct echo_input_functor
65 echo_input_functor (std::basic_ostream
<Char
, Traits
>& os_
)
68 // This is called by the semantic action handling code during the lexing
69 template <typename Iterator
, typename Context
>
70 void operator()(Iterator
const& b
, Iterator
const& e
71 , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags
)&
72 , std::size_t&, Context
&) const
74 os
<< std::string(b
, e
);
77 std::basic_ostream
<Char
, Traits
>& os
;
80 template <typename Char
, typename Traits
>
81 inline echo_input_functor
<Char
, Traits
>
82 echo_input(std::basic_ostream
<Char
, Traits
>& os
)
84 return echo_input_functor
<Char
, Traits
>(os
);
87 ///////////////////////////////////////////////////////////////////////////////
88 // Another simple custom semantic action function object used to switch the
90 struct set_lexer_state
92 set_lexer_state(char const* state_
)
95 // This is called by the semantic action handling code during the lexing
96 template <typename Iterator
, typename Context
>
97 void operator()(Iterator
const&, Iterator
const&
98 , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags
)&
99 , std::size_t&, Context
& ctx
) const
101 ctx
.set_state_name(state
.c_str());
107 ///////////////////////////////////////////////////////////////////////////////
108 template <typename Lexer
>
109 struct strip_comments_tokens
: lex::lexer
<Lexer
>
111 strip_comments_tokens()
112 : strip_comments_tokens::base_type(lex::match_flags::match_default
)
114 // define tokens and associate them with the lexer
115 cppcomment
= "\"//\"[^\n]*"; // '//[^\n]*'
116 ccomment
= "\"/*\""; // '/*'
117 endcomment
= "\"*/\""; // '*/'
118 any
= std::string(".");
121 // The following tokens are associated with the default lexer state
122 // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
123 // strictly optional.
126 | ccomment
[ set_lexer_state("COMMENT") ]
127 | eol
[ echo_input(std::cout
) ]
128 | any
[ echo_input(std::cout
) ]
131 // The following tokens are associated with the lexer state 'COMMENT'.
132 this->self("COMMENT")
133 = endcomment
[ set_lexer_state("INITIAL") ]
139 lex::token_def
<> cppcomment
, ccomment
, endcomment
, any
, eol
;
142 ///////////////////////////////////////////////////////////////////////////////
143 int main(int argc
, char* argv
[])
145 // iterator type used to expose the underlying input stream
146 typedef std::string::iterator base_iterator_type
;
150 lex::lexertl::actor_lexer
<lex::lexertl::token
<base_iterator_type
> >
153 // now we use the types defined above to create the lexer and grammar
154 // object instances needed to invoke the parsing process
155 strip_comments_tokens
<lexer_type
> strip_comments
; // Our lexer
157 // No parsing is done alltogether, everything happens in the lexer semantic
159 std::string
str (read_from_file(1 == argc
? "strip_comments.input" : argv
[1]));
160 base_iterator_type first
= str
.begin();
161 bool r
= lex::tokenize(first
, str
.end(), strip_comments
);
164 std::string
rest(first
, str
.end());
165 std::cerr
<< "Lexical analysis failed\n" << "stopped at: \""