1 // Copyright (c) 2001-2010 Hartmut Kaiser
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 // This example is equivalent to the following lex program:
9 // /* INITIAL is the default start state. COMMENT is our new */
10 // /* state where we remove comments. */
16 // <INITIAL>"/*" BEGIN COMMENT;
18 // <INITIAL>[\n] ECHO;
19 // <COMMENT>"*/" BEGIN INITIAL;
29 // Its purpose is to strip comments out of C code.
31 // Additionally this example demonstrates the use of lexer states to structure
32 // the lexer definition.
34 // #define BOOST_SPIRIT_LEXERTL_DEBUG
36 #include <boost/spirit/include/lex_lexertl.hpp>
37 #include <boost/phoenix/operator.hpp>
38 #include <boost/phoenix/statement.hpp>
39 #include <boost/phoenix/core.hpp>
44 #include "example.hpp"
46 using namespace boost::spirit
;
48 ///////////////////////////////////////////////////////////////////////////////
49 // Token definition: We use the lexertl based lexer engine as the underlying lexer type.
51 ///////////////////////////////////////////////////////////////////////////////
// Token identifiers, each defined as a fixed offset from lex::min_token_id.
// NOTE(review): the enclosing enum declaration is not visible in this excerpt,
// and no use of these ids is visible here either -- presumably they are meant
// for the `any` and `eol` tokens; confirm against the full file.
54 IDANY
= lex::min_token_id
+ 10,
55 IDEOL
= lex::min_token_id
+ 11
58 ///////////////////////////////////////////////////////////////////////////////
59 // Simple custom semantic action function object used to print the matched
60 // input sequence for a particular token
// Function object usable as a lexer semantic action: it writes the matched
// input range [b, e) to the output stream it was constructed with.
// Char and Traits select the std::basic_ostream specialization written to.
61 template <typename Char
, typename Traits
>
62 struct echo_input_functor
// Constructor: takes the destination stream by reference.
// NOTE(review): the initialization of the `os` member from `os_` is not
// visible in this excerpt -- confirm against the full file.
64 echo_input_functor (std::basic_ostream
<Char
, Traits
>& os_
)
67 // This is called by the semantic action handling code during the lexing
// b and e delimit the matched input; the pass_flags, std::size_t and Context
// arguments are accepted but left unnamed and unused.
68 template <typename Iterator
, typename Context
>
69 void operator()(Iterator
const& b
, Iterator
const& e
70 , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags
)&
71 , std::size_t&, Context
&) const
// The matched range is copied into a std::string before being streamed.
73 os
<< std::string(b
, e
);
// Reference to the destination stream; the functor does not own it, so the
// stream has to outlive the functor.
76 std::basic_ostream
<Char
, Traits
>& os
;
// Convenience factory: deduces Char and Traits from the stream argument and
// returns an echo_input_functor bound to that stream, so a call site can write
// echo_input(std::cout) without spelling out the template arguments.
79 template <typename Char
, typename Traits
>
80 inline echo_input_functor
<Char
, Traits
>
81 echo_input(std::basic_ostream
<Char
, Traits
>& os
)
83 return echo_input_functor
<Char
, Traits
>(os
);
86 ///////////////////////////////////////////////////////////////////////////////
87 // Another simple custom semantic action function object used to switch the lexer state
// Function object usable as a lexer semantic action: when invoked it asks the
// lexer context to switch to the lexer state named at construction time.
89 struct set_lexer_state
// Constructor: takes the name of the target lexer state.
// NOTE(review): the `state` member and its initialization from `state_` are
// not visible in this excerpt; the `state.c_str()` call below suggests it is
// a std::string -- confirm against the full file.
91 set_lexer_state(char const* state_
)
94 // This is called by the semantic action handling code during the lexing
// Only the Context argument is used; the two iterators, the pass_flags and the
// std::size_t arguments are accepted but left unnamed.
95 template <typename Iterator
, typename Context
>
96 void operator()(Iterator
const&, Iterator
const&
97 , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags
)&
98 , std::size_t&, Context
& ctx
) const
// Hand the stored state name to the context as a C string.
100 ctx
.set_state_name(state
.c_str());
106 ///////////////////////////////////////////////////////////////////////////////
// Lexer definition for the comment stripper. It derives from lex::lexer<Lexer>
// and declares five token definitions (cppcomment, ccomment, endcomment, any,
// eol) that are attached to two lexer states: the default state and "COMMENT".
107 template <typename Lexer
>
108 struct strip_comments_tokens
: lex::lexer
<Lexer
>
// Constructor: passes lex::match_flags::match_default to the base lexer, then
// sets up the token patterns and the per-state token/action associations.
110 strip_comments_tokens()
111 : strip_comments_tokens::base_type(lex::match_flags::match_default
)
// Patterns assigned below. Per the inline comments, the escaped double quotes
// wrap text that is matched literally: cppcomment covers a line comment up to
// (not including) the newline, ccomment the block-comment opener, endcomment
// the block-comment closer. `any` is the single-character pattern ".".
// NOTE(review): the pattern assigned to `eol` is not visible in this excerpt.
113 // define tokens and associate them with the lexer
114 cppcomment
= "\"//\"[^\n]*"; // '//[^\n]*'
115 ccomment
= "\"/*\""; // '/*'
116 endcomment
= "\"*/\""; // '*/'
117 any
= std::string(".");
120 // The following tokens are associated with the default lexer state
121 // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
122 // strictly optional.
// NOTE(review): the beginning of this token list (the assignment target and
// its first alternative) is not visible in this excerpt.
// Visible alternatives: ccomment switches the lexer to the "COMMENT" state;
// eol and any echo the matched text to std::cout.
125 | ccomment
[ set_lexer_state("COMMENT") ]
126 | eol
[ echo_input(std::cout
) ]
127 | any
[ echo_input(std::cout
) ]
130 // The following tokens are associated with the lexer state 'COMMENT'.
// endcomment switches the lexer back to the "INITIAL" state.
// NOTE(review): any further alternatives of this list are not visible here.
131 this->self("COMMENT")
132 = endcomment
[ set_lexer_state("INITIAL") ]
// Token definitions referenced by the constructor above.
138 lex::token_def
<> cppcomment
, ccomment
, endcomment
, any
, eol
;
141 ///////////////////////////////////////////////////////////////////////////////
142 int main(int argc
, char* argv
[])
144 // iterator type used to expose the underlying input stream
145 typedef std::string::iterator base_iterator_type
;
149 lex::lexertl::actor_lexer
<lex::lexertl::token
<base_iterator_type
> >
152 // now we use the types defined above to create the lexer and grammar
153 // object instances needed to invoke the parsing process
154 strip_comments_tokens
<lexer_type
> strip_comments
; // Our lexer
156 // No parsing is done altogether, everything happens in the lexer semantic
158 std::string
str (read_from_file(1 == argc
? "strip_comments.input" : argv
[1]));
159 base_iterator_type first
= str
.begin();
160 bool r
= lex::tokenize(first
, str
.end(), strip_comments
);
163 std::string
rest(first
, str
.end());
164 std::cerr
<< "Lexical analysis failed\n" << "stopped at: \""