1 // Copyright (c) 2001-2010 Hartmut Kaiser
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// This example is the equivalent of the following lex program:
//
//     /* INITIAL is the default start state. COMMENT is our new */
//     /* state where we remove comments. */
//     <INITIAL>"/*" BEGIN COMMENT;
//     <INITIAL>[\n] ECHO;
//     <COMMENT>"*/" BEGIN INITIAL;
//
// Its purpose is to strip comments out of C code.
//
// Additionally this example demonstrates the use of lexer states to structure
// the lexer definition.
34 // #define BOOST_SPIRIT_LEXERTL_DEBUG
36 #include <boost/config/warning_disable.hpp>
37 #include <boost/spirit/include/qi.hpp>
38 #include <boost/spirit/include/lex_lexertl.hpp>
39 #include <boost/spirit/include/phoenix_operator.hpp>
40 #include <boost/spirit/include/phoenix_container.hpp>
45 #include "example.hpp"
47 using namespace boost::spirit
;
///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
// lexer type.
///////////////////////////////////////////////////////////////////////////////
55 IDANY
= lex::min_token_id
+ 10
58 template <typename Lexer
>
59 struct strip_comments_tokens
: lex::lexer
<Lexer
>
61 strip_comments_tokens()
62 : strip_comments_tokens::base_type(lex::match_flags::match_default
)
64 // define tokens and associate them with the lexer
65 cppcomment
= "\"//\"[^\n]*"; // '//[^\n]*'
66 ccomment
= "\"/*\""; // '/*'
67 endcomment
= "\"*/\""; // '*/'
69 // The following tokens are associated with the default lexer state
70 // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
73 (cppcomment
) // no explicit token id is associated
75 (".", IDANY
) // IDANY is the token id associated with this token
79 // The following tokens are associated with the lexer state "COMMENT".
80 // We switch lexer states from inside the parsing process using the
81 // in_state("COMMENT")[] parser component as shown below.
82 this->self("COMMENT").add
88 lex::token_def
<> cppcomment
, ccomment
, endcomment
;
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
94 template <typename Iterator
>
95 struct strip_comments_grammar
: qi::grammar
<Iterator
>
97 template <typename TokenDef
>
98 strip_comments_grammar(TokenDef
const& tok
)
99 : strip_comments_grammar::base_type(start
)
101 // The in_state("COMMENT")[...] parser component switches the lexer
102 // state to be 'COMMENT' during the matching of the embedded parser.
103 start
= *( tok
.ccomment
104 >> qi::in_state("COMMENT")
106 // the lexer is in the 'COMMENT' state during
107 // matching of the following parser components
108 *token(IDANY
) >> tok
.endcomment
111 | qi::token(IDANY
) [ std::cout
<< _1
]
116 qi::rule
<Iterator
> start
;
119 ///////////////////////////////////////////////////////////////////////////////
120 int main(int argc
, char* argv
[])
122 // iterator type used to expose the underlying input stream
123 typedef std::string::iterator base_iterator_type
;
127 lex::lexertl::lexer
<lex::lexertl::token
<base_iterator_type
> >
130 // iterator type exposed by the lexer
131 typedef strip_comments_tokens
<lexer_type
>::iterator_type iterator_type
;
133 // now we use the types defined above to create the lexer and grammar
134 // object instances needed to invoke the parsing process
135 strip_comments_tokens
<lexer_type
> strip_comments
; // Our lexer
136 strip_comments_grammar
<iterator_type
> g (strip_comments
); // Our parser
138 // Parsing is done based on the token stream, not the character
139 // stream read from the input.
140 std::string
str (read_from_file(1 == argc
? "strip_comments.input" : argv
[1]));
141 base_iterator_type first
= str
.begin();
143 bool r
= lex::tokenize_and_parse(first
, str
.end(), strip_comments
, g
);
146 std::cout
<< "-------------------------\n";
147 std::cout
<< "Parsing succeeded\n";
148 std::cout
<< "-------------------------\n";
151 std::string
rest(first
, str
.end());
152 std::cout
<< "-------------------------\n";
153 std::cout
<< "Parsing failed\n";
154 std::cout
<< "stopped at: \"" << rest
<< "\"\n";
155 std::cout
<< "-------------------------\n";
158 std::cout
<< "Bye... :-) \n\n";