1 // Copyright (c) 2001-2011 Hartmut Kaiser
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 // #define BOOST_SPIRIT_LEXERTL_DEBUG
8 #include <boost/config/warning_disable.hpp>
9 #include <boost/detail/lightweight_test.hpp>
11 #include <boost/spirit/include/lex_lexertl.hpp>
12 #include <boost/spirit/include/lex_lexertl_position_token.hpp>
13 #include <boost/spirit/include/phoenix_object.hpp>
14 #include <boost/spirit/include/phoenix_operator.hpp>
15 #include <boost/spirit/include/phoenix_statement.hpp>
16 #include <boost/spirit/include/phoenix_stl.hpp>
18 namespace lex
= boost::spirit::lex
;
19 namespace phoenix
= boost::phoenix
;
20 namespace mpl
= boost::mpl
;
22 ///////////////////////////////////////////////////////////////////////////////
// Lexer definition derived from lex::lexer<Lexer> whose three tokens (int_,
// double_ and whitespace) are all declared as lex::token_def<lex::omit>.
// The visible statements register named regex patterns, build the int_ and
// double_ token expressions from those patterns, give double_ the id
// ID_DOUBLE, and attach a semantic action to whitespace.
// NOTE(review): the constructor header, the braces and the beginning of the
// token-definition assignment (only its "| whitespace[...]" tail is shown)
// are not visible in this text - confirm against the complete file.
29 template <typename Lexer
>
30 struct token_definitions
: lex::lexer
<Lexer
>
// named sub-patterns, referenced below as {NAME} inside other regexes
34 this->self
.add_pattern("HEXDIGIT", "[0-9a-fA-F]");
35 this->self
.add_pattern("OCTALDIGIT", "[0-7]");
36 this->self
.add_pattern("DIGIT", "[0-9]");
38 this->self
.add_pattern("OPTSIGN", "[-+]?");
// EXPSTART is registered but not referenced by any regex visible in this block
39 this->self
.add_pattern("EXPSTART", "[eE][-+]");
40 this->self
.add_pattern("EXPONENT", "[eE]{OPTSIGN}{DIGIT}+");
42 // define tokens and associate them with the lexer
// int_: hexadecimal (0x/0X prefix), octal (leading 0) or optionally signed
// decimal starting with a non-zero digit
43 int_
= "(0x|0X){HEXDIGIT}+|0{OCTALDIGIT}*|{OPTSIGN}[1-9]{DIGIT}*";
// double_: optionally signed number containing a decimal point with an
// optional exponent, or a digit sequence followed by a mandatory exponent
46 double_
= "{OPTSIGN}({DIGIT}*\\.{DIGIT}+|{DIGIT}+\\.){EXPONENT}?|{DIGIT}+{EXPONENT}";
// ID_DOUBLE is declared outside the visible text
47 double_
.id(ID_DOUBLE
);
// whitespace: one or more spaces, tabs or newlines
49 whitespace
= "[ \t\n]+";
// the action on whitespace sets lex::_pass to pass_ignore
54 | whitespace
[ lex::_pass
= lex::pass_flags::pass_ignore
]
// token definition members used by the statements above
58 lex::token_def
<lex::omit
> int_
;
59 lex::token_def
<lex::omit
> double_
;
60 lex::token_def
<lex::omit
> whitespace
;
// Lexer definition derived from lex::lexer<Lexer> that uses the same patterns
// and lex::token_def<lex::omit> tokens as token_definitions, and additionally
// registers lexer states and assigns lex::_state from the semantic actions of
// double_ and int_.
// NOTE(review): the braces and the beginning of the token-definition
// assignment are not visible in this text - confirm against the complete file.
63 template <typename Lexer
>
64 struct token_definitions_with_state
: lex::lexer
<Lexer
>
66 token_definitions_with_state()
// named sub-patterns, referenced below as {NAME} inside other regexes
68 this->self
.add_pattern("HEXDIGIT", "[0-9a-fA-F]");
69 this->self
.add_pattern("OCTALDIGIT", "[0-7]");
70 this->self
.add_pattern("DIGIT", "[0-9]");
72 this->self
.add_pattern("OPTSIGN", "[-+]?");
// EXPSTART is registered but not referenced by any regex visible in this block
73 this->self
.add_pattern("EXPSTART", "[eE][-+]");
74 this->self
.add_pattern("EXPONENT", "[eE]{OPTSIGN}{DIGIT}+");
// register lexer states: one call without an argument, then "INT" and "DOUBLE"
76 this->self
.add_state();
77 this->self
.add_state("INT");
78 this->self
.add_state("DOUBLE");
80 // define tokens and associate them with the lexer
// int_: hexadecimal (0x/0X prefix), octal (leading 0) or optionally signed
// decimal starting with a non-zero digit
81 int_
= "(0x|0X){HEXDIGIT}+|0{OCTALDIGIT}*|{OPTSIGN}[1-9]{DIGIT}*";
// double_: optionally signed number containing a decimal point with an
// optional exponent, or a digit sequence followed by a mandatory exponent
84 double_
= "{OPTSIGN}({DIGIT}*\\.{DIGIT}+|{DIGIT}+\\.){EXPONENT}?|{DIGIT}+{EXPONENT}";
// ID_DOUBLE is declared outside the visible text
85 double_
.id(ID_DOUBLE
);
// whitespace: one or more spaces, tabs or newlines
87 whitespace
= "[ \t\n]+";
// the action on double_ assigns "DOUBLE" to lex::_state
90 double_
[ lex::_state
= "DOUBLE"]
// the action on int_ assigns "INT" to lex::_state
91 | int_
[ lex::_state
= "INT" ]
// the action on whitespace sets lex::_pass to pass_ignore
92 | whitespace
[ lex::_pass
= lex::pass_flags::pass_ignore
]
// token definition members used by the statements above
96 lex::token_def
<lex::omit
> int_
;
97 lex::token_def
<lex::omit
> double_
;
98 lex::token_def
<lex::omit
> whitespace
;
101 ///////////////////////////////////////////////////////////////////////////////
// Compares the id() of every token in `tokens` against the expected values
// that `ids` points to.
//   ids    - expected token ids; the value -1 marks the end of the list
//   tokens - tokens to check
// The visible returns yield false when the expected data ends early, and the
// final return yields true only if *ids is -1 at that point.
// NOTE(review): the return type, the sentinel test guarding the first
// `return false`, the mismatch return and the advance of `ids` are not
// visible in this text - confirm against the complete file.
102 template <typename Token
>
104 test_token_ids(int const* ids
, std::vector
<Token
> const& tokens
)
106 BOOST_FOREACH(Token
const& t
, tokens
)
109 return false; // reached end of expected data
// *ids is an int, so it is converted to std::size_t before the comparison
111 if (t
.id() != static_cast<std::size_t>(*ids
)) // token id must match
// succeed only when the terminating -1 is the current expected value
116 return (*ids
== -1) ? true : false;
// Compares the state() of every token in `tokens` against the expected values
// that `states` points to.
//   states - expected token states; std::size_t(-1) marks the end of the list
//   tokens - tokens to check
// Returns false if the expected data ends before the tokens do; the final
// return yields true only if *states is std::size_t(-1) at that point.
// NOTE(review): the return type, the mismatch return and the advance of
// `states` are not visible in this text - confirm against the complete file.
119 template <typename Token
>
121 test_token_states(std::size_t const* states
, std::vector
<Token
> const& tokens
)
123 BOOST_FOREACH(Token
const& t
, tokens
)
125 if (*states
== std::size_t(-1))
126 return false; // reached end of expected data
128 if (t
.state() != *states
) // token state must match
// succeed only when the terminating sentinel is the current expected value
133 return (*states
== std::size_t(-1)) ? true : false;
136 ///////////////////////////////////////////////////////////////////////////////
// Expected begin and end offsets of one matched token; test_token_positions
// reads fields with these names through its position_type pointer.
// NOTE(review): the enclosing declaration is not visible in this text -
// presumably these are the members of position_type; confirm.
139 std::size_t begin
, end
;
// Compares the matched range of every token in `tokens` against the expected
// positions that `positions` points to.
//   begin     - iterator from which the offsets are measured
//   positions - expected {begin, end} offsets; an entry with both fields equal
//               to std::size_t(-1) marks the end of the list
//   tokens    - tokens to check
// Returns false if the expected data ends before the tokens do; the final
// return yields true only if the current entry is the terminating one.
// NOTE(review): the return type, the mismatch return and the advance of
// `positions` are not visible in this text - confirm against the complete file.
142 template <typename Iterator
, typename Token
>
144 test_token_positions(Iterator begin
, position_type
const* positions
,
145 std::vector
<Token
> const& tokens
)
147 BOOST_FOREACH(Token
const& t
, tokens
)
149 if (positions
->begin
== std::size_t(-1) &&
150 positions
->end
== std::size_t(-1))
152 return false; // reached end of expected data
// convert the matched iterator range into offsets relative to `begin`
155 boost::iterator_range
<Iterator
> matched
= t
.matched();
156 std::size_t start
= std::distance(begin
, matched
.begin());
157 std::size_t end
= std::distance(begin
, matched
.end());
159 // position must match
160 if (start
!= positions
->begin
|| end
!= positions
->end
)
// succeed only when the terminating entry is the current expected value
166 return (positions
->begin
== std::size_t(-1) &&
167 positions
->end
== std::size_t(-1)) ? true : false;
170 ///////////////////////////////////////////////////////////////////////////////
// Test driver: tokenizes one input string four times, each time with a
// different token type / lexer definition combination, and checks the
// collected tokens with BOOST_TEST against the expected tables below.
// NOTE(review): the function header and the braces that separate the four
// runs (each redeclares token_type, lexer_type, lexer, tokens and first) are
// not visible in this text - confirm against the complete file.
173 typedef std::string::iterator base_iterator_type
;
174 std::string
input(" 01 1.2 -2 0x3 2.3e6 -3.4");
// expected token ids in input order; -1 terminates the list
175 int ids
[] = { ID_INT
, ID_DOUBLE
, ID_INT
, ID_INT
, ID_DOUBLE
, ID_DOUBLE
, -1 };
// expected state() value of each token; std::size_t(-1) terminates the list
176 std::size_t states
[] = { 0, 1, 2, 1, 1, 2, std::size_t(-1) };
// expected {begin, end} offsets of each match, measured from input.begin();
// the entry with both fields equal to std::size_t(-1) terminates the list
177 position_type positions
[] =
179 { 1, 3 }, { 4, 7 }, { 8, 10 }, { 11, 14 }, { 15, 20 }, { 21, 25 },
180 { std::size_t(-1), std::size_t(-1) }
183 // token type: token id, iterator_pair as token value, no state
185 typedef lex::lexertl::token
<
186 base_iterator_type
, mpl::vector
<>, mpl::false_
> token_type
;
187 typedef lex::lexertl::actor_lexer
<token_type
> lexer_type
;
189 token_definitions
<lexer_type
> lexer
;
190 std::vector
<token_type
> tokens
;
191 base_iterator_type first
= input
.begin();
// tokenize the whole input, appending every token passed to the callback to
// `tokens`
193 using phoenix::arg_names::_1
;
194 BOOST_TEST(lex::tokenize(first
, input
.end(), lexer
195 , phoenix::push_back(phoenix::ref(tokens
), _1
)));
197 BOOST_TEST(test_token_ids(ids
, tokens
));
// second run: lex::lexertl::position_token with mpl::false_ and
// token_definitions; checks token ids and matched positions
201 typedef lex::lexertl::position_token
<
202 base_iterator_type
, mpl::vector
<>, mpl::false_
> token_type
;
203 typedef lex::lexertl::actor_lexer
<token_type
> lexer_type
;
205 token_definitions
<lexer_type
> lexer
;
206 std::vector
<token_type
> tokens
;
207 base_iterator_type first
= input
.begin();
209 using phoenix::arg_names::_1
;
210 BOOST_TEST(lex::tokenize(first
, input
.end(), lexer
211 , phoenix::push_back(phoenix::ref(tokens
), _1
)));
213 BOOST_TEST(test_token_ids(ids
, tokens
));
214 BOOST_TEST(test_token_positions(input
.begin(), positions
, tokens
));
217 // token type: holds token id, state, iterator_pair as token value
219 typedef lex::lexertl::token
<
220 base_iterator_type
, mpl::vector
<>, mpl::true_
> token_type
;
221 typedef lex::lexertl::actor_lexer
<token_type
> lexer_type
;
223 token_definitions_with_state
<lexer_type
> lexer
;
224 std::vector
<token_type
> tokens
;
225 base_iterator_type first
= input
.begin();
227 using phoenix::arg_names::_1
;
228 BOOST_TEST(lex::tokenize(first
, input
.end(), lexer
229 , phoenix::push_back(phoenix::ref(tokens
), _1
)));
231 BOOST_TEST(test_token_ids(ids
, tokens
));
232 BOOST_TEST(test_token_states(states
, tokens
));
// fourth run: lex::lexertl::position_token with mpl::true_ and
// token_definitions_with_state; checks token ids, states and matched positions
236 typedef lex::lexertl::position_token
<
237 base_iterator_type
, mpl::vector
<>, mpl::true_
> token_type
;
238 typedef lex::lexertl::actor_lexer
<token_type
> lexer_type
;
240 token_definitions_with_state
<lexer_type
> lexer
;
241 std::vector
<token_type
> tokens
;
242 base_iterator_type first
= input
.begin();
244 using phoenix::arg_names::_1
;
245 BOOST_TEST(lex::tokenize(first
, input
.end(), lexer
246 , phoenix::push_back(phoenix::ref(tokens
), _1
)));
248 BOOST_TEST(test_token_ids(ids
, tokens
));
249 BOOST_TEST(test_token_states(states
, tokens
));
250 BOOST_TEST(test_token_positions(input
.begin(), positions
, tokens
));
// hand the accumulated BOOST_TEST results back as the return value
253 return boost::report_errors();