1 // Copyright (c) 2001-2011 Hartmut Kaiser
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 // #define BOOST_SPIRIT_LEXERTL_DEBUG
8 #include <boost/config/warning_disable.hpp>
9 #include <boost/detail/lightweight_test.hpp>
11 #include <boost/spirit/include/lex_lexertl.hpp>
12 #include <boost/spirit/include/lex_lexertl_position_token.hpp>
13 #include <boost/spirit/include/phoenix_object.hpp>
14 #include <boost/spirit/include/phoenix_operator.hpp>
15 #include <boost/spirit/include/phoenix_statement.hpp>
16 #include <boost/spirit/include/phoenix_stl.hpp>
// Short aliases for the Boost namespaces referenced throughout this file.
18 namespace lex
= boost::spirit::lex
;
19 namespace phoenix
= boost::phoenix
;
20 namespace mpl
= boost::mpl
;
22 ///////////////////////////////////////////////////////////////////////////////
// Lexer definition without explicit lexer states. It registers named regex
// sub-patterns, defines the int_, double_ and whitespace tokens, and attaches
// an action to whitespace that sets lex::_pass to pass_ignore.
// NOTE(review): the constructor header, all braces, and the beginning of the
// token-rule expression (everything before "| whitespace") are not visible in
// this excerpt -- confirm against the full file.
29 template <typename Lexer
>
30 struct token_definitions
: lex::lexer
<Lexer
>
// named sub-patterns; referenced as {NAME} inside the regexes below
34 this->self
.add_pattern("HEXDIGIT", "[0-9a-fA-F]");
35 this->self
.add_pattern("OCTALDIGIT", "[0-7]");
36 this->self
.add_pattern("DIGIT", "[0-9]");
38 this->self
.add_pattern("OPTSIGN", "[-+]?");
// NOTE(review): EXPSTART is registered but not referenced by any regex
// visible in this block
39 this->self
.add_pattern("EXPSTART", "[eE][-+]");
40 this->self
.add_pattern("EXPONENT", "[eE]{OPTSIGN}{DIGIT}+");
42 // define tokens and associate them with the lexer
// int_: hexadecimal (0x/0X prefix), octal (leading 0), or an optionally
// signed decimal that starts with a non-zero digit
43 int_
= "(0x|0X){HEXDIGIT}+|0{OCTALDIGIT}*|{OPTSIGN}[1-9]{DIGIT}*";
// double_: optionally signed number containing a '.', with an optional
// exponent; or digits followed by a mandatory exponent.
// NOTE(review): the leading {OPTSIGN} appears to bind only to the first
// alternative, so a signed pure-exponent form (e.g. "-2e5") would not be
// covered -- confirm whether that is intended.
46 double_
= "{OPTSIGN}({DIGIT}*\\.{DIGIT}+|{DIGIT}+\\.){EXPONENT}?|{DIGIT}+{EXPONENT}";
// give double_ the explicit token id ID_DOUBLE (declared outside this excerpt)
47 double_
.id(ID_DOUBLE
);
// whitespace: one or more spaces, tabs or newlines
49 whitespace
= "[ \t\n]+";
// action on a whitespace match: set lex::_pass to pass_ignore
54 | whitespace
[ lex::_pass
= lex::pass_flags::pass_ignore
]
// token definitions, all declared with lex::omit as their attribute type
58 lex::token_def
<lex::omit
> int_
;
59 lex::token_def
<lex::omit
> double_
;
60 lex::token_def
<lex::omit
> whitespace
;
// Lexer definition that also exercises lexer states. It registers the same
// sub-patterns and tokens as token_definitions, adds the states "INT" and
// "DOUBLE", and attaches actions that assign lex::_state when int_ or
// double_ matches.
// NOTE(review): the braces and the beginning of the token-rule expression
// (everything before the first "double_ [ ... ]") are not visible in this
// excerpt -- confirm against the full file.
63 template <typename Lexer
>
64 struct token_definitions_with_state
: lex::lexer
<Lexer
>
66 token_definitions_with_state()
// named sub-patterns; referenced as {NAME} inside the regexes below
68 this->self
.add_pattern("HEXDIGIT", "[0-9a-fA-F]");
69 this->self
.add_pattern("OCTALDIGIT", "[0-7]");
70 this->self
.add_pattern("DIGIT", "[0-9]");
72 this->self
.add_pattern("OPTSIGN", "[-+]?");
// NOTE(review): EXPSTART is registered but not referenced by any regex
// visible in this block
73 this->self
.add_pattern("EXPSTART", "[eE][-+]");
74 this->self
.add_pattern("EXPONENT", "[eE]{OPTSIGN}{DIGIT}+");
// register lexer states: one unnamed call, then "INT" and "DOUBLE"
// NOTE(review): presumably the argument-less call registers the default
// (initial) state -- confirm against the Spirit.Lex documentation
76 this->self
.add_state();
77 this->self
.add_state("INT");
78 this->self
.add_state("DOUBLE");
80 // define tokens and associate them with the lexer
// int_: hexadecimal (0x/0X prefix), octal (leading 0), or an optionally
// signed decimal that starts with a non-zero digit
81 int_
= "(0x|0X){HEXDIGIT}+|0{OCTALDIGIT}*|{OPTSIGN}[1-9]{DIGIT}*";
// double_: optionally signed number containing a '.', with an optional
// exponent; or digits followed by a mandatory exponent
84 double_
= "{OPTSIGN}({DIGIT}*\\.{DIGIT}+|{DIGIT}+\\.){EXPONENT}?|{DIGIT}+{EXPONENT}";
// give double_ the explicit token id ID_DOUBLE (declared outside this excerpt)
85 double_
.id(ID_DOUBLE
);
// whitespace: one or more spaces, tabs or newlines
87 whitespace
= "[ \t\n]+";
// actions: a double_ match assigns "DOUBLE" to lex::_state, an int_ match
// assigns "INT", and a whitespace match sets lex::_pass to pass_ignore
90 double_
[ lex::_state
= "DOUBLE"]
91 | int_
[ lex::_state
= "INT" ]
92 | whitespace
[ lex::_pass
= lex::pass_flags::pass_ignore
]
// token definitions, all declared with lex::omit as their attribute type
96 lex::token_def
<lex::omit
> int_
;
97 lex::token_def
<lex::omit
> double_
;
98 lex::token_def
<lex::omit
> whitespace
;
101 ///////////////////////////////////////////////////////////////////////////////
// Compares the ids of the lexed `tokens` against the expected sequence `ids`.
// The expected sequence is terminated by -1 (see the final return below).
// NOTE(review): the return type, the braces, the sentinel check guarding the
// first `return false`, and the code that advances `ids` are not visible in
// this excerpt -- confirm against the full file.
102 template <typename Token
>
104 test_token_ids(int const* ids
, std::vector
<Token
> const& tokens
)
// visit every token that was produced
106 BOOST_FOREACH(Token
const& t
, tokens
)
109 return false; // reached end of expected data
// NOTE(review): no use of `value` is visible in this excerpt
111 typename
Token::token_value_type
const& value (t
.value());
112 if (t
.id() != static_cast<std::size_t>(*ids
)) // token id must match
// true only if `ids` points at the -1 terminator at this point
117 return (*ids
== -1) ? true : false;
// Compares the lexer state stored in each of `tokens` (t.state()) against the
// expected sequence `states`, which is terminated by std::size_t(-1).
// NOTE(review): the return type, the braces, the statement executed on a
// state mismatch, and the code that advances `states` are not visible in this
// excerpt -- confirm against the full file.
120 template <typename Token
>
122 test_token_states(std::size_t const* states
, std::vector
<Token
> const& tokens
)
// visit every token that was produced
124 BOOST_FOREACH(Token
const& t
, tokens
)
// a token is present but the expected list is already at its terminator
126 if (*states
== std::size_t(-1))
127 return false; // reached end of expected data
// NOTE(review): no use of `value` is visible in this excerpt
129 typename
Token::token_value_type
const& value (t
.value());
130 if (t
.state() != *states
) // token state must match
// true only if `states` points at the terminator at this point
135 return (*states
== std::size_t(-1)) ? true : false;
138 ///////////////////////////////////////////////////////////////////////////////
// NOTE(review): presumably the data members of position_type (its struct
// header is not visible in this excerpt). Code further down reads them as
// positions->begin and positions->end and compares them with character
// offsets into the input.
141 std::size_t begin
, end
;
// Compares the matched input range of each of `tokens` against the expected
// `positions`. Offsets are measured with std::distance from `begin`. The
// expected sequence is terminated by an entry whose begin and end are both
// std::size_t(-1).
// NOTE(review): the return type, the braces, the statement executed on a
// position mismatch, and the code that advances `positions` are not visible
// in this excerpt -- confirm against the full file.
144 template <typename Iterator
, typename Token
>
146 test_token_positions(Iterator begin
, position_type
const* positions
,
147 std::vector
<Token
> const& tokens
)
// visit every token that was produced
149 BOOST_FOREACH(Token
const& t
, tokens
)
// a token is present but the expected list is already at its terminator
151 if (positions
->begin
== std::size_t(-1) &&
152 positions
->end
== std::size_t(-1))
154 return false; // reached end of expected data
// range of input matched by this token, converted to offsets from `begin`
157 boost::iterator_range
<Iterator
> matched
= t
.matched();
158 std::size_t start
= std::distance(begin
, matched
.begin());
159 std::size_t end
= std::distance(begin
, matched
.end());
161 // position must match
162 if (start
!= positions
->begin
|| end
!= positions
->end
)
// true only if `positions` points at the terminator entry at this point
168 return (positions
->begin
== std::size_t(-1) &&
169 positions
->end
== std::size_t(-1)) ? true : false;
172 ///////////////////////////////////////////////////////////////////////////////
// Shared test data used by all four scenarios below.
// NOTE(review): the enclosing function header and the braces of the
// `positions` initializer are not visible in this excerpt.
// iterator type used to walk the input string
175 typedef std::string::iterator base_iterator_type
;
// test input: six numeric literals separated by single spaces, preceded by
// one leading space
176 std::string
input(" 01 1.2 -2 0x3 2.3e6 -3.4");
// expected token ids in input order, terminated by -1
177 int ids
[] = { ID_INT
, ID_DOUBLE
, ID_INT
, ID_INT
, ID_DOUBLE
, ID_DOUBLE
, -1 };
// expected t.state() value of each token, terminated by std::size_t(-1)
178 std::size_t states
[] = { 0, 1, 2, 1, 1, 2, std::size_t(-1) };
// expected {begin, end} character offsets of each token within `input`
// (end is one past the last matched character), terminated by a {-1, -1} entry
179 position_type positions
[] =
181 { 1, 3 }, { 4, 7 }, { 8, 10 }, { 11, 14 }, { 15, 20 }, { 21, 25 },
182 { std::size_t(-1), std::size_t(-1) }
// Scenario 1: lexertl::token without state, lexed with token_definitions.
// Only the token ids are verified.
185 // token type: token id, iterator_pair as token value, no state
187 typedef lex::lexertl::token
<
188 base_iterator_type
, mpl::vector
<>, mpl::false_
> token_type
;
189 typedef lex::lexertl::actor_lexer
<token_type
> lexer_type
;
// lexer instance, output container, and start iterator for this scenario
191 token_definitions
<lexer_type
> lexer
;
192 std::vector
<token_type
> tokens
;
193 base_iterator_type first
= input
.begin();
// tokenize the whole input; the functor appends every token to `tokens`
195 using phoenix::arg_names::_1
;
196 BOOST_TEST(lex::tokenize(first
, input
.end(), lexer
197 , phoenix::push_back(phoenix::ref(tokens
), _1
)));
// the collected tokens must carry the expected ids
199 BOOST_TEST(test_token_ids(ids
, tokens
));
// Scenario 2: lexertl::position_token with mpl::false_ as its third template
// argument, lexed with token_definitions. Token ids and matched positions
// are verified.
203 typedef lex::lexertl::position_token
<
204 base_iterator_type
, mpl::vector
<>, mpl::false_
> token_type
;
205 typedef lex::lexertl::actor_lexer
<token_type
> lexer_type
;
// lexer instance, output container, and start iterator for this scenario
207 token_definitions
<lexer_type
> lexer
;
208 std::vector
<token_type
> tokens
;
209 base_iterator_type first
= input
.begin();
// tokenize the whole input; the functor appends every token to `tokens`
211 using phoenix::arg_names::_1
;
212 BOOST_TEST(lex::tokenize(first
, input
.end(), lexer
213 , phoenix::push_back(phoenix::ref(tokens
), _1
)));
// ids must match, and each token's matched range must sit at the expected
// offsets relative to input.begin()
215 BOOST_TEST(test_token_ids(ids
, tokens
));
216 BOOST_TEST(test_token_positions(input
.begin(), positions
, tokens
));
// Scenario 3: lexertl::token with state, lexed with
// token_definitions_with_state. Token ids and per-token lexer states are
// verified.
219 // token type: holds token id, state, iterator_pair as token value
221 typedef lex::lexertl::token
<
222 base_iterator_type
, mpl::vector
<>, mpl::true_
> token_type
;
223 typedef lex::lexertl::actor_lexer
<token_type
> lexer_type
;
// lexer instance, output container, and start iterator for this scenario
225 token_definitions_with_state
<lexer_type
> lexer
;
226 std::vector
<token_type
> tokens
;
227 base_iterator_type first
= input
.begin();
// tokenize the whole input; the functor appends every token to `tokens`
229 using phoenix::arg_names::_1
;
230 BOOST_TEST(lex::tokenize(first
, input
.end(), lexer
231 , phoenix::push_back(phoenix::ref(tokens
), _1
)));
// ids and the state stored in each token must match the expectations
233 BOOST_TEST(test_token_ids(ids
, tokens
));
234 BOOST_TEST(test_token_states(states
, tokens
));
// Scenario 4: lexertl::position_token with mpl::true_ as its third template
// argument, lexed with token_definitions_with_state. Token ids, per-token
// lexer states and matched positions are verified.
238 typedef lex::lexertl::position_token
<
239 base_iterator_type
, mpl::vector
<>, mpl::true_
> token_type
;
240 typedef lex::lexertl::actor_lexer
<token_type
> lexer_type
;
// lexer instance, output container, and start iterator for this scenario
242 token_definitions_with_state
<lexer_type
> lexer
;
243 std::vector
<token_type
> tokens
;
244 base_iterator_type first
= input
.begin();
// tokenize the whole input; the functor appends every token to `tokens`
246 using phoenix::arg_names::_1
;
247 BOOST_TEST(lex::tokenize(first
, input
.end(), lexer
248 , phoenix::push_back(phoenix::ref(tokens
), _1
)));
// ids, states and matched positions must all match the expectations
250 BOOST_TEST(test_token_ids(ids
, tokens
));
251 BOOST_TEST(test_token_states(states
, tokens
));
252 BOOST_TEST(test_token_positions(input
.begin(), positions
, tokens
));
// return the value produced by boost::report_errors()
255 return boost::report_errors();