]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright (c) 2001-2011 Hartmut Kaiser |
2 | // | |
3 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
4 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
5 | ||
6 | // #define BOOST_SPIRIT_LEXERTL_DEBUG | |
7 | ||
8 | #include <boost/detail/lightweight_test.hpp> | |
9 | #include <boost/spirit/include/phoenix_object.hpp> | |
10 | #include <boost/spirit/include/phoenix_operator.hpp> | |
11 | #include <boost/spirit/include/phoenix_statement.hpp> | |
12 | #include <boost/spirit/include/phoenix_stl.hpp> | |
13 | #include <boost/spirit/include/lex_lexertl.hpp> | |
14 | #include <boost/foreach.hpp> | |
15 | ||
16 | using namespace boost::spirit; | |
17 | ||
18 | /////////////////////////////////////////////////////////////////////////////// | |
19 | // semantic action analyzing leading whitespace | |
// Token ids that the whitespace semantic action assigns to a matched run of
// leading blanks.
enum tokenids
{
    ID_INDENT = 1000,   // the line is indented deeper than the enclosing level
    ID_DEDENT = 1001    // the line closes one or more indentation levels
};
25 | ||
26 | struct handle_whitespace | |
27 | { | |
28 | handle_whitespace(std::stack<unsigned int>& indents) | |
29 | : indents_(indents) {} | |
30 | ||
31 | template <typename Iterator, typename IdType, typename Context> | |
32 | void operator()(Iterator& start, Iterator& end | |
33 | , BOOST_SCOPED_ENUM(lex::pass_flags)& pass, IdType& id | |
34 | , Context& ctx) | |
35 | { | |
36 | unsigned int level = 0; | |
37 | if (is_indent(start, end, level)) { | |
38 | id = ID_INDENT; | |
39 | ctx.set_value(level); | |
40 | } | |
41 | else if (is_dedent(start, end, level)) { | |
42 | id = ID_DEDENT; | |
43 | ctx.set_value(level); | |
44 | } | |
45 | else { | |
46 | pass = lex::pass_flags::pass_ignore; | |
47 | } | |
48 | } | |
49 | ||
50 | // Get indentation level, for now (no tabs) we just count the spaces | |
51 | // once we allow tabs in the regex this needs to be expanded | |
52 | template <typename Iterator> | |
53 | unsigned int get_indent(Iterator& start, Iterator& end) | |
54 | { | |
11fdf7f2 | 55 | return static_cast<unsigned int>(std::distance(start, end)); |
7c673cae FG |
56 | } |
57 | ||
58 | template <typename Iterator> | |
59 | bool is_dedent(Iterator& start, Iterator& end, unsigned int& level) | |
60 | { | |
61 | unsigned int newindent = get_indent(start, end); | |
62 | while (!indents_.empty() && newindent < indents_.top()) { | |
63 | level++; // dedent one more level | |
64 | indents_.pop(); | |
65 | } | |
66 | return level > 0; | |
67 | } | |
68 | ||
69 | // Handle additional indentation | |
70 | template <typename Iterator> | |
71 | bool is_indent(Iterator& start, Iterator& end, unsigned int& level) | |
72 | { | |
73 | unsigned int newindent = get_indent(start, end); | |
74 | if (indents_.empty() || newindent > indents_.top()) { | |
75 | level = 1; // indent one more level | |
76 | indents_.push(newindent); | |
77 | return true; | |
78 | } | |
79 | return false; | |
80 | } | |
81 | ||
82 | std::stack<unsigned int>& indents_; | |
83 | ||
84 | private: | |
85 | // silence MSVC warning C4512: assignment operator could not be generated | |
86 | handle_whitespace& operator= (handle_whitespace const&); | |
87 | }; | |
88 | ||
89 | /////////////////////////////////////////////////////////////////////////////// | |
90 | // Token definition | |
91 | template <typename Lexer> | |
92 | struct set_token_value : boost::spirit::lex::lexer<Lexer> | |
93 | { | |
94 | set_token_value() | |
95 | { | |
96 | using lex::_pass; | |
97 | ||
98 | // define tokens and associate them with the lexer | |
99 | whitespace = "^[ ]+"; | |
100 | newline = '\n'; | |
101 | ||
102 | this->self = whitespace[ handle_whitespace(indents) ]; | |
103 | this->self += newline[ _pass = lex::pass_flags::pass_ignore ]; | |
104 | } | |
105 | ||
106 | lex::token_def<unsigned int> whitespace; | |
107 | lex::token_def<> newline; | |
108 | std::stack<unsigned int> indents; | |
109 | }; | |
110 | ||
111 | /////////////////////////////////////////////////////////////////////////////// | |
// One expected token: its id and its unsigned value. Arrays of token_data
// are terminated by an entry whose id is -1. The struct stays a plain
// aggregate because it is brace-initialized.
struct token_data
{
    int id;                 // expected token id, or -1 for the end marker
    unsigned int value;     // expected token value
};
117 | ||
118 | template <typename Token> | |
119 | inline | |
120 | bool test_tokens(token_data const* d, std::vector<Token> const& tokens) | |
121 | { | |
122 | BOOST_FOREACH(Token const& t, tokens) | |
123 | { | |
124 | if (d->id == -1) | |
125 | return false; // reached end of expected data | |
126 | ||
127 | typename Token::token_value_type const& value (t.value()); | |
128 | if (t.id() != static_cast<std::size_t>(d->id)) // token id must match | |
129 | return false; | |
130 | if (value.which() != 1) // must have an integer value | |
131 | return false; | |
132 | if (boost::get<unsigned int>(value) != d->value) // value must match | |
133 | return false; | |
134 | ++d; | |
135 | } | |
136 | ||
137 | return (d->id == -1) ? true : false; | |
138 | } | |
139 | ||
// Compares the indentation stack, from top to bottom, with the expected
// widths in 'i', which must be terminated by -1.
//
// Matching entries are popped from 'indents' while comparing; on the first
// mismatch the remaining entries are left on the stack. Returns true only
// if every entry matched and the expected data is exhausted too.
inline
bool test_indents(int *i, std::stack<unsigned int>& indents)
{
    for (/**/; !indents.empty(); ++i, indents.pop())
    {
        // either the expected data ran out or the width differs
        if (*i == -1 || indents.top() != static_cast<unsigned int>(*i))
            return false;
    }

    return *i == -1;
}
156 | ||
157 | /////////////////////////////////////////////////////////////////////////////// | |
158 | int main() | |
159 | { | |
160 | namespace lex = boost::spirit::lex; | |
161 | namespace phoenix = boost::phoenix; | |
162 | ||
163 | typedef std::string::iterator base_iterator_type; | |
164 | typedef boost::mpl::vector<unsigned int> token_value_types; | |
165 | typedef lex::lexertl::token<base_iterator_type, token_value_types> token_type; | |
166 | typedef lex::lexertl::actor_lexer<token_type> lexer_type; | |
167 | ||
168 | // test simple indent | |
169 | { | |
170 | set_token_value<lexer_type> lexer; | |
171 | std::vector<token_type> tokens; | |
172 | std::string input(" "); | |
173 | base_iterator_type first = input.begin(); | |
174 | ||
175 | using phoenix::arg_names::_1; | |
176 | BOOST_TEST(lex::tokenize(first, input.end(), lexer | |
177 | , phoenix::push_back(phoenix::ref(tokens), _1))); | |
178 | ||
179 | int i[] = { 4, -1 }; | |
180 | BOOST_TEST(test_indents(i, lexer.indents)); | |
181 | ||
182 | token_data d[] = { { ID_INDENT, 1 }, { -1, 0 } }; | |
183 | BOOST_TEST(test_tokens(d, tokens)); | |
184 | } | |
185 | ||
186 | // test two indents | |
187 | { | |
188 | set_token_value<lexer_type> lexer; | |
189 | std::vector<token_type> tokens; | |
190 | std::string input( | |
191 | " \n" | |
192 | " \n"); | |
193 | base_iterator_type first = input.begin(); | |
194 | ||
195 | using phoenix::arg_names::_1; | |
196 | BOOST_TEST(lex::tokenize(first, input.end(), lexer | |
197 | , phoenix::push_back(phoenix::ref(tokens), _1))); | |
198 | ||
199 | int i[] = { 8, 4, -1 }; | |
200 | BOOST_TEST(test_indents(i, lexer.indents)); | |
201 | ||
202 | token_data d[] = { | |
203 | { ID_INDENT, 1 }, { ID_INDENT, 1 } | |
204 | , { -1, 0 } }; | |
205 | BOOST_TEST(test_tokens(d, tokens)); | |
206 | } | |
207 | ||
208 | // test one dedent | |
209 | { | |
210 | set_token_value<lexer_type> lexer; | |
211 | std::vector<token_type> tokens; | |
212 | std::string input( | |
213 | " \n" | |
214 | " \n" | |
215 | " \n"); | |
216 | base_iterator_type first = input.begin(); | |
217 | ||
218 | using phoenix::arg_names::_1; | |
219 | BOOST_TEST(lex::tokenize(first, input.end(), lexer | |
220 | , phoenix::push_back(phoenix::ref(tokens), _1))); | |
221 | ||
222 | int i[] = { 4, -1 }; | |
223 | BOOST_TEST(test_indents(i, lexer.indents)); | |
224 | ||
225 | token_data d[] = { | |
226 | { ID_INDENT, 1 }, { ID_INDENT, 1 } | |
227 | , { ID_DEDENT, 1 } | |
228 | , { -1, 0 } }; | |
229 | BOOST_TEST(test_tokens(d, tokens)); | |
230 | } | |
231 | ||
232 | // test two dedents | |
233 | { | |
234 | set_token_value<lexer_type> lexer; | |
235 | std::vector<token_type> tokens; | |
236 | std::string input( | |
237 | " \n" | |
238 | " \n" | |
239 | " \n" | |
240 | " \n"); | |
241 | base_iterator_type first = input.begin(); | |
242 | ||
243 | using phoenix::arg_names::_1; | |
244 | BOOST_TEST(lex::tokenize(first, input.end(), lexer | |
245 | , phoenix::push_back(phoenix::ref(tokens), _1))); | |
246 | ||
247 | int i[] = { 4, -1 }; | |
248 | BOOST_TEST(test_indents(i, lexer.indents)); | |
249 | ||
250 | token_data d[] = { | |
251 | { ID_INDENT, 1 }, { ID_INDENT, 1 }, { ID_INDENT, 1 } | |
252 | , { ID_DEDENT, 2 } | |
253 | , { -1, 0 } }; | |
254 | BOOST_TEST(test_tokens(d, tokens)); | |
255 | } | |
256 | ||
257 | return boost::report_errors(); | |
258 | } | |
259 |