// History: commits 7c673cae (FG), 1e59de90 (TL) — blame-table extraction residue removed.
//  Copyright (c) 2001-2010 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example is the equivalent to the following lex program:
/*
//[wcl_flex_version
    %{
        int c = 0, w = 0, l = 0;
    %}
    %%
    [^ \t\n]+ { ++w; c += yyleng; }
    \n        { ++c; ++l; }
    .         { ++c; }
    %%
    main()
    {
        yylex();
        printf("%d %d %d\n", l, w, c);
    }
//]
*/
//  Its purpose is to do the word count function of the wc command in UNIX. It
//  prints the number of lines, words and characters in a file.
//
//  This example shows how to use semantic actions associated with token
//  definitions to directly attach actions to tokens. These get executed
//  whenever the corresponding token got matched in the input sequence. Note,
//  how this example implements all functionality directly in the lexer
//  definition without any need for a parser.

// #define BOOST_SPIRIT_LEXERTL_DEBUG
//[wcl_includes
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/phoenix/operator.hpp>
#include <boost/phoenix/statement.hpp>
#include <boost/phoenix/stl/algorithm.hpp>
#include <boost/phoenix/core.hpp>
//]

#include <iostream>
#include <iterator>
#include <string>

#include "example.hpp"

//[wcl_namespaces
// Shorthand namespace alias for Boost.Spirit.Lex, used throughout this file.
namespace lex = boost::spirit::lex;
//]

///////////////////////////////////////////////////////////////////////////////
//  Token definition: We use the lexertl based lexer engine as the underlying
//  lexer type.
//
//  Note, the token definition type is derived from the 'lexertl_actor_lexer'
//  template, which is necessary to be able to use lexer semantic actions.
///////////////////////////////////////////////////////////////////////////////
59 | struct distance_func | |
60 | { | |
61 | template <typename Iterator1, typename Iterator2> | |
62 | struct result : boost::iterator_difference<Iterator1> {}; | |
63 | ||
64 | template <typename Iterator1, typename Iterator2> | |
65 | typename result<Iterator1, Iterator2>::type | |
66 | operator()(Iterator1 const& begin, Iterator2 const& end) const | |
67 | { | |
68 | return std::distance(begin, end); | |
69 | } | |
70 | }; | |
71 | boost::phoenix::function<distance_func> const distance = distance_func(); | |
72 | ||
73 | //[wcl_token_definition | |
74 | template <typename Lexer> | |
75 | struct word_count_tokens : lex::lexer<Lexer> | |
76 | { | |
77 | word_count_tokens() | |
78 | : c(0), w(0), l(0) | |
79 | , word("[^ \t\n]+") // define tokens | |
80 | , eol("\n") | |
81 | , any(".") | |
82 | { | |
83 | using boost::spirit::lex::_start; | |
84 | using boost::spirit::lex::_end; | |
85 | using boost::phoenix::ref; | |
86 | ||
87 | // associate tokens with the lexer | |
88 | this->self | |
89 | = word [++ref(w), ref(c) += distance(_start, _end)] | |
90 | | eol [++ref(c), ++ref(l)] | |
91 | | any [++ref(c)] | |
92 | ; | |
93 | } | |
94 | ||
95 | std::size_t c, w, l; | |
96 | lex::token_def<> word, eol, any; | |
97 | }; | |
98 | //] | |
99 | ||
100 | /////////////////////////////////////////////////////////////////////////////// | |
101 | //[wcl_main | |
102 | int main(int argc, char* argv[]) | |
103 | { | |
104 | ||
105 | /*< Specifying `omit` as the token attribute type generates a token class | |
106 | not holding any token attribute at all (not even the iterator range of the | |
107 | matched input sequence), therefore optimizing the token, the lexer, and | |
108 | possibly the parser implementation as much as possible. Specifying | |
109 | `mpl::false_` as the 3rd template parameter generates a token | |
110 | type and an iterator, both holding no lexer state, allowing for even more | |
111 | aggressive optimizations. As a result the token instances contain the token | |
112 | ids as the only data member. | |
113 | >*/ typedef | |
114 | lex::lexertl::token<char const*, lex::omit, boost::mpl::false_> | |
115 | token_type; | |
116 | ||
117 | /*< This defines the lexer type to use | |
118 | >*/ typedef lex::lexertl::actor_lexer<token_type> lexer_type; | |
119 | ||
120 | /*< Create the lexer object instance needed to invoke the lexical analysis | |
121 | >*/ word_count_tokens<lexer_type> word_count_lexer; | |
122 | ||
123 | /*< Read input from the given file, tokenize all the input, while discarding | |
124 | all generated tokens | |
125 | >*/ std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1])); | |
126 | char const* first = str.c_str(); | |
127 | char const* last = &first[str.size()]; | |
128 | ||
129 | /*< Create a pair of iterators returning the sequence of generated tokens | |
130 | >*/ lexer_type::iterator_type iter = word_count_lexer.begin(first, last); | |
131 | lexer_type::iterator_type end = word_count_lexer.end(); | |
132 | ||
133 | /*< Here we simply iterate over all tokens, making sure to break the loop | |
134 | if an invalid token gets returned from the lexer | |
135 | >*/ while (iter != end && token_is_valid(*iter)) | |
136 | ++iter; | |
137 | ||
138 | if (iter == end) { | |
139 | std::cout << "lines: " << word_count_lexer.l | |
140 | << ", words: " << word_count_lexer.w | |
141 | << ", characters: " << word_count_lexer.c | |
142 | << "\n"; | |
143 | } | |
144 | else { | |
145 | std::string rest(first, last); | |
146 | std::cout << "Lexical analysis failed\n" << "stopped at: \"" | |
147 | << rest << "\"\n"; | |
148 | } | |
149 | return 0; | |
150 | } | |
151 | //] |