]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright (c) 2001-2010 Hartmut Kaiser |
2 | // | |
3 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | |
4 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
5 | ||
6 | // This example shows how to create a simple lexer recognizing a couple of | |
7 | // different tokens and how to use this with a grammar. This example has a | |
8 | // heavily backtracking grammar which makes it a candidate for lexer based | |
9 | // parsing (all tokens are scanned and generated only once, even if | |
10 | // backtracking is required) which speeds up the overall parsing process | |
11 | // considerably, outweighing the overhead needed for setting up the lexer. | |
12 | // Additionally it demonstrates how to use one of the defined tokens as a | |
13 | // parser component in the grammar. | |
14 | // | |
15 | // The grammar recognizes a simple input structure: any number of simple | |
16 | // English sentences (statements, questions and commands) are recognized, | |
17 | // and each kind is counted separately. | |
18 | ||
19 | // #define BOOST_SPIRIT_DEBUG | |
20 | // #define BOOST_SPIRIT_LEXERTL_DEBUG | |
21 | ||
7c673cae FG |
22 | #include <boost/spirit/include/qi.hpp> |
23 | #include <boost/spirit/include/lex_lexertl.hpp> | |
1e59de90 | 24 | #include <boost/phoenix/operator.hpp> |
7c673cae FG |
25 | |
26 | #include <iostream> | |
27 | #include <fstream> | |
28 | #include <string> | |
29 | ||
30 | #include "example.hpp" | |
31 | ||
32 | using namespace boost::spirit; | |
33 | using namespace boost::spirit::ascii; | |
34 | using boost::phoenix::ref; | |
35 | ||
36 | /////////////////////////////////////////////////////////////////////////////// | |
37 | // Token definition | |
38 | /////////////////////////////////////////////////////////////////////////////// | |
39 | template <typename Lexer> | |
40 | struct example2_tokens : lex::lexer<Lexer> | |
41 | { | |
42 | example2_tokens() | |
43 | { | |
44 | // A 'word' is comprised of one or more letters and an optional | |
45 | // apostrophe. If it contains an apostrophe, there may only be one and | |
46 | // the apostrophe must be preceded and succeeded by at least 1 letter. | |
47 | // For example, "I'm" and "doesn't" meet the definition of 'word' we | |
48 | // define below. | |
49 | word = "[a-zA-Z]+('[a-zA-Z]+)?"; | |
50 | ||
51 | // Associate the tokens and the token set with the lexer. Note that | |
52 | // single character token definitions as used below always get | |
53 | // interpreted literally and never as special regex characters. This is | |
54 | // done to be able to assign single characters the id of their character | |
55 | // code value, allowing to reference those as literals in Qi grammars. | |
56 | this->self = lex::token_def<>(',') | '!' | '.' | '?' | ' ' | '\n' | word; | |
57 | } | |
58 | ||
59 | lex::token_def<> word; | |
60 | }; | |
61 | ||
62 | /////////////////////////////////////////////////////////////////////////////// | |
63 | // Grammar definition | |
64 | /////////////////////////////////////////////////////////////////////////////// | |
65 | template <typename Iterator> | |
66 | struct example2_grammar : qi::grammar<Iterator> | |
67 | { | |
68 | template <typename TokenDef> | |
69 | example2_grammar(TokenDef const& tok) | |
70 | : example2_grammar::base_type(story) | |
71 | , paragraphs(0), commands(0), questions(0), statements(0) | |
72 | { | |
73 | story | |
74 | = +paragraph | |
75 | ; | |
76 | ||
77 | paragraph | |
78 | = ( +( command [ ++ref(commands) ] | |
79 | | question [ ++ref(questions) ] | |
80 | | statement [ ++ref(statements) ] | |
81 | ) | |
82 | >> *char_(' ') >> +char_('\n') | |
83 | ) | |
84 | [ ++ref(paragraphs) ] | |
85 | ; | |
86 | ||
87 | command | |
88 | = +(tok.word | ' ' | ',') >> '!' | |
89 | ; | |
90 | ||
91 | question | |
92 | = +(tok.word | ' ' | ',') >> '?' | |
93 | ; | |
94 | ||
95 | statement | |
96 | = +(tok.word | ' ' | ',') >> '.' | |
97 | ; | |
98 | ||
99 | BOOST_SPIRIT_DEBUG_NODE(story); | |
100 | BOOST_SPIRIT_DEBUG_NODE(paragraph); | |
101 | BOOST_SPIRIT_DEBUG_NODE(command); | |
102 | BOOST_SPIRIT_DEBUG_NODE(question); | |
103 | BOOST_SPIRIT_DEBUG_NODE(statement); | |
104 | } | |
105 | ||
106 | qi::rule<Iterator> story, paragraph, command, question, statement; | |
107 | int paragraphs, commands, questions, statements; | |
108 | }; | |
109 | ||
110 | /////////////////////////////////////////////////////////////////////////////// | |
111 | int main() | |
112 | { | |
113 | // iterator type used to expose the underlying input stream | |
114 | typedef std::string::iterator base_iterator_type; | |
115 | ||
116 | // This is the token type to return from the lexer iterator | |
117 | typedef lex::lexertl::token<base_iterator_type> token_type; | |
118 | ||
119 | // This is the lexer type to use to tokenize the input. | |
120 | // Here we use the lexertl based lexer engine. | |
121 | typedef lex::lexertl::lexer<token_type> lexer_type; | |
122 | ||
123 | // This is the token definition type (derived from the given lexer type). | |
124 | typedef example2_tokens<lexer_type> example2_tokens; | |
125 | ||
126 | // this is the iterator type exposed by the lexer | |
127 | typedef example2_tokens::iterator_type iterator_type; | |
128 | ||
129 | // this is the type of the grammar to parse | |
130 | typedef example2_grammar<iterator_type> example2_grammar; | |
131 | ||
132 | // now we use the types defined above to create the lexer and grammar | |
133 | // object instances needed to invoke the parsing process | |
134 | example2_tokens tokens; // Our lexer | |
135 | example2_grammar calc(tokens); // Our parser | |
136 | ||
137 | std::string str (read_from_file("example2.input")); | |
138 | ||
139 | // At this point we generate the iterator pair used to expose the | |
140 | // tokenized input stream. | |
141 | std::string::iterator it = str.begin(); | |
142 | iterator_type iter = tokens.begin(it, str.end()); | |
143 | iterator_type end = tokens.end(); | |
144 | ||
145 | // Parsing is done based on the token stream, not the character | |
146 | // stream read from the input. | |
147 | bool r = qi::parse(iter, end, calc); | |
148 | ||
149 | if (r && iter == end) | |
150 | { | |
151 | std::cout << "-------------------------\n"; | |
152 | std::cout << "Parsing succeeded\n"; | |
153 | std::cout << "There were " | |
154 | << calc.commands << " commands, " | |
155 | << calc.questions << " questions, and " | |
156 | << calc.statements << " statements.\n"; | |
157 | std::cout << "-------------------------\n"; | |
158 | } | |
159 | else | |
160 | { | |
161 | std::cout << "-------------------------\n"; | |
162 | std::cout << "Parsing failed\n"; | |
163 | std::cout << "-------------------------\n"; | |
164 | } | |
165 | ||
166 | std::cout << "Bye... :-) \n\n"; | |
167 | return 0; | |
168 | } |