]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/libs/spirit/example/lex/example2.cpp
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / boost / libs / spirit / example / lex / example2.cpp
CommitLineData
7c673cae
FG
// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example shows how to create a simple lexer recognizing a couple of
// different tokens and how to use this with a grammar. This example has a
// heavily backtracking grammar which makes it a candidate for lexer based
// parsing (all tokens are scanned and generated only once, even if
// backtracking is required), which speeds up the overall parsing process
// considerably, outweighing the overhead needed for setting up the lexer.
// Additionally it demonstrates how to use one of the defined tokens as a
// parser component in the grammar.
//
// The grammar recognizes a simple input structure: any number of English
// simple sentences (statements, questions and commands) are recognized and
// are counted separately.

// #define BOOST_SPIRIT_DEBUG
// #define BOOST_SPIRIT_LEXERTL_DEBUG
7c673cae
FG
22#include <boost/spirit/include/qi.hpp>
23#include <boost/spirit/include/lex_lexertl.hpp>
1e59de90 24#include <boost/phoenix/operator.hpp>
7c673cae
FG
25
26#include <iostream>
27#include <fstream>
28#include <string>
29
30#include "example.hpp"
31
32using namespace boost::spirit;
33using namespace boost::spirit::ascii;
34using boost::phoenix::ref;
35
36///////////////////////////////////////////////////////////////////////////////
37// Token definition
38///////////////////////////////////////////////////////////////////////////////
39template <typename Lexer>
40struct example2_tokens : lex::lexer<Lexer>
41{
42 example2_tokens()
43 {
44 // A 'word' is comprised of one or more letters and an optional
45 // apostrophe. If it contains an apostrophe, there may only be one and
46 // the apostrophe must be preceded and succeeded by at least 1 letter.
47 // For example, "I'm" and "doesn't" meet the definition of 'word' we
48 // define below.
49 word = "[a-zA-Z]+('[a-zA-Z]+)?";
50
51 // Associate the tokens and the token set with the lexer. Note that
52 // single character token definitions as used below always get
53 // interpreted literally and never as special regex characters. This is
54 // done to be able to assign single characters the id of their character
55 // code value, allowing to reference those as literals in Qi grammars.
56 this->self = lex::token_def<>(',') | '!' | '.' | '?' | ' ' | '\n' | word;
57 }
58
59 lex::token_def<> word;
60};
61
62///////////////////////////////////////////////////////////////////////////////
63// Grammar definition
64///////////////////////////////////////////////////////////////////////////////
65template <typename Iterator>
66struct example2_grammar : qi::grammar<Iterator>
67{
68 template <typename TokenDef>
69 example2_grammar(TokenDef const& tok)
70 : example2_grammar::base_type(story)
71 , paragraphs(0), commands(0), questions(0), statements(0)
72 {
73 story
74 = +paragraph
75 ;
76
77 paragraph
78 = ( +( command [ ++ref(commands) ]
79 | question [ ++ref(questions) ]
80 | statement [ ++ref(statements) ]
81 )
82 >> *char_(' ') >> +char_('\n')
83 )
84 [ ++ref(paragraphs) ]
85 ;
86
87 command
88 = +(tok.word | ' ' | ',') >> '!'
89 ;
90
91 question
92 = +(tok.word | ' ' | ',') >> '?'
93 ;
94
95 statement
96 = +(tok.word | ' ' | ',') >> '.'
97 ;
98
99 BOOST_SPIRIT_DEBUG_NODE(story);
100 BOOST_SPIRIT_DEBUG_NODE(paragraph);
101 BOOST_SPIRIT_DEBUG_NODE(command);
102 BOOST_SPIRIT_DEBUG_NODE(question);
103 BOOST_SPIRIT_DEBUG_NODE(statement);
104 }
105
106 qi::rule<Iterator> story, paragraph, command, question, statement;
107 int paragraphs, commands, questions, statements;
108};
109
110///////////////////////////////////////////////////////////////////////////////
111int main()
112{
113 // iterator type used to expose the underlying input stream
114 typedef std::string::iterator base_iterator_type;
115
116 // This is the token type to return from the lexer iterator
117 typedef lex::lexertl::token<base_iterator_type> token_type;
118
119 // This is the lexer type to use to tokenize the input.
120 // Here we use the lexertl based lexer engine.
121 typedef lex::lexertl::lexer<token_type> lexer_type;
122
123 // This is the token definition type (derived from the given lexer type).
124 typedef example2_tokens<lexer_type> example2_tokens;
125
126 // this is the iterator type exposed by the lexer
127 typedef example2_tokens::iterator_type iterator_type;
128
129 // this is the type of the grammar to parse
130 typedef example2_grammar<iterator_type> example2_grammar;
131
132 // now we use the types defined above to create the lexer and grammar
133 // object instances needed to invoke the parsing process
134 example2_tokens tokens; // Our lexer
135 example2_grammar calc(tokens); // Our parser
136
137 std::string str (read_from_file("example2.input"));
138
139 // At this point we generate the iterator pair used to expose the
140 // tokenized input stream.
141 std::string::iterator it = str.begin();
142 iterator_type iter = tokens.begin(it, str.end());
143 iterator_type end = tokens.end();
144
145 // Parsing is done based on the token stream, not the character
146 // stream read from the input.
147 bool r = qi::parse(iter, end, calc);
148
149 if (r && iter == end)
150 {
151 std::cout << "-------------------------\n";
152 std::cout << "Parsing succeeded\n";
153 std::cout << "There were "
154 << calc.commands << " commands, "
155 << calc.questions << " questions, and "
156 << calc.statements << " statements.\n";
157 std::cout << "-------------------------\n";
158 }
159 else
160 {
161 std::cout << "-------------------------\n";
162 std::cout << "Parsing failed\n";
163 std::cout << "-------------------------\n";
164 }
165
166 std::cout << "Bye... :-) \n\n";
167 return 0;
168}