]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | * | |
3 | * Copyright (c) 1998-2002 | |
4 | * John Maddock | |
5 | * | |
6 | * Use, modification and distribution are subject to the | |
7 | * Boost Software License, Version 1.0. (See accompanying file | |
8 | * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
9 | * | |
10 | */ | |
11 | ||
12 | #ifdef _MSC_VER | |
13 | #pragma warning(disable: 4996 4127) | |
14 | #endif | |
15 | ||
16 | #include <string> | |
17 | #include <algorithm> | |
18 | #include <deque> | |
19 | #include <iterator> | |
20 | ||
21 | #ifdef BOOST_RE_OLD_IOSTREAM | |
22 | #include <iostream.h> | |
23 | #include <fstream.h> | |
24 | #else | |
25 | #include <iostream> | |
26 | #include <fstream> | |
27 | using std::cout; | |
28 | using std::cin; | |
29 | using std::cerr; | |
30 | using std::istream; | |
31 | using std::ostream; | |
32 | using std::endl; | |
33 | using std::ifstream; | |
34 | using std::streambuf; | |
35 | using std::getline; | |
36 | #endif | |
37 | ||
38 | #include <boost/config.hpp> | |
39 | #include <boost/regex.hpp> | |
40 | #include <boost/cregex.hpp> | |
41 | #include <boost/timer.hpp> | |
42 | #include <boost/smart_ptr.hpp> | |
43 | ||
44 | #if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE) | |
45 | #include <windows.h> | |
46 | #endif | |
47 | ||
48 | #if (defined(_MSC_VER) && (_MSC_VER <= 1300)) || defined(__sgi) | |
49 | // maybe no Koenig lookup, use using declaration instead: | |
50 | using namespace boost; | |
51 | #endif | |
52 | ||
53 | #ifndef BOOST_NO_WREGEX | |
//
// Writes a wide string to a narrow stream, one character at a time.
// Each wide character is simply truncated to char with static_cast;
// no locale-aware conversion is attempted.
// Returns the stream so that insertions can be chained.
//
ostream& operator << (ostream& os, const std::wstring& s)
{
   for(std::wstring::size_type pos = 0; pos < s.size(); ++pos)
   {
      const char narrow = static_cast<char>(s[pos]);
      os.put(narrow);
   }
   return os;
}
66 | #endif | |
67 | ||
//
// Output iterator that appends every value assigned through it to the
// end of a string-like container S (S must provide value_type and
// append(count, value)).
//
// The iterator holds a pointer to the target, so the target must
// outlive every copy of the iterator.  Increment and dereference are
// no-ops that return the iterator itself; only assignment has an effect.
//
// The iterator traits are supplied as public nested typedefs rather than
// by deriving from std::iterator: this works on every compiler (no
// BOOST_NO_STD_ITERATOR switch required), keeps the typedefs visible to
// std::iterator_traits, and avoids std::iterator, which is deprecated
// since C++17.
//
template <class S>
class string_out_iterator
{
   S* out;   // target container, never null
public:
   typedef std::output_iterator_tag iterator_category;
   typedef void value_type;
   typedef void difference_type;
   typedef void pointer;
   typedef void reference;

   // Binds the iterator to the container that receives the output.
   string_out_iterator(S& s) : out(&s) {}

   // Positional operations are meaningless for an appending iterator.
   string_out_iterator& operator++() { return *this; }
   string_out_iterator& operator++(int) { return *this; }
   string_out_iterator& operator*() { return *this; }

   // Appends one element to the bound container.
   string_out_iterator& operator=(typename S::value_type v)
   {
      out->append(1, v);
      return *this;
   }
};
94 | ||
namespace boost{
//
// Reads one line from "is" into "s", replacing the previous contents
// of "s" whenever something could be read.  The terminating '\n' is
// consumed but not stored, and a trailing '\r' (left over from a CRLF
// line ending) is removed as well.
//
// Returns "is"; the stream is left in a failed state when nothing
// could be read, so callers can detect the end of the input.
//
istream& getline(istream& is, std::string& s)
{
#if defined(BOOST_MSVC) || (defined(__BORLANDC__) && (__BORLANDC__ == 0x550)) || defined(__SGI_STL_PORT)
   //
   // problem with std::getline under MSVC6sp3
   // and C++ Builder 5.5, so read the line one character at a time:
   s.erase();
   bool extracted = false;
   char c;
   while(is.get(c))
   {
      extracted = true;
      if(c == '\n')
         break;
      s.append(1, c);
   }
   // Like std::getline: hitting the end of the input after at least one
   // character was read is not a failure, so leave only eofbit set.
   if(extracted && is.eof())
      is.clear(is.rdstate() & ~istream::failbit);
#else
   std::getline(is, s);
#endif
   if(s.size() && (s[s.size() - 1] == '\r'))
      s.erase(s.size() - 1);
   return is;
}
}
122 | ||
123 | ||
124 | int main(int argc, char**argv) | |
125 | { | |
126 | ifstream ifs; | |
127 | std::istream* p_in = &std::cin; | |
128 | if(argc == 2) | |
129 | { | |
130 | ifs.open(argv[1]); | |
131 | ifs.peek(); | |
132 | if(!ifs.good()) | |
133 | { | |
134 | cout << "Bad filename: " << argv[1] << endl; | |
135 | return -1; | |
136 | } | |
137 | p_in = &ifs; | |
138 | } | |
139 | ||
140 | boost::regex ex; | |
141 | boost::match_results<std::string::const_iterator> sm; | |
142 | #ifndef BOOST_NO_WREGEX | |
143 | std::wstring ws1, ws2; | |
144 | boost::wregex wex; | |
145 | boost::match_results<std::wstring::const_iterator> wsm; | |
146 | #endif | |
147 | boost::match_results<std::deque<char>::iterator> dm; | |
148 | std::string s1, s2, ts; | |
149 | std::deque<char> ds; | |
150 | boost::regex_tA r; | |
151 | boost::scoped_array<boost::regmatch_t> matches; | |
152 | std::size_t nsubs; | |
153 | boost::timer t; | |
154 | double tim; | |
155 | int result = 0; | |
156 | unsigned iters = 100; | |
157 | double wait_time = (std::min)(t.elapsed_min() * 1000, 0.5); | |
158 | ||
159 | while(true) | |
160 | { | |
161 | cout << "Enter expression (or \"quit\" to exit): "; | |
162 | boost::getline(*p_in, s1); | |
163 | if(argc == 2) | |
164 | cout << endl << s1 << endl; | |
165 | if(s1 == "quit") | |
166 | break; | |
167 | #ifndef BOOST_NO_WREGEX | |
168 | ws1.erase(); | |
169 | std::copy(s1.begin(), s1.end(), string_out_iterator<std::wstring>(ws1)); | |
170 | #endif | |
171 | try{ | |
172 | ex.assign(s1); | |
173 | #ifndef BOOST_NO_WREGEX | |
174 | wex.assign(ws1); | |
175 | #endif | |
176 | } | |
177 | catch(std::exception& e) | |
178 | { | |
179 | cout << "Error in expression: \"" << e.what() << "\"" << endl; | |
180 | continue; | |
181 | } | |
182 | int code = regcompA(&r, s1.c_str(), boost::REG_PERL); | |
183 | if(code != 0) | |
184 | { | |
185 | char buf[256]; | |
186 | regerrorA(code, &r, buf, 256); | |
187 | cout << "regcompA error: \"" << buf << "\"" << endl; | |
188 | continue; | |
189 | } | |
190 | nsubs = r.re_nsub + 1; | |
191 | matches.reset(new boost::regmatch_t[nsubs]); | |
192 | ||
193 | while(true) | |
194 | { | |
195 | cout << "Enter string to search (or \"quit\" to exit): "; | |
196 | boost::getline(*p_in, s2); | |
197 | if(argc == 2) | |
198 | cout << endl << s2 << endl; | |
199 | if(s2 == "quit") | |
200 | break; | |
201 | ||
202 | #ifndef BOOST_NO_WREGEX | |
203 | ws2.erase(); | |
204 | std::copy(s2.begin(), s2.end(), string_out_iterator<std::wstring>(ws2)); | |
205 | #endif | |
206 | ds.erase(ds.begin(), ds.end()); | |
207 | std::copy(s2.begin(), s2.end(), std::back_inserter(ds)); | |
208 | ||
209 | unsigned i; | |
210 | iters = 10; | |
211 | tim = 1.1; | |
212 | ||
213 | #if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE) | |
214 | MSG msg; | |
215 | PeekMessage(&msg, 0, 0, 0, 0); | |
216 | Sleep(0); | |
217 | #endif | |
218 | ||
219 | // cache load: | |
220 | regex_search(s2, sm, ex); | |
221 | ||
222 | // measure time interval for basic_regex<char> | |
223 | do{ | |
224 | iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100); | |
225 | t.restart(); | |
226 | for(i =0; i < iters; ++i) | |
227 | { | |
228 | result = regex_search(s2, sm, ex); | |
229 | } | |
230 | tim = t.elapsed(); | |
231 | }while(tim < wait_time); | |
232 | ||
233 | cout << "regex time: " << (tim * 1000000 / iters) << "us" << endl; | |
234 | if(result) | |
235 | { | |
236 | for(i = 0; i < sm.size(); ++i) | |
237 | { | |
238 | ts = sm[i]; | |
239 | cout << "\tmatch " << i << ": \""; | |
240 | cout << ts; | |
241 | cout << "\" (matched=" << sm[i].matched << ")" << endl; | |
242 | } | |
243 | cout << "\tmatch $`: \""; | |
244 | cout << std::string(sm[-1]); | |
245 | cout << "\" (matched=" << sm[-1].matched << ")" << endl; | |
246 | cout << "\tmatch $': \""; | |
247 | cout << std::string(sm[-2]); | |
248 | cout << "\" (matched=" << sm[-2].matched << ")" << endl << endl; | |
249 | } | |
250 | ||
251 | #ifndef BOOST_NO_WREGEX | |
252 | // measure time interval for boost::wregex | |
253 | iters = 10; | |
254 | tim = 1.1; | |
255 | // cache load: | |
256 | regex_search(ws2, wsm, wex); | |
257 | do{ | |
258 | iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100); | |
259 | t.restart(); | |
260 | for(i = 0; i < iters; ++i) | |
261 | { | |
262 | result = regex_search(ws2, wsm, wex); | |
263 | } | |
264 | tim = t.elapsed(); | |
265 | }while(tim < wait_time); | |
266 | cout << "wregex time: " << (tim * 1000000 / iters) << "us" << endl; | |
267 | if(result) | |
268 | { | |
269 | std::wstring tw; | |
270 | for(i = 0; i < wsm.size(); ++i) | |
271 | { | |
272 | tw.erase(); | |
273 | std::copy(wsm[i].first, wsm[i].second, string_out_iterator<std::wstring>(tw)); | |
274 | cout << "\tmatch " << i << ": \"" << tw; | |
275 | cout << "\" (matched=" << sm[i].matched << ")" << endl; | |
276 | } | |
277 | cout << "\tmatch $`: \""; | |
278 | tw.erase(); | |
279 | std::copy(wsm[-1].first, wsm[-1].second, string_out_iterator<std::wstring>(tw)); | |
280 | cout << tw; | |
281 | cout << "\" (matched=" << sm[-1].matched << ")" << endl; | |
282 | cout << "\tmatch $': \""; | |
283 | tw.erase(); | |
284 | std::copy(wsm[-2].first, wsm[-2].second, string_out_iterator<std::wstring>(tw)); | |
285 | cout << tw; | |
286 | cout << "\" (matched=" << sm[-2].matched << ")" << endl << endl; | |
287 | } | |
288 | #endif | |
289 | ||
290 | // measure time interval for basic_regex<char> using a deque | |
291 | iters = 10; | |
292 | tim = 1.1; | |
293 | // cache load: | |
294 | regex_search(ds.begin(), ds.end(), dm, ex); | |
295 | do{ | |
296 | iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100); | |
297 | t.restart(); | |
298 | for(i = 0; i < iters; ++i) | |
299 | { | |
300 | result = regex_search(ds.begin(), ds.end(), dm, ex); | |
301 | } | |
302 | tim = t.elapsed(); | |
303 | }while(tim < wait_time); | |
304 | cout << "regex time (search over std::deque<char>): " << (tim * 1000000 / iters) << "us" << endl; | |
305 | ||
306 | if(result) | |
307 | { | |
308 | for(i = 0; i < dm.size(); ++i) | |
309 | { | |
310 | ts.erase(); | |
311 | std::copy(dm[i].first, dm[i].second, string_out_iterator<std::string>(ts)); | |
312 | cout << "\tmatch " << i << ": \"" << ts; | |
313 | cout << "\" (matched=" << sm[i].matched << ")" << endl; | |
314 | } | |
315 | cout << "\tmatch $`: \""; | |
316 | ts.erase(); | |
317 | std::copy(dm[-1].first, dm[-1].second, string_out_iterator<std::string>(ts)); | |
318 | cout << ts; | |
319 | cout << "\" (matched=" << sm[-1].matched << ")" << endl; | |
320 | cout << "\tmatch $': \""; | |
321 | ts.erase(); | |
322 | std::copy(dm[-2].first, dm[-2].second, string_out_iterator<std::string>(ts)); | |
323 | cout << ts; | |
324 | cout << "\" (matched=" << sm[-2].matched << ")" << endl << endl; | |
325 | } | |
326 | ||
327 | // measure time interval for POSIX matcher: | |
328 | iters = 10; | |
329 | tim = 1.1; | |
330 | // cache load: | |
331 | regexecA(&r, s2.c_str(), nsubs, matches.get(), 0); | |
332 | do{ | |
333 | iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100); | |
334 | t.restart(); | |
335 | for(i = 0; i < iters; ++i) | |
336 | { | |
337 | result = regexecA(&r, s2.c_str(), nsubs, matches.get(), 0); | |
338 | } | |
339 | tim = t.elapsed(); | |
340 | }while(tim < wait_time); | |
341 | cout << "POSIX regexecA time: " << (tim * 1000000 / iters) << "us" << endl; | |
342 | ||
343 | if(result == 0) | |
344 | { | |
345 | for(i = 0; i < nsubs; ++i) | |
346 | { | |
347 | if(matches[i].rm_so >= 0) | |
348 | { | |
349 | ts.assign(s2.begin() + matches[i].rm_so, s2.begin() + matches[i].rm_eo); | |
350 | cout << "\tmatch " << i << ": \"" << ts << "\" (matched=" << (matches[i].rm_so != -1) << ")"<< endl; | |
351 | } | |
352 | else | |
353 | cout << "\tmatch " << i << ": \"\" (matched=" << (matches[i].rm_so != -1) << ")" << endl; // no match | |
354 | } | |
355 | cout << "\tmatch $`: \""; | |
356 | ts.erase(); | |
357 | ts.assign(s2.begin(), s2.begin() + matches[0].rm_so); | |
358 | cout << ts; | |
359 | cout << "\" (matched=" << (matches[0].rm_so != 0) << ")" << endl; | |
360 | cout << "\tmatch $': \""; | |
361 | ts.erase(); | |
362 | ts.assign(s2.begin() + matches[0].rm_eo, s2.end()); | |
363 | cout << ts; | |
364 | cout << "\" (matched=" << (matches[0].rm_eo != (int)s2.size()) << ")" << endl << endl; | |
365 | } | |
366 | } | |
367 | regfreeA(&r); | |
368 | } | |
369 | ||
370 | return 0; | |
371 | } | |
372 | ||
373 | #if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE) && !defined(UNDER_CE) | |
374 | #pragma comment(lib, "user32.lib") | |
375 | #endif | |
376 | ||
377 | ||
378 | ||
379 | ||
380 | ||
381 | ||
382 | ||
383 | ||
384 | ||
385 | ||
386 |