1 ///////////////////////////////////////////////////////////////
2 // Copyright 2015 John Maddock. Distributed under the Boost
3 // Software License, Version 1.0. (See accompanying file
4 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 #include "performance.hpp"
12 #include <boost/chrono.hpp>
13 #include <boost/detail/lightweight_main.hpp>
14 #include <boost/regex.hpp>
15 #include <boost/filesystem.hpp>
// Reads the file named by `file` and appends its characters to `text`.
//
//   text - destination string; characters are added through std::back_inserter.
//   file - path handed to the std::ifstream constructor.
//
// Throws std::runtime_error with a message that starts with
// `Unable to open file: "`.  The condition guarding the throw is not visible
// in this excerpt -- presumably it fires when the stream failed to open.
//
// NOTE(review): `temp_copy` and `pos` have no consumer among the statements
// visible here -- confirm whether they are still needed.
17 void load_file(std::string
& text
, const char* file
)
19 std::deque
<char> temp_copy
;
20 std::ifstream
is(file
);
23 std::string
msg("Unable to open file: \"");
26 throw std::runtime_error(msg
);
// Measure the stream: seek to the end, record the offset, then rewind.
28 is
.seekg(0, std::ios_base::end
);
29 std::istream::pos_type pos
= is
.tellg();
30 is
.seekg(0, std::ios_base::beg
);
// Copy everything from the current position to end-of-stream into `text`.
33 std::istreambuf_iterator
<char> it(is
);
34 std::copy(it
, std::istreambuf_iterator
<char>(), std::back_inserter(text
));
38 typedef std::list
<boost::shared_ptr
<abstract_regex
> > list_type
;
// Registers a regex engine wrapper: `item` is appended to the list returned
// by engines(), which the test drivers in this file iterate.
46 void abstract_regex::register_instance(boost::shared_ptr
<abstract_regex
> item
)
48 engines().push_back(item
);
// Timer helper parameterised on a Clock type.  Only fragments of the template
// are visible in this excerpt; presumably this is the `stopwatch` used by
// exec_timed_test -- confirm against the full file.
51 template <class Clock
>
// Duration type produced by the clock.
54 typedef typename
Clock::duration duration
;
// Capture the current instant as the start of timing.
57 m_start
= Clock::now();
// Time elapsed since m_start was last set.
61 return Clock::now() - m_start
;
// Restart timing from the current instant.
65 m_start
= Clock::now();
// Time point that elapsed time is measured from.
69 typename
Clock::time_point m_start
;
// Result of the most recent call made by exec_timed_test to its functor;
// test_search prints it as the number of occurrences found.
73 unsigned last_value_returned
= 0;
// Times repeated invocations of the callable `f`.
//
// Visible behaviour: a stopwatch on boost::chrono::high_resolution_clock is
// started, `f` is called `repeats` times, and the elapsed time is converted
// to a double-valued boost::chrono::duration (default period, i.e. seconds)
// and stored in `t`.
//
// NOTE(review): the declarations of `t`, `sum` and `repeats`, any enclosing
// loop, and the expression finally returned are not visible in this excerpt.
76 double exec_timed_test(Func f
)
81 stopwatch
<boost::chrono::high_resolution_clock
> w
;
83 for(unsigned count
= 0; count
< repeats
; ++count
)
// Keep the latest result for callers and fold it into `sum`
// (presumably so the calls have an observable effect -- confirm).
85 last_value_returned
= f();
86 sum
+= last_value_returned
;
89 t
= boost::chrono::duration_cast
<boost::chrono::duration
<double>>(w
.elapsed()).count();
// Escapes `s` and returns it wrapped as "[^" + s + "]" (the function name
// indicates the target is Quickbook markup).
//
// Steps visible here, in order:
//   1. every one of ` / _ * = $ ^ @ # & % \ gets a backslash in front of it;
//   2. each '[' is replaced by the text \u005B;
//   3. each ']' is replaced by the text \u005D.
// Step 1 runs first, so the backslashes introduced by steps 2 and 3 are not
// escaped a second time.
//
// NOTE(review): statements between the last replacement and the return are
// not visible in this excerpt.
97 std::string
format_expression_as_quickbook(std::string s
)
99 static const boost::regex
e("[`/_*=$^@#&%\\\\]");
100 static const boost::regex
open_b("\\[");
101 static const boost::regex
close_b("\\]");
102 s
= regex_replace(s
, e
, "\\\\$0");
103 s
= regex_replace(s
, open_b
, "\\\\u005B");
104 s
= regex_replace(s
, close_b
, "\\\\u005D");
110 return "[^" + s
+ "]";
// Times match_test() for one expression/text pair on every engine in
// engines() and reports each timing.
//
//   expression - pattern passed to each engine's set_expression().
//   text       - subject string passed to each engine's match_test().
//   isperl     - forwarded to set_expression(); also selects "Perl" versus
//                "leftmost-longest" in the table title.
//
// The report is keyed by table title (test kind, platform, compiler), row
// (formatted expression followed by formatted text) and heading (engine name).
// Timing is only reached after the set_expression() check shown below.
113 void test_match(const char* expression
, const char* text
, bool isperl
= false)
115 std::string table
= "Testing simple " + (isperl
? std::string("Perl") : std::string("leftmost-longest")) + " matches (platform = " + platform_name() + ", compiler = " + compiler_name() + ")";
116 std::string row
= format_expression_as_quickbook(expression
);
118 row
+= format_expression_as_quickbook(text
);
119 for(list_type::const_iterator i
= engines().begin(); i
!= engines().end(); ++i
)
121 std::string heading
= (*i
)->name();
122 if((*i
)->set_expression(expression
, isperl
))
// The lambda maps the boolean match result to 1 or 0 for exec_timed_test.
124 double time
= exec_timed_test([&]() { return (*i
)->match_test(text
) ? 1 : 0; });
125 report_execution_time(time
, table
, row
, heading
);
// Times find_all() for one expression/text pair on every engine in engines(),
// reports each timing and prints how many occurrences the engine found.
//
//   expression - pattern passed to each engine's set_expression().
//   text       - subject string passed to each engine's find_all().
//   isperl     - forwarded to set_expression(); also selects "Perl" versus
//                "leftmost-longest" in the table title.
//   filename   - optional name of the file `text` came from.
//                NOTE(review): its use is not visible in this excerpt.
//
// Timing is only reached after the set_expression() check shown below.
130 void test_search(const char* expression
, const char* text
, bool isperl
= false, const char* filename
= 0)
132 std::string table
= "Testing " + (isperl
? std::string("Perl") : std::string("leftmost-longest")) + " searches (platform = " + platform_name() + ", compiler = " + compiler_name() + ")";
133 std::string row
= format_expression_as_quickbook(expression
);
142 row
+= format_expression_as_quickbook(text
);
144 for(list_type::const_iterator i
= engines().begin(); i
!= engines().end(); ++i
)
146 std::string heading
= (*i
)->name();
147 if((*i
)->set_expression(expression
, isperl
))
149 double time
= exec_timed_test([&]() { return (*i
)->find_all(text
); });
150 report_execution_time(time
, table
, row
, heading
);
// last_value_returned holds the result of the final find_all() call made
// during timing.
151 std::cout
<< "Search with library: " << heading
<< " found " << last_value_returned
<< " occurrences.\n";
// Entry point of the benchmark driver (cpp_main is presumably the hook
// expected by boost/detail/lightweight_main.hpp, which this file includes --
// confirm).  This part builds a path relative to this source file and runs the
// match tests, first with isperl left at its default (false), then with
// isperl = true.
156 int cpp_main(int argc
, char* argv
[])
// Walk four directory levels up from this source file, then point cpp_file
// at boost/crc.hpp beneath that directory.
158 boost::filesystem::path
here(__FILE__
);
159 here
= here
.parent_path().parent_path().parent_path().parent_path();
161 boost::filesystem::path cpp_file
= here
/ "boost";
162 cpp_file
/= "crc.hpp";
// First pass: leftmost-longest mode (isperl defaults to false).
164 // start with a simple test, this is basically a measure of the minimal overhead
165 // involved in calling a regex matcher:
166 test_match("abc", "abc");
167 // these are from the regex docs:
168 test_match("^([0-9]+)(\\-| |$)(.*)$", "100- this is a line of ftp response which contains a message string");
169 test_match("([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}", "1234-5678-1234-456");
170 // these are from http://www.regxlib.com/
171 test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "john@johnmaddock.co.uk");
172 test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "foo12@foo.edu");
173 test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "bob.smith@foo.tv");
174 test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "EH10 2QQ");
175 test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "G1 1AA");
176 test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "SW1 1ZZ");
177 test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "4/1/2001");
178 test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "12/12/2001");
179 test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "123");
180 test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "+3.14159");
181 test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "-3.14159");
183 // Second pass: the same tests with isperl = true (Perl mode). Again start with a simple
184 // test that basically measures the minimal overhead involved in calling a regex matcher:
185 test_match("abc", "abc", true);
186 // these are from the regex docs:
187 test_match("^([0-9]+)(\\-| |$)(.*)$", "100- this is a line of ftp response which contains a message string", true);
188 test_match("([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}", "1234-5678-1234-456", true);
189 // these are from http://www.regxlib.com/
190 test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "john@johnmaddock.co.uk", true);
191 test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "foo12@foo.edu", true);
192 test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "bob.smith@foo.tv", true);
193 test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "EH10 2QQ", true);
194 test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "G1 1AA", true);
195 test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "SW1 1ZZ", true);
196 test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "4/1/2001", true);
197 test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "12/12/2001", true);
198 test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "123", true);
199 test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "+3.14159", true);
200 test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "-3.14159", true);
// Buffer that receives the text of each file used by the search tests.
202 std::string file_contents
;
// Perl-style regular expression that tokenises C++ source text for syntax
// highlighting.  It is a single alternation; which capture group took part in
// a match identifies the kind of token (indices noted on each piece below).
//
// NOTE: the literals must remain ONE concatenated string.  A `;` placed after
// the first literal ends the declaration early, leaving a pattern that holds
// only the preprocessor alternative followed by a dangling `|`, while the
// remaining literals become a discarded expression statement.
const char* highlight_expression =
   // preprocessor directives: index 1
   "(^[ \\t]*#(?:(?>[^\\\\\\n]+)|\\\\(?>\\s*\\n|.))*)|"
   // comments (// to end of line, or /* ... */): index 2
   "(//[^\\n]*|/\\*.*?\\*/)|"
   // numeric literals (hex, integer, floating point, optional suffix): index 3
   "\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|"
   // string literals: index 4
   "('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|"
   // keywords: index 5
   "\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import"
   "|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall"
   "|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool"
   "|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete"
   "|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto"
   "|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected"
   "|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast"
   "|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned"
   "|using|virtual|void|volatile|wchar_t|while)\\>";
// Patterns aimed at C++ source text; each is handed to test_search further
// down in cpp_main with isperl = true.
//
// class_expression: an optional `template<...>` prefix, the keyword `class`
// or `struct`, optional leading words (each optionally followed by a
// parenthesised argument list), the name, an optional `<...>` list, and then
// either `{` or a `:` clause that runs up to `{`.
223 const char* class_expression
= "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
224 "(class|struct)[[:space:]]*(\\w+([ \t]*\\([^)]*\\))?"
225 "[[:space:]]*)*(\\w*)[[:space:]]*(<[^;:{]+>[[:space:]]*)?"
226 "(\\{|:[^;\\{()]*\\{)";
// call_expression: a word followed by a parenthesised argument list; nested
// parentheses are handled by recursing into group 1 with (?1).
227 const char* call_expression
= "\\w+\\s*(\\([^()]++(?:(?1)[^()]++)*+[^)]*\\))";
// include_expression: an #include line naming either "file" or <file>.
229 const char* include_expression
= "^[ \t]*#[ \t]*include[ \t]+(\"[^\"]+\"|<[^>]+>)";
// boost_include_expression: as above, but the path must begin with boost/.
230 const char* boost_include_expression
= "^[ \t]*#[ \t]*include[ \t]+(\"boost/[^\"]+\"|<boost/[^>]+>)";
// brace_expression: a brace-delimited block; nested blocks are handled by
// recursing into the whole pattern with (?0).
231 const char* brace_expression
= "\\{[^{}]++((?0)[^{}]++)*+[^}]*+\\}";
// function_with_body_expression: a word (group 1), a parenthesised list
// (group 2, recursive via (?2)), then a brace block (group 3, recursive via (?3)).
232 const char* function_with_body_expression
= "(\\w+)\\s*(\\([^()]++(?:(?2)[^()]++)*+[^)]*\\))\\s*(\\{[^{}]++((?3)[^{}]++)*+[^}]*+\\})";
// Search tests, part 1: load libs/libraries.htm (path relative to the working
// directory) and time several name and HTML-tag searches over it.  The third
// argument selects the mode: false = leftmost-longest, true = Perl.
235 load_file(file_contents
, "../../../libs/libraries.htm");
236 test_search("Beman|John|Dave", file_contents
.c_str(), false, "../../../libs/libraries.htm");
237 test_search("Beman|John|Dave", file_contents
.c_str(), true, "../../../libs/libraries.htm");
238 test_search("(?i)<p>.*?</p>", file_contents
.c_str(), true, "../../../libs/libraries.htm");
239 test_search("<a[^>]+href=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents
.c_str(), false, "../../../libs/libraries.htm");
240 test_search("(?i)<a[^>]+href=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents
.c_str(), true, "../../../libs/libraries.htm");
241 test_search("(?i)<h[12345678][^>]*>.*?</h[12345678]>", file_contents
.c_str(), true, "../../../libs/libraries.htm");
242 test_search("<img[^>]+src=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents
.c_str(), false, "../../../libs/libraries.htm");
243 test_search("(?i)<img[^>]+src=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents
.c_str(), true, "../../../libs/libraries.htm");
244 test_search("(?i)<font[^>]+face=(\"[^\"]*\"|[^[:space:]]+)[^>]*>.*?</font>", file_contents
.c_str(), true, "../../../libs/libraries.htm");
// Search tests, part 2: load boost/multiprecision/number.hpp and run the
// C++-oriented patterns over it, all in Perl mode.  The name passed as the
// fourth argument omits the leading "../../../" used when loading.
247 load_file(file_contents
, "../../../boost/multiprecision/number.hpp");
249 test_search(function_with_body_expression
, file_contents
.c_str(), true, "boost/multiprecision/number.hpp");
250 test_search(brace_expression
, file_contents
.c_str(), true, "boost/multiprecision/number.hpp");
251 test_search(call_expression
, file_contents
.c_str(), true, "boost/multiprecision/number.hpp");
252 test_search(highlight_expression
, file_contents
.c_str(), true, "boost/multiprecision/number.hpp");
253 test_search(class_expression
, file_contents
.c_str(), true, "boost/multiprecision/number.hpp");
254 test_search(include_expression
, file_contents
.c_str(), true, "boost/multiprecision/number.hpp");
255 test_search(boost_include_expression
, file_contents
.c_str(), true, "boost/multiprecision/number.hpp");