]>
git.proxmox.com Git - ceph.git/blob - ceph/src/boost/tools/auto_index/src/file_scanning.cpp
1 // Copyright 2008 John Maddock
3 // Use, modification and distribution are subject to the
4 // Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt
6 // or copy at http://www.boost.org/LICENSE_1_0.txt)
8 #include "auto_index.hpp"
// True until install_default_scanners() has run; that function clears it.
bool need_defaults = true;
12 void install_default_scanners()
14 need_defaults
= false;
16 // Set the default scanners if they're not defined already:
19 s
.type
= "class_name";
20 if(file_scanner_set
.find(s
) == file_scanner_set
.end())
23 "class_name", // Index type
24 // Header file scanner regex:
25 // possibly leading whitespace:
27 // possible template declaration:
28 "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
30 "(class|struct)[[:space:]]*"
31 // leading declspec macros etc:
35 "[[:blank:]]*\\([^)]*\\)"
40 "(\\<\\w*\\>)[[:space:]]*"
41 // template specialisation parameters
42 "(<[^;:{]+>)?[[:space:]]*"
43 // terminate in { or :
44 "(\\{|:[^;\\{()]*\\{)",
46 "(?:class|struct)[^;{]+\\\\<\\5\\\\>[^;{]+\\\\{", // Format string to create indexing regex.
47 "\\5", // Format string to create index term.
48 "", // Filter regex for section id's.
49 "" // Filter regex for filenames.
53 s
.type
= "typedef_name";
54 if(file_scanner_set
.find(s
) == file_scanner_set
.end())
57 "typedef_name", // Index type
58 "typedef[^;{}#]+?(\\w+)\\s*;", // scanner regex
59 "typedef[^;]+\\\\<\\1\\\\>\\\\s*;", // Format string to create indexing regex.
60 "\\1", // Format string to create index term.
61 "", // Filter regex for section id's.
62 "" // Filter regex for filenames.
66 s
.type
= "macro_name";
67 if(file_scanner_set
.find(s
) == file_scanner_set
.end())
70 "macro_name", // Index type
71 "^\\s*#\\s*define\\s+(\\w+)", // scanner regex
72 "\\\\<\\1\\\\>", // Format string to create indexing regex.
73 "\\1", // Format string to create index term.
74 "", // Filter regex for section id's.
75 "" // Filter regex for filenames.
79 s
.type
= "function_name";
80 if(file_scanner_set
.find(s
) == file_scanner_set
.end())
83 "function_name", // Index type
84 "\\w++(?:\\s*+<[^>]++>)?[\\s&*]+?(\\w+)\\s*(?:BOOST_[[:upper:]_]+\\s*)?\\([^;{}]*\\)\\s*[;{]", // scanner regex
85 "\\\\<\\\\w+\\\\>(?:\\\\s+<[^>]*>)*[\\\\s&*]+\\\\<\\1\\\\>\\\\s*\\\\([^;{]*\\\\)", // Format string to create indexing regex.
86 "\\1", // Format string to create index term.
87 "", // Filter regex for section id's.
88 "" // Filter regex for filenames.
//
// Helper to dump file contents into a std::string.
//
// s  - receives the stream contents; any previous contents are discarded.
// is - stream to read from, consumed up to end-of-file.
//
// If the stream is already in a bad() state, s is left empty and nothing
// is read.  On return from a successful read the stream has hit EOF.
//
void load_file(std::string& s, std::istream& is)
{
   s.erase();
   if(is.bad())
      return;
   // in_avail() is only a hint and is allowed to return -1 (or 0); only
   // use it to pre-size the string when it is a sensible positive value,
   // otherwise reserve() would be asked for a huge size and throw.
   const std::streamsize hint = is.rdbuf()->in_avail();
   if(hint > 0)
      s.reserve(static_cast<std::string::size_type>(hint));
   // Read in blocks rather than one character at a time:
   char buffer[4096];
   while(is.read(buffer, sizeof(buffer)))
      s.append(buffer, sizeof(buffer));
   // The final (short or empty) read reports its length through gcount():
   s.append(buffer, static_cast<std::string::size_type>(is.gcount()));
}
//
// Helper to convert string from external source into valid XML.
//
// Replaces the four characters &, <, > and " with the corresponding
// XML entities; every other character is copied through unchanged.
// Returns the escaped copy, the input is not modified.
//
std::string escape_to_xml(const std::string& in)
{
   std::string result;
   result.reserve(in.size());
   for(std::string::size_type i = 0; i < in.size(); ++i)
   {
      switch(in[i])
      {
      case '&':
         result.append("&amp;");
         break;
      case '<':
         result.append("&lt;");
         break;
      case '>':
         result.append("&gt;");
         break;
      case '"':
         result.append("&quot;");
         break;
      default:
         result.append(1, in[i]);
      }
   }
   return result;
}
138 // Scan a source file for things to index:
140 void scan_file(const std::string
& file
)
143 install_default_scanners();
145 std::cout
<< "Scanning file... " << file
<< std::endl
;
147 std::ifstream
is(file
.c_str());
148 if(!is
.peek() || !is
.good())
149 throw std::runtime_error(std::string("Unable to read from file: ") + file
);
152 for(file_scanner_set_type::iterator pscan
= file_scanner_set
.begin(); pscan
!= file_scanner_set
.end(); ++pscan
)
154 bool need_debug
= false;
155 if(!debug
.empty() && regex_match(pscan
->type
, ::debug
))
158 std::cout
<< "Processing scanner " << pscan
->type
<< " on file " << file
<< std::endl
;
159 std::cout
<< "Scanner regex:" << pscan
->scanner
<< std::endl
;
160 std::cout
<< "Scanner formatter (search regex):" << pscan
->format_string
<< std::endl
;
161 std::cout
<< "Scanner formatter (index term):" << pscan
->term_formatter
<< std::endl
;
162 std::cout
<< "Scanner file name filter:" << pscan
->file_name_filter
<< std::endl
;
163 std::cout
<< "Scanner section id filter:" << pscan
->section_filter
<< std::endl
;
165 if(!pscan
->file_name_filter
.empty())
167 if(!regex_match(file
, pscan
->file_name_filter
))
171 std::cout
<< "File failed to match file name filter, this file will be skipped..." << std::endl
;
173 continue; // skip this file
176 if(verbose
&& !need_debug
)
177 std::cout
<< "Scanning for type \"" << (*pscan
).type
<< "\" ... " << std::endl
;
178 boost::sregex_iterator
i(text
.begin(), text
.end(), (*pscan
).scanner
), j
;
184 info
.term
= escape_to_xml(i
->format(pscan
->term_formatter
));
185 info
.search_text
= i
->format(pscan
->format_string
);
186 info
.category
= pscan
->type
;
187 if(!pscan
->section_filter
.empty())
188 info
.search_id
= pscan
->section_filter
;
189 std::pair
<std::set
<index_info
>::iterator
, bool> pos
= index_terms
.insert(info
);
192 if(verbose
|| need_debug
)
193 std::cout
<< "Indexing " << info
.term
<< " as type " << info
.category
<< std::endl
;
195 std::cout
<< "Search regex will be: \"" << info
.search_text
<< "\"" <<
196 " ID constraint is: \"" << info
.search_id
<< "\""
197 << "Found text was: " << i
->str() << std::endl
;
198 if(pos
.first
->search_text
!= info
.search_text
)
201 // Merge the search terms:
203 const_cast<boost::regex
&>(pos
.first
->search_text
) =
204 "(?:" + pos
.first
->search_text
.str() + ")|(?:" + info
.search_text
.str() + ")";
206 if(pos
.first
->search_id
!= info
.search_id
)
209 // Merge the ID constraints:
211 const_cast<boost::regex
&>(pos
.first
->search_id
) =
212 "(?:" + pos
.first
->search_id
.str() + ")|(?:" + info
.search_id
.str() + ")";
216 catch(const boost::regex_error
& e
)
218 std::cerr
<< "Unable to create regular expression from found index term:\""
219 << i
->format(pscan
->term_formatter
) << "\" In file " << file
<< std::endl
;
220 std::cerr
<< e
.what() << std::endl
;
222 catch(const std::exception
& e
)
224 std::cerr
<< "Unable to create index term:\""
225 << i
->format(pscan
->term_formatter
) << "\" In file " << file
<< std::endl
;
226 std::cerr
<< e
.what() << std::endl
;
234 // Scan a whole directory for files to search:
236 void scan_dir(const std::string
& dir
, const std::string
& mask
, bool recurse
)
238 using namespace boost::filesystem
;
239 boost::regex
e(mask
);
240 directory_iterator
i(dir
), j
;
244 if(regex_match(i
->path().filename().string(), e
))
246 scan_file(i
->path().string());
248 else if(recurse
&& is_directory(i
->status()))
250 scan_dir(i
->path().string(), mask
, recurse
);
//
// Remove quotes from a string.
//
// If s is at least two characters long and both begins and ends with a
// double quote, returns s with that one leading and one trailing quote
// removed.  Otherwise returns an unmodified copy of s.  Nothing inside
// the quotes is altered (escape sequences are left as written).
//
std::string unquote(const std::string& s)
{
   std::string result(s);
   if((s.size() >= 2) && (*s.begin() == '\"') && (*s.rbegin() == '\"'))
   {
      result.erase(result.begin());
      result.erase(result.end() - 1);
   }
   return result;
}
269 // Load and process a script file:
271 void process_script(const std::string
& script
)
273 static const boost::regex
comment_parser(
276 static const boost::regex
scan_parser(
278 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s*"
280 static const boost::regex
scan_dir_parser(
281 "!scan-path[[:space:]]+"
282 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
284 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
287 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
290 static const boost::regex
entry_parser(
291 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
294 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")"
297 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")"
300 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")"
305 static const boost::regex
rewrite_parser(
306 "!(rewrite-name|rewrite-id)\\s+"
307 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+"
308 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s*"
310 static const boost::regex
debug_parser(
312 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s*"
314 static const boost::regex
define_scanner_parser(
315 "!define-scanner\\s+"
316 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+" // type, index 1
317 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+" // scanner regex, index 2
318 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+" // format string, index 3
319 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" // format string for name, index 4
321 "\\s+([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" // id-filter, index 5
323 "\\s+([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" // filename-filter, index 6
328 static const boost::regex
error_parser("!.*");
331 std::cout
<< "Processing script " << script
<< std::endl
;
334 std::ifstream
is(script
.c_str());
335 if(is
.bad() || !exists(boost::filesystem::path(script
)))
337 throw std::runtime_error(std::string("Could not open script file: ") + script
);
339 while(std::getline(is
, line
).good())
341 if(regex_match(line
, what
, comment_parser
))
343 // Nothing to do here...
345 else if(regex_match(line
, what
, scan_parser
))
347 std::string f
= unquote(what
[1].str());
348 if(!boost::filesystem::path(f
).is_complete())
352 boost::filesystem::path
base(prefix
);
358 boost::filesystem::path
base(script
);
359 base
.remove_filename();
364 if(!exists(boost::filesystem::path(f
)))
365 throw std::runtime_error("Error the file requested for scanning does not exist: " + f
);
368 else if(regex_match(line
, what
, debug_parser
))
370 debug
= unquote(what
[1].str());
372 else if(regex_match(line
, what
, define_scanner_parser
))
374 add_file_scanner(unquote(what
.str(1)), unquote(what
.str(2)), unquote(what
.str(3)),
375 unquote(what
.str(4)), unquote(what
.str(5)), unquote(what
.str(6)));
377 else if(regex_match(line
, what
, scan_dir_parser
))
379 std::string d
= unquote(what
[1].str());
380 std::string m
= unquote(what
[2].str());
381 bool r
= unquote(what
[3].str()) == "true";
382 if(!boost::filesystem::path(d
).is_complete())
386 boost::filesystem::path
base(prefix
);
392 boost::filesystem::path
base(script
);
393 base
.remove_filename();
399 std::cout
<< "Scanning directory " << d
<< std::endl
;
400 if(!exists(boost::filesystem::path(d
)))
401 throw std::runtime_error("Error the path requested for scanning does not exist: " + d
);
404 else if(regex_match(line
, what
, rewrite_parser
))
406 bool id
= what
[1] == "rewrite-id";
407 std::string a
= unquote(what
[2].str());
408 std::string b
= unquote(what
[3].str());
409 id_rewrite_list
.push_back(id_rewrite_rule(a
, b
, id
));
411 else if(line
.compare(0, 9, "!exclude ") == 0)
413 static const boost::regex
delim("([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")");
414 boost::sregex_token_iterator
i(line
.begin() + 9, line
.end(), delim
, 0), j
;
418 info
.term
= escape_to_xml(unquote(*i
));
419 // Erase all entries that have a category in our scanner set,
420 // plus any entry with no category at all:
421 index_terms
.erase(info
);
422 for(file_scanner_set_type::iterator pscan
= file_scanner_set
.begin(); pscan
!= file_scanner_set
.end(); ++pscan
)
424 info
.category
= (*pscan
).type
;
425 index_terms
.erase(info
);
430 else if(regex_match(line
, error_parser
))
432 std::cerr
<< "Error: Unable to process line: " << line
<< std::endl
;
434 else if(regex_match(line
, what
, entry_parser
))
437 // what[1] is the Index entry
438 // what[2] is the regex to search for (optional)
439 // what[3] is a section id that must be matched
440 // in order for the term to be indexed (optional)
441 // what[4] is the index category to place the term in (optional).
443 info
.term
= escape_to_xml(unquote(what
.str(1)));
444 std::string s
= unquote(what
.str(2));
446 info
.search_text
= boost::regex(s
, boost::regex::icase
|boost::regex::perl
);
448 info
.search_text
= boost::regex("\\<" + what
.str(1) + "\\>", boost::regex::icase
|boost::regex::perl
);
450 s
= unquote(what
.str(3));
454 info
.category
= unquote(what
.str(4));
455 std::pair
<std::set
<index_info
>::iterator
, bool> pos
= index_terms
.insert(info
);
458 if(pos
.first
->search_text
!= info
.search_text
)
461 // Merge the search terms:
463 const_cast<boost::regex
&>(pos
.first
->search_text
) =
464 "(?:" + pos
.first
->search_text
.str() + ")|(?:" + info
.search_text
.str() + ")";
466 if(pos
.first
->search_id
!= info
.search_id
)
469 // Merge the ID constraints:
471 const_cast<boost::regex
&>(pos
.first
->search_id
) =
472 "(?:" + pos
.first
->search_id
.str() + ")|(?:" + info
.search_id
.str() + ")";
476 catch(const boost::regex_error
&)
478 std::cerr
<< "Unable to process regular expression in script line:\n \""
479 << line
<< "\"" << std::endl
;
482 catch(const std::exception
&)
484 std::cerr
<< "Unable to process script line:\n \""
485 << line
<< "\"" << std::endl
;
491 std::cerr
<< "Error: Unable to process line: " << line
<< std::endl
;