]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/tools/auto_index/src/file_scanning.cpp
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / boost / tools / auto_index / src / file_scanning.cpp
1 // Copyright 2008 John Maddock
2 //
3 // Use, modification and distribution are subject to the
4 // Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt
6 // or copy at http://www.boost.org/LICENSE_1_0.txt)
7
8 #include "auto_index.hpp"
9
// True until install_default_scanners() has run (it clears the flag);
// scan_file() tests it so the default scanners are installed before the
// first file is scanned.
bool need_defaults = true;
11
12 void install_default_scanners()
13 {
14 need_defaults = false;
15 //
16 // Set the default scanners if they're not defined already:
17 //
18 file_scanner s;
19 s.type = "class_name";
20 if(file_scanner_set.find(s) == file_scanner_set.end())
21 {
22 add_file_scanner(
23 "class_name", // Index type
24 // Header file scanner regex:
25 // possibly leading whitespace:
26 "^[[:space:]]*"
27 // possible template declaration:
28 "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
29 // class or struct:
30 "(class|struct)[[:space:]]*"
31 // leading declspec macros etc:
32 "("
33 "\\<\\w+\\>"
34 "("
35 "[[:blank:]]*\\([^)]*\\)"
36 ")?"
37 "[[:space:]]*"
38 ")*"
39 // the class name
40 "(\\<\\w*\\>)[[:space:]]*"
41 // template specialisation parameters
42 "(<[^;:{]+>)?[[:space:]]*"
43 // terminate in { or :
44 "(\\{|:[^;\\{()]*\\{)",
45
46 "(?:class|struct)[^;{]+\\\\<\\5\\\\>[^;{]+\\\\{", // Format string to create indexing regex.
47 "\\5", // Format string to create index term.
48 "", // Filter regex for section id's.
49 "" // Filter regex for filenames.
50 );
51 }
52
53 s.type = "typedef_name";
54 if(file_scanner_set.find(s) == file_scanner_set.end())
55 {
56 add_file_scanner(
57 "typedef_name", // Index type
58 "typedef[^;{}#]+?(\\w+)\\s*;", // scanner regex
59 "typedef[^;]+\\\\<\\1\\\\>\\\\s*;", // Format string to create indexing regex.
60 "\\1", // Format string to create index term.
61 "", // Filter regex for section id's.
62 "" // Filter regex for filenames.
63 );
64 }
65
66 s.type = "macro_name";
67 if(file_scanner_set.find(s) == file_scanner_set.end())
68 {
69 add_file_scanner(
70 "macro_name", // Index type
71 "^\\s*#\\s*define\\s+(\\w+)", // scanner regex
72 "\\\\<\\1\\\\>", // Format string to create indexing regex.
73 "\\1", // Format string to create index term.
74 "", // Filter regex for section id's.
75 "" // Filter regex for filenames.
76 );
77 }
78
79 s.type = "function_name";
80 if(file_scanner_set.find(s) == file_scanner_set.end())
81 {
82 add_file_scanner(
83 "function_name", // Index type
84 "\\w++(?:\\s*+<[^>]++>)?[\\s&*]+?(\\w+)\\s*(?:BOOST_[[:upper:]_]+\\s*)?\\([^;{}]*\\)\\s*[;{]", // scanner regex
85 "\\\\<\\\\w+\\\\>(?:\\\\s+<[^>]*>)*[\\\\s&*]+\\\\<\\1\\\\>\\\\s*\\\\([^;{]*\\\\)", // Format string to create indexing regex.
86 "\\1", // Format string to create index term.
87 "", // Filter regex for section id's.
88 "" // Filter regex for filenames.
89 );
90 }
91 }
92
93 //
94 // Helper to dump file contents into a std::string:
95 //
// Replaces the contents of s with every character that can be read from is.
// s is left empty if the stream is already in the bad state.
void load_file(std::string& s, std::istream& is)
{
   s.erase();
   if(is.bad()) return;
   // in_avail() is only an estimate and may be 0 or -1; passing -1 on to
   // reserve() would become a huge unsigned request and throw
   // std::length_error, so only a positive estimate is used as a hint.
   const std::streamsize hint = is.rdbuf()->in_avail();
   if(hint > 0)
      s.reserve(static_cast<std::string::size_type>(hint));
   char c;
   while(is.get(c))
      s.push_back(c); // std::string grows geometrically by itself
}
109 //
110 // Helper to convert string from external source into valid XML:
111 //
// Returns a copy of in with '&', '<', '>' and '"' replaced by the
// entities &amp;, &lt;, &gt; and &quot;; every other character is
// copied through unchanged.
std::string escape_to_xml(const std::string& in)
{
   std::string escaped;
   for(std::string::const_iterator pos = in.begin(); pos != in.end(); ++pos)
   {
      const char ch = *pos;
      if(ch == '&')
         escaped += "&amp;";
      else if(ch == '<')
         escaped += "&lt;";
      else if(ch == '>')
         escaped += "&gt;";
      else if(ch == '"')
         escaped += "&quot;";
      else
         escaped += ch;
   }
   return escaped;
}
137 //
138 // Scan a source file for things to index:
139 //
140 void scan_file(const std::string& file)
141 {
142 if(need_defaults)
143 install_default_scanners();
144 if(verbose)
145 std::cout << "Scanning file... " << file << std::endl;
146 std::string text;
147 std::ifstream is(file.c_str());
148 if(!is.peek() || !is.good())
149 throw std::runtime_error(std::string("Unable to read from file: ") + file);
150 load_file(text, is);
151
152 for(file_scanner_set_type::iterator pscan = file_scanner_set.begin(); pscan != file_scanner_set.end(); ++pscan)
153 {
154 bool need_debug = false;
155 if(!debug.empty() && regex_match(pscan->type, ::debug))
156 {
157 need_debug = true;
158 std::cout << "Processing scanner " << pscan->type << " on file " << file << std::endl;
159 std::cout << "Scanner regex:" << pscan->scanner << std::endl;
160 std::cout << "Scanner formatter (search regex):" << pscan->format_string << std::endl;
161 std::cout << "Scanner formatter (index term):" << pscan->term_formatter << std::endl;
162 std::cout << "Scanner file name filter:" << pscan->file_name_filter << std::endl;
163 std::cout << "Scanner section id filter:" << pscan->section_filter << std::endl;
164 }
165 if(!pscan->file_name_filter.empty())
166 {
167 if(!regex_match(file, pscan->file_name_filter))
168 {
169 if(need_debug)
170 {
171 std::cout << "File failed to match file name filter, this file will be skipped..." << std::endl;
172 }
173 continue; // skip this file
174 }
175 }
176 if(verbose && !need_debug)
177 std::cout << "Scanning for type \"" << (*pscan).type << "\" ... " << std::endl;
178 boost::sregex_iterator i(text.begin(), text.end(), (*pscan).scanner), j;
179 while(i != j)
180 {
181 try
182 {
183 index_info info;
184 info.term = escape_to_xml(i->format(pscan->term_formatter));
185 info.search_text = i->format(pscan->format_string);
186 info.category = pscan->type;
187 if(!pscan->section_filter.empty())
188 info.search_id = pscan->section_filter;
189 std::pair<std::set<index_info>::iterator, bool> pos = index_terms.insert(info);
190 if(pos.second)
191 {
192 if(verbose || need_debug)
193 std::cout << "Indexing " << info.term << " as type " << info.category << std::endl;
194 if(need_debug)
195 std::cout << "Search regex will be: \"" << info.search_text << "\"" <<
196 " ID constraint is: \"" << info.search_id << "\""
197 << "Found text was: " << i->str() << std::endl;
198 if(pos.first->search_text != info.search_text)
199 {
200 //
201 // Merge the search terms:
202 //
203 const_cast<boost::regex&>(pos.first->search_text) =
204 "(?:" + pos.first->search_text.str() + ")|(?:" + info.search_text.str() + ")";
205 }
206 if(pos.first->search_id != info.search_id)
207 {
208 //
209 // Merge the ID constraints:
210 //
211 const_cast<boost::regex&>(pos.first->search_id) =
212 "(?:" + pos.first->search_id.str() + ")|(?:" + info.search_id.str() + ")";
213 }
214 }
215 }
216 catch(const boost::regex_error& e)
217 {
218 std::cerr << "Unable to create regular expression from found index term:\""
219 << i->format(pscan->term_formatter) << "\" In file " << file << std::endl;
220 std::cerr << e.what() << std::endl;
221 }
222 catch(const std::exception& e)
223 {
224 std::cerr << "Unable to create index term:\""
225 << i->format(pscan->term_formatter) << "\" In file " << file << std::endl;
226 std::cerr << e.what() << std::endl;
227 throw;
228 }
229 ++i;
230 }
231 }
232 }
233 //
234 // Scan a whole directory for files to search:
235 //
236 void scan_dir(const std::string& dir, const std::string& mask, bool recurse)
237 {
238 using namespace boost::filesystem;
239 boost::regex e(mask);
240 directory_iterator i(dir), j;
241
242 while(i != j)
243 {
244 if(regex_match(i->path().filename().string(), e))
245 {
246 scan_file(i->path().string());
247 }
248 else if(recurse && is_directory(i->status()))
249 {
250 scan_dir(i->path().string(), mask, recurse);
251 }
252 ++i;
253 }
254 }
255 //
256 // Remove quotes from a string:
257 //
// Returns s without its enclosing double quotes.  A string that is shorter
// than two characters, or that does not both start and end with '"', is
// returned unchanged.
std::string unquote(const std::string& s)
{
   const std::string::size_type len = s.size();
   if(len < 2)
      return s;
   if(s[0] != '\"' || s[len - 1] != '\"')
      return s;
   return s.substr(1, len - 2);
}
268 //
269 // Load and process a script file:
270 //
271 void process_script(const std::string& script)
272 {
273 static const boost::regex comment_parser(
274 "\\s*(?:#.*)?$"
275 );
276 static const boost::regex scan_parser(
277 "!scan[[:space:]]+"
278 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s*"
279 );
280 static const boost::regex scan_dir_parser(
281 "!scan-path[[:space:]]+"
282 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
283 "[[:space:]]+"
284 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
285 "(?:"
286 "[[:space:]]+"
287 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
288 ")?\\s*"
289 );
290 static const boost::regex entry_parser(
291 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
292 "(?:"
293 "[[:space:]]+"
294 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")"
295 "(?:"
296 "[[:space:]]+"
297 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")"
298 "(?:"
299 "[[:space:]]+"
300 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")"
301 ")?"
302 ")?"
303 ")?"
304 "[[:space:]]*");
305 static const boost::regex rewrite_parser(
306 "!(rewrite-name|rewrite-id)\\s+"
307 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+"
308 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s*"
309 );
310 static const boost::regex debug_parser(
311 "!debug\\s+"
312 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s*"
313 );
314 static const boost::regex define_scanner_parser(
315 "!define-scanner\\s+"
316 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+" // type, index 1
317 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+" // scanner regex, index 2
318 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+" // format string, index 3
319 "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" // format string for name, index 4
320 "(?:"
321 "\\s+([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" // id-filter, index 5
322 "(?:"
323 "\\s+([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" // filename-filter, index 6
324 ")?"
325 ")?"
326 "\\s*"
327 );
328 static const boost::regex error_parser("!.*");
329
330 if(verbose)
331 std::cout << "Processing script " << script << std::endl;
332 boost::smatch what;
333 std::string line;
334 std::ifstream is(script.c_str());
335 if(is.bad() || !exists(boost::filesystem::path(script)))
336 {
337 throw std::runtime_error(std::string("Could not open script file: ") + script);
338 }
339 while(std::getline(is, line).good())
340 {
341 if(regex_match(line, what, comment_parser))
342 {
343 // Nothing to do here...
344 }
345 else if(regex_match(line, what, scan_parser))
346 {
347 std::string f = unquote(what[1].str());
348 if(!boost::filesystem::path(f).is_complete())
349 {
350 if(prefix.size())
351 {
352 boost::filesystem::path base(prefix);
353 base /= f;
354 f = base.string();
355 }
356 else
357 {
358 boost::filesystem::path base(script);
359 base.remove_filename();
360 base /= f;
361 f = base.string();
362 }
363 }
364 if(!exists(boost::filesystem::path(f)))
365 throw std::runtime_error("Error the file requested for scanning does not exist: " + f);
366 scan_file(f);
367 }
368 else if(regex_match(line, what, debug_parser))
369 {
370 debug = unquote(what[1].str());
371 }
372 else if(regex_match(line, what, define_scanner_parser))
373 {
374 add_file_scanner(unquote(what.str(1)), unquote(what.str(2)), unquote(what.str(3)),
375 unquote(what.str(4)), unquote(what.str(5)), unquote(what.str(6)));
376 }
377 else if(regex_match(line, what, scan_dir_parser))
378 {
379 std::string d = unquote(what[1].str());
380 std::string m = unquote(what[2].str());
381 bool r = unquote(what[3].str()) == "true";
382 if(!boost::filesystem::path(d).is_complete())
383 {
384 if(prefix.size())
385 {
386 boost::filesystem::path base(prefix);
387 base /= d;
388 d = base.string();
389 }
390 else
391 {
392 boost::filesystem::path base(script);
393 base.remove_filename();
394 base /= d;
395 d = base.string();
396 }
397 }
398 if(verbose)
399 std::cout << "Scanning directory " << d << std::endl;
400 if(!exists(boost::filesystem::path(d)))
401 throw std::runtime_error("Error the path requested for scanning does not exist: " + d);
402 scan_dir(d, m, r);
403 }
404 else if(regex_match(line, what, rewrite_parser))
405 {
406 bool id = what[1] == "rewrite-id";
407 std::string a = unquote(what[2].str());
408 std::string b = unquote(what[3].str());
409 id_rewrite_list.push_back(id_rewrite_rule(a, b, id));
410 }
411 else if(line.compare(0, 9, "!exclude ") == 0)
412 {
413 static const boost::regex delim("([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")");
414 boost::sregex_token_iterator i(line.begin() + 9, line.end(), delim, 0), j;
415 while(i != j)
416 {
417 index_info info;
418 info.term = escape_to_xml(unquote(*i));
419 // Erase all entries that have a category in our scanner set,
420 // plus any entry with no category at all:
421 index_terms.erase(info);
422 for(file_scanner_set_type::iterator pscan = file_scanner_set.begin(); pscan != file_scanner_set.end(); ++pscan)
423 {
424 info.category = (*pscan).type;
425 index_terms.erase(info);
426 }
427 ++i;
428 }
429 }
430 else if(regex_match(line, error_parser))
431 {
432 std::cerr << "Error: Unable to process line: " << line << std::endl;
433 }
434 else if(regex_match(line, what, entry_parser))
435 {
436 try{
437 // what[1] is the Index entry
438 // what[2] is the regex to search for (optional)
439 // what[3] is a section id that must be matched
440 // in order for the term to be indexed (optional)
441 // what[4] is the index category to place the term in (optional).
442 index_info info;
443 info.term = escape_to_xml(unquote(what.str(1)));
444 std::string s = unquote(what.str(2));
445 if(s.size())
446 info.search_text = boost::regex(s, boost::regex::icase|boost::regex::perl);
447 else
448 info.search_text = boost::regex("\\<" + what.str(1) + "\\>", boost::regex::icase|boost::regex::perl);
449
450 s = unquote(what.str(3));
451 if(s.size())
452 info.search_id = s;
453 if(what[4].matched)
454 info.category = unquote(what.str(4));
455 std::pair<std::set<index_info>::iterator, bool> pos = index_terms.insert(info);
456 if(pos.second)
457 {
458 if(pos.first->search_text != info.search_text)
459 {
460 //
461 // Merge the search terms:
462 //
463 const_cast<boost::regex&>(pos.first->search_text) =
464 "(?:" + pos.first->search_text.str() + ")|(?:" + info.search_text.str() + ")";
465 }
466 if(pos.first->search_id != info.search_id)
467 {
468 //
469 // Merge the ID constraints:
470 //
471 const_cast<boost::regex&>(pos.first->search_id) =
472 "(?:" + pos.first->search_id.str() + ")|(?:" + info.search_id.str() + ")";
473 }
474 }
475 }
476 catch(const boost::regex_error&)
477 {
478 std::cerr << "Unable to process regular expression in script line:\n \""
479 << line << "\"" << std::endl;
480 throw;
481 }
482 catch(const std::exception&)
483 {
484 std::cerr << "Unable to process script line:\n \""
485 << line << "\"" << std::endl;
486 throw;
487 }
488 }
489 else
490 {
491 std::cerr << "Error: Unable to process line: " << line << std::endl;
492 }
493 }
494 }
495