]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*============================================================================= |
2 | Copyright (c) 2013 Daniel James | |
3 | ||
4 | Use, modification and distribution is subject to the Boost Software | |
5 | License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at | |
6 | http://www.boost.org/LICENSE_1_0.txt) | |
7 | =============================================================================*/ | |
8 | ||
9 | #include "glob.hpp" | |
10 | #include <cassert> | |
11 | ||
12 | namespace quickbook | |
13 | { | |
b32b8144 | 14 | typedef string_iterator glob_iterator; |
7c673cae FG |
15 | |
16 | void check_glob_range(glob_iterator&, glob_iterator); | |
17 | void check_glob_escape(glob_iterator&, glob_iterator); | |
18 | ||
19 | bool match_section(glob_iterator& pattern_begin, glob_iterator pattern_end, | |
20 | glob_iterator& filename_begin, glob_iterator& filename_end); | |
21 | bool match_range(glob_iterator& pattern_begin, glob_iterator pattern_end, | |
b32b8144 | 22 | char x); |
7c673cae | 23 | |
b32b8144 FG |
24 | // Is pattern a glob or a plain file name? |
25 | // Throws glob_error if pattern is an invalid glob. | |
26 | bool check_glob(quickbook::string_view pattern) | |
7c673cae FG |
27 | { |
28 | bool is_glob = false; | |
29 | bool is_ascii = true; | |
30 | ||
31 | glob_iterator begin = pattern.begin(); | |
32 | glob_iterator end = pattern.end(); | |
33 | ||
34 | while (begin != end) { | |
b32b8144 | 35 | if (*begin < 32 || (*begin & 0x80)) |
7c673cae FG |
36 | is_ascii = false; |
37 | ||
38 | switch(*begin) { | |
39 | case '\\': | |
40 | check_glob_escape(begin, end); | |
41 | break; | |
42 | ||
43 | case '[': | |
44 | check_glob_range(begin, end); | |
45 | is_glob = true; | |
46 | break; | |
47 | ||
48 | case ']': | |
49 | throw glob_error("uneven square brackets"); | |
50 | ||
51 | case '?': | |
52 | is_glob = true; | |
53 | ++begin; | |
54 | break; | |
55 | ||
56 | case '*': | |
57 | is_glob = true; | |
58 | ++begin; | |
59 | ||
60 | if (begin != end && *begin == '*') { | |
61 | throw glob_error("'**' not supported"); | |
62 | } | |
63 | break; | |
64 | ||
65 | default: | |
66 | ++begin; | |
67 | } | |
68 | } | |
69 | ||
70 | if (is_glob && !is_ascii) | |
71 | throw glob_error("invalid character, globs are ascii only"); | |
72 | ||
73 | return is_glob; | |
74 | } | |
75 | ||
76 | void check_glob_range(glob_iterator& begin, glob_iterator end) | |
77 | { | |
78 | assert(begin != end && *begin == '['); | |
79 | ++begin; | |
80 | ||
81 | if (*begin == ']') | |
82 | throw glob_error("empty range"); | |
83 | ||
84 | while (begin != end) { | |
85 | switch (*begin) { | |
86 | case '\\': | |
87 | ++begin; | |
88 | ||
89 | if (begin == end) { | |
90 | throw glob_error("trailing escape"); | |
91 | } | |
92 | else if (*begin == '\\' || *begin == '/') { | |
93 | throw glob_error("contains escaped slash"); | |
94 | } | |
95 | ||
96 | ++begin; | |
97 | break; | |
98 | case '[': | |
7c673cae FG |
99 | throw glob_error("nested square brackets"); |
100 | case ']': | |
101 | ++begin; | |
102 | return; | |
103 | case '/': | |
104 | throw glob_error("slash in square brackets"); | |
105 | default: | |
106 | ++begin; | |
107 | } | |
108 | } | |
109 | ||
110 | throw glob_error("uneven square brackets"); | |
111 | } | |
112 | ||
113 | void check_glob_escape(glob_iterator& begin, glob_iterator end) | |
114 | { | |
115 | assert(begin != end && *begin == '\\'); | |
116 | ||
117 | ++begin; | |
118 | ||
119 | if (begin == end) { | |
120 | throw glob_error("trailing escape"); | |
121 | } | |
122 | else if (*begin == '\\' || *begin == '/') { | |
123 | throw glob_error("contains escaped slash"); | |
124 | } | |
125 | ||
126 | ++begin; | |
127 | } | |
128 | ||
b32b8144 FG |
129 | // Does filename match pattern? |
130 | // Might throw glob_error if pattern is an invalid glob, | |
131 | // but should call check_glob first to validate the glob. | |
132 | bool glob(quickbook::string_view const& pattern, | |
133 | quickbook::string_view const& filename) | |
7c673cae FG |
134 | { |
135 | // If there wasn't this special case then '*' would match an | |
136 | // empty string. | |
137 | if (filename.empty()) return pattern.empty(); | |
138 | ||
139 | glob_iterator pattern_it = pattern.begin(); | |
140 | glob_iterator pattern_end = pattern.end(); | |
141 | ||
142 | glob_iterator filename_it = filename.begin(); | |
143 | glob_iterator filename_end = filename.end(); | |
144 | ||
145 | if (!match_section(pattern_it, pattern_end, filename_it, filename_end)) | |
146 | return false; | |
147 | ||
148 | while (pattern_it != pattern_end) { | |
149 | assert(*pattern_it == '*'); | |
150 | ++pattern_it; | |
151 | ||
152 | if (pattern_it == pattern_end) return true; | |
153 | ||
b32b8144 | 154 | if (*pattern_it == '*') { throw glob_error("'**' not supported"); } |
7c673cae | 155 | |
b32b8144 | 156 | for (;;) { |
7c673cae FG |
157 | if (filename_it == filename_end) return false; |
158 | if (match_section(pattern_it, pattern_end, filename_it, filename_end)) | |
159 | break; | |
160 | ++filename_it; | |
161 | } | |
162 | } | |
163 | ||
164 | return filename_it == filename_end; | |
165 | } | |
166 | ||
167 | bool match_section(glob_iterator& pattern_begin, glob_iterator pattern_end, | |
168 | glob_iterator& filename_begin, glob_iterator& filename_end) | |
169 | { | |
170 | glob_iterator pattern_it = pattern_begin; | |
171 | glob_iterator filename_it = filename_begin; | |
172 | ||
173 | while (pattern_it != pattern_end && *pattern_it != '*') { | |
174 | if (filename_it == filename_end) return false; | |
175 | ||
176 | switch(*pattern_it) { | |
177 | case '*': | |
178 | assert(false); | |
b32b8144 | 179 | throw new glob_error("Internal error"); |
7c673cae FG |
180 | case '[': |
181 | if (!match_range(pattern_it, pattern_end, *filename_it)) | |
182 | return false; | |
183 | ++filename_it; | |
184 | break; | |
b32b8144 FG |
185 | case ']': |
186 | throw glob_error("uneven square brackets"); | |
7c673cae FG |
187 | case '?': |
188 | ++pattern_it; | |
189 | ++filename_it; | |
190 | break; | |
191 | case '\\': | |
192 | ++pattern_it; | |
b32b8144 FG |
193 | if (pattern_it == pattern_end) { |
194 | throw glob_error("trailing escape"); | |
195 | } else if (*pattern_it == '\\' || *pattern_it == '/') { | |
196 | throw glob_error("contains escaped slash"); | |
197 | } | |
7c673cae FG |
198 | BOOST_FALLTHROUGH; |
199 | default: | |
200 | if (*pattern_it != *filename_it) return false; | |
201 | ++pattern_it; | |
202 | ++filename_it; | |
203 | } | |
204 | } | |
205 | ||
206 | if (pattern_it == pattern_end && filename_it != filename_end) | |
207 | return false; | |
208 | ||
209 | pattern_begin = pattern_it; | |
210 | filename_begin = filename_it; | |
211 | return true; | |
212 | } | |
213 | ||
214 | bool match_range(glob_iterator& pattern_begin, glob_iterator pattern_end, | |
b32b8144 | 215 | char x) |
7c673cae FG |
216 | { |
217 | assert(pattern_begin != pattern_end && *pattern_begin == '['); | |
218 | ++pattern_begin; | |
b32b8144 FG |
219 | if (pattern_begin == pattern_end) { |
220 | throw glob_error("uneven square brackets"); | |
221 | } | |
7c673cae FG |
222 | |
223 | bool invert_match = false; | |
224 | bool matched = false; | |
225 | ||
226 | if (*pattern_begin == '^') { | |
227 | invert_match = true; | |
228 | ++pattern_begin; | |
b32b8144 FG |
229 | if (pattern_begin == pattern_end) { |
230 | throw glob_error("uneven square brackets"); | |
231 | } | |
232 | } else if (*pattern_begin == ']') { | |
233 | throw glob_error("empty range"); | |
7c673cae FG |
234 | } |
235 | ||
236 | // Search for a match | |
b32b8144 | 237 | for (;;) { |
7c673cae FG |
238 | unsigned char first = *pattern_begin; |
239 | ++pattern_begin; | |
240 | if (first == ']') break; | |
b32b8144 FG |
241 | if (first == '[') { |
242 | throw glob_error("nested square brackets"); | |
243 | } | |
244 | if (pattern_begin == pattern_end) { | |
245 | throw glob_error("uneven square brackets"); | |
246 | } | |
7c673cae FG |
247 | |
248 | if (first == '\\') { | |
249 | first = *pattern_begin; | |
b32b8144 FG |
250 | if (first == '\\' || first == '/') { |
251 | throw glob_error("contains escaped slash"); | |
252 | } | |
7c673cae | 253 | ++pattern_begin; |
b32b8144 FG |
254 | if (pattern_begin == pattern_end) { |
255 | throw glob_error("uneven square brackets"); | |
256 | } | |
257 | } else if (first == '/') { | |
258 | throw glob_error("slash in square brackets"); | |
7c673cae FG |
259 | } |
260 | ||
261 | if (*pattern_begin != '-') { | |
262 | matched = matched || (first == x); | |
263 | } | |
264 | else { | |
265 | ++pattern_begin; | |
b32b8144 FG |
266 | if (pattern_begin == pattern_end) { |
267 | throw glob_error("uneven square brackets"); | |
268 | } | |
7c673cae FG |
269 | |
270 | unsigned char second = *pattern_begin; | |
271 | ++pattern_begin; | |
272 | if (second == ']') { | |
273 | matched = matched || (first == x) || (x == '-'); | |
274 | break; | |
275 | } | |
b32b8144 FG |
276 | if (pattern_begin == pattern_end) { |
277 | throw glob_error("uneven square brackets"); | |
278 | } | |
7c673cae FG |
279 | |
280 | if (second == '\\') { | |
281 | second = *pattern_begin; | |
b32b8144 FG |
282 | if (second == '\\' || second == '/') { |
283 | throw glob_error("contains escaped slash"); | |
284 | } | |
7c673cae | 285 | ++pattern_begin; |
b32b8144 FG |
286 | if (pattern_begin == pattern_end) { |
287 | throw glob_error("uneven square brackets"); | |
288 | } | |
289 | } else if (second == '/') { | |
290 | throw glob_error("slash in square brackets"); | |
7c673cae FG |
291 | } |
292 | ||
7c673cae FG |
293 | matched = matched || (first <= x && x <= second); |
294 | } | |
295 | } | |
296 | ||
297 | return invert_match != matched; | |
298 | } | |
299 | ||
b32b8144 | 300 | std::size_t find_glob_char(quickbook::string_view pattern, |
7c673cae FG |
301 | std::size_t pos) |
302 | { | |
b32b8144 | 303 | // Weird style is because quickbook::string_view's find_first_of |
7c673cae FG |
304 | // doesn't take a position argument. |
305 | std::size_t removed = 0; | |
306 | ||
b32b8144 | 307 | for (;;) { |
7c673cae | 308 | pos = pattern.find_first_of("[]?*\\"); |
b32b8144 | 309 | if (pos == quickbook::string_view::npos) return pos; |
7c673cae FG |
310 | if (pattern[pos] != '\\') return pos + removed; |
311 | pattern.remove_prefix(pos + 2); | |
312 | removed += pos + 2; | |
313 | } | |
314 | } | |
315 | ||
b32b8144 | 316 | std::string glob_unescape(quickbook::string_view pattern) |
7c673cae FG |
317 | { |
318 | std::string result; | |
319 | ||
b32b8144 | 320 | for (;;) { |
7c673cae | 321 | std::size_t pos = pattern.find("\\"); |
b32b8144 | 322 | if (pos == quickbook::string_view::npos) { |
7c673cae FG |
323 | result.append(pattern.data(), pattern.size()); |
324 | break; | |
325 | } | |
326 | ||
327 | result.append(pattern.data(), pos); | |
328 | ++pos; | |
329 | if (pos < pattern.size()) { | |
330 | result += pattern[pos]; | |
331 | ++pos; | |
332 | } | |
333 | pattern.remove_prefix(pos); | |
334 | } | |
335 | ||
336 | return result; | |
337 | } | |
338 | } |