]> git.proxmox.com Git - libgit2.git/blob - src/wildmatch.c
New upstream version 1.4.3+dfsg.1
[libgit2.git] / src / wildmatch.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 *
7 * Do shell-style pattern matching for ?, \, [], and * characters.
8 * It is 8bit clean.
9 *
10 * Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
11 * Rich $alz is now <rsalz@bbn.com>.
12 *
13 * Modified by Wayne Davison to special-case '/' matching, to make '**'
14 * work differently than '*', and to fix the character-class code.
15 *
16 * Imported from git.git.
17 */
18
19 #include "wildmatch.h"
20
21 #define GIT_SPACE 0x01
22 #define GIT_DIGIT 0x02
23 #define GIT_ALPHA 0x04
24 #define GIT_GLOB_SPECIAL 0x08
25 #define GIT_REGEX_SPECIAL 0x10
26 #define GIT_PATHSPEC_MAGIC 0x20
27 #define GIT_CNTRL 0x40
28 #define GIT_PUNCT 0x80
29
30 enum {
31 S = GIT_SPACE,
32 A = GIT_ALPHA,
33 D = GIT_DIGIT,
34 G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */
35 R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | */
36 P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
37 X = GIT_CNTRL,
38 U = GIT_PUNCT,
39 Z = GIT_CNTRL | GIT_SPACE
40 };
41
42 static const unsigned char sane_ctype[256] = {
43 X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X, /* 0.. 15 */
44 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 16.. 31 */
45 S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */
46 D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */
47 P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */
48 A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P, /* 80.. 95 */
49 P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */
50 A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X, /* 112..127 */
51 /* Nothing in the 128.. range */
52 };
53
54 #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
55 #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
56
57 typedef unsigned char uchar;
58
59 /* What character marks an inverted character class? */
60 #define NEGATE_CLASS '!'
61 #define NEGATE_CLASS2 '^'
62
63 #define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
64 && *(class) == *(litmatch) \
65 && strncmp((char*)class, litmatch, len) == 0)
66
67 #if defined STDC_HEADERS || !defined isascii
68 # define ISASCII(c) 1
69 #else
70 # define ISASCII(c) isascii(c)
71 #endif
72
73 #ifdef isblank
74 # define ISBLANK(c) (ISASCII(c) && isblank(c))
75 #else
76 # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
77 #endif
78
79 #ifdef isgraph
80 # define ISGRAPH(c) (ISASCII(c) && isgraph(c))
81 #else
82 # define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
83 #endif
84
85 #define ISPRINT(c) (ISASCII(c) && isprint(c))
86 #define ISDIGIT(c) (ISASCII(c) && isdigit(c))
87 #define ISALNUM(c) (ISASCII(c) && isalnum(c))
88 #define ISALPHA(c) (ISASCII(c) && isalpha(c))
89 #define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
90 #define ISLOWER(c) (ISASCII(c) && islower(c))
91 #define ISPUNCT(c) (ISASCII(c) && ispunct(c))
92 #define ISSPACE(c) (ISASCII(c) && isspace(c))
93 #define ISUPPER(c) (ISASCII(c) && isupper(c))
94 #define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
95
96 /* Match pattern "p" against "text" */
97 static int dowild(const uchar *p, const uchar *text, unsigned int flags)
98 {
99 uchar p_ch;
100 const uchar *pattern = p;
101
102 for ( ; (p_ch = *p) != '\0'; text++, p++) {
103 int matched, match_slash, negated;
104 uchar t_ch, prev_ch;
105 if ((t_ch = *text) == '\0' && p_ch != '*')
106 return WM_ABORT_ALL;
107 if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
108 t_ch = tolower(t_ch);
109 if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
110 p_ch = tolower(p_ch);
111 switch (p_ch) {
112 case '\\':
113 /* Literal match with following character. Note that the test
114 * in "default" handles the p[1] == '\0' failure case. */
115 p_ch = *++p;
116 /* FALLTHROUGH */
117 default:
118 if (t_ch != p_ch)
119 return WM_NOMATCH;
120 continue;
121 case '?':
122 /* Match anything but '/'. */
123 if ((flags & WM_PATHNAME) && t_ch == '/')
124 return WM_NOMATCH;
125 continue;
126 case '*':
127 if (*++p == '*') {
128 const uchar *prev_p = p - 2;
129 while (*++p == '*') {}
130 if (!(flags & WM_PATHNAME))
131 /* without WM_PATHNAME, '*' == '**' */
132 match_slash = 1;
133 else if ((prev_p < pattern || *prev_p == '/') &&
134 (*p == '\0' || *p == '/' ||
135 (p[0] == '\\' && p[1] == '/'))) {
136 /*
137 * Assuming we already match 'foo/' and are at
138 * <star star slash>, just assume it matches
139 * nothing and go ahead match the rest of the
140 * pattern with the remaining string. This
141 * helps make foo/<*><*>/bar (<> because
142 * otherwise it breaks C comment syntax) match
143 * both foo/bar and foo/a/bar.
144 */
145 if (p[0] == '/' &&
146 dowild(p + 1, text, flags) == WM_MATCH)
147 return WM_MATCH;
148 match_slash = 1;
149 } else /* WM_PATHNAME is set */
150 match_slash = 0;
151 } else
152 /* without WM_PATHNAME, '*' == '**' */
153 match_slash = flags & WM_PATHNAME ? 0 : 1;
154 if (*p == '\0') {
155 /* Trailing "**" matches everything. Trailing "*" matches
156 * only if there are no more slash characters. */
157 if (!match_slash) {
158 if (strchr((char*)text, '/') != NULL)
159 return WM_NOMATCH;
160 }
161 return WM_MATCH;
162 } else if (!match_slash && *p == '/') {
163 /*
164 * _one_ asterisk followed by a slash
165 * with WM_PATHNAME matches the next
166 * directory
167 */
168 const char *slash = strchr((char*)text, '/');
169 if (!slash)
170 return WM_NOMATCH;
171 text = (const uchar*)slash;
172 /* the slash is consumed by the top-level for loop */
173 break;
174 }
175 while (1) {
176 if (t_ch == '\0')
177 break;
178 /*
179 * Try to advance faster when an asterisk is
180 * followed by a literal. We know in this case
181 * that the string before the literal
182 * must belong to "*".
183 * If match_slash is false, do not look past
184 * the first slash as it cannot belong to '*'.
185 */
186 if (!is_glob_special(*p)) {
187 p_ch = *p;
188 if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
189 p_ch = tolower(p_ch);
190 while ((t_ch = *text) != '\0' &&
191 (match_slash || t_ch != '/')) {
192 if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
193 t_ch = tolower(t_ch);
194 if (t_ch == p_ch)
195 break;
196 text++;
197 }
198 if (t_ch != p_ch)
199 return WM_NOMATCH;
200 }
201 if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
202 if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
203 return matched;
204 } else if (!match_slash && t_ch == '/')
205 return WM_ABORT_TO_STARSTAR;
206 t_ch = *++text;
207 }
208 return WM_ABORT_ALL;
209 case '[':
210 p_ch = *++p;
211 #ifdef NEGATE_CLASS2
212 if (p_ch == NEGATE_CLASS2)
213 p_ch = NEGATE_CLASS;
214 #endif
215 /* Assign literal 1/0 because of "matched" comparison. */
216 negated = p_ch == NEGATE_CLASS ? 1 : 0;
217 if (negated) {
218 /* Inverted character class. */
219 p_ch = *++p;
220 }
221 prev_ch = 0;
222 matched = 0;
223 do {
224 if (!p_ch)
225 return WM_ABORT_ALL;
226 if (p_ch == '\\') {
227 p_ch = *++p;
228 if (!p_ch)
229 return WM_ABORT_ALL;
230 if (t_ch == p_ch)
231 matched = 1;
232 } else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
233 p_ch = *++p;
234 if (p_ch == '\\') {
235 p_ch = *++p;
236 if (!p_ch)
237 return WM_ABORT_ALL;
238 }
239 if (t_ch <= p_ch && t_ch >= prev_ch)
240 matched = 1;
241 else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {
242 uchar t_ch_upper = toupper(t_ch);
243 if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
244 matched = 1;
245 }
246 p_ch = 0; /* This makes "prev_ch" get set to 0. */
247 } else if (p_ch == '[' && p[1] == ':') {
248 const uchar *s;
249 int i;
250 for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
251 if (!p_ch)
252 return WM_ABORT_ALL;
253 i = (int)(p - s - 1);
254 if (i < 0 || p[-1] != ':') {
255 /* Didn't find ":]", so treat like a normal set. */
256 p = s - 2;
257 p_ch = '[';
258 if (t_ch == p_ch)
259 matched = 1;
260 continue;
261 }
262 if (CC_EQ(s,i, "alnum")) {
263 if (ISALNUM(t_ch))
264 matched = 1;
265 } else if (CC_EQ(s,i, "alpha")) {
266 if (ISALPHA(t_ch))
267 matched = 1;
268 } else if (CC_EQ(s,i, "blank")) {
269 if (ISBLANK(t_ch))
270 matched = 1;
271 } else if (CC_EQ(s,i, "cntrl")) {
272 if (ISCNTRL(t_ch))
273 matched = 1;
274 } else if (CC_EQ(s,i, "digit")) {
275 if (ISDIGIT(t_ch))
276 matched = 1;
277 } else if (CC_EQ(s,i, "graph")) {
278 if (ISGRAPH(t_ch))
279 matched = 1;
280 } else if (CC_EQ(s,i, "lower")) {
281 if (ISLOWER(t_ch))
282 matched = 1;
283 } else if (CC_EQ(s,i, "print")) {
284 if (ISPRINT(t_ch))
285 matched = 1;
286 } else if (CC_EQ(s,i, "punct")) {
287 if (ISPUNCT(t_ch))
288 matched = 1;
289 } else if (CC_EQ(s,i, "space")) {
290 if (ISSPACE(t_ch))
291 matched = 1;
292 } else if (CC_EQ(s,i, "upper")) {
293 if (ISUPPER(t_ch))
294 matched = 1;
295 else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))
296 matched = 1;
297 } else if (CC_EQ(s,i, "xdigit")) {
298 if (ISXDIGIT(t_ch))
299 matched = 1;
300 } else /* malformed [:class:] string */
301 return WM_ABORT_ALL;
302 p_ch = 0; /* This makes "prev_ch" get set to 0. */
303 } else if (t_ch == p_ch)
304 matched = 1;
305 } while (prev_ch = p_ch, (p_ch = *++p) != ']');
306 if (matched == negated ||
307 ((flags & WM_PATHNAME) && t_ch == '/'))
308 return WM_NOMATCH;
309 continue;
310 }
311 }
312
313 return *text ? WM_NOMATCH : WM_MATCH;
314 }
315
316 /* Match the "pattern" against the "text" string. */
317 int wildmatch(const char *pattern, const char *text, unsigned int flags)
318 {
319 return dowild((const uchar*)pattern, (const uchar*)text, flags);
320 }