]> git.proxmox.com Git - libgit2.git/blame - src/util/wildmatch.c
Merge https://salsa.debian.org/debian/libgit2 into proxmox/bullseye
[libgit2.git] / src / util / wildmatch.c
CommitLineData
22a2d3d5
UG
1/*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 *
7 * Do shell-style pattern matching for ?, \, [], and * characters.
8 * It is 8bit clean.
9 *
10 * Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
11 * Rich $alz is now <rsalz@bbn.com>.
12 *
13 * Modified by Wayne Davison to special-case '/' matching, to make '**'
14 * work differently than '*', and to fix the character-class code.
15 *
16 * Imported from git.git.
17 */
18
19#include "wildmatch.h"
20
21#define GIT_SPACE 0x01
22#define GIT_DIGIT 0x02
23#define GIT_ALPHA 0x04
24#define GIT_GLOB_SPECIAL 0x08
25#define GIT_REGEX_SPECIAL 0x10
26#define GIT_PATHSPEC_MAGIC 0x20
27#define GIT_CNTRL 0x40
28#define GIT_PUNCT 0x80
29
30enum {
31 S = GIT_SPACE,
32 A = GIT_ALPHA,
33 D = GIT_DIGIT,
34 G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */
35 R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | */
36 P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
37 X = GIT_CNTRL,
38 U = GIT_PUNCT,
39 Z = GIT_CNTRL | GIT_SPACE
40};
41
42static const unsigned char sane_ctype[256] = {
43 X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X, /* 0.. 15 */
44 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 16.. 31 */
45 S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */
46 D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */
47 P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */
48 A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P, /* 80.. 95 */
49 P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */
50 A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X, /* 112..127 */
51 /* Nothing in the 128.. range */
52};
53
54#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
55#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
56
57typedef unsigned char uchar;
58
59/* What character marks an inverted character class? */
60#define NEGATE_CLASS '!'
61#define NEGATE_CLASS2 '^'
62
63#define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
64 && *(class) == *(litmatch) \
65 && strncmp((char*)class, litmatch, len) == 0)
66
67#if defined STDC_HEADERS || !defined isascii
68# define ISASCII(c) 1
69#else
70# define ISASCII(c) isascii(c)
71#endif
72
73#ifdef isblank
74# define ISBLANK(c) (ISASCII(c) && isblank(c))
75#else
76# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
77#endif
78
79#ifdef isgraph
80# define ISGRAPH(c) (ISASCII(c) && isgraph(c))
81#else
82# define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
83#endif
84
85#define ISPRINT(c) (ISASCII(c) && isprint(c))
86#define ISDIGIT(c) (ISASCII(c) && isdigit(c))
87#define ISALNUM(c) (ISASCII(c) && isalnum(c))
88#define ISALPHA(c) (ISASCII(c) && isalpha(c))
89#define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
90#define ISLOWER(c) (ISASCII(c) && islower(c))
91#define ISPUNCT(c) (ISASCII(c) && ispunct(c))
92#define ISSPACE(c) (ISASCII(c) && isspace(c))
93#define ISUPPER(c) (ISASCII(c) && isupper(c))
94#define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
95
96/* Match pattern "p" against "text" */
97static int dowild(const uchar *p, const uchar *text, unsigned int flags)
98{
99 uchar p_ch;
100 const uchar *pattern = p;
101
102 for ( ; (p_ch = *p) != '\0'; text++, p++) {
103 int matched, match_slash, negated;
104 uchar t_ch, prev_ch;
105 if ((t_ch = *text) == '\0' && p_ch != '*')
106 return WM_ABORT_ALL;
107 if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
108 t_ch = tolower(t_ch);
109 if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
110 p_ch = tolower(p_ch);
111 switch (p_ch) {
112 case '\\':
113 /* Literal match with following character. Note that the test
114 * in "default" handles the p[1] == '\0' failure case. */
115 p_ch = *++p;
116 /* FALLTHROUGH */
117 default:
118 if (t_ch != p_ch)
119 return WM_NOMATCH;
120 continue;
121 case '?':
122 /* Match anything but '/'. */
123 if ((flags & WM_PATHNAME) && t_ch == '/')
124 return WM_NOMATCH;
125 continue;
126 case '*':
127 if (*++p == '*') {
128 const uchar *prev_p = p - 2;
129 while (*++p == '*') {}
130 if (!(flags & WM_PATHNAME))
131 /* without WM_PATHNAME, '*' == '**' */
132 match_slash = 1;
133 else if ((prev_p < pattern || *prev_p == '/') &&
134 (*p == '\0' || *p == '/' ||
135 (p[0] == '\\' && p[1] == '/'))) {
136 /*
137 * Assuming we already match 'foo/' and are at
138 * <star star slash>, just assume it matches
139 * nothing and go ahead match the rest of the
140 * pattern with the remaining string. This
141 * helps make foo/<*><*>/bar (<> because
142 * otherwise it breaks C comment syntax) match
143 * both foo/bar and foo/a/bar.
144 */
145 if (p[0] == '/' &&
146 dowild(p + 1, text, flags) == WM_MATCH)
147 return WM_MATCH;
148 match_slash = 1;
149 } else /* WM_PATHNAME is set */
150 match_slash = 0;
151 } else
152 /* without WM_PATHNAME, '*' == '**' */
153 match_slash = flags & WM_PATHNAME ? 0 : 1;
154 if (*p == '\0') {
155 /* Trailing "**" matches everything. Trailing "*" matches
156 * only if there are no more slash characters. */
157 if (!match_slash) {
158 if (strchr((char*)text, '/') != NULL)
159 return WM_NOMATCH;
160 }
161 return WM_MATCH;
162 } else if (!match_slash && *p == '/') {
163 /*
164 * _one_ asterisk followed by a slash
165 * with WM_PATHNAME matches the next
166 * directory
167 */
168 const char *slash = strchr((char*)text, '/');
169 if (!slash)
170 return WM_NOMATCH;
171 text = (const uchar*)slash;
172 /* the slash is consumed by the top-level for loop */
173 break;
174 }
175 while (1) {
176 if (t_ch == '\0')
177 break;
178 /*
179 * Try to advance faster when an asterisk is
180 * followed by a literal. We know in this case
181 * that the string before the literal
182 * must belong to "*".
183 * If match_slash is false, do not look past
184 * the first slash as it cannot belong to '*'.
185 */
186 if (!is_glob_special(*p)) {
187 p_ch = *p;
188 if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
189 p_ch = tolower(p_ch);
190 while ((t_ch = *text) != '\0' &&
191 (match_slash || t_ch != '/')) {
192 if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
193 t_ch = tolower(t_ch);
194 if (t_ch == p_ch)
195 break;
196 text++;
197 }
198 if (t_ch != p_ch)
199 return WM_NOMATCH;
200 }
201 if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
202 if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
203 return matched;
204 } else if (!match_slash && t_ch == '/')
205 return WM_ABORT_TO_STARSTAR;
206 t_ch = *++text;
207 }
208 return WM_ABORT_ALL;
209 case '[':
210 p_ch = *++p;
211#ifdef NEGATE_CLASS2
212 if (p_ch == NEGATE_CLASS2)
213 p_ch = NEGATE_CLASS;
214#endif
215 /* Assign literal 1/0 because of "matched" comparison. */
216 negated = p_ch == NEGATE_CLASS ? 1 : 0;
217 if (negated) {
218 /* Inverted character class. */
219 p_ch = *++p;
220 }
221 prev_ch = 0;
222 matched = 0;
223 do {
224 if (!p_ch)
225 return WM_ABORT_ALL;
226 if (p_ch == '\\') {
227 p_ch = *++p;
228 if (!p_ch)
229 return WM_ABORT_ALL;
230 if (t_ch == p_ch)
231 matched = 1;
232 } else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
233 p_ch = *++p;
234 if (p_ch == '\\') {
235 p_ch = *++p;
236 if (!p_ch)
237 return WM_ABORT_ALL;
238 }
239 if (t_ch <= p_ch && t_ch >= prev_ch)
240 matched = 1;
241 else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {
242 uchar t_ch_upper = toupper(t_ch);
243 if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
244 matched = 1;
245 }
246 p_ch = 0; /* This makes "prev_ch" get set to 0. */
247 } else if (p_ch == '[' && p[1] == ':') {
248 const uchar *s;
249 int i;
250 for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
251 if (!p_ch)
252 return WM_ABORT_ALL;
253 i = (int)(p - s - 1);
254 if (i < 0 || p[-1] != ':') {
255 /* Didn't find ":]", so treat like a normal set. */
256 p = s - 2;
257 p_ch = '[';
258 if (t_ch == p_ch)
259 matched = 1;
260 continue;
261 }
262 if (CC_EQ(s,i, "alnum")) {
263 if (ISALNUM(t_ch))
264 matched = 1;
265 } else if (CC_EQ(s,i, "alpha")) {
266 if (ISALPHA(t_ch))
267 matched = 1;
268 } else if (CC_EQ(s,i, "blank")) {
269 if (ISBLANK(t_ch))
270 matched = 1;
271 } else if (CC_EQ(s,i, "cntrl")) {
272 if (ISCNTRL(t_ch))
273 matched = 1;
274 } else if (CC_EQ(s,i, "digit")) {
275 if (ISDIGIT(t_ch))
276 matched = 1;
277 } else if (CC_EQ(s,i, "graph")) {
278 if (ISGRAPH(t_ch))
279 matched = 1;
280 } else if (CC_EQ(s,i, "lower")) {
281 if (ISLOWER(t_ch))
282 matched = 1;
283 } else if (CC_EQ(s,i, "print")) {
284 if (ISPRINT(t_ch))
285 matched = 1;
286 } else if (CC_EQ(s,i, "punct")) {
287 if (ISPUNCT(t_ch))
288 matched = 1;
289 } else if (CC_EQ(s,i, "space")) {
290 if (ISSPACE(t_ch))
291 matched = 1;
292 } else if (CC_EQ(s,i, "upper")) {
293 if (ISUPPER(t_ch))
294 matched = 1;
295 else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))
296 matched = 1;
297 } else if (CC_EQ(s,i, "xdigit")) {
298 if (ISXDIGIT(t_ch))
299 matched = 1;
300 } else /* malformed [:class:] string */
301 return WM_ABORT_ALL;
302 p_ch = 0; /* This makes "prev_ch" get set to 0. */
303 } else if (t_ch == p_ch)
304 matched = 1;
305 } while (prev_ch = p_ch, (p_ch = *++p) != ']');
306 if (matched == negated ||
307 ((flags & WM_PATHNAME) && t_ch == '/'))
308 return WM_NOMATCH;
309 continue;
310 }
311 }
312
313 return *text ? WM_NOMATCH : WM_MATCH;
314}
315
316/* Match the "pattern" against the "text" string. */
317int wildmatch(const char *pattern, const char *text, unsigned int flags)
318{
319 return dowild((const uchar*)pattern, (const uchar*)text, flags);
320}