]> git.proxmox.com Git - libgit2.git/blob - src/attr_file.c
Merge pull request #1770 from ethomson/index_fuzz
[libgit2.git] / src / attr_file.c
1 #include "common.h"
2 #include "repository.h"
3 #include "filebuf.h"
4 #include "attr.h"
5 #include "git2/blob.h"
6 #include "git2/tree.h"
7 #include <ctype.h>
8
9 static int sort_by_hash_and_name(const void *a_raw, const void *b_raw);
10 static void git_attr_rule__clear(git_attr_rule *rule);
11 static bool parse_optimized_patterns(
12 git_attr_fnmatch *spec,
13 git_pool *pool,
14 const char *pattern);
15
16 int git_attr_file__new(
17 git_attr_file **attrs_ptr,
18 git_attr_file_source from,
19 const char *path,
20 git_pool *pool)
21 {
22 git_attr_file *attrs = NULL;
23
24 attrs = git__calloc(1, sizeof(git_attr_file));
25 GITERR_CHECK_ALLOC(attrs);
26
27 if (pool)
28 attrs->pool = pool;
29 else {
30 attrs->pool = git__calloc(1, sizeof(git_pool));
31 if (!attrs->pool || git_pool_init(attrs->pool, 1, 0) < 0)
32 goto fail;
33 attrs->pool_is_allocated = true;
34 }
35
36 if (path) {
37 size_t len = strlen(path);
38
39 attrs->key = git_pool_malloc(attrs->pool, (uint32_t)len + 3);
40 GITERR_CHECK_ALLOC(attrs->key);
41
42 attrs->key[0] = '0' + from;
43 attrs->key[1] = '#';
44 memcpy(&attrs->key[2], path, len);
45 attrs->key[len + 2] = '\0';
46 }
47
48 if (git_vector_init(&attrs->rules, 4, NULL) < 0)
49 goto fail;
50
51 *attrs_ptr = attrs;
52 return 0;
53
54 fail:
55 git_attr_file__free(attrs);
56 attrs_ptr = NULL;
57 return -1;
58 }
59
60 int git_attr_file__parse_buffer(
61 git_repository *repo, void *parsedata, const char *buffer, git_attr_file *attrs)
62 {
63 int error = 0;
64 const char *scan = NULL;
65 char *context = NULL;
66 git_attr_rule *rule = NULL;
67
68 GIT_UNUSED(parsedata);
69
70 assert(buffer && attrs);
71
72 scan = buffer;
73
74 /* if subdir file path, convert context for file paths */
75 if (attrs->key && git__suffixcmp(attrs->key, "/" GIT_ATTR_FILE) == 0) {
76 context = attrs->key + 2;
77 context[strlen(context) - strlen(GIT_ATTR_FILE)] = '\0';
78 }
79
80 while (!error && *scan) {
81 /* allocate rule if needed */
82 if (!rule) {
83 if (!(rule = git__calloc(1, sizeof(git_attr_rule)))) {
84 error = -1;
85 break;
86 }
87 rule->match.flags = GIT_ATTR_FNMATCH_ALLOWNEG |
88 GIT_ATTR_FNMATCH_ALLOWMACRO;
89 }
90
91 /* parse the next "pattern attr attr attr" line */
92 if (!(error = git_attr_fnmatch__parse(
93 &rule->match, attrs->pool, context, &scan)) &&
94 !(error = git_attr_assignment__parse(
95 repo, attrs->pool, &rule->assigns, &scan)))
96 {
97 if (rule->match.flags & GIT_ATTR_FNMATCH_MACRO)
98 /* should generate error/warning if this is coming from any
99 * file other than .gitattributes at repo root.
100 */
101 error = git_attr_cache__insert_macro(repo, rule);
102 else
103 error = git_vector_insert(&attrs->rules, rule);
104 }
105
106 /* if the rule wasn't a pattern, on to the next */
107 if (error < 0) {
108 git_attr_rule__clear(rule); /* reset rule contents */
109 if (error == GIT_ENOTFOUND)
110 error = 0;
111 } else {
112 rule = NULL; /* vector now "owns" the rule */
113 }
114 }
115
116 git_attr_rule__free(rule);
117
118 /* restore file path used for context */
119 if (context)
120 context[strlen(context)] = '.'; /* first char of GIT_ATTR_FILE */
121
122 return error;
123 }
124
125 int git_attr_file__new_and_load(
126 git_attr_file **attrs_ptr,
127 const char *path)
128 {
129 int error;
130 git_buf content = GIT_BUF_INIT;
131
132 if ((error = git_attr_file__new(attrs_ptr, 0, path, NULL)) < 0)
133 return error;
134
135 if (!(error = git_futils_readbuffer(&content, path)))
136 error = git_attr_file__parse_buffer(
137 NULL, NULL, git_buf_cstr(&content), *attrs_ptr);
138
139 git_buf_free(&content);
140
141 if (error) {
142 git_attr_file__free(*attrs_ptr);
143 *attrs_ptr = NULL;
144 }
145
146 return error;
147 }
148
149 void git_attr_file__clear_rules(git_attr_file *file)
150 {
151 unsigned int i;
152 git_attr_rule *rule;
153
154 git_vector_foreach(&file->rules, i, rule)
155 git_attr_rule__free(rule);
156
157 git_vector_free(&file->rules);
158 }
159
160 void git_attr_file__free(git_attr_file *file)
161 {
162 if (!file)
163 return;
164
165 git_attr_file__clear_rules(file);
166
167 if (file->pool_is_allocated) {
168 git_pool_clear(file->pool);
169 git__free(file->pool);
170 }
171 file->pool = NULL;
172
173 git__free(file);
174 }
175
/*
 * Hash an attribute name: start from 5381 and, for every character,
 * compute hash = hash * 33 + character.  The same recurrence is applied
 * inline while names are parsed in git_attr_assignment__parse(), so the
 * two must stay in step.
 */
uint32_t git_attr_file__name_hash(const char *name)
{
	uint32_t hash = 5381;
	const char *p;

	assert(name);

	for (p = name; *p != '\0'; p++) {
		int c = (int)*p;

		hash = ((hash << 5) + hash) + c;
	}

	return hash;
}
185
186
187 int git_attr_file__lookup_one(
188 git_attr_file *file,
189 const git_attr_path *path,
190 const char *attr,
191 const char **value)
192 {
193 size_t i;
194 git_attr_name name;
195 git_attr_rule *rule;
196
197 *value = NULL;
198
199 name.name = attr;
200 name.name_hash = git_attr_file__name_hash(attr);
201
202 git_attr_file__foreach_matching_rule(file, path, i, rule) {
203 size_t pos;
204
205 if (!git_vector_bsearch(&pos, &rule->assigns, &name)) {
206 *value = ((git_attr_assignment *)
207 git_vector_get(&rule->assigns, pos))->value;
208 break;
209 }
210 }
211
212 return 0;
213 }
214
215
216 bool git_attr_fnmatch__match(
217 git_attr_fnmatch *match,
218 const git_attr_path *path)
219 {
220 int fnm;
221 int icase_flags = (match->flags & GIT_ATTR_FNMATCH_ICASE) ? FNM_CASEFOLD : 0;
222
223 if (match->flags & GIT_ATTR_FNMATCH_DIRECTORY && !path->is_dir)
224 return false;
225
226 if (match->flags & GIT_ATTR_FNMATCH_FULLPATH)
227 fnm = p_fnmatch(match->pattern, path->path, FNM_PATHNAME | icase_flags);
228 else if (path->is_dir)
229 fnm = p_fnmatch(match->pattern, path->basename, FNM_LEADING_DIR | icase_flags);
230 else
231 fnm = p_fnmatch(match->pattern, path->basename, icase_flags);
232
233 return (fnm == FNM_NOMATCH) ? false : true;
234 }
235
236 bool git_attr_rule__match(
237 git_attr_rule *rule,
238 const git_attr_path *path)
239 {
240 bool matched = git_attr_fnmatch__match(&rule->match, path);
241
242 if (rule->match.flags & GIT_ATTR_FNMATCH_NEGATIVE)
243 matched = !matched;
244
245 return matched;
246 }
247
248
249 git_attr_assignment *git_attr_rule__lookup_assignment(
250 git_attr_rule *rule, const char *name)
251 {
252 size_t pos;
253 git_attr_name key;
254 key.name = name;
255 key.name_hash = git_attr_file__name_hash(name);
256
257 if (git_vector_bsearch(&pos, &rule->assigns, &key))
258 return NULL;
259
260 return git_vector_get(&rule->assigns, pos);
261 }
262
263 int git_attr_path__init(
264 git_attr_path *info, const char *path, const char *base)
265 {
266 ssize_t root;
267
268 /* build full path as best we can */
269 git_buf_init(&info->full, 0);
270
271 if (git_path_join_unrooted(&info->full, path, base, &root) < 0)
272 return -1;
273
274 info->path = info->full.ptr + root;
275
276 /* remove trailing slashes */
277 while (info->full.size > 0) {
278 if (info->full.ptr[info->full.size - 1] != '/')
279 break;
280 info->full.size--;
281 }
282 info->full.ptr[info->full.size] = '\0';
283
284 /* skip leading slashes in path */
285 while (*info->path == '/')
286 info->path++;
287
288 /* find trailing basename component */
289 info->basename = strrchr(info->path, '/');
290 if (info->basename)
291 info->basename++;
292 if (!info->basename || !*info->basename)
293 info->basename = info->path;
294
295 info->is_dir = (int)git_path_isdir(info->full.ptr);
296
297 return 0;
298 }
299
300 void git_attr_path__free(git_attr_path *info)
301 {
302 git_buf_free(&info->full);
303 info->path = NULL;
304 info->basename = NULL;
305 }
306
307 /*
308 * From gitattributes(5):
309 *
310 * Patterns have the following format:
311 *
312 * - A blank line matches no files, so it can serve as a separator for
313 * readability.
314 *
315 * - A line starting with # serves as a comment.
316 *
317 * - An optional prefix ! which negates the pattern; any matching file
318 * excluded by a previous pattern will become included again. If a negated
319 * pattern matches, this will override lower precedence patterns sources.
320 *
321 * - If the pattern ends with a slash, it is removed for the purpose of the
322 * following description, but it would only find a match with a directory. In
323 * other words, foo/ will match a directory foo and paths underneath it, but
324 * will not match a regular file or a symbolic link foo (this is consistent
325 * with the way how pathspec works in general in git).
326 *
327 * - If the pattern does not contain a slash /, git treats it as a shell glob
328 * pattern and checks for a match against the pathname without leading
329 * directories.
330 *
331 * - Otherwise, git treats the pattern as a shell glob suitable for consumption
332 * by fnmatch(3) with the FNM_PATHNAME flag: wildcards in the pattern will
333 * not match a / in the pathname. For example, "Documentation/\*.html" matches
334 * "Documentation/git.html" but not "Documentation/ppc/ppc.html". A leading
335 * slash matches the beginning of the pathname; for example, "/\*.c" matches
336 * "cat-file.c" but not "mozilla-sha1/sha1.c".
337 */
338
339 /*
340 * This will return 0 if the spec was filled out,
341 * GIT_ENOTFOUND if the fnmatch does not require matching, or
342 * another error code there was an actual problem.
343 */
344 int git_attr_fnmatch__parse(
345 git_attr_fnmatch *spec,
346 git_pool *pool,
347 const char *source,
348 const char **base)
349 {
350 const char *pattern, *scan;
351 int slash_count, allow_space;
352
353 assert(spec && base && *base);
354
355 if (parse_optimized_patterns(spec, pool, *base))
356 return 0;
357
358 spec->flags = (spec->flags & GIT_ATTR_FNMATCH__INCOMING);
359 allow_space = ((spec->flags & GIT_ATTR_FNMATCH_ALLOWSPACE) != 0);
360
361 pattern = *base;
362
363 while (git__isspace(*pattern)) pattern++;
364 if (!*pattern || *pattern == '#') {
365 *base = git__next_line(pattern);
366 return GIT_ENOTFOUND;
367 }
368
369 if (*pattern == '[' && (spec->flags & GIT_ATTR_FNMATCH_ALLOWMACRO) != 0) {
370 if (strncmp(pattern, "[attr]", 6) == 0) {
371 spec->flags = spec->flags | GIT_ATTR_FNMATCH_MACRO;
372 pattern += 6;
373 }
374 /* else a character range like [a-e]* which is accepted */
375 }
376
377 if (*pattern == '!' && (spec->flags & GIT_ATTR_FNMATCH_ALLOWNEG) != 0) {
378 spec->flags = spec->flags | GIT_ATTR_FNMATCH_NEGATIVE;
379 pattern++;
380 }
381
382 slash_count = 0;
383 for (scan = pattern; *scan != '\0'; ++scan) {
384 /* scan until (non-escaped) white space */
385 if (git__isspace(*scan) && *(scan - 1) != '\\') {
386 if (!allow_space || (*scan != ' ' && *scan != '\t'))
387 break;
388 }
389
390 if (*scan == '/') {
391 spec->flags = spec->flags | GIT_ATTR_FNMATCH_FULLPATH;
392 slash_count++;
393 if (pattern == scan)
394 pattern++;
395 }
396 /* remember if we see an unescaped wildcard in pattern */
397 else if (git__iswildcard(*scan) &&
398 (scan == pattern || (*(scan - 1) != '\\')))
399 spec->flags = spec->flags | GIT_ATTR_FNMATCH_HASWILD;
400 }
401
402 *base = scan;
403
404 if ((spec->length = scan - pattern) == 0)
405 return GIT_ENOTFOUND;
406
407 if (pattern[spec->length - 1] == '/') {
408 spec->length--;
409 spec->flags = spec->flags | GIT_ATTR_FNMATCH_DIRECTORY;
410 if (--slash_count <= 0)
411 spec->flags = spec->flags & ~GIT_ATTR_FNMATCH_FULLPATH;
412 }
413
414 if ((spec->flags & GIT_ATTR_FNMATCH_FULLPATH) != 0 &&
415 source != NULL && git_path_root(pattern) < 0)
416 {
417 size_t sourcelen = strlen(source);
418 /* given an unrooted fullpath match from a file inside a repo,
419 * prefix the pattern with the relative directory of the source file
420 */
421 spec->pattern = git_pool_malloc(
422 pool, (uint32_t)(sourcelen + spec->length + 1));
423 if (spec->pattern) {
424 memcpy(spec->pattern, source, sourcelen);
425 memcpy(spec->pattern + sourcelen, pattern, spec->length);
426 spec->length += sourcelen;
427 spec->pattern[spec->length] = '\0';
428 }
429 } else {
430 spec->pattern = git_pool_strndup(pool, pattern, spec->length);
431 }
432
433 if (!spec->pattern) {
434 *base = git__next_line(pattern);
435 return -1;
436 } else {
437 /* strip '\' that might have be used for internal whitespace */
438 spec->length = git__unescape(spec->pattern);
439 }
440
441 return 0;
442 }
443
444 static bool parse_optimized_patterns(
445 git_attr_fnmatch *spec,
446 git_pool *pool,
447 const char *pattern)
448 {
449 if (!pattern[1] && (pattern[0] == '*' || pattern[0] == '.')) {
450 spec->flags = GIT_ATTR_FNMATCH_MATCH_ALL;
451 spec->pattern = git_pool_strndup(pool, pattern, 1);
452 spec->length = 1;
453
454 return true;
455 }
456
457 return false;
458 }
459
460 static int sort_by_hash_and_name(const void *a_raw, const void *b_raw)
461 {
462 const git_attr_name *a = a_raw;
463 const git_attr_name *b = b_raw;
464
465 if (b->name_hash < a->name_hash)
466 return 1;
467 else if (b->name_hash > a->name_hash)
468 return -1;
469 else
470 return strcmp(b->name, a->name);
471 }
472
473 static void git_attr_assignment__free(git_attr_assignment *assign)
474 {
475 /* name and value are stored in a git_pool associated with the
476 * git_attr_file, so they do not need to be freed here
477 */
478 assign->name = NULL;
479 assign->value = NULL;
480 git__free(assign);
481 }
482
483 static int merge_assignments(void **old_raw, void *new_raw)
484 {
485 git_attr_assignment **old = (git_attr_assignment **)old_raw;
486 git_attr_assignment *new = (git_attr_assignment *)new_raw;
487
488 GIT_REFCOUNT_DEC(*old, git_attr_assignment__free);
489 *old = new;
490 return GIT_EEXISTS;
491 }
492
493 int git_attr_assignment__parse(
494 git_repository *repo,
495 git_pool *pool,
496 git_vector *assigns,
497 const char **base)
498 {
499 int error;
500 const char *scan = *base;
501 git_attr_assignment *assign = NULL;
502
503 assert(assigns && !assigns->length);
504
505 git_vector_set_cmp(assigns, sort_by_hash_and_name);
506
507 while (*scan && *scan != '\n') {
508 const char *name_start, *value_start;
509
510 /* skip leading blanks */
511 while (git__isspace(*scan) && *scan != '\n') scan++;
512
513 /* allocate assign if needed */
514 if (!assign) {
515 assign = git__calloc(1, sizeof(git_attr_assignment));
516 GITERR_CHECK_ALLOC(assign);
517 GIT_REFCOUNT_INC(assign);
518 }
519
520 assign->name_hash = 5381;
521 assign->value = git_attr__true;
522
523 /* look for magic name prefixes */
524 if (*scan == '-') {
525 assign->value = git_attr__false;
526 scan++;
527 } else if (*scan == '!') {
528 assign->value = git_attr__unset; /* explicit unspecified state */
529 scan++;
530 } else if (*scan == '#') /* comment rest of line */
531 break;
532
533 /* find the name */
534 name_start = scan;
535 while (*scan && !git__isspace(*scan) && *scan != '=') {
536 assign->name_hash =
537 ((assign->name_hash << 5) + assign->name_hash) + *scan;
538 scan++;
539 }
540 if (scan == name_start) {
541 /* must have found lone prefix (" - ") or leading = ("=foo")
542 * or end of buffer -- advance until whitespace and continue
543 */
544 while (*scan && !git__isspace(*scan)) scan++;
545 continue;
546 }
547
548 /* allocate permanent storage for name */
549 assign->name = git_pool_strndup(pool, name_start, scan - name_start);
550 GITERR_CHECK_ALLOC(assign->name);
551
552 /* if there is an equals sign, find the value */
553 if (*scan == '=') {
554 for (value_start = ++scan; *scan && !git__isspace(*scan); ++scan);
555
556 /* if we found a value, allocate permanent storage for it */
557 if (scan > value_start) {
558 assign->value = git_pool_strndup(pool, value_start, scan - value_start);
559 GITERR_CHECK_ALLOC(assign->value);
560 }
561 }
562
563 /* expand macros (if given a repo with a macro cache) */
564 if (repo != NULL && assign->value == git_attr__true) {
565 git_attr_rule *macro =
566 git_attr_cache__lookup_macro(repo, assign->name);
567
568 if (macro != NULL) {
569 unsigned int i;
570 git_attr_assignment *massign;
571
572 git_vector_foreach(&macro->assigns, i, massign) {
573 GIT_REFCOUNT_INC(massign);
574
575 error = git_vector_insert_sorted(
576 assigns, massign, &merge_assignments);
577 if (error < 0 && error != GIT_EEXISTS)
578 return error;
579 }
580 }
581 }
582
583 /* insert allocated assign into vector */
584 error = git_vector_insert_sorted(assigns, assign, &merge_assignments);
585 if (error < 0 && error != GIT_EEXISTS)
586 return error;
587
588 /* clear assign since it is now "owned" by the vector */
589 assign = NULL;
590 }
591
592 if (assign != NULL)
593 git_attr_assignment__free(assign);
594
595 *base = git__next_line(scan);
596
597 return (assigns->length == 0) ? GIT_ENOTFOUND : 0;
598 }
599
600 static void git_attr_rule__clear(git_attr_rule *rule)
601 {
602 unsigned int i;
603 git_attr_assignment *assign;
604
605 if (!rule)
606 return;
607
608 if (!(rule->match.flags & GIT_ATTR_FNMATCH_IGNORE)) {
609 git_vector_foreach(&rule->assigns, i, assign)
610 GIT_REFCOUNT_DEC(assign, git_attr_assignment__free);
611 git_vector_free(&rule->assigns);
612 }
613
614 /* match.pattern is stored in a git_pool, so no need to free */
615 rule->match.pattern = NULL;
616 rule->match.length = 0;
617 }
618
619 void git_attr_rule__free(git_attr_rule *rule)
620 {
621 git_attr_rule__clear(rule);
622 git__free(rule);
623 }
624