/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */
#include "repository.h"
#include "attrcache.h"
#include "git2/blob.h"
#include "git2/tree.h"
18 static void attr_file_free(git_attr_file
*file
)
20 bool unlock
= !git_mutex_lock(&file
->lock
);
21 git_attr_file__clear_rules(file
, false);
22 git_pool_clear(&file
->pool
);
24 git_mutex_unlock(&file
->lock
);
25 git_mutex_free(&file
->lock
);
27 git__memzero(file
, sizeof(*file
));
31 int git_attr_file__new(
33 git_attr_file_entry
*entry
,
34 git_attr_file_source source
)
36 git_attr_file
*attrs
= git__calloc(1, sizeof(git_attr_file
));
37 GITERR_CHECK_ALLOC(attrs
);
39 if (git_mutex_init(&attrs
->lock
) < 0) {
40 giterr_set(GITERR_OS
, "failed to initialize lock");
45 git_pool_init(&attrs
->pool
, 1);
46 GIT_REFCOUNT_INC(attrs
);
48 attrs
->source
= source
;
53 int git_attr_file__clear_rules(git_attr_file
*file
, bool need_lock
)
58 if (need_lock
&& git_mutex_lock(&file
->lock
) < 0) {
59 giterr_set(GITERR_OS
, "failed to lock attribute file");
63 git_vector_foreach(&file
->rules
, i
, rule
)
64 git_attr_rule__free(rule
);
65 git_vector_free(&file
->rules
);
68 git_mutex_unlock(&file
->lock
);
73 void git_attr_file__free(git_attr_file
*file
)
77 GIT_REFCOUNT_DEC(file
, attr_file_free
);
80 static int attr_file_oid_from_index(
81 git_oid
*oid
, git_repository
*repo
, const char *path
)
86 const git_index_entry
*entry
;
88 if ((error
= git_repository_index__weakptr(&idx
, repo
)) < 0 ||
89 (error
= git_index__find_pos(&pos
, idx
, path
, 0, 0)) < 0)
92 if (!(entry
= git_index_get_byindex(idx
, pos
)))
99 int git_attr_file__load(
101 git_repository
*repo
,
102 git_attr_session
*attr_session
,
103 git_attr_file_entry
*entry
,
104 git_attr_file_source source
,
105 git_attr_file_parser parser
)
108 git_blob
*blob
= NULL
;
109 git_buf content
= GIT_BUF_INIT
;
112 bool nonexistent
= false;
117 case GIT_ATTR_FILE__IN_MEMORY
:
118 /* in-memory attribute file doesn't need data */
120 case GIT_ATTR_FILE__FROM_INDEX
: {
123 if ((error
= attr_file_oid_from_index(&id
, repo
, entry
->path
)) < 0 ||
124 (error
= git_blob_lookup(&blob
, repo
, &id
)) < 0)
127 /* Do not assume that data straight from the ODB is NULL-terminated;
128 * copy the contents of a file to a buffer to work on */
129 git_buf_put(&content
, git_blob_rawcontent(blob
), git_blob_rawsize(blob
));
132 case GIT_ATTR_FILE__FROM_FILE
: {
135 /* For open or read errors, pretend that we got ENOTFOUND. */
136 /* TODO: issue warning when warning API is available */
138 if (p_stat(entry
->fullpath
, &st
) < 0 ||
139 S_ISDIR(st
.st_mode
) ||
140 (fd
= git_futils_open_ro(entry
->fullpath
)) < 0 ||
141 (error
= git_futils_readbuffer_fd(&content
, fd
, (size_t)st
.st_size
)) < 0)
150 giterr_set(GITERR_INVALID
, "unknown file source %d", source
);
154 if ((error
= git_attr_file__new(&file
, entry
, source
)) < 0)
157 /* store the key of the attr_reader; don't bother with cache
158 * invalidation during the same attr reader session.
161 file
->session_key
= attr_session
->key
;
163 if (parser
&& (error
= parser(repo
, file
, git_buf_cstr(&content
))) < 0) {
164 git_attr_file__free(file
);
168 /* write cache breakers */
170 file
->nonexistent
= 1;
171 else if (source
== GIT_ATTR_FILE__FROM_INDEX
)
172 git_oid_cpy(&file
->cache_data
.oid
, git_blob_id(blob
));
173 else if (source
== GIT_ATTR_FILE__FROM_FILE
)
174 git_futils_filestamp_set_from_stat(&file
->cache_data
.stamp
, &st
);
175 /* else always cacheable */
181 git_buf_free(&content
);
186 int git_attr_file__out_of_date(
187 git_repository
*repo
,
188 git_attr_session
*attr_session
,
194 /* we are never out of date if we just created this data in the same
195 * attr_session; otherwise, nonexistent files must be invalidated
197 if (attr_session
&& attr_session
->key
== file
->session_key
)
199 else if (file
->nonexistent
)
202 switch (file
->source
) {
203 case GIT_ATTR_FILE__IN_MEMORY
:
206 case GIT_ATTR_FILE__FROM_FILE
:
207 return git_futils_filestamp_check(
208 &file
->cache_data
.stamp
, file
->entry
->fullpath
);
210 case GIT_ATTR_FILE__FROM_INDEX
: {
214 if ((error
= attr_file_oid_from_index(
215 &id
, repo
, file
->entry
->path
)) < 0)
218 return (git_oid__cmp(&file
->cache_data
.oid
, &id
) != 0);
222 giterr_set(GITERR_INVALID
, "invalid file type %d", file
->source
);
227 static int sort_by_hash_and_name(const void *a_raw
, const void *b_raw
);
228 static void git_attr_rule__clear(git_attr_rule
*rule
);
229 static bool parse_optimized_patterns(
230 git_attr_fnmatch
*spec
,
232 const char *pattern
);
234 int git_attr_file__parse_buffer(
235 git_repository
*repo
, git_attr_file
*attrs
, const char *data
)
238 const char *scan
= data
, *context
= NULL
;
239 git_attr_rule
*rule
= NULL
;
241 /* if subdir file path, convert context for file paths */
243 git_path_root(attrs
->entry
->path
) < 0 &&
244 !git__suffixcmp(attrs
->entry
->path
, "/" GIT_ATTR_FILE
))
245 context
= attrs
->entry
->path
;
247 if (git_mutex_lock(&attrs
->lock
) < 0) {
248 giterr_set(GITERR_OS
, "failed to lock attribute file");
252 while (!error
&& *scan
) {
253 /* allocate rule if needed */
254 if (!rule
&& !(rule
= git__calloc(1, sizeof(*rule
)))) {
260 GIT_ATTR_FNMATCH_ALLOWNEG
| GIT_ATTR_FNMATCH_ALLOWMACRO
;
262 /* parse the next "pattern attr attr attr" line */
263 if (!(error
= git_attr_fnmatch__parse(
264 &rule
->match
, &attrs
->pool
, context
, &scan
)) &&
265 !(error
= git_attr_assignment__parse(
266 repo
, &attrs
->pool
, &rule
->assigns
, &scan
)))
268 if (rule
->match
.flags
& GIT_ATTR_FNMATCH_MACRO
)
269 /* TODO: warning if macro found in file below repo root */
270 error
= git_attr_cache__insert_macro(repo
, rule
);
272 error
= git_vector_insert(&attrs
->rules
, rule
);
275 /* if the rule wasn't a pattern, on to the next */
277 git_attr_rule__clear(rule
); /* reset rule contents */
278 if (error
== GIT_ENOTFOUND
)
281 rule
= NULL
; /* vector now "owns" the rule */
285 git_mutex_unlock(&attrs
->lock
);
286 git_attr_rule__free(rule
);
/*
 * Hash an attribute name: start from 5381 and, for every character,
 * compute h = h * 33 + c (written as a shift and add).
 *
 * @param name NUL-terminated attribute name; must not be NULL
 * @return the 32-bit hash (5381 for the empty string)
 */
uint32_t git_attr_file__name_hash(const char *name)
{
	uint32_t h = 5381;
	int c;
	assert(name);
	while ((c = (int)*name++) != 0)
		h = ((h << 5) + h) + c;
	return h;
}
301 int git_attr_file__lookup_one(
314 name
.name_hash
= git_attr_file__name_hash(attr
);
316 git_attr_file__foreach_matching_rule(file
, path
, i
, rule
) {
319 if (!git_vector_bsearch(&pos
, &rule
->assigns
, &name
)) {
320 *value
= ((git_attr_assignment
*)
321 git_vector_get(&rule
->assigns
, pos
))->value
;
329 int git_attr_file__load_standalone(git_attr_file
**out
, const char *path
)
333 git_buf content
= GIT_BUF_INIT
;
335 error
= git_attr_file__new(&file
, NULL
, GIT_ATTR_FILE__FROM_FILE
);
339 error
= git_attr_cache__alloc_file_entry(
340 &file
->entry
, NULL
, path
, &file
->pool
);
342 git_attr_file__free(file
);
345 /* because the cache entry is allocated from the file's own pool, we
346 * don't have to free it - freeing file+pool will free cache entry, too.
349 if (!(error
= git_futils_readbuffer(&content
, path
))) {
350 error
= git_attr_file__parse_buffer(NULL
, file
, content
.ptr
);
351 git_buf_free(&content
);
355 git_attr_file__free(file
);
362 bool git_attr_fnmatch__match(
363 git_attr_fnmatch
*match
,
366 const char *relpath
= path
->path
;
367 const char *filename
;
371 * If the rule was generated in a subdirectory, we must only
372 * use it for paths inside that directory. We can thus return
373 * a non-match if the prefixes don't match.
375 if (match
->containing_dir
) {
376 if (match
->flags
& GIT_ATTR_FNMATCH_ICASE
) {
377 if (git__strncasecmp(path
->path
, match
->containing_dir
, match
->containing_dir_length
))
380 if (git__prefixcmp(path
->path
, match
->containing_dir
))
384 relpath
+= match
->containing_dir_length
;
387 if (match
->flags
& GIT_ATTR_FNMATCH_ICASE
)
388 flags
|= FNM_CASEFOLD
;
389 if (match
->flags
& GIT_ATTR_FNMATCH_LEADINGDIR
)
390 flags
|= FNM_LEADING_DIR
;
392 if (match
->flags
& GIT_ATTR_FNMATCH_FULLPATH
) {
394 flags
|= FNM_PATHNAME
;
396 filename
= path
->basename
;
399 flags
|= FNM_LEADING_DIR
;
402 if ((match
->flags
& GIT_ATTR_FNMATCH_DIRECTORY
) && !path
->is_dir
) {
406 * for attribute checks or checks at the root of this match's
407 * containing_dir (or root of the repository if no containing_dir),
410 if (!(match
->flags
& GIT_ATTR_FNMATCH_IGNORE
) ||
411 path
->basename
== relpath
)
414 flags
|= FNM_LEADING_DIR
;
416 /* fail match if this is a file with same name as ignored folder */
417 samename
= (match
->flags
& GIT_ATTR_FNMATCH_ICASE
) ?
418 !strcasecmp(match
->pattern
, relpath
) :
419 !strcmp(match
->pattern
, relpath
);
424 return (p_fnmatch(match
->pattern
, relpath
, flags
) != FNM_NOMATCH
);
427 /* if path is a directory prefix of a negated pattern, then match */
428 if ((match
->flags
& GIT_ATTR_FNMATCH_NEGATIVE
) && path
->is_dir
) {
429 size_t pathlen
= strlen(relpath
);
430 bool prefixed
= (pathlen
<= match
->length
) &&
431 ((match
->flags
& GIT_ATTR_FNMATCH_ICASE
) ?
432 !strncasecmp(match
->pattern
, relpath
, pathlen
) :
433 !strncmp(match
->pattern
, relpath
, pathlen
));
435 if (prefixed
&& git_path_at_end_of_segment(&match
->pattern
[pathlen
]))
439 return (p_fnmatch(match
->pattern
, filename
, flags
) != FNM_NOMATCH
);
442 bool git_attr_rule__match(
446 bool matched
= git_attr_fnmatch__match(&rule
->match
, path
);
448 if (rule
->match
.flags
& GIT_ATTR_FNMATCH_NEGATIVE
)
454 git_attr_assignment
*git_attr_rule__lookup_assignment(
455 git_attr_rule
*rule
, const char *name
)
460 key
.name_hash
= git_attr_file__name_hash(name
);
462 if (git_vector_bsearch(&pos
, &rule
->assigns
, &key
))
465 return git_vector_get(&rule
->assigns
, pos
);
468 int git_attr_path__init(
469 git_attr_path
*info
, const char *path
, const char *base
, git_dir_flag dir_flag
)
473 /* build full path as best we can */
474 git_buf_init(&info
->full
, 0);
476 if (git_path_join_unrooted(&info
->full
, path
, base
, &root
) < 0)
479 info
->path
= info
->full
.ptr
+ root
;
481 /* remove trailing slashes */
482 while (info
->full
.size
> 0) {
483 if (info
->full
.ptr
[info
->full
.size
- 1] != '/')
487 info
->full
.ptr
[info
->full
.size
] = '\0';
489 /* skip leading slashes in path */
490 while (*info
->path
== '/')
493 /* find trailing basename component */
494 info
->basename
= strrchr(info
->path
, '/');
497 if (!info
->basename
|| !*info
->basename
)
498 info
->basename
= info
->path
;
502 case GIT_DIR_FLAG_FALSE
:
506 case GIT_DIR_FLAG_TRUE
:
510 case GIT_DIR_FLAG_UNKNOWN
:
512 info
->is_dir
= (int)git_path_isdir(info
->full
.ptr
);
519 void git_attr_path__free(git_attr_path
*info
)
521 git_buf_free(&info
->full
);
523 info
->basename
= NULL
;
/*
 * From gitattributes(5):
 *
 * Patterns have the following format:
 *
 * - A blank line matches no files, so it can serve as a separator for
 *   readability.
 *
 * - A line starting with # serves as a comment.
 *
 * - An optional prefix ! which negates the pattern; any matching file
 *   excluded by a previous pattern will become included again. If a negated
 *   pattern matches, this will override lower precedence patterns sources.
 *
 * - If the pattern ends with a slash, it is removed for the purpose of the
 *   following description, but it would only find a match with a directory. In
 *   other words, foo/ will match a directory foo and paths underneath it, but
 *   will not match a regular file or a symbolic link foo (this is consistent
 *   with the way how pathspec works in general in git).
 *
 * - If the pattern does not contain a slash /, git treats it as a shell glob
 *   pattern and checks for a match against the pathname without leading
 *   directories.
 *
 * - Otherwise, git treats the pattern as a shell glob suitable for consumption
 *   by fnmatch(3) with the FNM_PATHNAME flag: wildcards in the pattern will
 *   not match a / in the pathname. For example, "Documentation/\*.html" matches
 *   "Documentation/git.html" but not "Documentation/ppc/ppc.html". A leading
 *   slash matches the beginning of the pathname; for example, "/\*.c" matches
 *   "cat-file.c" but not "mozilla-sha1/sha1.c".
 */
559 * This will return 0 if the spec was filled out,
560 * GIT_ENOTFOUND if the fnmatch does not require matching, or
561 * another error code there was an actual problem.
563 int git_attr_fnmatch__parse(
564 git_attr_fnmatch
*spec
,
569 const char *pattern
, *scan
;
570 int slash_count
, allow_space
;
572 assert(spec
&& base
&& *base
);
574 if (parse_optimized_patterns(spec
, pool
, *base
))
577 spec
->flags
= (spec
->flags
& GIT_ATTR_FNMATCH__INCOMING
);
578 allow_space
= ((spec
->flags
& GIT_ATTR_FNMATCH_ALLOWSPACE
) != 0);
582 while (git__isspace(*pattern
)) pattern
++;
583 if (!*pattern
|| *pattern
== '#') {
584 *base
= git__next_line(pattern
);
585 return GIT_ENOTFOUND
;
588 if (*pattern
== '[' && (spec
->flags
& GIT_ATTR_FNMATCH_ALLOWMACRO
) != 0) {
589 if (strncmp(pattern
, "[attr]", 6) == 0) {
590 spec
->flags
= spec
->flags
| GIT_ATTR_FNMATCH_MACRO
;
593 /* else a character range like [a-e]* which is accepted */
596 if (*pattern
== '!' && (spec
->flags
& GIT_ATTR_FNMATCH_ALLOWNEG
) != 0) {
597 spec
->flags
= spec
->flags
|
598 GIT_ATTR_FNMATCH_NEGATIVE
| GIT_ATTR_FNMATCH_LEADINGDIR
;
603 for (scan
= pattern
; *scan
!= '\0'; ++scan
) {
604 /* scan until (non-escaped) white space */
605 if (git__isspace(*scan
) && *(scan
- 1) != '\\') {
606 if (!allow_space
|| (*scan
!= ' ' && *scan
!= '\t' && *scan
!= '\r'))
611 spec
->flags
= spec
->flags
| GIT_ATTR_FNMATCH_FULLPATH
;
616 /* remember if we see an unescaped wildcard in pattern */
617 else if (git__iswildcard(*scan
) &&
618 (scan
== pattern
|| (*(scan
- 1) != '\\')))
619 spec
->flags
= spec
->flags
| GIT_ATTR_FNMATCH_HASWILD
;
624 if ((spec
->length
= scan
- pattern
) == 0)
625 return GIT_ENOTFOUND
;
628 * Remove one trailing \r in case this is a CRLF delimited
629 * file, in the case of Icon\r\r\n, we still leave the first
630 * \r there to match against.
632 if (pattern
[spec
->length
- 1] == '\r')
633 if (--spec
->length
== 0)
634 return GIT_ENOTFOUND
;
636 if (pattern
[spec
->length
- 1] == '/') {
638 spec
->flags
= spec
->flags
| GIT_ATTR_FNMATCH_DIRECTORY
;
639 if (--slash_count
<= 0)
640 spec
->flags
= spec
->flags
& ~GIT_ATTR_FNMATCH_FULLPATH
;
642 if ((spec
->flags
& GIT_ATTR_FNMATCH_NOLEADINGDIR
) == 0 &&
644 pattern
[spec
->length
- 1] == '*' &&
645 pattern
[spec
->length
- 2] == '/') {
647 spec
->flags
= spec
->flags
| GIT_ATTR_FNMATCH_LEADINGDIR
;
648 /* leave FULLPATH match on, however */
652 char *slash
= strrchr(context
, '/');
655 /* include the slash for easier matching */
656 len
= slash
- context
+ 1;
657 spec
->containing_dir
= git_pool_strndup(pool
, context
, len
);
658 spec
->containing_dir_length
= len
;
662 spec
->pattern
= git_pool_strndup(pool
, pattern
, spec
->length
);
664 if (!spec
->pattern
) {
665 *base
= git__next_line(pattern
);
668 /* strip '\' that might have be used for internal whitespace */
669 spec
->length
= git__unescape(spec
->pattern
);
670 /* TODO: convert remaining '\' into '/' for POSIX ??? */
676 static bool parse_optimized_patterns(
677 git_attr_fnmatch
*spec
,
681 if (!pattern
[1] && (pattern
[0] == '*' || pattern
[0] == '.')) {
682 spec
->flags
= GIT_ATTR_FNMATCH_MATCH_ALL
;
683 spec
->pattern
= git_pool_strndup(pool
, pattern
, 1);
692 static int sort_by_hash_and_name(const void *a_raw
, const void *b_raw
)
694 const git_attr_name
*a
= a_raw
;
695 const git_attr_name
*b
= b_raw
;
697 if (b
->name_hash
< a
->name_hash
)
699 else if (b
->name_hash
> a
->name_hash
)
702 return strcmp(b
->name
, a
->name
);
705 static void git_attr_assignment__free(git_attr_assignment
*assign
)
707 /* name and value are stored in a git_pool associated with the
708 * git_attr_file, so they do not need to be freed here
711 assign
->value
= NULL
;
715 static int merge_assignments(void **old_raw
, void *new_raw
)
717 git_attr_assignment
**old
= (git_attr_assignment
**)old_raw
;
718 git_attr_assignment
*new = (git_attr_assignment
*)new_raw
;
720 GIT_REFCOUNT_DEC(*old
, git_attr_assignment__free
);
725 int git_attr_assignment__parse(
726 git_repository
*repo
,
732 const char *scan
= *base
;
733 git_attr_assignment
*assign
= NULL
;
735 assert(assigns
&& !assigns
->length
);
737 git_vector_set_cmp(assigns
, sort_by_hash_and_name
);
739 while (*scan
&& *scan
!= '\n') {
740 const char *name_start
, *value_start
;
742 /* skip leading blanks */
743 while (git__isspace(*scan
) && *scan
!= '\n') scan
++;
745 /* allocate assign if needed */
747 assign
= git__calloc(1, sizeof(git_attr_assignment
));
748 GITERR_CHECK_ALLOC(assign
);
749 GIT_REFCOUNT_INC(assign
);
752 assign
->name_hash
= 5381;
753 assign
->value
= git_attr__true
;
755 /* look for magic name prefixes */
757 assign
->value
= git_attr__false
;
759 } else if (*scan
== '!') {
760 assign
->value
= git_attr__unset
; /* explicit unspecified state */
762 } else if (*scan
== '#') /* comment rest of line */
767 while (*scan
&& !git__isspace(*scan
) && *scan
!= '=') {
769 ((assign
->name_hash
<< 5) + assign
->name_hash
) + *scan
;
772 if (scan
== name_start
) {
773 /* must have found lone prefix (" - ") or leading = ("=foo")
774 * or end of buffer -- advance until whitespace and continue
776 while (*scan
&& !git__isspace(*scan
)) scan
++;
780 /* allocate permanent storage for name */
781 assign
->name
= git_pool_strndup(pool
, name_start
, scan
- name_start
);
782 GITERR_CHECK_ALLOC(assign
->name
);
784 /* if there is an equals sign, find the value */
786 for (value_start
= ++scan
; *scan
&& !git__isspace(*scan
); ++scan
);
788 /* if we found a value, allocate permanent storage for it */
789 if (scan
> value_start
) {
790 assign
->value
= git_pool_strndup(pool
, value_start
, scan
- value_start
);
791 GITERR_CHECK_ALLOC(assign
->value
);
795 /* expand macros (if given a repo with a macro cache) */
796 if (repo
!= NULL
&& assign
->value
== git_attr__true
) {
797 git_attr_rule
*macro
=
798 git_attr_cache__lookup_macro(repo
, assign
->name
);
802 git_attr_assignment
*massign
;
804 git_vector_foreach(¯o
->assigns
, i
, massign
) {
805 GIT_REFCOUNT_INC(massign
);
807 error
= git_vector_insert_sorted(
808 assigns
, massign
, &merge_assignments
);
809 if (error
< 0 && error
!= GIT_EEXISTS
) {
810 git_attr_assignment__free(assign
);
817 /* insert allocated assign into vector */
818 error
= git_vector_insert_sorted(assigns
, assign
, &merge_assignments
);
819 if (error
< 0 && error
!= GIT_EEXISTS
)
822 /* clear assign since it is now "owned" by the vector */
827 git_attr_assignment__free(assign
);
829 *base
= git__next_line(scan
);
831 return (assigns
->length
== 0) ? GIT_ENOTFOUND
: 0;
834 static void git_attr_rule__clear(git_attr_rule
*rule
)
837 git_attr_assignment
*assign
;
842 if (!(rule
->match
.flags
& GIT_ATTR_FNMATCH_IGNORE
)) {
843 git_vector_foreach(&rule
->assigns
, i
, assign
)
844 GIT_REFCOUNT_DEC(assign
, git_attr_assignment__free
);
845 git_vector_free(&rule
->assigns
);
848 /* match.pattern is stored in a git_pool, so no need to free */
849 rule
->match
.pattern
= NULL
;
850 rule
->match
.length
= 0;
853 void git_attr_rule__free(git_attr_rule
*rule
)
855 git_attr_rule__clear(rule
);
859 int git_attr_session__init(git_attr_session
*session
, git_repository
*repo
)
863 session
->key
= git_atomic_inc(&repo
->attr_session_key
);
868 void git_attr_session__free(git_attr_session
*session
)
873 git_buf_free(&session
->sysdir
);
874 git_buf_free(&session
->tmp
);
876 memset(session
, 0, sizeof(git_attr_session
));