]> git.proxmox.com Git - libgit2.git/blobdiff - src/crlf.c
Merge pull request #3303 from libgit2/cmn/index-add-submodule
[libgit2.git] / src / crlf.c
index 65039f9ccf765052d133d8ca952199e8831de5d2..f391137c1b4e7770fb06ef1f01e53f1ec82a27e3 100644 (file)
@@ -8,6 +8,7 @@
 #include "git2/attr.h"
 #include "git2/blob.h"
 #include "git2/index.h"
+#include "git2/sys/filter.h"
 
 #include "common.h"
 #include "fileops.h"
 struct crlf_attrs {
        int crlf_action;
        int eol;
+       int auto_crlf;
+       int safe_crlf;
 };
 
 struct crlf_filter {
        git_filter f;
-       struct crlf_attrs attrs;
-       git_repository *repo;
-       char path[GIT_FLEX_ARRAY];
 };
 
 static int check_crlf(const char *value)
@@ -76,41 +76,10 @@ static int crlf_input_action(struct crlf_attrs *ca)
        return ca->crlf_action;
 }
 
-static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, const char *path)
+static int has_cr_in_index(const git_filter_source *src)
 {
-#define NUM_CONV_ATTRS 3
-
-       static const char *attr_names[NUM_CONV_ATTRS] = {
-               "crlf", "eol", "text",
-       };
-
-       const char *attr_vals[NUM_CONV_ATTRS];
-       int error;
-
-       error = git_attr_get_many(attr_vals,
-               repo, 0, path, NUM_CONV_ATTRS, attr_names);
-
-       if (error == GIT_ENOTFOUND) {
-               ca->crlf_action = GIT_CRLF_GUESS;
-               ca->eol = GIT_EOL_UNSET;
-               return 0;
-       }
-
-       if (error == 0) {
-               ca->crlf_action = check_crlf(attr_vals[2]); /* text */
-               if (ca->crlf_action == GIT_CRLF_GUESS)
-                       ca->crlf_action = check_crlf(attr_vals[0]); /* clrf */
-
-               ca->eol = check_eol(attr_vals[1]); /* eol */
-               return 0;
-       }
-
-       return -1;
-}
-
-static int has_cr_in_index(git_filter *self)
-{
-       struct crlf_filter *filter = (struct crlf_filter *)self;
+       git_repository *repo = git_filter_source_repo(src);
+       const char *path = git_filter_source_path(src);
        git_index *index;
        const git_index_entry *entry;
        git_blob *blob;
@@ -118,19 +87,22 @@ static int has_cr_in_index(git_filter *self)
        git_off_t blobsize;
        bool found_cr;
 
-       if (git_repository_index__weakptr(&index, filter->repo) < 0) {
+       if (!path)
+               return false;
+
+       if (git_repository_index__weakptr(&index, repo) < 0) {
                giterr_clear();
                return false;
        }
 
-       if (!(entry = git_index_get_bypath(index, filter->path, 0)) &&
-               !(entry = git_index_get_bypath(index, filter->path, 1)))
+       if (!(entry = git_index_get_bypath(index, path, 0)) &&
+               !(entry = git_index_get_bypath(index, path, 1)))
                return false;
 
        if (!S_ISREG(entry->mode)) /* don't crlf filter non-blobs */
                return true;
 
-       if (git_blob_lookup(&blob, filter->repo, &entry->oid) < 0)
+       if (git_blob_lookup(&blob, repo, &entry->id) < 0)
                return false;
 
        blobcontent = git_blob_rawcontent(blob);
@@ -147,27 +119,44 @@ static int has_cr_in_index(git_filter *self)
 }
 
 static int crlf_apply_to_odb(
-       git_filter *self, git_buf *dest, const git_buf *source)
+       struct crlf_attrs *ca,
+       git_buf *to,
+       const git_buf *from,
+       const git_filter_source *src)
 {
-       struct crlf_filter *filter = (struct crlf_filter *)self;
-
-       assert(self && dest && source);
-
        /* Empty file? Nothing to do */
-       if (git_buf_len(source) == 0)
+       if (!git_buf_len(from))
                return 0;
 
        /* Heuristics to see if we can skip the conversion.
         * Straight from Core Git.
         */
-       if (filter->attrs.crlf_action == GIT_CRLF_AUTO ||
-               filter->attrs.crlf_action == GIT_CRLF_GUESS) {
-
+       if (ca->crlf_action == GIT_CRLF_AUTO || ca->crlf_action == GIT_CRLF_GUESS) {
                git_buf_text_stats stats;
 
-               /* Check heuristics for binary vs text... */
-               if (git_buf_text_gather_stats(&stats, source, false))
-                       return -1;
+               /* Check heuristics for binary vs text - returns true if binary */
+               if (git_buf_text_gather_stats(&stats, from, false))
+                       return GIT_PASSTHROUGH;
+
+               /* If there are no CR characters to filter out, then just pass */
+               if (!stats.cr)
+                       return GIT_PASSTHROUGH;
+
+               /* If safecrlf is enabled, sanity-check the result. */
+               if (stats.cr != stats.crlf || stats.lf != stats.crlf) {
+                       switch (ca->safe_crlf) {
+                       case GIT_SAFE_CRLF_FAIL:
+                               giterr_set(
+                                       GITERR_FILTER, "LF would be replaced by CRLF in '%s'",
+                                       git_filter_source_path(src));
+                               return -1;
+                       case GIT_SAFE_CRLF_WARN:
+                               /* TODO: issue warning when warning API is available */;
+                               break;
+                       default:
+                               break;
+                       }
+               }
 
                /*
                 * We're currently not going to even try to convert stuff
@@ -175,28 +164,28 @@ static int crlf_apply_to_odb(
                 * stuff?
                 */
                if (stats.cr != stats.crlf)
-                       return -1;
+                       return GIT_PASSTHROUGH;
 
-               if (filter->attrs.crlf_action == GIT_CRLF_GUESS) {
+               if (ca->crlf_action == GIT_CRLF_GUESS) {
                        /*
                         * If the file in the index has any CR in it, do not convert.
                         * This is the new safer autocrlf handling.
                         */
-                       if (has_cr_in_index(self))
-                               return -1;
+                       if (has_cr_in_index(src))
+                               return GIT_PASSTHROUGH;
                }
 
                if (!stats.cr)
-                       return -1;
+                       return GIT_PASSTHROUGH;
        }
 
        /* Actually drop the carriage returns */
-       return git_buf_text_crlf_to_lf(dest, source);
+       return git_buf_text_crlf_to_lf(to, from);
 }
 
-static const char *line_ending(struct crlf_filter *filter)
+static const char *line_ending(struct crlf_attrs *ca)
 {
-       switch (filter->attrs.crlf_action) {
+       switch (ca->crlf_action) {
        case GIT_CRLF_BINARY:
        case GIT_CRLF_INPUT:
                return "\n";
@@ -204,30 +193,29 @@ static const char *line_ending(struct crlf_filter *filter)
        case GIT_CRLF_CRLF:
                return "\r\n";
 
+       case GIT_CRLF_GUESS:
+               if (ca->auto_crlf == GIT_AUTO_CRLF_FALSE)
+                       return "\n";
+               break;
+
        case GIT_CRLF_AUTO:
        case GIT_CRLF_TEXT:
-       case GIT_CRLF_GUESS:
                break;
 
        default:
                goto line_ending_error;
        }
 
-       switch (filter->attrs.eol) {
-       case GIT_EOL_UNSET:
-               return GIT_EOL_NATIVE == GIT_EOL_CRLF
-                       ? "\r\n"
-                       : "\n";
-
-       case GIT_EOL_CRLF:
+       if (ca->auto_crlf == GIT_AUTO_CRLF_TRUE)
                return "\r\n";
-
-       case GIT_EOL_LF:
+       else if (ca->auto_crlf == GIT_AUTO_CRLF_INPUT)
                return "\n";
-
-       default:
-               goto line_ending_error;
-       }
+       else if (ca->eol == GIT_EOL_UNSET)
+               return GIT_EOL_NATIVE == GIT_EOL_CRLF ? "\r\n" : "\n";
+       else if (ca->eol == GIT_EOL_LF)
+               return "\n";
+       else if (ca->eol == GIT_EOL_CRLF)
+               return "\r\n";
 
 line_ending_error:
        giterr_set(GITERR_INVALID, "Invalid input to line ending filter");
@@ -235,41 +223,72 @@ line_ending_error:
 }
 
 static int crlf_apply_to_workdir(
-       git_filter *self, git_buf *dest, const git_buf *source)
+       struct crlf_attrs *ca, git_buf *to, const git_buf *from)
 {
-       struct crlf_filter *filter = (struct crlf_filter *)self;
+       git_buf_text_stats stats;
        const char *workdir_ending = NULL;
-
-       assert(self && dest && source);
+       bool is_binary;
 
        /* Empty file? Nothing to do. */
-       if (git_buf_len(source) == 0)
-               return -1;
+       if (git_buf_len(from) == 0)
+               return 0;
 
        /* Determine proper line ending */
-       workdir_ending = line_ending(filter);
+       workdir_ending = line_ending(ca);
        if (!workdir_ending)
                return -1;
-       if (!strcmp("\n", workdir_ending)) /* do nothing for \n ending */
-               return -1;
 
-       /* for now, only lf->crlf conversion is supported here */
-       assert(!strcmp("\r\n", workdir_ending));
-       return git_buf_text_lf_to_crlf(dest, source);
+       /* only LF->CRLF conversion is supported, do nothing on LF platforms */
+       if (strcmp(workdir_ending, "\r\n") != 0)
+               return GIT_PASSTHROUGH;
+
+       /* If there are no LFs, or all LFs are part of a CRLF, nothing to do */
+       is_binary = git_buf_text_gather_stats(&stats, from, false);
+
+       if (stats.lf == 0 || stats.lf == stats.crlf)
+               return GIT_PASSTHROUGH;
+
+       if (ca->crlf_action == GIT_CRLF_AUTO ||
+               ca->crlf_action == GIT_CRLF_GUESS) {
+
+               /* If we have any existing CR or CRLF line endings, do nothing */
+               if (ca->crlf_action == GIT_CRLF_GUESS &&
+                       stats.cr > 0 && stats.crlf > 0)
+                       return GIT_PASSTHROUGH;
+
+               /* If we have bare CR characters, do nothing */
+               if (stats.cr != stats.crlf)
+                       return GIT_PASSTHROUGH;
+
+               /* Don't filter binary files */
+               if (is_binary)
+                       return GIT_PASSTHROUGH;
+       }
+
+       return git_buf_text_lf_to_crlf(to, from);
 }
 
-static int find_and_add_filter(
-       git_vector *filters, git_repository *repo, const char *path,
-       int (*apply)(struct git_filter *self, git_buf *dest, const git_buf *source))
+static int crlf_check(
+       git_filter        *self,
+       void              **payload, /* points to NULL ptr on entry, may be set */
+       const git_filter_source *src,
+       const char **attr_values)
 {
-       struct crlf_attrs ca;
-       struct crlf_filter *filter;
-       size_t pathlen;
        int error;
+       struct crlf_attrs ca;
+
+       GIT_UNUSED(self);
 
-       /* Load gitattributes for the path */
-       if ((error = crlf_load_attributes(&ca, repo, path)) < 0)
-               return error;
+       if (!attr_values) {
+               ca.crlf_action = GIT_CRLF_GUESS;
+               ca.eol = GIT_EOL_UNSET;
+       } else {
+               ca.crlf_action = check_crlf(attr_values[2]); /* text */
+               if (ca.crlf_action == GIT_CRLF_GUESS)
+                       ca.crlf_action = check_crlf(attr_values[0]); /* clrf */
+               ca.eol = check_eol(attr_values[1]); /* eol */
+       }
+       ca.auto_crlf = GIT_AUTO_CRLF_DEFAULT;
 
        /*
         * Use the core Git logic to see if we should perform CRLF for this file
@@ -278,41 +297,86 @@ static int find_and_add_filter(
        ca.crlf_action = crlf_input_action(&ca);
 
        if (ca.crlf_action == GIT_CRLF_BINARY)
-               return 0;
+               return GIT_PASSTHROUGH;
+
+       if (ca.crlf_action == GIT_CRLF_GUESS ||
+               ((ca.crlf_action == GIT_CRLF_AUTO || ca.crlf_action == GIT_CRLF_TEXT) &&
+               git_filter_source_mode(src) == GIT_FILTER_SMUDGE)) {
+
+               error = git_repository__cvar(
+                       &ca.auto_crlf, git_filter_source_repo(src), GIT_CVAR_AUTO_CRLF);
+               if (error < 0)
+                       return error;
+
+               if (ca.crlf_action == GIT_CRLF_GUESS &&
+                       ca.auto_crlf == GIT_AUTO_CRLF_FALSE)
+                       return GIT_PASSTHROUGH;
 
-       if (ca.crlf_action == GIT_CRLF_GUESS) {
-               int auto_crlf;
+               if (ca.auto_crlf == GIT_AUTO_CRLF_INPUT &&
+                       git_filter_source_mode(src) == GIT_FILTER_SMUDGE)
+                       return GIT_PASSTHROUGH;
+       }
 
-               if ((error = git_repository__cvar(&auto_crlf, repo, GIT_CVAR_AUTO_CRLF)) < 0)
+       if (git_filter_source_mode(src) == GIT_FILTER_CLEAN) {
+               error = git_repository__cvar(
+                       &ca.safe_crlf, git_filter_source_repo(src), GIT_CVAR_SAFE_CRLF);
+               if (error < 0)
                        return error;
 
-               if (auto_crlf == GIT_AUTO_CRLF_FALSE)
-                       return 0;
+               /* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */
+               if ((git_filter_source_flags(src) & GIT_FILTER_ALLOW_UNSAFE) &&
+                       ca.safe_crlf == GIT_SAFE_CRLF_FAIL)
+                       ca.safe_crlf = GIT_SAFE_CRLF_WARN;
        }
 
-       /* If we're good, we create a new filter object and push it
-        * into the filters array */
-       pathlen = strlen(path);
-       filter = git__malloc(sizeof(struct crlf_filter) + pathlen + 1);
-       GITERR_CHECK_ALLOC(filter);
+       *payload = git__malloc(sizeof(ca));
+       GITERR_CHECK_ALLOC(*payload);
+       memcpy(*payload, &ca, sizeof(ca));
+
+       return 0;
+}
 
-       filter->f.apply = apply;
-       filter->f.do_free = NULL;
-       memcpy(&filter->attrs, &ca, sizeof(struct crlf_attrs));
-       filter->repo = repo;
-       memcpy(filter->path, path, pathlen + 1);
+static int crlf_apply(
+       git_filter    *self,
+       void         **payload, /* may be read and/or set */
+       git_buf       *to,
+       const git_buf *from,
+       const git_filter_source *src)
+{
+       /* initialize payload in case `check` was bypassed */
+       if (!*payload) {
+               int error = crlf_check(self, payload, src, NULL);
+               if (error < 0 && error != GIT_PASSTHROUGH)
+                       return error;
+       }
 
-       return git_vector_insert(filters, filter);
+       if (git_filter_source_mode(src) == GIT_FILTER_SMUDGE)
+               return crlf_apply_to_workdir(*payload, to, from);
+       else
+               return crlf_apply_to_odb(*payload, to, from, src);
 }
 
-int git_filter_add__crlf_to_odb(
-       git_vector *filters, git_repository *repo, const char *path)
+static void crlf_cleanup(
+       git_filter *self,
+       void       *payload)
 {
-       return find_and_add_filter(filters, repo, path, &crlf_apply_to_odb);
+       GIT_UNUSED(self);
+       git__free(payload);
 }
 
-int git_filter_add__crlf_to_workdir(
-       git_vector *filters, git_repository *repo, const char *path)
+git_filter *git_crlf_filter_new(void)
 {
-       return find_and_add_filter(filters, repo, path, &crlf_apply_to_workdir);
+       struct crlf_filter *f = git__calloc(1, sizeof(struct crlf_filter));
+       if (f == NULL)
+               return NULL;
+
+       f->f.version = GIT_FILTER_VERSION;
+       f->f.attributes = "crlf eol text";
+       f->f.initialize = NULL;
+       f->f.shutdown = git_filter_free;
+       f->f.check    = crlf_check;
+       f->f.apply    = crlf_apply;
+       f->f.cleanup  = crlf_cleanup;
+
+       return (git_filter *)f;
 }