git.proxmox.com Git - libgit2.git/blobdiff - src/crlf.c
Merge remote-tracking branch 'upstream/master' into cmn/describe
[libgit2.git] / src / crlf.c
index 060d39d37cb3c3dd7791f08797fab88088550e36..93448760da998d848e0959a9ddddc8476356e352 100644 (file)
@@ -5,22 +5,27 @@
  * a Linking Exception. For full terms see the included COPYING file.
  */
 
+#include "git2/attr.h"
+#include "git2/blob.h"
+#include "git2/index.h"
+#include "git2/sys/filter.h"
+
 #include "common.h"
 #include "fileops.h"
 #include "hash.h"
 #include "filter.h"
+#include "buf_text.h"
 #include "repository.h"
 
-#include "git2/attr.h"
-
 struct crlf_attrs {
        int crlf_action;
        int eol;
+       int auto_crlf;
+       int safe_crlf;
 };
 
 struct crlf_filter {
        git_filter f;
-       struct crlf_attrs attrs;
 };
 
 static int check_crlf(const char *value)
@@ -71,88 +76,87 @@ static int crlf_input_action(struct crlf_attrs *ca)
        return ca->crlf_action;
 }
 
-static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, const char *path)
+static int has_cr_in_index(const git_filter_source *src)
 {
-#define NUM_CONV_ATTRS 3
-
-       static const char *attr_names[NUM_CONV_ATTRS] = {
-               "crlf", "eol", "text",
-       };
-
-       const char *attr_vals[NUM_CONV_ATTRS];
-       int error;
-
-       error = git_attr_get_many(attr_vals,
-               repo, 0, path, NUM_CONV_ATTRS, attr_names);
-
-       if (error == GIT_ENOTFOUND) {
-               ca->crlf_action = GIT_CRLF_GUESS;
-               ca->eol = GIT_EOL_UNSET;
-               return 0;
+       git_repository *repo = git_filter_source_repo(src);
+       const char *path = git_filter_source_path(src);
+       git_index *index;
+       const git_index_entry *entry;
+       git_blob *blob;
+       const void *blobcontent;
+       git_off_t blobsize;
+       bool found_cr;
+
+       if (!path)
+               return false;
+
+       if (git_repository_index__weakptr(&index, repo) < 0) {
+               giterr_clear();
+               return false;
        }
 
-       if (error == 0) {
-               ca->crlf_action = check_crlf(attr_vals[2]); /* text */
-               if (ca->crlf_action == GIT_CRLF_GUESS)
-                       ca->crlf_action = check_crlf(attr_vals[0]); /* clrf */
+       if (!(entry = git_index_get_bypath(index, path, 0)) &&
+               !(entry = git_index_get_bypath(index, path, 1)))
+               return false;
 
-               ca->eol = check_eol(attr_vals[1]); /* eol */
-               return 0;
-       }
+       if (!S_ISREG(entry->mode)) /* don't crlf filter non-blobs */
+               return true;
 
-       return -1;
-}
+       if (git_blob_lookup(&blob, repo, &entry->id) < 0)
+               return false;
 
-static int drop_crlf(git_buf *dest, const git_buf *source)
-{
-       const char *scan = source->ptr, *next;
-       const char *scan_end = git_buf_cstr(source) + git_buf_len(source);
+       blobcontent = git_blob_rawcontent(blob);
+       blobsize    = git_blob_rawsize(blob);
+       if (!git__is_sizet(blobsize))
+               blobsize = (size_t)-1;
 
-       /* Main scan loop.  Find the next carriage return and copy the
-        * whole chunk up to that point to the destination buffer.
-        */
-       while ((next = memchr(scan, '\r', scan_end - scan)) != NULL) {
-               /* copy input up to \r */
-               if (next > scan)
-                       git_buf_put(dest, scan, next - scan);
-
-               /* Do not drop \r unless it is followed by \n */
-               if (*(next + 1) != '\n')
-                       git_buf_putc(dest, '\r');
-
-               scan = next + 1;
-       }
+       found_cr = (blobcontent != NULL &&
+               blobsize > 0 &&
+               memchr(blobcontent, '\r', (size_t)blobsize) != NULL);
 
-       /* If there was no \r, then tell the library to skip this filter */
-       if (scan == source->ptr)
-               return -1;
-
-       /* Copy remaining input into dest */
-       git_buf_put(dest, scan, scan_end - scan);
-       return 0;
+       git_blob_free(blob);
+       return found_cr;
 }
 
-static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *source)
+static int crlf_apply_to_odb(
+       struct crlf_attrs *ca,
+       git_buf *to,
+       const git_buf *from,
+       const git_filter_source *src)
 {
-       struct crlf_filter *filter = (struct crlf_filter *)self;
-
-       assert(self && dest && source);
-
        /* Empty file? Nothing to do */
-       if (git_buf_len(source) == 0)
+       if (!git_buf_len(from))
                return 0;
 
        /* Heuristics to see if we can skip the conversion.
         * Straight from Core Git.
         */
-       if (filter->attrs.crlf_action == GIT_CRLF_AUTO ||
-               filter->attrs.crlf_action == GIT_CRLF_GUESS) {
-
+       if (ca->crlf_action == GIT_CRLF_AUTO || ca->crlf_action == GIT_CRLF_GUESS) {
                git_buf_text_stats stats;
 
-               /* Check heuristics for binary vs text... */
-               if (git_buf_text_gather_stats(&stats, source, false))
-                       return -1;
+               /* Check heuristics for binary vs text - returns true if binary */
+               if (git_buf_text_gather_stats(&stats, from, false))
+                       return GIT_PASSTHROUGH;
+
+               /* If there are no CR characters to filter out, then just pass */
+               if (!stats.cr)
+                       return GIT_PASSTHROUGH;
+
+               /* If safecrlf is enabled, sanity-check the result. */
+               if (stats.cr != stats.crlf || stats.lf != stats.crlf) {
+                       switch (ca->safe_crlf) {
+                       case GIT_SAFE_CRLF_FAIL:
+                               giterr_set(
+                                       GITERR_FILTER, "LF would be replaced by CRLF in '%s'",
+                                       git_filter_source_path(src));
+                               return -1;
+                       case GIT_SAFE_CRLF_WARN:
+                               /* TODO: issue warning when warning API is available */;
+                               break;
+                       default:
+                               break;
+                       }
+               }
 
                /*
                 * We're currently not going to even try to convert stuff
@@ -160,47 +164,28 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou
                 * stuff?
                 */
                if (stats.cr != stats.crlf)
-                       return -1;
+                       return GIT_PASSTHROUGH;
 
-#if 0
-               if (crlf_action == CRLF_GUESS) {
+               if (ca->crlf_action == GIT_CRLF_GUESS) {
                        /*
                         * If the file in the index has any CR in it, do not convert.
                         * This is the new safer autocrlf handling.
                         */
-                       if (has_cr_in_index(path))
-                               return 0;
+                       if (has_cr_in_index(src))
+                               return GIT_PASSTHROUGH;
                }
-#endif
 
                if (!stats.cr)
-                       return -1;
+                       return GIT_PASSTHROUGH;
        }
 
        /* Actually drop the carriage returns */
-       return drop_crlf(dest, source);
+       return git_buf_text_crlf_to_lf(to, from);
 }
 
-static int convert_line_endings(git_buf *dest, const git_buf *source, const char *ending)
+static const char *line_ending(struct crlf_attrs *ca)
 {
-       const char *scan = git_buf_cstr(source),
-                                 *next,
-                                 *scan_end = git_buf_cstr(source) + git_buf_len(source);
-
-       while ((next = memchr(scan, '\n', scan_end - scan)) != NULL) {
-               if (next > scan)
-                       git_buf_put(dest, scan, next-scan);
-               git_buf_puts(dest, ending);
-               scan = next + 1;
-       }
-
-       git_buf_put(dest, scan, scan_end - scan);
-       return 0;
-}
-
-static const char *line_ending(struct crlf_filter *filter)
-{
-       switch (filter->attrs.crlf_action) {
+       switch (ca->crlf_action) {
        case GIT_CRLF_BINARY:
        case GIT_CRLF_INPUT:
                return "\n";
@@ -217,11 +202,9 @@ static const char *line_ending(struct crlf_filter *filter)
                goto line_ending_error;
        }
 
-       switch (filter->attrs.eol) {
+       switch (ca->eol) {
        case GIT_EOL_UNSET:
-               return GIT_EOL_NATIVE == GIT_EOL_CRLF
-                       ? "\r\n"
-                       : "\n";
+               return GIT_EOL_NATIVE == GIT_EOL_CRLF ? "\r\n" : "\n";
 
        case GIT_EOL_CRLF:
                return "\r\n";
@@ -238,39 +221,52 @@ line_ending_error:
        return NULL;
 }
 
-static int crlf_apply_to_workdir(git_filter *self, git_buf *dest, const git_buf *source)
+static int crlf_apply_to_workdir(
+       struct crlf_attrs *ca, git_buf *to, const git_buf *from)
 {
-       struct crlf_filter *filter = (struct crlf_filter *)self;
        const char *workdir_ending = NULL;
 
-       assert (self && dest && source);
-
        /* Empty file? Nothing to do. */
-       if (git_buf_len(source) == 0)
+       if (git_buf_len(from) == 0)
                return 0;
 
+       /* Don't filter binary files */
+       if (git_buf_text_is_binary(from))
+               return GIT_PASSTHROUGH;
+
        /* Determine proper line ending */
-       workdir_ending = line_ending(filter);
-       if (!workdir_ending) return -1;
+       workdir_ending = line_ending(ca);
+       if (!workdir_ending)
+               return -1;
 
-       /* If the line ending is '\n', just copy the input */
-       if (!strcmp(workdir_ending, "\n"))
-               return git_buf_puts(dest, git_buf_cstr(source));
+       /* only LF->CRLF conversion is supported, do nothing on LF platforms */
+       if (strcmp(workdir_ending, "\r\n") != 0)
+               return GIT_PASSTHROUGH;
 
-       return convert_line_endings(dest, source, workdir_ending);
+       return git_buf_text_lf_to_crlf(to, from);
 }
 
-static int find_and_add_filter(
-       git_vector *filters, git_repository *repo, const char *path,
-       int (*apply)(struct git_filter *self, git_buf *dest, const git_buf *source))
+static int crlf_check(
+       git_filter        *self,
+       void              **payload, /* points to NULL ptr on entry, may be set */
+       const git_filter_source *src,
+       const char **attr_values)
 {
-       struct crlf_attrs ca;
-       struct crlf_filter *filter;
        int error;
+       struct crlf_attrs ca;
 
-       /* Load gitattributes for the path */
-       if ((error = crlf_load_attributes(&ca, repo, path)) < 0)
-               return error;
+       GIT_UNUSED(self);
+
+       if (!attr_values) {
+               ca.crlf_action = GIT_CRLF_GUESS;
+               ca.eol = GIT_EOL_UNSET;
+       } else {
+               ca.crlf_action = check_crlf(attr_values[2]); /* text */
+               if (ca.crlf_action == GIT_CRLF_GUESS)
+                       ca.crlf_action = check_crlf(attr_values[0]); /* clrf */
+               ca.eol = check_eol(attr_values[1]); /* eol */
+       }
+       ca.auto_crlf = GIT_AUTO_CRLF_DEFAULT;
 
        /*
         * Use the core Git logic to see if we should perform CRLF for this file
@@ -279,36 +275,84 @@ static int find_and_add_filter(
        ca.crlf_action = crlf_input_action(&ca);
 
        if (ca.crlf_action == GIT_CRLF_BINARY)
-               return 0;
+               return GIT_PASSTHROUGH;
 
-       if (ca.crlf_action == GIT_CRLF_GUESS) {
-               int auto_crlf;
+       if (ca.crlf_action == GIT_CRLF_GUESS ||
+               (ca.crlf_action == GIT_CRLF_AUTO &&
+               git_filter_source_mode(src) == GIT_FILTER_SMUDGE)) {
 
-               if ((error = git_repository__cvar(&auto_crlf, repo, GIT_CVAR_AUTO_CRLF)) < 0)
+               error = git_repository__cvar(
+                       &ca.auto_crlf, git_filter_source_repo(src), GIT_CVAR_AUTO_CRLF);
+               if (error < 0)
                        return error;
 
-               if (auto_crlf == GIT_AUTO_CRLF_FALSE)
-                       return 0;
+               if (ca.crlf_action == GIT_CRLF_GUESS &&
+                       ca.auto_crlf == GIT_AUTO_CRLF_FALSE)
+                       return GIT_PASSTHROUGH;
+
+               if (ca.auto_crlf == GIT_AUTO_CRLF_INPUT &&
+                       git_filter_source_mode(src) == GIT_FILTER_SMUDGE)
+                       return GIT_PASSTHROUGH;
        }
 
-       /* If we're good, we create a new filter object and push it
-        * into the filters array */
-       filter = git__malloc(sizeof(struct crlf_filter));
-       GITERR_CHECK_ALLOC(filter);
+       if (git_filter_source_mode(src) == GIT_FILTER_CLEAN) {
+               error = git_repository__cvar(
+                       &ca.safe_crlf, git_filter_source_repo(src), GIT_CVAR_SAFE_CRLF);
+               if (error < 0)
+                       return error;
+
+               /* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */
+               if ((git_filter_source_options(src) & GIT_FILTER_OPT_ALLOW_UNSAFE) &&
+                       ca.safe_crlf == GIT_SAFE_CRLF_FAIL)
+                       ca.safe_crlf = GIT_SAFE_CRLF_WARN;
+       }
 
-       filter->f.apply = apply;
-       filter->f.do_free = NULL;
-       memcpy(&filter->attrs, &ca, sizeof(struct crlf_attrs));
+       *payload = git__malloc(sizeof(ca));
+       GITERR_CHECK_ALLOC(*payload);
+       memcpy(*payload, &ca, sizeof(ca));
 
-       return git_vector_insert(filters, filter);
+       return 0;
 }
 
-int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo, const char *path)
+static int crlf_apply(
+       git_filter    *self,
+       void         **payload, /* may be read and/or set */
+       git_buf       *to,
+       const git_buf *from,
+       const git_filter_source *src)
 {
-       return find_and_add_filter(filters, repo, path, &crlf_apply_to_odb);
+       /* initialize payload in case `check` was bypassed */
+       if (!*payload) {
+               int error = crlf_check(self, payload, src, NULL);
+               if (error < 0 && error != GIT_PASSTHROUGH)
+                       return error;
+       }
+
+       if (git_filter_source_mode(src) == GIT_FILTER_SMUDGE)
+               return crlf_apply_to_workdir(*payload, to, from);
+       else
+               return crlf_apply_to_odb(*payload, to, from, src);
+}
+
+static void crlf_cleanup(
+       git_filter *self,
+       void       *payload)
+{
+       GIT_UNUSED(self);
+       git__free(payload);
 }
 
-int git_filter_add__crlf_to_workdir(git_vector *filters, git_repository *repo, const char *path)
+git_filter *git_crlf_filter_new(void)
 {
-       return find_and_add_filter(filters, repo, path, &crlf_apply_to_workdir);
+       struct crlf_filter *f = git__calloc(1, sizeof(struct crlf_filter));
+
+       f->f.version = GIT_FILTER_VERSION;
+       f->f.attributes = "crlf eol text";
+       f->f.initialize = NULL;
+       f->f.shutdown = git_filter_free;
+       f->f.check    = crlf_check;
+       f->f.apply    = crlf_apply;
+       f->f.cleanup  = crlf_cleanup;
+
+       return (git_filter *)f;
 }