git.proxmox.com Git - libgit2.git/commitdiff
introduce `git_diff_from_buffer` to parse diffs
author Edward Thomson <ethomson@github.com>
Mon, 25 Apr 2016 04:35:48 +0000 (00:35 -0400)
committer Edward Thomson <ethomson@github.com>
Thu, 26 May 2016 18:01:09 +0000 (13:01 -0500)
Parse diff files into a `git_diff` structure.

include/git2/diff.h
src/diff.c
src/diff.h
src/diff_generate.c
src/diff_parse.c [new file with mode: 0644]
src/patch_parse.c
src/patch_parse.h
tests/diff/parse.c [new file with mode: 0644]

index 065a786e920f7690b7cc9bd8d71fc6fcc505c43f..880292a1fc7888e99a3e0141e313acc4155d9423 100644 (file)
@@ -1174,6 +1174,11 @@ GIT_EXTERN(int) git_diff_buffers(
        git_diff_line_cb line_cb,
        void *payload);
 
+/**
+ * Read the contents of a patch buffer into a `git_diff` object.
+ *
+ * The buffer may hold several patches back to back; each parsed patch
+ * contributes one delta to the resulting diff.
+ *
+ * @param out Set to the newly created diff on success and to NULL on
+ *            failure; release it with `git_diff_free`
+ * @param content The patch text to parse
+ * @param content_len The length of `content`, in bytes
+ * @return 0 on success, `GIT_ENOTFOUND` if no patch could be found in
+ *         the buffer, or another negative error code
+ */
+GIT_EXTERN(int) git_diff_from_buffer(
+       git_diff **out,
+       const char *content,
+       size_t content_len);
+
 /**
  * This is an opaque structure which is allocated by `git_diff_get_stats`.
  * You are responsible for releasing the object memory when done, using the
index c54d3574bec458676e1f9ba9b2f41726d4939736..317d495972d46f05e021949e9791a64988142d03 100644 (file)
@@ -51,7 +51,7 @@ int git_diff_delta__casecmp(const void *a, const void *b)
        return val ? val : ((int)da->status - (int)db->status);
 }
 
-static int diff_entry_cmp(const void *a, const void *b)
+int git_diff__entry_cmp(const void *a, const void *b)
 {
        const git_index_entry *entry_a = a;
        const git_index_entry *entry_b = b;
@@ -59,7 +59,7 @@ static int diff_entry_cmp(const void *a, const void *b)
        return strcmp(entry_a->path, entry_b->path);
 }
 
-static int diff_entry_icmp(const void *a, const void *b)
+int git_diff__entry_icmp(const void *a, const void *b)
 {
        const git_index_entry *entry_a = a;
        const git_index_entry *entry_b = b;
index 153cd350ab68e7f7a258a76632ed2b3fc31a5bc0..2c0e52ca21d49cf1737d8e38958e498360922a26 100644 (file)
@@ -25,6 +25,7 @@
 typedef enum {
        GIT_DIFF_TYPE_UNKNOWN = 0,
        GIT_DIFF_TYPE_GENERATED = 1,
+       GIT_DIFF_TYPE_PARSED = 2, /* diff built from patch text (see diff_parse.c) */
 } git_diff_origin_t;
 
 struct git_diff {
@@ -56,5 +57,8 @@ extern int git_diff_delta__format_file_header(
 extern int git_diff_delta__cmp(const void *a, const void *b);
 extern int git_diff_delta__casecmp(const void *a, const void *b);
 
+extern int git_diff__entry_cmp(const void *a, const void *b);
+extern int git_diff__entry_icmp(const void *a, const void *b);
+
 #endif
 
index 10bc154863923376cf015e3bcb062cc62051268e..a996bf156a2ec55254afdb70f773e1a53fcf8885 100644 (file)
@@ -358,22 +358,6 @@ static const char *diff_mnemonic_prefix(
        return pfx;
 }
 
-static int diff_entry_cmp(const void *a, const void *b)
-{
-       const git_index_entry *entry_a = a;
-       const git_index_entry *entry_b = b;
-
-       return strcmp(entry_a->path, entry_b->path);
-}
-
-static int diff_entry_icmp(const void *a, const void *b)
-{
-       const git_index_entry *entry_a = a;
-       const git_index_entry *entry_b = b;
-
-       return strcasecmp(entry_a->path, entry_b->path);
-}
-
 void git_diff__set_ignore_case(git_diff *diff, bool ignore_case)
 {
        if (!ignore_case) {
@@ -382,7 +366,7 @@ void git_diff__set_ignore_case(git_diff *diff, bool ignore_case)
                diff->strcomp    = git__strcmp;
                diff->strncomp   = git__strncmp;
                diff->pfxcomp    = git__prefixcmp;
-               diff->entrycomp  = diff_entry_cmp;
+               diff->entrycomp  = git_diff__entry_cmp;
 
                git_vector_set_cmp(&diff->deltas, git_diff_delta__cmp);
        } else {
@@ -391,7 +375,7 @@ void git_diff__set_ignore_case(git_diff *diff, bool ignore_case)
                diff->strcomp    = git__strcasecmp;
                diff->strncomp   = git__strncasecmp;
                diff->pfxcomp    = git__prefixcmp_icase;
-               diff->entrycomp  = diff_entry_icmp;
+               diff->entrycomp  = git_diff__entry_icmp;
 
                git_vector_set_cmp(&diff->deltas, git_diff_delta__casecmp);
        }
diff --git a/src/diff_parse.c b/src/diff_parse.c
new file mode 100644 (file)
index 0000000..ffdc8df
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#include "common.h"
+#include "diff.h"
+#include "patch.h"
+#include "patch_parse.h"
+
+/*
+ * A diff produced by parsing patch text.  It wraps the common `git_diff`
+ * and additionally keeps the patches its deltas were taken from.
+ */
+typedef struct {
+       /* Must stay the first member: `diff_parsed_free` casts a
+        * `git_diff *` back to this type. */
+       struct git_diff base;
+
+       /* Every parsed `git_patch`; released in `diff_parsed_free`. */
+       git_vector patches;
+} git_diff_parsed;
+
+/*
+ * `free_fn` for parsed diffs: releases every patch kept by the diff, then
+ * the two vectors and the pool, and finally the (zeroed) diff itself.
+ */
+static void diff_parsed_free(git_diff *d)
+{
+       git_diff_parsed *parsed = (git_diff_parsed *)d;
+       git_patch *cur;
+       size_t pos;
+
+       /* release each stored patch before the vector that lists them */
+       git_vector_foreach(&parsed->patches, pos, cur) {
+               git_patch_free(cur);
+       }
+       git_vector_free(&parsed->patches);
+
+       git_vector_free(&parsed->base.deltas);
+       git_pool_clear(&parsed->base.pool);
+
+       /* scrub the structure so stale pointers are not left behind */
+       git__memzero(parsed, sizeof(git_diff_parsed));
+       git__free(parsed);
+}
+
+/*
+ * Allocate a zeroed parsed diff holding one reference, with the
+ * case-sensitive comparators installed and both vectors initialised.
+ * Returns NULL if the allocation or either vector initialisation fails.
+ */
+static git_diff_parsed *diff_parsed_alloc(void)
+{
+       git_diff_parsed *parsed = git__calloc(1, sizeof(git_diff_parsed));
+
+       if (parsed == NULL)
+               return NULL;
+
+       GIT_REFCOUNT_INC(parsed);
+       parsed->base.type = GIT_DIFF_TYPE_PARSED;
+       parsed->base.free_fn = diff_parsed_free;
+
+       /* parsed diffs are set up for case-sensitive comparison */
+       parsed->base.opts.flags &= ~GIT_DIFF_IGNORE_CASE;
+       parsed->base.strcomp = git__strcmp;
+       parsed->base.strncomp = git__strncmp;
+       parsed->base.pfxcomp = git__prefixcmp;
+       parsed->base.entrycomp = git_diff__entry_cmp;
+
+       git_pool_init(&parsed->base.pool, 1);
+
+       if (git_vector_init(&parsed->patches, 0, NULL) < 0)
+               goto on_error;
+
+       if (git_vector_init(&parsed->base.deltas, 0, git_diff_delta__cmp) < 0)
+               goto on_error;
+
+       git_vector_set_cmp(&parsed->base.deltas, git_diff_delta__cmp);
+
+       return parsed;
+
+on_error:
+       /* dropping the only reference runs diff_parsed_free */
+       git_diff_free(&parsed->base);
+       return NULL;
+}
+
+/*
+ * Parse `content` as one or more patches and collect them into a new
+ * parsed diff.
+ *
+ * On success `*out` is the new diff, to be released with `git_diff_free`;
+ * on failure `*out` is NULL and a negative error code is returned.  A
+ * GIT_ENOTFOUND from the patch parser is treated as the end of the input
+ * once at least one patch has been read; a buffer that yields no patch at
+ * all fails with GIT_ENOTFOUND.
+ */
+int git_diff_from_buffer(
+       git_diff **out,
+       const char *content,
+       size_t content_len)
+{
+       git_diff_parsed *diff;
+       git_patch *patch;
+       git_patch_parse_ctx *ctx;
+       int error = 0;
+
+       *out = NULL;
+
+       diff = diff_parsed_alloc();
+       GITERR_CHECK_ALLOC(diff);
+
+       /* don't leak the diff if the parse context cannot be created */
+       if ((ctx = git_patch_parse_ctx_init(content, content_len, NULL)) == NULL) {
+               git_diff_free(&diff->base);
+               return -1;
+       }
+
+       while (ctx->remain_len) {
+               if ((error = git_patch_parse(&patch, ctx)) < 0)
+                       break;
+
+               /* not stored yet, so nothing else would release this patch */
+               if ((error = git_vector_insert(&diff->patches, patch)) < 0) {
+                       git_patch_free(patch);
+                       goto done;
+               }
+
+               /* the patch is now owned by `patches`; freeing the diff
+                * below releases it */
+               if ((error = git_vector_insert(&diff->base.deltas, patch->delta)) < 0)
+                       goto done;
+       }
+
+       if (error == GIT_ENOTFOUND && git_vector_length(&diff->patches) > 0) {
+               giterr_clear();
+               error = 0;
+       }
+
+done:
+       git_patch_parse_ctx_free(ctx);
+
+       if (error < 0)
+               git_diff_free(&diff->base);
+       else
+               *out = &diff->base;
+
+       return error;
+}
+
index 991802cb42c17bebce8f9ae7924ad70adaa38b67..ee75663e67304dd84ddc4e1e47242974864278ab 100644 (file)
@@ -6,26 +6,12 @@
  */
 #include "git2/patch.h"
 #include "patch.h"
+#include "patch_parse.h"
 #include "path.h"
 
 #define parse_err(...) \
        ( giterr_set(GITERR_PATCH, __VA_ARGS__), -1 )
 
-typedef struct {
-       git_refcount rc;
-
-       const char *content;
-       size_t content_len;
-
-       git_patch_options opts;
-
-       const char *line;
-       size_t line_len;
-       size_t line_num;
-
-       size_t remain;
-} git_patch_parse_ctx;
-
 typedef struct {
        git_patch base;
 
@@ -60,15 +46,15 @@ GIT_INLINE(bool) parse_ctx_contains(
 static void parse_advance_line(git_patch_parse_ctx *ctx)
 {
        ctx->line += ctx->line_len;
-       ctx->remain -= ctx->line_len;
-       ctx->line_len = git__linenlen(ctx->line, ctx->remain);
+       ctx->remain_len -= ctx->line_len;
+       ctx->line_len = git__linenlen(ctx->line, ctx->remain_len);
        ctx->line_num++;
 }
 
 static void parse_advance_chars(git_patch_parse_ctx *ctx, size_t char_cnt)
 {
        ctx->line += char_cnt;
-       ctx->remain -= char_cnt;
+       ctx->remain_len -= char_cnt;
        ctx->line_len -= char_cnt;
 }
 
@@ -99,7 +85,7 @@ static int parse_advance_ws(git_patch_parse_ctx *ctx)
                git__isspace(ctx->line[0])) {
                ctx->line++;
                ctx->line_len--;
-               ctx->remain--;
+               ctx->remain_len--;
                ret = 0;
        }
 
@@ -413,7 +399,12 @@ static int parse_header_git(
                        ctx->line_num);
 
        /* Parse remaining header lines */
-       for (parse_advance_line(ctx); ctx->remain > 0; parse_advance_line(ctx)) {
+       for (parse_advance_line(ctx);
+               ctx->remain_len > 0;
+               parse_advance_line(ctx)) {
+
+               bool found = false;
+
                if (ctx->line_len == 0 || ctx->line[ctx->line_len - 1] != '\n')
                        break;
 
@@ -441,8 +432,14 @@ static int parse_header_git(
                                goto done;
                        }
 
+                       found = true;
                        break;
                }
+               
+               if (!found) {
+                       error = parse_err("invalid patch header at line %d", ctx->line_num);
+                       goto done;
+               }
        }
 
 done:
@@ -545,7 +542,7 @@ static int parse_hunk_body(
        int newlines = hunk->hunk.new_lines;
 
        for (;
-               ctx->remain > 4 && (oldlines || newlines) &&
+               ctx->remain_len > 4 && (oldlines || newlines) &&
                memcmp(ctx->line, "@@ -", 4) != 0;
                parse_advance_line(ctx)) {
 
@@ -590,7 +587,7 @@ static int parse_hunk_body(
 
                line->content = ctx->line + prefix;
                line->content_len = ctx->line_len - prefix;
-               line->content_offset = ctx->content_len - ctx->remain;
+               line->content_offset = ctx->content_len - ctx->remain_len;
                line->origin = origin;
 
                hunk->line_count++;
@@ -633,7 +630,10 @@ static int parse_patch_header(
 {
        int error = 0;
 
-       for (ctx->line = ctx->content; ctx->remain > 0; parse_advance_line(ctx)) {
+       for (ctx->line = ctx->remain;
+               ctx->remain_len > 0;
+               parse_advance_line(ctx)) {
+
                /* This line is too short to be a patch header. */
                if (ctx->line_len < 6)
                        continue;
@@ -658,7 +658,7 @@ static int parse_patch_header(
                }
 
                /* This buffer is too short to contain a patch. */
-               if (ctx->remain < ctx->line_len + 6)
+               if (ctx->remain_len < ctx->line_len + 6)
                        break;
 
                /* A proper git patch */
@@ -781,6 +781,10 @@ static int parse_patch_binary(
                        &patch->base.binary.old_file, ctx)) < 0)
                return error;
 
+       if (parse_advance_nl(ctx) < 0)
+               return parse_err("corrupt git binary patch separator at line %d",
+                       ctx->line_num);
+
        patch->base.delta->flags |= GIT_DIFF_FLAG_BINARY;
        return 0;
 }
@@ -848,7 +852,7 @@ static int check_prefix(
 {
        const char *path = path_start;
        size_t prefix_len = patch->ctx->opts.prefix_len;
-       size_t remain = prefix_len;
+       size_t remain_len = prefix_len;
 
        *out = NULL;
        *out_len = 0;
@@ -860,14 +864,14 @@ static int check_prefix(
        while (*path == '/')
                path++;
 
-       while (*path && remain) {
+       while (*path && remain_len) {
                if (*path == '/')
-                       remain--;
+                       remain_len--;
 
                path++;
        }
 
-       if (remain || !*path)
+       if (remain_len || !*path)
                return parse_err(
                        "header filename does not contain %d path components",
                        prefix_len);
@@ -947,7 +951,7 @@ static int check_patch(git_patch_parsed *patch)
        return 0;
 }
 
-static git_patch_parse_ctx *git_patch_parse_ctx_init(
+git_patch_parse_ctx *git_patch_parse_ctx_init(
        const char *content,
        size_t content_len,
        const git_patch_options *opts)
@@ -966,7 +970,8 @@ static git_patch_parse_ctx *git_patch_parse_ctx_init(
        }
 
        ctx->content_len = content_len;
-       ctx->remain = content_len;
+       ctx->remain = ctx->content;
+       ctx->remain_len = ctx->content_len;
 
        if (opts)
                memcpy(&ctx->opts, opts, sizeof(git_patch_options));
@@ -986,7 +991,7 @@ static void patch_parse_ctx_free(git_patch_parse_ctx *ctx)
        git__free(ctx);
 }
 
-static void git_patch_parse_ctx_free(git_patch_parse_ctx *ctx)
+void git_patch_parse_ctx_free(git_patch_parse_ctx *ctx)
 {
        GIT_REFCOUNT_DEC(ctx, patch_parse_ctx_free);
 }
@@ -1017,11 +1022,12 @@ static void patch_parsed__free(git_patch *p)
        git__free(patch);
 }
 
-static int git_patch_parse(
+int git_patch_parse(
        git_patch **out,
        git_patch_parse_ctx *ctx)
 {
        git_patch_parsed *patch;
+       size_t start, used;
        int error = 0;
 
        assert(out && ctx);
@@ -1042,11 +1048,16 @@ static int git_patch_parse(
        patch->base.delta->status = GIT_DELTA_MODIFIED;
        patch->base.delta->nfiles = 2;
 
+       start = ctx->remain_len;
+
        if ((error = parse_patch_header(patch, ctx)) < 0 ||
                (error = parse_patch_body(patch, ctx)) < 0 ||
                (error = check_patch(patch)) < 0)
                goto done;
 
+       used = start - ctx->remain_len;
+       ctx->remain += used;
+
        patch->base.diff_opts.old_prefix = patch->old_prefix;
        patch->base.diff_opts.new_prefix = patch->new_prefix;
        patch->base.diff_opts.flags |= GIT_DIFF_SHOW_BINARY;
index d5e86073f64e8e1401e710e17646752b77e8ca3c..da56dad7c70b3c40f9a7a4439a1b4b46ce630b9c 100644 (file)
@@ -7,6 +7,31 @@
 #ifndef INCLUDE_patch_parse_h__
 #define INCLUDE_patch_parse_h__
 
+/*
+ * State shared by the patch parser while it walks a content buffer.
+ */
+typedef struct {
+       /* Reference count; dropped through `git_patch_parse_ctx_free` */
+       git_refcount rc;
+
+       /* Original content buffer */
+       const char *content;
+       size_t content_len;
+
+       /* Copy of the options handed to `git_patch_parse_ctx_init`, when
+        * some were given */
+       git_patch_options opts;
+
+       /* The remaining (unparsed) buffer.  `remain_len` shrinks as lines
+        * and characters are consumed; `remain` is moved forward by
+        * `git_patch_parse` once a whole patch has been parsed. */
+       const char *remain;
+       size_t remain_len;
+
+       /* The line being examined, its length, and a running line counter */
+       const char *line;
+       size_t line_len;
+       size_t line_num;
+} git_patch_parse_ctx;
+
+/**
+ * Create a parse context over a content buffer.
+ *
+ * @param content The buffer to parse
+ * @param content_len The length of `content`, in bytes
+ * @param opts Parse options, or NULL
+ * @return The new context, or NULL on failure
+ */
+extern git_patch_parse_ctx *git_patch_parse_ctx_init(
+       const char *content,
+       size_t content_len,
+       const git_patch_options *opts);
+
+/**
+ * Release a reference on a parse context obtained from
+ * `git_patch_parse_ctx_init`.
+ */
+extern void git_patch_parse_ctx_free(git_patch_parse_ctx *ctx);
+
 /**
  * Create a patch for a single file from the contents of a patch buffer.
  *
@@ -22,4 +47,8 @@ extern int git_patch_from_buffer(
        size_t contents_len,
        const git_patch_options *opts);
 
+/**
+ * Parse the next patch out of a parse context.
+ *
+ * On success the context's `remain` pointer is moved past the text that
+ * was consumed, so repeated calls walk through a multi-patch buffer.
+ *
+ * @param out Receives the parsed patch
+ * @param ctx The parse context to read from
+ * @return 0 on success, or a negative error code
+ */
+extern int git_patch_parse(
+       git_patch **out,
+       git_patch_parse_ctx *ctx);
+
 #endif
diff --git a/tests/diff/parse.c b/tests/diff/parse.c
new file mode 100644 (file)
index 0000000..8eb9842
--- /dev/null
@@ -0,0 +1,60 @@
+#include "clar_libgit2.h"
+#include "patch.h"
+#include "patch_parse.h"
+
+#include "../patch/patch_common.h"
+
+/* Buffers that contain no patch at all must fail with GIT_ENOTFOUND. */
+void test_diff_parse__nonpatches_fail_with_notfound(void)
+{
+       /* the non-patch text on its own, then with leading and/or trailing
+        * text around it */
+       const char *inputs[] = {
+               PATCH_NOT_A_PATCH,
+               "Leading text.\n" PATCH_NOT_A_PATCH,
+               PATCH_NOT_A_PATCH "Trailing text.\n",
+               "Lead.\n" PATCH_NOT_A_PATCH "Trail.\n",
+       };
+       git_diff *diff;
+       size_t i;
+
+       for (i = 0; i < sizeof(inputs) / sizeof(inputs[0]); i++) {
+               cl_git_fail_with(GIT_ENOTFOUND,
+                       git_diff_from_buffer(&diff,
+                       inputs[i],
+                       strlen(inputs[i])));
+       }
+}
+
+/*
+ * Sandwich `invalid_diff` between legitimate patches and check that
+ * parsing the combined buffer fails with GIT_ERROR.
+ */
+static void test_parse_invalid_diff(const char *invalid_diff)
+{
+       git_diff *diff;
+       git_buf buf = GIT_BUF_INIT;
+
+       /* throw some random (legitimate) diffs in with the given invalid
+        * one.  Every append is checked so that a failed append cannot hand
+        * the parser a truncated buffer.
+        */
+       cl_git_pass(git_buf_puts(&buf, PATCH_ORIGINAL_TO_CHANGE_FIRSTLINE));
+       cl_git_pass(git_buf_puts(&buf, PATCH_BINARY_DELTA));
+       cl_git_pass(git_buf_puts(&buf, invalid_diff));
+       cl_git_pass(git_buf_puts(&buf, PATCH_ORIGINAL_TO_CHANGE_MIDDLE));
+       cl_git_pass(git_buf_puts(&buf, PATCH_BINARY_LITERAL));
+
+       cl_git_fail_with(GIT_ERROR,
+               git_diff_from_buffer(&diff, buf.ptr, buf.size));
+
+       git_buf_free(&buf);
+}
+
+/* Each corrupt patch must make the whole buffer fail to parse. */
+void test_diff_parse__invalid_patches_fails(void)
+{
+       const char *corrupt[] = {
+               PATCH_CORRUPT_MISSING_NEW_FILE,
+               PATCH_CORRUPT_MISSING_OLD_FILE,
+               PATCH_CORRUPT_NO_CHANGES,
+               PATCH_CORRUPT_MISSING_HUNK_HEADER,
+       };
+       size_t i;
+
+       for (i = 0; i < sizeof(corrupt) / sizeof(corrupt[0]); i++) {
+               test_parse_invalid_diff(corrupt[i]);
+       }
+}
+