]> git.proxmox.com Git - libgit2.git/commitdiff
Fix rename looped reference issues
authorRussell Belfer <rb@github.com>
Tue, 18 Jun 2013 23:14:35 +0000 (16:14 -0700)
committerRussell Belfer <rb@github.com>
Tue, 18 Jun 2013 23:14:35 +0000 (16:14 -0700)
This makes the diff rename tracking code more careful about the
order in which it processes renames and more thorough in updating
the mapping of correct renames when an earlier rename update
alters the index of a later matched pair.

src/diff_print.c
src/diff_tform.c
tests-clar/diff/rename.c

index 30f221a62e5733a6d8c14c3af31454d41265c06c..0de548813ec726dad35b252715fe0d766cfa14b0 100644 (file)
@@ -46,7 +46,7 @@ static char diff_pick_suffix(int mode)
 {
        if (S_ISDIR(mode))
                return '/';
-       else if (mode & 0100) //-V536
+       else if (mode & 0100) /* -V536 */
                /* in git, modes are very regular, so we must have 0100755 mode */
                return '*';
        else
@@ -162,7 +162,7 @@ static int diff_print_one_raw(
        if (delta->similarity > 0)
                git_buf_printf(out, "%03u", delta->similarity);
 
-       if (delta->status == GIT_DELTA_RENAMED || delta->status == GIT_DELTA_COPIED)
+       if (delta->old_file.path != delta->new_file.path)
                git_buf_printf(
                        out, "\t%s %s\n", delta->old_file.path, delta->new_file.path);
        else
index 64746e7ddcee9f7f246adb0f28f7d908ee537d84..8c4e96ecf3222aa26a0d35d9bcce30695dca5bc1 100644 (file)
@@ -673,6 +673,15 @@ GIT_INLINE(bool) delta_is_new_only(git_diff_delta *delta)
                        delta->status == GIT_DELTA_IGNORED);
 }
 
+GIT_INLINE(void) delta_make_rename(
+       git_diff_delta *to, const git_diff_delta *from, uint32_t similarity)
+{
+       to->status     = GIT_DELTA_RENAMED;
+       to->similarity = similarity;
+       memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
+       to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
+}
+
 typedef struct {
        uint32_t idx;
        uint32_t similarity;
@@ -682,13 +691,15 @@ int git_diff_find_similar(
        git_diff_list *diff,
        git_diff_find_options *given_opts)
 {
-       size_t i, j, cache_size;
+       size_t i, j, sigcache_size;
        int error = 0, similarity;
        git_diff_delta *from, *to;
        git_diff_find_options opts;
-       size_t num_rewrites = 0, num_updates = 0;
-       void **cache; /* cache of similarity metric file signatures */
-       diff_find_match *match_sources, *match_targets; /* cache of best matches */
+       size_t num_srcs = 0, num_tgts = 0, tried_srcs = 0, tried_tgts = 0;
+       size_t num_rewrites = 0, num_updates = 0, num_bumped = 0;
+       void **sigcache; /* cache of similarity metric file signatures */
+       diff_find_match *match_srcs = NULL, *match_tgts = NULL, *best_match;
+       git_diff_file swap;
 
        if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0)
                return error;
@@ -697,70 +708,109 @@ int git_diff_find_similar(
        if (!git__is_uint32(diff->deltas.length))
                return 0;
 
-       cache_size = diff->deltas.length * 2; /* must store b/c length may change */
-       cache = git__calloc(cache_size, sizeof(void *));
-       GITERR_CHECK_ALLOC(cache);
+       sigcache_size = diff->deltas.length * 2; /* keep size b/c diff may change */
+       sigcache = git__calloc(sigcache_size, sizeof(void *));
+       GITERR_CHECK_ALLOC(sigcache);
+
+       /* Label rename sources and targets
+        *
+        * This will also set self-similarity scores for MODIFIED files and
+        * mark them for splitting if break-rewrites is enabled
+        */
+       git_vector_foreach(&diff->deltas, i, to) {
+               if (is_rename_source(diff, &opts, i, sigcache))
+                       ++num_srcs;
+
+               if (is_rename_target(diff, &opts, i, sigcache))
+                       ++num_tgts;
+       }
 
-       match_sources = git__calloc(diff->deltas.length, sizeof(diff_find_match));
-       match_targets = git__calloc(diff->deltas.length, sizeof(diff_find_match));
-       GITERR_CHECK_ALLOC(match_sources);
-       GITERR_CHECK_ALLOC(match_targets);
+       /* if there are no candidate srcs or tgts, we're done */
+       if (!num_srcs || !num_tgts)
+               goto cleanup;
 
-       /* next find the most similar delta for each rename / copy candidate */
+       match_tgts = git__calloc(diff->deltas.length, sizeof(diff_find_match));
+       GITERR_CHECK_ALLOC(match_tgts);
+       match_srcs = git__calloc(diff->deltas.length, sizeof(diff_find_match));
+       GITERR_CHECK_ALLOC(match_srcs);
 
-       git_vector_foreach(&diff->deltas, i, to) {
-               size_t tried_sources = 0;
+       /*
+        * Find best-fit matches for rename / copy candidates
+        */
 
-               match_targets[i].idx = (uint32_t)i;
-               match_targets[i].similarity = 0;
+find_best_matches:
+       tried_tgts = num_bumped = 0;
 
+       git_vector_foreach(&diff->deltas, i, to) {
                /* skip things that are not rename targets */
-               if (!is_rename_target(diff, &opts, i, cache))
+               if ((to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0)
                        continue;
 
-               git_vector_foreach(&diff->deltas, j, from) {
-                       if (i == j)
-                               continue;
+               tried_srcs = 0;
 
+               git_vector_foreach(&diff->deltas, j, from) {
                        /* skip things that are not rename sources */
-                       if (!is_rename_source(diff, &opts, j, cache))
+                       if ((from->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) == 0)
                                continue;
 
-                       /* cap on maximum targets we'll examine (per "to" file) */
-                       if (++tried_sources > opts.rename_limit)
-                               break;
-
                        /* calculate similarity for this pair and find best match */
-                       if ((error = similarity_measure(
-                                       &similarity, diff, &opts, cache, 2 * j, 2 * i + 1)) < 0)
+                       if (i == j)
+                               similarity = -1; /* don't measure self-similarity here */
+                       else if ((error = similarity_measure(
+                               &similarity, diff, &opts, sigcache, 2 * j, 2 * i + 1)) < 0)
                                goto cleanup;
 
-                       if (similarity < 0) { /* not actually comparable */
-                               --tried_sources;
-                               continue;
-                       }
+                       /* if this pairing is better for the src and the tgt, keep it */
+                       if (similarity > 0 &&
+                               match_tgts[i].similarity < (uint32_t)similarity &&
+                               match_srcs[j].similarity < (uint32_t)similarity)
+                       {
+                               if (match_tgts[i].similarity > 0) {
+                                       match_tgts[match_srcs[j].idx].similarity = 0;
+                                       match_srcs[match_tgts[i].idx].similarity = 0;
+                                       ++num_bumped;
+                               }
+
+                               match_tgts[i].similarity = (uint32_t)similarity;
+                               match_tgts[i].idx = (uint32_t)j;
 
-                       if (match_targets[i].similarity < (uint32_t)similarity &&
-                               match_sources[j].similarity < (uint32_t)similarity) {
-                               match_targets[i].similarity = (uint32_t)similarity;
-                               match_sources[j].similarity = (uint32_t)similarity;
-                               match_targets[i].idx = (uint32_t)j;
-                               match_sources[j].idx = (uint32_t)i;
+                               match_srcs[j].similarity = (uint32_t)similarity;
+                               match_srcs[j].idx = (uint32_t)i;
                        }
+
+                       if (++tried_srcs >= num_srcs)
+                               break;
+
+                       /* cap on maximum targets we'll examine (per "to" file) */
+                       if (tried_srcs > opts.rename_limit)
+                               break;
                }
+
+               if (++tried_tgts >= num_tgts)
+                       break;
        }
 
-       /* next rewrite the diffs with renames / copies */
+       if (num_bumped > 0) /* try again if we bumped some items */
+               goto find_best_matches;
+
+       /*
+        * Rewrite the diffs with renames / copies
+        */
+
+       tried_tgts = 0;
 
        git_vector_foreach(&diff->deltas, i, to) {
-               /* check if this delta was the target of a similarity */
-               if ((similarity = (int)match_targets[i].similarity) <= 0)
+               /* skip things that are not rename targets */
+               if ((to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0)
                        continue;
 
-               assert(to && (to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) != 0);
+               /* check if this delta was the target of a similarity */
+               best_match = &match_tgts[i];
+               if (!best_match->similarity)
+                       continue;
 
-               from = GIT_VECTOR_GET(&diff->deltas, match_targets[i].idx);
-               assert(from && (from->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) != 0);
+               j = best_match->idx;
+               from = GIT_VECTOR_GET(&diff->deltas, j);
 
                /* possible scenarios:
                 * 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME
@@ -774,99 +824,84 @@ int git_diff_find_similar(
 
                        if (delta_is_new_only(to)) {
 
-                               if (similarity < (int)opts.rename_threshold)
+                               if (best_match->similarity < opts.rename_threshold)
                                        continue;
 
-                               from->status = GIT_DELTA_RENAMED;
-                               from->similarity = (uint32_t)similarity;
-                               memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
-
-                               to->flags |= GIT_DIFF_FLAG__TO_DELETE;
+                               delta_make_rename(to, from, best_match->similarity);
 
+                               from->flags |= GIT_DIFF_FLAG__TO_DELETE;
                                num_rewrites++;
                        } else {
                                assert(delta_is_split(to));
 
-                               if (similarity < (int)opts.rename_from_rewrite_threshold)
+                               if (best_match->similarity < opts.rename_from_rewrite_threshold)
                                        continue;
 
-                               from->status = GIT_DELTA_RENAMED;
-                               from->similarity = (uint32_t)similarity;
-                               memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
+                               memcpy(&swap, &to->old_file, sizeof(swap));
 
-                               to->status = GIT_DELTA_DELETED;
-                               memset(&to->new_file, 0, sizeof(to->new_file));
-                               to->new_file.path = to->old_file.path;
-                               to->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
-                               if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
-                                       to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
-                                       num_rewrites--;
-                               }
+                               delta_make_rename(to, from, best_match->similarity);
+                               num_rewrites--;
+
+                               from->status = GIT_DELTA_DELETED;
+                               memcpy(&from->old_file, &swap, sizeof(from->old_file));
+                               memset(&from->new_file, 0, sizeof(from->new_file));
+                               from->new_file.path = from->old_file.path;
+                               from->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
 
                                num_updates++;
                        }
                }
 
                else if (delta_is_split(from)) {
-                       git_diff_file swap;
 
                        if (delta_is_new_only(to)) {
 
-                               if (similarity < (int)opts.rename_threshold)
+                               if (best_match->similarity < opts.rename_threshold)
                                        continue;
 
-                               memcpy(&swap, &from->new_file, sizeof(swap));
+                               delta_make_rename(to, from, best_match->similarity);
 
-                               from->status = GIT_DELTA_RENAMED;
-                               from->similarity = (uint32_t)similarity;
-                               memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
-                               if ((from->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
-                                       from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
-                                       num_rewrites--;
-                               }
-
-                               to->status = (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) ?
+                               from->status = (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) ?
                                        GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED;
-                               memcpy(&to->new_file, &swap, sizeof(to->new_file));
-                               to->old_file.path = to->new_file.path;
+                               memset(&from->old_file, 0, sizeof(from->old_file));
+                               from->old_file.path = from->new_file.path;
+                               from->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
+
+                               from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
+                               num_rewrites--;
 
                                num_updates++;
                        } else {
                                assert(delta_is_split(from));
 
-                               if (similarity < (int)opts.rename_from_rewrite_threshold)
+                               if (best_match->similarity < opts.rename_from_rewrite_threshold)
                                        continue;
 
-                               memcpy(&swap, &to->new_file, sizeof(swap));
+                               memcpy(&swap, &to->old_file, sizeof(swap));
 
-                               to->status = GIT_DELTA_RENAMED;
-                               to->similarity = (uint32_t)similarity;
-                               memcpy(&to->new_file, &from->new_file, sizeof(to->new_file));
-                               if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
-                                       to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
-                                       num_rewrites--;
-                               }
+                               delta_make_rename(to, from, best_match->similarity);
+                               num_rewrites--;
+                               num_updates++;
 
-                               memcpy(&from->new_file, &swap, sizeof(from->new_file));
-                               if ((from->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0) {
-                                       from->flags |= GIT_DIFF_FLAG__TO_SPLIT;
-                                       num_rewrites++;
-                               }
+                               memcpy(&from->old_file, &swap, sizeof(from->old_file));
 
-                               /* in the off chance that we've just swapped the new
-                                * element into the correct place, clear the SPLIT flag
+                               /* if we've just swapped the new element into the correct
+                                * place, clear the SPLIT flag
                                 */
-                               if (match_targets[match_targets[i].idx].idx == i &&
-                                       match_targets[match_targets[i].idx].similarity >
+                               if (match_tgts[j].idx == i &&
+                                       match_tgts[j].similarity >
                                        opts.rename_from_rewrite_threshold) {
 
-                                       from->status = GIT_DELTA_RENAMED;
-                                       from->similarity =
-                                               (uint32_t)match_targets[match_targets[i].idx].similarity;
-                                       match_targets[match_targets[i].idx].similarity = 0;
+                                       from->status     = GIT_DELTA_RENAMED;
+                                       from->similarity = match_tgts[j].similarity;
+                                       match_tgts[j].similarity = 0;
                                        from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
                                        num_rewrites--;
                                }
+                               /* otherwise, if we just overwrote a source, update mapping */
+                               else if (j > i && match_srcs[i].similarity > 0) {
+                                       match_tgts[match_srcs[i].idx].idx = j;
+                               }
 
                                num_updates++;
                        }
@@ -874,31 +909,35 @@ int git_diff_find_similar(
 
                else if (delta_is_new_only(to)) {
                        if (!FLAG_SET(&opts, GIT_DIFF_FIND_COPIES) ||
-                               similarity < (int)opts.copy_threshold)
+                               best_match->similarity < opts.copy_threshold)
                                continue;
 
-                       to->status = GIT_DELTA_COPIED;
-                       to->similarity = (uint32_t)similarity;
+                       to->status     = GIT_DELTA_COPIED;
+                       to->similarity = best_match->similarity;
                        memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
 
                        num_updates++;
                }
        }
 
+       /*
+        * Actually split and delete entries as needed
+        */
+
        if (num_rewrites > 0 || num_updates > 0)
                error = apply_splits_and_deletes(
                        diff, diff->deltas.length - num_rewrites,
                        FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES));
 
 cleanup:
-       git__free(match_sources);
-       git__free(match_targets);
+       git__free(match_srcs);
+       git__free(match_tgts);
 
-       for (i = 0; i < cache_size; ++i) {
-               if (cache[i] != NULL)
-                       opts.metric->free_signature(cache[i], opts.metric->payload);
+       for (i = 0; i < sigcache_size; ++i) {
+               if (sigcache[i] != NULL)
+                       opts.metric->free_signature(sigcache[i], opts.metric->payload);
        }
-       git__free(cache);
+       git__free(sigcache);
 
        if (!given_opts || !given_opts->metric)
                git__free(opts.metric);
index e6bf72c17e23083b5ae20e618752bf8aa076f9dc..79f4070573ff8cc32310a50f94e90f21a7d745cd 100644 (file)
@@ -755,7 +755,7 @@ void test_diff_rename__file_partial_exchange(void)
        git_buf_free(&c2);
 }
 
-void test_diff_rename__file_split(void)
+void test_diff_rename__rename_and_copy_from_same_source(void)
 {
        git_buf c1 = GIT_BUF_INIT, c2 = GIT_BUF_INIT;
        git_index *index;
@@ -947,6 +947,7 @@ void test_diff_rename__rejected_match_can_match_others(void)
 
        cl_git_pass(
                git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
+
        cl_git_pass(git_diff_find_similar(diff, &findopts));
 
        cl_git_pass(
@@ -967,10 +968,10 @@ static void write_similarity_file_two(const char *filename, size_t b_lines)
        size_t i;
 
        for (i = 0; i < b_lines; i++)
-               git_buf_printf(&contents, "%0.2d - bbbbb\r\n", (i+1));
+               git_buf_printf(&contents, "%0.2d - bbbbb\r\n", (int)(i+1));
 
        for (i = b_lines; i < 50; i++)
-               git_buf_printf(&contents, "%0.2d - aaaaa%s", (i+1), (i == 49 ? "" : "\r\n"));
+               git_buf_printf(&contents, "%0.2d - aaaaa%s", (int)(i+1), (i == 49 ? "" : "\r\n"));
 
        cl_git_pass(
                git_futils_writebuffer(&contents, filename, O_RDWR|O_CREAT, 0777));
@@ -1018,6 +1019,7 @@ void test_diff_rename__rejected_match_can_match_others_two(void)
 
        cl_git_pass(
                git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
+
        cl_git_pass(git_diff_find_similar(diff, &findopts));
 
        cl_git_pass(