]>
Commit | Line | Data |
---|---|---|
db106d01 | 1 | /* |
359fc2d2 | 2 | * Copyright (C) the libgit2 contributors. All rights reserved. |
db106d01 RB |
3 | * |
4 | * This file is part of libgit2, distributed under the GNU GPL v2 with | |
5 | * a Linking Exception. For full terms see the included COPYING file. | |
6 | */ | |
eae0bfdc PP |
7 | |
8 | #include "diff_tform.h" | |
114f5a6c | 9 | |
db106d01 | 10 | #include "git2/config.h" |
960a04dd | 11 | #include "git2/blob.h" |
737b5051 | 12 | #include "git2/sys/hashsig.h" |
114f5a6c RB |
13 | |
14 | #include "diff.h" | |
9be638ec | 15 | #include "diff_generate.h" |
e579e0f7 | 16 | #include "fs_path.h" |
22a2d3d5 | 17 | #include "futils.h" |
9f77b3f6 | 18 | #include "config.h" |
db106d01 | 19 | |
90177111 | 20 | git_diff_delta *git_diff__delta_dup( |
db106d01 RB |
21 | const git_diff_delta *d, git_pool *pool) |
22 | { | |
23 | git_diff_delta *delta = git__malloc(sizeof(git_diff_delta)); | |
24 | if (!delta) | |
25 | return NULL; | |
26 | ||
27 | memcpy(delta, d, sizeof(git_diff_delta)); | |
c68b09dc | 28 | GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags); |
db106d01 | 29 | |
d958e37a RB |
30 | if (d->old_file.path != NULL) { |
31 | delta->old_file.path = git_pool_strdup(pool, d->old_file.path); | |
32 | if (delta->old_file.path == NULL) | |
33 | goto fail; | |
34 | } | |
db106d01 | 35 | |
d958e37a | 36 | if (d->new_file.path != d->old_file.path && d->new_file.path != NULL) { |
db106d01 RB |
37 | delta->new_file.path = git_pool_strdup(pool, d->new_file.path); |
38 | if (delta->new_file.path == NULL) | |
39 | goto fail; | |
40 | } else { | |
41 | delta->new_file.path = delta->old_file.path; | |
42 | } | |
43 | ||
44 | return delta; | |
45 | ||
46 | fail: | |
47 | git__free(delta); | |
48 | return NULL; | |
49 | } | |
50 | ||
90177111 | 51 | git_diff_delta *git_diff__merge_like_cgit( |
3940310e RB |
52 | const git_diff_delta *a, |
53 | const git_diff_delta *b, | |
54 | git_pool *pool) | |
db106d01 RB |
55 | { |
56 | git_diff_delta *dup; | |
57 | ||
58 | /* Emulate C git for merging two diffs (a la 'git diff <sha>'). | |
59 | * | |
60 | * When C git does a diff between the work dir and a tree, it actually | |
61 | * diffs with the index but uses the workdir contents. This emulates | |
62 | * those choices so we can emulate the type of diff. | |
63 | * | |
64 | * We have three file descriptions here, let's call them: | |
65 | * f1 = a->old_file | |
66 | * f2 = a->new_file AND b->old_file | |
67 | * f3 = b->new_file | |
68 | */ | |
69 | ||
cb63e7e8 POL |
70 | /* If one of the diffs is a conflict, just dup it */ |
71 | if (b->status == GIT_DELTA_CONFLICTED) | |
90177111 | 72 | return git_diff__delta_dup(b, pool); |
cb63e7e8 | 73 | if (a->status == GIT_DELTA_CONFLICTED) |
90177111 | 74 | return git_diff__delta_dup(a, pool); |
cb63e7e8 | 75 | |
db106d01 RB |
76 | /* if f2 == f3 or f2 is deleted, then just dup the 'a' diff */ |
77 | if (b->status == GIT_DELTA_UNMODIFIED || a->status == GIT_DELTA_DELETED) | |
90177111 | 78 | return git_diff__delta_dup(a, pool); |
db106d01 RB |
79 | |
80 | /* otherwise, base this diff on the 'b' diff */ | |
90177111 | 81 | if ((dup = git_diff__delta_dup(b, pool)) == NULL) |
db106d01 RB |
82 | return NULL; |
83 | ||
84 | /* If 'a' status is uninteresting, then we're done */ | |
83ba5e36 ET |
85 | if (a->status == GIT_DELTA_UNMODIFIED || |
86 | a->status == GIT_DELTA_UNTRACKED || | |
87 | a->status == GIT_DELTA_UNREADABLE) | |
db106d01 RB |
88 | return dup; |
89 | ||
c25aa7cd | 90 | GIT_ASSERT_WITH_RETVAL(b->status != GIT_DELTA_UNMODIFIED, NULL); |
db106d01 RB |
91 | |
92 | /* A cgit exception is that the diff of a file that is only in the | |
93 | * index (i.e. not in HEAD nor workdir) is given as empty. | |
94 | */ | |
95 | if (dup->status == GIT_DELTA_DELETED) { | |
b9780823 | 96 | if (a->status == GIT_DELTA_ADDED) { |
db106d01 | 97 | dup->status = GIT_DELTA_UNMODIFIED; |
b9780823 POL |
98 | dup->nfiles = 2; |
99 | } | |
db106d01 RB |
100 | /* else don't overwrite DELETE status */ |
101 | } else { | |
102 | dup->status = a->status; | |
b9780823 | 103 | dup->nfiles = a->nfiles; |
db106d01 RB |
104 | } |
105 | ||
9950bb4e | 106 | git_oid_cpy(&dup->old_file.id, &a->old_file.id); |
db106d01 RB |
107 | dup->old_file.mode = a->old_file.mode; |
108 | dup->old_file.size = a->old_file.size; | |
109 | dup->old_file.flags = a->old_file.flags; | |
110 | ||
111 | return dup; | |
112 | } | |
113 | ||
90177111 | 114 | int git_diff__merge( |
5ef43d41 | 115 | git_diff *onto, const git_diff *from, git_diff__merge_cb cb) |
db106d01 RB |
116 | { |
117 | int error = 0; | |
118 | git_pool onto_pool; | |
119 | git_vector onto_new; | |
120 | git_diff_delta *delta; | |
e7c85120 | 121 | bool ignore_case, reversed; |
db106d01 RB |
122 | unsigned int i, j; |
123 | ||
c25aa7cd PP |
124 | GIT_ASSERT_ARG(onto); |
125 | GIT_ASSERT_ARG(from); | |
db106d01 RB |
126 | |
127 | if (!from->deltas.length) | |
128 | return 0; | |
129 | ||
e7c85120 RB |
130 | ignore_case = ((onto->opts.flags & GIT_DIFF_IGNORE_CASE) != 0); |
131 | reversed = ((onto->opts.flags & GIT_DIFF_REVERSE) != 0); | |
132 | ||
133 | if (ignore_case != ((from->opts.flags & GIT_DIFF_IGNORE_CASE) != 0) || | |
134 | reversed != ((from->opts.flags & GIT_DIFF_REVERSE) != 0)) { | |
ac3d33df | 135 | git_error_set(GIT_ERROR_INVALID, |
909d5494 | 136 | "attempt to merge diffs created with conflicting options"); |
3940310e RB |
137 | return -1; |
138 | } | |
139 | ||
22a2d3d5 UG |
140 | if (git_vector_init(&onto_new, onto->deltas.length, git_diff_delta__cmp) < 0 || |
141 | git_pool_init(&onto_pool, 1) < 0) | |
db106d01 RB |
142 | return -1; |
143 | ||
db106d01 RB |
144 | for (i = 0, j = 0; i < onto->deltas.length || j < from->deltas.length; ) { |
145 | git_diff_delta *o = GIT_VECTOR_GET(&onto->deltas, i); | |
146 | const git_diff_delta *f = GIT_VECTOR_GET(&from->deltas, j); | |
3940310e RB |
147 | int cmp = !f ? -1 : !o ? 1 : |
148 | STRCMP_CASESELECT(ignore_case, o->old_file.path, f->old_file.path); | |
db106d01 RB |
149 | |
150 | if (cmp < 0) { | |
90177111 | 151 | delta = git_diff__delta_dup(o, &onto_pool); |
db106d01 RB |
152 | i++; |
153 | } else if (cmp > 0) { | |
90177111 | 154 | delta = git_diff__delta_dup(f, &onto_pool); |
db106d01 RB |
155 | j++; |
156 | } else { | |
83ba5e36 ET |
157 | const git_diff_delta *left = reversed ? f : o; |
158 | const git_diff_delta *right = reversed ? o : f; | |
159 | ||
5ef43d41 | 160 | delta = cb(left, right, &onto_pool); |
db106d01 RB |
161 | i++; |
162 | j++; | |
163 | } | |
164 | ||
165 | /* the ignore rules for the target may not match the source | |
166 | * or the result of a merged delta could be skippable... | |
167 | */ | |
5ef43d41 | 168 | if (delta && git_diff_delta__should_skip(&onto->opts, delta)) { |
db106d01 RB |
169 | git__free(delta); |
170 | continue; | |
171 | } | |
172 | ||
173 | if ((error = !delta ? -1 : git_vector_insert(&onto_new, delta)) < 0) | |
174 | break; | |
175 | } | |
176 | ||
177 | if (!error) { | |
178 | git_vector_swap(&onto->deltas, &onto_new); | |
179 | git_pool_swap(&onto->pool, &onto_pool); | |
3940310e RB |
180 | |
181 | if ((onto->opts.flags & GIT_DIFF_REVERSE) != 0) | |
182 | onto->old_src = from->old_src; | |
183 | else | |
184 | onto->new_src = from->new_src; | |
db106d01 RB |
185 | |
186 | /* prefix strings also come from old pool, so recreate those.*/ | |
187 | onto->opts.old_prefix = | |
188 | git_pool_strdup_safe(&onto->pool, onto->opts.old_prefix); | |
189 | onto->opts.new_prefix = | |
190 | git_pool_strdup_safe(&onto->pool, onto->opts.new_prefix); | |
191 | } | |
192 | ||
9cfce273 | 193 | git_vector_free_deep(&onto_new); |
db106d01 RB |
194 | git_pool_clear(&onto_pool); |
195 | ||
196 | return error; | |
197 | } | |
198 | ||
5ef43d41 ET |
199 | int git_diff_merge(git_diff *onto, const git_diff *from) |
200 | { | |
90177111 | 201 | return git_diff__merge(onto, from, git_diff__merge_like_cgit); |
5ef43d41 ET |
202 | } |
203 | ||
0462fba5 | 204 | int git_diff_find_similar__hashsig_for_file( |
f8275890 RB |
205 | void **out, const git_diff_file *f, const char *path, void *p) |
206 | { | |
c8893d1f | 207 | git_hashsig_option_t opt = (git_hashsig_option_t)(intptr_t)p; |
aa408cbf | 208 | |
f8275890 | 209 | GIT_UNUSED(f); |
36fc5497 | 210 | return git_hashsig_create_fromfile((git_hashsig **)out, path, opt); |
f8275890 | 211 | } |
9bc8be3d | 212 | |
0462fba5 | 213 | int git_diff_find_similar__hashsig_for_buf( |
f8275890 RB |
214 | void **out, const git_diff_file *f, const char *buf, size_t len, void *p) |
215 | { | |
c8893d1f | 216 | git_hashsig_option_t opt = (git_hashsig_option_t)(intptr_t)p; |
0462fba5 | 217 | |
f8275890 | 218 | GIT_UNUSED(f); |
36fc5497 | 219 | return git_hashsig_create((git_hashsig **)out, buf, len, opt); |
f8275890 | 220 | } |
9bc8be3d | 221 | |
0462fba5 | 222 | void git_diff_find_similar__hashsig_free(void *sig, void *payload) |
9bc8be3d RB |
223 | { |
224 | GIT_UNUSED(payload); | |
225 | git_hashsig_free(sig); | |
226 | } | |
227 | ||
/*
 * Built-in metric callback: compare two hash signatures.
 *
 * A non-negative result from git_hashsig_compare() is stored in `*score`
 * and 0 is returned.  A negative result is returned as the error code and
 * `*score` is left untouched.  `payload` is unused.
 */
int git_diff_find_similar__calc_similarity(
	int *score, void *siga, void *sigb, void *payload)
{
	int result = git_hashsig_compare(siga, sigb);

	GIT_UNUSED(payload);

	if (result >= 0) {
		*score = result;
		result = 0;
	}

	return result;
}
241 | ||
db106d01 RB |
242 | #define DEFAULT_THRESHOLD 50 |
243 | #define DEFAULT_BREAK_REWRITE_THRESHOLD 60 | |
e579e0f7 | 244 | #define DEFAULT_RENAME_LIMIT 1000 |
db106d01 RB |
245 | |
246 | static int normalize_find_opts( | |
3ff1d123 | 247 | git_diff *diff, |
db106d01 | 248 | git_diff_find_options *opts, |
10672e3e | 249 | const git_diff_find_options *given) |
db106d01 RB |
250 | { |
251 | git_config *cfg = NULL; | |
36fc5497 | 252 | git_hashsig_option_t hashsig_opts; |
db106d01 | 253 | |
ac3d33df | 254 | GIT_ERROR_CHECK_VERSION(given, GIT_DIFF_FIND_OPTIONS_VERSION, "git_diff_find_options"); |
5a52d6be | 255 | |
db106d01 RB |
256 | if (diff->repo != NULL && |
257 | git_repository_config__weakptr(&cfg, diff->repo) < 0) | |
258 | return -1; | |
259 | ||
7e3ed419 | 260 | if (given) |
db106d01 | 261 | memcpy(opts, given, sizeof(*opts)); |
db106d01 | 262 | |
c56c6d69 BS |
263 | if (!given || |
264 | (given->flags & GIT_DIFF_FIND_ALL) == GIT_DIFF_FIND_BY_CONFIG) | |
265 | { | |
1a8c11f4 | 266 | if (cfg) { |
32f07984 PS |
267 | char *rule = |
268 | git_config__get_string_force(cfg, "diff.renames", "true"); | |
269 | int boolval; | |
270 | ||
271 | if (!git__parse_bool(&boolval, rule) && !boolval) | |
272 | /* don't set FIND_RENAMES if bool value is false */; | |
273 | else if (!strcasecmp(rule, "copies") || !strcasecmp(rule, "copy")) | |
274 | opts->flags |= GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES; | |
275 | else | |
276 | opts->flags |= GIT_DIFF_FIND_RENAMES; | |
277 | ||
278 | git__free(rule); | |
279 | } else { | |
280 | /* set default flag */ | |
9f77b3f6 | 281 | opts->flags |= GIT_DIFF_FIND_RENAMES; |
32f07984 | 282 | } |
db106d01 | 283 | } |
ca901e7b | 284 | |
db106d01 RB |
285 | /* some flags imply others */ |
286 | ||
9be5be47 RB |
287 | if (opts->flags & GIT_DIFF_FIND_EXACT_MATCH_ONLY) { |
288 | /* if we are only looking for exact matches, then don't turn | |
289 | * MODIFIED items into ADD/DELETE pairs because it's too picky | |
290 | */ | |
291 | opts->flags &= ~(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES); | |
292 | ||
293 | /* similarly, don't look for self-rewrites to split */ | |
294 | opts->flags &= ~GIT_DIFF_FIND_RENAMES_FROM_REWRITES; | |
295 | } | |
296 | ||
db106d01 RB |
297 | if (opts->flags & GIT_DIFF_FIND_RENAMES_FROM_REWRITES) |
298 | opts->flags |= GIT_DIFF_FIND_RENAMES; | |
299 | ||
300 | if (opts->flags & GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED) | |
301 | opts->flags |= GIT_DIFF_FIND_COPIES; | |
302 | ||
d958e37a RB |
303 | if (opts->flags & GIT_DIFF_BREAK_REWRITES) |
304 | opts->flags |= GIT_DIFF_FIND_REWRITES; | |
305 | ||
db106d01 RB |
306 | #define USE_DEFAULT(X) ((X) == 0 || (X) > 100) |
307 | ||
308 | if (USE_DEFAULT(opts->rename_threshold)) | |
309 | opts->rename_threshold = DEFAULT_THRESHOLD; | |
310 | ||
311 | if (USE_DEFAULT(opts->rename_from_rewrite_threshold)) | |
312 | opts->rename_from_rewrite_threshold = DEFAULT_THRESHOLD; | |
313 | ||
314 | if (USE_DEFAULT(opts->copy_threshold)) | |
315 | opts->copy_threshold = DEFAULT_THRESHOLD; | |
316 | ||
317 | if (USE_DEFAULT(opts->break_rewrite_threshold)) | |
318 | opts->break_rewrite_threshold = DEFAULT_BREAK_REWRITE_THRESHOLD; | |
319 | ||
320 | #undef USE_DEFAULT | |
321 | ||
a21cbb12 | 322 | if (!opts->rename_limit) { |
1a8c11f4 PS |
323 | if (cfg) { |
324 | opts->rename_limit = git_config__get_int_force( | |
325 | cfg, "diff.renamelimit", DEFAULT_RENAME_LIMIT); | |
326 | } | |
db106d01 | 327 | |
9f77b3f6 RB |
328 | if (opts->rename_limit <= 0) |
329 | opts->rename_limit = DEFAULT_RENAME_LIMIT; | |
db106d01 RB |
330 | } |
331 | ||
f8275890 | 332 | /* assign the internal metric with whitespace flag as payload */ |
9bc8be3d | 333 | if (!opts->metric) { |
f8275890 | 334 | opts->metric = git__malloc(sizeof(git_diff_similarity_metric)); |
ac3d33df | 335 | GIT_ERROR_CHECK_ALLOC(opts->metric); |
f8275890 | 336 | |
0462fba5 ET |
337 | opts->metric->file_signature = git_diff_find_similar__hashsig_for_file; |
338 | opts->metric->buffer_signature = git_diff_find_similar__hashsig_for_buf; | |
339 | opts->metric->free_signature = git_diff_find_similar__hashsig_free; | |
340 | opts->metric->similarity = git_diff_find_similar__calc_similarity; | |
f8275890 | 341 | |
9bc8be3d | 342 | if (opts->flags & GIT_DIFF_FIND_IGNORE_WHITESPACE) |
36fc5497 | 343 | hashsig_opts = GIT_HASHSIG_IGNORE_WHITESPACE; |
9bc8be3d | 344 | else if (opts->flags & GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE) |
36fc5497 | 345 | hashsig_opts = GIT_HASHSIG_NORMAL; |
9bc8be3d | 346 | else |
36fc5497 POL |
347 | hashsig_opts = GIT_HASHSIG_SMART_WHITESPACE; |
348 | hashsig_opts |= GIT_HASHSIG_ALLOW_SMALL_FILES; | |
349 | opts->metric->payload = (void *)hashsig_opts; | |
9bc8be3d RB |
350 | } |
351 | ||
db106d01 RB |
352 | return 0; |
353 | } | |
354 | ||
2123a17f RB |
355 | static int insert_delete_side_of_split( |
356 | git_diff *diff, git_vector *onto, const git_diff_delta *delta) | |
357 | { | |
358 | /* make new record for DELETED side of split */ | |
90177111 | 359 | git_diff_delta *deleted = git_diff__delta_dup(delta, &diff->pool); |
ac3d33df | 360 | GIT_ERROR_CHECK_ALLOC(deleted); |
2123a17f RB |
361 | |
362 | deleted->status = GIT_DELTA_DELETED; | |
363 | deleted->nfiles = 1; | |
364 | memset(&deleted->new_file, 0, sizeof(deleted->new_file)); | |
365 | deleted->new_file.path = deleted->old_file.path; | |
9950bb4e | 366 | deleted->new_file.flags |= GIT_DIFF_FLAG_VALID_ID; |
2123a17f RB |
367 | |
368 | return git_vector_insert(onto, deleted); | |
369 | } | |
370 | ||
d958e37a | 371 | static int apply_splits_and_deletes( |
3ff1d123 | 372 | git_diff *diff, size_t expected_size, bool actually_split) |
db106d01 RB |
373 | { |
374 | git_vector onto = GIT_VECTOR_INIT; | |
375 | size_t i; | |
2123a17f | 376 | git_diff_delta *delta; |
db106d01 | 377 | |
e579e0f7 | 378 | if (git_vector_init(&onto, expected_size, diff->deltas._cmp) < 0) |
db106d01 RB |
379 | return -1; |
380 | ||
381 | /* build new delta list without TO_DELETE and splitting TO_SPLIT */ | |
382 | git_vector_foreach(&diff->deltas, i, delta) { | |
71a3d27e | 383 | if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0) |
db106d01 | 384 | continue; |
db106d01 | 385 | |
a21cbb12 | 386 | if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0 && actually_split) { |
d958e37a RB |
387 | delta->similarity = 0; |
388 | ||
2123a17f | 389 | if (insert_delete_side_of_split(diff, &onto, delta) < 0) |
11d9f6b3 | 390 | goto on_error; |
db106d01 | 391 | |
22a2d3d5 | 392 | if (diff->new_src == GIT_ITERATOR_WORKDIR) |
9be5be47 RB |
393 | delta->status = GIT_DELTA_UNTRACKED; |
394 | else | |
395 | delta->status = GIT_DELTA_ADDED; | |
74a627f0 | 396 | delta->nfiles = 1; |
db106d01 RB |
397 | memset(&delta->old_file, 0, sizeof(delta->old_file)); |
398 | delta->old_file.path = delta->new_file.path; | |
9950bb4e | 399 | delta->old_file.flags |= GIT_DIFF_FLAG_VALID_ID; |
db106d01 RB |
400 | } |
401 | ||
c68b09dc RB |
402 | /* clean up delta before inserting into new list */ |
403 | GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags); | |
404 | ||
405 | if (delta->status != GIT_DELTA_COPIED && | |
406 | delta->status != GIT_DELTA_RENAMED && | |
407 | (delta->status != GIT_DELTA_MODIFIED || actually_split)) | |
408 | delta->similarity = 0; | |
409 | ||
410 | /* insert into new list */ | |
11d9f6b3 PK |
411 | if (git_vector_insert(&onto, delta) < 0) |
412 | goto on_error; | |
db106d01 RB |
413 | } |
414 | ||
11d9f6b3 | 415 | /* cannot return an error past this point */ |
c68b09dc RB |
416 | |
417 | /* free deltas from old list that didn't make it to the new one */ | |
a21cbb12 | 418 | git_vector_foreach(&diff->deltas, i, delta) { |
71a3d27e | 419 | if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0) |
11d9f6b3 | 420 | git__free(delta); |
a21cbb12 RB |
421 | } |
422 | ||
db106d01 | 423 | /* swap new delta list into place */ |
db106d01 RB |
424 | git_vector_swap(&diff->deltas, &onto); |
425 | git_vector_free(&onto); | |
a21cbb12 | 426 | git_vector_sort(&diff->deltas); |
db106d01 RB |
427 | |
428 | return 0; | |
11d9f6b3 PK |
429 | |
430 | on_error: | |
9cfce273 | 431 | git_vector_free_deep(&onto); |
11d9f6b3 PK |
432 | |
433 | return -1; | |
db106d01 RB |
434 | } |
435 | ||
3ff1d123 | 436 | GIT_INLINE(git_diff_file *) similarity_get_file(git_diff *diff, size_t idx) |
960a04dd RB |
437 | { |
438 | git_diff_delta *delta = git_vector_get(&diff->deltas, idx / 2); | |
439 | return (idx & 1) ? &delta->new_file : &delta->old_file; | |
440 | } | |
99ba8f23 | 441 | |
a5140f4d RB |
/* Scratch state used while computing the signature of one diff file. */
typedef struct {
	size_t idx;              /* flat file index; also the signature cache slot */
	git_iterator_t src;      /* iterator type of the side this file came from */
	git_repository *repo;    /* repository of the diff */
	git_diff_file *file;     /* the file record being measured */
	git_str data;            /* holds the workdir path for workdir files */
	git_odb_object *odb_obj; /* may be filled in while resolving the file size */
	git_blob *blob;          /* blob loaded for non-workdir files */
} similarity_info;
451 | ||
effdbeb3 | 452 | static int similarity_init( |
3ff1d123 | 453 | similarity_info *info, git_diff *diff, size_t file_idx) |
a5140f4d RB |
454 | { |
455 | info->idx = file_idx; | |
456 | info->src = (file_idx & 1) ? diff->new_src : diff->old_src; | |
457 | info->repo = diff->repo; | |
458 | info->file = similarity_get_file(diff, file_idx); | |
effdbeb3 | 459 | info->odb_obj = NULL; |
a5140f4d | 460 | info->blob = NULL; |
e579e0f7 | 461 | git_str_init(&info->data, 0); |
09fae31d | 462 | |
e579e0f7 MB |
463 | if ((info->file->flags & GIT_DIFF_FLAG_VALID_SIZE) || |
464 | info->src == GIT_ITERATOR_WORKDIR) | |
effdbeb3 | 465 | return 0; |
5e5848eb | 466 | |
effdbeb3 RB |
467 | return git_diff_file__resolve_zero_size( |
468 | info->file, &info->odb_obj, info->repo); | |
a5140f4d | 469 | } |
960a04dd | 470 | |
d730d3f4 | 471 | static int similarity_sig( |
a5140f4d RB |
472 | similarity_info *info, |
473 | const git_diff_find_options *opts, | |
474 | void **cache) | |
475 | { | |
476 | int error = 0; | |
effdbeb3 | 477 | git_diff_file *file = info->file; |
8cfd54f0 | 478 | |
22a2d3d5 | 479 | if (info->src == GIT_ITERATOR_WORKDIR) { |
c25aa7cd PP |
480 | if ((error = git_repository_workdir_path( |
481 | &info->data, info->repo, file->path)) < 0) | |
effdbeb3 | 482 | return error; |
960a04dd | 483 | |
effdbeb3 | 484 | /* if path is not a regular file, just skip this item */ |
e579e0f7 | 485 | if (!git_fs_path_isfile(info->data.ptr)) |
effdbeb3 | 486 | return 0; |
a5140f4d | 487 | |
a5140f4d RB |
488 | /* TODO: apply wd-to-odb filters to file data if necessary */ |
489 | ||
490 | error = opts->metric->file_signature( | |
491 | &cache[info->idx], info->file, | |
492 | info->data.ptr, opts->metric->payload); | |
493 | } else { | |
effdbeb3 RB |
494 | /* if we didn't initially know the size, we might have an odb_obj |
495 | * around from earlier, so convert that, otherwise load the blob now | |
496 | */ | |
497 | if (info->odb_obj != NULL) | |
498 | error = git_object__from_odb_object( | |
499 | (git_object **)&info->blob, info->repo, | |
ac3d33df | 500 | info->odb_obj, GIT_OBJECT_BLOB); |
effdbeb3 | 501 | else |
9950bb4e | 502 | error = git_blob_lookup(&info->blob, info->repo, &file->id); |
effdbeb3 RB |
503 | |
504 | if (error < 0) { | |
505 | /* if lookup fails, just skip this item in similarity calc */ | |
ac3d33df | 506 | git_error_clear(); |
effdbeb3 | 507 | } else { |
a16e4172 RB |
508 | size_t sz; |
509 | ||
510 | /* index size may not be actual blob size if filtered */ | |
511 | if (file->size != git_blob_rawsize(info->blob)) | |
512 | file->size = git_blob_rawsize(info->blob); | |
513 | ||
22a2d3d5 | 514 | sz = git__is_sizet(file->size) ? (size_t)file->size : (size_t)-1; |
effdbeb3 RB |
515 | |
516 | error = opts->metric->buffer_signature( | |
517 | &cache[info->idx], info->file, | |
518 | git_blob_rawcontent(info->blob), sz, opts->metric->payload); | |
519 | } | |
960a04dd RB |
520 | } |
521 | ||
522 | return error; | |
523 | } | |
524 | ||
effdbeb3 RB |
525 | static void similarity_unload(similarity_info *info) |
526 | { | |
527 | if (info->odb_obj) | |
528 | git_odb_object_free(info->odb_obj); | |
529 | ||
530 | if (info->blob) | |
531 | git_blob_free(info->blob); | |
532 | else | |
e579e0f7 | 533 | git_str_dispose(&info->data); |
effdbeb3 RB |
534 | } |
535 | ||
/* True when any bit of `flag_name` is set in (opts)->flags.  The flag
 * argument is parenthesized so that a compound expression such as
 * `A | B` is masked as a whole.
 */
#define FLAG_SET(opts,flag_name) (((opts)->flags & (flag_name)) != 0)
9be5be47 RB |
537 | |
538 | /* - score < 0 means files cannot be compared | |
539 | * - score >= 100 means files are exact match | |
540 | * - score == 0 means files are completely different | |
541 | */ | |
960a04dd | 542 | static int similarity_measure( |
9be5be47 | 543 | int *score, |
3ff1d123 | 544 | git_diff *diff, |
a21cbb12 | 545 | const git_diff_find_options *opts, |
960a04dd RB |
546 | void **cache, |
547 | size_t a_idx, | |
548 | size_t b_idx) | |
549 | { | |
960a04dd RB |
550 | git_diff_file *a_file = similarity_get_file(diff, a_idx); |
551 | git_diff_file *b_file = similarity_get_file(diff, b_idx); | |
a21cbb12 | 552 | bool exact_match = FLAG_SET(opts, GIT_DIFF_FIND_EXACT_MATCH_ONLY); |
a5140f4d RB |
553 | int error = 0; |
554 | similarity_info a_info, b_info; | |
9be5be47 RB |
555 | |
556 | *score = -1; | |
960a04dd | 557 | |
191474a1 ET |
558 | /* don't try to compare things that aren't files */ |
559 | if (!GIT_MODE_ISBLOB(a_file->mode) || !GIT_MODE_ISBLOB(b_file->mode)) | |
960a04dd RB |
560 | return 0; |
561 | ||
a1683f28 | 562 | /* if exact match is requested, force calculation of missing OIDs now */ |
9be5be47 | 563 | if (exact_match) { |
22a2d3d5 UG |
564 | if (git_oid_is_zero(&a_file->id) && |
565 | diff->old_src == GIT_ITERATOR_WORKDIR && | |
240f4af3 RB |
566 | !git_diff__oid_for_file(&a_file->id, |
567 | diff, a_file->path, a_file->mode, a_file->size)) | |
9950bb4e | 568 | a_file->flags |= GIT_DIFF_FLAG_VALID_ID; |
9be5be47 | 569 | |
22a2d3d5 UG |
570 | if (git_oid_is_zero(&b_file->id) && |
571 | diff->new_src == GIT_ITERATOR_WORKDIR && | |
240f4af3 RB |
572 | !git_diff__oid_for_file(&b_file->id, |
573 | diff, b_file->path, b_file->mode, b_file->size)) | |
9950bb4e | 574 | b_file->flags |= GIT_DIFF_FLAG_VALID_ID; |
9be5be47 RB |
575 | } |
576 | ||
577 | /* check OID match as a quick test */ | |
9950bb4e | 578 | if (git_oid__cmp(&a_file->id, &b_file->id) == 0) { |
9be5be47 RB |
579 | *score = 100; |
580 | return 0; | |
581 | } | |
582 | ||
583 | /* don't calculate signatures if we are doing exact match */ | |
584 | if (exact_match) { | |
585 | *score = 0; | |
586 | return 0; | |
587 | } | |
db106d01 | 588 | |
effdbeb3 RB |
589 | memset(&a_info, 0, sizeof(a_info)); |
590 | memset(&b_info, 0, sizeof(b_info)); | |
a5140f4d | 591 | |
effdbeb3 RB |
592 | /* set up similarity data (will try to update missing file sizes) */ |
593 | if (!cache[a_idx] && (error = similarity_init(&a_info, diff, a_idx)) < 0) | |
594 | return error; | |
595 | if (!cache[b_idx] && (error = similarity_init(&b_info, diff, b_idx)) < 0) | |
596 | goto cleanup; | |
a5140f4d | 597 | |
f5c4d022 | 598 | /* check if file sizes are nowhere near each other */ |
18e9efc4 RB |
599 | if (a_file->size > 127 && |
600 | b_file->size > 127 && | |
d730d3f4 RB |
601 | (a_file->size > (b_file->size << 3) || |
602 | b_file->size > (a_file->size << 3))) | |
effdbeb3 | 603 | goto cleanup; |
18e9efc4 | 604 | |
960a04dd | 605 | /* update signature cache if needed */ |
d730d3f4 RB |
606 | if (!cache[a_idx]) { |
607 | if ((error = similarity_sig(&a_info, opts, cache)) < 0) | |
608 | goto cleanup; | |
609 | } | |
610 | if (!cache[b_idx]) { | |
611 | if ((error = similarity_sig(&b_info, opts, cache)) < 0) | |
612 | goto cleanup; | |
613 | } | |
1fed6b07 | 614 | |
a5140f4d RB |
615 | /* calculate similarity provided that the metric choose to process |
616 | * both the a and b files (some may not if file is too big, etc). | |
617 | */ | |
618 | if (cache[a_idx] && cache[b_idx]) | |
619 | error = opts->metric->similarity( | |
620 | score, cache[a_idx], cache[b_idx], opts->metric->payload); | |
db106d01 | 621 | |
effdbeb3 | 622 | cleanup: |
a5140f4d RB |
623 | similarity_unload(&a_info); |
624 | similarity_unload(&b_info); | |
625 | ||
626 | return error; | |
db106d01 RB |
627 | } |
628 | ||
a21cbb12 | 629 | static int calc_self_similarity( |
3ff1d123 | 630 | git_diff *diff, |
a21cbb12 RB |
631 | const git_diff_find_options *opts, |
632 | size_t delta_idx, | |
633 | void **cache) | |
9be5be47 | 634 | { |
a21cbb12 RB |
635 | int error, similarity = -1; |
636 | git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); | |
637 | ||
638 | if ((delta->flags & GIT_DIFF_FLAG__HAS_SELF_SIMILARITY) != 0) | |
639 | return 0; | |
640 | ||
641 | error = similarity_measure( | |
642 | &similarity, diff, opts, cache, 2 * delta_idx, 2 * delta_idx + 1); | |
643 | if (error < 0) | |
644 | return error; | |
645 | ||
646 | if (similarity >= 0) { | |
74a627f0 | 647 | delta->similarity = (uint16_t)similarity; |
a21cbb12 RB |
648 | delta->flags |= GIT_DIFF_FLAG__HAS_SELF_SIMILARITY; |
649 | } | |
650 | ||
651 | return 0; | |
652 | } | |
653 | ||
654 | static bool is_rename_target( | |
3ff1d123 | 655 | git_diff *diff, |
a21cbb12 RB |
656 | const git_diff_find_options *opts, |
657 | size_t delta_idx, | |
658 | void **cache) | |
659 | { | |
660 | git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); | |
661 | ||
662 | /* skip things that aren't plain blobs */ | |
663 | if (!GIT_MODE_ISBLOB(delta->new_file.mode)) | |
664 | return false; | |
665 | ||
666 | /* only consider ADDED, RENAMED, COPIED, and split MODIFIED as | |
50456801 | 667 | * targets; maybe include UNTRACKED if requested. |
a21cbb12 RB |
668 | */ |
669 | switch (delta->status) { | |
670 | case GIT_DELTA_UNMODIFIED: | |
671 | case GIT_DELTA_DELETED: | |
50456801 POL |
672 | case GIT_DELTA_IGNORED: |
673 | case GIT_DELTA_CONFLICTED: | |
a21cbb12 RB |
674 | return false; |
675 | ||
676 | case GIT_DELTA_MODIFIED: | |
677 | if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) && | |
678 | !FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES)) | |
679 | return false; | |
680 | ||
681 | if (calc_self_similarity(diff, opts, delta_idx, cache) < 0) | |
682 | return false; | |
683 | ||
684 | if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) && | |
685 | delta->similarity < opts->break_rewrite_threshold) { | |
686 | delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; | |
687 | break; | |
688 | } | |
689 | if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) && | |
eae0bfdc PP |
690 | delta->similarity < opts->rename_from_rewrite_threshold) { |
691 | delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; | |
a21cbb12 | 692 | break; |
eae0bfdc | 693 | } |
a21cbb12 RB |
694 | |
695 | return false; | |
696 | ||
697 | case GIT_DELTA_UNTRACKED: | |
a21cbb12 RB |
698 | if (!FLAG_SET(opts, GIT_DIFF_FIND_FOR_UNTRACKED)) |
699 | return false; | |
700 | break; | |
701 | ||
702 | default: /* all other status values should be checked */ | |
703 | break; | |
704 | } | |
705 | ||
706 | delta->flags |= GIT_DIFF_FLAG__IS_RENAME_TARGET; | |
707 | return true; | |
708 | } | |
709 | ||
710 | static bool is_rename_source( | |
3ff1d123 | 711 | git_diff *diff, |
a21cbb12 RB |
712 | const git_diff_find_options *opts, |
713 | size_t delta_idx, | |
714 | void **cache) | |
715 | { | |
716 | git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); | |
717 | ||
718 | /* skip things that aren't blobs */ | |
719 | if (!GIT_MODE_ISBLOB(delta->old_file.mode)) | |
720 | return false; | |
721 | ||
722 | switch (delta->status) { | |
723 | case GIT_DELTA_ADDED: | |
724 | case GIT_DELTA_UNTRACKED: | |
61bef72d | 725 | case GIT_DELTA_UNREADABLE: |
a21cbb12 | 726 | case GIT_DELTA_IGNORED: |
50456801 | 727 | case GIT_DELTA_CONFLICTED: |
a21cbb12 RB |
728 | return false; |
729 | ||
730 | case GIT_DELTA_DELETED: | |
731 | case GIT_DELTA_TYPECHANGE: | |
732 | break; | |
733 | ||
734 | case GIT_DELTA_UNMODIFIED: | |
735 | if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED)) | |
736 | return false; | |
f62c174d | 737 | if (FLAG_SET(opts, GIT_DIFF_FIND_REMOVE_UNMODIFIED)) |
97ad85b8 | 738 | delta->flags |= GIT_DIFF_FLAG__TO_DELETE; |
a21cbb12 RB |
739 | break; |
740 | ||
741 | default: /* MODIFIED, RENAMED, COPIED */ | |
742 | /* if we're finding copies, this could be a source */ | |
743 | if (FLAG_SET(opts, GIT_DIFF_FIND_COPIES)) | |
744 | break; | |
745 | ||
746 | /* otherwise, this is only a source if we can split it */ | |
747 | if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) && | |
748 | !FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES)) | |
749 | return false; | |
750 | ||
751 | if (calc_self_similarity(diff, opts, delta_idx, cache) < 0) | |
752 | return false; | |
753 | ||
754 | if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) && | |
755 | delta->similarity < opts->break_rewrite_threshold) { | |
756 | delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; | |
757 | break; | |
758 | } | |
759 | ||
760 | if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) && | |
761 | delta->similarity < opts->rename_from_rewrite_threshold) | |
762 | break; | |
763 | ||
764 | return false; | |
765 | } | |
766 | ||
767 | delta->flags |= GIT_DIFF_FLAG__IS_RENAME_SOURCE; | |
768 | return true; | |
769 | } | |
770 | ||
771 | GIT_INLINE(bool) delta_is_split(git_diff_delta *delta) | |
772 | { | |
773 | return (delta->status == GIT_DELTA_TYPECHANGE || | |
774 | (delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0); | |
775 | } | |
776 | ||
777 | GIT_INLINE(bool) delta_is_new_only(git_diff_delta *delta) | |
778 | { | |
779 | return (delta->status == GIT_DELTA_ADDED || | |
780 | delta->status == GIT_DELTA_UNTRACKED || | |
61bef72d | 781 | delta->status == GIT_DELTA_UNREADABLE || |
a21cbb12 | 782 | delta->status == GIT_DELTA_IGNORED); |
9be5be47 | 783 | } |
db106d01 | 784 | |
e4acc3ba | 785 | GIT_INLINE(void) delta_make_rename( |
74a627f0 | 786 | git_diff_delta *to, const git_diff_delta *from, uint16_t similarity) |
e4acc3ba RB |
787 | { |
788 | to->status = GIT_DELTA_RENAMED; | |
789 | to->similarity = similarity; | |
74a627f0 | 790 | to->nfiles = 2; |
e4acc3ba RB |
791 | memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); |
792 | to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; | |
793 | } | |
794 | ||
/* One candidate pairing recorded while matching rename sources and targets. */
typedef struct {
	size_t idx;          /* index of the matched counterpart -- presumably a delta index; confirm against git_diff_find_similar */
	uint16_t similarity; /* similarity score of the pairing */
} diff_find_match;
799 | ||
db106d01 | 800 | int git_diff_find_similar( |
3ff1d123 | 801 | git_diff *diff, |
10672e3e | 802 | const git_diff_find_options *given_opts) |
db106d01 | 803 | { |
d730d3f4 | 804 | size_t s, t; |
74a627f0 RB |
805 | int error = 0, result; |
806 | uint16_t similarity; | |
d730d3f4 | 807 | git_diff_delta *src, *tgt; |
7e3ed419 | 808 | git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT; |
d730d3f4 RB |
809 | size_t num_deltas, num_srcs = 0, num_tgts = 0; |
810 | size_t tried_srcs = 0, tried_tgts = 0; | |
e4acc3ba | 811 | size_t num_rewrites = 0, num_updates = 0, num_bumped = 0; |
f1453c59 | 812 | size_t sigcache_size; |
7e3ed419 | 813 | void **sigcache = NULL; /* cache of similarity metric file signatures */ |
d730d3f4 RB |
814 | diff_find_match *tgt2src = NULL; |
815 | diff_find_match *src2tgt = NULL; | |
816 | diff_find_match *tgt2src_copy = NULL; | |
817 | diff_find_match *best_match; | |
e4acc3ba | 818 | git_diff_file swap; |
db106d01 | 819 | |
c25aa7cd | 820 | GIT_ASSERT_ARG(diff); |
ac3d33df | 821 | |
960a04dd | 822 | if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0) |
628e92cd BS |
823 | return error; |
824 | ||
d730d3f4 RB |
825 | num_deltas = diff->deltas.length; |
826 | ||
a21cbb12 | 827 | /* TODO: maybe abort if deltas.length > rename_limit ??? */ |
6c7cee42 | 828 | if (!num_deltas || !git__is_uint32(num_deltas)) |
7e3ed419 RB |
829 | goto cleanup; |
830 | ||
831 | /* No flags set; nothing to do */ | |
832 | if ((opts.flags & GIT_DIFF_FIND_ALL) == 0) | |
833 | goto cleanup; | |
960a04dd | 834 | |
ac3d33df | 835 | GIT_ERROR_CHECK_ALLOC_MULTIPLY(&sigcache_size, num_deltas, 2); |
f1453c59 | 836 | sigcache = git__calloc(sigcache_size, sizeof(void *)); |
ac3d33df | 837 | GIT_ERROR_CHECK_ALLOC(sigcache); |
e4acc3ba RB |
838 | |
839 | /* Label rename sources and targets | |
840 | * | |
841 | * This will also set self-similarity scores for MODIFIED files and | |
842 | * mark them for splitting if break-rewrites is enabled | |
843 | */ | |
d730d3f4 RB |
844 | git_vector_foreach(&diff->deltas, t, tgt) { |
845 | if (is_rename_source(diff, &opts, t, sigcache)) | |
e4acc3ba RB |
846 | ++num_srcs; |
847 | ||
d730d3f4 | 848 | if (is_rename_target(diff, &opts, t, sigcache)) |
e4acc3ba | 849 | ++num_tgts; |
17c7fbf6 ET |
850 | |
851 | if ((tgt->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) | |
852 | num_rewrites++; | |
e4acc3ba | 853 | } |
960a04dd | 854 | |
e4acc3ba RB |
855 | /* if there are no candidate srcs or tgts, we're done */ |
856 | if (!num_srcs || !num_tgts) | |
857 | goto cleanup; | |
960a04dd | 858 | |
d730d3f4 | 859 | src2tgt = git__calloc(num_deltas, sizeof(diff_find_match)); |
ac3d33df | 860 | GIT_ERROR_CHECK_ALLOC(src2tgt); |
d730d3f4 | 861 | tgt2src = git__calloc(num_deltas, sizeof(diff_find_match)); |
ac3d33df | 862 | GIT_ERROR_CHECK_ALLOC(tgt2src); |
d730d3f4 RB |
863 | |
864 | if (FLAG_SET(&opts, GIT_DIFF_FIND_COPIES)) { | |
865 | tgt2src_copy = git__calloc(num_deltas, sizeof(diff_find_match)); | |
ac3d33df | 866 | GIT_ERROR_CHECK_ALLOC(tgt2src_copy); |
d730d3f4 | 867 | } |
db106d01 | 868 | |
e4acc3ba RB |
869 | /* |
870 | * Find best-fit matches for rename / copy candidates | |
871 | */ | |
d958e37a | 872 | |
e4acc3ba RB |
873 | find_best_matches: |
874 | tried_tgts = num_bumped = 0; | |
db106d01 | 875 | |
d730d3f4 | 876 | git_vector_foreach(&diff->deltas, t, tgt) { |
a21cbb12 | 877 | /* skip things that are not rename targets */ |
d730d3f4 | 878 | if ((tgt->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0) |
d958e37a RB |
879 | continue; |
880 | ||
e4acc3ba | 881 | tried_srcs = 0; |
db106d01 | 882 | |
d730d3f4 | 883 | git_vector_foreach(&diff->deltas, s, src) { |
a21cbb12 | 884 | /* skip things that are not rename sources */ |
d730d3f4 | 885 | if ((src->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) == 0) |
960a04dd RB |
886 | continue; |
887 | ||
d958e37a | 888 | /* calculate similarity for this pair and find best match */ |
d730d3f4 | 889 | if (s == t) |
74a627f0 | 890 | result = -1; /* don't measure self-similarity here */ |
e4acc3ba | 891 | else if ((error = similarity_measure( |
74a627f0 | 892 | &result, diff, &opts, sigcache, 2 * s, 2 * t + 1)) < 0) |
960a04dd | 893 | goto cleanup; |
a21cbb12 | 894 | |
74a627f0 | 895 | if (result < 0) |
d730d3f4 | 896 | continue; |
74a627f0 | 897 | similarity = (uint16_t)result; |
d730d3f4 RB |
898 | |
899 | /* is this a better rename? */ | |
74a627f0 RB |
900 | if (tgt2src[t].similarity < similarity && |
901 | src2tgt[s].similarity < similarity) | |
e4acc3ba | 902 | { |
d730d3f4 RB |
903 | /* eject old mapping */ |
904 | if (src2tgt[s].similarity > 0) { | |
905 | tgt2src[src2tgt[s].idx].similarity = 0; | |
906 | num_bumped++; | |
907 | } | |
908 | if (tgt2src[t].similarity > 0) { | |
909 | src2tgt[tgt2src[t].idx].similarity = 0; | |
910 | num_bumped++; | |
e4acc3ba RB |
911 | } |
912 | ||
d730d3f4 | 913 | /* write new mapping */ |
74a627f0 RB |
914 | tgt2src[t].idx = s; |
915 | tgt2src[t].similarity = similarity; | |
916 | src2tgt[s].idx = t; | |
917 | src2tgt[s].similarity = similarity; | |
d730d3f4 | 918 | } |
a21cbb12 | 919 | |
d730d3f4 RB |
920 | /* keep best absolute match for copies */ |
921 | if (tgt2src_copy != NULL && | |
74a627f0 | 922 | tgt2src_copy[t].similarity < similarity) |
d730d3f4 | 923 | { |
74a627f0 RB |
924 | tgt2src_copy[t].idx = s; |
925 | tgt2src_copy[t].similarity = similarity; | |
db106d01 | 926 | } |
e4acc3ba RB |
927 | |
928 | if (++tried_srcs >= num_srcs) | |
929 | break; | |
930 | ||
d730d3f4 | 931 | /* cap on maximum targets we'll examine (per "tgt" file) */ |
e4acc3ba RB |
932 | if (tried_srcs > opts.rename_limit) |
933 | break; | |
db106d01 | 934 | } |
e4acc3ba RB |
935 | |
936 | if (++tried_tgts >= num_tgts) | |
937 | break; | |
db106d01 RB |
938 | } |
939 | ||
e4acc3ba RB |
940 | if (num_bumped > 0) /* try again if we bumped some items */ |
941 | goto find_best_matches; | |
942 | ||
943 | /* | |
944 | * Rewrite the diffs with renames / copies | |
945 | */ | |
946 | ||
d730d3f4 | 947 | git_vector_foreach(&diff->deltas, t, tgt) { |
e4acc3ba | 948 | /* skip things that are not rename targets */ |
d730d3f4 | 949 | if ((tgt->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0) |
a21cbb12 | 950 | continue; |
690bf41c | 951 | |
e4acc3ba | 952 | /* check if this delta was the target of a similarity */ |
d730d3f4 RB |
953 | if (tgt2src[t].similarity) |
954 | best_match = &tgt2src[t]; | |
955 | else if (tgt2src_copy && tgt2src_copy[t].similarity) | |
956 | best_match = &tgt2src_copy[t]; | |
957 | else | |
e4acc3ba | 958 | continue; |
d958e37a | 959 | |
d730d3f4 RB |
960 | s = best_match->idx; |
961 | src = GIT_VECTOR_GET(&diff->deltas, s); | |
d958e37a | 962 | |
a21cbb12 RB |
963 | /* possible scenarios: |
964 | * 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME | |
965 | * 2. from DELETE to SPLIT/TYPECHANGE = RENAME + DELETE | |
966 | * 3. from SPLIT/TYPECHANGE to ADD/UNTRACK/IGNORE = ADD + RENAME | |
967 | * 4. from SPLIT/TYPECHANGE to SPLIT/TYPECHANGE = RENAME + SPLIT | |
968 | * 5. from OTHER to ADD/UNTRACK/IGNORE = OTHER + COPY | |
db106d01 RB |
969 | */ |
970 | ||
d730d3f4 | 971 | if (src->status == GIT_DELTA_DELETED) { |
db106d01 | 972 | |
d730d3f4 | 973 | if (delta_is_new_only(tgt)) { |
db106d01 | 974 | |
e4acc3ba | 975 | if (best_match->similarity < opts.rename_threshold) |
a21cbb12 | 976 | continue; |
960a04dd | 977 | |
d730d3f4 | 978 | delta_make_rename(tgt, src, best_match->similarity); |
d958e37a | 979 | |
d730d3f4 | 980 | src->flags |= GIT_DIFF_FLAG__TO_DELETE; |
a21cbb12 RB |
981 | num_rewrites++; |
982 | } else { | |
c25aa7cd | 983 | GIT_ASSERT(delta_is_split(tgt)); |
960a04dd | 984 | |
e4acc3ba | 985 | if (best_match->similarity < opts.rename_from_rewrite_threshold) |
a21cbb12 | 986 | continue; |
db106d01 | 987 | |
d730d3f4 | 988 | memcpy(&swap, &tgt->old_file, sizeof(swap)); |
db106d01 | 989 | |
d730d3f4 | 990 | delta_make_rename(tgt, src, best_match->similarity); |
e4acc3ba RB |
991 | num_rewrites--; |
992 | ||
c25aa7cd | 993 | GIT_ASSERT(src->status == GIT_DELTA_DELETED); |
d730d3f4 RB |
994 | memcpy(&src->old_file, &swap, sizeof(src->old_file)); |
995 | memset(&src->new_file, 0, sizeof(src->new_file)); | |
996 | src->new_file.path = src->old_file.path; | |
9950bb4e | 997 | src->new_file.flags |= GIT_DIFF_FLAG_VALID_ID; |
db106d01 | 998 | |
d958e37a | 999 | num_updates++; |
7edb74d3 RB |
1000 | |
1001 | if (src2tgt[t].similarity > 0 && src2tgt[t].idx > t) { | |
1002 | /* what used to be at src t is now at src s */ | |
74a627f0 | 1003 | tgt2src[src2tgt[t].idx].idx = s; |
7edb74d3 | 1004 | } |
db106d01 RB |
1005 | } |
1006 | } | |
1007 | ||
d730d3f4 | 1008 | else if (delta_is_split(src)) { |
a21cbb12 | 1009 | |
d730d3f4 | 1010 | if (delta_is_new_only(tgt)) { |
db106d01 | 1011 | |
e4acc3ba | 1012 | if (best_match->similarity < opts.rename_threshold) |
a21cbb12 | 1013 | continue; |
d958e37a | 1014 | |
d730d3f4 | 1015 | delta_make_rename(tgt, src, best_match->similarity); |
a21cbb12 | 1016 | |
22a2d3d5 | 1017 | src->status = (diff->new_src == GIT_ITERATOR_WORKDIR) ? |
a21cbb12 | 1018 | GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED; |
74a627f0 | 1019 | src->nfiles = 1; |
d730d3f4 RB |
1020 | memset(&src->old_file, 0, sizeof(src->old_file)); |
1021 | src->old_file.path = src->new_file.path; | |
9950bb4e | 1022 | src->old_file.flags |= GIT_DIFF_FLAG_VALID_ID; |
e4acc3ba | 1023 | |
d730d3f4 | 1024 | src->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; |
e4acc3ba | 1025 | num_rewrites--; |
a21cbb12 RB |
1026 | |
1027 | num_updates++; | |
1028 | } else { | |
c25aa7cd | 1029 | GIT_ASSERT(delta_is_split(src)); |
a21cbb12 | 1030 | |
e4acc3ba | 1031 | if (best_match->similarity < opts.rename_from_rewrite_threshold) |
a21cbb12 RB |
1032 | continue; |
1033 | ||
d730d3f4 | 1034 | memcpy(&swap, &tgt->old_file, sizeof(swap)); |
a21cbb12 | 1035 | |
d730d3f4 | 1036 | delta_make_rename(tgt, src, best_match->similarity); |
e4acc3ba RB |
1037 | num_rewrites--; |
1038 | num_updates++; | |
a21cbb12 | 1039 | |
d730d3f4 | 1040 | memcpy(&src->old_file, &swap, sizeof(src->old_file)); |
a21cbb12 | 1041 | |
e4acc3ba | 1042 | /* if we've just swapped the new element into the correct |
c25aa7cd | 1043 | * place, clear the SPLIT and RENAME_TARGET flags |
67db583d | 1044 | */ |
d730d3f4 RB |
1045 | if (tgt2src[s].idx == t && |
1046 | tgt2src[s].similarity > | |
67db583d | 1047 | opts.rename_from_rewrite_threshold) { |
d730d3f4 RB |
1048 | src->status = GIT_DELTA_RENAMED; |
1049 | src->similarity = tgt2src[s].similarity; | |
1050 | tgt2src[s].similarity = 0; | |
c25aa7cd | 1051 | src->flags &= ~(GIT_DIFF_FLAG__TO_SPLIT | GIT_DIFF_FLAG__IS_RENAME_TARGET); |
67db583d RB |
1052 | num_rewrites--; |
1053 | } | |
e4acc3ba | 1054 | /* otherwise, if we just overwrote a source, update mapping */ |
7edb74d3 | 1055 | else if (src2tgt[t].similarity > 0 && src2tgt[t].idx > t) { |
d730d3f4 | 1056 | /* what used to be at src t is now at src s */ |
74a627f0 | 1057 | tgt2src[src2tgt[t].idx].idx = s; |
e4acc3ba | 1058 | } |
67db583d | 1059 | |
a21cbb12 RB |
1060 | num_updates++; |
1061 | } | |
1062 | } | |
1063 | ||
2123a17f | 1064 | else if (FLAG_SET(&opts, GIT_DIFF_FIND_COPIES)) { |
d730d3f4 | 1065 | if (tgt2src_copy[t].similarity < opts.copy_threshold) |
a21cbb12 RB |
1066 | continue; |
1067 | ||
d730d3f4 RB |
1068 | /* always use best possible source for copy */ |
1069 | best_match = &tgt2src_copy[t]; | |
1070 | src = GIT_VECTOR_GET(&diff->deltas, best_match->idx); | |
1071 | ||
2123a17f RB |
1072 | if (delta_is_split(tgt)) { |
1073 | error = insert_delete_side_of_split(diff, &diff->deltas, tgt); | |
1074 | if (error < 0) | |
1075 | goto cleanup; | |
1076 | num_rewrites--; | |
1077 | } | |
1078 | ||
1079 | if (!delta_is_split(tgt) && !delta_is_new_only(tgt)) | |
1080 | continue; | |
1081 | ||
d730d3f4 RB |
1082 | tgt->status = GIT_DELTA_COPIED; |
1083 | tgt->similarity = best_match->similarity; | |
74a627f0 | 1084 | tgt->nfiles = 2; |
d730d3f4 | 1085 | memcpy(&tgt->old_file, &src->old_file, sizeof(tgt->old_file)); |
2123a17f | 1086 | tgt->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; |
a21cbb12 RB |
1087 | |
1088 | num_updates++; | |
1089 | } | |
db106d01 RB |
1090 | } |
1091 | ||
e4acc3ba RB |
1092 | /* |
1093 | * Actually split and delete entries as needed | |
1094 | */ | |
1095 | ||
a21cbb12 | 1096 | if (num_rewrites > 0 || num_updates > 0) |
960a04dd | 1097 | error = apply_splits_and_deletes( |
d958e37a | 1098 | diff, diff->deltas.length - num_rewrites, |
17c7fbf6 ET |
1099 | FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES) && |
1100 | !FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES_FOR_RENAMES_ONLY)); | |
d958e37a | 1101 | |
960a04dd | 1102 | cleanup: |
d730d3f4 RB |
1103 | git__free(tgt2src); |
1104 | git__free(src2tgt); | |
1105 | git__free(tgt2src_copy); | |
db106d01 | 1106 | |
7e3ed419 RB |
1107 | if (sigcache) { |
1108 | for (t = 0; t < num_deltas * 2; ++t) { | |
1109 | if (sigcache[t] != NULL) | |
1110 | opts.metric->free_signature(sigcache[t], opts.metric->payload); | |
1111 | } | |
1112 | git__free(sigcache); | |
db106d01 RB |
1113 | } |
1114 | ||
f8275890 RB |
1115 | if (!given_opts || !given_opts->metric) |
1116 | git__free(opts.metric); | |
1117 | ||
960a04dd | 1118 | return error; |
db106d01 RB |
1119 | } |
1120 | ||
1121 | #undef FLAG_SET |