]>
Commit | Line | Data |
---|---|---|
db106d01 | 1 | /* |
359fc2d2 | 2 | * Copyright (C) the libgit2 contributors. All rights reserved. |
db106d01 RB |
3 | * |
4 | * This file is part of libgit2, distributed under the GNU GPL v2 with | |
5 | * a Linking Exception. For full terms see the included COPYING file. | |
6 | */ | |
7 | #include "common.h" | |
114f5a6c | 8 | |
db106d01 | 9 | #include "git2/config.h" |
960a04dd | 10 | #include "git2/blob.h" |
114f5a6c RB |
11 | |
12 | #include "diff.h" | |
5e5848eb | 13 | #include "hashsig.h" |
114f5a6c RB |
14 | #include "path.h" |
15 | #include "fileops.h" | |
9f77b3f6 | 16 | #include "config.h" |
db106d01 RB |
17 | |
18 | static git_diff_delta *diff_delta__dup( | |
19 | const git_diff_delta *d, git_pool *pool) | |
20 | { | |
21 | git_diff_delta *delta = git__malloc(sizeof(git_diff_delta)); | |
22 | if (!delta) | |
23 | return NULL; | |
24 | ||
25 | memcpy(delta, d, sizeof(git_diff_delta)); | |
c68b09dc | 26 | GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags); |
db106d01 | 27 | |
d958e37a RB |
28 | if (d->old_file.path != NULL) { |
29 | delta->old_file.path = git_pool_strdup(pool, d->old_file.path); | |
30 | if (delta->old_file.path == NULL) | |
31 | goto fail; | |
32 | } | |
db106d01 | 33 | |
d958e37a | 34 | if (d->new_file.path != d->old_file.path && d->new_file.path != NULL) { |
db106d01 RB |
35 | delta->new_file.path = git_pool_strdup(pool, d->new_file.path); |
36 | if (delta->new_file.path == NULL) | |
37 | goto fail; | |
38 | } else { | |
39 | delta->new_file.path = delta->old_file.path; | |
40 | } | |
41 | ||
42 | return delta; | |
43 | ||
44 | fail: | |
45 | git__free(delta); | |
46 | return NULL; | |
47 | } | |
48 | ||
49 | static git_diff_delta *diff_delta__merge_like_cgit( | |
3940310e RB |
50 | const git_diff_delta *a, |
51 | const git_diff_delta *b, | |
52 | git_pool *pool) | |
db106d01 RB |
53 | { |
54 | git_diff_delta *dup; | |
55 | ||
56 | /* Emulate C git for merging two diffs (a la 'git diff <sha>'). | |
57 | * | |
58 | * When C git does a diff between the work dir and a tree, it actually | |
59 | * diffs with the index but uses the workdir contents. This emulates | |
60 | * those choices so we can emulate the type of diff. | |
61 | * | |
62 | * We have three file descriptions here, let's call them: | |
63 | * f1 = a->old_file | |
64 | * f2 = a->new_file AND b->old_file | |
65 | * f3 = b->new_file | |
66 | */ | |
67 | ||
68 | /* if f2 == f3 or f2 is deleted, then just dup the 'a' diff */ | |
69 | if (b->status == GIT_DELTA_UNMODIFIED || a->status == GIT_DELTA_DELETED) | |
70 | return diff_delta__dup(a, pool); | |
71 | ||
72 | /* otherwise, base this diff on the 'b' diff */ | |
73 | if ((dup = diff_delta__dup(b, pool)) == NULL) | |
74 | return NULL; | |
75 | ||
76 | /* If 'a' status is uninteresting, then we're done */ | |
77 | if (a->status == GIT_DELTA_UNMODIFIED) | |
78 | return dup; | |
79 | ||
80 | assert(a->status != GIT_DELTA_UNMODIFIED); | |
81 | assert(b->status != GIT_DELTA_UNMODIFIED); | |
82 | ||
83 | /* A cgit exception is that the diff of a file that is only in the | |
84 | * index (i.e. not in HEAD nor workdir) is given as empty. | |
85 | */ | |
86 | if (dup->status == GIT_DELTA_DELETED) { | |
87 | if (a->status == GIT_DELTA_ADDED) | |
88 | dup->status = GIT_DELTA_UNMODIFIED; | |
89 | /* else don't overwrite DELETE status */ | |
90 | } else { | |
91 | dup->status = a->status; | |
92 | } | |
93 | ||
94 | git_oid_cpy(&dup->old_file.oid, &a->old_file.oid); | |
95 | dup->old_file.mode = a->old_file.mode; | |
96 | dup->old_file.size = a->old_file.size; | |
97 | dup->old_file.flags = a->old_file.flags; | |
98 | ||
99 | return dup; | |
100 | } | |
101 | ||
e7c85120 RB |
102 | static git_diff_delta *diff_delta__merge_like_cgit_reversed( |
103 | const git_diff_delta *a, | |
104 | const git_diff_delta *b, | |
105 | git_pool *pool) | |
106 | { | |
107 | git_diff_delta *dup; | |
108 | ||
109 | /* reversed version of above logic */ | |
110 | ||
111 | if (a->status == GIT_DELTA_UNMODIFIED) | |
112 | return diff_delta__dup(b, pool); | |
113 | ||
114 | if ((dup = diff_delta__dup(a, pool)) == NULL) | |
115 | return NULL; | |
116 | ||
117 | if (b->status == GIT_DELTA_UNMODIFIED || b->status == GIT_DELTA_UNTRACKED) | |
118 | return dup; | |
119 | ||
120 | if (dup->status == GIT_DELTA_DELETED) { | |
121 | if (b->status == GIT_DELTA_ADDED) | |
122 | dup->status = GIT_DELTA_UNMODIFIED; | |
123 | } else { | |
124 | dup->status = b->status; | |
125 | } | |
126 | ||
127 | git_oid_cpy(&dup->old_file.oid, &b->old_file.oid); | |
128 | dup->old_file.mode = b->old_file.mode; | |
129 | dup->old_file.size = b->old_file.size; | |
130 | dup->old_file.flags = b->old_file.flags; | |
131 | ||
132 | return dup; | |
133 | } | |
134 | ||
135 | int git_diff_merge(git_diff *onto, const git_diff *from) | |
db106d01 RB |
136 | { |
137 | int error = 0; | |
138 | git_pool onto_pool; | |
139 | git_vector onto_new; | |
140 | git_diff_delta *delta; | |
e7c85120 | 141 | bool ignore_case, reversed; |
db106d01 RB |
142 | unsigned int i, j; |
143 | ||
144 | assert(onto && from); | |
145 | ||
146 | if (!from->deltas.length) | |
147 | return 0; | |
148 | ||
e7c85120 RB |
149 | ignore_case = ((onto->opts.flags & GIT_DIFF_IGNORE_CASE) != 0); |
150 | reversed = ((onto->opts.flags & GIT_DIFF_REVERSE) != 0); | |
151 | ||
152 | if (ignore_case != ((from->opts.flags & GIT_DIFF_IGNORE_CASE) != 0) || | |
153 | reversed != ((from->opts.flags & GIT_DIFF_REVERSE) != 0)) { | |
3940310e RB |
154 | giterr_set(GITERR_INVALID, |
155 | "Attempt to merge diffs created with conflicting options"); | |
156 | return -1; | |
157 | } | |
158 | ||
db106d01 RB |
159 | if (git_vector_init( |
160 | &onto_new, onto->deltas.length, git_diff_delta__cmp) < 0 || | |
161 | git_pool_init(&onto_pool, 1, 0) < 0) | |
162 | return -1; | |
163 | ||
db106d01 RB |
164 | for (i = 0, j = 0; i < onto->deltas.length || j < from->deltas.length; ) { |
165 | git_diff_delta *o = GIT_VECTOR_GET(&onto->deltas, i); | |
166 | const git_diff_delta *f = GIT_VECTOR_GET(&from->deltas, j); | |
3940310e RB |
167 | int cmp = !f ? -1 : !o ? 1 : |
168 | STRCMP_CASESELECT(ignore_case, o->old_file.path, f->old_file.path); | |
db106d01 RB |
169 | |
170 | if (cmp < 0) { | |
171 | delta = diff_delta__dup(o, &onto_pool); | |
172 | i++; | |
173 | } else if (cmp > 0) { | |
174 | delta = diff_delta__dup(f, &onto_pool); | |
175 | j++; | |
176 | } else { | |
e7c85120 RB |
177 | delta = reversed ? |
178 | diff_delta__merge_like_cgit_reversed(o, f, &onto_pool) : | |
179 | diff_delta__merge_like_cgit(o, f, &onto_pool); | |
db106d01 RB |
180 | i++; |
181 | j++; | |
182 | } | |
183 | ||
184 | /* the ignore rules for the target may not match the source | |
185 | * or the result of a merged delta could be skippable... | |
186 | */ | |
187 | if (git_diff_delta__should_skip(&onto->opts, delta)) { | |
188 | git__free(delta); | |
189 | continue; | |
190 | } | |
191 | ||
192 | if ((error = !delta ? -1 : git_vector_insert(&onto_new, delta)) < 0) | |
193 | break; | |
194 | } | |
195 | ||
196 | if (!error) { | |
197 | git_vector_swap(&onto->deltas, &onto_new); | |
198 | git_pool_swap(&onto->pool, &onto_pool); | |
3940310e RB |
199 | |
200 | if ((onto->opts.flags & GIT_DIFF_REVERSE) != 0) | |
201 | onto->old_src = from->old_src; | |
202 | else | |
203 | onto->new_src = from->new_src; | |
db106d01 RB |
204 | |
205 | /* prefix strings also come from old pool, so recreate those.*/ | |
206 | onto->opts.old_prefix = | |
207 | git_pool_strdup_safe(&onto->pool, onto->opts.old_prefix); | |
208 | onto->opts.new_prefix = | |
209 | git_pool_strdup_safe(&onto->pool, onto->opts.new_prefix); | |
210 | } | |
211 | ||
9cfce273 | 212 | git_vector_free_deep(&onto_new); |
db106d01 RB |
213 | git_pool_clear(&onto_pool); |
214 | ||
215 | return error; | |
216 | } | |
217 | ||
0462fba5 | 218 | int git_diff_find_similar__hashsig_for_file( |
f8275890 RB |
219 | void **out, const git_diff_file *f, const char *path, void *p) |
220 | { | |
221 | git_hashsig_option_t opt = (git_hashsig_option_t)p; | |
aa408cbf ET |
222 | int error = 0; |
223 | ||
f8275890 | 224 | GIT_UNUSED(f); |
aa408cbf | 225 | error = git_hashsig_create_fromfile((git_hashsig **)out, path, opt); |
1fed6b07 | 226 | |
aa408cbf ET |
227 | if (error == GIT_EBUFS) { |
228 | error = 0; | |
229 | giterr_clear(); | |
230 | } | |
231 | ||
232 | return error; | |
f8275890 | 233 | } |
9bc8be3d | 234 | |
0462fba5 | 235 | int git_diff_find_similar__hashsig_for_buf( |
f8275890 RB |
236 | void **out, const git_diff_file *f, const char *buf, size_t len, void *p) |
237 | { | |
238 | git_hashsig_option_t opt = (git_hashsig_option_t)p; | |
aa408cbf | 239 | int error = 0; |
0462fba5 | 240 | |
f8275890 | 241 | GIT_UNUSED(f); |
aa408cbf | 242 | error = git_hashsig_create((git_hashsig **)out, buf, len, opt); |
1fed6b07 | 243 | |
aa408cbf ET |
244 | if (error == GIT_EBUFS) { |
245 | error = 0; | |
246 | giterr_clear(); | |
247 | } | |
248 | ||
249 | return error; | |
f8275890 | 250 | } |
9bc8be3d | 251 | |
0462fba5 | 252 | void git_diff_find_similar__hashsig_free(void *sig, void *payload) |
9bc8be3d RB |
253 | { |
254 | GIT_UNUSED(payload); | |
255 | git_hashsig_free(sig); | |
256 | } | |
257 | ||
0462fba5 | 258 | int git_diff_find_similar__calc_similarity( |
9bc8be3d RB |
259 | int *score, void *siga, void *sigb, void *payload) |
260 | { | |
261 | GIT_UNUSED(payload); | |
262 | *score = git_hashsig_compare(siga, sigb); | |
263 | return 0; | |
264 | } | |
265 | ||
db106d01 RB |
266 | #define DEFAULT_THRESHOLD 50 |
267 | #define DEFAULT_BREAK_REWRITE_THRESHOLD 60 | |
a21cbb12 | 268 | #define DEFAULT_RENAME_LIMIT 200 |
db106d01 RB |
269 | |
270 | static int normalize_find_opts( | |
3ff1d123 | 271 | git_diff *diff, |
db106d01 | 272 | git_diff_find_options *opts, |
10672e3e | 273 | const git_diff_find_options *given) |
db106d01 RB |
274 | { |
275 | git_config *cfg = NULL; | |
db106d01 | 276 | |
5a52d6be BS |
277 | GITERR_CHECK_VERSION(given, GIT_DIFF_FIND_OPTIONS_VERSION, "git_diff_find_options"); |
278 | ||
db106d01 RB |
279 | if (diff->repo != NULL && |
280 | git_repository_config__weakptr(&cfg, diff->repo) < 0) | |
281 | return -1; | |
282 | ||
7e3ed419 | 283 | if (given) |
db106d01 | 284 | memcpy(opts, given, sizeof(*opts)); |
db106d01 | 285 | |
c56c6d69 BS |
286 | if (!given || |
287 | (given->flags & GIT_DIFF_FIND_ALL) == GIT_DIFF_FIND_BY_CONFIG) | |
288 | { | |
9f77b3f6 RB |
289 | const char *rule = |
290 | git_config__get_string_force(cfg, "diff.renames", "true"); | |
291 | int boolval; | |
292 | ||
293 | if (!git__parse_bool(&boolval, rule) && !boolval) | |
294 | /* don't set FIND_RENAMES if bool value is false */; | |
295 | else if (!strcasecmp(rule, "copies") || !strcasecmp(rule, "copy")) | |
296 | opts->flags |= GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES; | |
297 | else | |
298 | opts->flags |= GIT_DIFF_FIND_RENAMES; | |
db106d01 | 299 | } |
ca901e7b | 300 | |
db106d01 RB |
301 | /* some flags imply others */ |
302 | ||
9be5be47 RB |
303 | if (opts->flags & GIT_DIFF_FIND_EXACT_MATCH_ONLY) { |
304 | /* if we are only looking for exact matches, then don't turn | |
305 | * MODIFIED items into ADD/DELETE pairs because it's too picky | |
306 | */ | |
307 | opts->flags &= ~(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES); | |
308 | ||
309 | /* similarly, don't look for self-rewrites to split */ | |
310 | opts->flags &= ~GIT_DIFF_FIND_RENAMES_FROM_REWRITES; | |
311 | } | |
312 | ||
db106d01 RB |
313 | if (opts->flags & GIT_DIFF_FIND_RENAMES_FROM_REWRITES) |
314 | opts->flags |= GIT_DIFF_FIND_RENAMES; | |
315 | ||
316 | if (opts->flags & GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED) | |
317 | opts->flags |= GIT_DIFF_FIND_COPIES; | |
318 | ||
d958e37a RB |
319 | if (opts->flags & GIT_DIFF_BREAK_REWRITES) |
320 | opts->flags |= GIT_DIFF_FIND_REWRITES; | |
321 | ||
db106d01 RB |
322 | #define USE_DEFAULT(X) ((X) == 0 || (X) > 100) |
323 | ||
324 | if (USE_DEFAULT(opts->rename_threshold)) | |
325 | opts->rename_threshold = DEFAULT_THRESHOLD; | |
326 | ||
327 | if (USE_DEFAULT(opts->rename_from_rewrite_threshold)) | |
328 | opts->rename_from_rewrite_threshold = DEFAULT_THRESHOLD; | |
329 | ||
330 | if (USE_DEFAULT(opts->copy_threshold)) | |
331 | opts->copy_threshold = DEFAULT_THRESHOLD; | |
332 | ||
333 | if (USE_DEFAULT(opts->break_rewrite_threshold)) | |
334 | opts->break_rewrite_threshold = DEFAULT_BREAK_REWRITE_THRESHOLD; | |
335 | ||
336 | #undef USE_DEFAULT | |
337 | ||
a21cbb12 | 338 | if (!opts->rename_limit) { |
9f77b3f6 RB |
339 | opts->rename_limit = git_config__get_int_force( |
340 | cfg, "diff.renamelimit", DEFAULT_RENAME_LIMIT); | |
db106d01 | 341 | |
9f77b3f6 RB |
342 | if (opts->rename_limit <= 0) |
343 | opts->rename_limit = DEFAULT_RENAME_LIMIT; | |
db106d01 RB |
344 | } |
345 | ||
f8275890 | 346 | /* assign the internal metric with whitespace flag as payload */ |
9bc8be3d | 347 | if (!opts->metric) { |
f8275890 RB |
348 | opts->metric = git__malloc(sizeof(git_diff_similarity_metric)); |
349 | GITERR_CHECK_ALLOC(opts->metric); | |
350 | ||
0462fba5 ET |
351 | opts->metric->file_signature = git_diff_find_similar__hashsig_for_file; |
352 | opts->metric->buffer_signature = git_diff_find_similar__hashsig_for_buf; | |
353 | opts->metric->free_signature = git_diff_find_similar__hashsig_free; | |
354 | opts->metric->similarity = git_diff_find_similar__calc_similarity; | |
f8275890 | 355 | |
9bc8be3d | 356 | if (opts->flags & GIT_DIFF_FIND_IGNORE_WHITESPACE) |
f8275890 | 357 | opts->metric->payload = (void *)GIT_HASHSIG_IGNORE_WHITESPACE; |
9bc8be3d | 358 | else if (opts->flags & GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE) |
f8275890 | 359 | opts->metric->payload = (void *)GIT_HASHSIG_NORMAL; |
9bc8be3d | 360 | else |
f8275890 | 361 | opts->metric->payload = (void *)GIT_HASHSIG_SMART_WHITESPACE; |
9bc8be3d RB |
362 | } |
363 | ||
db106d01 RB |
364 | return 0; |
365 | } | |
366 | ||
2123a17f RB |
367 | static int insert_delete_side_of_split( |
368 | git_diff *diff, git_vector *onto, const git_diff_delta *delta) | |
369 | { | |
370 | /* make new record for DELETED side of split */ | |
371 | git_diff_delta *deleted = diff_delta__dup(delta, &diff->pool); | |
372 | GITERR_CHECK_ALLOC(deleted); | |
373 | ||
374 | deleted->status = GIT_DELTA_DELETED; | |
375 | deleted->nfiles = 1; | |
376 | memset(&deleted->new_file, 0, sizeof(deleted->new_file)); | |
377 | deleted->new_file.path = deleted->old_file.path; | |
378 | deleted->new_file.flags |= GIT_DIFF_FLAG_VALID_OID; | |
379 | ||
380 | return git_vector_insert(onto, deleted); | |
381 | } | |
382 | ||
d958e37a | 383 | static int apply_splits_and_deletes( |
3ff1d123 | 384 | git_diff *diff, size_t expected_size, bool actually_split) |
db106d01 RB |
385 | { |
386 | git_vector onto = GIT_VECTOR_INIT; | |
387 | size_t i; | |
2123a17f | 388 | git_diff_delta *delta; |
db106d01 RB |
389 | |
390 | if (git_vector_init(&onto, expected_size, git_diff_delta__cmp) < 0) | |
391 | return -1; | |
392 | ||
393 | /* build new delta list without TO_DELETE and splitting TO_SPLIT */ | |
394 | git_vector_foreach(&diff->deltas, i, delta) { | |
71a3d27e | 395 | if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0) |
db106d01 | 396 | continue; |
db106d01 | 397 | |
a21cbb12 | 398 | if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0 && actually_split) { |
d958e37a RB |
399 | delta->similarity = 0; |
400 | ||
2123a17f | 401 | if (insert_delete_side_of_split(diff, &onto, delta) < 0) |
11d9f6b3 | 402 | goto on_error; |
db106d01 | 403 | |
9be5be47 RB |
404 | if (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) |
405 | delta->status = GIT_DELTA_UNTRACKED; | |
406 | else | |
407 | delta->status = GIT_DELTA_ADDED; | |
74a627f0 | 408 | delta->nfiles = 1; |
db106d01 RB |
409 | memset(&delta->old_file, 0, sizeof(delta->old_file)); |
410 | delta->old_file.path = delta->new_file.path; | |
71a3d27e | 411 | delta->old_file.flags |= GIT_DIFF_FLAG_VALID_OID; |
db106d01 RB |
412 | } |
413 | ||
c68b09dc RB |
414 | /* clean up delta before inserting into new list */ |
415 | GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags); | |
416 | ||
417 | if (delta->status != GIT_DELTA_COPIED && | |
418 | delta->status != GIT_DELTA_RENAMED && | |
419 | (delta->status != GIT_DELTA_MODIFIED || actually_split)) | |
420 | delta->similarity = 0; | |
421 | ||
422 | /* insert into new list */ | |
11d9f6b3 PK |
423 | if (git_vector_insert(&onto, delta) < 0) |
424 | goto on_error; | |
db106d01 RB |
425 | } |
426 | ||
11d9f6b3 | 427 | /* cannot return an error past this point */ |
c68b09dc RB |
428 | |
429 | /* free deltas from old list that didn't make it to the new one */ | |
a21cbb12 | 430 | git_vector_foreach(&diff->deltas, i, delta) { |
71a3d27e | 431 | if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0) |
11d9f6b3 | 432 | git__free(delta); |
a21cbb12 RB |
433 | } |
434 | ||
db106d01 | 435 | /* swap new delta list into place */ |
db106d01 RB |
436 | git_vector_swap(&diff->deltas, &onto); |
437 | git_vector_free(&onto); | |
a21cbb12 | 438 | git_vector_sort(&diff->deltas); |
db106d01 RB |
439 | |
440 | return 0; | |
11d9f6b3 PK |
441 | |
442 | on_error: | |
9cfce273 | 443 | git_vector_free_deep(&onto); |
11d9f6b3 PK |
444 | |
445 | return -1; | |
db106d01 RB |
446 | } |
447 | ||
3ff1d123 | 448 | GIT_INLINE(git_diff_file *) similarity_get_file(git_diff *diff, size_t idx) |
960a04dd RB |
449 | { |
450 | git_diff_delta *delta = git_vector_get(&diff->deltas, idx / 2); | |
451 | return (idx & 1) ? &delta->new_file : &delta->old_file; | |
452 | } | |
99ba8f23 | 453 | |
a5140f4d RB |
454 | typedef struct { |
455 | size_t idx; | |
456 | git_iterator_type_t src; | |
457 | git_repository *repo; | |
458 | git_diff_file *file; | |
459 | git_buf data; | |
effdbeb3 | 460 | git_odb_object *odb_obj; |
a5140f4d | 461 | git_blob *blob; |
a5140f4d RB |
462 | } similarity_info; |
463 | ||
effdbeb3 | 464 | static int similarity_init( |
3ff1d123 | 465 | similarity_info *info, git_diff *diff, size_t file_idx) |
a5140f4d RB |
466 | { |
467 | info->idx = file_idx; | |
468 | info->src = (file_idx & 1) ? diff->new_src : diff->old_src; | |
469 | info->repo = diff->repo; | |
470 | info->file = similarity_get_file(diff, file_idx); | |
effdbeb3 | 471 | info->odb_obj = NULL; |
a5140f4d | 472 | info->blob = NULL; |
a5140f4d | 473 | git_buf_init(&info->data, 0); |
09fae31d | 474 | |
410a8e6f | 475 | if (info->file->size > 0 || info->src == GIT_ITERATOR_TYPE_WORKDIR) |
effdbeb3 | 476 | return 0; |
5e5848eb | 477 | |
effdbeb3 RB |
478 | return git_diff_file__resolve_zero_size( |
479 | info->file, &info->odb_obj, info->repo); | |
a5140f4d | 480 | } |
960a04dd | 481 | |
d730d3f4 | 482 | static int similarity_sig( |
a5140f4d RB |
483 | similarity_info *info, |
484 | const git_diff_find_options *opts, | |
485 | void **cache) | |
486 | { | |
487 | int error = 0; | |
effdbeb3 | 488 | git_diff_file *file = info->file; |
8cfd54f0 | 489 | |
effdbeb3 RB |
490 | if (info->src == GIT_ITERATOR_TYPE_WORKDIR) { |
491 | if ((error = git_buf_joinpath( | |
492 | &info->data, git_repository_workdir(info->repo), file->path)) < 0) | |
493 | return error; | |
960a04dd | 494 | |
effdbeb3 RB |
495 | /* if path is not a regular file, just skip this item */ |
496 | if (!git_path_isfile(info->data.ptr)) | |
497 | return 0; | |
a5140f4d | 498 | |
a5140f4d RB |
499 | /* TODO: apply wd-to-odb filters to file data if necessary */ |
500 | ||
501 | error = opts->metric->file_signature( | |
502 | &cache[info->idx], info->file, | |
503 | info->data.ptr, opts->metric->payload); | |
504 | } else { | |
effdbeb3 RB |
505 | /* if we didn't initially know the size, we might have an odb_obj |
506 | * around from earlier, so convert that, otherwise load the blob now | |
507 | */ | |
508 | if (info->odb_obj != NULL) | |
509 | error = git_object__from_odb_object( | |
510 | (git_object **)&info->blob, info->repo, | |
511 | info->odb_obj, GIT_OBJ_BLOB); | |
512 | else | |
513 | error = git_blob_lookup(&info->blob, info->repo, &file->oid); | |
514 | ||
515 | if (error < 0) { | |
516 | /* if lookup fails, just skip this item in similarity calc */ | |
517 | giterr_clear(); | |
518 | } else { | |
a16e4172 RB |
519 | size_t sz; |
520 | ||
521 | /* index size may not be actual blob size if filtered */ | |
522 | if (file->size != git_blob_rawsize(info->blob)) | |
523 | file->size = git_blob_rawsize(info->blob); | |
524 | ||
525 | sz = (size_t)(git__is_sizet(file->size) ? file->size : -1); | |
effdbeb3 RB |
526 | |
527 | error = opts->metric->buffer_signature( | |
528 | &cache[info->idx], info->file, | |
529 | git_blob_rawcontent(info->blob), sz, opts->metric->payload); | |
530 | } | |
960a04dd RB |
531 | } |
532 | ||
533 | return error; | |
534 | } | |
535 | ||
effdbeb3 RB |
536 | static void similarity_unload(similarity_info *info) |
537 | { | |
538 | if (info->odb_obj) | |
539 | git_odb_object_free(info->odb_obj); | |
540 | ||
541 | if (info->blob) | |
542 | git_blob_free(info->blob); | |
543 | else | |
544 | git_buf_free(&info->data); | |
545 | } | |
546 | ||
a21cbb12 | 547 | #define FLAG_SET(opts,flag_name) (((opts)->flags & flag_name) != 0) |
9be5be47 RB |
548 | |
549 | /* - score < 0 means files cannot be compared | |
550 | * - score >= 100 means files are exact match | |
551 | * - score == 0 means files are completely different | |
552 | */ | |
960a04dd | 553 | static int similarity_measure( |
9be5be47 | 554 | int *score, |
3ff1d123 | 555 | git_diff *diff, |
a21cbb12 | 556 | const git_diff_find_options *opts, |
960a04dd RB |
557 | void **cache, |
558 | size_t a_idx, | |
559 | size_t b_idx) | |
560 | { | |
960a04dd RB |
561 | git_diff_file *a_file = similarity_get_file(diff, a_idx); |
562 | git_diff_file *b_file = similarity_get_file(diff, b_idx); | |
a21cbb12 | 563 | bool exact_match = FLAG_SET(opts, GIT_DIFF_FIND_EXACT_MATCH_ONLY); |
a5140f4d RB |
564 | int error = 0; |
565 | similarity_info a_info, b_info; | |
9be5be47 RB |
566 | |
567 | *score = -1; | |
960a04dd | 568 | |
9be5be47 | 569 | /* don't try to compare files of different types */ |
960a04dd RB |
570 | if (GIT_MODE_TYPE(a_file->mode) != GIT_MODE_TYPE(b_file->mode)) |
571 | return 0; | |
572 | ||
a1683f28 | 573 | /* if exact match is requested, force calculation of missing OIDs now */ |
9be5be47 RB |
574 | if (exact_match) { |
575 | if (git_oid_iszero(&a_file->oid) && | |
576 | diff->old_src == GIT_ITERATOR_TYPE_WORKDIR && | |
577 | !git_diff__oid_for_file(diff->repo, a_file->path, | |
578 | a_file->mode, a_file->size, &a_file->oid)) | |
579 | a_file->flags |= GIT_DIFF_FLAG_VALID_OID; | |
580 | ||
581 | if (git_oid_iszero(&b_file->oid) && | |
582 | diff->new_src == GIT_ITERATOR_TYPE_WORKDIR && | |
583 | !git_diff__oid_for_file(diff->repo, b_file->path, | |
584 | b_file->mode, b_file->size, &b_file->oid)) | |
585 | b_file->flags |= GIT_DIFF_FLAG_VALID_OID; | |
586 | } | |
587 | ||
588 | /* check OID match as a quick test */ | |
589 | if (git_oid__cmp(&a_file->oid, &b_file->oid) == 0) { | |
590 | *score = 100; | |
591 | return 0; | |
592 | } | |
593 | ||
594 | /* don't calculate signatures if we are doing exact match */ | |
595 | if (exact_match) { | |
596 | *score = 0; | |
597 | return 0; | |
598 | } | |
db106d01 | 599 | |
effdbeb3 RB |
600 | memset(&a_info, 0, sizeof(a_info)); |
601 | memset(&b_info, 0, sizeof(b_info)); | |
a5140f4d | 602 | |
effdbeb3 RB |
603 | /* set up similarity data (will try to update missing file sizes) */ |
604 | if (!cache[a_idx] && (error = similarity_init(&a_info, diff, a_idx)) < 0) | |
605 | return error; | |
606 | if (!cache[b_idx] && (error = similarity_init(&b_info, diff, b_idx)) < 0) | |
607 | goto cleanup; | |
a5140f4d | 608 | |
f5c4d022 | 609 | /* check if file sizes are nowhere near each other */ |
18e9efc4 RB |
610 | if (a_file->size > 127 && |
611 | b_file->size > 127 && | |
d730d3f4 RB |
612 | (a_file->size > (b_file->size << 3) || |
613 | b_file->size > (a_file->size << 3))) | |
effdbeb3 | 614 | goto cleanup; |
18e9efc4 | 615 | |
960a04dd | 616 | /* update signature cache if needed */ |
d730d3f4 RB |
617 | if (!cache[a_idx]) { |
618 | if ((error = similarity_sig(&a_info, opts, cache)) < 0) | |
619 | goto cleanup; | |
620 | } | |
621 | if (!cache[b_idx]) { | |
622 | if ((error = similarity_sig(&b_info, opts, cache)) < 0) | |
623 | goto cleanup; | |
624 | } | |
1fed6b07 | 625 | |
a5140f4d RB |
626 | /* calculate similarity provided that the metric choose to process |
627 | * both the a and b files (some may not if file is too big, etc). | |
628 | */ | |
629 | if (cache[a_idx] && cache[b_idx]) | |
630 | error = opts->metric->similarity( | |
631 | score, cache[a_idx], cache[b_idx], opts->metric->payload); | |
db106d01 | 632 | |
effdbeb3 | 633 | cleanup: |
a5140f4d RB |
634 | similarity_unload(&a_info); |
635 | similarity_unload(&b_info); | |
636 | ||
637 | return error; | |
db106d01 RB |
638 | } |
639 | ||
a21cbb12 | 640 | static int calc_self_similarity( |
3ff1d123 | 641 | git_diff *diff, |
a21cbb12 RB |
642 | const git_diff_find_options *opts, |
643 | size_t delta_idx, | |
644 | void **cache) | |
9be5be47 | 645 | { |
a21cbb12 RB |
646 | int error, similarity = -1; |
647 | git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); | |
648 | ||
649 | if ((delta->flags & GIT_DIFF_FLAG__HAS_SELF_SIMILARITY) != 0) | |
650 | return 0; | |
651 | ||
652 | error = similarity_measure( | |
653 | &similarity, diff, opts, cache, 2 * delta_idx, 2 * delta_idx + 1); | |
654 | if (error < 0) | |
655 | return error; | |
656 | ||
657 | if (similarity >= 0) { | |
74a627f0 | 658 | delta->similarity = (uint16_t)similarity; |
a21cbb12 RB |
659 | delta->flags |= GIT_DIFF_FLAG__HAS_SELF_SIMILARITY; |
660 | } | |
661 | ||
662 | return 0; | |
663 | } | |
664 | ||
665 | static bool is_rename_target( | |
3ff1d123 | 666 | git_diff *diff, |
a21cbb12 RB |
667 | const git_diff_find_options *opts, |
668 | size_t delta_idx, | |
669 | void **cache) | |
670 | { | |
671 | git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); | |
672 | ||
673 | /* skip things that aren't plain blobs */ | |
674 | if (!GIT_MODE_ISBLOB(delta->new_file.mode)) | |
675 | return false; | |
676 | ||
677 | /* only consider ADDED, RENAMED, COPIED, and split MODIFIED as | |
678 | * targets; maybe include UNTRACKED and IGNORED if requested. | |
679 | */ | |
680 | switch (delta->status) { | |
681 | case GIT_DELTA_UNMODIFIED: | |
682 | case GIT_DELTA_DELETED: | |
683 | return false; | |
684 | ||
685 | case GIT_DELTA_MODIFIED: | |
686 | if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) && | |
687 | !FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES)) | |
688 | return false; | |
689 | ||
690 | if (calc_self_similarity(diff, opts, delta_idx, cache) < 0) | |
691 | return false; | |
692 | ||
693 | if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) && | |
694 | delta->similarity < opts->break_rewrite_threshold) { | |
695 | delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; | |
696 | break; | |
697 | } | |
698 | if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) && | |
699 | delta->similarity < opts->rename_from_rewrite_threshold) | |
700 | break; | |
701 | ||
702 | return false; | |
703 | ||
704 | case GIT_DELTA_UNTRACKED: | |
a21cbb12 RB |
705 | if (!FLAG_SET(opts, GIT_DIFF_FIND_FOR_UNTRACKED)) |
706 | return false; | |
707 | break; | |
708 | ||
d55bed1a ET |
709 | case GIT_DELTA_IGNORED: |
710 | return false; | |
711 | ||
a21cbb12 RB |
712 | default: /* all other status values should be checked */ |
713 | break; | |
714 | } | |
715 | ||
716 | delta->flags |= GIT_DIFF_FLAG__IS_RENAME_TARGET; | |
717 | return true; | |
718 | } | |
719 | ||
720 | static bool is_rename_source( | |
3ff1d123 | 721 | git_diff *diff, |
a21cbb12 RB |
722 | const git_diff_find_options *opts, |
723 | size_t delta_idx, | |
724 | void **cache) | |
725 | { | |
726 | git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); | |
727 | ||
728 | /* skip things that aren't blobs */ | |
729 | if (!GIT_MODE_ISBLOB(delta->old_file.mode)) | |
730 | return false; | |
731 | ||
732 | switch (delta->status) { | |
733 | case GIT_DELTA_ADDED: | |
734 | case GIT_DELTA_UNTRACKED: | |
735 | case GIT_DELTA_IGNORED: | |
736 | return false; | |
737 | ||
738 | case GIT_DELTA_DELETED: | |
739 | case GIT_DELTA_TYPECHANGE: | |
740 | break; | |
741 | ||
742 | case GIT_DELTA_UNMODIFIED: | |
743 | if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED)) | |
744 | return false; | |
f62c174d | 745 | if (FLAG_SET(opts, GIT_DIFF_FIND_REMOVE_UNMODIFIED)) |
97ad85b8 | 746 | delta->flags |= GIT_DIFF_FLAG__TO_DELETE; |
a21cbb12 RB |
747 | break; |
748 | ||
749 | default: /* MODIFIED, RENAMED, COPIED */ | |
750 | /* if we're finding copies, this could be a source */ | |
751 | if (FLAG_SET(opts, GIT_DIFF_FIND_COPIES)) | |
752 | break; | |
753 | ||
754 | /* otherwise, this is only a source if we can split it */ | |
755 | if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) && | |
756 | !FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES)) | |
757 | return false; | |
758 | ||
759 | if (calc_self_similarity(diff, opts, delta_idx, cache) < 0) | |
760 | return false; | |
761 | ||
762 | if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) && | |
763 | delta->similarity < opts->break_rewrite_threshold) { | |
764 | delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; | |
765 | break; | |
766 | } | |
767 | ||
768 | if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) && | |
769 | delta->similarity < opts->rename_from_rewrite_threshold) | |
770 | break; | |
771 | ||
772 | return false; | |
773 | } | |
774 | ||
775 | delta->flags |= GIT_DIFF_FLAG__IS_RENAME_SOURCE; | |
776 | return true; | |
777 | } | |
778 | ||
779 | GIT_INLINE(bool) delta_is_split(git_diff_delta *delta) | |
780 | { | |
781 | return (delta->status == GIT_DELTA_TYPECHANGE || | |
782 | (delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0); | |
783 | } | |
784 | ||
785 | GIT_INLINE(bool) delta_is_new_only(git_diff_delta *delta) | |
786 | { | |
787 | return (delta->status == GIT_DELTA_ADDED || | |
788 | delta->status == GIT_DELTA_UNTRACKED || | |
789 | delta->status == GIT_DELTA_IGNORED); | |
9be5be47 | 790 | } |
db106d01 | 791 | |
e4acc3ba | 792 | GIT_INLINE(void) delta_make_rename( |
74a627f0 | 793 | git_diff_delta *to, const git_diff_delta *from, uint16_t similarity) |
e4acc3ba RB |
794 | { |
795 | to->status = GIT_DELTA_RENAMED; | |
796 | to->similarity = similarity; | |
74a627f0 | 797 | to->nfiles = 2; |
e4acc3ba RB |
798 | memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); |
799 | to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; | |
800 | } | |
801 | ||
d958e37a | 802 | typedef struct { |
74a627f0 RB |
803 | size_t idx; |
804 | uint16_t similarity; | |
d958e37a RB |
805 | } diff_find_match; |
806 | ||
db106d01 | 807 | int git_diff_find_similar( |
3ff1d123 | 808 | git_diff *diff, |
10672e3e | 809 | const git_diff_find_options *given_opts) |
db106d01 | 810 | { |
d730d3f4 | 811 | size_t s, t; |
74a627f0 RB |
812 | int error = 0, result; |
813 | uint16_t similarity; | |
d730d3f4 | 814 | git_diff_delta *src, *tgt; |
7e3ed419 | 815 | git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT; |
d730d3f4 RB |
816 | size_t num_deltas, num_srcs = 0, num_tgts = 0; |
817 | size_t tried_srcs = 0, tried_tgts = 0; | |
e4acc3ba | 818 | size_t num_rewrites = 0, num_updates = 0, num_bumped = 0; |
7e3ed419 | 819 | void **sigcache = NULL; /* cache of similarity metric file signatures */ |
d730d3f4 RB |
820 | diff_find_match *tgt2src = NULL; |
821 | diff_find_match *src2tgt = NULL; | |
822 | diff_find_match *tgt2src_copy = NULL; | |
823 | diff_find_match *best_match; | |
e4acc3ba | 824 | git_diff_file swap; |
db106d01 | 825 | |
960a04dd | 826 | if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0) |
628e92cd BS |
827 | return error; |
828 | ||
d730d3f4 RB |
829 | num_deltas = diff->deltas.length; |
830 | ||
a21cbb12 | 831 | /* TODO: maybe abort if deltas.length > rename_limit ??? */ |
d730d3f4 | 832 | if (!git__is_uint32(num_deltas)) |
7e3ed419 RB |
833 | goto cleanup; |
834 | ||
835 | /* No flags set; nothing to do */ | |
836 | if ((opts.flags & GIT_DIFF_FIND_ALL) == 0) | |
837 | goto cleanup; | |
960a04dd | 838 | |
d730d3f4 | 839 | sigcache = git__calloc(num_deltas * 2, sizeof(void *)); |
e4acc3ba RB |
840 | GITERR_CHECK_ALLOC(sigcache); |
841 | ||
842 | /* Label rename sources and targets | |
843 | * | |
844 | * This will also set self-similarity scores for MODIFIED files and | |
845 | * mark them for splitting if break-rewrites is enabled | |
846 | */ | |
d730d3f4 RB |
847 | git_vector_foreach(&diff->deltas, t, tgt) { |
848 | if (is_rename_source(diff, &opts, t, sigcache)) | |
e4acc3ba RB |
849 | ++num_srcs; |
850 | ||
d730d3f4 | 851 | if (is_rename_target(diff, &opts, t, sigcache)) |
e4acc3ba | 852 | ++num_tgts; |
17c7fbf6 ET |
853 | |
854 | if ((tgt->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) | |
855 | num_rewrites++; | |
e4acc3ba | 856 | } |
960a04dd | 857 | |
e4acc3ba RB |
858 | /* if there are no candidate srcs or tgts, we're done */ |
859 | if (!num_srcs || !num_tgts) | |
860 | goto cleanup; | |
960a04dd | 861 | |
d730d3f4 RB |
862 | src2tgt = git__calloc(num_deltas, sizeof(diff_find_match)); |
863 | GITERR_CHECK_ALLOC(src2tgt); | |
864 | tgt2src = git__calloc(num_deltas, sizeof(diff_find_match)); | |
865 | GITERR_CHECK_ALLOC(tgt2src); | |
866 | ||
867 | if (FLAG_SET(&opts, GIT_DIFF_FIND_COPIES)) { | |
868 | tgt2src_copy = git__calloc(num_deltas, sizeof(diff_find_match)); | |
869 | GITERR_CHECK_ALLOC(tgt2src_copy); | |
870 | } | |
db106d01 | 871 | |
e4acc3ba RB |
872 | /* |
873 | * Find best-fit matches for rename / copy candidates | |
874 | */ | |
d958e37a | 875 | |
e4acc3ba RB |
876 | find_best_matches: |
877 | tried_tgts = num_bumped = 0; | |
db106d01 | 878 | |
d730d3f4 | 879 | git_vector_foreach(&diff->deltas, t, tgt) { |
a21cbb12 | 880 | /* skip things that are not rename targets */ |
d730d3f4 | 881 | if ((tgt->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0) |
d958e37a RB |
882 | continue; |
883 | ||
e4acc3ba | 884 | tried_srcs = 0; |
db106d01 | 885 | |
d730d3f4 | 886 | git_vector_foreach(&diff->deltas, s, src) { |
a21cbb12 | 887 | /* skip things that are not rename sources */ |
d730d3f4 | 888 | if ((src->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) == 0) |
960a04dd RB |
889 | continue; |
890 | ||
d958e37a | 891 | /* calculate similarity for this pair and find best match */ |
d730d3f4 | 892 | if (s == t) |
74a627f0 | 893 | result = -1; /* don't measure self-similarity here */ |
e4acc3ba | 894 | else if ((error = similarity_measure( |
74a627f0 | 895 | &result, diff, &opts, sigcache, 2 * s, 2 * t + 1)) < 0) |
960a04dd | 896 | goto cleanup; |
a21cbb12 | 897 | |
74a627f0 | 898 | if (result < 0) |
d730d3f4 | 899 | continue; |
74a627f0 | 900 | similarity = (uint16_t)result; |
d730d3f4 RB |
901 | |
902 | /* is this a better rename? */ | |
74a627f0 RB |
903 | if (tgt2src[t].similarity < similarity && |
904 | src2tgt[s].similarity < similarity) | |
e4acc3ba | 905 | { |
d730d3f4 RB |
906 | /* eject old mapping */ |
907 | if (src2tgt[s].similarity > 0) { | |
908 | tgt2src[src2tgt[s].idx].similarity = 0; | |
909 | num_bumped++; | |
910 | } | |
911 | if (tgt2src[t].similarity > 0) { | |
912 | src2tgt[tgt2src[t].idx].similarity = 0; | |
913 | num_bumped++; | |
e4acc3ba RB |
914 | } |
915 | ||
d730d3f4 | 916 | /* write new mapping */ |
74a627f0 RB |
917 | tgt2src[t].idx = s; |
918 | tgt2src[t].similarity = similarity; | |
919 | src2tgt[s].idx = t; | |
920 | src2tgt[s].similarity = similarity; | |
d730d3f4 | 921 | } |
a21cbb12 | 922 | |
d730d3f4 RB |
923 | /* keep best absolute match for copies */ |
924 | if (tgt2src_copy != NULL && | |
74a627f0 | 925 | tgt2src_copy[t].similarity < similarity) |
d730d3f4 | 926 | { |
74a627f0 RB |
927 | tgt2src_copy[t].idx = s; |
928 | tgt2src_copy[t].similarity = similarity; | |
db106d01 | 929 | } |
e4acc3ba RB |
930 | |
931 | if (++tried_srcs >= num_srcs) | |
932 | break; | |
933 | ||
d730d3f4 | 934 | /* cap on maximum targets we'll examine (per "tgt" file) */ |
e4acc3ba RB |
935 | if (tried_srcs > opts.rename_limit) |
936 | break; | |
db106d01 | 937 | } |
e4acc3ba RB |
938 | |
939 | if (++tried_tgts >= num_tgts) | |
940 | break; | |
db106d01 RB |
941 | } |
942 | ||
e4acc3ba RB |
943 | if (num_bumped > 0) /* try again if we bumped some items */ |
944 | goto find_best_matches; | |
945 | ||
946 | /* | |
947 | * Rewrite the diffs with renames / copies | |
948 | */ | |
949 | ||
950 | tried_tgts = 0; | |
db106d01 | 951 | |
d730d3f4 | 952 | git_vector_foreach(&diff->deltas, t, tgt) { |
e4acc3ba | 953 | /* skip things that are not rename targets */ |
d730d3f4 | 954 | if ((tgt->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0) |
a21cbb12 | 955 | continue; |
690bf41c | 956 | |
e4acc3ba | 957 | /* check if this delta was the target of a similarity */ |
d730d3f4 RB |
958 | if (tgt2src[t].similarity) |
959 | best_match = &tgt2src[t]; | |
960 | else if (tgt2src_copy && tgt2src_copy[t].similarity) | |
961 | best_match = &tgt2src_copy[t]; | |
962 | else | |
e4acc3ba | 963 | continue; |
d958e37a | 964 | |
d730d3f4 RB |
965 | s = best_match->idx; |
966 | src = GIT_VECTOR_GET(&diff->deltas, s); | |
d958e37a | 967 | |
a21cbb12 RB |
968 | /* possible scenarios: |
969 | * 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME | |
970 | * 2. from DELETE to SPLIT/TYPECHANGE = RENAME + DELETE | |
971 | * 3. from SPLIT/TYPECHANGE to ADD/UNTRACK/IGNORE = ADD + RENAME | |
972 | * 4. from SPLIT/TYPECHANGE to SPLIT/TYPECHANGE = RENAME + SPLIT | |
973 | * 5. from OTHER to ADD/UNTRACK/IGNORE = OTHER + COPY | |
db106d01 RB |
974 | */ |
975 | ||
d730d3f4 | 976 | if (src->status == GIT_DELTA_DELETED) { |
db106d01 | 977 | |
d730d3f4 | 978 | if (delta_is_new_only(tgt)) { |
db106d01 | 979 | |
e4acc3ba | 980 | if (best_match->similarity < opts.rename_threshold) |
a21cbb12 | 981 | continue; |
960a04dd | 982 | |
d730d3f4 | 983 | delta_make_rename(tgt, src, best_match->similarity); |
d958e37a | 984 | |
d730d3f4 | 985 | src->flags |= GIT_DIFF_FLAG__TO_DELETE; |
a21cbb12 RB |
986 | num_rewrites++; |
987 | } else { | |
d730d3f4 | 988 | assert(delta_is_split(tgt)); |
960a04dd | 989 | |
e4acc3ba | 990 | if (best_match->similarity < opts.rename_from_rewrite_threshold) |
a21cbb12 | 991 | continue; |
db106d01 | 992 | |
d730d3f4 | 993 | memcpy(&swap, &tgt->old_file, sizeof(swap)); |
db106d01 | 994 | |
d730d3f4 | 995 | delta_make_rename(tgt, src, best_match->similarity); |
e4acc3ba RB |
996 | num_rewrites--; |
997 | ||
74a627f0 | 998 | assert(src->status == GIT_DELTA_DELETED); |
d730d3f4 RB |
999 | memcpy(&src->old_file, &swap, sizeof(src->old_file)); |
1000 | memset(&src->new_file, 0, sizeof(src->new_file)); | |
1001 | src->new_file.path = src->old_file.path; | |
1002 | src->new_file.flags |= GIT_DIFF_FLAG_VALID_OID; | |
db106d01 | 1003 | |
d958e37a | 1004 | num_updates++; |
7edb74d3 RB |
1005 | |
1006 | if (src2tgt[t].similarity > 0 && src2tgt[t].idx > t) { | |
1007 | /* what used to be at src t is now at src s */ | |
74a627f0 | 1008 | tgt2src[src2tgt[t].idx].idx = s; |
7edb74d3 | 1009 | } |
db106d01 RB |
1010 | } |
1011 | } | |
1012 | ||
d730d3f4 | 1013 | else if (delta_is_split(src)) { |
a21cbb12 | 1014 | |
d730d3f4 | 1015 | if (delta_is_new_only(tgt)) { |
db106d01 | 1016 | |
e4acc3ba | 1017 | if (best_match->similarity < opts.rename_threshold) |
a21cbb12 | 1018 | continue; |
d958e37a | 1019 | |
d730d3f4 | 1020 | delta_make_rename(tgt, src, best_match->similarity); |
a21cbb12 | 1021 | |
d730d3f4 | 1022 | src->status = (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) ? |
a21cbb12 | 1023 | GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED; |
74a627f0 | 1024 | src->nfiles = 1; |
d730d3f4 RB |
1025 | memset(&src->old_file, 0, sizeof(src->old_file)); |
1026 | src->old_file.path = src->new_file.path; | |
1027 | src->old_file.flags |= GIT_DIFF_FLAG_VALID_OID; | |
e4acc3ba | 1028 | |
d730d3f4 | 1029 | src->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; |
e4acc3ba | 1030 | num_rewrites--; |
a21cbb12 RB |
1031 | |
1032 | num_updates++; | |
1033 | } else { | |
d730d3f4 | 1034 | assert(delta_is_split(src)); |
a21cbb12 | 1035 | |
e4acc3ba | 1036 | if (best_match->similarity < opts.rename_from_rewrite_threshold) |
a21cbb12 RB |
1037 | continue; |
1038 | ||
d730d3f4 | 1039 | memcpy(&swap, &tgt->old_file, sizeof(swap)); |
a21cbb12 | 1040 | |
d730d3f4 | 1041 | delta_make_rename(tgt, src, best_match->similarity); |
e4acc3ba RB |
1042 | num_rewrites--; |
1043 | num_updates++; | |
a21cbb12 | 1044 | |
d730d3f4 | 1045 | memcpy(&src->old_file, &swap, sizeof(src->old_file)); |
a21cbb12 | 1046 | |
e4acc3ba RB |
1047 | /* if we've just swapped the new element into the correct |
1048 | * place, clear the SPLIT flag | |
67db583d | 1049 | */ |
d730d3f4 RB |
1050 | if (tgt2src[s].idx == t && |
1051 | tgt2src[s].similarity > | |
67db583d | 1052 | opts.rename_from_rewrite_threshold) { |
d730d3f4 RB |
1053 | src->status = GIT_DELTA_RENAMED; |
1054 | src->similarity = tgt2src[s].similarity; | |
1055 | tgt2src[s].similarity = 0; | |
1056 | src->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; | |
67db583d RB |
1057 | num_rewrites--; |
1058 | } | |
e4acc3ba | 1059 | /* otherwise, if we just overwrote a source, update mapping */ |
7edb74d3 | 1060 | else if (src2tgt[t].similarity > 0 && src2tgt[t].idx > t) { |
d730d3f4 | 1061 | /* what used to be at src t is now at src s */ |
74a627f0 | 1062 | tgt2src[src2tgt[t].idx].idx = s; |
e4acc3ba | 1063 | } |
67db583d | 1064 | |
a21cbb12 RB |
1065 | num_updates++; |
1066 | } | |
1067 | } | |
1068 | ||
2123a17f | 1069 | else if (FLAG_SET(&opts, GIT_DIFF_FIND_COPIES)) { |
d730d3f4 | 1070 | if (tgt2src_copy[t].similarity < opts.copy_threshold) |
a21cbb12 RB |
1071 | continue; |
1072 | ||
d730d3f4 RB |
1073 | /* always use best possible source for copy */ |
1074 | best_match = &tgt2src_copy[t]; | |
1075 | src = GIT_VECTOR_GET(&diff->deltas, best_match->idx); | |
1076 | ||
2123a17f RB |
1077 | if (delta_is_split(tgt)) { |
1078 | error = insert_delete_side_of_split(diff, &diff->deltas, tgt); | |
1079 | if (error < 0) | |
1080 | goto cleanup; | |
1081 | num_rewrites--; | |
1082 | } | |
1083 | ||
1084 | if (!delta_is_split(tgt) && !delta_is_new_only(tgt)) | |
1085 | continue; | |
1086 | ||
d730d3f4 RB |
1087 | tgt->status = GIT_DELTA_COPIED; |
1088 | tgt->similarity = best_match->similarity; | |
74a627f0 | 1089 | tgt->nfiles = 2; |
d730d3f4 | 1090 | memcpy(&tgt->old_file, &src->old_file, sizeof(tgt->old_file)); |
2123a17f | 1091 | tgt->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; |
a21cbb12 RB |
1092 | |
1093 | num_updates++; | |
1094 | } | |
db106d01 RB |
1095 | } |
1096 | ||
e4acc3ba RB |
1097 | /* |
1098 | * Actually split and delete entries as needed | |
1099 | */ | |
1100 | ||
a21cbb12 | 1101 | if (num_rewrites > 0 || num_updates > 0) |
960a04dd | 1102 | error = apply_splits_and_deletes( |
d958e37a | 1103 | diff, diff->deltas.length - num_rewrites, |
17c7fbf6 ET |
1104 | FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES) && |
1105 | !FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES_FOR_RENAMES_ONLY)); | |
d958e37a | 1106 | |
960a04dd | 1107 | cleanup: |
d730d3f4 RB |
1108 | git__free(tgt2src); |
1109 | git__free(src2tgt); | |
1110 | git__free(tgt2src_copy); | |
db106d01 | 1111 | |
7e3ed419 RB |
1112 | if (sigcache) { |
1113 | for (t = 0; t < num_deltas * 2; ++t) { | |
1114 | if (sigcache[t] != NULL) | |
1115 | opts.metric->free_signature(sigcache[t], opts.metric->payload); | |
1116 | } | |
1117 | git__free(sigcache); | |
db106d01 RB |
1118 | } |
1119 | ||
f8275890 RB |
1120 | if (!given_opts || !given_opts->metric) |
1121 | git__free(opts.metric); | |
1122 | ||
960a04dd | 1123 | return error; |
db106d01 RB |
1124 | } |
1125 | ||
1126 | #undef FLAG_SET |