]> git.proxmox.com Git - libgit2.git/blame - src/diff_tform.c
Merge pull request #1208 from ethomson/ppc_sha1_asm_deadness
[libgit2.git] / src / diff_tform.c
CommitLineData
db106d01
RB
1/*
2 * Copyright (C) 2012 the libgit2 contributors
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7#include "common.h"
8#include "diff.h"
9#include "git2/config.h"
10
11static git_diff_delta *diff_delta__dup(
12 const git_diff_delta *d, git_pool *pool)
13{
14 git_diff_delta *delta = git__malloc(sizeof(git_diff_delta));
15 if (!delta)
16 return NULL;
17
18 memcpy(delta, d, sizeof(git_diff_delta));
19
20 delta->old_file.path = git_pool_strdup(pool, d->old_file.path);
21 if (delta->old_file.path == NULL)
22 goto fail;
23
24 if (d->new_file.path != d->old_file.path) {
25 delta->new_file.path = git_pool_strdup(pool, d->new_file.path);
26 if (delta->new_file.path == NULL)
27 goto fail;
28 } else {
29 delta->new_file.path = delta->old_file.path;
30 }
31
32 return delta;
33
34fail:
35 git__free(delta);
36 return NULL;
37}
38
39static git_diff_delta *diff_delta__merge_like_cgit(
40 const git_diff_delta *a, const git_diff_delta *b, git_pool *pool)
41{
42 git_diff_delta *dup;
43
44 /* Emulate C git for merging two diffs (a la 'git diff <sha>').
45 *
46 * When C git does a diff between the work dir and a tree, it actually
47 * diffs with the index but uses the workdir contents. This emulates
48 * those choices so we can emulate the type of diff.
49 *
50 * We have three file descriptions here, let's call them:
51 * f1 = a->old_file
52 * f2 = a->new_file AND b->old_file
53 * f3 = b->new_file
54 */
55
56 /* if f2 == f3 or f2 is deleted, then just dup the 'a' diff */
57 if (b->status == GIT_DELTA_UNMODIFIED || a->status == GIT_DELTA_DELETED)
58 return diff_delta__dup(a, pool);
59
60 /* otherwise, base this diff on the 'b' diff */
61 if ((dup = diff_delta__dup(b, pool)) == NULL)
62 return NULL;
63
64 /* If 'a' status is uninteresting, then we're done */
65 if (a->status == GIT_DELTA_UNMODIFIED)
66 return dup;
67
68 assert(a->status != GIT_DELTA_UNMODIFIED);
69 assert(b->status != GIT_DELTA_UNMODIFIED);
70
71 /* A cgit exception is that the diff of a file that is only in the
72 * index (i.e. not in HEAD nor workdir) is given as empty.
73 */
74 if (dup->status == GIT_DELTA_DELETED) {
75 if (a->status == GIT_DELTA_ADDED)
76 dup->status = GIT_DELTA_UNMODIFIED;
77 /* else don't overwrite DELETE status */
78 } else {
79 dup->status = a->status;
80 }
81
82 git_oid_cpy(&dup->old_file.oid, &a->old_file.oid);
83 dup->old_file.mode = a->old_file.mode;
84 dup->old_file.size = a->old_file.size;
85 dup->old_file.flags = a->old_file.flags;
86
87 return dup;
88}
89
90int git_diff_merge(
91 git_diff_list *onto,
92 const git_diff_list *from)
93{
94 int error = 0;
95 git_pool onto_pool;
96 git_vector onto_new;
97 git_diff_delta *delta;
98 bool ignore_case = false;
99 unsigned int i, j;
100
101 assert(onto && from);
102
103 if (!from->deltas.length)
104 return 0;
105
106 if (git_vector_init(
107 &onto_new, onto->deltas.length, git_diff_delta__cmp) < 0 ||
108 git_pool_init(&onto_pool, 1, 0) < 0)
109 return -1;
110
111 if ((onto->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0 ||
112 (from->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0)
113 {
114 ignore_case = true;
115
116 /* This function currently only supports merging diff lists that
117 * are sorted identically. */
118 assert((onto->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0 &&
119 (from->opts.flags & GIT_DIFF_DELTAS_ARE_ICASE) != 0);
120 }
121
122 for (i = 0, j = 0; i < onto->deltas.length || j < from->deltas.length; ) {
123 git_diff_delta *o = GIT_VECTOR_GET(&onto->deltas, i);
124 const git_diff_delta *f = GIT_VECTOR_GET(&from->deltas, j);
125 int cmp = !f ? -1 : !o ? 1 : STRCMP_CASESELECT(ignore_case, o->old_file.path, f->old_file.path);
126
127 if (cmp < 0) {
128 delta = diff_delta__dup(o, &onto_pool);
129 i++;
130 } else if (cmp > 0) {
131 delta = diff_delta__dup(f, &onto_pool);
132 j++;
133 } else {
134 delta = diff_delta__merge_like_cgit(o, f, &onto_pool);
135 i++;
136 j++;
137 }
138
139 /* the ignore rules for the target may not match the source
140 * or the result of a merged delta could be skippable...
141 */
142 if (git_diff_delta__should_skip(&onto->opts, delta)) {
143 git__free(delta);
144 continue;
145 }
146
147 if ((error = !delta ? -1 : git_vector_insert(&onto_new, delta)) < 0)
148 break;
149 }
150
151 if (!error) {
152 git_vector_swap(&onto->deltas, &onto_new);
153 git_pool_swap(&onto->pool, &onto_pool);
154 onto->new_src = from->new_src;
155
156 /* prefix strings also come from old pool, so recreate those.*/
157 onto->opts.old_prefix =
158 git_pool_strdup_safe(&onto->pool, onto->opts.old_prefix);
159 onto->opts.new_prefix =
160 git_pool_strdup_safe(&onto->pool, onto->opts.new_prefix);
161 }
162
163 git_vector_foreach(&onto_new, i, delta)
164 git__free(delta);
165 git_vector_free(&onto_new);
166 git_pool_clear(&onto_pool);
167
168 return error;
169}
170
171#define DEFAULT_THRESHOLD 50
172#define DEFAULT_BREAK_REWRITE_THRESHOLD 60
173#define DEFAULT_TARGET_LIMIT 200
174
175static int normalize_find_opts(
176 git_diff_list *diff,
177 git_diff_find_options *opts,
178 git_diff_find_options *given)
179{
180 git_config *cfg = NULL;
181 const char *val;
182
183 if (diff->repo != NULL &&
184 git_repository_config__weakptr(&cfg, diff->repo) < 0)
185 return -1;
186
187 if (given != NULL)
188 memcpy(opts, given, sizeof(*opts));
189 else {
ca901e7b
BS
190 git_diff_find_options init = GIT_DIFF_FIND_OPTIONS_INIT;
191 memmove(opts, &init, sizeof(init));
db106d01
RB
192
193 opts->flags = GIT_DIFF_FIND_RENAMES;
194
195 if (git_config_get_string(&val, cfg, "diff.renames") < 0)
196 giterr_clear();
197 else if (val &&
198 (!strcasecmp(val, "copies") || !strcasecmp(val, "copy")))
199 opts->flags = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES;
200 }
201
c7231c45 202 GITERR_CHECK_VERSION(opts, GIT_DIFF_FIND_OPTIONS_VERSION, "git_diff_find_options");
ca901e7b 203
db106d01
RB
204 /* some flags imply others */
205
206 if (opts->flags & GIT_DIFF_FIND_RENAMES_FROM_REWRITES)
207 opts->flags |= GIT_DIFF_FIND_RENAMES;
208
209 if (opts->flags & GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED)
210 opts->flags |= GIT_DIFF_FIND_COPIES;
211
212#define USE_DEFAULT(X) ((X) == 0 || (X) > 100)
213
214 if (USE_DEFAULT(opts->rename_threshold))
215 opts->rename_threshold = DEFAULT_THRESHOLD;
216
217 if (USE_DEFAULT(opts->rename_from_rewrite_threshold))
218 opts->rename_from_rewrite_threshold = DEFAULT_THRESHOLD;
219
220 if (USE_DEFAULT(opts->copy_threshold))
221 opts->copy_threshold = DEFAULT_THRESHOLD;
222
223 if (USE_DEFAULT(opts->break_rewrite_threshold))
224 opts->break_rewrite_threshold = DEFAULT_BREAK_REWRITE_THRESHOLD;
225
226#undef USE_DEFAULT
227
228 if (!opts->target_limit) {
229 int32_t limit = 0;
230
231 opts->target_limit = DEFAULT_TARGET_LIMIT;
232
233 if (git_config_get_int32(&limit, cfg, "diff.renameLimit") < 0)
234 giterr_clear();
235 else if (limit > 0)
236 opts->target_limit = limit;
237 }
238
239 return 0;
240}
241
242static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size)
243{
244 git_vector onto = GIT_VECTOR_INIT;
245 size_t i;
246 git_diff_delta *delta;
247
248 if (git_vector_init(&onto, expected_size, git_diff_delta__cmp) < 0)
249 return -1;
250
251 /* build new delta list without TO_DELETE and splitting TO_SPLIT */
252 git_vector_foreach(&diff->deltas, i, delta) {
253 if (delta->status == GIT_DELTA__TO_DELETE) {
254 git__free(delta);
255 continue;
256 }
257
258 if (delta->status == GIT_DELTA__TO_SPLIT) {
259 git_diff_delta *deleted = diff_delta__dup(delta, &diff->pool);
260 if (!deleted)
261 return -1;
262
263 deleted->status = GIT_DELTA_DELETED;
264 memset(&deleted->new_file, 0, sizeof(deleted->new_file));
265 deleted->new_file.path = deleted->old_file.path;
266 deleted->new_file.flags |= GIT_DIFF_FILE_VALID_OID;
267
268 git_vector_insert(&onto, deleted);
269
270 delta->status = GIT_DELTA_ADDED;
271 memset(&delta->old_file, 0, sizeof(delta->old_file));
272 delta->old_file.path = delta->new_file.path;
273 delta->old_file.flags |= GIT_DIFF_FILE_VALID_OID;
274 }
275
276 git_vector_insert(&onto, delta);
277 }
278
279 /* swap new delta list into place */
280 git_vector_sort(&onto);
281 git_vector_swap(&diff->deltas, &onto);
282 git_vector_free(&onto);
283
284 return 0;
285}
286
287static unsigned int calc_similarity(
288 void *cache, git_diff_file *old_file, git_diff_file *new_file)
289{
290 GIT_UNUSED(cache);
291
292 if (git_oid_cmp(&old_file->oid, &new_file->oid) == 0)
293 return 100;
294
295 /* TODO: insert actual similarity algo here */
296
297 return 0;
298}
299
300#define FLAG_SET(opts,flag_name) ((opts.flags & flag_name) != 0)
301
302int git_diff_find_similar(
303 git_diff_list *diff,
304 git_diff_find_options *given_opts)
305{
306 unsigned int i, j, similarity;
307 git_diff_delta *from, *to;
308 git_diff_find_options opts;
309 unsigned int tried_targets, num_changes = 0;
310 git_vector matches = GIT_VECTOR_INIT;
311
312 if (normalize_find_opts(diff, &opts, given_opts) < 0)
313 return -1;
314
315 /* first do splits if requested */
316
317 if (FLAG_SET(opts, GIT_DIFF_FIND_AND_BREAK_REWRITES)) {
318 git_vector_foreach(&diff->deltas, i, from) {
319 if (from->status != GIT_DELTA_MODIFIED)
320 continue;
321
322 /* Right now, this doesn't work right because the similarity
323 * algorithm isn't actually implemented...
324 */
325 similarity = 100;
326 /* calc_similarity(NULL, &from->old_file, from->new_file); */
327
328 if (similarity < opts.break_rewrite_threshold) {
329 from->status = GIT_DELTA__TO_SPLIT;
330 num_changes++;
331 }
332 }
333
334 /* apply splits as needed */
335 if (num_changes > 0 &&
336 apply_splits_and_deletes(
337 diff, diff->deltas.length + num_changes) < 0)
338 return -1;
339 }
340
341 /* next find the most similar delta for each rename / copy candidate */
342
343 if (git_vector_init(&matches, diff->deltas.length, git_diff_delta__cmp) < 0)
344 return -1;
345
346 git_vector_foreach(&diff->deltas, i, from) {
347 tried_targets = 0;
348
349 git_vector_foreach(&diff->deltas, j, to) {
350 if (i == j)
351 continue;
352
353 switch (to->status) {
354 case GIT_DELTA_ADDED:
355 case GIT_DELTA_UNTRACKED:
356 case GIT_DELTA_RENAMED:
357 case GIT_DELTA_COPIED:
358 break;
359 default:
360 /* only the above status values should be checked */
361 continue;
362 }
363
364 /* skip all but DELETED files unless copy detection is on */
365 if (from->status != GIT_DELTA_DELETED &&
366 !FLAG_SET(opts, GIT_DIFF_FIND_COPIES))
367 continue;
368
369 /* don't check UNMODIFIED files as source unless given option */
370 if (from->status == GIT_DELTA_UNMODIFIED &&
371 !FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED))
372 continue;
373
374 /* cap on maximum files we'll examine */
375 if (++tried_targets > opts.target_limit)
376 break;
377
378 /* calculate similarity and see if this pair beats the
379 * similarity score of the current best pair.
380 */
381 similarity = calc_similarity(NULL, &from->old_file, &to->new_file);
382
383 if (to->similarity < similarity) {
384 to->similarity = similarity;
385 if (git_vector_set(NULL, &matches, j, from) < 0)
386 return -1;
387 }
388 }
389 }
390
391 /* next rewrite the diffs with renames / copies */
392
393 num_changes = 0;
394
395 git_vector_foreach(&diff->deltas, j, to) {
396 from = GIT_VECTOR_GET(&matches, j);
397 if (!from) {
398 assert(to->similarity == 0);
399 continue;
400 }
401
402 /* three possible outcomes here:
403 * 1. old DELETED and if over rename threshold,
404 * new becomes RENAMED and old goes away
405 * 2. old was MODIFIED but FIND_RENAMES_FROM_REWRITES is on and
406 * old is more similar to new than it is to itself, in which
407 * case, new becomes RENAMED and old becomed ADDED
408 * 3. otherwise if over copy threshold, new becomes COPIED
409 */
410
411 if (from->status == GIT_DELTA_DELETED) {
412 if (to->similarity < opts.rename_threshold) {
413 to->similarity = 0;
414 continue;
415 }
416
417 to->status = GIT_DELTA_RENAMED;
418 memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
419
420 from->status = GIT_DELTA__TO_DELETE;
421 num_changes++;
422
423 continue;
424 }
425
426 if (from->status == GIT_DELTA_MODIFIED &&
427 FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) &&
428 to->similarity > opts.rename_threshold)
429 {
430 similarity = 100;
431 /* calc_similarity(NULL, &from->old_file, from->new_file); */
432
433 if (similarity < opts.rename_from_rewrite_threshold) {
434 to->status = GIT_DELTA_RENAMED;
435 memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
436
437 from->status = GIT_DELTA_ADDED;
438 memset(&from->old_file, 0, sizeof(from->old_file));
439 from->old_file.path = to->old_file.path;
440 from->old_file.flags |= GIT_DIFF_FILE_VALID_OID;
441
442 continue;
443 }
444 }
445
446 if (to->similarity < opts.copy_threshold) {
447 to->similarity = 0;
448 continue;
449 }
450
451 /* convert "to" to a COPIED record */
452 to->status = GIT_DELTA_COPIED;
453 memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
454 }
455
456 git_vector_free(&matches);
457
458 if (num_changes > 0) {
459 assert(num_changes < diff->deltas.length);
460
461 if (apply_splits_and_deletes(
462 diff, diff->deltas.length - num_changes) < 0)
463 return -1;
464 }
465
466 return 0;
467}
468
469#undef FLAG_SET