]> git.proxmox.com Git - libgit2.git/blob - src/blame_git.c
New upstream version 1.3.0+dfsg.1
[libgit2.git] / src / blame_git.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "blame_git.h"
9
10 #include "commit.h"
11 #include "blob.h"
12 #include "xdiff/xinclude.h"
13 #include "diff_xdiff.h"
14
15 /*
16 * Origin is refcounted and usually we keep the blob contents to be
17 * reused.
18 */
19 static git_blame__origin *origin_incref(git_blame__origin *o)
20 {
21 if (o)
22 o->refcnt++;
23 return o;
24 }
25
26 static void origin_decref(git_blame__origin *o)
27 {
28 if (o && --o->refcnt <= 0) {
29 if (o->previous)
30 origin_decref(o->previous);
31 git_blob_free(o->blob);
32 git_commit_free(o->commit);
33 git__free(o);
34 }
35 }
36
37 /* Given a commit and a path in it, create a new origin structure. */
38 static int make_origin(git_blame__origin **out, git_commit *commit, const char *path)
39 {
40 git_blame__origin *o;
41 git_object *blob;
42 size_t path_len = strlen(path), alloc_len;
43 int error = 0;
44
45 if ((error = git_object_lookup_bypath(&blob, (git_object*)commit,
46 path, GIT_OBJECT_BLOB)) < 0)
47 return error;
48
49 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, sizeof(*o), path_len);
50 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 1);
51 o = git__calloc(1, alloc_len);
52 GIT_ERROR_CHECK_ALLOC(o);
53
54 o->commit = commit;
55 o->blob = (git_blob *) blob;
56 o->refcnt = 1;
57 strcpy(o->path, path);
58
59 *out = o;
60
61 return 0;
62 }
63
64 /* Locate an existing origin or create a new one. */
65 int git_blame__get_origin(
66 git_blame__origin **out,
67 git_blame *blame,
68 git_commit *commit,
69 const char *path)
70 {
71 git_blame__entry *e;
72
73 for (e = blame->ent; e; e = e->next) {
74 if (e->suspect->commit == commit && !strcmp(e->suspect->path, path)) {
75 *out = origin_incref(e->suspect);
76 }
77 }
78 return make_origin(out, commit, path);
79 }
80
81 typedef struct blame_chunk_cb_data {
82 git_blame *blame;
83 git_blame__origin *target;
84 git_blame__origin *parent;
85 long tlno;
86 long plno;
87 }blame_chunk_cb_data;
88
89 static bool same_suspect(git_blame__origin *a, git_blame__origin *b)
90 {
91 if (a == b)
92 return true;
93 if (git_oid_cmp(git_commit_id(a->commit), git_commit_id(b->commit)))
94 return false;
95 return 0 == strcmp(a->path, b->path);
96 }
97
98 /* find the line number of the last line the target is suspected for */
99 static bool find_last_in_target(size_t *out, git_blame *blame, git_blame__origin *target)
100 {
101 git_blame__entry *e;
102 size_t last_in_target = 0;
103 bool found = false;
104
105 *out = 0;
106
107 for (e=blame->ent; e; e=e->next) {
108 if (e->guilty || !same_suspect(e->suspect, target))
109 continue;
110 if (last_in_target < e->s_lno + e->num_lines) {
111 found = true;
112 last_in_target = e->s_lno + e->num_lines;
113 }
114 }
115
116 *out = last_in_target;
117 return found;
118 }
119
120 /*
121 * It is known that lines between tlno to same came from parent, and e
122 * has an overlap with that range. it also is known that parent's
123 * line plno corresponds to e's line tlno.
124 *
125 * <---- e ----->
126 * <------> (entirely within)
127 * <------------> (extends past)
128 * <------------> (starts before)
129 * <------------------> (entirely encloses)
130 *
131 * Split e into potentially three parts; before this chunk, the chunk
132 * to be blamed for the parent, and after that portion.
133 */
134 static void split_overlap(git_blame__entry *split, git_blame__entry *e,
135 size_t tlno, size_t plno, size_t same, git_blame__origin *parent)
136 {
137 size_t chunk_end_lno;
138
139 if (e->s_lno < tlno) {
140 /* there is a pre-chunk part not blamed on the parent */
141 split[0].suspect = origin_incref(e->suspect);
142 split[0].lno = e->lno;
143 split[0].s_lno = e->s_lno;
144 split[0].num_lines = tlno - e->s_lno;
145 split[1].lno = e->lno + tlno - e->s_lno;
146 split[1].s_lno = plno;
147 } else {
148 split[1].lno = e->lno;
149 split[1].s_lno = plno + (e->s_lno - tlno);
150 }
151
152 if (same < e->s_lno + e->num_lines) {
153 /* there is a post-chunk part not blamed on parent */
154 split[2].suspect = origin_incref(e->suspect);
155 split[2].lno = e->lno + (same - e->s_lno);
156 split[2].s_lno = e->s_lno + (same - e->s_lno);
157 split[2].num_lines = e->s_lno + e->num_lines - same;
158 chunk_end_lno = split[2].lno;
159 } else {
160 chunk_end_lno = e->lno + e->num_lines;
161 }
162 split[1].num_lines = chunk_end_lno - split[1].lno;
163
164 /*
165 * if it turns out there is nothing to blame the parent for, forget about
166 * the splitting. !split[1].suspect signals this.
167 */
168 if (split[1].num_lines < 1)
169 return;
170 split[1].suspect = origin_incref(parent);
171 }
172
173 /*
174 * Link in a new blame entry to the scoreboard. Entries that cover the same
175 * line range have been removed from the scoreboard previously.
176 */
177 static void add_blame_entry(git_blame *blame, git_blame__entry *e)
178 {
179 git_blame__entry *ent, *prev = NULL;
180
181 origin_incref(e->suspect);
182
183 for (ent = blame->ent; ent && ent->lno < e->lno; ent = ent->next)
184 prev = ent;
185
186 /* prev, if not NULL, is the last one that is below e */
187 e->prev = prev;
188 if (prev) {
189 e->next = prev->next;
190 prev->next = e;
191 } else {
192 e->next = blame->ent;
193 blame->ent = e;
194 }
195 if (e->next)
196 e->next->prev = e;
197 }
198
199 /*
200 * src typically is on-stack; we want to copy the information in it to
201 * a malloced blame_entry that is already on the linked list of the scoreboard.
202 * The origin of dst loses a refcnt while the origin of src gains one.
203 */
204 static void dup_entry(git_blame__entry *dst, git_blame__entry *src)
205 {
206 git_blame__entry *p, *n;
207
208 p = dst->prev;
209 n = dst->next;
210 origin_incref(src->suspect);
211 origin_decref(dst->suspect);
212 memcpy(dst, src, sizeof(*src));
213 dst->prev = p;
214 dst->next = n;
215 dst->score = 0;
216 }
217
218 /*
219 * split_overlap() divided an existing blame e into up to three parts in split.
220 * Adjust the linked list of blames in the scoreboard to reflect the split.
221 */
222 static int split_blame(git_blame *blame, git_blame__entry *split, git_blame__entry *e)
223 {
224 git_blame__entry *new_entry;
225
226 if (split[0].suspect && split[2].suspect) {
227 /* The first part (reuse storage for the existing entry e */
228 dup_entry(e, &split[0]);
229
230 /* The last part -- me */
231 new_entry = git__malloc(sizeof(*new_entry));
232 GIT_ERROR_CHECK_ALLOC(new_entry);
233 memcpy(new_entry, &(split[2]), sizeof(git_blame__entry));
234 add_blame_entry(blame, new_entry);
235
236 /* ... and the middle part -- parent */
237 new_entry = git__malloc(sizeof(*new_entry));
238 GIT_ERROR_CHECK_ALLOC(new_entry);
239 memcpy(new_entry, &(split[1]), sizeof(git_blame__entry));
240 add_blame_entry(blame, new_entry);
241 } else if (!split[0].suspect && !split[2].suspect) {
242 /*
243 * The parent covers the entire area; reuse storage for e and replace it
244 * with the parent
245 */
246 dup_entry(e, &split[1]);
247 } else if (split[0].suspect) {
248 /* me and then parent */
249 dup_entry(e, &split[0]);
250 new_entry = git__malloc(sizeof(*new_entry));
251 GIT_ERROR_CHECK_ALLOC(new_entry);
252 memcpy(new_entry, &(split[1]), sizeof(git_blame__entry));
253 add_blame_entry(blame, new_entry);
254 } else {
255 /* parent and then me */
256 dup_entry(e, &split[1]);
257 new_entry = git__malloc(sizeof(*new_entry));
258 GIT_ERROR_CHECK_ALLOC(new_entry);
259 memcpy(new_entry, &(split[2]), sizeof(git_blame__entry));
260 add_blame_entry(blame, new_entry);
261 }
262
263 return 0;
264 }
265
266 /*
267 * After splitting the blame, the origins used by the on-stack blame_entry
268 * should lose one refcnt each.
269 */
270 static void decref_split(git_blame__entry *split)
271 {
272 int i;
273 for (i=0; i<3; i++)
274 origin_decref(split[i].suspect);
275 }
276
277 /*
278 * Helper for blame_chunk(). blame_entry e is known to overlap with the patch
279 * hunk; split it and pass blame to the parent.
280 */
281 static int blame_overlap(
282 git_blame *blame,
283 git_blame__entry *e,
284 size_t tlno,
285 size_t plno,
286 size_t same,
287 git_blame__origin *parent)
288 {
289 git_blame__entry split[3] = {{0}};
290
291 split_overlap(split, e, tlno, plno, same, parent);
292 if (split[1].suspect)
293 if (split_blame(blame, split, e) < 0)
294 return -1;
295 decref_split(split);
296
297 return 0;
298 }
299
300 /*
301 * Process one hunk from the patch between the current suspect for blame_entry
302 * e and its parent. Find and split the overlap, and pass blame to the
303 * overlapping part to the parent.
304 */
305 static int blame_chunk(
306 git_blame *blame,
307 size_t tlno,
308 size_t plno,
309 size_t same,
310 git_blame__origin *target,
311 git_blame__origin *parent)
312 {
313 git_blame__entry *e;
314
315 for (e = blame->ent; e; e = e->next) {
316 if (e->guilty || !same_suspect(e->suspect, target))
317 continue;
318 if (same <= e->s_lno)
319 continue;
320 if (tlno < e->s_lno + e->num_lines) {
321 if (blame_overlap(blame, e, tlno, plno, same, parent) < 0)
322 return -1;
323 }
324 }
325
326 return 0;
327 }
328
329 static int my_emit(
330 long start_a, long count_a,
331 long start_b, long count_b,
332 void *cb_data)
333 {
334 blame_chunk_cb_data *d = (blame_chunk_cb_data *)cb_data;
335
336 if (blame_chunk(d->blame, d->tlno, d->plno, start_b, d->target, d->parent) < 0)
337 return -1;
338 d->plno = start_a + count_a;
339 d->tlno = start_b + count_b;
340
341 return 0;
342 }
343
344 static void trim_common_tail(mmfile_t *a, mmfile_t *b, long ctx)
345 {
346 const int blk = 1024;
347 long trimmed = 0, recovered = 0;
348 char *ap = a->ptr + a->size;
349 char *bp = b->ptr + b->size;
350 long smaller = (long)((a->size < b->size) ? a->size : b->size);
351
352 if (ctx)
353 return;
354
355 while (blk + trimmed <= smaller && !memcmp(ap - blk, bp - blk, blk)) {
356 trimmed += blk;
357 ap -= blk;
358 bp -= blk;
359 }
360
361 while (recovered < trimmed)
362 if (ap[recovered++] == '\n')
363 break;
364 a->size -= trimmed - recovered;
365 b->size -= trimmed - recovered;
366 }
367
368 static int diff_hunks(mmfile_t file_a, mmfile_t file_b, void *cb_data, git_blame_options *options)
369 {
370 xdemitconf_t xecfg = {0};
371 xdemitcb_t ecb = {0};
372 xpparam_t xpp = {0};
373
374 if (options->flags & GIT_BLAME_IGNORE_WHITESPACE)
375 xpp.flags |= XDF_IGNORE_WHITESPACE;
376
377 xecfg.hunk_func = my_emit;
378 ecb.priv = cb_data;
379
380 trim_common_tail(&file_a, &file_b, 0);
381
382 if (file_a.size > GIT_XDIFF_MAX_SIZE ||
383 file_b.size > GIT_XDIFF_MAX_SIZE) {
384 git_error_set(GIT_ERROR_INVALID, "file too large to blame");
385 return -1;
386 }
387
388 return xdl_diff(&file_a, &file_b, &xpp, &xecfg, &ecb);
389 }
390
391 static void fill_origin_blob(git_blame__origin *o, mmfile_t *file)
392 {
393 memset(file, 0, sizeof(*file));
394 if (o->blob) {
395 file->ptr = (char*)git_blob_rawcontent(o->blob);
396 file->size = (size_t)git_blob_rawsize(o->blob);
397 }
398 }
399
400 static int pass_blame_to_parent(
401 git_blame *blame,
402 git_blame__origin *target,
403 git_blame__origin *parent)
404 {
405 size_t last_in_target;
406 mmfile_t file_p, file_o;
407 blame_chunk_cb_data d = { blame, target, parent, 0, 0 };
408
409 if (!find_last_in_target(&last_in_target, blame, target))
410 return 1; /* nothing remains for this target */
411
412 fill_origin_blob(parent, &file_p);
413 fill_origin_blob(target, &file_o);
414
415 if (diff_hunks(file_p, file_o, &d, &blame->options) < 0)
416 return -1;
417
418 /* The reset (i.e. anything after tlno) are the same as the parent */
419 if (blame_chunk(blame, d.tlno, d.plno, last_in_target, target, parent) < 0)
420 return -1;
421
422 return 0;
423 }
424
425 static int paths_on_dup(void **old, void *new)
426 {
427 GIT_UNUSED(old);
428 git__free(new);
429 return -1;
430 }
431
432 static git_blame__origin *find_origin(
433 git_blame *blame,
434 git_commit *parent,
435 git_blame__origin *origin)
436 {
437 git_blame__origin *porigin = NULL;
438 git_diff *difflist = NULL;
439 git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
440 git_tree *otree=NULL, *ptree=NULL;
441
442 /* Get the trees from this commit and its parent */
443 if (0 != git_commit_tree(&otree, origin->commit) ||
444 0 != git_commit_tree(&ptree, parent))
445 goto cleanup;
446
447 /* Configure the diff */
448 diffopts.context_lines = 0;
449 diffopts.flags = GIT_DIFF_SKIP_BINARY_CHECK;
450
451 /* Check to see if files we're interested have changed */
452 diffopts.pathspec.count = blame->paths.length;
453 diffopts.pathspec.strings = (char**)blame->paths.contents;
454 if (0 != git_diff_tree_to_tree(&difflist, blame->repository, ptree, otree, &diffopts))
455 goto cleanup;
456
457 if (!git_diff_num_deltas(difflist)) {
458 /* No changes; copy data */
459 git_blame__get_origin(&porigin, blame, parent, origin->path);
460 } else {
461 git_diff_find_options findopts = GIT_DIFF_FIND_OPTIONS_INIT;
462 int i;
463
464 /* Generate a full diff between the two trees */
465 git_diff_free(difflist);
466 diffopts.pathspec.count = 0;
467 if (0 != git_diff_tree_to_tree(&difflist, blame->repository, ptree, otree, &diffopts))
468 goto cleanup;
469
470 /* Let diff find renames */
471 findopts.flags = GIT_DIFF_FIND_RENAMES;
472 if (0 != git_diff_find_similar(difflist, &findopts))
473 goto cleanup;
474
475 /* Find one that matches */
476 for (i=0; i<(int)git_diff_num_deltas(difflist); i++) {
477 const git_diff_delta *delta = git_diff_get_delta(difflist, i);
478
479 if (!git_vector_bsearch(NULL, &blame->paths, delta->new_file.path))
480 {
481 git_vector_insert_sorted(&blame->paths, (void*)git__strdup(delta->old_file.path),
482 paths_on_dup);
483 make_origin(&porigin, parent, delta->old_file.path);
484 }
485 }
486 }
487
488 cleanup:
489 git_diff_free(difflist);
490 git_tree_free(otree);
491 git_tree_free(ptree);
492 return porigin;
493 }
494
495 /*
496 * The blobs of origin and porigin exactly match, so everything origin is
497 * suspected for can be blamed on the parent.
498 */
499 static int pass_whole_blame(git_blame *blame,
500 git_blame__origin *origin, git_blame__origin *porigin)
501 {
502 git_blame__entry *e;
503
504 if (!porigin->blob &&
505 git_object_lookup((git_object**)&porigin->blob, blame->repository,
506 git_blob_id(origin->blob), GIT_OBJECT_BLOB) < 0)
507 return -1;
508 for (e=blame->ent; e; e=e->next) {
509 if (!same_suspect(e->suspect, origin))
510 continue;
511 origin_incref(porigin);
512 origin_decref(e->suspect);
513 e->suspect = porigin;
514 }
515
516 return 0;
517 }
518
519 static int pass_blame(git_blame *blame, git_blame__origin *origin, uint32_t opt)
520 {
521 git_commit *commit = origin->commit;
522 int i, num_parents;
523 git_blame__origin *sg_buf[16];
524 git_blame__origin *porigin, **sg_origin = sg_buf;
525 int ret, error = 0;
526
527 num_parents = git_commit_parentcount(commit);
528 if (!git_oid_cmp(git_commit_id(commit), &blame->options.oldest_commit))
529 /* Stop at oldest specified commit */
530 num_parents = 0;
531 else if (opt & GIT_BLAME_FIRST_PARENT && num_parents > 1)
532 /* Limit search to the first parent */
533 num_parents = 1;
534
535 if (!num_parents) {
536 git_oid_cpy(&blame->options.oldest_commit, git_commit_id(commit));
537 goto finish;
538 } else if (num_parents < (int)ARRAY_SIZE(sg_buf))
539 memset(sg_buf, 0, sizeof(sg_buf));
540 else {
541 sg_origin = git__calloc(num_parents, sizeof(*sg_origin));
542 GIT_ERROR_CHECK_ALLOC(sg_origin);
543 }
544
545 for (i=0; i<num_parents; i++) {
546 git_commit *p;
547 int j, same;
548
549 if (sg_origin[i])
550 continue;
551
552 if ((error = git_commit_parent(&p, origin->commit, i)) < 0)
553 goto finish;
554 porigin = find_origin(blame, p, origin);
555
556 if (!porigin) {
557 /*
558 * We only have to decrement the parent's
559 * reference count when no porigin has
560 * been created, as otherwise the commit
561 * is assigned to the created object.
562 */
563 git_commit_free(p);
564 continue;
565 }
566 if (porigin->blob && origin->blob &&
567 !git_oid_cmp(git_blob_id(porigin->blob), git_blob_id(origin->blob))) {
568 error = pass_whole_blame(blame, origin, porigin);
569 origin_decref(porigin);
570 goto finish;
571 }
572 for (j = same = 0; j<i; j++)
573 if (sg_origin[j] &&
574 !git_oid_cmp(git_blob_id(sg_origin[j]->blob), git_blob_id(porigin->blob))) {
575 same = 1;
576 break;
577 }
578 if (!same)
579 sg_origin[i] = porigin;
580 else
581 origin_decref(porigin);
582 }
583
584 /* Standard blame */
585 for (i=0; i<num_parents; i++) {
586 git_blame__origin *porigin = sg_origin[i];
587 if (!porigin)
588 continue;
589 if (!origin->previous) {
590 origin_incref(porigin);
591 origin->previous = porigin;
592 }
593
594 if ((ret = pass_blame_to_parent(blame, origin, porigin)) != 0) {
595 if (ret < 0)
596 error = -1;
597
598 goto finish;
599 }
600 }
601
602 /* TODO: optionally find moves in parents' files */
603
604 /* TODO: optionally find copies in parents' files */
605
606 finish:
607 for (i=0; i<num_parents; i++)
608 if (sg_origin[i])
609 origin_decref(sg_origin[i]);
610 if (sg_origin != sg_buf)
611 git__free(sg_origin);
612 return error;
613 }
614
615 /*
616 * If two blame entries that are next to each other came from
617 * contiguous lines in the same origin (i.e. <commit, path> pair),
618 * merge them together.
619 */
620 static void coalesce(git_blame *blame)
621 {
622 git_blame__entry *ent, *next;
623
624 for (ent=blame->ent; ent && (next = ent->next); ent = next) {
625 if (same_suspect(ent->suspect, next->suspect) &&
626 ent->guilty == next->guilty &&
627 ent->s_lno + ent->num_lines == next->s_lno)
628 {
629 ent->num_lines += next->num_lines;
630 ent->next = next->next;
631 if (ent->next)
632 ent->next->prev = ent;
633 origin_decref(next->suspect);
634 git__free(next);
635 ent->score = 0;
636 next = ent; /* again */
637 }
638 }
639 }
640
641 int git_blame__like_git(git_blame *blame, uint32_t opt)
642 {
643 int error = 0;
644
645 while (true) {
646 git_blame__entry *ent;
647 git_blame__origin *suspect = NULL;
648
649 /* Find a suspect to break down */
650 for (ent = blame->ent; !suspect && ent; ent = ent->next)
651 if (!ent->guilty)
652 suspect = ent->suspect;
653 if (!suspect)
654 break;
655
656 /* We'll use this suspect later in the loop, so hold on to it for now. */
657 origin_incref(suspect);
658
659 if ((error = pass_blame(blame, suspect, opt)) < 0)
660 break;
661
662 /* Take responsibility for the remaining entries */
663 for (ent = blame->ent; ent; ent = ent->next) {
664 if (same_suspect(ent->suspect, suspect)) {
665 ent->guilty = true;
666 ent->is_boundary = !git_oid_cmp(
667 git_commit_id(suspect->commit),
668 &blame->options.oldest_commit);
669 }
670 }
671 origin_decref(suspect);
672 }
673
674 if (!error)
675 coalesce(blame);
676
677 return error;
678 }
679
680 void git_blame__free_entry(git_blame__entry *ent)
681 {
682 if (!ent) return;
683 origin_decref(ent->suspect);
684 git__free(ent);
685 }