2 * Copyright (C) the libgit2 contributors. All rights reserved.
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
12 #include "xdiff/xinclude.h"
13 #include "diff_xdiff.h"
16 * Origin is refcounted and usually we keep the blob contents to be
19 static git_blame__origin
*origin_incref(git_blame__origin
*o
)
26 static void origin_decref(git_blame__origin
*o
)
28 if (o
&& --o
->refcnt
<= 0) {
30 origin_decref(o
->previous
);
31 git_blob_free(o
->blob
);
32 git_commit_free(o
->commit
);
37 /* Given a commit and a path in it, create a new origin structure. */
38 static int make_origin(git_blame__origin
**out
, git_commit
*commit
, const char *path
)
42 size_t path_len
= strlen(path
), alloc_len
;
45 if ((error
= git_object_lookup_bypath(&blob
, (git_object
*)commit
,
46 path
, GIT_OBJECT_BLOB
)) < 0)
49 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len
, sizeof(*o
), path_len
);
50 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len
, alloc_len
, 1);
51 o
= git__calloc(1, alloc_len
);
52 GIT_ERROR_CHECK_ALLOC(o
);
55 o
->blob
= (git_blob
*) blob
;
57 strcpy(o
->path
, path
);
64 /* Locate an existing origin or create a new one. */
65 int git_blame__get_origin(
66 git_blame__origin
**out
,
73 for (e
= blame
->ent
; e
; e
= e
->next
) {
74 if (e
->suspect
->commit
== commit
&& !strcmp(e
->suspect
->path
, path
)) {
75 *out
= origin_incref(e
->suspect
);
78 return make_origin(out
, commit
, path
);
81 typedef struct blame_chunk_cb_data
{
83 git_blame__origin
*target
;
84 git_blame__origin
*parent
;
89 static bool same_suspect(git_blame__origin
*a
, git_blame__origin
*b
)
93 if (git_oid_cmp(git_commit_id(a
->commit
), git_commit_id(b
->commit
)))
95 return 0 == strcmp(a
->path
, b
->path
);
98 /* find the line number of the last line the target is suspected for */
99 static bool find_last_in_target(size_t *out
, git_blame
*blame
, git_blame__origin
*target
)
102 size_t last_in_target
= 0;
107 for (e
=blame
->ent
; e
; e
=e
->next
) {
108 if (e
->guilty
|| !same_suspect(e
->suspect
, target
))
110 if (last_in_target
< e
->s_lno
+ e
->num_lines
) {
112 last_in_target
= e
->s_lno
+ e
->num_lines
;
116 *out
= last_in_target
;
121 * It is known that lines between tlno to same came from parent, and e
122 * has an overlap with that range. it also is known that parent's
123 * line plno corresponds to e's line tlno.
126 * <------> (entirely within)
127 * <------------> (extends past)
128 * <------------> (starts before)
129 * <------------------> (entirely encloses)
131 * Split e into potentially three parts; before this chunk, the chunk
132 * to be blamed for the parent, and after that portion.
134 static void split_overlap(git_blame__entry
*split
, git_blame__entry
*e
,
135 size_t tlno
, size_t plno
, size_t same
, git_blame__origin
*parent
)
137 size_t chunk_end_lno
;
139 if (e
->s_lno
< tlno
) {
140 /* there is a pre-chunk part not blamed on the parent */
141 split
[0].suspect
= origin_incref(e
->suspect
);
142 split
[0].lno
= e
->lno
;
143 split
[0].s_lno
= e
->s_lno
;
144 split
[0].num_lines
= tlno
- e
->s_lno
;
145 split
[1].lno
= e
->lno
+ tlno
- e
->s_lno
;
146 split
[1].s_lno
= plno
;
148 split
[1].lno
= e
->lno
;
149 split
[1].s_lno
= plno
+ (e
->s_lno
- tlno
);
152 if (same
< e
->s_lno
+ e
->num_lines
) {
153 /* there is a post-chunk part not blamed on parent */
154 split
[2].suspect
= origin_incref(e
->suspect
);
155 split
[2].lno
= e
->lno
+ (same
- e
->s_lno
);
156 split
[2].s_lno
= e
->s_lno
+ (same
- e
->s_lno
);
157 split
[2].num_lines
= e
->s_lno
+ e
->num_lines
- same
;
158 chunk_end_lno
= split
[2].lno
;
160 chunk_end_lno
= e
->lno
+ e
->num_lines
;
162 split
[1].num_lines
= chunk_end_lno
- split
[1].lno
;
165 * if it turns out there is nothing to blame the parent for, forget about
166 * the splitting. !split[1].suspect signals this.
168 if (split
[1].num_lines
< 1)
170 split
[1].suspect
= origin_incref(parent
);
174 * Link in a new blame entry to the scoreboard. Entries that cover the same
175 * line range have been removed from the scoreboard previously.
177 static void add_blame_entry(git_blame
*blame
, git_blame__entry
*e
)
179 git_blame__entry
*ent
, *prev
= NULL
;
181 origin_incref(e
->suspect
);
183 for (ent
= blame
->ent
; ent
&& ent
->lno
< e
->lno
; ent
= ent
->next
)
186 /* prev, if not NULL, is the last one that is below e */
189 e
->next
= prev
->next
;
192 e
->next
= blame
->ent
;
200 * src typically is on-stack; we want to copy the information in it to
201 * a malloced blame_entry that is already on the linked list of the scoreboard.
202 * The origin of dst loses a refcnt while the origin of src gains one.
204 static void dup_entry(git_blame__entry
*dst
, git_blame__entry
*src
)
206 git_blame__entry
*p
, *n
;
210 origin_incref(src
->suspect
);
211 origin_decref(dst
->suspect
);
212 memcpy(dst
, src
, sizeof(*src
));
219 * split_overlap() divided an existing blame e into up to three parts in split.
220 * Adjust the linked list of blames in the scoreboard to reflect the split.
222 static int split_blame(git_blame
*blame
, git_blame__entry
*split
, git_blame__entry
*e
)
224 git_blame__entry
*new_entry
;
226 if (split
[0].suspect
&& split
[2].suspect
) {
227 /* The first part (reuse storage for the existing entry e */
228 dup_entry(e
, &split
[0]);
230 /* The last part -- me */
231 new_entry
= git__malloc(sizeof(*new_entry
));
232 GIT_ERROR_CHECK_ALLOC(new_entry
);
233 memcpy(new_entry
, &(split
[2]), sizeof(git_blame__entry
));
234 add_blame_entry(blame
, new_entry
);
236 /* ... and the middle part -- parent */
237 new_entry
= git__malloc(sizeof(*new_entry
));
238 GIT_ERROR_CHECK_ALLOC(new_entry
);
239 memcpy(new_entry
, &(split
[1]), sizeof(git_blame__entry
));
240 add_blame_entry(blame
, new_entry
);
241 } else if (!split
[0].suspect
&& !split
[2].suspect
) {
243 * The parent covers the entire area; reuse storage for e and replace it
246 dup_entry(e
, &split
[1]);
247 } else if (split
[0].suspect
) {
248 /* me and then parent */
249 dup_entry(e
, &split
[0]);
250 new_entry
= git__malloc(sizeof(*new_entry
));
251 GIT_ERROR_CHECK_ALLOC(new_entry
);
252 memcpy(new_entry
, &(split
[1]), sizeof(git_blame__entry
));
253 add_blame_entry(blame
, new_entry
);
255 /* parent and then me */
256 dup_entry(e
, &split
[1]);
257 new_entry
= git__malloc(sizeof(*new_entry
));
258 GIT_ERROR_CHECK_ALLOC(new_entry
);
259 memcpy(new_entry
, &(split
[2]), sizeof(git_blame__entry
));
260 add_blame_entry(blame
, new_entry
);
267 * After splitting the blame, the origins used by the on-stack blame_entry
268 * should lose one refcnt each.
270 static void decref_split(git_blame__entry
*split
)
274 origin_decref(split
[i
].suspect
);
278 * Helper for blame_chunk(). blame_entry e is known to overlap with the patch
279 * hunk; split it and pass blame to the parent.
281 static int blame_overlap(
287 git_blame__origin
*parent
)
289 git_blame__entry split
[3] = {{0}};
291 split_overlap(split
, e
, tlno
, plno
, same
, parent
);
292 if (split
[1].suspect
)
293 if (split_blame(blame
, split
, e
) < 0)
301 * Process one hunk from the patch between the current suspect for blame_entry
302 * e and its parent. Find and split the overlap, and pass blame to the
303 * overlapping part to the parent.
305 static int blame_chunk(
310 git_blame__origin
*target
,
311 git_blame__origin
*parent
)
315 for (e
= blame
->ent
; e
; e
= e
->next
) {
316 if (e
->guilty
|| !same_suspect(e
->suspect
, target
))
318 if (same
<= e
->s_lno
)
320 if (tlno
< e
->s_lno
+ e
->num_lines
) {
321 if (blame_overlap(blame
, e
, tlno
, plno
, same
, parent
) < 0)
330 long start_a
, long count_a
,
331 long start_b
, long count_b
,
334 blame_chunk_cb_data
*d
= (blame_chunk_cb_data
*)cb_data
;
336 if (blame_chunk(d
->blame
, d
->tlno
, d
->plno
, start_b
, d
->target
, d
->parent
) < 0)
338 d
->plno
= start_a
+ count_a
;
339 d
->tlno
= start_b
+ count_b
;
344 static void trim_common_tail(mmfile_t
*a
, mmfile_t
*b
, long ctx
)
346 const int blk
= 1024;
347 long trimmed
= 0, recovered
= 0;
348 char *ap
= a
->ptr
+ a
->size
;
349 char *bp
= b
->ptr
+ b
->size
;
350 long smaller
= (long)((a
->size
< b
->size
) ? a
->size
: b
->size
);
355 while (blk
+ trimmed
<= smaller
&& !memcmp(ap
- blk
, bp
- blk
, blk
)) {
361 while (recovered
< trimmed
)
362 if (ap
[recovered
++] == '\n')
364 a
->size
-= trimmed
- recovered
;
365 b
->size
-= trimmed
- recovered
;
368 static int diff_hunks(mmfile_t file_a
, mmfile_t file_b
, void *cb_data
, git_blame_options
*options
)
370 xdemitconf_t xecfg
= {0};
371 xdemitcb_t ecb
= {0};
374 if (options
->flags
& GIT_BLAME_IGNORE_WHITESPACE
)
375 xpp
.flags
|= XDF_IGNORE_WHITESPACE
;
377 xecfg
.hunk_func
= my_emit
;
380 trim_common_tail(&file_a
, &file_b
, 0);
382 if (file_a
.size
> GIT_XDIFF_MAX_SIZE
||
383 file_b
.size
> GIT_XDIFF_MAX_SIZE
) {
384 git_error_set(GIT_ERROR_INVALID
, "file too large to blame");
388 return xdl_diff(&file_a
, &file_b
, &xpp
, &xecfg
, &ecb
);
391 static void fill_origin_blob(git_blame__origin
*o
, mmfile_t
*file
)
393 memset(file
, 0, sizeof(*file
));
395 file
->ptr
= (char*)git_blob_rawcontent(o
->blob
);
396 file
->size
= (size_t)git_blob_rawsize(o
->blob
);
400 static int pass_blame_to_parent(
402 git_blame__origin
*target
,
403 git_blame__origin
*parent
)
405 size_t last_in_target
;
406 mmfile_t file_p
, file_o
;
407 blame_chunk_cb_data d
= { blame
, target
, parent
, 0, 0 };
409 if (!find_last_in_target(&last_in_target
, blame
, target
))
410 return 1; /* nothing remains for this target */
412 fill_origin_blob(parent
, &file_p
);
413 fill_origin_blob(target
, &file_o
);
415 if (diff_hunks(file_p
, file_o
, &d
, &blame
->options
) < 0)
418 /* The reset (i.e. anything after tlno) are the same as the parent */
419 if (blame_chunk(blame
, d
.tlno
, d
.plno
, last_in_target
, target
, parent
) < 0)
425 static int paths_on_dup(void **old
, void *new)
432 static git_blame__origin
*find_origin(
435 git_blame__origin
*origin
)
437 git_blame__origin
*porigin
= NULL
;
438 git_diff
*difflist
= NULL
;
439 git_diff_options diffopts
= GIT_DIFF_OPTIONS_INIT
;
440 git_tree
*otree
=NULL
, *ptree
=NULL
;
442 /* Get the trees from this commit and its parent */
443 if (0 != git_commit_tree(&otree
, origin
->commit
) ||
444 0 != git_commit_tree(&ptree
, parent
))
447 /* Configure the diff */
448 diffopts
.context_lines
= 0;
449 diffopts
.flags
= GIT_DIFF_SKIP_BINARY_CHECK
;
451 /* Check to see if files we're interested have changed */
452 diffopts
.pathspec
.count
= blame
->paths
.length
;
453 diffopts
.pathspec
.strings
= (char**)blame
->paths
.contents
;
454 if (0 != git_diff_tree_to_tree(&difflist
, blame
->repository
, ptree
, otree
, &diffopts
))
457 if (!git_diff_num_deltas(difflist
)) {
458 /* No changes; copy data */
459 git_blame__get_origin(&porigin
, blame
, parent
, origin
->path
);
461 git_diff_find_options findopts
= GIT_DIFF_FIND_OPTIONS_INIT
;
464 /* Generate a full diff between the two trees */
465 git_diff_free(difflist
);
466 diffopts
.pathspec
.count
= 0;
467 if (0 != git_diff_tree_to_tree(&difflist
, blame
->repository
, ptree
, otree
, &diffopts
))
470 /* Let diff find renames */
471 findopts
.flags
= GIT_DIFF_FIND_RENAMES
;
472 if (0 != git_diff_find_similar(difflist
, &findopts
))
475 /* Find one that matches */
476 for (i
=0; i
<(int)git_diff_num_deltas(difflist
); i
++) {
477 const git_diff_delta
*delta
= git_diff_get_delta(difflist
, i
);
479 if (!git_vector_bsearch(NULL
, &blame
->paths
, delta
->new_file
.path
))
481 git_vector_insert_sorted(&blame
->paths
, (void*)git__strdup(delta
->old_file
.path
),
483 make_origin(&porigin
, parent
, delta
->old_file
.path
);
489 git_diff_free(difflist
);
490 git_tree_free(otree
);
491 git_tree_free(ptree
);
496 * The blobs of origin and porigin exactly match, so everything origin is
497 * suspected for can be blamed on the parent.
499 static int pass_whole_blame(git_blame
*blame
,
500 git_blame__origin
*origin
, git_blame__origin
*porigin
)
504 if (!porigin
->blob
&&
505 git_object_lookup((git_object
**)&porigin
->blob
, blame
->repository
,
506 git_blob_id(origin
->blob
), GIT_OBJECT_BLOB
) < 0)
508 for (e
=blame
->ent
; e
; e
=e
->next
) {
509 if (!same_suspect(e
->suspect
, origin
))
511 origin_incref(porigin
);
512 origin_decref(e
->suspect
);
513 e
->suspect
= porigin
;
519 static int pass_blame(git_blame
*blame
, git_blame__origin
*origin
, uint32_t opt
)
521 git_commit
*commit
= origin
->commit
;
523 git_blame__origin
*sg_buf
[16];
524 git_blame__origin
*porigin
, **sg_origin
= sg_buf
;
527 num_parents
= git_commit_parentcount(commit
);
528 if (!git_oid_cmp(git_commit_id(commit
), &blame
->options
.oldest_commit
))
529 /* Stop at oldest specified commit */
531 else if (opt
& GIT_BLAME_FIRST_PARENT
&& num_parents
> 1)
532 /* Limit search to the first parent */
536 git_oid_cpy(&blame
->options
.oldest_commit
, git_commit_id(commit
));
538 } else if (num_parents
< (int)ARRAY_SIZE(sg_buf
))
539 memset(sg_buf
, 0, sizeof(sg_buf
));
541 sg_origin
= git__calloc(num_parents
, sizeof(*sg_origin
));
542 GIT_ERROR_CHECK_ALLOC(sg_origin
);
545 for (i
=0; i
<num_parents
; i
++) {
552 if ((error
= git_commit_parent(&p
, origin
->commit
, i
)) < 0)
554 porigin
= find_origin(blame
, p
, origin
);
558 * We only have to decrement the parent's
559 * reference count when no porigin has
560 * been created, as otherwise the commit
561 * is assigned to the created object.
566 if (porigin
->blob
&& origin
->blob
&&
567 !git_oid_cmp(git_blob_id(porigin
->blob
), git_blob_id(origin
->blob
))) {
568 error
= pass_whole_blame(blame
, origin
, porigin
);
569 origin_decref(porigin
);
572 for (j
= same
= 0; j
<i
; j
++)
574 !git_oid_cmp(git_blob_id(sg_origin
[j
]->blob
), git_blob_id(porigin
->blob
))) {
579 sg_origin
[i
] = porigin
;
581 origin_decref(porigin
);
585 for (i
=0; i
<num_parents
; i
++) {
586 git_blame__origin
*porigin
= sg_origin
[i
];
589 if (!origin
->previous
) {
590 origin_incref(porigin
);
591 origin
->previous
= porigin
;
594 if ((ret
= pass_blame_to_parent(blame
, origin
, porigin
)) != 0) {
602 /* TODO: optionally find moves in parents' files */
604 /* TODO: optionally find copies in parents' files */
607 for (i
=0; i
<num_parents
; i
++)
609 origin_decref(sg_origin
[i
]);
610 if (sg_origin
!= sg_buf
)
611 git__free(sg_origin
);
616 * If two blame entries that are next to each other came from
617 * contiguous lines in the same origin (i.e. <commit, path> pair),
618 * merge them together.
620 static void coalesce(git_blame
*blame
)
622 git_blame__entry
*ent
, *next
;
624 for (ent
=blame
->ent
; ent
&& (next
= ent
->next
); ent
= next
) {
625 if (same_suspect(ent
->suspect
, next
->suspect
) &&
626 ent
->guilty
== next
->guilty
&&
627 ent
->s_lno
+ ent
->num_lines
== next
->s_lno
)
629 ent
->num_lines
+= next
->num_lines
;
630 ent
->next
= next
->next
;
632 ent
->next
->prev
= ent
;
633 origin_decref(next
->suspect
);
636 next
= ent
; /* again */
641 int git_blame__like_git(git_blame
*blame
, uint32_t opt
)
646 git_blame__entry
*ent
;
647 git_blame__origin
*suspect
= NULL
;
649 /* Find a suspect to break down */
650 for (ent
= blame
->ent
; !suspect
&& ent
; ent
= ent
->next
)
652 suspect
= ent
->suspect
;
656 /* We'll use this suspect later in the loop, so hold on to it for now. */
657 origin_incref(suspect
);
659 if ((error
= pass_blame(blame
, suspect
, opt
)) < 0)
662 /* Take responsibility for the remaining entries */
663 for (ent
= blame
->ent
; ent
; ent
= ent
->next
) {
664 if (same_suspect(ent
->suspect
, suspect
)) {
666 ent
->is_boundary
= !git_oid_cmp(
667 git_commit_id(suspect
->commit
),
668 &blame
->options
.oldest_commit
);
671 origin_decref(suspect
);
680 void git_blame__free_entry(git_blame__entry
*ent
)
683 origin_decref(ent
->suspect
);