2 * Copyright (C) the libgit2 contributors. All rights reserved.
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
12 #include "xdiff/xinclude.h"
13 #include "diff_xdiff.h"
16 * Origin is refcounted and usually we keep the blob contents to be
19 static git_blame__origin
*origin_incref(git_blame__origin
*o
)
26 static void origin_decref(git_blame__origin
*o
)
28 if (o
&& --o
->refcnt
<= 0) {
30 origin_decref(o
->previous
);
31 git_blob_free(o
->blob
);
32 git_commit_free(o
->commit
);
37 /* Given a commit and a path in it, create a new origin structure. */
38 static int make_origin(git_blame__origin
**out
, git_commit
*commit
, const char *path
)
42 size_t path_len
= strlen(path
), alloc_len
;
45 if ((error
= git_object_lookup_bypath(&blob
, (git_object
*)commit
,
46 path
, GIT_OBJECT_BLOB
)) < 0)
49 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len
, sizeof(*o
), path_len
);
50 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len
, alloc_len
, 1);
51 o
= git__calloc(1, alloc_len
);
52 GIT_ERROR_CHECK_ALLOC(o
);
55 o
->blob
= (git_blob
*) blob
;
57 strcpy(o
->path
, path
);
64 /* Locate an existing origin or create a new one. */
65 int git_blame__get_origin(
66 git_blame__origin
**out
,
73 for (e
= blame
->ent
; e
; e
= e
->next
) {
74 if (e
->suspect
->commit
== commit
&& !strcmp(e
->suspect
->path
, path
)) {
75 *out
= origin_incref(e
->suspect
);
78 return make_origin(out
, commit
, path
);
81 typedef struct blame_chunk_cb_data
{
83 git_blame__origin
*target
;
84 git_blame__origin
*parent
;
89 static bool same_suspect(git_blame__origin
*a
, git_blame__origin
*b
)
93 if (git_oid_cmp(git_commit_id(a
->commit
), git_commit_id(b
->commit
)))
95 return 0 == strcmp(a
->path
, b
->path
);
98 /* find the line number of the last line the target is suspected for */
99 static bool find_last_in_target(size_t *out
, git_blame
*blame
, git_blame__origin
*target
)
102 size_t last_in_target
= 0;
107 for (e
=blame
->ent
; e
; e
=e
->next
) {
108 if (e
->guilty
|| !same_suspect(e
->suspect
, target
))
110 if (last_in_target
< e
->s_lno
+ e
->num_lines
) {
112 last_in_target
= e
->s_lno
+ e
->num_lines
;
116 *out
= last_in_target
;
121 * It is known that lines between tlno to same came from parent, and e
122 * has an overlap with that range. it also is known that parent's
123 * line plno corresponds to e's line tlno.
126 * <------> (entirely within)
127 * <------------> (extends past)
128 * <------------> (starts before)
129 * <------------------> (entirely encloses)
131 * Split e into potentially three parts; before this chunk, the chunk
132 * to be blamed for the parent, and after that portion.
134 static void split_overlap(git_blame__entry
*split
, git_blame__entry
*e
,
135 size_t tlno
, size_t plno
, size_t same
, git_blame__origin
*parent
)
137 size_t chunk_end_lno
;
139 if (e
->s_lno
< tlno
) {
140 /* there is a pre-chunk part not blamed on the parent */
141 split
[0].suspect
= origin_incref(e
->suspect
);
142 split
[0].lno
= e
->lno
;
143 split
[0].s_lno
= e
->s_lno
;
144 split
[0].num_lines
= tlno
- e
->s_lno
;
145 split
[1].lno
= e
->lno
+ tlno
- e
->s_lno
;
146 split
[1].s_lno
= plno
;
148 split
[1].lno
= e
->lno
;
149 split
[1].s_lno
= plno
+ (e
->s_lno
- tlno
);
152 if (same
< e
->s_lno
+ e
->num_lines
) {
153 /* there is a post-chunk part not blamed on parent */
154 split
[2].suspect
= origin_incref(e
->suspect
);
155 split
[2].lno
= e
->lno
+ (same
- e
->s_lno
);
156 split
[2].s_lno
= e
->s_lno
+ (same
- e
->s_lno
);
157 split
[2].num_lines
= e
->s_lno
+ e
->num_lines
- same
;
158 chunk_end_lno
= split
[2].lno
;
160 chunk_end_lno
= e
->lno
+ e
->num_lines
;
162 split
[1].num_lines
= chunk_end_lno
- split
[1].lno
;
165 * if it turns out there is nothing to blame the parent for, forget about
166 * the splitting. !split[1].suspect signals this.
168 if (split
[1].num_lines
< 1)
170 split
[1].suspect
= origin_incref(parent
);
174 * Link in a new blame entry to the scoreboard. Entries that cover the same
175 * line range have been removed from the scoreboard previously.
177 static void add_blame_entry(git_blame
*blame
, git_blame__entry
*e
)
179 git_blame__entry
*ent
, *prev
= NULL
;
181 origin_incref(e
->suspect
);
183 for (ent
= blame
->ent
; ent
&& ent
->lno
< e
->lno
; ent
= ent
->next
)
186 /* prev, if not NULL, is the last one that is below e */
189 e
->next
= prev
->next
;
192 e
->next
= blame
->ent
;
200 * src typically is on-stack; we want to copy the information in it to
201 * a malloced blame_entry that is already on the linked list of the scoreboard.
202 * The origin of dst loses a refcnt while the origin of src gains one.
204 static void dup_entry(git_blame__entry
*dst
, git_blame__entry
*src
)
206 git_blame__entry
*p
, *n
;
210 origin_incref(src
->suspect
);
211 origin_decref(dst
->suspect
);
212 memcpy(dst
, src
, sizeof(*src
));
219 * split_overlap() divided an existing blame e into up to three parts in split.
220 * Adjust the linked list of blames in the scoreboard to reflect the split.
222 static void split_blame(git_blame
*blame
, git_blame__entry
*split
, git_blame__entry
*e
)
224 git_blame__entry
*new_entry
;
226 if (split
[0].suspect
&& split
[2].suspect
) {
227 /* The first part (reuse storage for the existing entry e */
228 dup_entry(e
, &split
[0]);
230 /* The last part -- me */
231 new_entry
= git__malloc(sizeof(*new_entry
));
232 memcpy(new_entry
, &(split
[2]), sizeof(git_blame__entry
));
233 add_blame_entry(blame
, new_entry
);
235 /* ... and the middle part -- parent */
236 new_entry
= git__malloc(sizeof(*new_entry
));
237 memcpy(new_entry
, &(split
[1]), sizeof(git_blame__entry
));
238 add_blame_entry(blame
, new_entry
);
239 } else if (!split
[0].suspect
&& !split
[2].suspect
) {
241 * The parent covers the entire area; reuse storage for e and replace it
244 dup_entry(e
, &split
[1]);
245 } else if (split
[0].suspect
) {
246 /* me and then parent */
247 dup_entry(e
, &split
[0]);
248 new_entry
= git__malloc(sizeof(*new_entry
));
249 memcpy(new_entry
, &(split
[1]), sizeof(git_blame__entry
));
250 add_blame_entry(blame
, new_entry
);
252 /* parent and then me */
253 dup_entry(e
, &split
[1]);
254 new_entry
= git__malloc(sizeof(*new_entry
));
255 memcpy(new_entry
, &(split
[2]), sizeof(git_blame__entry
));
256 add_blame_entry(blame
, new_entry
);
261 * After splitting the blame, the origins used by the on-stack blame_entry
262 * should lose one refcnt each.
264 static void decref_split(git_blame__entry
*split
)
268 origin_decref(split
[i
].suspect
);
272 * Helper for blame_chunk(). blame_entry e is known to overlap with the patch
273 * hunk; split it and pass blame to the parent.
275 static void blame_overlap(
281 git_blame__origin
*parent
)
283 git_blame__entry split
[3] = {{0}};
285 split_overlap(split
, e
, tlno
, plno
, same
, parent
);
286 if (split
[1].suspect
)
287 split_blame(blame
, split
, e
);
292 * Process one hunk from the patch between the current suspect for blame_entry
293 * e and its parent. Find and split the overlap, and pass blame to the
294 * overlapping part to the parent.
296 static void blame_chunk(
301 git_blame__origin
*target
,
302 git_blame__origin
*parent
)
306 for (e
= blame
->ent
; e
; e
= e
->next
) {
307 if (e
->guilty
|| !same_suspect(e
->suspect
, target
))
309 if (same
<= e
->s_lno
)
311 if (tlno
< e
->s_lno
+ e
->num_lines
) {
312 blame_overlap(blame
, e
, tlno
, plno
, same
, parent
);
318 long start_a
, long count_a
,
319 long start_b
, long count_b
,
322 blame_chunk_cb_data
*d
= (blame_chunk_cb_data
*)cb_data
;
324 blame_chunk(d
->blame
, d
->tlno
, d
->plno
, start_b
, d
->target
, d
->parent
);
325 d
->plno
= start_a
+ count_a
;
326 d
->tlno
= start_b
+ count_b
;
331 static void trim_common_tail(mmfile_t
*a
, mmfile_t
*b
, long ctx
)
333 const int blk
= 1024;
334 long trimmed
= 0, recovered
= 0;
335 char *ap
= a
->ptr
+ a
->size
;
336 char *bp
= b
->ptr
+ b
->size
;
337 long smaller
= (long)((a
->size
< b
->size
) ? a
->size
: b
->size
);
342 while (blk
+ trimmed
<= smaller
&& !memcmp(ap
- blk
, bp
- blk
, blk
)) {
348 while (recovered
< trimmed
)
349 if (ap
[recovered
++] == '\n')
351 a
->size
-= trimmed
- recovered
;
352 b
->size
-= trimmed
- recovered
;
355 static int diff_hunks(mmfile_t file_a
, mmfile_t file_b
, void *cb_data
)
358 xdemitconf_t xecfg
= {0};
359 xdemitcb_t ecb
= {0};
361 xecfg
.hunk_func
= my_emit
;
364 trim_common_tail(&file_a
, &file_b
, 0);
366 if (file_a
.size
> GIT_XDIFF_MAX_SIZE
||
367 file_b
.size
> GIT_XDIFF_MAX_SIZE
) {
368 git_error_set(GIT_ERROR_INVALID
, "file too large to blame");
372 return xdl_diff(&file_a
, &file_b
, &xpp
, &xecfg
, &ecb
);
375 static void fill_origin_blob(git_blame__origin
*o
, mmfile_t
*file
)
377 memset(file
, 0, sizeof(*file
));
379 file
->ptr
= (char*)git_blob_rawcontent(o
->blob
);
380 file
->size
= (size_t)git_blob_rawsize(o
->blob
);
384 static int pass_blame_to_parent(
386 git_blame__origin
*target
,
387 git_blame__origin
*parent
)
389 size_t last_in_target
;
390 mmfile_t file_p
, file_o
;
391 blame_chunk_cb_data d
= { blame
, target
, parent
, 0, 0 };
393 if (!find_last_in_target(&last_in_target
, blame
, target
))
394 return 1; /* nothing remains for this target */
396 fill_origin_blob(parent
, &file_p
);
397 fill_origin_blob(target
, &file_o
);
399 if (diff_hunks(file_p
, file_o
, &d
) < 0)
402 /* The reset (i.e. anything after tlno) are the same as the parent */
403 blame_chunk(blame
, d
.tlno
, d
.plno
, last_in_target
, target
, parent
);
408 static int paths_on_dup(void **old
, void *new)
415 static git_blame__origin
* find_origin(
418 git_blame__origin
*origin
)
420 git_blame__origin
*porigin
= NULL
;
421 git_diff
*difflist
= NULL
;
422 git_diff_options diffopts
= GIT_DIFF_OPTIONS_INIT
;
423 git_tree
*otree
=NULL
, *ptree
=NULL
;
425 /* Get the trees from this commit and its parent */
426 if (0 != git_commit_tree(&otree
, origin
->commit
) ||
427 0 != git_commit_tree(&ptree
, parent
))
430 /* Configure the diff */
431 diffopts
.context_lines
= 0;
432 diffopts
.flags
= GIT_DIFF_SKIP_BINARY_CHECK
;
434 /* Check to see if files we're interested have changed */
435 diffopts
.pathspec
.count
= blame
->paths
.length
;
436 diffopts
.pathspec
.strings
= (char**)blame
->paths
.contents
;
437 if (0 != git_diff_tree_to_tree(&difflist
, blame
->repository
, ptree
, otree
, &diffopts
))
440 if (!git_diff_num_deltas(difflist
)) {
441 /* No changes; copy data */
442 git_blame__get_origin(&porigin
, blame
, parent
, origin
->path
);
444 git_diff_find_options findopts
= GIT_DIFF_FIND_OPTIONS_INIT
;
447 /* Generate a full diff between the two trees */
448 git_diff_free(difflist
);
449 diffopts
.pathspec
.count
= 0;
450 if (0 != git_diff_tree_to_tree(&difflist
, blame
->repository
, ptree
, otree
, &diffopts
))
453 /* Let diff find renames */
454 findopts
.flags
= GIT_DIFF_FIND_RENAMES
;
455 if (0 != git_diff_find_similar(difflist
, &findopts
))
458 /* Find one that matches */
459 for (i
=0; i
<(int)git_diff_num_deltas(difflist
); i
++) {
460 const git_diff_delta
*delta
= git_diff_get_delta(difflist
, i
);
462 if (!git_vector_bsearch(NULL
, &blame
->paths
, delta
->new_file
.path
))
464 git_vector_insert_sorted(&blame
->paths
, (void*)git__strdup(delta
->old_file
.path
),
466 make_origin(&porigin
, parent
, delta
->old_file
.path
);
472 git_diff_free(difflist
);
473 git_tree_free(otree
);
474 git_tree_free(ptree
);
479 * The blobs of origin and porigin exactly match, so everything origin is
480 * suspected for can be blamed on the parent.
482 static int pass_whole_blame(git_blame
*blame
,
483 git_blame__origin
*origin
, git_blame__origin
*porigin
)
487 if (!porigin
->blob
&&
488 git_object_lookup((git_object
**)&porigin
->blob
, blame
->repository
,
489 git_blob_id(origin
->blob
), GIT_OBJECT_BLOB
) < 0)
491 for (e
=blame
->ent
; e
; e
=e
->next
) {
492 if (!same_suspect(e
->suspect
, origin
))
494 origin_incref(porigin
);
495 origin_decref(e
->suspect
);
496 e
->suspect
= porigin
;
502 static int pass_blame(git_blame
*blame
, git_blame__origin
*origin
, uint32_t opt
)
504 git_commit
*commit
= origin
->commit
;
506 git_blame__origin
*sg_buf
[16];
507 git_blame__origin
*porigin
, **sg_origin
= sg_buf
;
510 num_parents
= git_commit_parentcount(commit
);
511 if (!git_oid_cmp(git_commit_id(commit
), &blame
->options
.oldest_commit
))
512 /* Stop at oldest specified commit */
514 else if (opt
& GIT_BLAME_FIRST_PARENT
&& num_parents
> 1)
515 /* Limit search to the first parent */
519 git_oid_cpy(&blame
->options
.oldest_commit
, git_commit_id(commit
));
521 } else if (num_parents
< (int)ARRAY_SIZE(sg_buf
))
522 memset(sg_buf
, 0, sizeof(sg_buf
));
524 sg_origin
= git__calloc(num_parents
, sizeof(*sg_origin
));
525 GIT_ERROR_CHECK_ALLOC(sg_origin
);
528 for (i
=0; i
<num_parents
; i
++) {
535 if ((error
= git_commit_parent(&p
, origin
->commit
, i
)) < 0)
537 porigin
= find_origin(blame
, p
, origin
);
541 * We only have to decrement the parent's
542 * reference count when no porigin has
543 * been created, as otherwise the commit
544 * is assigned to the created object.
549 if (porigin
->blob
&& origin
->blob
&&
550 !git_oid_cmp(git_blob_id(porigin
->blob
), git_blob_id(origin
->blob
))) {
551 error
= pass_whole_blame(blame
, origin
, porigin
);
552 origin_decref(porigin
);
555 for (j
= same
= 0; j
<i
; j
++)
557 !git_oid_cmp(git_blob_id(sg_origin
[j
]->blob
), git_blob_id(porigin
->blob
))) {
562 sg_origin
[i
] = porigin
;
564 origin_decref(porigin
);
568 for (i
=0; i
<num_parents
; i
++) {
569 git_blame__origin
*porigin
= sg_origin
[i
];
572 if (!origin
->previous
) {
573 origin_incref(porigin
);
574 origin
->previous
= porigin
;
577 if ((ret
= pass_blame_to_parent(blame
, origin
, porigin
)) != 0) {
585 /* TODO: optionally find moves in parents' files */
587 /* TODO: optionally find copies in parents' files */
590 for (i
=0; i
<num_parents
; i
++)
592 origin_decref(sg_origin
[i
]);
593 if (sg_origin
!= sg_buf
)
594 git__free(sg_origin
);
599 * If two blame entries that are next to each other came from
600 * contiguous lines in the same origin (i.e. <commit, path> pair),
601 * merge them together.
603 static void coalesce(git_blame
*blame
)
605 git_blame__entry
*ent
, *next
;
607 for (ent
=blame
->ent
; ent
&& (next
= ent
->next
); ent
= next
) {
608 if (same_suspect(ent
->suspect
, next
->suspect
) &&
609 ent
->guilty
== next
->guilty
&&
610 ent
->s_lno
+ ent
->num_lines
== next
->s_lno
)
612 ent
->num_lines
+= next
->num_lines
;
613 ent
->next
= next
->next
;
615 ent
->next
->prev
= ent
;
616 origin_decref(next
->suspect
);
619 next
= ent
; /* again */
624 int git_blame__like_git(git_blame
*blame
, uint32_t opt
)
629 git_blame__entry
*ent
;
630 git_blame__origin
*suspect
= NULL
;
632 /* Find a suspect to break down */
633 for (ent
= blame
->ent
; !suspect
&& ent
; ent
= ent
->next
)
635 suspect
= ent
->suspect
;
639 /* We'll use this suspect later in the loop, so hold on to it for now. */
640 origin_incref(suspect
);
642 if ((error
= pass_blame(blame
, suspect
, opt
)) < 0)
645 /* Take responsibility for the remaining entries */
646 for (ent
= blame
->ent
; ent
; ent
= ent
->next
) {
647 if (same_suspect(ent
->suspect
, suspect
)) {
649 ent
->is_boundary
= !git_oid_cmp(
650 git_commit_id(suspect
->commit
),
651 &blame
->options
.oldest_commit
);
654 origin_decref(suspect
);
663 void git_blame__free_entry(git_blame__entry
*ent
)
666 origin_decref(ent
->suspect
);