2 * Copyright (C) the libgit2 contributors. All rights reserved.
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
11 #include "xdiff/xinclude.h"
12 #include "diff_xdiff.h"
15 * Origin is refcounted and usually we keep the blob contents to be
18 static git_blame__origin
*origin_incref(git_blame__origin
*o
)
25 static void origin_decref(git_blame__origin
*o
)
27 if (o
&& --o
->refcnt
<= 0) {
29 origin_decref(o
->previous
);
30 git_blob_free(o
->blob
);
31 git_commit_free(o
->commit
);
36 /* Given a commit and a path in it, create a new origin structure. */
37 static int make_origin(git_blame__origin
**out
, git_commit
*commit
, const char *path
)
41 size_t path_len
= strlen(path
), alloc_len
;
44 if ((error
= git_object_lookup_bypath(&blob
, (git_object
*)commit
,
45 path
, GIT_OBJ_BLOB
)) < 0)
48 GITERR_CHECK_ALLOC_ADD(&alloc_len
, sizeof(*o
), path_len
);
49 GITERR_CHECK_ALLOC_ADD(&alloc_len
, alloc_len
, 1);
50 o
= git__calloc(1, alloc_len
);
51 GITERR_CHECK_ALLOC(o
);
54 o
->blob
= (git_blob
*) blob
;
56 strcpy(o
->path
, path
);
63 /* Locate an existing origin or create a new one. */
64 int git_blame__get_origin(
65 git_blame__origin
**out
,
72 for (e
= blame
->ent
; e
; e
= e
->next
) {
73 if (e
->suspect
->commit
== commit
&& !strcmp(e
->suspect
->path
, path
)) {
74 *out
= origin_incref(e
->suspect
);
77 return make_origin(out
, commit
, path
);
80 typedef struct blame_chunk_cb_data
{
82 git_blame__origin
*target
;
83 git_blame__origin
*parent
;
88 static bool same_suspect(git_blame__origin
*a
, git_blame__origin
*b
)
92 if (git_oid_cmp(git_commit_id(a
->commit
), git_commit_id(b
->commit
)))
94 return 0 == strcmp(a
->path
, b
->path
);
97 /* find the line number of the last line the target is suspected for */
98 static bool find_last_in_target(size_t *out
, git_blame
*blame
, git_blame__origin
*target
)
101 size_t last_in_target
= 0;
106 for (e
=blame
->ent
; e
; e
=e
->next
) {
107 if (e
->guilty
|| !same_suspect(e
->suspect
, target
))
109 if (last_in_target
< e
->s_lno
+ e
->num_lines
) {
111 last_in_target
= e
->s_lno
+ e
->num_lines
;
115 *out
= last_in_target
;
120 * It is known that lines between tlno to same came from parent, and e
121 * has an overlap with that range. it also is known that parent's
122 * line plno corresponds to e's line tlno.
125 * <------> (entirely within)
126 * <------------> (extends past)
127 * <------------> (starts before)
128 * <------------------> (entirely encloses)
130 * Split e into potentially three parts; before this chunk, the chunk
131 * to be blamed for the parent, and after that portion.
133 static void split_overlap(git_blame__entry
*split
, git_blame__entry
*e
,
134 size_t tlno
, size_t plno
, size_t same
, git_blame__origin
*parent
)
136 size_t chunk_end_lno
;
138 if (e
->s_lno
< tlno
) {
139 /* there is a pre-chunk part not blamed on the parent */
140 split
[0].suspect
= origin_incref(e
->suspect
);
141 split
[0].lno
= e
->lno
;
142 split
[0].s_lno
= e
->s_lno
;
143 split
[0].num_lines
= tlno
- e
->s_lno
;
144 split
[1].lno
= e
->lno
+ tlno
- e
->s_lno
;
145 split
[1].s_lno
= plno
;
147 split
[1].lno
= e
->lno
;
148 split
[1].s_lno
= plno
+ (e
->s_lno
- tlno
);
151 if (same
< e
->s_lno
+ e
->num_lines
) {
152 /* there is a post-chunk part not blamed on parent */
153 split
[2].suspect
= origin_incref(e
->suspect
);
154 split
[2].lno
= e
->lno
+ (same
- e
->s_lno
);
155 split
[2].s_lno
= e
->s_lno
+ (same
- e
->s_lno
);
156 split
[2].num_lines
= e
->s_lno
+ e
->num_lines
- same
;
157 chunk_end_lno
= split
[2].lno
;
159 chunk_end_lno
= e
->lno
+ e
->num_lines
;
161 split
[1].num_lines
= chunk_end_lno
- split
[1].lno
;
164 * if it turns out there is nothing to blame the parent for, forget about
165 * the splitting. !split[1].suspect signals this.
167 if (split
[1].num_lines
< 1)
169 split
[1].suspect
= origin_incref(parent
);
173 * Link in a new blame entry to the scoreboard. Entries that cover the same
174 * line range have been removed from the scoreboard previously.
176 static void add_blame_entry(git_blame
*blame
, git_blame__entry
*e
)
178 git_blame__entry
*ent
, *prev
= NULL
;
180 origin_incref(e
->suspect
);
182 for (ent
= blame
->ent
; ent
&& ent
->lno
< e
->lno
; ent
= ent
->next
)
185 /* prev, if not NULL, is the last one that is below e */
188 e
->next
= prev
->next
;
191 e
->next
= blame
->ent
;
199 * src typically is on-stack; we want to copy the information in it to
200 * a malloced blame_entry that is already on the linked list of the scoreboard.
201 * The origin of dst loses a refcnt while the origin of src gains one.
203 static void dup_entry(git_blame__entry
*dst
, git_blame__entry
*src
)
205 git_blame__entry
*p
, *n
;
209 origin_incref(src
->suspect
);
210 origin_decref(dst
->suspect
);
211 memcpy(dst
, src
, sizeof(*src
));
218 * split_overlap() divided an existing blame e into up to three parts in split.
219 * Adjust the linked list of blames in the scoreboard to reflect the split.
221 static void split_blame(git_blame
*blame
, git_blame__entry
*split
, git_blame__entry
*e
)
223 git_blame__entry
*new_entry
;
225 if (split
[0].suspect
&& split
[2].suspect
) {
226 /* The first part (reuse storage for the existing entry e */
227 dup_entry(e
, &split
[0]);
229 /* The last part -- me */
230 new_entry
= git__malloc(sizeof(*new_entry
));
231 memcpy(new_entry
, &(split
[2]), sizeof(git_blame__entry
));
232 add_blame_entry(blame
, new_entry
);
234 /* ... and the middle part -- parent */
235 new_entry
= git__malloc(sizeof(*new_entry
));
236 memcpy(new_entry
, &(split
[1]), sizeof(git_blame__entry
));
237 add_blame_entry(blame
, new_entry
);
238 } else if (!split
[0].suspect
&& !split
[2].suspect
) {
240 * The parent covers the entire area; reuse storage for e and replace it
243 dup_entry(e
, &split
[1]);
244 } else if (split
[0].suspect
) {
245 /* me and then parent */
246 dup_entry(e
, &split
[0]);
247 new_entry
= git__malloc(sizeof(*new_entry
));
248 memcpy(new_entry
, &(split
[1]), sizeof(git_blame__entry
));
249 add_blame_entry(blame
, new_entry
);
251 /* parent and then me */
252 dup_entry(e
, &split
[1]);
253 new_entry
= git__malloc(sizeof(*new_entry
));
254 memcpy(new_entry
, &(split
[2]), sizeof(git_blame__entry
));
255 add_blame_entry(blame
, new_entry
);
260 * After splitting the blame, the origins used by the on-stack blame_entry
261 * should lose one refcnt each.
263 static void decref_split(git_blame__entry
*split
)
267 origin_decref(split
[i
].suspect
);
271 * Helper for blame_chunk(). blame_entry e is known to overlap with the patch
272 * hunk; split it and pass blame to the parent.
274 static void blame_overlap(
280 git_blame__origin
*parent
)
282 git_blame__entry split
[3] = {{0}};
284 split_overlap(split
, e
, tlno
, plno
, same
, parent
);
285 if (split
[1].suspect
)
286 split_blame(blame
, split
, e
);
291 * Process one hunk from the patch between the current suspect for blame_entry
292 * e and its parent. Find and split the overlap, and pass blame to the
293 * overlapping part to the parent.
295 static void blame_chunk(
300 git_blame__origin
*target
,
301 git_blame__origin
*parent
)
305 for (e
= blame
->ent
; e
; e
= e
->next
) {
306 if (e
->guilty
|| !same_suspect(e
->suspect
, target
))
308 if (same
<= e
->s_lno
)
310 if (tlno
< e
->s_lno
+ e
->num_lines
) {
311 blame_overlap(blame
, e
, tlno
, plno
, same
, parent
);
317 long start_a
, long count_a
,
318 long start_b
, long count_b
,
321 blame_chunk_cb_data
*d
= (blame_chunk_cb_data
*)cb_data
;
323 blame_chunk(d
->blame
, d
->tlno
, d
->plno
, start_b
, d
->target
, d
->parent
);
324 d
->plno
= start_a
+ count_a
;
325 d
->tlno
= start_b
+ count_b
;
330 static void trim_common_tail(mmfile_t
*a
, mmfile_t
*b
, long ctx
)
332 const int blk
= 1024;
333 long trimmed
= 0, recovered
= 0;
334 char *ap
= a
->ptr
+ a
->size
;
335 char *bp
= b
->ptr
+ b
->size
;
336 long smaller
= (long)((a
->size
< b
->size
) ? a
->size
: b
->size
);
341 while (blk
+ trimmed
<= smaller
&& !memcmp(ap
- blk
, bp
- blk
, blk
)) {
347 while (recovered
< trimmed
)
348 if (ap
[recovered
++] == '\n')
350 a
->size
-= trimmed
- recovered
;
351 b
->size
-= trimmed
- recovered
;
354 static int diff_hunks(mmfile_t file_a
, mmfile_t file_b
, void *cb_data
)
357 xdemitconf_t xecfg
= {0};
358 xdemitcb_t ecb
= {0};
360 xecfg
.hunk_func
= my_emit
;
363 trim_common_tail(&file_a
, &file_b
, 0);
365 if (file_a
.size
> GIT_XDIFF_MAX_SIZE
||
366 file_b
.size
> GIT_XDIFF_MAX_SIZE
) {
367 giterr_set(GITERR_INVALID
, "file too large to blame");
371 return xdl_diff(&file_a
, &file_b
, &xpp
, &xecfg
, &ecb
);
374 static void fill_origin_blob(git_blame__origin
*o
, mmfile_t
*file
)
376 memset(file
, 0, sizeof(*file
));
378 file
->ptr
= (char*)git_blob_rawcontent(o
->blob
);
379 file
->size
= (size_t)git_blob_rawsize(o
->blob
);
383 static int pass_blame_to_parent(
385 git_blame__origin
*target
,
386 git_blame__origin
*parent
)
388 size_t last_in_target
;
389 mmfile_t file_p
, file_o
;
390 blame_chunk_cb_data d
= { blame
, target
, parent
, 0, 0 };
392 if (!find_last_in_target(&last_in_target
, blame
, target
))
393 return 1; /* nothing remains for this target */
395 fill_origin_blob(parent
, &file_p
);
396 fill_origin_blob(target
, &file_o
);
398 if (diff_hunks(file_p
, file_o
, &d
) < 0)
401 /* The reset (i.e. anything after tlno) are the same as the parent */
402 blame_chunk(blame
, d
.tlno
, d
.plno
, last_in_target
, target
, parent
);
407 static int paths_on_dup(void **old
, void *new)
414 static git_blame__origin
* find_origin(
417 git_blame__origin
*origin
)
419 git_blame__origin
*porigin
= NULL
;
420 git_diff
*difflist
= NULL
;
421 git_diff_options diffopts
= GIT_DIFF_OPTIONS_INIT
;
422 git_tree
*otree
=NULL
, *ptree
=NULL
;
424 /* Get the trees from this commit and its parent */
425 if (0 != git_commit_tree(&otree
, origin
->commit
) ||
426 0 != git_commit_tree(&ptree
, parent
))
429 /* Configure the diff */
430 diffopts
.context_lines
= 0;
431 diffopts
.flags
= GIT_DIFF_SKIP_BINARY_CHECK
;
433 /* Check to see if files we're interested have changed */
434 diffopts
.pathspec
.count
= blame
->paths
.length
;
435 diffopts
.pathspec
.strings
= (char**)blame
->paths
.contents
;
436 if (0 != git_diff_tree_to_tree(&difflist
, blame
->repository
, ptree
, otree
, &diffopts
))
439 if (!git_diff_num_deltas(difflist
)) {
440 /* No changes; copy data */
441 git_blame__get_origin(&porigin
, blame
, parent
, origin
->path
);
443 git_diff_find_options findopts
= GIT_DIFF_FIND_OPTIONS_INIT
;
446 /* Generate a full diff between the two trees */
447 git_diff_free(difflist
);
448 diffopts
.pathspec
.count
= 0;
449 if (0 != git_diff_tree_to_tree(&difflist
, blame
->repository
, ptree
, otree
, &diffopts
))
452 /* Let diff find renames */
453 findopts
.flags
= GIT_DIFF_FIND_RENAMES
;
454 if (0 != git_diff_find_similar(difflist
, &findopts
))
457 /* Find one that matches */
458 for (i
=0; i
<(int)git_diff_num_deltas(difflist
); i
++) {
459 const git_diff_delta
*delta
= git_diff_get_delta(difflist
, i
);
461 if (!git_vector_bsearch(NULL
, &blame
->paths
, delta
->new_file
.path
))
463 git_vector_insert_sorted(&blame
->paths
, (void*)git__strdup(delta
->old_file
.path
),
465 make_origin(&porigin
, parent
, delta
->old_file
.path
);
471 git_diff_free(difflist
);
472 git_tree_free(otree
);
473 git_tree_free(ptree
);
478 * The blobs of origin and porigin exactly match, so everything origin is
479 * suspected for can be blamed on the parent.
481 static int pass_whole_blame(git_blame
*blame
,
482 git_blame__origin
*origin
, git_blame__origin
*porigin
)
486 if (!porigin
->blob
&&
487 git_object_lookup((git_object
**)&porigin
->blob
, blame
->repository
,
488 git_blob_id(origin
->blob
), GIT_OBJ_BLOB
) < 0)
490 for (e
=blame
->ent
; e
; e
=e
->next
) {
491 if (!same_suspect(e
->suspect
, origin
))
493 origin_incref(porigin
);
494 origin_decref(e
->suspect
);
495 e
->suspect
= porigin
;
501 static int pass_blame(git_blame
*blame
, git_blame__origin
*origin
, uint32_t opt
)
503 git_commit
*commit
= origin
->commit
;
505 git_blame__origin
*sg_buf
[16];
506 git_blame__origin
*porigin
, **sg_origin
= sg_buf
;
509 num_parents
= git_commit_parentcount(commit
);
510 if (!git_oid_cmp(git_commit_id(commit
), &blame
->options
.oldest_commit
))
511 /* Stop at oldest specified commit */
513 else if (opt
& GIT_BLAME_FIRST_PARENT
&& num_parents
> 1)
514 /* Limit search to the first parent */
518 git_oid_cpy(&blame
->options
.oldest_commit
, git_commit_id(commit
));
520 } else if (num_parents
< (int)ARRAY_SIZE(sg_buf
))
521 memset(sg_buf
, 0, sizeof(sg_buf
));
523 sg_origin
= git__calloc(num_parents
, sizeof(*sg_origin
));
524 GITERR_CHECK_ALLOC(sg_origin
);
527 for (i
=0; i
<num_parents
; i
++) {
534 if ((error
= git_commit_parent(&p
, origin
->commit
, i
)) < 0)
536 porigin
= find_origin(blame
, p
, origin
);
540 * We only have to decrement the parent's
541 * reference count when no porigin has
542 * been created, as otherwise the commit
543 * is assigned to the created object.
548 if (porigin
->blob
&& origin
->blob
&&
549 !git_oid_cmp(git_blob_id(porigin
->blob
), git_blob_id(origin
->blob
))) {
550 error
= pass_whole_blame(blame
, origin
, porigin
);
551 origin_decref(porigin
);
554 for (j
= same
= 0; j
<i
; j
++)
556 !git_oid_cmp(git_blob_id(sg_origin
[j
]->blob
), git_blob_id(porigin
->blob
))) {
561 sg_origin
[i
] = porigin
;
563 origin_decref(porigin
);
567 for (i
=0; i
<num_parents
; i
++) {
568 git_blame__origin
*porigin
= sg_origin
[i
];
571 if (!origin
->previous
) {
572 origin_incref(porigin
);
573 origin
->previous
= porigin
;
576 if ((ret
= pass_blame_to_parent(blame
, origin
, porigin
)) != 0) {
584 /* TODO: optionally find moves in parents' files */
586 /* TODO: optionally find copies in parents' files */
589 for (i
=0; i
<num_parents
; i
++)
591 origin_decref(sg_origin
[i
]);
592 if (sg_origin
!= sg_buf
)
593 git__free(sg_origin
);
598 * If two blame entries that are next to each other came from
599 * contiguous lines in the same origin (i.e. <commit, path> pair),
600 * merge them together.
602 static void coalesce(git_blame
*blame
)
604 git_blame__entry
*ent
, *next
;
606 for (ent
=blame
->ent
; ent
&& (next
= ent
->next
); ent
= next
) {
607 if (same_suspect(ent
->suspect
, next
->suspect
) &&
608 ent
->guilty
== next
->guilty
&&
609 ent
->s_lno
+ ent
->num_lines
== next
->s_lno
)
611 ent
->num_lines
+= next
->num_lines
;
612 ent
->next
= next
->next
;
614 ent
->next
->prev
= ent
;
615 origin_decref(next
->suspect
);
618 next
= ent
; /* again */
623 int git_blame__like_git(git_blame
*blame
, uint32_t opt
)
626 git_blame__entry
*ent
;
627 git_blame__origin
*suspect
= NULL
;
629 /* Find a suspect to break down */
630 for (ent
= blame
->ent
; !suspect
&& ent
; ent
= ent
->next
)
632 suspect
= ent
->suspect
;
634 return 0; /* all done */
636 /* We'll use this suspect later in the loop, so hold on to it for now. */
637 origin_incref(suspect
);
639 if (pass_blame(blame
, suspect
, opt
) < 0)
642 /* Take responsibility for the remaining entries */
643 for (ent
= blame
->ent
; ent
; ent
= ent
->next
) {
644 if (same_suspect(ent
->suspect
, suspect
)) {
646 ent
->is_boundary
= !git_oid_cmp(
647 git_commit_id(suspect
->commit
),
648 &blame
->options
.oldest_commit
);
651 origin_decref(suspect
);
659 void git_blame__free_entry(git_blame__entry
*ent
)
662 origin_decref(ent
->suspect
);