2 * Copyright (C) the libgit2 contributors. All rights reserved.
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
11 #include "xdiff/xinclude.h"
14 * Origin is refcounted and usually we keep the blob contents to be
17 static git_blame__origin
*origin_incref(git_blame__origin
*o
)
24 static void origin_decref(git_blame__origin
*o
)
26 if (o
&& --o
->refcnt
<= 0) {
28 origin_decref(o
->previous
);
29 git_blob_free(o
->blob
);
30 git_commit_free(o
->commit
);
35 /* Given a commit and a path in it, create a new origin structure. */
36 static int make_origin(git_blame__origin
**out
, git_commit
*commit
, const char *path
)
41 o
= git__calloc(1, sizeof(*o
) + strlen(path
) + 1);
42 GITERR_CHECK_ALLOC(o
);
45 strcpy(o
->path
, path
);
47 if (!(error
= git_object_lookup_bypath((git_object
**)&o
->blob
, (git_object
*)commit
,
48 path
, GIT_OBJ_BLOB
))) {
56 /* Locate an existing origin or create a new one. */
57 int git_blame__get_origin(
58 git_blame__origin
**out
,
65 for (e
= blame
->ent
; e
; e
= e
->next
) {
66 if (e
->suspect
->commit
== commit
&& !strcmp(e
->suspect
->path
, path
)) {
67 *out
= origin_incref(e
->suspect
);
70 return make_origin(out
, commit
, path
);
73 typedef struct blame_chunk_cb_data
{
75 git_blame__origin
*target
;
76 git_blame__origin
*parent
;
81 static bool same_suspect(git_blame__origin
*a
, git_blame__origin
*b
)
85 if (git_oid_cmp(git_commit_id(a
->commit
), git_commit_id(b
->commit
)))
87 return 0 == strcmp(a
->path
, b
->path
);
90 /* find the line number of the last line the target is suspected for */
91 static int find_last_in_target(git_blame
*blame
, git_blame__origin
*target
)
94 int last_in_target
= -1;
96 for (e
=blame
->ent
; e
; e
=e
->next
) {
97 if (e
->guilty
|| !same_suspect(e
->suspect
, target
))
99 if (last_in_target
< e
->s_lno
+ e
->num_lines
)
100 last_in_target
= e
->s_lno
+ e
->num_lines
;
102 return last_in_target
;
106 * It is known that lines between tlno to same came from parent, and e
107 * has an overlap with that range. it also is known that parent's
108 * line plno corresponds to e's line tlno.
111 * <------> (entirely within)
112 * <------------> (extends past)
113 * <------------> (starts before)
114 * <------------------> (entirely encloses)
116 * Split e into potentially three parts; before this chunk, the chunk
117 * to be blamed for the parent, and after that portion.
119 static void split_overlap(git_blame__entry
*split
, git_blame__entry
*e
,
120 int tlno
, int plno
, int same
, git_blame__origin
*parent
)
124 if (e
->s_lno
< tlno
) {
125 /* there is a pre-chunk part not blamed on the parent */
126 split
[0].suspect
= origin_incref(e
->suspect
);
127 split
[0].lno
= e
->lno
;
128 split
[0].s_lno
= e
->s_lno
;
129 split
[0].num_lines
= tlno
- e
->s_lno
;
130 split
[1].lno
= e
->lno
+ tlno
- e
->s_lno
;
131 split
[1].s_lno
= plno
;
133 split
[1].lno
= e
->lno
;
134 split
[1].s_lno
= plno
+ (e
->s_lno
- tlno
);
137 if (same
< e
->s_lno
+ e
->num_lines
) {
138 /* there is a post-chunk part not blamed on parent */
139 split
[2].suspect
= origin_incref(e
->suspect
);
140 split
[2].lno
= e
->lno
+ (same
- e
->s_lno
);
141 split
[2].s_lno
= e
->s_lno
+ (same
- e
->s_lno
);
142 split
[2].num_lines
= e
->s_lno
+ e
->num_lines
- same
;
143 chunk_end_lno
= split
[2].lno
;
145 chunk_end_lno
= e
->lno
+ e
->num_lines
;
147 split
[1].num_lines
= chunk_end_lno
- split
[1].lno
;
150 * if it turns out there is nothing to blame the parent for, forget about
151 * the splitting. !split[1].suspect signals this.
153 if (split
[1].num_lines
< 1)
155 split
[1].suspect
= origin_incref(parent
);
159 * Link in a new blame entry to the scoreboard. Entries that cover the same
160 * line range have been removed from the scoreboard previously.
162 static void add_blame_entry(git_blame
*blame
, git_blame__entry
*e
)
164 git_blame__entry
*ent
, *prev
= NULL
;
166 origin_incref(e
->suspect
);
168 for (ent
= blame
->ent
; ent
&& ent
->lno
< e
->lno
; ent
= ent
->next
)
171 /* prev, if not NULL, is the last one that is below e */
174 e
->next
= prev
->next
;
177 e
->next
= blame
->ent
;
185 * src typically is on-stack; we want to copy the information in it to
186 * a malloced blame_entry that is already on the linked list of the scoreboard.
187 * The origin of dst loses a refcnt while the origin of src gains one.
189 static void dup_entry(git_blame__entry
*dst
, git_blame__entry
*src
)
191 git_blame__entry
*p
, *n
;
195 origin_incref(src
->suspect
);
196 origin_decref(dst
->suspect
);
197 memcpy(dst
, src
, sizeof(*src
));
204 * split_overlap() divided an existing blame e into up to three parts in split.
205 * Adjust the linked list of blames in the scoreboard to reflect the split.
207 static void split_blame(git_blame
*blame
, git_blame__entry
*split
, git_blame__entry
*e
)
209 git_blame__entry
*new_entry
;
211 if (split
[0].suspect
&& split
[2].suspect
) {
212 /* The first part (reuse storage for the existing entry e */
213 dup_entry(e
, &split
[0]);
215 /* The last part -- me */
216 new_entry
= git__malloc(sizeof(*new_entry
));
217 memcpy(new_entry
, &(split
[2]), sizeof(git_blame__entry
));
218 add_blame_entry(blame
, new_entry
);
220 /* ... and the middle part -- parent */
221 new_entry
= git__malloc(sizeof(*new_entry
));
222 memcpy(new_entry
, &(split
[1]), sizeof(git_blame__entry
));
223 add_blame_entry(blame
, new_entry
);
224 } else if (!split
[0].suspect
&& !split
[2].suspect
) {
226 * The parent covers the entire area; reuse storage for e and replace it
229 dup_entry(e
, &split
[1]);
230 } else if (split
[0].suspect
) {
231 /* me and then parent */
232 dup_entry(e
, &split
[0]);
233 new_entry
= git__malloc(sizeof(*new_entry
));
234 memcpy(new_entry
, &(split
[1]), sizeof(git_blame__entry
));
235 add_blame_entry(blame
, new_entry
);
237 /* parent and then me */
238 dup_entry(e
, &split
[1]);
239 new_entry
= git__malloc(sizeof(*new_entry
));
240 memcpy(new_entry
, &(split
[2]), sizeof(git_blame__entry
));
241 add_blame_entry(blame
, new_entry
);
246 * After splitting the blame, the origins used by the on-stack blame_entry
247 * should lose one refcnt each.
249 static void decref_split(git_blame__entry
*split
)
253 origin_decref(split
[i
].suspect
);
257 * Helper for blame_chunk(). blame_entry e is known to overlap with the patch
258 * hunk; split it and pass blame to the parent.
260 static void blame_overlap(
266 git_blame__origin
*parent
)
268 git_blame__entry split
[3] = {{0}};
270 split_overlap(split
, e
, tlno
, plno
, same
, parent
);
271 if (split
[1].suspect
)
272 split_blame(blame
, split
, e
);
277 * Process one hunk from the patch between the current suspect for blame_entry
278 * e and its parent. Find and split the overlap, and pass blame to the
279 * overlapping part to the parent.
281 static void blame_chunk(
286 git_blame__origin
*target
,
287 git_blame__origin
*parent
)
291 for (e
= blame
->ent
; e
; e
= e
->next
) {
292 if (e
->guilty
|| !same_suspect(e
->suspect
, target
))
294 if (same
<= e
->s_lno
)
296 if (tlno
< e
->s_lno
+ e
->num_lines
) {
297 blame_overlap(blame
, e
, tlno
, plno
, same
, parent
);
306 xdemitconf_t
const *xecfg
)
308 xdchange_t
*xch
= xscr
;
312 blame_chunk_cb_data
*d
= ecb
->priv
;
313 blame_chunk(d
->blame
, d
->tlno
, d
->plno
, xch
->i2
, d
->target
, d
->parent
);
314 d
->plno
= xch
->i1
+ xch
->chg1
;
315 d
->tlno
= xch
->i2
+ xch
->chg2
;
321 static void trim_common_tail(mmfile_t
*a
, mmfile_t
*b
, long ctx
)
323 const int blk
= 1024;
324 long trimmed
= 0, recovered
= 0;
325 char *ap
= a
->ptr
+ a
->size
;
326 char *bp
= b
->ptr
+ b
->size
;
327 long smaller
= (long)((a
->size
< b
->size
) ? a
->size
: b
->size
);
332 while (blk
+ trimmed
<= smaller
&& !memcmp(ap
- blk
, bp
- blk
, blk
)) {
338 while (recovered
< trimmed
)
339 if (ap
[recovered
++] == '\n')
341 a
->size
-= trimmed
- recovered
;
342 b
->size
-= trimmed
- recovered
;
345 static int diff_hunks(mmfile_t file_a
, mmfile_t file_b
, void *cb_data
)
348 xdemitconf_t xecfg
= {0};
349 xdemitcb_t ecb
= {0};
351 xecfg
.emit_func
= (void(*)(void))my_emit
;
354 trim_common_tail(&file_a
, &file_b
, 0);
355 return xdl_diff(&file_a
, &file_b
, &xpp
, &xecfg
, &ecb
);
358 static void fill_origin_blob(git_blame__origin
*o
, mmfile_t
*file
)
360 memset(file
, 0, sizeof(*file
));
362 file
->ptr
= (char*)git_blob_rawcontent(o
->blob
);
363 file
->size
= (size_t)git_blob_rawsize(o
->blob
);
367 static int pass_blame_to_parent(
369 git_blame__origin
*target
,
370 git_blame__origin
*parent
)
373 mmfile_t file_p
, file_o
;
374 blame_chunk_cb_data d
= { blame
, target
, parent
, 0, 0 };
376 last_in_target
= find_last_in_target(blame
, target
);
377 if (last_in_target
< 0)
378 return 1; /* nothing remains for this target */
380 fill_origin_blob(parent
, &file_p
);
381 fill_origin_blob(target
, &file_o
);
383 diff_hunks(file_p
, file_o
, &d
);
384 /* The reset (i.e. anything after tlno) are the same as the parent */
385 blame_chunk(blame
, d
.tlno
, d
.plno
, last_in_target
, target
, parent
);
390 static int paths_on_dup(void **old
, void *new)
397 static git_blame__origin
* find_origin(
400 git_blame__origin
*origin
)
402 git_blame__origin
*porigin
= NULL
;
403 git_diff
*difflist
= NULL
;
404 git_diff_options diffopts
= GIT_DIFF_OPTIONS_INIT
;
405 git_tree
*otree
=NULL
, *ptree
=NULL
;
407 /* Get the trees from this commit and its parent */
408 if (0 != git_commit_tree(&otree
, origin
->commit
) ||
409 0 != git_commit_tree(&ptree
, parent
))
412 /* Configure the diff */
413 diffopts
.context_lines
= 0;
414 diffopts
.flags
= GIT_DIFF_SKIP_BINARY_CHECK
;
416 /* Check to see if files we're interested have changed */
417 diffopts
.pathspec
.count
= blame
->paths
.length
;
418 diffopts
.pathspec
.strings
= (char**)blame
->paths
.contents
;
419 if (0 != git_diff_tree_to_tree(&difflist
, blame
->repository
, ptree
, otree
, &diffopts
))
422 if (!git_diff_num_deltas(difflist
)) {
423 /* No changes; copy data */
424 git_blame__get_origin(&porigin
, blame
, parent
, origin
->path
);
426 git_diff_find_options findopts
= GIT_DIFF_FIND_OPTIONS_INIT
;
429 /* Generate a full diff between the two trees */
430 git_diff_free(difflist
);
431 diffopts
.pathspec
.count
= 0;
432 if (0 != git_diff_tree_to_tree(&difflist
, blame
->repository
, ptree
, otree
, &diffopts
))
435 /* Let diff find renames */
436 findopts
.flags
= GIT_DIFF_FIND_RENAMES
;
437 if (0 != git_diff_find_similar(difflist
, &findopts
))
440 /* Find one that matches */
441 for (i
=0; i
<(int)git_diff_num_deltas(difflist
); i
++) {
442 const git_diff_delta
*delta
= git_diff_get_delta(difflist
, i
);
444 if (!git_vector_bsearch(NULL
, &blame
->paths
, delta
->new_file
.path
))
446 git_vector_insert_sorted(&blame
->paths
, (void*)git__strdup(delta
->old_file
.path
),
448 make_origin(&porigin
, parent
, delta
->old_file
.path
);
454 git_diff_free(difflist
);
455 git_tree_free(otree
);
456 git_tree_free(ptree
);
461 * The blobs of origin and porigin exactly match, so everything origin is
462 * suspected for can be blamed on the parent.
464 static void pass_whole_blame(git_blame
*blame
,
465 git_blame__origin
*origin
, git_blame__origin
*porigin
)
470 git_object_lookup((git_object
**)&porigin
->blob
, blame
->repository
,
471 git_blob_id(origin
->blob
), GIT_OBJ_BLOB
);
472 for (e
=blame
->ent
; e
; e
=e
->next
) {
473 if (!same_suspect(e
->suspect
, origin
))
475 origin_incref(porigin
);
476 origin_decref(e
->suspect
);
477 e
->suspect
= porigin
;
481 static void pass_blame(git_blame
*blame
, git_blame__origin
*origin
, uint32_t opt
)
483 git_commit
*commit
= origin
->commit
;
485 git_blame__origin
*sg_buf
[16];
486 git_blame__origin
*porigin
, **sg_origin
= sg_buf
;
488 num_parents
= git_commit_parentcount(commit
);
489 if (!git_oid_cmp(git_commit_id(commit
), &blame
->options
.oldest_commit
))
490 /* Stop at oldest specified commit */
492 else if (opt
& GIT_BLAME_FIRST_PARENT
&& num_parents
> 1)
493 /* Limit search to the first parent */
497 git_oid_cpy(&blame
->options
.oldest_commit
, git_commit_id(commit
));
500 else if (num_parents
< (int)ARRAY_SIZE(sg_buf
))
501 memset(sg_buf
, 0, sizeof(sg_buf
));
503 sg_origin
= git__calloc(num_parents
, sizeof(*sg_origin
));
505 for (i
=0; i
<num_parents
; i
++) {
512 git_commit_parent(&p
, origin
->commit
, i
);
513 porigin
= find_origin(blame
, p
, origin
);
517 if (porigin
->blob
&& origin
->blob
&&
518 !git_oid_cmp(git_blob_id(porigin
->blob
), git_blob_id(origin
->blob
))) {
519 pass_whole_blame(blame
, origin
, porigin
);
520 origin_decref(porigin
);
523 for (j
= same
= 0; j
<i
; j
++)
525 !git_oid_cmp(git_blob_id(sg_origin
[j
]->blob
), git_blob_id(porigin
->blob
))) {
530 sg_origin
[i
] = porigin
;
532 origin_decref(porigin
);
536 for (i
=0; i
<num_parents
; i
++) {
537 git_blame__origin
*porigin
= sg_origin
[i
];
540 if (!origin
->previous
) {
541 origin_incref(porigin
);
542 origin
->previous
= porigin
;
544 if (pass_blame_to_parent(blame
, origin
, porigin
))
548 /* TODO: optionally find moves in parents' files */
550 /* TODO: optionally find copies in parents' files */
553 for (i
=0; i
<num_parents
; i
++)
555 origin_decref(sg_origin
[i
]);
556 if (sg_origin
!= sg_buf
)
557 git__free(sg_origin
);
562 * If two blame entries that are next to each other came from
563 * contiguous lines in the same origin (i.e. <commit, path> pair),
564 * merge them together.
566 static void coalesce(git_blame
*blame
)
568 git_blame__entry
*ent
, *next
;
570 for (ent
=blame
->ent
; ent
&& (next
= ent
->next
); ent
= next
) {
571 if (same_suspect(ent
->suspect
, next
->suspect
) &&
572 ent
->guilty
== next
->guilty
&&
573 ent
->s_lno
+ ent
->num_lines
== next
->s_lno
)
575 ent
->num_lines
+= next
->num_lines
;
576 ent
->next
= next
->next
;
578 ent
->next
->prev
= ent
;
579 origin_decref(next
->suspect
);
582 next
= ent
; /* again */
587 void git_blame__like_git(git_blame
*blame
, uint32_t opt
)
590 git_blame__entry
*ent
;
591 git_blame__origin
*suspect
= NULL
;
593 /* Find a suspect to break down */
594 for (ent
= blame
->ent
; !suspect
&& ent
; ent
= ent
->next
)
596 suspect
= ent
->suspect
;
598 return; /* all done */
600 /* We'll use this suspect later in the loop, so hold on to it for now. */
601 origin_incref(suspect
);
602 pass_blame(blame
, suspect
, opt
);
604 /* Take responsibility for the remaining entries */
605 for (ent
= blame
->ent
; ent
; ent
= ent
->next
) {
606 if (same_suspect(ent
->suspect
, suspect
)) {
608 ent
->is_boundary
= !git_oid_cmp(
609 git_commit_id(suspect
->commit
),
610 &blame
->options
.oldest_commit
);
613 origin_decref(suspect
);
619 void git_blame__free_entry(git_blame__entry
*ent
)
622 origin_decref(ent
->suspect
);