2 * Copyright (C) the libgit2 contributors. All rights reserved.
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
11 #include "xdiff/xinclude.h"
14 * Origin is refcounted and usually we keep the blob contents to be
17 static git_blame__origin
*origin_incref(git_blame__origin
*o
)
24 static void origin_decref(git_blame__origin
*o
)
26 if (o
&& --o
->refcnt
<= 0) {
28 origin_decref(o
->previous
);
29 git_blob_free(o
->blob
);
30 git_commit_free(o
->commit
);
35 /* Given a commit and a path in it, create a new origin structure. */
36 static int make_origin(git_blame__origin
**out
, git_commit
*commit
, const char *path
)
39 size_t path_len
= strlen(path
), alloc_len
;
42 GITERR_CHECK_ALLOC_ADD(&alloc_len
, sizeof(*o
), path_len
);
43 GITERR_CHECK_ALLOC_ADD(&alloc_len
, alloc_len
, 1);
44 o
= git__calloc(1, alloc_len
);
45 GITERR_CHECK_ALLOC(o
);
49 strcpy(o
->path
, path
);
51 if (!(error
= git_object_lookup_bypath((git_object
**)&o
->blob
, (git_object
*)commit
,
52 path
, GIT_OBJ_BLOB
))) {
60 /* Locate an existing origin or create a new one. */
61 int git_blame__get_origin(
62 git_blame__origin
**out
,
69 for (e
= blame
->ent
; e
; e
= e
->next
) {
70 if (e
->suspect
->commit
== commit
&& !strcmp(e
->suspect
->path
, path
)) {
71 *out
= origin_incref(e
->suspect
);
74 return make_origin(out
, commit
, path
);
77 typedef struct blame_chunk_cb_data
{
79 git_blame__origin
*target
;
80 git_blame__origin
*parent
;
85 static bool same_suspect(git_blame__origin
*a
, git_blame__origin
*b
)
89 if (git_oid_cmp(git_commit_id(a
->commit
), git_commit_id(b
->commit
)))
91 return 0 == strcmp(a
->path
, b
->path
);
94 /* find the line number of the last line the target is suspected for */
95 static int find_last_in_target(git_blame
*blame
, git_blame__origin
*target
)
98 int last_in_target
= -1;
100 for (e
=blame
->ent
; e
; e
=e
->next
) {
101 if (e
->guilty
|| !same_suspect(e
->suspect
, target
))
103 if (last_in_target
< e
->s_lno
+ e
->num_lines
)
104 last_in_target
= e
->s_lno
+ e
->num_lines
;
106 return last_in_target
;
110 * It is known that lines between tlno to same came from parent, and e
111 * has an overlap with that range. it also is known that parent's
112 * line plno corresponds to e's line tlno.
115 * <------> (entirely within)
116 * <------------> (extends past)
117 * <------------> (starts before)
118 * <------------------> (entirely encloses)
120 * Split e into potentially three parts; before this chunk, the chunk
121 * to be blamed for the parent, and after that portion.
123 static void split_overlap(git_blame__entry
*split
, git_blame__entry
*e
,
124 int tlno
, int plno
, int same
, git_blame__origin
*parent
)
128 if (e
->s_lno
< tlno
) {
129 /* there is a pre-chunk part not blamed on the parent */
130 split
[0].suspect
= origin_incref(e
->suspect
);
131 split
[0].lno
= e
->lno
;
132 split
[0].s_lno
= e
->s_lno
;
133 split
[0].num_lines
= tlno
- e
->s_lno
;
134 split
[1].lno
= e
->lno
+ tlno
- e
->s_lno
;
135 split
[1].s_lno
= plno
;
137 split
[1].lno
= e
->lno
;
138 split
[1].s_lno
= plno
+ (e
->s_lno
- tlno
);
141 if (same
< e
->s_lno
+ e
->num_lines
) {
142 /* there is a post-chunk part not blamed on parent */
143 split
[2].suspect
= origin_incref(e
->suspect
);
144 split
[2].lno
= e
->lno
+ (same
- e
->s_lno
);
145 split
[2].s_lno
= e
->s_lno
+ (same
- e
->s_lno
);
146 split
[2].num_lines
= e
->s_lno
+ e
->num_lines
- same
;
147 chunk_end_lno
= split
[2].lno
;
149 chunk_end_lno
= e
->lno
+ e
->num_lines
;
151 split
[1].num_lines
= chunk_end_lno
- split
[1].lno
;
154 * if it turns out there is nothing to blame the parent for, forget about
155 * the splitting. !split[1].suspect signals this.
157 if (split
[1].num_lines
< 1)
159 split
[1].suspect
= origin_incref(parent
);
163 * Link in a new blame entry to the scoreboard. Entries that cover the same
164 * line range have been removed from the scoreboard previously.
166 static void add_blame_entry(git_blame
*blame
, git_blame__entry
*e
)
168 git_blame__entry
*ent
, *prev
= NULL
;
170 origin_incref(e
->suspect
);
172 for (ent
= blame
->ent
; ent
&& ent
->lno
< e
->lno
; ent
= ent
->next
)
175 /* prev, if not NULL, is the last one that is below e */
178 e
->next
= prev
->next
;
181 e
->next
= blame
->ent
;
189 * src typically is on-stack; we want to copy the information in it to
190 * a malloced blame_entry that is already on the linked list of the scoreboard.
191 * The origin of dst loses a refcnt while the origin of src gains one.
193 static void dup_entry(git_blame__entry
*dst
, git_blame__entry
*src
)
195 git_blame__entry
*p
, *n
;
199 origin_incref(src
->suspect
);
200 origin_decref(dst
->suspect
);
201 memcpy(dst
, src
, sizeof(*src
));
208 * split_overlap() divided an existing blame e into up to three parts in split.
209 * Adjust the linked list of blames in the scoreboard to reflect the split.
211 static void split_blame(git_blame
*blame
, git_blame__entry
*split
, git_blame__entry
*e
)
213 git_blame__entry
*new_entry
;
215 if (split
[0].suspect
&& split
[2].suspect
) {
216 /* The first part (reuse storage for the existing entry e */
217 dup_entry(e
, &split
[0]);
219 /* The last part -- me */
220 new_entry
= git__malloc(sizeof(*new_entry
));
221 memcpy(new_entry
, &(split
[2]), sizeof(git_blame__entry
));
222 add_blame_entry(blame
, new_entry
);
224 /* ... and the middle part -- parent */
225 new_entry
= git__malloc(sizeof(*new_entry
));
226 memcpy(new_entry
, &(split
[1]), sizeof(git_blame__entry
));
227 add_blame_entry(blame
, new_entry
);
228 } else if (!split
[0].suspect
&& !split
[2].suspect
) {
230 * The parent covers the entire area; reuse storage for e and replace it
233 dup_entry(e
, &split
[1]);
234 } else if (split
[0].suspect
) {
235 /* me and then parent */
236 dup_entry(e
, &split
[0]);
237 new_entry
= git__malloc(sizeof(*new_entry
));
238 memcpy(new_entry
, &(split
[1]), sizeof(git_blame__entry
));
239 add_blame_entry(blame
, new_entry
);
241 /* parent and then me */
242 dup_entry(e
, &split
[1]);
243 new_entry
= git__malloc(sizeof(*new_entry
));
244 memcpy(new_entry
, &(split
[2]), sizeof(git_blame__entry
));
245 add_blame_entry(blame
, new_entry
);
250 * After splitting the blame, the origins used by the on-stack blame_entry
251 * should lose one refcnt each.
253 static void decref_split(git_blame__entry
*split
)
257 origin_decref(split
[i
].suspect
);
261 * Helper for blame_chunk(). blame_entry e is known to overlap with the patch
262 * hunk; split it and pass blame to the parent.
264 static void blame_overlap(
270 git_blame__origin
*parent
)
272 git_blame__entry split
[3] = {{0}};
274 split_overlap(split
, e
, tlno
, plno
, same
, parent
);
275 if (split
[1].suspect
)
276 split_blame(blame
, split
, e
);
281 * Process one hunk from the patch between the current suspect for blame_entry
282 * e and its parent. Find and split the overlap, and pass blame to the
283 * overlapping part to the parent.
285 static void blame_chunk(
290 git_blame__origin
*target
,
291 git_blame__origin
*parent
)
295 for (e
= blame
->ent
; e
; e
= e
->next
) {
296 if (e
->guilty
|| !same_suspect(e
->suspect
, target
))
298 if (same
<= e
->s_lno
)
300 if (tlno
< e
->s_lno
+ e
->num_lines
) {
301 blame_overlap(blame
, e
, tlno
, plno
, same
, parent
);
307 long start_a
, long count_a
,
308 long start_b
, long count_b
,
311 blame_chunk_cb_data
*d
= (blame_chunk_cb_data
*)cb_data
;
313 blame_chunk(d
->blame
, d
->tlno
, d
->plno
, start_b
, d
->target
, d
->parent
);
314 d
->plno
= start_a
+ count_a
;
315 d
->tlno
= start_b
+ count_b
;
320 static void trim_common_tail(mmfile_t
*a
, mmfile_t
*b
, long ctx
)
322 const int blk
= 1024;
323 long trimmed
= 0, recovered
= 0;
324 char *ap
= a
->ptr
+ a
->size
;
325 char *bp
= b
->ptr
+ b
->size
;
326 long smaller
= (long)((a
->size
< b
->size
) ? a
->size
: b
->size
);
331 while (blk
+ trimmed
<= smaller
&& !memcmp(ap
- blk
, bp
- blk
, blk
)) {
337 while (recovered
< trimmed
)
338 if (ap
[recovered
++] == '\n')
340 a
->size
-= trimmed
- recovered
;
341 b
->size
-= trimmed
- recovered
;
344 static int diff_hunks(mmfile_t file_a
, mmfile_t file_b
, void *cb_data
)
347 xdemitconf_t xecfg
= {0};
348 xdemitcb_t ecb
= {0};
350 xecfg
.hunk_func
= my_emit
;
353 trim_common_tail(&file_a
, &file_b
, 0);
354 return xdl_diff(&file_a
, &file_b
, &xpp
, &xecfg
, &ecb
);
357 static void fill_origin_blob(git_blame__origin
*o
, mmfile_t
*file
)
359 memset(file
, 0, sizeof(*file
));
361 file
->ptr
= (char*)git_blob_rawcontent(o
->blob
);
362 file
->size
= (size_t)git_blob_rawsize(o
->blob
);
366 static int pass_blame_to_parent(
368 git_blame__origin
*target
,
369 git_blame__origin
*parent
)
372 mmfile_t file_p
, file_o
;
373 blame_chunk_cb_data d
= { blame
, target
, parent
, 0, 0 };
375 last_in_target
= find_last_in_target(blame
, target
);
376 if (last_in_target
< 0)
377 return 1; /* nothing remains for this target */
379 fill_origin_blob(parent
, &file_p
);
380 fill_origin_blob(target
, &file_o
);
382 diff_hunks(file_p
, file_o
, &d
);
383 /* The reset (i.e. anything after tlno) are the same as the parent */
384 blame_chunk(blame
, d
.tlno
, d
.plno
, last_in_target
, target
, parent
);
389 static int paths_on_dup(void **old
, void *new)
396 static git_blame__origin
* find_origin(
399 git_blame__origin
*origin
)
401 git_blame__origin
*porigin
= NULL
;
402 git_diff
*difflist
= NULL
;
403 git_diff_options diffopts
= GIT_DIFF_OPTIONS_INIT
;
404 git_tree
*otree
=NULL
, *ptree
=NULL
;
406 /* Get the trees from this commit and its parent */
407 if (0 != git_commit_tree(&otree
, origin
->commit
) ||
408 0 != git_commit_tree(&ptree
, parent
))
411 /* Configure the diff */
412 diffopts
.context_lines
= 0;
413 diffopts
.flags
= GIT_DIFF_SKIP_BINARY_CHECK
;
415 /* Check to see if files we're interested have changed */
416 diffopts
.pathspec
.count
= blame
->paths
.length
;
417 diffopts
.pathspec
.strings
= (char**)blame
->paths
.contents
;
418 if (0 != git_diff_tree_to_tree(&difflist
, blame
->repository
, ptree
, otree
, &diffopts
))
421 if (!git_diff_num_deltas(difflist
)) {
422 /* No changes; copy data */
423 git_blame__get_origin(&porigin
, blame
, parent
, origin
->path
);
425 git_diff_find_options findopts
= GIT_DIFF_FIND_OPTIONS_INIT
;
428 /* Generate a full diff between the two trees */
429 git_diff_free(difflist
);
430 diffopts
.pathspec
.count
= 0;
431 if (0 != git_diff_tree_to_tree(&difflist
, blame
->repository
, ptree
, otree
, &diffopts
))
434 /* Let diff find renames */
435 findopts
.flags
= GIT_DIFF_FIND_RENAMES
;
436 if (0 != git_diff_find_similar(difflist
, &findopts
))
439 /* Find one that matches */
440 for (i
=0; i
<(int)git_diff_num_deltas(difflist
); i
++) {
441 const git_diff_delta
*delta
= git_diff_get_delta(difflist
, i
);
443 if (!git_vector_bsearch(NULL
, &blame
->paths
, delta
->new_file
.path
))
445 git_vector_insert_sorted(&blame
->paths
, (void*)git__strdup(delta
->old_file
.path
),
447 make_origin(&porigin
, parent
, delta
->old_file
.path
);
453 git_diff_free(difflist
);
454 git_tree_free(otree
);
455 git_tree_free(ptree
);
460 * The blobs of origin and porigin exactly match, so everything origin is
461 * suspected for can be blamed on the parent.
463 static void pass_whole_blame(git_blame
*blame
,
464 git_blame__origin
*origin
, git_blame__origin
*porigin
)
469 git_object_lookup((git_object
**)&porigin
->blob
, blame
->repository
,
470 git_blob_id(origin
->blob
), GIT_OBJ_BLOB
);
471 for (e
=blame
->ent
; e
; e
=e
->next
) {
472 if (!same_suspect(e
->suspect
, origin
))
474 origin_incref(porigin
);
475 origin_decref(e
->suspect
);
476 e
->suspect
= porigin
;
480 static void pass_blame(git_blame
*blame
, git_blame__origin
*origin
, uint32_t opt
)
482 git_commit
*commit
= origin
->commit
;
484 git_blame__origin
*sg_buf
[16];
485 git_blame__origin
*porigin
, **sg_origin
= sg_buf
;
487 num_parents
= git_commit_parentcount(commit
);
488 if (!git_oid_cmp(git_commit_id(commit
), &blame
->options
.oldest_commit
))
489 /* Stop at oldest specified commit */
491 else if (opt
& GIT_BLAME_FIRST_PARENT
&& num_parents
> 1)
492 /* Limit search to the first parent */
496 git_oid_cpy(&blame
->options
.oldest_commit
, git_commit_id(commit
));
499 else if (num_parents
< (int)ARRAY_SIZE(sg_buf
))
500 memset(sg_buf
, 0, sizeof(sg_buf
));
502 sg_origin
= git__calloc(num_parents
, sizeof(*sg_origin
));
504 for (i
=0; i
<num_parents
; i
++) {
511 git_commit_parent(&p
, origin
->commit
, i
);
512 porigin
= find_origin(blame
, p
, origin
);
516 if (porigin
->blob
&& origin
->blob
&&
517 !git_oid_cmp(git_blob_id(porigin
->blob
), git_blob_id(origin
->blob
))) {
518 pass_whole_blame(blame
, origin
, porigin
);
519 origin_decref(porigin
);
522 for (j
= same
= 0; j
<i
; j
++)
524 !git_oid_cmp(git_blob_id(sg_origin
[j
]->blob
), git_blob_id(porigin
->blob
))) {
529 sg_origin
[i
] = porigin
;
531 origin_decref(porigin
);
535 for (i
=0; i
<num_parents
; i
++) {
536 git_blame__origin
*porigin
= sg_origin
[i
];
539 if (!origin
->previous
) {
540 origin_incref(porigin
);
541 origin
->previous
= porigin
;
543 if (pass_blame_to_parent(blame
, origin
, porigin
))
547 /* TODO: optionally find moves in parents' files */
549 /* TODO: optionally find copies in parents' files */
552 for (i
=0; i
<num_parents
; i
++)
554 origin_decref(sg_origin
[i
]);
555 if (sg_origin
!= sg_buf
)
556 git__free(sg_origin
);
561 * If two blame entries that are next to each other came from
562 * contiguous lines in the same origin (i.e. <commit, path> pair),
563 * merge them together.
565 static void coalesce(git_blame
*blame
)
567 git_blame__entry
*ent
, *next
;
569 for (ent
=blame
->ent
; ent
&& (next
= ent
->next
); ent
= next
) {
570 if (same_suspect(ent
->suspect
, next
->suspect
) &&
571 ent
->guilty
== next
->guilty
&&
572 ent
->s_lno
+ ent
->num_lines
== next
->s_lno
)
574 ent
->num_lines
+= next
->num_lines
;
575 ent
->next
= next
->next
;
577 ent
->next
->prev
= ent
;
578 origin_decref(next
->suspect
);
581 next
= ent
; /* again */
586 void git_blame__like_git(git_blame
*blame
, uint32_t opt
)
589 git_blame__entry
*ent
;
590 git_blame__origin
*suspect
= NULL
;
592 /* Find a suspect to break down */
593 for (ent
= blame
->ent
; !suspect
&& ent
; ent
= ent
->next
)
595 suspect
= ent
->suspect
;
597 return; /* all done */
599 /* We'll use this suspect later in the loop, so hold on to it for now. */
600 origin_incref(suspect
);
601 pass_blame(blame
, suspect
, opt
);
603 /* Take responsibility for the remaining entries */
604 for (ent
= blame
->ent
; ent
; ent
= ent
->next
) {
605 if (same_suspect(ent
->suspect
, suspect
)) {
607 ent
->is_boundary
= !git_oid_cmp(
608 git_commit_id(suspect
->commit
),
609 &blame
->options
.oldest_commit
);
612 origin_decref(suspect
);
618 void git_blame__free_entry(git_blame__entry
*ent
)
621 origin_decref(ent
->suspect
);