/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */
10 #include "git2/commit.h"
11 #include "git2/revparse.h"
12 #include "git2/revwalk.h"
13 #include "git2/tree.h"
14 #include "git2/diff.h"
15 #include "git2/blob.h"
16 #include "git2/signature.h"
17 #include "git2/mailmap.h"
19 #include "repository.h"
20 #include "blame_git.h"
23 static int hunk_byfinalline_search_cmp(const void *key
, const void *entry
)
25 git_blame_hunk
*hunk
= (git_blame_hunk
*)entry
;
27 size_t lineno
= *(size_t*)key
;
28 size_t lines_in_hunk
= hunk
->lines_in_hunk
;
29 size_t final_start_line_number
= hunk
->final_start_line_number
;
31 if (lineno
< final_start_line_number
)
33 if (lineno
>= final_start_line_number
+ lines_in_hunk
)
/* Vector comparator for the list of paths: orders entries as strings. */
static int paths_cmp(const void *a, const void *b)
{
	char *lhs = (char *)a;
	char *rhs = (char *)b;

	return git__strcmp(lhs, rhs);
}
39 static int hunk_cmp(const void *_a
, const void *_b
)
41 git_blame_hunk
*a
= (git_blame_hunk
*)_a
,
42 *b
= (git_blame_hunk
*)_b
;
44 if (a
->final_start_line_number
> b
->final_start_line_number
)
46 else if (a
->final_start_line_number
< b
->final_start_line_number
)
52 static bool hunk_ends_at_or_before_line(git_blame_hunk
*hunk
, size_t line
)
54 return line
>= (hunk
->final_start_line_number
+ hunk
->lines_in_hunk
- 1);
57 static bool hunk_starts_at_or_after_line(git_blame_hunk
*hunk
, size_t line
)
59 return line
<= hunk
->final_start_line_number
;
/*
 * Allocate a zeroed git_blame_hunk (git__calloc) and record its line count,
 * its final and original start lines, and a git__strdup'ed copy of `path`
 * (NULL when no path is given).
 *
 * Returns NULL when the hunk allocation fails.
 *
 * NOTE(review): the parameter list and the tail of the body are not visible
 * in this excerpt. `start`, `lines`, `orig_start` and `path` are presumably
 * the parameters, in that order judging by the call sites in this file --
 * confirm against the full source.
 */
62 static git_blame_hunk
* new_hunk(
68 git_blame_hunk
*hunk
= git__calloc(1, sizeof(git_blame_hunk
));
69 if (!hunk
) return NULL
;
71 hunk
->lines_in_hunk
= lines
;
72 hunk
->final_start_line_number
= start
;
73 hunk
->orig_start_line_number
= orig_start
;
/* NOTE(review): no check of the git__strdup result is visible here; a failed
 * copy would presumably leave orig_path NULL without reporting an error. */
74 hunk
->orig_path
= path
? git__strdup(path
) : NULL
;
/*
 * Build a copy of `hunk`: a fresh hunk is obtained from new_hunk, then both
 * commit ids and the boundary flag are copied and both signatures are
 * duplicated with git_signature_dup.
 *
 * NOTE(review): two of the new_hunk arguments, any NULL check on `newhunk`
 * and the return statement are not visible in this excerpt.
 */
79 static git_blame_hunk
* dup_hunk(git_blame_hunk
*hunk
)
81 git_blame_hunk
*newhunk
= new_hunk(
82 hunk
->final_start_line_number
,
84 hunk
->orig_start_line_number
,
90 git_oid_cpy(&newhunk
->orig_commit_id
, &hunk
->orig_commit_id
);
91 git_oid_cpy(&newhunk
->final_commit_id
, &hunk
->final_commit_id
);
92 newhunk
->boundary
= hunk
->boundary
;
/* The results of the two git_signature_dup calls below are not inspected. */
93 git_signature_dup(&newhunk
->final_signature
, hunk
->final_signature
);
94 git_signature_dup(&newhunk
->orig_signature
, hunk
->orig_signature
);
/*
 * Release the resources owned by `hunk`: its original path string and both
 * signatures.
 *
 * NOTE(review): the statement that frees the hunk structure itself is not
 * visible in this excerpt -- presumably it follows; confirm.
 */
98 static void free_hunk(git_blame_hunk
*hunk
)
/* orig_path is cast to void* before being handed to git__free. */
100 git__free((void*)hunk
->orig_path
);
101 git_signature_free(hunk
->final_signature
);
102 git_signature_free(hunk
->orig_signature
);
106 /* Starting with the hunk that includes start_line, shift all following hunks'
107 * final_start_line by shift_by lines */
/*
 * The starting hunk is located with git_vector_bsearch2 using
 * hunk_byfinalline_search_cmp; when that lookup yields a non-zero result no
 * hunk is touched. From the found index to the end of the vector every
 * hunk's final_start_line_number is adjusted by `shift_by`. Callers in this
 * file pass 1 or -1.
 *
 * NOTE(review): the declaration of `i` is not visible in this excerpt.
 */
108 static void shift_hunks_by(git_vector
*v
, size_t start_line
, int shift_by
)
112 if (!git_vector_bsearch2(&i
, v
, hunk_byfinalline_search_cmp
, &start_line
)) {
113 for (; i
< v
->length
; i
++) {
114 git_blame_hunk
*hunk
= (git_blame_hunk
*)v
->contents
[i
];
115 hunk
->final_start_line_number
+= shift_by
;
/*
 * Allocate and initialise a git_blame object for `path` in `repo`.
 *
 * The structure is zero-allocated, the repository is stored, the `hunks`
 * vector is set up with hunk_cmp and the `paths` vector with paths_cmp, a
 * copy of `path` is stored in gbr->path and a second copy is inserted into
 * gbr->paths. When opts.flags contains GIT_BLAME_USE_MAILMAP the
 * repository's mailmap is loaded into gbr->mailmap.
 *
 * NOTE(review): the third parameter (presumably `const char *path`), the
 * allocation check, the failure/cleanup branches and the return statement
 * are not visible in this excerpt.
 */
120 git_blame
* git_blame__alloc(
121 git_repository
*repo
,
122 git_blame_options opts
,
125 git_blame
*gbr
= git__calloc(1, sizeof(git_blame
));
129 gbr
->repository
= repo
;
132 if (git_vector_init(&gbr
->hunks
, 8, hunk_cmp
) < 0 ||
133 git_vector_init(&gbr
->paths
, 8, paths_cmp
) < 0 ||
134 (gbr
->path
= git__strdup(path
)) == NULL
||
/* NOTE(review): this second git__strdup result goes straight into
 * git_vector_insert without a NULL check -- confirm a failed copy cannot
 * end up stored in the paths vector. */
135 git_vector_insert(&gbr
->paths
, git__strdup(path
)) < 0)
/* The mailmap is only loaded on request; a negative result enters the
 * branch below, whose body is not visible here. */
141 if (opts
.flags
& GIT_BLAME_USE_MAILMAP
&&
142 git_mailmap_from_repository(&gbr
->mailmap
, repo
) < 0) {
/*
 * Tear down a git_blame object: walks the hunks vector, frees the hunks
 * vector, deep-frees the paths vector, clears the line index array, and
 * releases the mailmap, the stored path and the final blob.
 *
 * NOTE(review): any NULL guard on `blame`, the body of the per-hunk loop
 * (presumably free_hunk) and the release of `blame` itself are not visible
 * in this excerpt.
 */
150 void git_blame_free(git_blame
*blame
)
153 git_blame_hunk
*hunk
;
157 git_vector_foreach(&blame
->hunks
, i
, hunk
)
159 git_vector_free(&blame
->hunks
);
/* paths holds heap-allocated strings, hence the deep free. */
161 git_vector_free_deep(&blame
->paths
);
163 git_array_clear(blame
->line_index
);
165 git_mailmap_free(blame
->mailmap
);
167 git__free(blame
->path
);
168 git_blob_free(blame
->final_blob
);
/*
 * Return the number of hunks held by `blame`, i.e. the length of its hunks
 * vector cast to uint32_t.
 *
 * NOTE(review): one line of the body is not visible in this excerpt.
 */
172 uint32_t git_blame_get_hunk_count(git_blame
*blame
)
175 return (uint32_t)blame
->hunks
.length
;
/*
 * Return the hunk stored at position `index` of the hunks vector, as
 * produced by git_vector_get.
 *
 * NOTE(review): one line of the body is not visible in this excerpt; the
 * result for an out-of-range index depends on git_vector_get (presumably
 * NULL -- confirm).
 */
178 const git_blame_hunk
*git_blame_get_hunk_byindex(git_blame
*blame
, uint32_t index
)
181 return (git_blame_hunk
*)git_vector_get(&blame
->hunks
, index
);
/*
 * Return the hunk that covers line `lineno` of the final file.
 *
 * The hunks vector is binary-searched with hunk_byfinalline_search_cmp and,
 * when the search reports a zero result, the hunk at the found index is
 * returned through git_blame_get_hunk_byindex.
 *
 * NOTE(review): the not-found return (presumably NULL) is not visible in
 * this excerpt.
 */
184 const git_blame_hunk
*git_blame_get_hunk_byline(git_blame
*blame
, size_t lineno
)
/* The line number is copied into a local so its address can serve as the search key. */
186 size_t i
, new_lineno
= lineno
;
189 if (!git_vector_bsearch2(&i
, &blame
->hunks
, hunk_byfinalline_search_cmp
, &new_lineno
)) {
190 return git_blame_get_hunk_byindex(blame
, (uint32_t)i
);
/*
 * Produce a fully populated options structure in `out`.
 *
 * `in` may be NULL, in which case GIT_BLAME_OPTIONS_INIT defaults are used.
 * The options are copied into `out`, a zero newest_commit is resolved from
 * "HEAD", a zero min_line becomes 1, and the copy-tracking flags are
 * expanded so that each one also sets the flag tested in the next step.
 *
 * NOTE(review): the failure branch of the HEAD lookup and the return
 * statement are not visible in this excerpt.
 */
196 static int normalize_options(
197 git_blame_options
*out
,
198 const git_blame_options
*in
,
199 git_repository
*repo
)
201 git_blame_options dummy
= GIT_BLAME_OPTIONS_INIT
;
202 if (!in
) in
= &dummy
;
204 memcpy(out
, in
, sizeof(git_blame_options
));
206 /* No newest_commit => HEAD */
207 if (git_oid_iszero(&out
->newest_commit
)) {
208 if (git_reference_name_to_id(&out
->newest_commit
, repo
, "HEAD") < 0) {
213 /* min_line 0 really means 1 */
214 if (!out
->min_line
) out
->min_line
= 1;
215 /* max_line 0 really means N, but we don't know N yet */
217 /* Fix up option implications */
/* The three checks cascade: ANY_COMMIT_COPIES sets SAME_COMMIT_COPIES, which
 * sets SAME_COMMIT_MOVES, which sets SAME_FILE. Their order matters. */
218 if (out
->flags
& GIT_BLAME_TRACK_COPIES_ANY_COMMIT_COPIES
)
219 out
->flags
|= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES
;
220 if (out
->flags
& GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES
)
221 out
->flags
|= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES
;
222 if (out
->flags
& GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES
)
223 out
->flags
|= GIT_BLAME_TRACK_COPIES_SAME_FILE
;
/*
 * Split `hunk` in two at the offset `rel_line` (counted from the start of
 * the hunk) and insert the new trailing hunk into `vec` in sorted position.
 *
 * The new hunk starts `rel_line` lines into both the final and the original
 * line ranges, inherits the original path and both commit ids, and takes
 * the remaining lines; the original hunk is shortened by that amount.
 * `return_new` selects which of the two hunks is handed back.
 *
 * NOTE(review): part of the parameter list, the first half of the boundary
 * test, the NULL check on `nh` and the return statements are not visible in
 * this excerpt. Signatures are not copied to the new hunk in the visible
 * lines.
 */
228 static git_blame_hunk
*split_hunk_in_vector(
230 git_blame_hunk
*hunk
,
234 size_t new_line_count
;
237 /* Don't split if already at a boundary */
239 rel_line
>= hunk
->lines_in_hunk
)
244 new_line_count
= hunk
->lines_in_hunk
- rel_line
;
245 nh
= new_hunk(hunk
->final_start_line_number
+ rel_line
, new_line_count
,
246 hunk
->orig_start_line_number
+ rel_line
, hunk
->orig_path
);
251 git_oid_cpy(&nh
->final_commit_id
, &hunk
->final_commit_id
);
252 git_oid_cpy(&nh
->orig_commit_id
, &hunk
->orig_commit_id
);
254 /* Adjust hunk that was split */
255 hunk
->lines_in_hunk
-= new_line_count
;
256 git_vector_insert_sorted(vec
, nh
, NULL
);
258 git_blame_hunk
*ret
= return_new
? nh
: hunk
;
/**
 * Construct a list of char indices for where lines begin.
 * Adapted from core git:
 * https://github.com/gitster/git/blob/be5c9fb9049ed470e7005f159bb923a5f4de1309/builtin/blame.c#L1760-L1789
 */
/*
 * Scan blame->final_buf and append to blame->line_index the byte offset at
 * which each line begins, followed by one closing offset. The line count
 * (plus one when the buffer does not end in a newline) is stored in
 * blame->num_lines and returned.
 *
 * NOTE(review): the loop header, the handling of `bol`/`num` and the
 * declaration of `i` are not visible in this excerpt. GIT_ERROR_CHECK_ALLOC
 * presumably returns a negative value on allocation failure, which callers
 * would then see in place of a line count -- confirm.
 */
268 static int index_blob_lines(git_blame
*blame
)
270 const char *buf
= blame
->final_buf
;
271 git_off_t len
= blame
->final_buf_size
;
272 int num
= 0, incomplete
= 0, bol
= 1;
275 if (len
&& buf
[len
-1] != '\n')
276 incomplete
++; /* incomplete line at the end */
/* Record the offset of the current position relative to the buffer start. */
279 i
= git_array_alloc(blame
->line_index
);
280 GIT_ERROR_CHECK_ALLOC(i
);
281 *i
= buf
- blame
->final_buf
;
284 if (*buf
++ == '\n') {
/* Closing entry: offset reached once scanning is done. */
289 i
= git_array_alloc(blame
->line_index
);
290 GIT_ERROR_CHECK_ALLOC(i
);
291 *i
= buf
- blame
->final_buf
;
292 blame
->num_lines
= num
+ incomplete
;
293 return blame
->num_lines
;
/*
 * Convert an internal blame entry into a public git_blame_hunk.
 *
 * The entry's lno and s_lno are each incremented by one when passed to
 * new_hunk as the final and original start lines; the suspect's path
 * becomes the original path. The final signature is the suspect commit's
 * author resolved through blame->mailmap, and the original signature is a
 * duplicate of it.
 *
 * NOTE(review): the NULL check on `h` and the return statement are not
 * visible in this excerpt. The results of the signature calls are not
 * inspected in the visible lines.
 */
296 static git_blame_hunk
* hunk_from_entry(git_blame__entry
*e
, git_blame
*blame
)
298 git_blame_hunk
*h
= new_hunk(
299 e
->lno
+1, e
->num_lines
, e
->s_lno
+1, e
->suspect
->path
);
/* Both ids are taken from the same suspect commit. */
304 git_oid_cpy(&h
->final_commit_id
, git_commit_id(e
->suspect
->commit
));
305 git_oid_cpy(&h
->orig_commit_id
, git_commit_id(e
->suspect
->commit
));
306 git_commit_author_with_mailmap(
307 &h
->final_signature
, e
->suspect
->commit
, blame
->mailmap
);
308 git_signature_dup(&h
->orig_signature
, h
->final_signature
);
309 h
->boundary
= e
->is_boundary
? 1 : 0;
/*
 * Make sure blame->final and blame->final_blob are populated.
 *
 * Returns 0 immediately when the blob is already loaded. Otherwise the
 * commit named by options.newest_commit is looked up into blame->final and
 * the blob at blame->path inside that commit is looked up into
 * blame->final_blob.
 *
 * NOTE(review): the declaration of `error`, the checks between the two
 * lookups and the return statement are not visible in this excerpt.
 */
313 static int load_blob(git_blame
*blame
)
317 if (blame
->final_blob
) return 0;
319 error
= git_commit_lookup(&blame
->final
, blame
->repository
, &blame
->options
.newest_commit
);
322 error
= git_object_lookup_bypath((git_object
**)&blame
->final_blob
,
323 (git_object
*)blame
->final
, blame
->path
, GIT_OBJECT_BLOB
);
/*
 * Run the blame algorithm for blame->path and fill blame->hunks.
 *
 * Steps visible here: load the final blob and obtain the origin for the
 * final commit; cache the blob's raw content and size; allocate the initial
 * entry and size it from the indexed line count and the min_line/max_line
 * options; run git_blame__like_git; then turn every resulting entry into a
 * hunk, insert it into the hunks vector and free the entry.
 *
 * NOTE(review): the declaration of `error`, the error exits, the wiring of
 * the initial entry into `blame` and the loop advance are not visible in
 * this excerpt.
 */
329 static int blame_internal(git_blame
*blame
)
332 git_blame__entry
*ent
= NULL
;
333 git_blame__origin
*o
;
335 if ((error
= load_blob(blame
)) < 0 ||
336 (error
= git_blame__get_origin(&o
, blame
, blame
->final
, blame
->path
)) < 0)
338 blame
->final_buf
= git_blob_rawcontent(blame
->final_blob
);
339 blame
->final_buf_size
= git_blob_rawsize(blame
->final_blob
);
341 ent
= git__calloc(1, sizeof(git_blame__entry
));
342 GIT_ERROR_CHECK_ALLOC(ent
);
/* NOTE(review): the result of index_blob_lines is stored without a visible
 * check for a negative (error) value -- confirm. */
344 ent
->num_lines
= index_blob_lines(blame
);
/* lno is stored as min_line - 1. */
345 ent
->lno
= blame
->options
.min_line
- 1;
346 ent
->num_lines
= ent
->num_lines
- blame
->options
.min_line
+ 1;
/* A non-zero max_line replaces the line count with max_line - min_line + 1. */
347 if (blame
->options
.max_line
> 0)
348 ent
->num_lines
= blame
->options
.max_line
- blame
->options
.min_line
+ 1;
349 ent
->s_lno
= ent
->lno
;
354 error
= git_blame__like_git(blame
, blame
->options
.flags
);
/* The next pointer is saved before the current entry is freed. */
357 for (ent
= blame
->ent
; ent
; ) {
358 git_blame__entry
*e
= ent
->next
;
359 git_blame_hunk
*h
= hunk_from_entry(ent
, blame
);
361 git_vector_insert(&blame
->hunks
, h
);
363 git_blame__free_entry(ent
);
370 /*******************************************************************************
372 ******************************************************************************/
376 git_repository
*repo
,
378 git_blame_options
*options
)
/*
 * NOTE(review): the line carrying this function's name and its first
 * parameter is not visible in this excerpt; from the parameters and the
 * assert below it is presumably the public file-blame entry point taking
 * `out`, `repo`, `path` and `options` -- confirm.
 *
 * Visible flow: normalise the caller's options, allocate a blame object for
 * `path`, load the final blob, run blame_internal, and free the blame
 * object on the failure path. The error exits and the assignment to `out`
 * are not visible here.
 */
381 git_blame_options normOptions
= GIT_BLAME_OPTIONS_INIT
;
382 git_blame
*blame
= NULL
;
384 assert(out
&& repo
&& path
);
385 if ((error
= normalize_options(&normOptions
, options
, repo
)) < 0)
388 blame
= git_blame__alloc(repo
, normOptions
, path
);
389 GIT_ERROR_CHECK_ALLOC(blame
);
/* blame_internal calls load_blob again; that second call returns early once
 * final_blob is set. */
391 if ((error
= load_blob(blame
)) < 0)
394 if ((error
= blame_internal(blame
)) < 0)
401 git_blame_free(blame
);
405 /*******************************************************************************
407 *******************************************************************************/
409 static bool hunk_is_bufferblame(git_blame_hunk
*hunk
)
411 return git_oid_iszero(&hunk
->final_commit_id
);
/*
 * Diff hunk callback used while blaming a buffer: positions
 * blame->current_hunk at the line where the diff hunk begins.
 *
 * The "wedge" line is the diff hunk's new_start when the hunk has no old
 * lines, otherwise its old_start. If no blame hunk covers that line a new
 * zero-length hunk is created and inserted; if the covering hunk does not
 * start at or after the wedge line it is split so that one does.
 *
 * NOTE(review): the `payload` parameter, the declaration of `wedge_line`
 * and the return statement are not visible in this excerpt.
 */
414 static int buffer_hunk_cb(
415 const git_diff_delta
*delta
,
416 const git_diff_hunk
*hunk
,
419 git_blame
*blame
= (git_blame
*)payload
;
424 wedge_line
= (hunk
->old_lines
== 0) ? hunk
->new_start
: hunk
->old_start
;
425 blame
->current_diff_line
= wedge_line
;
427 blame
->current_hunk
= (git_blame_hunk
*)git_blame_get_hunk_byline(blame
, wedge_line
);
428 if (!blame
->current_hunk
) {
429 /* Line added at the end of the file */
430 blame
->current_hunk
= new_hunk(wedge_line
, 0, wedge_line
, blame
->path
);
431 GIT_ERROR_CHECK_ALLOC(blame
->current_hunk
);
433 git_vector_insert(&blame
->hunks
, blame
->current_hunk
);
434 } else if (!hunk_starts_at_or_after_line(blame
->current_hunk
, wedge_line
)){
435 /* If this hunk doesn't start between existing hunks, split a hunk up so it does */
/* NOTE(review): the split offset is computed against orig_start_line_number,
 * while the hunk was found by its final line range and wedge_line is a line
 * of the final file. final_start_line_number looks like the intended base --
 * confirm. */
436 blame
->current_hunk
= split_hunk_in_vector(&blame
->hunks
, blame
->current_hunk
,
437 wedge_line
- blame
->current_hunk
->orig_start_line_number
, true);
438 GIT_ERROR_CHECK_ALLOC(blame
->current_hunk
);
/*
 * Identity comparator: orders two pointers by their address value and
 * returns 0 only when both are the same pointer. Used to find a specific
 * hunk object inside a vector.
 *
 * The addresses are compared as uintptr_t because applying < or > to
 * pointers that do not point into the same object is undefined in C.
 */
static int ptrs_equal_cmp(const void *a, const void *b)
{
	uintptr_t lhs = (uintptr_t)a;
	uintptr_t rhs = (uintptr_t)b;

	if (lhs < rhs)
		return -1;
	return lhs > rhs ? 1 : 0;
}
/*
 * Diff line callback used while blaming a buffer: applies one added or
 * deleted line to the hunk list.
 *
 * Addition: if the current hunk is a buffer-blame hunk that ends at or
 * before the current diff line, it grows by one line and the hunks after it
 * are shifted down by one; otherwise the hunks from the current line on are
 * shifted down and a new one-line hunk is inserted there. The current diff
 * line then advances.
 *
 * Deletion: the current hunk shrinks by one line; when it becomes empty it
 * is removed from the vector and freed, and the hunk now at the same index
 * becomes current. The hunks from `shift_base` on are shifted up by one.
 *
 * NOTE(review): the `payload` parameter, the declaration of `i`, the `else`
 * that separates the two addition cases and the return statement are not
 * visible in this excerpt.
 */
445 static int buffer_line_cb(
446 const git_diff_delta
*delta
,
447 const git_diff_hunk
*hunk
,
448 const git_diff_line
*line
,
451 git_blame
*blame
= (git_blame
*)payload
;
457 if (line
->origin
== GIT_DIFF_LINE_ADDITION
) {
458 if (hunk_is_bufferblame(blame
->current_hunk
) &&
459 hunk_ends_at_or_before_line(blame
->current_hunk
, blame
->current_diff_line
)) {
460 /* Append to the current buffer-blame hunk */
461 blame
->current_hunk
->lines_in_hunk
++;
462 shift_hunks_by(&blame
->hunks
, blame
->current_diff_line
+1, 1);
464 /* Create a new buffer-blame hunk with this line */
/* Existing hunks are shifted before the new hunk is inserted at that line. */
465 shift_hunks_by(&blame
->hunks
, blame
->current_diff_line
, 1);
466 blame
->current_hunk
= new_hunk(blame
->current_diff_line
, 1, 0, blame
->path
);
467 GIT_ERROR_CHECK_ALLOC(blame
->current_hunk
);
469 git_vector_insert_sorted(&blame
->hunks
, blame
->current_hunk
, NULL
);
471 blame
->current_diff_line
++;
474 if (line
->origin
== GIT_DIFF_LINE_DELETION
) {
475 /* Trim the line from the current hunk; remove it if it's now empty */
/* shift_base is computed before the hunk's line count is decremented. */
476 size_t shift_base
= blame
->current_diff_line
+ blame
->current_hunk
->lines_in_hunk
+1;
478 if (--(blame
->current_hunk
->lines_in_hunk
) == 0) {
481 if (!git_vector_search2(&i
, &blame
->hunks
, ptrs_equal_cmp
, blame
->current_hunk
)) {
482 git_vector_remove(&blame
->hunks
, i
);
483 free_hunk(blame
->current_hunk
);
/* After the removal, index i refers to the hunk that followed the removed one. */
484 blame
->current_hunk
= (git_blame_hunk
*)git_blame_get_hunk_byindex(blame
, (uint32_t)i
);
487 shift_hunks_by(&blame
->hunks
, shift_base
, -1);
/*
 * Produce a blame for an in-memory buffer, based on an existing blame
 * (`reference`) of the same file.
 *
 * A new blame object is allocated with the reference's repository, options
 * and path; every hunk of the reference is duplicated into it; then the
 * reference's final blob is diffed against the buffer with zero context
 * lines, and buffer_hunk_cb / buffer_line_cb update the hunk list as the
 * diff is walked.
 *
 * NOTE(review): the `out`, `buffer` and `buffer_len` parameters, several
 * local declarations, the assignment to `out` and the return statement are
 * not visible in this excerpt. The assert rejects a zero buffer_len.
 */
492 int git_blame_buffer(
494 git_blame
*reference
,
499 git_diff_options diffopts
= GIT_DIFF_OPTIONS_INIT
;
501 git_blame_hunk
*hunk
;
503 diffopts
.context_lines
= 0;
505 assert(out
&& reference
&& buffer
&& buffer_len
);
507 blame
= git_blame__alloc(reference
->repository
, reference
->options
, reference
->path
);
508 GIT_ERROR_CHECK_ALLOC(blame
);
510 /* Duplicate all of the hunk structures in the reference blame */
511 git_vector_foreach(&reference
->hunks
, i
, hunk
) {
512 git_blame_hunk
*h
= dup_hunk(hunk
);
513 GIT_ERROR_CHECK_ALLOC(h
);
515 git_vector_insert(&blame
->hunks
, h
);
518 /* Diff to the reference blob */
/* NOTE(review): the result of git_diff_blob_to_buffer is not assigned in the
 * visible lines, so a failure in the diff or in a callback would presumably
 * go unreported -- confirm. */
519 git_diff_blob_to_buffer(reference
->final_blob
, blame
->path
,
520 buffer
, buffer_len
, blame
->path
, &diffopts
,
521 NULL
, NULL
, buffer_hunk_cb
, buffer_line_cb
, blame
);
/*
 * Initialise `opts` from the GIT_BLAME_OPTIONS_INIT template for the given
 * structure `version`, via GIT_INIT_STRUCTURE_FROM_TEMPLATE.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
527 int git_blame_init_options(git_blame_options
*opts
, unsigned int version
)
529 GIT_INIT_STRUCTURE_FROM_TEMPLATE(
530 opts
, version
, git_blame_options
, GIT_BLAME_OPTIONS_INIT
);