2 * Copyright (C) the libgit2 contributors. All rights reserved.
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
10 #include "git2/commit.h"
11 #include "git2/revparse.h"
12 #include "git2/revwalk.h"
13 #include "git2/tree.h"
14 #include "git2/diff.h"
15 #include "git2/blob.h"
16 #include "git2/signature.h"
17 #include "git2/mailmap.h"
19 #include "repository.h"
20 #include "blame_git.h"
23 static int hunk_byfinalline_search_cmp(const void *key
, const void *entry
)
25 git_blame_hunk
*hunk
= (git_blame_hunk
*)entry
;
27 size_t lineno
= *(size_t*)key
;
28 size_t lines_in_hunk
= hunk
->lines_in_hunk
;
29 size_t final_start_line_number
= hunk
->final_start_line_number
;
31 if (lineno
< final_start_line_number
)
33 if (lineno
>= final_start_line_number
+ lines_in_hunk
)
38 static int paths_cmp(const void *a
, const void *b
) { return git__strcmp((char*)a
, (char*)b
); }
39 static int hunk_cmp(const void *_a
, const void *_b
)
41 git_blame_hunk
*a
= (git_blame_hunk
*)_a
,
42 *b
= (git_blame_hunk
*)_b
;
44 if (a
->final_start_line_number
> b
->final_start_line_number
)
46 else if (a
->final_start_line_number
< b
->final_start_line_number
)
52 static bool hunk_ends_at_or_before_line(git_blame_hunk
*hunk
, size_t line
)
54 return line
>= (hunk
->final_start_line_number
+ hunk
->lines_in_hunk
- 1);
57 static bool hunk_starts_at_or_after_line(git_blame_hunk
*hunk
, size_t line
)
59 return line
<= hunk
->final_start_line_number
;
62 static git_blame_hunk
* new_hunk(
68 git_blame_hunk
*hunk
= git__calloc(1, sizeof(git_blame_hunk
));
69 if (!hunk
) return NULL
;
71 hunk
->lines_in_hunk
= lines
;
72 hunk
->final_start_line_number
= start
;
73 hunk
->orig_start_line_number
= orig_start
;
74 hunk
->orig_path
= path
? git__strdup(path
) : NULL
;
79 static git_blame_hunk
* dup_hunk(git_blame_hunk
*hunk
)
81 git_blame_hunk
*newhunk
= new_hunk(
82 hunk
->final_start_line_number
,
84 hunk
->orig_start_line_number
,
90 git_oid_cpy(&newhunk
->orig_commit_id
, &hunk
->orig_commit_id
);
91 git_oid_cpy(&newhunk
->final_commit_id
, &hunk
->final_commit_id
);
92 newhunk
->boundary
= hunk
->boundary
;
93 git_signature_dup(&newhunk
->final_signature
, hunk
->final_signature
);
94 git_signature_dup(&newhunk
->orig_signature
, hunk
->orig_signature
);
98 static void free_hunk(git_blame_hunk
*hunk
)
100 git__free((void*)hunk
->orig_path
);
101 git_signature_free(hunk
->final_signature
);
102 git_signature_free(hunk
->orig_signature
);
106 /* Starting with the hunk that includes start_line, shift all following hunks'
107 * final_start_line by shift_by lines */
108 static void shift_hunks_by(git_vector
*v
, size_t start_line
, int shift_by
)
112 if (!git_vector_bsearch2(&i
, v
, hunk_byfinalline_search_cmp
, &start_line
)) {
113 for (; i
< v
->length
; i
++) {
114 git_blame_hunk
*hunk
= (git_blame_hunk
*)v
->contents
[i
];
115 hunk
->final_start_line_number
+= shift_by
;
120 git_blame
* git_blame__alloc(
121 git_repository
*repo
,
122 git_blame_options opts
,
125 git_blame
*gbr
= git__calloc(1, sizeof(git_blame
));
129 gbr
->repository
= repo
;
132 if (git_vector_init(&gbr
->hunks
, 8, hunk_cmp
) < 0 ||
133 git_vector_init(&gbr
->paths
, 8, paths_cmp
) < 0 ||
134 (gbr
->path
= git__strdup(path
)) == NULL
||
135 git_vector_insert(&gbr
->paths
, git__strdup(path
)) < 0)
141 if (opts
.flags
& GIT_BLAME_USE_MAILMAP
&&
142 git_mailmap_from_repository(&gbr
->mailmap
, repo
) < 0) {
150 void git_blame_free(git_blame
*blame
)
153 git_blame_hunk
*hunk
;
157 git_vector_foreach(&blame
->hunks
, i
, hunk
)
159 git_vector_free(&blame
->hunks
);
161 git_vector_free_deep(&blame
->paths
);
163 git_array_clear(blame
->line_index
);
165 git_mailmap_free(blame
->mailmap
);
167 git__free(blame
->path
);
168 git_blob_free(blame
->final_blob
);
172 uint32_t git_blame_get_hunk_count(git_blame
*blame
)
175 return (uint32_t)blame
->hunks
.length
;
178 const git_blame_hunk
*git_blame_get_hunk_byindex(git_blame
*blame
, uint32_t index
)
181 return (git_blame_hunk
*)git_vector_get(&blame
->hunks
, index
);
184 const git_blame_hunk
*git_blame_get_hunk_byline(git_blame
*blame
, size_t lineno
)
186 size_t i
, new_lineno
= lineno
;
189 if (!git_vector_bsearch2(&i
, &blame
->hunks
, hunk_byfinalline_search_cmp
, &new_lineno
)) {
190 return git_blame_get_hunk_byindex(blame
, (uint32_t)i
);
196 static int normalize_options(
197 git_blame_options
*out
,
198 const git_blame_options
*in
,
199 git_repository
*repo
)
201 git_blame_options dummy
= GIT_BLAME_OPTIONS_INIT
;
202 if (!in
) in
= &dummy
;
204 memcpy(out
, in
, sizeof(git_blame_options
));
206 /* No newest_commit => HEAD */
207 if (git_oid_is_zero(&out
->newest_commit
)) {
208 if (git_reference_name_to_id(&out
->newest_commit
, repo
, "HEAD") < 0) {
213 /* min_line 0 really means 1 */
214 if (!out
->min_line
) out
->min_line
= 1;
215 /* max_line 0 really means N, but we don't know N yet */
217 /* Fix up option implications */
218 if (out
->flags
& GIT_BLAME_TRACK_COPIES_ANY_COMMIT_COPIES
)
219 out
->flags
|= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES
;
220 if (out
->flags
& GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES
)
221 out
->flags
|= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES
;
222 if (out
->flags
& GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES
)
223 out
->flags
|= GIT_BLAME_TRACK_COPIES_SAME_FILE
;
228 static git_blame_hunk
*split_hunk_in_vector(
230 git_blame_hunk
*hunk
,
234 size_t new_line_count
;
237 /* Don't split if already at a boundary */
239 rel_line
>= hunk
->lines_in_hunk
)
244 new_line_count
= hunk
->lines_in_hunk
- rel_line
;
245 nh
= new_hunk(hunk
->final_start_line_number
+ rel_line
, new_line_count
,
246 hunk
->orig_start_line_number
+ rel_line
, hunk
->orig_path
);
251 git_oid_cpy(&nh
->final_commit_id
, &hunk
->final_commit_id
);
252 git_oid_cpy(&nh
->orig_commit_id
, &hunk
->orig_commit_id
);
254 /* Adjust hunk that was split */
255 hunk
->lines_in_hunk
-= new_line_count
;
256 git_vector_insert_sorted(vec
, nh
, NULL
);
258 git_blame_hunk
*ret
= return_new
? nh
: hunk
;
264 * Construct a list of char indices for where lines begin
265 * Adapted from core git:
266 * https://github.com/gitster/git/blob/be5c9fb9049ed470e7005f159bb923a5f4de1309/builtin/blame.c#L1760-L1789
268 static int index_blob_lines(git_blame
*blame
)
270 const char *buf
= blame
->final_buf
;
271 size_t len
= blame
->final_buf_size
;
272 int num
= 0, incomplete
= 0, bol
= 1;
275 if (len
&& buf
[len
-1] != '\n')
276 incomplete
++; /* incomplete line at the end */
279 i
= git_array_alloc(blame
->line_index
);
280 GIT_ERROR_CHECK_ALLOC(i
);
281 *i
= buf
- blame
->final_buf
;
284 if (*buf
++ == '\n') {
289 i
= git_array_alloc(blame
->line_index
);
290 GIT_ERROR_CHECK_ALLOC(i
);
291 *i
= buf
- blame
->final_buf
;
292 blame
->num_lines
= num
+ incomplete
;
293 return blame
->num_lines
;
296 static git_blame_hunk
* hunk_from_entry(git_blame__entry
*e
, git_blame
*blame
)
298 git_blame_hunk
*h
= new_hunk(
299 e
->lno
+1, e
->num_lines
, e
->s_lno
+1, e
->suspect
->path
);
304 git_oid_cpy(&h
->final_commit_id
, git_commit_id(e
->suspect
->commit
));
305 git_oid_cpy(&h
->orig_commit_id
, git_commit_id(e
->suspect
->commit
));
306 git_commit_author_with_mailmap(
307 &h
->final_signature
, e
->suspect
->commit
, blame
->mailmap
);
308 git_signature_dup(&h
->orig_signature
, h
->final_signature
);
309 h
->boundary
= e
->is_boundary
? 1 : 0;
313 static int load_blob(git_blame
*blame
)
317 if (blame
->final_blob
) return 0;
319 error
= git_commit_lookup(&blame
->final
, blame
->repository
, &blame
->options
.newest_commit
);
322 error
= git_object_lookup_bypath((git_object
**)&blame
->final_blob
,
323 (git_object
*)blame
->final
, blame
->path
, GIT_OBJECT_BLOB
);
329 static int blame_internal(git_blame
*blame
)
332 git_blame__entry
*ent
= NULL
;
333 git_blame__origin
*o
;
335 if ((error
= load_blob(blame
)) < 0 ||
336 (error
= git_blame__get_origin(&o
, blame
, blame
->final
, blame
->path
)) < 0)
339 if (git_blob_rawsize(blame
->final_blob
) > SIZE_MAX
) {
340 git_error_set(GIT_ERROR_NOMEMORY
, "blob is too large to blame");
345 blame
->final_buf
= git_blob_rawcontent(blame
->final_blob
);
346 blame
->final_buf_size
= (size_t)git_blob_rawsize(blame
->final_blob
);
348 ent
= git__calloc(1, sizeof(git_blame__entry
));
349 GIT_ERROR_CHECK_ALLOC(ent
);
351 ent
->num_lines
= index_blob_lines(blame
);
352 ent
->lno
= blame
->options
.min_line
- 1;
353 ent
->num_lines
= ent
->num_lines
- blame
->options
.min_line
+ 1;
354 if (blame
->options
.max_line
> 0)
355 ent
->num_lines
= blame
->options
.max_line
- blame
->options
.min_line
+ 1;
356 ent
->s_lno
= ent
->lno
;
361 error
= git_blame__like_git(blame
, blame
->options
.flags
);
364 for (ent
= blame
->ent
; ent
; ) {
365 git_blame__entry
*e
= ent
->next
;
366 git_blame_hunk
*h
= hunk_from_entry(ent
, blame
);
368 git_vector_insert(&blame
->hunks
, h
);
370 git_blame__free_entry(ent
);
377 /*******************************************************************************
379 ******************************************************************************/
383 git_repository
*repo
,
385 git_blame_options
*options
)
388 git_blame_options normOptions
= GIT_BLAME_OPTIONS_INIT
;
389 git_blame
*blame
= NULL
;
391 assert(out
&& repo
&& path
);
392 if ((error
= normalize_options(&normOptions
, options
, repo
)) < 0)
395 blame
= git_blame__alloc(repo
, normOptions
, path
);
396 GIT_ERROR_CHECK_ALLOC(blame
);
398 if ((error
= load_blob(blame
)) < 0)
401 if ((error
= blame_internal(blame
)) < 0)
408 git_blame_free(blame
);
412 /*******************************************************************************
414 *******************************************************************************/
416 static bool hunk_is_bufferblame(git_blame_hunk
*hunk
)
418 return hunk
&& git_oid_is_zero(&hunk
->final_commit_id
);
421 static int buffer_hunk_cb(
422 const git_diff_delta
*delta
,
423 const git_diff_hunk
*hunk
,
426 git_blame
*blame
= (git_blame
*)payload
;
431 wedge_line
= (hunk
->old_lines
== 0) ? hunk
->new_start
: hunk
->old_start
;
432 blame
->current_diff_line
= wedge_line
;
434 blame
->current_hunk
= (git_blame_hunk
*)git_blame_get_hunk_byline(blame
, wedge_line
);
435 if (!blame
->current_hunk
) {
436 /* Line added at the end of the file */
437 blame
->current_hunk
= new_hunk(wedge_line
, 0, wedge_line
, blame
->path
);
438 GIT_ERROR_CHECK_ALLOC(blame
->current_hunk
);
440 git_vector_insert(&blame
->hunks
, blame
->current_hunk
);
441 } else if (!hunk_starts_at_or_after_line(blame
->current_hunk
, wedge_line
)){
442 /* If this hunk doesn't start between existing hunks, split a hunk up so it does */
443 blame
->current_hunk
= split_hunk_in_vector(&blame
->hunks
, blame
->current_hunk
,
444 wedge_line
- blame
->current_hunk
->orig_start_line_number
, true);
445 GIT_ERROR_CHECK_ALLOC(blame
->current_hunk
);
451 static int ptrs_equal_cmp(const void *a
, const void *b
) { return a
<b
? -1 : a
>b
? 1 : 0; }
452 static int buffer_line_cb(
453 const git_diff_delta
*delta
,
454 const git_diff_hunk
*hunk
,
455 const git_diff_line
*line
,
458 git_blame
*blame
= (git_blame
*)payload
;
464 if (line
->origin
== GIT_DIFF_LINE_ADDITION
) {
465 if (hunk_is_bufferblame(blame
->current_hunk
) &&
466 hunk_ends_at_or_before_line(blame
->current_hunk
, blame
->current_diff_line
)) {
467 /* Append to the current buffer-blame hunk */
468 blame
->current_hunk
->lines_in_hunk
++;
469 shift_hunks_by(&blame
->hunks
, blame
->current_diff_line
+1, 1);
471 /* Create a new buffer-blame hunk with this line */
472 shift_hunks_by(&blame
->hunks
, blame
->current_diff_line
, 1);
473 blame
->current_hunk
= new_hunk(blame
->current_diff_line
, 1, 0, blame
->path
);
474 GIT_ERROR_CHECK_ALLOC(blame
->current_hunk
);
476 git_vector_insert_sorted(&blame
->hunks
, blame
->current_hunk
, NULL
);
478 blame
->current_diff_line
++;
481 if (line
->origin
== GIT_DIFF_LINE_DELETION
) {
482 /* Trim the line from the current hunk; remove it if it's now empty */
483 size_t shift_base
= blame
->current_diff_line
+ blame
->current_hunk
->lines_in_hunk
+1;
485 if (--(blame
->current_hunk
->lines_in_hunk
) == 0) {
488 if (!git_vector_search2(&i
, &blame
->hunks
, ptrs_equal_cmp
, blame
->current_hunk
)) {
489 git_vector_remove(&blame
->hunks
, i
);
490 free_hunk(blame
->current_hunk
);
491 blame
->current_hunk
= (git_blame_hunk
*)git_blame_get_hunk_byindex(blame
, (uint32_t)i
);
494 shift_hunks_by(&blame
->hunks
, shift_base
, -1);
499 int git_blame_buffer(
501 git_blame
*reference
,
506 git_diff_options diffopts
= GIT_DIFF_OPTIONS_INIT
;
508 git_blame_hunk
*hunk
;
510 diffopts
.context_lines
= 0;
512 assert(out
&& reference
&& buffer
&& buffer_len
);
514 blame
= git_blame__alloc(reference
->repository
, reference
->options
, reference
->path
);
515 GIT_ERROR_CHECK_ALLOC(blame
);
517 /* Duplicate all of the hunk structures in the reference blame */
518 git_vector_foreach(&reference
->hunks
, i
, hunk
) {
519 git_blame_hunk
*h
= dup_hunk(hunk
);
520 GIT_ERROR_CHECK_ALLOC(h
);
522 git_vector_insert(&blame
->hunks
, h
);
525 /* Diff to the reference blob */
526 git_diff_blob_to_buffer(reference
->final_blob
, blame
->path
,
527 buffer
, buffer_len
, blame
->path
, &diffopts
,
528 NULL
, NULL
, buffer_hunk_cb
, buffer_line_cb
, blame
);
534 int git_blame_options_init(git_blame_options
*opts
, unsigned int version
)
536 GIT_INIT_STRUCTURE_FROM_TEMPLATE(
537 opts
, version
, git_blame_options
, GIT_BLAME_OPTIONS_INIT
);
541 #ifndef GIT_DEPRECATE_HARD
542 int git_blame_init_options(git_blame_options
*opts
, unsigned int version
)
544 return git_blame_options_init(opts
, version
);