2 * Copyright (C) the libgit2 contributors. All rights reserved.
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
10 #include "git2/commit.h"
11 #include "git2/revparse.h"
12 #include "git2/revwalk.h"
13 #include "git2/tree.h"
14 #include "git2/diff.h"
15 #include "git2/blob.h"
16 #include "git2/signature.h"
17 #include "git2/mailmap.h"
19 #include "repository.h"
20 #include "blame_git.h"
/*
 * Search comparator used with git_vector_bsearch2 to find the hunk covering
 * a line of the final file.
 *
 * key:   pointer to the size_t line number being looked up.
 * entry: pointer to the git_blame_hunk being tested.
 *
 * The line is tested against the hunk's half-open range
 * [final_start_line_number, final_start_line_number + lines_in_hunk):
 * first for lying before it, then for lying at or past its end.
 */
23 static int hunk_byfinalline_search_cmp(const void *key
, const void *entry
)
25 git_blame_hunk
*hunk
= (git_blame_hunk
*)entry
;
27 size_t lineno
= *(size_t*)key
;
28 size_t lines_in_hunk
= hunk
->lines_in_hunk
;
29 size_t final_start_line_number
= hunk
->final_start_line_number
;
31 if (lineno
< final_start_line_number
)
33 if (lineno
>= final_start_line_number
+ lines_in_hunk
)
/*
 * Comparator installed on the blame's paths vector: treats both elements as
 * C strings and orders them with git__strcmp().
 */
static int paths_cmp(const void *a, const void *b)
{
	const char *lhs = a;
	const char *rhs = b;

	return git__strcmp(lhs, rhs);
}
/*
 * Ordering comparator installed on the hunks vector by git_blame__alloc:
 * compares two git_blame_hunk entries by final_start_line_number.
 */
39 static int hunk_cmp(const void *_a
, const void *_b
)
41 git_blame_hunk
*a
= (git_blame_hunk
*)_a
,
42 *b
= (git_blame_hunk
*)_b
;
44 if (a
->final_start_line_number
> b
->final_start_line_number
)
46 else if (a
->final_start_line_number
< b
->final_start_line_number
)
52 static bool hunk_ends_at_or_before_line(git_blame_hunk
*hunk
, size_t line
)
54 return line
>= (hunk
->final_start_line_number
+ hunk
->lines_in_hunk
- 1);
57 static bool hunk_starts_at_or_after_line(git_blame_hunk
*hunk
, size_t line
)
59 return line
<= hunk
->final_start_line_number
;
/*
 * Allocate a zero-initialized git_blame_hunk and fill in its line count,
 * final and original start lines, and a private copy of `path` (orig_path is
 * left NULL when no path is given).
 *
 * Returns NULL if the hunk itself cannot be allocated.
 *
 * NOTE(review): a failed git__strdup also leaves orig_path NULL, which looks
 * the same as "no path given" to the caller -- confirm that is acceptable.
 */
62 static git_blame_hunk
*new_hunk(
68 git_blame_hunk
*hunk
= git__calloc(1, sizeof(git_blame_hunk
));
69 if (!hunk
) return NULL
;
71 hunk
->lines_in_hunk
= lines
;
72 hunk
->final_start_line_number
= start
;
73 hunk
->orig_start_line_number
= orig_start
;
74 hunk
->orig_path
= path
? git__strdup(path
) : NULL
;
/*
 * Release the resources owned by a hunk: its orig_path string and both of
 * its signatures (final and original).
 */
79 static void free_hunk(git_blame_hunk
*hunk
)
81 git__free((void*)hunk
->orig_path
);
82 git_signature_free(hunk
->final_signature
);
83 git_signature_free(hunk
->orig_signature
);
/*
 * Create a copy of `hunk`: a fresh hunk is built with new_hunk() from the
 * source's line fields, then both commit ids and the boundary flag are
 * copied and both signatures are duplicated with git_signature_dup().
 * A failure of either signature duplication is detected (result < 0).
 */
87 static git_blame_hunk
*dup_hunk(git_blame_hunk
*hunk
)
89 git_blame_hunk
*newhunk
= new_hunk(
90 hunk
->final_start_line_number
,
92 hunk
->orig_start_line_number
,
98 git_oid_cpy(&newhunk
->orig_commit_id
, &hunk
->orig_commit_id
);
99 git_oid_cpy(&newhunk
->final_commit_id
, &hunk
->final_commit_id
);
100 newhunk
->boundary
= hunk
->boundary
;
102 if (git_signature_dup(&newhunk
->final_signature
, hunk
->final_signature
) < 0 ||
103 git_signature_dup(&newhunk
->orig_signature
, hunk
->orig_signature
) < 0) {
/*
 * v:          vector of git_blame_hunk pointers, searched with
 *             hunk_byfinalline_search_cmp.
 * start_line: final-file line used to pick the first hunk to shift.
 * shift_by:   signed amount added to each final_start_line_number.
 *
 * If the search finds no hunk for start_line, nothing is shifted.
 */
111 /* Starting with the hunk that includes start_line, shift all following hunks'
112 * final_start_line_number by shift_by lines */
113 static void shift_hunks_by(git_vector
*v
, size_t start_line
, int shift_by
)
117 if (!git_vector_bsearch2(&i
, v
, hunk_byfinalline_search_cmp
, &start_line
)) {
118 for (; i
< v
->length
; i
++) {
119 git_blame_hunk
*hunk
= (git_blame_hunk
*)v
->contents
[i
];
120 hunk
->final_start_line_number
+= shift_by
;
/*
 * Allocate and initialize a git_blame for `path` in `repo`.
 *
 * Sets up the hunks vector (comparator hunk_cmp) and the paths vector
 * (comparator paths_cmp), stores a copy of `path` in gbr->path and inserts a
 * second, separately allocated copy into gbr->paths.  When
 * GIT_BLAME_USE_MAILMAP is set in opts.flags, the repository mailmap is
 * loaded into gbr->mailmap.
 *
 * NOTE(review): the second git__strdup(path) is handed straight to
 * git_vector_insert; if it returns NULL, that NULL is what would be
 * inserted -- confirm git_vector_insert and later users tolerate this.
 */
125 git_blame
*git_blame__alloc(
126 git_repository
*repo
,
127 git_blame_options opts
,
130 git_blame
*gbr
= git__calloc(1, sizeof(git_blame
));
134 gbr
->repository
= repo
;
137 if (git_vector_init(&gbr
->hunks
, 8, hunk_cmp
) < 0 ||
138 git_vector_init(&gbr
->paths
, 8, paths_cmp
) < 0 ||
139 (gbr
->path
= git__strdup(path
)) == NULL
||
140 git_vector_insert(&gbr
->paths
, git__strdup(path
)) < 0)
146 if (opts
.flags
& GIT_BLAME_USE_MAILMAP
&&
147 git_mailmap_from_repository(&gbr
->mailmap
, repo
) < 0) {
/*
 * Tear down a git_blame: iterates over every hunk in blame->hunks, then
 * frees the hunks vector, deep-frees the paths vector, clears the line
 * index array and frees the mailmap, the path string and the final blob.
 */
155 void git_blame_free(git_blame
*blame
)
158 git_blame_hunk
*hunk
;
162 git_vector_foreach(&blame
->hunks
, i
, hunk
)
164 git_vector_free(&blame
->hunks
);
166 git_vector_free_deep(&blame
->paths
);
168 git_array_clear(blame
->line_index
);
170 git_mailmap_free(blame
->mailmap
);
172 git__free(blame
->path
);
173 git_blob_free(blame
->final_blob
);
177 uint32_t git_blame_get_hunk_count(git_blame
*blame
)
179 GIT_ASSERT_ARG(blame
);
180 return (uint32_t)blame
->hunks
.length
;
183 const git_blame_hunk
*git_blame_get_hunk_byindex(git_blame
*blame
, uint32_t index
)
185 GIT_ASSERT_ARG_WITH_RETVAL(blame
, NULL
);
186 return (git_blame_hunk
*)git_vector_get(&blame
->hunks
, index
);
/*
 * Return the hunk that covers `lineno` in the final file.
 *
 * The hunks vector is binary-searched with hunk_byfinalline_search_cmp; on a
 * hit the hunk at the found index is returned through
 * git_blame_get_hunk_byindex().
 */
189 const git_blame_hunk
*git_blame_get_hunk_byline(git_blame
*blame
, size_t lineno
)
191 size_t i
, new_lineno
= lineno
;
193 GIT_ASSERT_ARG_WITH_RETVAL(blame
, NULL
);
195 if (!git_vector_bsearch2(&i
, &blame
->hunks
, hunk_byfinalline_search_cmp
, &new_lineno
)) {
196 return git_blame_get_hunk_byindex(blame
, (uint32_t)i
);
/*
 * Copy the caller's blame options into `out`, filling in defaults.
 *
 * - A NULL `in` is replaced by GIT_BLAME_OPTIONS_INIT.
 * - A zero newest_commit is resolved from "HEAD" in `repo`.
 * - min_line 0 becomes 1.
 * - Each copy-tracking flag also switches on the next weaker one:
 *   ANY_COMMIT_COPIES, SAME_COMMIT_COPIES, SAME_COMMIT_MOVES, SAME_FILE.
 */
202 static int normalize_options(
203 git_blame_options
*out
,
204 const git_blame_options
*in
,
205 git_repository
*repo
)
207 git_blame_options dummy
= GIT_BLAME_OPTIONS_INIT
;
208 if (!in
) in
= &dummy
;
210 memcpy(out
, in
, sizeof(git_blame_options
));
212 /* No newest_commit => HEAD */
213 if (git_oid_is_zero(&out
->newest_commit
)) {
214 if (git_reference_name_to_id(&out
->newest_commit
, repo
, "HEAD") < 0) {
219 /* min_line 0 really means 1 */
220 if (!out
->min_line
) out
->min_line
= 1;
221 /* max_line 0 really means N, but we don't know N yet */
223 /* Fix up option implications */
224 if (out
->flags
& GIT_BLAME_TRACK_COPIES_ANY_COMMIT_COPIES
)
225 out
->flags
|= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES
;
226 if (out
->flags
& GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES
)
227 out
->flags
|= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES
;
228 if (out
->flags
& GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES
)
229 out
->flags
|= GIT_BLAME_TRACK_COPIES_SAME_FILE
;
/*
 * Split `hunk` at offset `rel_line`, inserting the new tail hunk into `vec`.
 *
 * The tail starts at the final/original start lines plus rel_line, holds
 * lines_in_hunk - rel_line lines, and receives the original path and copies
 * of both commit ids; `hunk` is shortened by the same number of lines.
 * The result is the new tail when `return_new` is set, otherwise `hunk`.
 * rel_line at or past the end of the hunk is treated as "already at a
 * boundary".
 *
 * NOTE(review): looks like only the path and commit ids are carried over to
 * the new hunk; signatures and the boundary flag do not appear to be --
 * confirm whether the tail is expected to have them.
 */
234 static git_blame_hunk
*split_hunk_in_vector(
236 git_blame_hunk
*hunk
,
240 size_t new_line_count
;
243 /* Don't split if already at a boundary */
245 rel_line
>= hunk
->lines_in_hunk
)
250 new_line_count
= hunk
->lines_in_hunk
- rel_line
;
251 nh
= new_hunk(hunk
->final_start_line_number
+ rel_line
, new_line_count
,
252 hunk
->orig_start_line_number
+ rel_line
, hunk
->orig_path
);
257 git_oid_cpy(&nh
->final_commit_id
, &hunk
->final_commit_id
);
258 git_oid_cpy(&nh
->orig_commit_id
, &hunk
->orig_commit_id
);
260 /* Adjust hunk that was split */
261 hunk
->lines_in_hunk
-= new_line_count
;
262 git_vector_insert_sorted(vec
, nh
, NULL
);
264 git_blame_hunk
*ret
= return_new
? nh
: hunk
;
270 * Construct a list of char indices for where lines begin
271 * Adapted from core git:
272 * https://github.com/gitster/git/blob/be5c9fb9049ed470e7005f159bb923a5f4de1309/builtin/blame.c#L1760-L1789
/*
 * Scans blame->final_buf (blame->final_buf_size bytes) and records byte
 * offsets, relative to the start of that buffer, in blame->line_index.
 * The resulting line count is stored in blame->num_lines, which is also the
 * return value.  A non-empty buffer whose last byte is not '\n' counts one
 * extra (incomplete) line.
 */
274 static int index_blob_lines(git_blame
*blame
)
276 const char *buf
= blame
->final_buf
;
277 size_t len
= blame
->final_buf_size
;
278 int num
= 0, incomplete
= 0, bol
= 1;
281 if (len
&& buf
[len
-1] != '\n')
282 incomplete
++; /* incomplete line at the end */
285 i
= git_array_alloc(blame
->line_index
);
286 GIT_ERROR_CHECK_ALLOC(i
);
287 *i
= buf
- blame
->final_buf
;
290 if (*buf
++ == '\n') {
295 i
= git_array_alloc(blame
->line_index
);
296 GIT_ERROR_CHECK_ALLOC(i
);
297 *i
= buf
- blame
->final_buf
;
298 blame
->num_lines
= num
+ incomplete
;
299 return blame
->num_lines
;
/*
 * Build a git_blame_hunk from a blame entry.
 *
 * The hunk starts at e->lno + 1 (final) and e->s_lno + 1 (original), spans
 * e->num_lines lines and uses the suspect's path.  Both commit ids are set
 * to the suspect commit's id; the final signature is the commit author
 * resolved through blame->mailmap and the original signature is a duplicate
 * of it.  boundary mirrors e->is_boundary as 0/1.
 *
 * NOTE(review): the results of git_commit_author_with_mailmap() and
 * git_signature_dup() are not checked -- confirm failures are acceptable.
 */
302 static git_blame_hunk
*hunk_from_entry(git_blame__entry
*e
, git_blame
*blame
)
304 git_blame_hunk
*h
= new_hunk(
305 e
->lno
+1, e
->num_lines
, e
->s_lno
+1, e
->suspect
->path
);
310 git_oid_cpy(&h
->final_commit_id
, git_commit_id(e
->suspect
->commit
));
311 git_oid_cpy(&h
->orig_commit_id
, git_commit_id(e
->suspect
->commit
));
312 git_commit_author_with_mailmap(
313 &h
->final_signature
, e
->suspect
->commit
, blame
->mailmap
);
314 git_signature_dup(&h
->orig_signature
, h
->final_signature
);
315 h
->boundary
= e
->is_boundary
? 1 : 0;
/*
 * Load the commit and blob being blamed, unless the blob is already loaded
 * (in which case 0 is returned immediately).
 *
 * Looks up options.newest_commit into blame->final, then resolves
 * blame->path inside that commit as a blob into blame->final_blob.
 */
319 static int load_blob(git_blame
*blame
)
323 if (blame
->final_blob
) return 0;
325 error
= git_commit_lookup(&blame
->final
, blame
->repository
, &blame
->options
.newest_commit
);
328 error
= git_object_lookup_bypath((git_object
**)&blame
->final_blob
,
329 (git_object
*)blame
->final
, blame
->path
, GIT_OBJECT_BLOB
);
/*
 * Run the blame and convert its entries into hunks.
 *
 * Loads the blob and the origin for blame->path, rejects blobs larger than
 * SIZE_MAX, indexes the blob's lines, prepares an entry starting at
 * min_line - 1 whose length runs to max_line (when set) or to the last
 * line, runs git_blame__like_git() and finally turns each entry of
 * blame->ent into a hunk in blame->hunks, freeing the entry afterwards.
 *
 * NOTE(review): index_blob_lines() returns an int that is stored directly in
 * ent->num_lines, and the hunk from hunk_from_entry() is passed to
 * git_vector_insert() as returned -- confirm error/NULL results are handled.
 * NOTE(review): with max_line set, num_lines becomes
 * max_line - min_line + 1; confirm max_line is validated against the file's
 * line count and against min_line somewhere.
 */
335 static int blame_internal(git_blame
*blame
)
338 git_blame__entry
*ent
= NULL
;
339 git_blame__origin
*o
;
341 if ((error
= load_blob(blame
)) < 0 ||
342 (error
= git_blame__get_origin(&o
, blame
, blame
->final
, blame
->path
)) < 0)
345 if (git_blob_rawsize(blame
->final_blob
) > SIZE_MAX
) {
346 git_error_set(GIT_ERROR_NOMEMORY
, "blob is too large to blame");
351 blame
->final_buf
= git_blob_rawcontent(blame
->final_blob
);
352 blame
->final_buf_size
= (size_t)git_blob_rawsize(blame
->final_blob
);
354 ent
= git__calloc(1, sizeof(git_blame__entry
));
355 GIT_ERROR_CHECK_ALLOC(ent
);
357 ent
->num_lines
= index_blob_lines(blame
);
358 ent
->lno
= blame
->options
.min_line
- 1;
359 ent
->num_lines
= ent
->num_lines
- blame
->options
.min_line
+ 1;
360 if (blame
->options
.max_line
> 0)
361 ent
->num_lines
= blame
->options
.max_line
- blame
->options
.min_line
+ 1;
362 ent
->s_lno
= ent
->lno
;
367 error
= git_blame__like_git(blame
, blame
->options
.flags
);
370 for (ent
= blame
->ent
; ent
; ) {
371 git_blame__entry
*e
= ent
->next
;
372 git_blame_hunk
*h
= hunk_from_entry(ent
, blame
);
374 git_vector_insert(&blame
->hunks
, h
);
376 git_blame__free_entry(ent
);
383 /*******************************************************************************
385 ******************************************************************************/
389 git_repository
*repo
,
391 git_blame_options
*options
)
394 git_blame_options normOptions
= GIT_BLAME_OPTIONS_INIT
;
395 git_blame
*blame
= NULL
;
398 GIT_ASSERT_ARG(repo
);
399 GIT_ASSERT_ARG(path
);
401 if ((error
= normalize_options(&normOptions
, options
, repo
)) < 0)
404 blame
= git_blame__alloc(repo
, normOptions
, path
);
405 GIT_ERROR_CHECK_ALLOC(blame
);
407 if ((error
= load_blob(blame
)) < 0)
410 if ((error
= blame_internal(blame
)) < 0)
417 git_blame_free(blame
);
421 /*******************************************************************************
423 *******************************************************************************/
425 static bool hunk_is_bufferblame(git_blame_hunk
*hunk
)
427 return hunk
&& git_oid_is_zero(&hunk
->final_commit_id
);
/*
 * Diff hunk callback handed to git_diff_blob_to_buffer() by
 * git_blame_buffer(): positions blame->current_hunk for the diff hunk that
 * is about to be processed.
 *
 * The wedge line is new_start for a pure insertion (old_lines == 0) and
 * old_start otherwise; it also becomes blame->current_diff_line.  If no
 * blame hunk covers it, an empty hunk is created there and inserted; if the
 * covering hunk starts before it, that hunk is split and the new tail
 * becomes the current hunk.
 *
 * NOTE(review): the covering hunk is found by final line number, but the
 * split offset is computed from orig_start_line_number -- confirm this
 * should not be final_start_line_number.
 */
430 static int buffer_hunk_cb(
431 const git_diff_delta
*delta
,
432 const git_diff_hunk
*hunk
,
435 git_blame
*blame
= (git_blame
*)payload
;
440 wedge_line
= (hunk
->old_lines
== 0) ? hunk
->new_start
: hunk
->old_start
;
441 blame
->current_diff_line
= wedge_line
;
443 blame
->current_hunk
= (git_blame_hunk
*)git_blame_get_hunk_byline(blame
, wedge_line
);
444 if (!blame
->current_hunk
) {
445 /* Line added at the end of the file */
446 blame
->current_hunk
= new_hunk(wedge_line
, 0, wedge_line
, blame
->path
);
447 GIT_ERROR_CHECK_ALLOC(blame
->current_hunk
);
449 git_vector_insert(&blame
->hunks
, blame
->current_hunk
);
450 } else if (!hunk_starts_at_or_after_line(blame
->current_hunk
, wedge_line
)){
451 /* If this hunk doesn't start between existing hunks, split a hunk up so it does */
452 blame
->current_hunk
= split_hunk_in_vector(&blame
->hunks
, blame
->current_hunk
,
453 wedge_line
- blame
->current_hunk
->orig_start_line_number
, true);
454 GIT_ERROR_CHECK_ALLOC(blame
->current_hunk
);
/*
 * Identity comparator used with git_vector_search2(): returns 0 only when
 * `a` and `b` are the same address, otherwise -1 or 1 giving a consistent
 * ordering of the two addresses.
 *
 * Equality is tested on the pointers themselves, which is well defined for
 * any two pointers.  The ordering is done on the addresses converted to
 * uintptr_t, because applying `<` / `>` directly to pointers that do not
 * point into the same object is undefined behavior in C.
 */
static int ptrs_equal_cmp(const void *a, const void *b)
{
	uintptr_t lhs, rhs;

	if (a == b)
		return 0;

	lhs = (uintptr_t)a;
	rhs = (uintptr_t)b;

	return lhs < rhs ? -1 : 1;
}
/*
 * Diff line callback handed to git_diff_blob_to_buffer() by
 * git_blame_buffer(): applies one added or deleted line to the hunk list.
 *
 * Addition: if the current hunk is a buffer-blame hunk (zero final commit
 * id) that ends at or before current_diff_line, it grows by one line and
 * hunks from current_diff_line + 1 on are shifted by +1; otherwise hunks
 * from current_diff_line on are shifted by +1 and a new one-line hunk with
 * original start line 0 is created there and inserted in sorted position.
 * current_diff_line is then incremented.
 *
 * Deletion: shift_base starts as current_diff_line + lines_in_hunk + 1 and
 * the current hunk shrinks by one line; a hunk that becomes empty is looked
 * up by pointer identity, removed from the vector and freed, and
 * current_hunk is set to whatever hunk is now at that index.  Hunks from
 * shift_base on are shifted by -1.
 *
 * NOTE(review): if the removed hunk was the last one in the vector,
 * current_hunk could end up NULL while a following deletion dereferences
 * it -- confirm.
 */
461 static int buffer_line_cb(
462 const git_diff_delta
*delta
,
463 const git_diff_hunk
*hunk
,
464 const git_diff_line
*line
,
467 git_blame
*blame
= (git_blame
*)payload
;
473 if (line
->origin
== GIT_DIFF_LINE_ADDITION
) {
474 if (hunk_is_bufferblame(blame
->current_hunk
) &&
475 hunk_ends_at_or_before_line(blame
->current_hunk
, blame
->current_diff_line
)) {
476 /* Append to the current buffer-blame hunk */
477 blame
->current_hunk
->lines_in_hunk
++;
478 shift_hunks_by(&blame
->hunks
, blame
->current_diff_line
+1, 1);
480 /* Create a new buffer-blame hunk with this line */
481 shift_hunks_by(&blame
->hunks
, blame
->current_diff_line
, 1);
482 blame
->current_hunk
= new_hunk(blame
->current_diff_line
, 1, 0, blame
->path
);
483 GIT_ERROR_CHECK_ALLOC(blame
->current_hunk
);
485 git_vector_insert_sorted(&blame
->hunks
, blame
->current_hunk
, NULL
);
487 blame
->current_diff_line
++;
490 if (line
->origin
== GIT_DIFF_LINE_DELETION
) {
491 /* Trim the line from the current hunk; remove it if it's now empty */
492 size_t shift_base
= blame
->current_diff_line
+ blame
->current_hunk
->lines_in_hunk
+1;
494 if (--(blame
->current_hunk
->lines_in_hunk
) == 0) {
497 if (!git_vector_search2(&i
, &blame
->hunks
, ptrs_equal_cmp
, blame
->current_hunk
)) {
498 git_vector_remove(&blame
->hunks
, i
);
499 free_hunk(blame
->current_hunk
);
500 blame
->current_hunk
= (git_blame_hunk
*)git_blame_get_hunk_byindex(blame
, (uint32_t)i
);
503 shift_hunks_by(&blame
->hunks
, shift_base
, -1);
/*
 * Produce a blame for an in-memory buffer, based on an existing blame.
 *
 * A new git_blame is allocated with the reference's repository, options and
 * path, every hunk of the reference is duplicated into it, and the
 * reference's final blob is then diffed (zero context lines) against
 * `buffer`, with buffer_hunk_cb / buffer_line_cb adjusting the hunks.
 *
 * `reference`, and `buffer && buffer_len`, are checked with GIT_ASSERT_ARG.
 *
 * NOTE(review): the result of git_diff_blob_to_buffer() is not captured on
 * the call, and GIT_ERROR_CHECK_ALLOC(h) inside the loop is not preceded by
 * a release of `blame` -- confirm the error paths.
 */
508 int git_blame_buffer(
510 git_blame
*reference
,
515 git_diff_options diffopts
= GIT_DIFF_OPTIONS_INIT
;
517 git_blame_hunk
*hunk
;
519 diffopts
.context_lines
= 0;
522 GIT_ASSERT_ARG(reference
);
523 GIT_ASSERT_ARG(buffer
&& buffer_len
);
525 blame
= git_blame__alloc(reference
->repository
, reference
->options
, reference
->path
);
526 GIT_ERROR_CHECK_ALLOC(blame
);
528 /* Duplicate all of the hunk structures in the reference blame */
529 git_vector_foreach(&reference
->hunks
, i
, hunk
) {
530 git_blame_hunk
*h
= dup_hunk(hunk
);
531 GIT_ERROR_CHECK_ALLOC(h
);
533 git_vector_insert(&blame
->hunks
, h
);
536 /* Diff to the reference blob */
537 git_diff_blob_to_buffer(reference
->final_blob
, blame
->path
,
538 buffer
, buffer_len
, blame
->path
, &diffopts
,
539 NULL
, NULL
, buffer_hunk_cb
, buffer_line_cb
, blame
);
/*
 * Initialize `opts` from the GIT_BLAME_OPTIONS_INIT template for the given
 * structure `version`, using GIT_INIT_STRUCTURE_FROM_TEMPLATE.
 */
545 int git_blame_options_init(git_blame_options
*opts
, unsigned int version
)
547 GIT_INIT_STRUCTURE_FROM_TEMPLATE(
548 opts
, version
, git_blame_options
, GIT_BLAME_OPTIONS_INIT
);
552 #ifndef GIT_DEPRECATE_HARD
553 int git_blame_init_options(git_blame_options
*opts
, unsigned int version
)
555 return git_blame_options_init(opts
, version
);