]> git.proxmox.com Git - libgit2.git/blame - src/blame.c
install as examples
[libgit2.git] / src / blame.c
CommitLineData
ceab4e26
BS
1/*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8#include "blame.h"
eae0bfdc 9
ceab4e26
BS
10#include "git2/commit.h"
11#include "git2/revparse.h"
12#include "git2/revwalk.h"
13#include "git2/tree.h"
14#include "git2/diff.h"
15#include "git2/blob.h"
c1ca2b67 16#include "git2/signature.h"
ac3d33df 17#include "git2/mailmap.h"
ceab4e26
BS
18#include "util.h"
19#include "repository.h"
20#include "blame_git.h"
21
22
f0c9d8ba 23static int hunk_byfinalline_search_cmp(const void *key, const void *entry)
ceab4e26 24{
f0c9d8ba
BS
25 git_blame_hunk *hunk = (git_blame_hunk*)entry;
26
79aa0302 27 size_t lineno = *(size_t*)key;
cb1cb24c
PS
28 size_t lines_in_hunk = hunk->lines_in_hunk;
29 size_t final_start_line_number = hunk->final_start_line_number;
79aa0302
JP
30
31 if (lineno < final_start_line_number)
ceab4e26 32 return -1;
79aa0302 33 if (lineno >= final_start_line_number + lines_in_hunk)
ceab4e26
BS
34 return 1;
35 return 0;
36}
f0c9d8ba
BS
37
/* Vector comparator for path strings; defers to git__strcmp. */
static int paths_cmp(const void *a, const void *b)
{
	const char *lhs = (const char *)a;
	const char *rhs = (const char *)b;

	return git__strcmp(lhs, rhs);
}
39static int hunk_cmp(const void *_a, const void *_b)
ceab4e26
BS
40{
41 git_blame_hunk *a = (git_blame_hunk*)_a,
42 *b = (git_blame_hunk*)_b;
43
ac3d33df
JK
44 if (a->final_start_line_number > b->final_start_line_number)
45 return 1;
46 else if (a->final_start_line_number < b->final_start_line_number)
47 return -1;
48 else
49 return 0;
ceab4e26
BS
50}
51
52static bool hunk_ends_at_or_before_line(git_blame_hunk *hunk, size_t line)
53{
cb1cb24c 54 return line >= (hunk->final_start_line_number + hunk->lines_in_hunk - 1);
ceab4e26
BS
55}
56
57static bool hunk_starts_at_or_after_line(git_blame_hunk *hunk, size_t line)
58{
59 return line <= hunk->final_start_line_number;
60}
61
f0c9d8ba 62static git_blame_hunk* new_hunk(
cb1cb24c
PS
63 size_t start,
64 size_t lines,
65 size_t orig_start,
f0c9d8ba 66 const char *path)
ceab4e26
BS
67{
68 git_blame_hunk *hunk = git__calloc(1, sizeof(git_blame_hunk));
69 if (!hunk) return NULL;
70
71 hunk->lines_in_hunk = lines;
72 hunk->final_start_line_number = start;
73 hunk->orig_start_line_number = orig_start;
74 hunk->orig_path = path ? git__strdup(path) : NULL;
75
76 return hunk;
77}
78
ceab4e26
BS
79static git_blame_hunk* dup_hunk(git_blame_hunk *hunk)
80{
f0c9d8ba
BS
81 git_blame_hunk *newhunk = new_hunk(
82 hunk->final_start_line_number,
83 hunk->lines_in_hunk,
84 hunk->orig_start_line_number,
85 hunk->orig_path);
392702ee
ET
86
87 if (!newhunk)
88 return NULL;
89
ceab4e26
BS
90 git_oid_cpy(&newhunk->orig_commit_id, &hunk->orig_commit_id);
91 git_oid_cpy(&newhunk->final_commit_id, &hunk->final_commit_id);
089297b2 92 newhunk->boundary = hunk->boundary;
29be3a6d
AS
93 git_signature_dup(&newhunk->final_signature, hunk->final_signature);
94 git_signature_dup(&newhunk->orig_signature, hunk->orig_signature);
ceab4e26
BS
95 return newhunk;
96}
97
98static void free_hunk(git_blame_hunk *hunk)
99{
100 git__free((void*)hunk->orig_path);
c1ca2b67
BS
101 git_signature_free(hunk->final_signature);
102 git_signature_free(hunk->orig_signature);
ceab4e26
BS
103 git__free(hunk);
104}
105
106/* Starting with the hunk that includes start_line, shift all following hunks'
107 * final_start_line by shift_by lines */
f0c9d8ba 108static void shift_hunks_by(git_vector *v, size_t start_line, int shift_by)
ceab4e26
BS
109{
110 size_t i;
111
79aa0302 112 if (!git_vector_bsearch2(&i, v, hunk_byfinalline_search_cmp, &start_line)) {
ceab4e26
BS
113 for (; i < v->length; i++) {
114 git_blame_hunk *hunk = (git_blame_hunk*)v->contents[i];
115 hunk->final_start_line_number += shift_by;
116 }
117 }
118}
f0c9d8ba 119
ceab4e26
BS
120git_blame* git_blame__alloc(
121 git_repository *repo,
122 git_blame_options opts,
123 const char *path)
124{
96869a4e
RB
125 git_blame *gbr = git__calloc(1, sizeof(git_blame));
126 if (!gbr)
ceab4e26 127 return NULL;
96869a4e 128
ceab4e26
BS
129 gbr->repository = repo;
130 gbr->options = opts;
96869a4e
RB
131
132 if (git_vector_init(&gbr->hunks, 8, hunk_cmp) < 0 ||
133 git_vector_init(&gbr->paths, 8, paths_cmp) < 0 ||
134 (gbr->path = git__strdup(path)) == NULL ||
135 git_vector_insert(&gbr->paths, git__strdup(path)) < 0)
136 {
137 git_blame_free(gbr);
96869a4e
RB
138 return NULL;
139 }
140
ac3d33df
JK
141 if (opts.flags & GIT_BLAME_USE_MAILMAP &&
142 git_mailmap_from_repository(&gbr->mailmap, repo) < 0) {
143 git_blame_free(gbr);
144 return NULL;
145 }
146
ceab4e26
BS
147 return gbr;
148}
149
150void git_blame_free(git_blame *blame)
151{
152 size_t i;
153 git_blame_hunk *hunk;
ceab4e26
BS
154
155 if (!blame) return;
156
157 git_vector_foreach(&blame->hunks, i, hunk)
158 free_hunk(hunk);
159 git_vector_free(&blame->hunks);
160
9cfce273 161 git_vector_free_deep(&blame->paths);
ceab4e26 162
f0c9d8ba
BS
163 git_array_clear(blame->line_index);
164
ac3d33df
JK
165 git_mailmap_free(blame->mailmap);
166
96869a4e 167 git__free(blame->path);
ceab4e26
BS
168 git_blob_free(blame->final_blob);
169 git__free(blame);
170}
171
172uint32_t git_blame_get_hunk_count(git_blame *blame)
173{
174 assert(blame);
aad5403f 175 return (uint32_t)blame->hunks.length;
ceab4e26
BS
176}
177
178const git_blame_hunk *git_blame_get_hunk_byindex(git_blame *blame, uint32_t index)
179{
180 assert(blame);
181 return (git_blame_hunk*)git_vector_get(&blame->hunks, index);
182}
183
cb1cb24c 184const git_blame_hunk *git_blame_get_hunk_byline(git_blame *blame, size_t lineno)
ceab4e26 185{
cb1cb24c 186 size_t i, new_lineno = lineno;
ceab4e26
BS
187 assert(blame);
188
79aa0302 189 if (!git_vector_bsearch2(&i, &blame->hunks, hunk_byfinalline_search_cmp, &new_lineno)) {
aad5403f 190 return git_blame_get_hunk_byindex(blame, (uint32_t)i);
ceab4e26
BS
191 }
192
193 return NULL;
194}
195
f17ed637 196static int normalize_options(
ceab4e26
BS
197 git_blame_options *out,
198 const git_blame_options *in,
199 git_repository *repo)
200{
201 git_blame_options dummy = GIT_BLAME_OPTIONS_INIT;
202 if (!in) in = &dummy;
203
204 memcpy(out, in, sizeof(git_blame_options));
205
206 /* No newest_commit => HEAD */
22a2d3d5 207 if (git_oid_is_zero(&out->newest_commit)) {
f17ed637
PS
208 if (git_reference_name_to_id(&out->newest_commit, repo, "HEAD") < 0) {
209 return -1;
210 }
ceab4e26 211 }
d1228f1c
BS
212
213 /* min_line 0 really means 1 */
214 if (!out->min_line) out->min_line = 1;
215 /* max_line 0 really means N, but we don't know N yet */
364d800b
BS
216
217 /* Fix up option implications */
218 if (out->flags & GIT_BLAME_TRACK_COPIES_ANY_COMMIT_COPIES)
219 out->flags |= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES;
220 if (out->flags & GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES)
221 out->flags |= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES;
222 if (out->flags & GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES)
223 out->flags |= GIT_BLAME_TRACK_COPIES_SAME_FILE;
f17ed637
PS
224
225 return 0;
ceab4e26
BS
226}
227
228static git_blame_hunk *split_hunk_in_vector(
229 git_vector *vec,
230 git_blame_hunk *hunk,
231 size_t rel_line,
232 bool return_new)
233{
234 size_t new_line_count;
235 git_blame_hunk *nh;
236
237 /* Don't split if already at a boundary */
238 if (rel_line <= 0 ||
239 rel_line >= hunk->lines_in_hunk)
240 {
241 return hunk;
242 }
243
244 new_line_count = hunk->lines_in_hunk - rel_line;
cb1cb24c
PS
245 nh = new_hunk(hunk->final_start_line_number + rel_line, new_line_count,
246 hunk->orig_start_line_number + rel_line, hunk->orig_path);
392702ee
ET
247
248 if (!nh)
249 return NULL;
250
ceab4e26
BS
251 git_oid_cpy(&nh->final_commit_id, &hunk->final_commit_id);
252 git_oid_cpy(&nh->orig_commit_id, &hunk->orig_commit_id);
253
254 /* Adjust hunk that was split */
cb1cb24c 255 hunk->lines_in_hunk -= new_line_count;
ceab4e26
BS
256 git_vector_insert_sorted(vec, nh, NULL);
257 {
258 git_blame_hunk *ret = return_new ? nh : hunk;
259 return ret;
260 }
261}
262
ceab4e26 263/*
f0c9d8ba
BS
264 * Construct a list of char indices for where lines begin
265 * Adapted from core git:
266 * https://github.com/gitster/git/blob/be5c9fb9049ed470e7005f159bb923a5f4de1309/builtin/blame.c#L1760-L1789
ceab4e26 267 */
f0c9d8ba 268static int index_blob_lines(git_blame *blame)
ceab4e26 269{
f0c9d8ba 270 const char *buf = blame->final_buf;
22a2d3d5 271 size_t len = blame->final_buf_size;
f0c9d8ba
BS
272 int num = 0, incomplete = 0, bol = 1;
273 size_t *i;
a06474f8 274
f0c9d8ba
BS
275 if (len && buf[len-1] != '\n')
276 incomplete++; /* incomplete line at the end */
277 while (len--) {
278 if (bol) {
279 i = git_array_alloc(blame->line_index);
ac3d33df 280 GIT_ERROR_CHECK_ALLOC(i);
f0c9d8ba
BS
281 *i = buf - blame->final_buf;
282 bol = 0;
283 }
284 if (*buf++ == '\n') {
285 num++;
286 bol = 1;
287 }
288 }
289 i = git_array_alloc(blame->line_index);
ac3d33df 290 GIT_ERROR_CHECK_ALLOC(i);
f0c9d8ba
BS
291 *i = buf - blame->final_buf;
292 blame->num_lines = num + incomplete;
293 return blame->num_lines;
ceab4e26 294}
a06474f8 295
ac3d33df 296static git_blame_hunk* hunk_from_entry(git_blame__entry *e, git_blame *blame)
ceab4e26
BS
297{
298 git_blame_hunk *h = new_hunk(
299 e->lno+1, e->num_lines, e->s_lno+1, e->suspect->path);
392702ee
ET
300
301 if (!h)
302 return NULL;
303
ceab4e26 304 git_oid_cpy(&h->final_commit_id, git_commit_id(e->suspect->commit));
a06474f8 305 git_oid_cpy(&h->orig_commit_id, git_commit_id(e->suspect->commit));
ac3d33df
JK
306 git_commit_author_with_mailmap(
307 &h->final_signature, e->suspect->commit, blame->mailmap);
308 git_signature_dup(&h->orig_signature, h->final_signature);
25c47aae 309 h->boundary = e->is_boundary ? 1 : 0;
ceab4e26
BS
310 return h;
311}
312
77db6ff5
BS
313static int load_blob(git_blame *blame)
314{
315 int error;
316
b6f60a4d
BS
317 if (blame->final_blob) return 0;
318
77db6ff5
BS
319 error = git_commit_lookup(&blame->final, blame->repository, &blame->options.newest_commit);
320 if (error < 0)
321 goto cleanup;
322 error = git_object_lookup_bypath((git_object**)&blame->final_blob,
ac3d33df 323 (git_object*)blame->final, blame->path, GIT_OBJECT_BLOB);
77db6ff5
BS
324
325cleanup:
326 return error;
327}
328
f0c9d8ba 329static int blame_internal(git_blame *blame)
ceab4e26
BS
330{
331 int error;
a121e580 332 git_blame__entry *ent = NULL;
a121e580 333 git_blame__origin *o;
ceab4e26 334
b6f60a4d
BS
335 if ((error = load_blob(blame)) < 0 ||
336 (error = git_blame__get_origin(&o, blame, blame->final, blame->path)) < 0)
ceab4e26 337 goto cleanup;
22a2d3d5
UG
338
339 if (git_blob_rawsize(blame->final_blob) > SIZE_MAX) {
340 git_error_set(GIT_ERROR_NOMEMORY, "blob is too large to blame");
341 error = -1;
342 goto cleanup;
343 }
344
b6f60a4d 345 blame->final_buf = git_blob_rawcontent(blame->final_blob);
22a2d3d5 346 blame->final_buf_size = (size_t)git_blob_rawsize(blame->final_blob);
ceab4e26 347
f0c9d8ba 348 ent = git__calloc(1, sizeof(git_blame__entry));
ac3d33df 349 GIT_ERROR_CHECK_ALLOC(ent);
392702ee 350
f0c9d8ba 351 ent->num_lines = index_blob_lines(blame);
d1228f1c
BS
352 ent->lno = blame->options.min_line - 1;
353 ent->num_lines = ent->num_lines - blame->options.min_line + 1;
f0c9d8ba 354 if (blame->options.max_line > 0)
d1228f1c 355 ent->num_lines = blame->options.max_line - blame->options.min_line + 1;
d1228f1c 356 ent->s_lno = ent->lno;
ceab4e26 357 ent->suspect = o;
d1228f1c 358
0a0f0558 359 blame->ent = ent;
ceab4e26 360
ae195a71 361 error = git_blame__like_git(blame, blame->options.flags);
ceab4e26 362
ceab4e26 363cleanup:
0a0f0558 364 for (ent = blame->ent; ent; ) {
a121e580 365 git_blame__entry *e = ent->next;
ac3d33df 366 git_blame_hunk *h = hunk_from_entry(ent, blame);
ceab4e26 367
392702ee 368 git_vector_insert(&blame->hunks, h);
ceab4e26 369
b6f60a4d 370 git_blame__free_entry(ent);
ceab4e26
BS
371 ent = e;
372 }
373
ceab4e26
BS
374 return error;
375}
376
f0c9d8ba
BS
377/*******************************************************************************
378 * File blaming
379 ******************************************************************************/
380
ceab4e26
BS
381int git_blame_file(
382 git_blame **out,
383 git_repository *repo,
384 const char *path,
385 git_blame_options *options)
386{
387 int error = -1;
388 git_blame_options normOptions = GIT_BLAME_OPTIONS_INIT;
389 git_blame *blame = NULL;
390
391 assert(out && repo && path);
f17ed637
PS
392 if ((error = normalize_options(&normOptions, options, repo)) < 0)
393 goto on_error;
ceab4e26
BS
394
395 blame = git_blame__alloc(repo, normOptions, path);
ac3d33df 396 GIT_ERROR_CHECK_ALLOC(blame);
ceab4e26 397
77db6ff5 398 if ((error = load_blob(blame)) < 0)
ceab4e26
BS
399 goto on_error;
400
f0c9d8ba 401 if ((error = blame_internal(blame)) < 0)
ceab4e26
BS
402 goto on_error;
403
404 *out = blame;
405 return 0;
406
407on_error:
408 git_blame_free(blame);
409 return error;
410}
411
412/*******************************************************************************
413 * Buffer blaming
414 *******************************************************************************/
415
416static bool hunk_is_bufferblame(git_blame_hunk *hunk)
417{
22a2d3d5 418 return hunk && git_oid_is_zero(&hunk->final_commit_id);
ceab4e26
BS
419}
420
421static int buffer_hunk_cb(
422 const git_diff_delta *delta,
7dcb1c45 423 const git_diff_hunk *hunk,
ceab4e26
BS
424 void *payload)
425{
426 git_blame *blame = (git_blame*)payload;
aad5403f 427 uint32_t wedge_line;
ceab4e26
BS
428
429 GIT_UNUSED(delta);
ceab4e26 430
7dcb1c45 431 wedge_line = (hunk->old_lines == 0) ? hunk->new_start : hunk->old_start;
ceab4e26
BS
432 blame->current_diff_line = wedge_line;
433
ceab4e26 434 blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byline(blame, wedge_line);
9db56cc4
BS
435 if (!blame->current_hunk) {
436 /* Line added at the end of the file */
437 blame->current_hunk = new_hunk(wedge_line, 0, wedge_line, blame->path);
ac3d33df 438 GIT_ERROR_CHECK_ALLOC(blame->current_hunk);
392702ee 439
9db56cc4
BS
440 git_vector_insert(&blame->hunks, blame->current_hunk);
441 } else if (!hunk_starts_at_or_after_line(blame->current_hunk, wedge_line)){
442 /* If this hunk doesn't start between existing hunks, split a hunk up so it does */
ceab4e26
BS
443 blame->current_hunk = split_hunk_in_vector(&blame->hunks, blame->current_hunk,
444 wedge_line - blame->current_hunk->orig_start_line_number, true);
ac3d33df 445 GIT_ERROR_CHECK_ALLOC(blame->current_hunk);
ceab4e26
BS
446 }
447
448 return 0;
449}
450
/* Orders two pointers by address: -1, 0 or 1. Used to find a hunk by identity. */
static int ptrs_equal_cmp(const void *a, const void *b)
{
	if (a < b)
		return -1;
	if (a > b)
		return 1;
	return 0;
}
452static int buffer_line_cb(
453 const git_diff_delta *delta,
7dcb1c45
BS
454 const git_diff_hunk *hunk,
455 const git_diff_line *line,
ceab4e26
BS
456 void *payload)
457{
458 git_blame *blame = (git_blame*)payload;
459
460 GIT_UNUSED(delta);
7dcb1c45
BS
461 GIT_UNUSED(hunk);
462 GIT_UNUSED(line);
ceab4e26 463
7dcb1c45 464 if (line->origin == GIT_DIFF_LINE_ADDITION) {
ceab4e26
BS
465 if (hunk_is_bufferblame(blame->current_hunk) &&
466 hunk_ends_at_or_before_line(blame->current_hunk, blame->current_diff_line)) {
467 /* Append to the current buffer-blame hunk */
468 blame->current_hunk->lines_in_hunk++;
f0c9d8ba 469 shift_hunks_by(&blame->hunks, blame->current_diff_line+1, 1);
ceab4e26
BS
470 } else {
471 /* Create a new buffer-blame hunk with this line */
f0c9d8ba 472 shift_hunks_by(&blame->hunks, blame->current_diff_line, 1);
cb1cb24c 473 blame->current_hunk = new_hunk(blame->current_diff_line, 1, 0, blame->path);
ac3d33df 474 GIT_ERROR_CHECK_ALLOC(blame->current_hunk);
392702ee 475
ceab4e26
BS
476 git_vector_insert_sorted(&blame->hunks, blame->current_hunk, NULL);
477 }
478 blame->current_diff_line++;
479 }
480
7dcb1c45 481 if (line->origin == GIT_DIFF_LINE_DELETION) {
ceab4e26
BS
482 /* Trim the line from the current hunk; remove it if it's now empty */
483 size_t shift_base = blame->current_diff_line + blame->current_hunk->lines_in_hunk+1;
484
485 if (--(blame->current_hunk->lines_in_hunk) == 0) {
486 size_t i;
487 shift_base--;
488 if (!git_vector_search2(&i, &blame->hunks, ptrs_equal_cmp, blame->current_hunk)) {
489 git_vector_remove(&blame->hunks, i);
490 free_hunk(blame->current_hunk);
aad5403f 491 blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byindex(blame, (uint32_t)i);
ceab4e26
BS
492 }
493 }
f0c9d8ba 494 shift_hunks_by(&blame->hunks, shift_base, -1);
ceab4e26
BS
495 }
496 return 0;
497}
498
499int git_blame_buffer(
500 git_blame **out,
501 git_blame *reference,
502 const char *buffer,
e9d5e5f3 503 size_t buffer_len)
ceab4e26
BS
504{
505 git_blame *blame;
506 git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
507 size_t i;
508 git_blame_hunk *hunk;
509
510 diffopts.context_lines = 0;
511
512 assert(out && reference && buffer && buffer_len);
513
514 blame = git_blame__alloc(reference->repository, reference->options, reference->path);
ac3d33df 515 GIT_ERROR_CHECK_ALLOC(blame);
ceab4e26
BS
516
517 /* Duplicate all of the hunk structures in the reference blame */
518 git_vector_foreach(&reference->hunks, i, hunk) {
392702ee 519 git_blame_hunk *h = dup_hunk(hunk);
ac3d33df 520 GIT_ERROR_CHECK_ALLOC(h);
392702ee
ET
521
522 git_vector_insert(&blame->hunks, h);
ceab4e26
BS
523 }
524
525 /* Diff to the reference blob */
526 git_diff_blob_to_buffer(reference->final_blob, blame->path,
8147b1af
ET
527 buffer, buffer_len, blame->path, &diffopts,
528 NULL, NULL, buffer_hunk_cb, buffer_line_cb, blame);
ceab4e26
BS
529
530 *out = blame;
531 return 0;
532}
b9f81997 533
22a2d3d5 534int git_blame_options_init(git_blame_options *opts, unsigned int version)
b9f81997 535{
702efc89
RB
536 GIT_INIT_STRUCTURE_FROM_TEMPLATE(
537 opts, version, git_blame_options, GIT_BLAME_OPTIONS_INIT);
538 return 0;
b9f81997 539}
22a2d3d5
UG
540
541#ifndef GIT_DEPRECATE_HARD
542int git_blame_init_options(git_blame_options *opts, unsigned int version)
543{
544 return git_blame_options_init(opts, version);
545}
546#endif