]> git.proxmox.com Git - libgit2.git/blame - src/blame.c
Merge pull request #3508 from libgit2/cmn/tree-parse-speed
[libgit2.git] / src / blame.c
CommitLineData
ceab4e26
BS
1/*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8#include "blame.h"
9#include "git2/commit.h"
10#include "git2/revparse.h"
11#include "git2/revwalk.h"
12#include "git2/tree.h"
13#include "git2/diff.h"
14#include "git2/blob.h"
c1ca2b67 15#include "git2/signature.h"
ceab4e26
BS
16#include "util.h"
17#include "repository.h"
18#include "blame_git.h"
19
20
f0c9d8ba 21static int hunk_byfinalline_search_cmp(const void *key, const void *entry)
ceab4e26 22{
f0c9d8ba
BS
23 git_blame_hunk *hunk = (git_blame_hunk*)entry;
24
79aa0302
JP
25 size_t lineno = *(size_t*)key;
26 size_t lines_in_hunk = (size_t)hunk->lines_in_hunk;
27 size_t final_start_line_number = (size_t)hunk->final_start_line_number;
28
29 if (lineno < final_start_line_number)
ceab4e26 30 return -1;
79aa0302 31 if (lineno >= final_start_line_number + lines_in_hunk)
ceab4e26
BS
32 return 1;
33 return 0;
34}
f0c9d8ba
BS
35
/* Vector comparator for path strings; ordering is delegated to git__strcmp. */
static int paths_cmp(const void *a, const void *b)
{
	const char *lhs = (const char *)a;
	const char *rhs = (const char *)b;

	return git__strcmp(lhs, rhs);
}
37static int hunk_cmp(const void *_a, const void *_b)
ceab4e26
BS
38{
39 git_blame_hunk *a = (git_blame_hunk*)_a,
40 *b = (git_blame_hunk*)_b;
41
42 return a->final_start_line_number - b->final_start_line_number;
43}
44
45static bool hunk_ends_at_or_before_line(git_blame_hunk *hunk, size_t line)
46{
47 return line >= (size_t)(hunk->final_start_line_number + hunk->lines_in_hunk - 1);
48}
49
50static bool hunk_starts_at_or_after_line(git_blame_hunk *hunk, size_t line)
51{
52 return line <= hunk->final_start_line_number;
53}
54
f0c9d8ba
BS
55static git_blame_hunk* new_hunk(
56 uint16_t start,
57 uint16_t lines,
58 uint16_t orig_start,
59 const char *path)
ceab4e26
BS
60{
61 git_blame_hunk *hunk = git__calloc(1, sizeof(git_blame_hunk));
62 if (!hunk) return NULL;
63
64 hunk->lines_in_hunk = lines;
65 hunk->final_start_line_number = start;
66 hunk->orig_start_line_number = orig_start;
67 hunk->orig_path = path ? git__strdup(path) : NULL;
68
69 return hunk;
70}
71
ceab4e26
BS
72static git_blame_hunk* dup_hunk(git_blame_hunk *hunk)
73{
f0c9d8ba
BS
74 git_blame_hunk *newhunk = new_hunk(
75 hunk->final_start_line_number,
76 hunk->lines_in_hunk,
77 hunk->orig_start_line_number,
78 hunk->orig_path);
392702ee
ET
79
80 if (!newhunk)
81 return NULL;
82
ceab4e26
BS
83 git_oid_cpy(&newhunk->orig_commit_id, &hunk->orig_commit_id);
84 git_oid_cpy(&newhunk->final_commit_id, &hunk->final_commit_id);
089297b2 85 newhunk->boundary = hunk->boundary;
29be3a6d
AS
86 git_signature_dup(&newhunk->final_signature, hunk->final_signature);
87 git_signature_dup(&newhunk->orig_signature, hunk->orig_signature);
ceab4e26
BS
88 return newhunk;
89}
90
91static void free_hunk(git_blame_hunk *hunk)
92{
93 git__free((void*)hunk->orig_path);
c1ca2b67
BS
94 git_signature_free(hunk->final_signature);
95 git_signature_free(hunk->orig_signature);
ceab4e26
BS
96 git__free(hunk);
97}
98
99/* Starting with the hunk that includes start_line, shift all following hunks'
100 * final_start_line by shift_by lines */
f0c9d8ba 101static void shift_hunks_by(git_vector *v, size_t start_line, int shift_by)
ceab4e26
BS
102{
103 size_t i;
104
79aa0302 105 if (!git_vector_bsearch2(&i, v, hunk_byfinalline_search_cmp, &start_line)) {
ceab4e26
BS
106 for (; i < v->length; i++) {
107 git_blame_hunk *hunk = (git_blame_hunk*)v->contents[i];
108 hunk->final_start_line_number += shift_by;
109 }
110 }
111}
f0c9d8ba 112
ceab4e26
BS
113git_blame* git_blame__alloc(
114 git_repository *repo,
115 git_blame_options opts,
116 const char *path)
117{
96869a4e
RB
118 git_blame *gbr = git__calloc(1, sizeof(git_blame));
119 if (!gbr)
ceab4e26 120 return NULL;
96869a4e 121
ceab4e26
BS
122 gbr->repository = repo;
123 gbr->options = opts;
96869a4e
RB
124
125 if (git_vector_init(&gbr->hunks, 8, hunk_cmp) < 0 ||
126 git_vector_init(&gbr->paths, 8, paths_cmp) < 0 ||
127 (gbr->path = git__strdup(path)) == NULL ||
128 git_vector_insert(&gbr->paths, git__strdup(path)) < 0)
129 {
130 git_blame_free(gbr);
96869a4e
RB
131 return NULL;
132 }
133
ceab4e26
BS
134 return gbr;
135}
136
137void git_blame_free(git_blame *blame)
138{
139 size_t i;
140 git_blame_hunk *hunk;
ceab4e26
BS
141
142 if (!blame) return;
143
144 git_vector_foreach(&blame->hunks, i, hunk)
145 free_hunk(hunk);
146 git_vector_free(&blame->hunks);
147
9cfce273 148 git_vector_free_deep(&blame->paths);
ceab4e26 149
f0c9d8ba
BS
150 git_array_clear(blame->line_index);
151
96869a4e 152 git__free(blame->path);
ceab4e26
BS
153 git_blob_free(blame->final_blob);
154 git__free(blame);
155}
156
157uint32_t git_blame_get_hunk_count(git_blame *blame)
158{
159 assert(blame);
aad5403f 160 return (uint32_t)blame->hunks.length;
ceab4e26
BS
161}
162
163const git_blame_hunk *git_blame_get_hunk_byindex(git_blame *blame, uint32_t index)
164{
165 assert(blame);
166 return (git_blame_hunk*)git_vector_get(&blame->hunks, index);
167}
168
169const git_blame_hunk *git_blame_get_hunk_byline(git_blame *blame, uint32_t lineno)
170{
79aa0302 171 size_t i, new_lineno = (size_t)lineno;
ceab4e26
BS
172 assert(blame);
173
79aa0302 174 if (!git_vector_bsearch2(&i, &blame->hunks, hunk_byfinalline_search_cmp, &new_lineno)) {
aad5403f 175 return git_blame_get_hunk_byindex(blame, (uint32_t)i);
ceab4e26
BS
176 }
177
178 return NULL;
179}
180
181static void normalize_options(
182 git_blame_options *out,
183 const git_blame_options *in,
184 git_repository *repo)
185{
186 git_blame_options dummy = GIT_BLAME_OPTIONS_INIT;
187 if (!in) in = &dummy;
188
189 memcpy(out, in, sizeof(git_blame_options));
190
191 /* No newest_commit => HEAD */
192 if (git_oid_iszero(&out->newest_commit)) {
193 git_reference_name_to_id(&out->newest_commit, repo, "HEAD");
194 }
d1228f1c
BS
195
196 /* min_line 0 really means 1 */
197 if (!out->min_line) out->min_line = 1;
198 /* max_line 0 really means N, but we don't know N yet */
364d800b
BS
199
200 /* Fix up option implications */
201 if (out->flags & GIT_BLAME_TRACK_COPIES_ANY_COMMIT_COPIES)
202 out->flags |= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES;
203 if (out->flags & GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES)
204 out->flags |= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES;
205 if (out->flags & GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES)
206 out->flags |= GIT_BLAME_TRACK_COPIES_SAME_FILE;
ceab4e26
BS
207}
208
209static git_blame_hunk *split_hunk_in_vector(
210 git_vector *vec,
211 git_blame_hunk *hunk,
212 size_t rel_line,
213 bool return_new)
214{
215 size_t new_line_count;
216 git_blame_hunk *nh;
217
218 /* Don't split if already at a boundary */
219 if (rel_line <= 0 ||
220 rel_line >= hunk->lines_in_hunk)
221 {
222 return hunk;
223 }
224
225 new_line_count = hunk->lines_in_hunk - rel_line;
aad5403f
BS
226 nh = new_hunk((uint16_t)(hunk->final_start_line_number+rel_line), (uint16_t)new_line_count,
227 (uint16_t)(hunk->orig_start_line_number+rel_line), hunk->orig_path);
392702ee
ET
228
229 if (!nh)
230 return NULL;
231
ceab4e26
BS
232 git_oid_cpy(&nh->final_commit_id, &hunk->final_commit_id);
233 git_oid_cpy(&nh->orig_commit_id, &hunk->orig_commit_id);
234
235 /* Adjust hunk that was split */
aad5403f 236 hunk->lines_in_hunk -= (uint16_t)new_line_count;
ceab4e26
BS
237 git_vector_insert_sorted(vec, nh, NULL);
238 {
239 git_blame_hunk *ret = return_new ? nh : hunk;
240 return ret;
241 }
242}
243
ceab4e26 244/*
f0c9d8ba
BS
245 * Construct a list of char indices for where lines begin
246 * Adapted from core git:
247 * https://github.com/gitster/git/blob/be5c9fb9049ed470e7005f159bb923a5f4de1309/builtin/blame.c#L1760-L1789
ceab4e26 248 */
f0c9d8ba 249static int index_blob_lines(git_blame *blame)
ceab4e26 250{
f0c9d8ba
BS
251 const char *buf = blame->final_buf;
252 git_off_t len = blame->final_buf_size;
253 int num = 0, incomplete = 0, bol = 1;
254 size_t *i;
a06474f8 255
f0c9d8ba
BS
256 if (len && buf[len-1] != '\n')
257 incomplete++; /* incomplete line at the end */
258 while (len--) {
259 if (bol) {
260 i = git_array_alloc(blame->line_index);
261 GITERR_CHECK_ALLOC(i);
262 *i = buf - blame->final_buf;
263 bol = 0;
264 }
265 if (*buf++ == '\n') {
266 num++;
267 bol = 1;
268 }
269 }
270 i = git_array_alloc(blame->line_index);
271 GITERR_CHECK_ALLOC(i);
272 *i = buf - blame->final_buf;
273 blame->num_lines = num + incomplete;
274 return blame->num_lines;
ceab4e26 275}
a06474f8 276
a121e580 277static git_blame_hunk* hunk_from_entry(git_blame__entry *e)
ceab4e26
BS
278{
279 git_blame_hunk *h = new_hunk(
280 e->lno+1, e->num_lines, e->s_lno+1, e->suspect->path);
392702ee
ET
281
282 if (!h)
283 return NULL;
284
ceab4e26 285 git_oid_cpy(&h->final_commit_id, git_commit_id(e->suspect->commit));
a06474f8 286 git_oid_cpy(&h->orig_commit_id, git_commit_id(e->suspect->commit));
29be3a6d
AS
287 git_signature_dup(&h->final_signature, git_commit_author(e->suspect->commit));
288 git_signature_dup(&h->orig_signature, git_commit_author(e->suspect->commit));
25c47aae 289 h->boundary = e->is_boundary ? 1 : 0;
ceab4e26
BS
290 return h;
291}
292
77db6ff5
BS
293static int load_blob(git_blame *blame)
294{
295 int error;
296
b6f60a4d
BS
297 if (blame->final_blob) return 0;
298
77db6ff5
BS
299 error = git_commit_lookup(&blame->final, blame->repository, &blame->options.newest_commit);
300 if (error < 0)
301 goto cleanup;
302 error = git_object_lookup_bypath((git_object**)&blame->final_blob,
303 (git_object*)blame->final, blame->path, GIT_OBJ_BLOB);
77db6ff5
BS
304
305cleanup:
306 return error;
307}
308
f0c9d8ba 309static int blame_internal(git_blame *blame)
ceab4e26
BS
310{
311 int error;
a121e580 312 git_blame__entry *ent = NULL;
a121e580 313 git_blame__origin *o;
ceab4e26 314
b6f60a4d
BS
315 if ((error = load_blob(blame)) < 0 ||
316 (error = git_blame__get_origin(&o, blame, blame->final, blame->path)) < 0)
ceab4e26 317 goto cleanup;
b6f60a4d
BS
318 blame->final_buf = git_blob_rawcontent(blame->final_blob);
319 blame->final_buf_size = git_blob_rawsize(blame->final_blob);
ceab4e26 320
f0c9d8ba 321 ent = git__calloc(1, sizeof(git_blame__entry));
392702ee
ET
322 GITERR_CHECK_ALLOC(ent);
323
f0c9d8ba 324 ent->num_lines = index_blob_lines(blame);
d1228f1c
BS
325 ent->lno = blame->options.min_line - 1;
326 ent->num_lines = ent->num_lines - blame->options.min_line + 1;
f0c9d8ba 327 if (blame->options.max_line > 0)
d1228f1c 328 ent->num_lines = blame->options.max_line - blame->options.min_line + 1;
d1228f1c 329 ent->s_lno = ent->lno;
ceab4e26 330 ent->suspect = o;
d1228f1c 331
0a0f0558 332 blame->ent = ent;
ceab4e26 333
ae195a71 334 error = git_blame__like_git(blame, blame->options.flags);
ceab4e26 335
ceab4e26 336cleanup:
0a0f0558 337 for (ent = blame->ent; ent; ) {
a121e580 338 git_blame__entry *e = ent->next;
392702ee 339 git_blame_hunk *h = hunk_from_entry(ent);
ceab4e26 340
392702ee 341 git_vector_insert(&blame->hunks, h);
ceab4e26 342
b6f60a4d 343 git_blame__free_entry(ent);
ceab4e26
BS
344 ent = e;
345 }
346
ceab4e26
BS
347 return error;
348}
349
f0c9d8ba
BS
350/*******************************************************************************
351 * File blaming
352 ******************************************************************************/
353
ceab4e26
BS
354int git_blame_file(
355 git_blame **out,
356 git_repository *repo,
357 const char *path,
358 git_blame_options *options)
359{
360 int error = -1;
361 git_blame_options normOptions = GIT_BLAME_OPTIONS_INIT;
362 git_blame *blame = NULL;
363
364 assert(out && repo && path);
365 normalize_options(&normOptions, options, repo);
366
367 blame = git_blame__alloc(repo, normOptions, path);
368 GITERR_CHECK_ALLOC(blame);
369
77db6ff5 370 if ((error = load_blob(blame)) < 0)
ceab4e26
BS
371 goto on_error;
372
f0c9d8ba 373 if ((error = blame_internal(blame)) < 0)
ceab4e26
BS
374 goto on_error;
375
376 *out = blame;
377 return 0;
378
379on_error:
380 git_blame_free(blame);
381 return error;
382}
383
384/*******************************************************************************
385 * Buffer blaming
386 *******************************************************************************/
387
388static bool hunk_is_bufferblame(git_blame_hunk *hunk)
389{
390 return git_oid_iszero(&hunk->final_commit_id);
391}
392
393static int buffer_hunk_cb(
394 const git_diff_delta *delta,
7dcb1c45 395 const git_diff_hunk *hunk,
ceab4e26
BS
396 void *payload)
397{
398 git_blame *blame = (git_blame*)payload;
aad5403f 399 uint32_t wedge_line;
ceab4e26
BS
400
401 GIT_UNUSED(delta);
ceab4e26 402
7dcb1c45 403 wedge_line = (hunk->old_lines == 0) ? hunk->new_start : hunk->old_start;
ceab4e26
BS
404 blame->current_diff_line = wedge_line;
405
ceab4e26 406 blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byline(blame, wedge_line);
9db56cc4
BS
407 if (!blame->current_hunk) {
408 /* Line added at the end of the file */
409 blame->current_hunk = new_hunk(wedge_line, 0, wedge_line, blame->path);
392702ee
ET
410 GITERR_CHECK_ALLOC(blame->current_hunk);
411
9db56cc4
BS
412 git_vector_insert(&blame->hunks, blame->current_hunk);
413 } else if (!hunk_starts_at_or_after_line(blame->current_hunk, wedge_line)){
414 /* If this hunk doesn't start between existing hunks, split a hunk up so it does */
ceab4e26
BS
415 blame->current_hunk = split_hunk_in_vector(&blame->hunks, blame->current_hunk,
416 wedge_line - blame->current_hunk->orig_start_line_number, true);
392702ee 417 GITERR_CHECK_ALLOC(blame->current_hunk);
ceab4e26
BS
418 }
419
420 return 0;
421}
422
/* Comparator on the pointer values themselves: -1, 0 or 1 by address order. */
static int ptrs_equal_cmp(const void *a, const void *b)
{
	if (a < b)
		return -1;
	if (a > b)
		return 1;
	return 0;
}
424static int buffer_line_cb(
425 const git_diff_delta *delta,
7dcb1c45
BS
426 const git_diff_hunk *hunk,
427 const git_diff_line *line,
ceab4e26
BS
428 void *payload)
429{
430 git_blame *blame = (git_blame*)payload;
431
432 GIT_UNUSED(delta);
7dcb1c45
BS
433 GIT_UNUSED(hunk);
434 GIT_UNUSED(line);
ceab4e26 435
7dcb1c45 436 if (line->origin == GIT_DIFF_LINE_ADDITION) {
ceab4e26
BS
437 if (hunk_is_bufferblame(blame->current_hunk) &&
438 hunk_ends_at_or_before_line(blame->current_hunk, blame->current_diff_line)) {
439 /* Append to the current buffer-blame hunk */
440 blame->current_hunk->lines_in_hunk++;
f0c9d8ba 441 shift_hunks_by(&blame->hunks, blame->current_diff_line+1, 1);
ceab4e26
BS
442 } else {
443 /* Create a new buffer-blame hunk with this line */
f0c9d8ba 444 shift_hunks_by(&blame->hunks, blame->current_diff_line, 1);
aad5403f 445 blame->current_hunk = new_hunk((uint16_t)blame->current_diff_line, 1, 0, blame->path);
392702ee
ET
446 GITERR_CHECK_ALLOC(blame->current_hunk);
447
ceab4e26
BS
448 git_vector_insert_sorted(&blame->hunks, blame->current_hunk, NULL);
449 }
450 blame->current_diff_line++;
451 }
452
7dcb1c45 453 if (line->origin == GIT_DIFF_LINE_DELETION) {
ceab4e26
BS
454 /* Trim the line from the current hunk; remove it if it's now empty */
455 size_t shift_base = blame->current_diff_line + blame->current_hunk->lines_in_hunk+1;
456
457 if (--(blame->current_hunk->lines_in_hunk) == 0) {
458 size_t i;
459 shift_base--;
460 if (!git_vector_search2(&i, &blame->hunks, ptrs_equal_cmp, blame->current_hunk)) {
461 git_vector_remove(&blame->hunks, i);
462 free_hunk(blame->current_hunk);
aad5403f 463 blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byindex(blame, (uint32_t)i);
ceab4e26
BS
464 }
465 }
f0c9d8ba 466 shift_hunks_by(&blame->hunks, shift_base, -1);
ceab4e26
BS
467 }
468 return 0;
469}
470
471int git_blame_buffer(
472 git_blame **out,
473 git_blame *reference,
474 const char *buffer,
e9d5e5f3 475 size_t buffer_len)
ceab4e26
BS
476{
477 git_blame *blame;
478 git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
479 size_t i;
480 git_blame_hunk *hunk;
481
482 diffopts.context_lines = 0;
483
484 assert(out && reference && buffer && buffer_len);
485
486 blame = git_blame__alloc(reference->repository, reference->options, reference->path);
392702ee 487 GITERR_CHECK_ALLOC(blame);
ceab4e26
BS
488
489 /* Duplicate all of the hunk structures in the reference blame */
490 git_vector_foreach(&reference->hunks, i, hunk) {
392702ee
ET
491 git_blame_hunk *h = dup_hunk(hunk);
492 GITERR_CHECK_ALLOC(h);
493
494 git_vector_insert(&blame->hunks, h);
ceab4e26
BS
495 }
496
497 /* Diff to the reference blob */
498 git_diff_blob_to_buffer(reference->final_blob, blame->path,
8147b1af
ET
499 buffer, buffer_len, blame->path, &diffopts,
500 NULL, NULL, buffer_hunk_cb, buffer_line_cb, blame);
ceab4e26
BS
501
502 *out = blame;
503 return 0;
504}
b9f81997 505
702efc89 506int git_blame_init_options(git_blame_options *opts, unsigned int version)
b9f81997 507{
702efc89
RB
508 GIT_INIT_STRUCTURE_FROM_TEMPLATE(
509 opts, version, git_blame_options, GIT_BLAME_OPTIONS_INIT);
510 return 0;
b9f81997 511}