]> git.proxmox.com Git - libgit2.git/blob - src/blame.c
a6ab43efd8f83aa7dc26463048f81af9d6b29e4c
[libgit2.git] / src / blame.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "blame.h"
9
10 #include "git2/commit.h"
11 #include "git2/revparse.h"
12 #include "git2/revwalk.h"
13 #include "git2/tree.h"
14 #include "git2/diff.h"
15 #include "git2/blob.h"
16 #include "git2/signature.h"
17 #include "git2/mailmap.h"
18 #include "util.h"
19 #include "repository.h"
20 #include "blame_git.h"
21
22
23 static int hunk_byfinalline_search_cmp(const void *key, const void *entry)
24 {
25 git_blame_hunk *hunk = (git_blame_hunk*)entry;
26
27 size_t lineno = *(size_t*)key;
28 size_t lines_in_hunk = hunk->lines_in_hunk;
29 size_t final_start_line_number = hunk->final_start_line_number;
30
31 if (lineno < final_start_line_number)
32 return -1;
33 if (lineno >= final_start_line_number + lines_in_hunk)
34 return 1;
35 return 0;
36 }
37
38 static int paths_cmp(const void *a, const void *b) { return git__strcmp((char*)a, (char*)b); }
39 static int hunk_cmp(const void *_a, const void *_b)
40 {
41 git_blame_hunk *a = (git_blame_hunk*)_a,
42 *b = (git_blame_hunk*)_b;
43
44 if (a->final_start_line_number > b->final_start_line_number)
45 return 1;
46 else if (a->final_start_line_number < b->final_start_line_number)
47 return -1;
48 else
49 return 0;
50 }
51
52 static bool hunk_ends_at_or_before_line(git_blame_hunk *hunk, size_t line)
53 {
54 return line >= (hunk->final_start_line_number + hunk->lines_in_hunk - 1);
55 }
56
57 static bool hunk_starts_at_or_after_line(git_blame_hunk *hunk, size_t line)
58 {
59 return line <= hunk->final_start_line_number;
60 }
61
62 static git_blame_hunk *new_hunk(
63 size_t start,
64 size_t lines,
65 size_t orig_start,
66 const char *path)
67 {
68 git_blame_hunk *hunk = git__calloc(1, sizeof(git_blame_hunk));
69 if (!hunk) return NULL;
70
71 hunk->lines_in_hunk = lines;
72 hunk->final_start_line_number = start;
73 hunk->orig_start_line_number = orig_start;
74 hunk->orig_path = path ? git__strdup(path) : NULL;
75
76 return hunk;
77 }
78
79 static void free_hunk(git_blame_hunk *hunk)
80 {
81 git__free((void*)hunk->orig_path);
82 git_signature_free(hunk->final_signature);
83 git_signature_free(hunk->orig_signature);
84 git__free(hunk);
85 }
86
87 static git_blame_hunk *dup_hunk(git_blame_hunk *hunk)
88 {
89 git_blame_hunk *newhunk = new_hunk(
90 hunk->final_start_line_number,
91 hunk->lines_in_hunk,
92 hunk->orig_start_line_number,
93 hunk->orig_path);
94
95 if (!newhunk)
96 return NULL;
97
98 git_oid_cpy(&newhunk->orig_commit_id, &hunk->orig_commit_id);
99 git_oid_cpy(&newhunk->final_commit_id, &hunk->final_commit_id);
100 newhunk->boundary = hunk->boundary;
101
102 if (git_signature_dup(&newhunk->final_signature, hunk->final_signature) < 0 ||
103 git_signature_dup(&newhunk->orig_signature, hunk->orig_signature) < 0) {
104 free_hunk(newhunk);
105 return NULL;
106 }
107
108 return newhunk;
109 }
110
111 /* Starting with the hunk that includes start_line, shift all following hunks'
112 * final_start_line by shift_by lines */
113 static void shift_hunks_by(git_vector *v, size_t start_line, int shift_by)
114 {
115 size_t i;
116
117 if (!git_vector_bsearch2(&i, v, hunk_byfinalline_search_cmp, &start_line)) {
118 for (; i < v->length; i++) {
119 git_blame_hunk *hunk = (git_blame_hunk*)v->contents[i];
120 hunk->final_start_line_number += shift_by;
121 }
122 }
123 }
124
125 git_blame *git_blame__alloc(
126 git_repository *repo,
127 git_blame_options opts,
128 const char *path)
129 {
130 git_blame *gbr = git__calloc(1, sizeof(git_blame));
131 if (!gbr)
132 return NULL;
133
134 gbr->repository = repo;
135 gbr->options = opts;
136
137 if (git_vector_init(&gbr->hunks, 8, hunk_cmp) < 0 ||
138 git_vector_init(&gbr->paths, 8, paths_cmp) < 0 ||
139 (gbr->path = git__strdup(path)) == NULL ||
140 git_vector_insert(&gbr->paths, git__strdup(path)) < 0)
141 {
142 git_blame_free(gbr);
143 return NULL;
144 }
145
146 if (opts.flags & GIT_BLAME_USE_MAILMAP &&
147 git_mailmap_from_repository(&gbr->mailmap, repo) < 0) {
148 git_blame_free(gbr);
149 return NULL;
150 }
151
152 return gbr;
153 }
154
155 void git_blame_free(git_blame *blame)
156 {
157 size_t i;
158 git_blame_hunk *hunk;
159
160 if (!blame) return;
161
162 git_vector_foreach(&blame->hunks, i, hunk)
163 free_hunk(hunk);
164 git_vector_free(&blame->hunks);
165
166 git_vector_free_deep(&blame->paths);
167
168 git_array_clear(blame->line_index);
169
170 git_mailmap_free(blame->mailmap);
171
172 git__free(blame->path);
173 git_blob_free(blame->final_blob);
174 git__free(blame);
175 }
176
177 uint32_t git_blame_get_hunk_count(git_blame *blame)
178 {
179 GIT_ASSERT_ARG(blame);
180 return (uint32_t)blame->hunks.length;
181 }
182
183 const git_blame_hunk *git_blame_get_hunk_byindex(git_blame *blame, uint32_t index)
184 {
185 GIT_ASSERT_ARG_WITH_RETVAL(blame, NULL);
186 return (git_blame_hunk*)git_vector_get(&blame->hunks, index);
187 }
188
189 const git_blame_hunk *git_blame_get_hunk_byline(git_blame *blame, size_t lineno)
190 {
191 size_t i, new_lineno = lineno;
192
193 GIT_ASSERT_ARG_WITH_RETVAL(blame, NULL);
194
195 if (!git_vector_bsearch2(&i, &blame->hunks, hunk_byfinalline_search_cmp, &new_lineno)) {
196 return git_blame_get_hunk_byindex(blame, (uint32_t)i);
197 }
198
199 return NULL;
200 }
201
202 static int normalize_options(
203 git_blame_options *out,
204 const git_blame_options *in,
205 git_repository *repo)
206 {
207 git_blame_options dummy = GIT_BLAME_OPTIONS_INIT;
208 if (!in) in = &dummy;
209
210 memcpy(out, in, sizeof(git_blame_options));
211
212 /* No newest_commit => HEAD */
213 if (git_oid_is_zero(&out->newest_commit)) {
214 if (git_reference_name_to_id(&out->newest_commit, repo, "HEAD") < 0) {
215 return -1;
216 }
217 }
218
219 /* min_line 0 really means 1 */
220 if (!out->min_line) out->min_line = 1;
221 /* max_line 0 really means N, but we don't know N yet */
222
223 /* Fix up option implications */
224 if (out->flags & GIT_BLAME_TRACK_COPIES_ANY_COMMIT_COPIES)
225 out->flags |= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES;
226 if (out->flags & GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES)
227 out->flags |= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES;
228 if (out->flags & GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES)
229 out->flags |= GIT_BLAME_TRACK_COPIES_SAME_FILE;
230
231 return 0;
232 }
233
234 static git_blame_hunk *split_hunk_in_vector(
235 git_vector *vec,
236 git_blame_hunk *hunk,
237 size_t rel_line,
238 bool return_new)
239 {
240 size_t new_line_count;
241 git_blame_hunk *nh;
242
243 /* Don't split if already at a boundary */
244 if (rel_line <= 0 ||
245 rel_line >= hunk->lines_in_hunk)
246 {
247 return hunk;
248 }
249
250 new_line_count = hunk->lines_in_hunk - rel_line;
251 nh = new_hunk(hunk->final_start_line_number + rel_line, new_line_count,
252 hunk->orig_start_line_number + rel_line, hunk->orig_path);
253
254 if (!nh)
255 return NULL;
256
257 git_oid_cpy(&nh->final_commit_id, &hunk->final_commit_id);
258 git_oid_cpy(&nh->orig_commit_id, &hunk->orig_commit_id);
259
260 /* Adjust hunk that was split */
261 hunk->lines_in_hunk -= new_line_count;
262 git_vector_insert_sorted(vec, nh, NULL);
263 {
264 git_blame_hunk *ret = return_new ? nh : hunk;
265 return ret;
266 }
267 }
268
269 /*
270 * Construct a list of char indices for where lines begin
271 * Adapted from core git:
272 * https://github.com/gitster/git/blob/be5c9fb9049ed470e7005f159bb923a5f4de1309/builtin/blame.c#L1760-L1789
273 */
274 static int index_blob_lines(git_blame *blame)
275 {
276 const char *buf = blame->final_buf;
277 size_t len = blame->final_buf_size;
278 int num = 0, incomplete = 0, bol = 1;
279 size_t *i;
280
281 if (len && buf[len-1] != '\n')
282 incomplete++; /* incomplete line at the end */
283 while (len--) {
284 if (bol) {
285 i = git_array_alloc(blame->line_index);
286 GIT_ERROR_CHECK_ALLOC(i);
287 *i = buf - blame->final_buf;
288 bol = 0;
289 }
290 if (*buf++ == '\n') {
291 num++;
292 bol = 1;
293 }
294 }
295 i = git_array_alloc(blame->line_index);
296 GIT_ERROR_CHECK_ALLOC(i);
297 *i = buf - blame->final_buf;
298 blame->num_lines = num + incomplete;
299 return blame->num_lines;
300 }
301
302 static git_blame_hunk *hunk_from_entry(git_blame__entry *e, git_blame *blame)
303 {
304 git_blame_hunk *h = new_hunk(
305 e->lno+1, e->num_lines, e->s_lno+1, e->suspect->path);
306
307 if (!h)
308 return NULL;
309
310 git_oid_cpy(&h->final_commit_id, git_commit_id(e->suspect->commit));
311 git_oid_cpy(&h->orig_commit_id, git_commit_id(e->suspect->commit));
312 git_commit_author_with_mailmap(
313 &h->final_signature, e->suspect->commit, blame->mailmap);
314 git_signature_dup(&h->orig_signature, h->final_signature);
315 h->boundary = e->is_boundary ? 1 : 0;
316 return h;
317 }
318
319 static int load_blob(git_blame *blame)
320 {
321 int error;
322
323 if (blame->final_blob) return 0;
324
325 error = git_commit_lookup(&blame->final, blame->repository, &blame->options.newest_commit);
326 if (error < 0)
327 goto cleanup;
328 error = git_object_lookup_bypath((git_object**)&blame->final_blob,
329 (git_object*)blame->final, blame->path, GIT_OBJECT_BLOB);
330
331 cleanup:
332 return error;
333 }
334
335 static int blame_internal(git_blame *blame)
336 {
337 int error;
338 git_blame__entry *ent = NULL;
339 git_blame__origin *o;
340
341 if ((error = load_blob(blame)) < 0 ||
342 (error = git_blame__get_origin(&o, blame, blame->final, blame->path)) < 0)
343 goto cleanup;
344
345 if (git_blob_rawsize(blame->final_blob) > SIZE_MAX) {
346 git_error_set(GIT_ERROR_NOMEMORY, "blob is too large to blame");
347 error = -1;
348 goto cleanup;
349 }
350
351 blame->final_buf = git_blob_rawcontent(blame->final_blob);
352 blame->final_buf_size = (size_t)git_blob_rawsize(blame->final_blob);
353
354 ent = git__calloc(1, sizeof(git_blame__entry));
355 GIT_ERROR_CHECK_ALLOC(ent);
356
357 ent->num_lines = index_blob_lines(blame);
358 ent->lno = blame->options.min_line - 1;
359 ent->num_lines = ent->num_lines - blame->options.min_line + 1;
360 if (blame->options.max_line > 0)
361 ent->num_lines = blame->options.max_line - blame->options.min_line + 1;
362 ent->s_lno = ent->lno;
363 ent->suspect = o;
364
365 blame->ent = ent;
366
367 error = git_blame__like_git(blame, blame->options.flags);
368
369 cleanup:
370 for (ent = blame->ent; ent; ) {
371 git_blame__entry *e = ent->next;
372 git_blame_hunk *h = hunk_from_entry(ent, blame);
373
374 git_vector_insert(&blame->hunks, h);
375
376 git_blame__free_entry(ent);
377 ent = e;
378 }
379
380 return error;
381 }
382
383 /*******************************************************************************
384 * File blaming
385 ******************************************************************************/
386
387 int git_blame_file(
388 git_blame **out,
389 git_repository *repo,
390 const char *path,
391 git_blame_options *options)
392 {
393 int error = -1;
394 git_blame_options normOptions = GIT_BLAME_OPTIONS_INIT;
395 git_blame *blame = NULL;
396
397 GIT_ASSERT_ARG(out);
398 GIT_ASSERT_ARG(repo);
399 GIT_ASSERT_ARG(path);
400
401 if ((error = normalize_options(&normOptions, options, repo)) < 0)
402 goto on_error;
403
404 blame = git_blame__alloc(repo, normOptions, path);
405 GIT_ERROR_CHECK_ALLOC(blame);
406
407 if ((error = load_blob(blame)) < 0)
408 goto on_error;
409
410 if ((error = blame_internal(blame)) < 0)
411 goto on_error;
412
413 *out = blame;
414 return 0;
415
416 on_error:
417 git_blame_free(blame);
418 return error;
419 }
420
421 /*******************************************************************************
422 * Buffer blaming
423 *******************************************************************************/
424
425 static bool hunk_is_bufferblame(git_blame_hunk *hunk)
426 {
427 return hunk && git_oid_is_zero(&hunk->final_commit_id);
428 }
429
430 static int buffer_hunk_cb(
431 const git_diff_delta *delta,
432 const git_diff_hunk *hunk,
433 void *payload)
434 {
435 git_blame *blame = (git_blame*)payload;
436 uint32_t wedge_line;
437
438 GIT_UNUSED(delta);
439
440 wedge_line = (hunk->old_lines == 0) ? hunk->new_start : hunk->old_start;
441 blame->current_diff_line = wedge_line;
442
443 blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byline(blame, wedge_line);
444 if (!blame->current_hunk) {
445 /* Line added at the end of the file */
446 blame->current_hunk = new_hunk(wedge_line, 0, wedge_line, blame->path);
447 GIT_ERROR_CHECK_ALLOC(blame->current_hunk);
448
449 git_vector_insert(&blame->hunks, blame->current_hunk);
450 } else if (!hunk_starts_at_or_after_line(blame->current_hunk, wedge_line)){
451 /* If this hunk doesn't start between existing hunks, split a hunk up so it does */
452 blame->current_hunk = split_hunk_in_vector(&blame->hunks, blame->current_hunk,
453 wedge_line - blame->current_hunk->orig_start_line_number, true);
454 GIT_ERROR_CHECK_ALLOC(blame->current_hunk);
455 }
456
457 return 0;
458 }
459
460 static int ptrs_equal_cmp(const void *a, const void *b) { return a<b ? -1 : a>b ? 1 : 0; }
461 static int buffer_line_cb(
462 const git_diff_delta *delta,
463 const git_diff_hunk *hunk,
464 const git_diff_line *line,
465 void *payload)
466 {
467 git_blame *blame = (git_blame*)payload;
468
469 GIT_UNUSED(delta);
470 GIT_UNUSED(hunk);
471 GIT_UNUSED(line);
472
473 if (line->origin == GIT_DIFF_LINE_ADDITION) {
474 if (hunk_is_bufferblame(blame->current_hunk) &&
475 hunk_ends_at_or_before_line(blame->current_hunk, blame->current_diff_line)) {
476 /* Append to the current buffer-blame hunk */
477 blame->current_hunk->lines_in_hunk++;
478 shift_hunks_by(&blame->hunks, blame->current_diff_line+1, 1);
479 } else {
480 /* Create a new buffer-blame hunk with this line */
481 shift_hunks_by(&blame->hunks, blame->current_diff_line, 1);
482 blame->current_hunk = new_hunk(blame->current_diff_line, 1, 0, blame->path);
483 GIT_ERROR_CHECK_ALLOC(blame->current_hunk);
484
485 git_vector_insert_sorted(&blame->hunks, blame->current_hunk, NULL);
486 }
487 blame->current_diff_line++;
488 }
489
490 if (line->origin == GIT_DIFF_LINE_DELETION) {
491 /* Trim the line from the current hunk; remove it if it's now empty */
492 size_t shift_base = blame->current_diff_line + blame->current_hunk->lines_in_hunk+1;
493
494 if (--(blame->current_hunk->lines_in_hunk) == 0) {
495 size_t i;
496 shift_base--;
497 if (!git_vector_search2(&i, &blame->hunks, ptrs_equal_cmp, blame->current_hunk)) {
498 git_vector_remove(&blame->hunks, i);
499 free_hunk(blame->current_hunk);
500 blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byindex(blame, (uint32_t)i);
501 }
502 }
503 shift_hunks_by(&blame->hunks, shift_base, -1);
504 }
505 return 0;
506 }
507
508 int git_blame_buffer(
509 git_blame **out,
510 git_blame *reference,
511 const char *buffer,
512 size_t buffer_len)
513 {
514 git_blame *blame;
515 git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
516 size_t i;
517 git_blame_hunk *hunk;
518
519 diffopts.context_lines = 0;
520
521 GIT_ASSERT_ARG(out);
522 GIT_ASSERT_ARG(reference);
523 GIT_ASSERT_ARG(buffer && buffer_len);
524
525 blame = git_blame__alloc(reference->repository, reference->options, reference->path);
526 GIT_ERROR_CHECK_ALLOC(blame);
527
528 /* Duplicate all of the hunk structures in the reference blame */
529 git_vector_foreach(&reference->hunks, i, hunk) {
530 git_blame_hunk *h = dup_hunk(hunk);
531 GIT_ERROR_CHECK_ALLOC(h);
532
533 git_vector_insert(&blame->hunks, h);
534 }
535
536 /* Diff to the reference blob */
537 git_diff_blob_to_buffer(reference->final_blob, blame->path,
538 buffer, buffer_len, blame->path, &diffopts,
539 NULL, NULL, buffer_hunk_cb, buffer_line_cb, blame);
540
541 *out = blame;
542 return 0;
543 }
544
545 int git_blame_options_init(git_blame_options *opts, unsigned int version)
546 {
547 GIT_INIT_STRUCTURE_FROM_TEMPLATE(
548 opts, version, git_blame_options, GIT_BLAME_OPTIONS_INIT);
549 return 0;
550 }
551
552 #ifndef GIT_DEPRECATE_HARD
553 int git_blame_init_options(git_blame_options *opts, unsigned int version)
554 {
555 return git_blame_options_init(opts, version);
556 }
557 #endif