]> git.proxmox.com Git - libgit2.git/blob - src/blame.c
be10c15d6d5875a9352b9404d252142d42d06a84
[libgit2.git] / src / blame.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "blame.h"
9
10 #include "git2/commit.h"
11 #include "git2/revparse.h"
12 #include "git2/revwalk.h"
13 #include "git2/tree.h"
14 #include "git2/diff.h"
15 #include "git2/blob.h"
16 #include "git2/signature.h"
17 #include "git2/mailmap.h"
18 #include "util.h"
19 #include "repository.h"
20 #include "blame_git.h"
21
22
23 static int hunk_byfinalline_search_cmp(const void *key, const void *entry)
24 {
25 git_blame_hunk *hunk = (git_blame_hunk*)entry;
26
27 size_t lineno = *(size_t*)key;
28 size_t lines_in_hunk = hunk->lines_in_hunk;
29 size_t final_start_line_number = hunk->final_start_line_number;
30
31 if (lineno < final_start_line_number)
32 return -1;
33 if (lineno >= final_start_line_number + lines_in_hunk)
34 return 1;
35 return 0;
36 }
37
/* Vector comparator for path strings; the ordering is delegated to git__strcmp. */
static int paths_cmp(const void *a, const void *b)
{
	const char *lhs = (const char *)a;
	const char *rhs = (const char *)b;

	return git__strcmp(lhs, rhs);
}
39 static int hunk_cmp(const void *_a, const void *_b)
40 {
41 git_blame_hunk *a = (git_blame_hunk*)_a,
42 *b = (git_blame_hunk*)_b;
43
44 if (a->final_start_line_number > b->final_start_line_number)
45 return 1;
46 else if (a->final_start_line_number < b->final_start_line_number)
47 return -1;
48 else
49 return 0;
50 }
51
52 static bool hunk_ends_at_or_before_line(git_blame_hunk *hunk, size_t line)
53 {
54 return line >= (hunk->final_start_line_number + hunk->lines_in_hunk - 1);
55 }
56
57 static bool hunk_starts_at_or_after_line(git_blame_hunk *hunk, size_t line)
58 {
59 return line <= hunk->final_start_line_number;
60 }
61
62 static git_blame_hunk* new_hunk(
63 size_t start,
64 size_t lines,
65 size_t orig_start,
66 const char *path)
67 {
68 git_blame_hunk *hunk = git__calloc(1, sizeof(git_blame_hunk));
69 if (!hunk) return NULL;
70
71 hunk->lines_in_hunk = lines;
72 hunk->final_start_line_number = start;
73 hunk->orig_start_line_number = orig_start;
74 hunk->orig_path = path ? git__strdup(path) : NULL;
75
76 return hunk;
77 }
78
79 static git_blame_hunk* dup_hunk(git_blame_hunk *hunk)
80 {
81 git_blame_hunk *newhunk = new_hunk(
82 hunk->final_start_line_number,
83 hunk->lines_in_hunk,
84 hunk->orig_start_line_number,
85 hunk->orig_path);
86
87 if (!newhunk)
88 return NULL;
89
90 git_oid_cpy(&newhunk->orig_commit_id, &hunk->orig_commit_id);
91 git_oid_cpy(&newhunk->final_commit_id, &hunk->final_commit_id);
92 newhunk->boundary = hunk->boundary;
93 git_signature_dup(&newhunk->final_signature, hunk->final_signature);
94 git_signature_dup(&newhunk->orig_signature, hunk->orig_signature);
95 return newhunk;
96 }
97
98 static void free_hunk(git_blame_hunk *hunk)
99 {
100 git__free((void*)hunk->orig_path);
101 git_signature_free(hunk->final_signature);
102 git_signature_free(hunk->orig_signature);
103 git__free(hunk);
104 }
105
106 /* Starting with the hunk that includes start_line, shift all following hunks'
107 * final_start_line by shift_by lines */
108 static void shift_hunks_by(git_vector *v, size_t start_line, int shift_by)
109 {
110 size_t i;
111
112 if (!git_vector_bsearch2(&i, v, hunk_byfinalline_search_cmp, &start_line)) {
113 for (; i < v->length; i++) {
114 git_blame_hunk *hunk = (git_blame_hunk*)v->contents[i];
115 hunk->final_start_line_number += shift_by;
116 }
117 }
118 }
119
120 git_blame* git_blame__alloc(
121 git_repository *repo,
122 git_blame_options opts,
123 const char *path)
124 {
125 git_blame *gbr = git__calloc(1, sizeof(git_blame));
126 if (!gbr)
127 return NULL;
128
129 gbr->repository = repo;
130 gbr->options = opts;
131
132 if (git_vector_init(&gbr->hunks, 8, hunk_cmp) < 0 ||
133 git_vector_init(&gbr->paths, 8, paths_cmp) < 0 ||
134 (gbr->path = git__strdup(path)) == NULL ||
135 git_vector_insert(&gbr->paths, git__strdup(path)) < 0)
136 {
137 git_blame_free(gbr);
138 return NULL;
139 }
140
141 if (opts.flags & GIT_BLAME_USE_MAILMAP &&
142 git_mailmap_from_repository(&gbr->mailmap, repo) < 0) {
143 git_blame_free(gbr);
144 return NULL;
145 }
146
147 return gbr;
148 }
149
150 void git_blame_free(git_blame *blame)
151 {
152 size_t i;
153 git_blame_hunk *hunk;
154
155 if (!blame) return;
156
157 git_vector_foreach(&blame->hunks, i, hunk)
158 free_hunk(hunk);
159 git_vector_free(&blame->hunks);
160
161 git_vector_free_deep(&blame->paths);
162
163 git_array_clear(blame->line_index);
164
165 git_mailmap_free(blame->mailmap);
166
167 git__free(blame->path);
168 git_blob_free(blame->final_blob);
169 git__free(blame);
170 }
171
172 uint32_t git_blame_get_hunk_count(git_blame *blame)
173 {
174 assert(blame);
175 return (uint32_t)blame->hunks.length;
176 }
177
178 const git_blame_hunk *git_blame_get_hunk_byindex(git_blame *blame, uint32_t index)
179 {
180 assert(blame);
181 return (git_blame_hunk*)git_vector_get(&blame->hunks, index);
182 }
183
184 const git_blame_hunk *git_blame_get_hunk_byline(git_blame *blame, size_t lineno)
185 {
186 size_t i, new_lineno = lineno;
187 assert(blame);
188
189 if (!git_vector_bsearch2(&i, &blame->hunks, hunk_byfinalline_search_cmp, &new_lineno)) {
190 return git_blame_get_hunk_byindex(blame, (uint32_t)i);
191 }
192
193 return NULL;
194 }
195
196 static int normalize_options(
197 git_blame_options *out,
198 const git_blame_options *in,
199 git_repository *repo)
200 {
201 git_blame_options dummy = GIT_BLAME_OPTIONS_INIT;
202 if (!in) in = &dummy;
203
204 memcpy(out, in, sizeof(git_blame_options));
205
206 /* No newest_commit => HEAD */
207 if (git_oid_iszero(&out->newest_commit)) {
208 if (git_reference_name_to_id(&out->newest_commit, repo, "HEAD") < 0) {
209 return -1;
210 }
211 }
212
213 /* min_line 0 really means 1 */
214 if (!out->min_line) out->min_line = 1;
215 /* max_line 0 really means N, but we don't know N yet */
216
217 /* Fix up option implications */
218 if (out->flags & GIT_BLAME_TRACK_COPIES_ANY_COMMIT_COPIES)
219 out->flags |= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES;
220 if (out->flags & GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES)
221 out->flags |= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES;
222 if (out->flags & GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES)
223 out->flags |= GIT_BLAME_TRACK_COPIES_SAME_FILE;
224
225 return 0;
226 }
227
228 static git_blame_hunk *split_hunk_in_vector(
229 git_vector *vec,
230 git_blame_hunk *hunk,
231 size_t rel_line,
232 bool return_new)
233 {
234 size_t new_line_count;
235 git_blame_hunk *nh;
236
237 /* Don't split if already at a boundary */
238 if (rel_line <= 0 ||
239 rel_line >= hunk->lines_in_hunk)
240 {
241 return hunk;
242 }
243
244 new_line_count = hunk->lines_in_hunk - rel_line;
245 nh = new_hunk(hunk->final_start_line_number + rel_line, new_line_count,
246 hunk->orig_start_line_number + rel_line, hunk->orig_path);
247
248 if (!nh)
249 return NULL;
250
251 git_oid_cpy(&nh->final_commit_id, &hunk->final_commit_id);
252 git_oid_cpy(&nh->orig_commit_id, &hunk->orig_commit_id);
253
254 /* Adjust hunk that was split */
255 hunk->lines_in_hunk -= new_line_count;
256 git_vector_insert_sorted(vec, nh, NULL);
257 {
258 git_blame_hunk *ret = return_new ? nh : hunk;
259 return ret;
260 }
261 }
262
263 /*
264 * Construct a list of char indices for where lines begin
265 * Adapted from core git:
266 * https://github.com/gitster/git/blob/be5c9fb9049ed470e7005f159bb923a5f4de1309/builtin/blame.c#L1760-L1789
267 */
268 static int index_blob_lines(git_blame *blame)
269 {
270 const char *buf = blame->final_buf;
271 git_off_t len = blame->final_buf_size;
272 int num = 0, incomplete = 0, bol = 1;
273 size_t *i;
274
275 if (len && buf[len-1] != '\n')
276 incomplete++; /* incomplete line at the end */
277 while (len--) {
278 if (bol) {
279 i = git_array_alloc(blame->line_index);
280 GIT_ERROR_CHECK_ALLOC(i);
281 *i = buf - blame->final_buf;
282 bol = 0;
283 }
284 if (*buf++ == '\n') {
285 num++;
286 bol = 1;
287 }
288 }
289 i = git_array_alloc(blame->line_index);
290 GIT_ERROR_CHECK_ALLOC(i);
291 *i = buf - blame->final_buf;
292 blame->num_lines = num + incomplete;
293 return blame->num_lines;
294 }
295
296 static git_blame_hunk* hunk_from_entry(git_blame__entry *e, git_blame *blame)
297 {
298 git_blame_hunk *h = new_hunk(
299 e->lno+1, e->num_lines, e->s_lno+1, e->suspect->path);
300
301 if (!h)
302 return NULL;
303
304 git_oid_cpy(&h->final_commit_id, git_commit_id(e->suspect->commit));
305 git_oid_cpy(&h->orig_commit_id, git_commit_id(e->suspect->commit));
306 git_commit_author_with_mailmap(
307 &h->final_signature, e->suspect->commit, blame->mailmap);
308 git_signature_dup(&h->orig_signature, h->final_signature);
309 h->boundary = e->is_boundary ? 1 : 0;
310 return h;
311 }
312
313 static int load_blob(git_blame *blame)
314 {
315 int error;
316
317 if (blame->final_blob) return 0;
318
319 error = git_commit_lookup(&blame->final, blame->repository, &blame->options.newest_commit);
320 if (error < 0)
321 goto cleanup;
322 error = git_object_lookup_bypath((git_object**)&blame->final_blob,
323 (git_object*)blame->final, blame->path, GIT_OBJECT_BLOB);
324
325 cleanup:
326 return error;
327 }
328
329 static int blame_internal(git_blame *blame)
330 {
331 int error;
332 git_blame__entry *ent = NULL;
333 git_blame__origin *o;
334
335 if ((error = load_blob(blame)) < 0 ||
336 (error = git_blame__get_origin(&o, blame, blame->final, blame->path)) < 0)
337 goto cleanup;
338 blame->final_buf = git_blob_rawcontent(blame->final_blob);
339 blame->final_buf_size = git_blob_rawsize(blame->final_blob);
340
341 ent = git__calloc(1, sizeof(git_blame__entry));
342 GIT_ERROR_CHECK_ALLOC(ent);
343
344 ent->num_lines = index_blob_lines(blame);
345 ent->lno = blame->options.min_line - 1;
346 ent->num_lines = ent->num_lines - blame->options.min_line + 1;
347 if (blame->options.max_line > 0)
348 ent->num_lines = blame->options.max_line - blame->options.min_line + 1;
349 ent->s_lno = ent->lno;
350 ent->suspect = o;
351
352 blame->ent = ent;
353
354 error = git_blame__like_git(blame, blame->options.flags);
355
356 cleanup:
357 for (ent = blame->ent; ent; ) {
358 git_blame__entry *e = ent->next;
359 git_blame_hunk *h = hunk_from_entry(ent, blame);
360
361 git_vector_insert(&blame->hunks, h);
362
363 git_blame__free_entry(ent);
364 ent = e;
365 }
366
367 return error;
368 }
369
370 /*******************************************************************************
371 * File blaming
372 ******************************************************************************/
373
374 int git_blame_file(
375 git_blame **out,
376 git_repository *repo,
377 const char *path,
378 git_blame_options *options)
379 {
380 int error = -1;
381 git_blame_options normOptions = GIT_BLAME_OPTIONS_INIT;
382 git_blame *blame = NULL;
383
384 assert(out && repo && path);
385 if ((error = normalize_options(&normOptions, options, repo)) < 0)
386 goto on_error;
387
388 blame = git_blame__alloc(repo, normOptions, path);
389 GIT_ERROR_CHECK_ALLOC(blame);
390
391 if ((error = load_blob(blame)) < 0)
392 goto on_error;
393
394 if ((error = blame_internal(blame)) < 0)
395 goto on_error;
396
397 *out = blame;
398 return 0;
399
400 on_error:
401 git_blame_free(blame);
402 return error;
403 }
404
405 /*******************************************************************************
406 * Buffer blaming
407 *******************************************************************************/
408
409 static bool hunk_is_bufferblame(git_blame_hunk *hunk)
410 {
411 return git_oid_iszero(&hunk->final_commit_id);
412 }
413
414 static int buffer_hunk_cb(
415 const git_diff_delta *delta,
416 const git_diff_hunk *hunk,
417 void *payload)
418 {
419 git_blame *blame = (git_blame*)payload;
420 uint32_t wedge_line;
421
422 GIT_UNUSED(delta);
423
424 wedge_line = (hunk->old_lines == 0) ? hunk->new_start : hunk->old_start;
425 blame->current_diff_line = wedge_line;
426
427 blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byline(blame, wedge_line);
428 if (!blame->current_hunk) {
429 /* Line added at the end of the file */
430 blame->current_hunk = new_hunk(wedge_line, 0, wedge_line, blame->path);
431 GIT_ERROR_CHECK_ALLOC(blame->current_hunk);
432
433 git_vector_insert(&blame->hunks, blame->current_hunk);
434 } else if (!hunk_starts_at_or_after_line(blame->current_hunk, wedge_line)){
435 /* If this hunk doesn't start between existing hunks, split a hunk up so it does */
436 blame->current_hunk = split_hunk_in_vector(&blame->hunks, blame->current_hunk,
437 wedge_line - blame->current_hunk->orig_start_line_number, true);
438 GIT_ERROR_CHECK_ALLOC(blame->current_hunk);
439 }
440
441 return 0;
442 }
443
/*
 * Comparator on the pointer values themselves: -1, 1 or 0 when `a` is
 * below, above or identical to `b`.
 */
static int ptrs_equal_cmp(const void *a, const void *b)
{
	if (a < b)
		return -1;
	if (a > b)
		return 1;
	return 0;
}
445 static int buffer_line_cb(
446 const git_diff_delta *delta,
447 const git_diff_hunk *hunk,
448 const git_diff_line *line,
449 void *payload)
450 {
451 git_blame *blame = (git_blame*)payload;
452
453 GIT_UNUSED(delta);
454 GIT_UNUSED(hunk);
455 GIT_UNUSED(line);
456
457 if (line->origin == GIT_DIFF_LINE_ADDITION) {
458 if (hunk_is_bufferblame(blame->current_hunk) &&
459 hunk_ends_at_or_before_line(blame->current_hunk, blame->current_diff_line)) {
460 /* Append to the current buffer-blame hunk */
461 blame->current_hunk->lines_in_hunk++;
462 shift_hunks_by(&blame->hunks, blame->current_diff_line+1, 1);
463 } else {
464 /* Create a new buffer-blame hunk with this line */
465 shift_hunks_by(&blame->hunks, blame->current_diff_line, 1);
466 blame->current_hunk = new_hunk(blame->current_diff_line, 1, 0, blame->path);
467 GIT_ERROR_CHECK_ALLOC(blame->current_hunk);
468
469 git_vector_insert_sorted(&blame->hunks, blame->current_hunk, NULL);
470 }
471 blame->current_diff_line++;
472 }
473
474 if (line->origin == GIT_DIFF_LINE_DELETION) {
475 /* Trim the line from the current hunk; remove it if it's now empty */
476 size_t shift_base = blame->current_diff_line + blame->current_hunk->lines_in_hunk+1;
477
478 if (--(blame->current_hunk->lines_in_hunk) == 0) {
479 size_t i;
480 shift_base--;
481 if (!git_vector_search2(&i, &blame->hunks, ptrs_equal_cmp, blame->current_hunk)) {
482 git_vector_remove(&blame->hunks, i);
483 free_hunk(blame->current_hunk);
484 blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byindex(blame, (uint32_t)i);
485 }
486 }
487 shift_hunks_by(&blame->hunks, shift_base, -1);
488 }
489 return 0;
490 }
491
492 int git_blame_buffer(
493 git_blame **out,
494 git_blame *reference,
495 const char *buffer,
496 size_t buffer_len)
497 {
498 git_blame *blame;
499 git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
500 size_t i;
501 git_blame_hunk *hunk;
502
503 diffopts.context_lines = 0;
504
505 assert(out && reference && buffer && buffer_len);
506
507 blame = git_blame__alloc(reference->repository, reference->options, reference->path);
508 GIT_ERROR_CHECK_ALLOC(blame);
509
510 /* Duplicate all of the hunk structures in the reference blame */
511 git_vector_foreach(&reference->hunks, i, hunk) {
512 git_blame_hunk *h = dup_hunk(hunk);
513 GIT_ERROR_CHECK_ALLOC(h);
514
515 git_vector_insert(&blame->hunks, h);
516 }
517
518 /* Diff to the reference blob */
519 git_diff_blob_to_buffer(reference->final_blob, blame->path,
520 buffer, buffer_len, blame->path, &diffopts,
521 NULL, NULL, buffer_hunk_cb, buffer_line_cb, blame);
522
523 *out = blame;
524 return 0;
525 }
526
527 int git_blame_init_options(git_blame_options *opts, unsigned int version)
528 {
529 GIT_INIT_STRUCTURE_FROM_TEMPLATE(
530 opts, version, git_blame_options, GIT_BLAME_OPTIONS_INIT);
531 return 0;
532 }