]> git.proxmox.com Git - libgit2.git/blame - src/blame.c
Port blame from git.git
[libgit2.git] / src / blame.c
CommitLineData
ceab4e26
BS
1/*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8#include "blame.h"
9#include "git2/commit.h"
10#include "git2/revparse.h"
11#include "git2/revwalk.h"
12#include "git2/tree.h"
13#include "git2/diff.h"
14#include "git2/blob.h"
15#include "util.h"
16#include "repository.h"
17#include "blame_git.h"
18
19
20static int hunk_search_cmp_helper(const void *key, size_t start_line, size_t num_lines)
21{
22 uint32_t lineno = *(size_t*)key;
23 if (lineno < start_line)
24 return -1;
25 if (lineno >= ((uint32_t)start_line + num_lines))
26 return 1;
27 return 0;
28}
29static int hunk_byfinalline_search_cmp(const void *key, const void *entry)
30{
31 git_blame_hunk *hunk = (git_blame_hunk*)entry;
32 return hunk_search_cmp_helper(key, hunk->final_start_line_number, hunk->lines_in_hunk);
33}
34static int hunk_sort_cmp_by_start_line(const void *_a, const void *_b)
35{
36 git_blame_hunk *a = (git_blame_hunk*)_a,
37 *b = (git_blame_hunk*)_b;
38
39 return a->final_start_line_number - b->final_start_line_number;
40}
41
42static bool hunk_ends_at_or_before_line(git_blame_hunk *hunk, size_t line)
43{
44 return line >= (size_t)(hunk->final_start_line_number + hunk->lines_in_hunk - 1);
45}
46
47static bool hunk_starts_at_or_after_line(git_blame_hunk *hunk, size_t line)
48{
49 return line <= hunk->final_start_line_number;
50}
51
52static git_blame_hunk* new_hunk(uint16_t start, uint16_t lines, uint16_t orig_start, const char *path)
53{
54 git_blame_hunk *hunk = git__calloc(1, sizeof(git_blame_hunk));
55 if (!hunk) return NULL;
56
57 hunk->lines_in_hunk = lines;
58 hunk->final_start_line_number = start;
59 hunk->orig_start_line_number = orig_start;
60 hunk->orig_path = path ? git__strdup(path) : NULL;
61
62 return hunk;
63}
64
65git_blame_hunk* git_blame__alloc_hunk()
66{
67 return new_hunk(0,0,0,NULL);
68}
69
70static git_blame_hunk* dup_hunk(git_blame_hunk *hunk)
71{
72 git_blame_hunk *newhunk = new_hunk(hunk->final_start_line_number, hunk->lines_in_hunk, hunk->orig_start_line_number, hunk->orig_path);
73 git_oid_cpy(&newhunk->orig_commit_id, &hunk->orig_commit_id);
74 git_oid_cpy(&newhunk->final_commit_id, &hunk->final_commit_id);
75 return newhunk;
76}
77
78static void free_hunk(git_blame_hunk *hunk)
79{
80 git__free((void*)hunk->orig_path);
81 git__free(hunk);
82}
83
84/* Starting with the hunk that includes start_line, shift all following hunks'
85 * final_start_line by shift_by lines */
86static void shift_hunks_by_final(git_vector *v, size_t start_line, int shift_by)
87{
88 size_t i;
89
90 if (!git_vector_bsearch2( &i, v, hunk_byfinalline_search_cmp, &start_line)) {
91 for (; i < v->length; i++) {
92 git_blame_hunk *hunk = (git_blame_hunk*)v->contents[i];
93 hunk->final_start_line_number += shift_by;
94 }
95 }
96}
97static int paths_cmp(const void *a, const void *b) { return git__strcmp((char*)a, (char*)b); }
98git_blame* git_blame__alloc(
99 git_repository *repo,
100 git_blame_options opts,
101 const char *path)
102{
103 git_blame *gbr = (git_blame*)git__calloc(1, sizeof(git_blame));
104 if (!gbr) {
105 giterr_set_oom();
106 return NULL;
107 }
108 git_vector_init(&gbr->hunks, 8, hunk_sort_cmp_by_start_line);
109 git_vector_init(&gbr->unclaimed_hunks, 8, hunk_sort_cmp_by_start_line);
110 git_vector_init(&gbr->paths, 8, paths_cmp);
111 gbr->repository = repo;
112 gbr->options = opts;
113 gbr->path = git__strdup(path);
114 git_vector_insert(&gbr->paths, git__strdup(path));
115 gbr->final_blob = NULL;
116 return gbr;
117}
118
119void git_blame_free(git_blame *blame)
120{
121 size_t i;
122 git_blame_hunk *hunk;
123 char *path;
124
125 if (!blame) return;
126
127 git_vector_foreach(&blame->hunks, i, hunk)
128 free_hunk(hunk);
129 git_vector_free(&blame->hunks);
130
131 git_vector_foreach(&blame->unclaimed_hunks, i, hunk)
132 free_hunk(hunk);
133 git_vector_free(&blame->unclaimed_hunks);
134
135 git_vector_foreach(&blame->paths, i, path)
136 git__free(path);
137 git_vector_free(&blame->paths);
138
139 git__free((void*)blame->path);
140 git_blob_free(blame->final_blob);
141 git__free(blame);
142}
143
144uint32_t git_blame_get_hunk_count(git_blame *blame)
145{
146 assert(blame);
147 return blame->hunks.length;
148}
149
150const git_blame_hunk *git_blame_get_hunk_byindex(git_blame *blame, uint32_t index)
151{
152 assert(blame);
153 return (git_blame_hunk*)git_vector_get(&blame->hunks, index);
154}
155
156const git_blame_hunk *git_blame_get_hunk_byline(git_blame *blame, uint32_t lineno)
157{
158 size_t i;
159 assert(blame);
160
161 if (!git_vector_bsearch2( &i, &blame->hunks, hunk_byfinalline_search_cmp, &lineno)) {
162 return git_blame_get_hunk_byindex(blame, i);
163 }
164
165 return NULL;
166}
167
168static void normalize_options(
169 git_blame_options *out,
170 const git_blame_options *in,
171 git_repository *repo)
172{
173 git_blame_options dummy = GIT_BLAME_OPTIONS_INIT;
174 if (!in) in = &dummy;
175
176 memcpy(out, in, sizeof(git_blame_options));
177
178 /* No newest_commit => HEAD */
179 if (git_oid_iszero(&out->newest_commit)) {
180 git_reference_name_to_id(&out->newest_commit, repo, "HEAD");
181 }
182}
183
184static git_blame_hunk *split_hunk_in_vector(
185 git_vector *vec,
186 git_blame_hunk *hunk,
187 size_t rel_line,
188 bool return_new)
189{
190 size_t new_line_count;
191 git_blame_hunk *nh;
192
193 /* Don't split if already at a boundary */
194 if (rel_line <= 0 ||
195 rel_line >= hunk->lines_in_hunk)
196 {
197 return hunk;
198 }
199
200 new_line_count = hunk->lines_in_hunk - rel_line;
201 nh = new_hunk(hunk->final_start_line_number+rel_line, new_line_count,
202 hunk->orig_start_line_number+rel_line, hunk->orig_path);
203 git_oid_cpy(&nh->final_commit_id, &hunk->final_commit_id);
204 git_oid_cpy(&nh->orig_commit_id, &hunk->orig_commit_id);
205
206 /* Adjust hunk that was split */
207 hunk->lines_in_hunk -= new_line_count;
208 git_vector_insert_sorted(vec, nh, NULL);
209 {
210 git_blame_hunk *ret = return_new ? nh : hunk;
211 return ret;
212 }
213}
214
215
216/*
217 * To allow quick access to the contents of nth line in the
218 * final image, prepare an index in the scoreboard.
219 */
220static int prepare_lines(struct scoreboard *sb)
221{
222 const char *buf = sb->final_buf;
223 git_off_t len = sb->final_buf_size;
224 int num = 0, incomplete = 0, bol = 1;
225
226 if (len && buf[len-1] != '\n')
227 incomplete++; /* incomplete line at the end */
228 while (len--) {
229 if (bol) {
230 bol = 0;
231 }
232 if (*buf++ == '\n') {
233 num++;
234 bol = 1;
235 }
236 }
237 sb->num_lines = num + incomplete;
238 return sb->num_lines;
239}
240
241static git_blame_hunk* hunk_from_entry(struct blame_entry *e)
242{
243 git_blame_hunk *h = new_hunk(
244 e->lno+1, e->num_lines, e->s_lno+1, e->suspect->path);
245 git_oid_cpy(&h->final_commit_id, git_commit_id(e->suspect->commit));
246 return h;
247}
248
249static void free_if_not_already_freed(git_vector *already, struct origin *o)
250{
251 size_t i;
252
253 if (!o) return;
254 if (!git_vector_search(&i, already, o))
255 return;
256
257 git_vector_insert(already, o);
258 free_if_not_already_freed(already, o->previous);
259 git_blob_free(o->blob);
260 git_commit_free(o->commit);
261 git__free(o);
262}
263
264static int walk_and_mark(git_blame *blame)
265{
266 int error;
267
268 struct scoreboard sb = {0};
269 struct blame_entry *ent = NULL;
270 git_blob *blob = NULL;
271 struct origin *o;
272 git_vector already = GIT_VECTOR_INIT;
273
274 if ((error = git_commit_lookup(&sb.final, blame->repository, &blame->options.newest_commit)) < 0 ||
275 (error = git_object_lookup_bypath((git_object**)&blob, (git_object*)sb.final, blame->path, GIT_OBJ_BLOB)) < 0)
276 goto cleanup;
277 sb.final_buf = git_blob_rawcontent(blob);
278 sb.final_buf_size = git_blob_rawsize(blob);
279 if ((error = get_origin(&o, &sb, sb.final, blame->path)) < 0)
280 goto cleanup;
281
282 ent = git__calloc(1, sizeof(*ent));
283 ent->num_lines = prepare_lines(&sb);
284 ent->suspect = o;
285 sb.ent = ent;
286 sb.path = blame->path;
287 sb.blame = blame;
288
289 assign_blame(&sb, blame->options.flags);
290 coalesce(&sb);
291
292 for (ent = sb.ent; ent; ) {
293 git_vector_insert(&blame->hunks, hunk_from_entry(ent));
294 ent = ent->next;
295 }
296
297cleanup:
298 for (ent = sb.ent; ent; ) {
299 struct blame_entry *e = ent->next;
300 struct origin *o = ent->suspect;
301
302 /* Linkages might not be ordered, so we only free pointers we haven't
303 * seen before. */
304 free_if_not_already_freed(&already, o);
305
306 git__free(ent);
307 ent = e;
308 }
309
310 git_vector_free(&already);
311 git_commit_free(sb.final);
312 git_blob_free(blob);
313 return error;
314}
315
316static int load_blob(git_blame *blame, git_repository *repo, git_oid *commit_id, const char *path)
317{
318 int retval = -1;
319 git_commit *commit = NULL;
320 git_tree *tree = NULL;
321 git_tree_entry *tree_entry = NULL;
322 git_object *obj = NULL;
323
324 if (((retval = git_commit_lookup(&commit, repo, commit_id)) < 0) ||
325 ((retval = git_object_lookup_bypath(&obj, (git_object*)commit, path, GIT_OBJ_BLOB)) < 0) ||
326 ((retval = git_object_type(obj)) != GIT_OBJ_BLOB))
327 goto cleanup;
328 blame->final_blob = (git_blob*)obj;
329
330cleanup:
331 git_tree_entry_free(tree_entry);
332 git_tree_free(tree);
333 git_commit_free(commit);
334 return retval;
335}
336
337/*******************************************************************************
338 * File blaming
339 ******************************************************************************/
340
341int git_blame_file(
342 git_blame **out,
343 git_repository *repo,
344 const char *path,
345 git_blame_options *options)
346{
347 int error = -1;
348 git_blame_options normOptions = GIT_BLAME_OPTIONS_INIT;
349 git_blame *blame = NULL;
350
351 assert(out && repo && path);
352 normalize_options(&normOptions, options, repo);
353
354 blame = git_blame__alloc(repo, normOptions, path);
355 GITERR_CHECK_ALLOC(blame);
356
357 if ((error = load_blob(blame, repo, &normOptions.newest_commit, path)) < 0)
358 goto on_error;
359
360 if ((error = walk_and_mark(blame)) < 0)
361 goto on_error;
362
363 *out = blame;
364 return 0;
365
366on_error:
367 git_blame_free(blame);
368 return error;
369}
370
371/*******************************************************************************
372 * Buffer blaming
373 *******************************************************************************/
374
375static bool hunk_is_bufferblame(git_blame_hunk *hunk)
376{
377 return git_oid_iszero(&hunk->final_commit_id);
378}
379
380static int buffer_hunk_cb(
381 const git_diff_delta *delta,
382 const git_diff_range *range,
383 const char *header,
384 size_t header_len,
385 void *payload)
386{
387 git_blame *blame = (git_blame*)payload;
388 size_t wedge_line;
389
390 GIT_UNUSED(delta);
391 GIT_UNUSED(header);
392 GIT_UNUSED(header_len);
393
394 wedge_line = (range->old_lines == 0) ? range->new_start : range->old_start;
395 blame->current_diff_line = wedge_line;
396
397 /* If this hunk doesn't start between existing hunks, split a hunk up so it does */
398 blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byline(blame, wedge_line);
399 if (!hunk_starts_at_or_after_line(blame->current_hunk, wedge_line)){
400 blame->current_hunk = split_hunk_in_vector(&blame->hunks, blame->current_hunk,
401 wedge_line - blame->current_hunk->orig_start_line_number, true);
402 }
403
404 return 0;
405}
406
407static int ptrs_equal_cmp(const void *a, const void *b) { return a<b ? -1 : a>b ? 1 : 0; }
408static int buffer_line_cb(
409 const git_diff_delta *delta,
410 const git_diff_range *range,
411 char line_origin,
412 const char *content,
413 size_t content_len,
414 void *payload)
415{
416 git_blame *blame = (git_blame*)payload;
417
418 GIT_UNUSED(delta);
419 GIT_UNUSED(range);
420 GIT_UNUSED(content);
421 GIT_UNUSED(content_len);
422
423#ifdef DO_DEBUG
424 {
425 char *str = git__substrdup(content, content_len);
426 git__free(str);
427 }
428#endif
429
430 if (line_origin == GIT_DIFF_LINE_ADDITION) {
431 if (hunk_is_bufferblame(blame->current_hunk) &&
432 hunk_ends_at_or_before_line(blame->current_hunk, blame->current_diff_line)) {
433 /* Append to the current buffer-blame hunk */
434 blame->current_hunk->lines_in_hunk++;
435 shift_hunks_by_final(&blame->hunks, blame->current_diff_line+1, 1);
436 } else {
437 /* Create a new buffer-blame hunk with this line */
438 shift_hunks_by_final(&blame->hunks, blame->current_diff_line, 1);
439 blame->current_hunk = new_hunk(blame->current_diff_line, 1, 0, blame->path);
440 git_vector_insert_sorted(&blame->hunks, blame->current_hunk, NULL);
441 }
442 blame->current_diff_line++;
443 }
444
445 if (line_origin == GIT_DIFF_LINE_DELETION) {
446 /* Trim the line from the current hunk; remove it if it's now empty */
447 size_t shift_base = blame->current_diff_line + blame->current_hunk->lines_in_hunk+1;
448
449 if (--(blame->current_hunk->lines_in_hunk) == 0) {
450 size_t i;
451 shift_base--;
452 if (!git_vector_search2(&i, &blame->hunks, ptrs_equal_cmp, blame->current_hunk)) {
453 git_vector_remove(&blame->hunks, i);
454 free_hunk(blame->current_hunk);
455 blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byindex(blame, i);
456 }
457 }
458 shift_hunks_by_final(&blame->hunks, shift_base, -1);
459 }
460 return 0;
461}
462
463int git_blame_buffer(
464 git_blame **out,
465 git_blame *reference,
466 const char *buffer,
467 size_t buffer_len)
468{
469 git_blame *blame;
470 git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
471 size_t i;
472 git_blame_hunk *hunk;
473
474 diffopts.context_lines = 0;
475
476 assert(out && reference && buffer && buffer_len);
477
478 blame = git_blame__alloc(reference->repository, reference->options, reference->path);
479
480 /* Duplicate all of the hunk structures in the reference blame */
481 git_vector_foreach(&reference->hunks, i, hunk) {
482 git_vector_insert(&blame->hunks, dup_hunk(hunk));
483 }
484
485 /* Diff to the reference blob */
486 git_diff_blob_to_buffer(reference->final_blob, blame->path,
487 buffer, buffer_len, blame->path,
488 &diffopts, NULL, buffer_hunk_cb, buffer_line_cb, blame);
489
490 *out = blame;
491 return 0;
492}