]> git.proxmox.com Git - libgit2.git/blob - src/blame.c
treebuilder: fix memory leaks in `write_with_buffer`
[libgit2.git] / src / blame.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "blame.h"
9 #include "git2/commit.h"
10 #include "git2/revparse.h"
11 #include "git2/revwalk.h"
12 #include "git2/tree.h"
13 #include "git2/diff.h"
14 #include "git2/blob.h"
15 #include "git2/signature.h"
16 #include "util.h"
17 #include "repository.h"
18 #include "blame_git.h"
19
20
21 static int hunk_byfinalline_search_cmp(const void *key, const void *entry)
22 {
23 git_blame_hunk *hunk = (git_blame_hunk*)entry;
24
25 size_t lineno = *(size_t*)key;
26 size_t lines_in_hunk = hunk->lines_in_hunk;
27 size_t final_start_line_number = hunk->final_start_line_number;
28
29 if (lineno < final_start_line_number)
30 return -1;
31 if (lineno >= final_start_line_number + lines_in_hunk)
32 return 1;
33 return 0;
34 }
35
/* Vector comparator for the path list: compares the entries as C strings. */
static int paths_cmp(const void *a, const void *b)
{
	const char *lhs = (const char *)a;
	const char *rhs = (const char *)b;

	return git__strcmp(lhs, rhs);
}
37 static int hunk_cmp(const void *_a, const void *_b)
38 {
39 git_blame_hunk *a = (git_blame_hunk*)_a,
40 *b = (git_blame_hunk*)_b;
41
42 return a->final_start_line_number - b->final_start_line_number;
43 }
44
45 static bool hunk_ends_at_or_before_line(git_blame_hunk *hunk, size_t line)
46 {
47 return line >= (hunk->final_start_line_number + hunk->lines_in_hunk - 1);
48 }
49
50 static bool hunk_starts_at_or_after_line(git_blame_hunk *hunk, size_t line)
51 {
52 return line <= hunk->final_start_line_number;
53 }
54
55 static git_blame_hunk* new_hunk(
56 size_t start,
57 size_t lines,
58 size_t orig_start,
59 const char *path)
60 {
61 git_blame_hunk *hunk = git__calloc(1, sizeof(git_blame_hunk));
62 if (!hunk) return NULL;
63
64 hunk->lines_in_hunk = lines;
65 hunk->final_start_line_number = start;
66 hunk->orig_start_line_number = orig_start;
67 hunk->orig_path = path ? git__strdup(path) : NULL;
68
69 return hunk;
70 }
71
72 static git_blame_hunk* dup_hunk(git_blame_hunk *hunk)
73 {
74 git_blame_hunk *newhunk = new_hunk(
75 hunk->final_start_line_number,
76 hunk->lines_in_hunk,
77 hunk->orig_start_line_number,
78 hunk->orig_path);
79
80 if (!newhunk)
81 return NULL;
82
83 git_oid_cpy(&newhunk->orig_commit_id, &hunk->orig_commit_id);
84 git_oid_cpy(&newhunk->final_commit_id, &hunk->final_commit_id);
85 newhunk->boundary = hunk->boundary;
86 git_signature_dup(&newhunk->final_signature, hunk->final_signature);
87 git_signature_dup(&newhunk->orig_signature, hunk->orig_signature);
88 return newhunk;
89 }
90
91 static void free_hunk(git_blame_hunk *hunk)
92 {
93 git__free((void*)hunk->orig_path);
94 git_signature_free(hunk->final_signature);
95 git_signature_free(hunk->orig_signature);
96 git__free(hunk);
97 }
98
99 /* Starting with the hunk that includes start_line, shift all following hunks'
100 * final_start_line by shift_by lines */
101 static void shift_hunks_by(git_vector *v, size_t start_line, int shift_by)
102 {
103 size_t i;
104
105 if (!git_vector_bsearch2(&i, v, hunk_byfinalline_search_cmp, &start_line)) {
106 for (; i < v->length; i++) {
107 git_blame_hunk *hunk = (git_blame_hunk*)v->contents[i];
108 hunk->final_start_line_number += shift_by;
109 }
110 }
111 }
112
113 git_blame* git_blame__alloc(
114 git_repository *repo,
115 git_blame_options opts,
116 const char *path)
117 {
118 git_blame *gbr = git__calloc(1, sizeof(git_blame));
119 if (!gbr)
120 return NULL;
121
122 gbr->repository = repo;
123 gbr->options = opts;
124
125 if (git_vector_init(&gbr->hunks, 8, hunk_cmp) < 0 ||
126 git_vector_init(&gbr->paths, 8, paths_cmp) < 0 ||
127 (gbr->path = git__strdup(path)) == NULL ||
128 git_vector_insert(&gbr->paths, git__strdup(path)) < 0)
129 {
130 git_blame_free(gbr);
131 return NULL;
132 }
133
134 return gbr;
135 }
136
137 void git_blame_free(git_blame *blame)
138 {
139 size_t i;
140 git_blame_hunk *hunk;
141
142 if (!blame) return;
143
144 git_vector_foreach(&blame->hunks, i, hunk)
145 free_hunk(hunk);
146 git_vector_free(&blame->hunks);
147
148 git_vector_free_deep(&blame->paths);
149
150 git_array_clear(blame->line_index);
151
152 git__free(blame->path);
153 git_blob_free(blame->final_blob);
154 git__free(blame);
155 }
156
157 uint32_t git_blame_get_hunk_count(git_blame *blame)
158 {
159 assert(blame);
160 return (uint32_t)blame->hunks.length;
161 }
162
163 const git_blame_hunk *git_blame_get_hunk_byindex(git_blame *blame, uint32_t index)
164 {
165 assert(blame);
166 return (git_blame_hunk*)git_vector_get(&blame->hunks, index);
167 }
168
169 const git_blame_hunk *git_blame_get_hunk_byline(git_blame *blame, size_t lineno)
170 {
171 size_t i, new_lineno = lineno;
172 assert(blame);
173
174 if (!git_vector_bsearch2(&i, &blame->hunks, hunk_byfinalline_search_cmp, &new_lineno)) {
175 return git_blame_get_hunk_byindex(blame, (uint32_t)i);
176 }
177
178 return NULL;
179 }
180
181 static int normalize_options(
182 git_blame_options *out,
183 const git_blame_options *in,
184 git_repository *repo)
185 {
186 git_blame_options dummy = GIT_BLAME_OPTIONS_INIT;
187 if (!in) in = &dummy;
188
189 memcpy(out, in, sizeof(git_blame_options));
190
191 /* No newest_commit => HEAD */
192 if (git_oid_iszero(&out->newest_commit)) {
193 if (git_reference_name_to_id(&out->newest_commit, repo, "HEAD") < 0) {
194 return -1;
195 }
196 }
197
198 /* min_line 0 really means 1 */
199 if (!out->min_line) out->min_line = 1;
200 /* max_line 0 really means N, but we don't know N yet */
201
202 /* Fix up option implications */
203 if (out->flags & GIT_BLAME_TRACK_COPIES_ANY_COMMIT_COPIES)
204 out->flags |= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES;
205 if (out->flags & GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES)
206 out->flags |= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES;
207 if (out->flags & GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES)
208 out->flags |= GIT_BLAME_TRACK_COPIES_SAME_FILE;
209
210 return 0;
211 }
212
213 static git_blame_hunk *split_hunk_in_vector(
214 git_vector *vec,
215 git_blame_hunk *hunk,
216 size_t rel_line,
217 bool return_new)
218 {
219 size_t new_line_count;
220 git_blame_hunk *nh;
221
222 /* Don't split if already at a boundary */
223 if (rel_line <= 0 ||
224 rel_line >= hunk->lines_in_hunk)
225 {
226 return hunk;
227 }
228
229 new_line_count = hunk->lines_in_hunk - rel_line;
230 nh = new_hunk(hunk->final_start_line_number + rel_line, new_line_count,
231 hunk->orig_start_line_number + rel_line, hunk->orig_path);
232
233 if (!nh)
234 return NULL;
235
236 git_oid_cpy(&nh->final_commit_id, &hunk->final_commit_id);
237 git_oid_cpy(&nh->orig_commit_id, &hunk->orig_commit_id);
238
239 /* Adjust hunk that was split */
240 hunk->lines_in_hunk -= new_line_count;
241 git_vector_insert_sorted(vec, nh, NULL);
242 {
243 git_blame_hunk *ret = return_new ? nh : hunk;
244 return ret;
245 }
246 }
247
248 /*
249 * Construct a list of char indices for where lines begin
250 * Adapted from core git:
251 * https://github.com/gitster/git/blob/be5c9fb9049ed470e7005f159bb923a5f4de1309/builtin/blame.c#L1760-L1789
252 */
253 static int index_blob_lines(git_blame *blame)
254 {
255 const char *buf = blame->final_buf;
256 git_off_t len = blame->final_buf_size;
257 int num = 0, incomplete = 0, bol = 1;
258 size_t *i;
259
260 if (len && buf[len-1] != '\n')
261 incomplete++; /* incomplete line at the end */
262 while (len--) {
263 if (bol) {
264 i = git_array_alloc(blame->line_index);
265 GITERR_CHECK_ALLOC(i);
266 *i = buf - blame->final_buf;
267 bol = 0;
268 }
269 if (*buf++ == '\n') {
270 num++;
271 bol = 1;
272 }
273 }
274 i = git_array_alloc(blame->line_index);
275 GITERR_CHECK_ALLOC(i);
276 *i = buf - blame->final_buf;
277 blame->num_lines = num + incomplete;
278 return blame->num_lines;
279 }
280
281 static git_blame_hunk* hunk_from_entry(git_blame__entry *e)
282 {
283 git_blame_hunk *h = new_hunk(
284 e->lno+1, e->num_lines, e->s_lno+1, e->suspect->path);
285
286 if (!h)
287 return NULL;
288
289 git_oid_cpy(&h->final_commit_id, git_commit_id(e->suspect->commit));
290 git_oid_cpy(&h->orig_commit_id, git_commit_id(e->suspect->commit));
291 git_signature_dup(&h->final_signature, git_commit_author(e->suspect->commit));
292 git_signature_dup(&h->orig_signature, git_commit_author(e->suspect->commit));
293 h->boundary = e->is_boundary ? 1 : 0;
294 return h;
295 }
296
297 static int load_blob(git_blame *blame)
298 {
299 int error;
300
301 if (blame->final_blob) return 0;
302
303 error = git_commit_lookup(&blame->final, blame->repository, &blame->options.newest_commit);
304 if (error < 0)
305 goto cleanup;
306 error = git_object_lookup_bypath((git_object**)&blame->final_blob,
307 (git_object*)blame->final, blame->path, GIT_OBJ_BLOB);
308
309 cleanup:
310 return error;
311 }
312
313 static int blame_internal(git_blame *blame)
314 {
315 int error;
316 git_blame__entry *ent = NULL;
317 git_blame__origin *o;
318
319 if ((error = load_blob(blame)) < 0 ||
320 (error = git_blame__get_origin(&o, blame, blame->final, blame->path)) < 0)
321 goto cleanup;
322 blame->final_buf = git_blob_rawcontent(blame->final_blob);
323 blame->final_buf_size = git_blob_rawsize(blame->final_blob);
324
325 ent = git__calloc(1, sizeof(git_blame__entry));
326 GITERR_CHECK_ALLOC(ent);
327
328 ent->num_lines = index_blob_lines(blame);
329 ent->lno = blame->options.min_line - 1;
330 ent->num_lines = ent->num_lines - blame->options.min_line + 1;
331 if (blame->options.max_line > 0)
332 ent->num_lines = blame->options.max_line - blame->options.min_line + 1;
333 ent->s_lno = ent->lno;
334 ent->suspect = o;
335
336 blame->ent = ent;
337
338 error = git_blame__like_git(blame, blame->options.flags);
339
340 cleanup:
341 for (ent = blame->ent; ent; ) {
342 git_blame__entry *e = ent->next;
343 git_blame_hunk *h = hunk_from_entry(ent);
344
345 git_vector_insert(&blame->hunks, h);
346
347 git_blame__free_entry(ent);
348 ent = e;
349 }
350
351 return error;
352 }
353
354 /*******************************************************************************
355 * File blaming
356 ******************************************************************************/
357
358 int git_blame_file(
359 git_blame **out,
360 git_repository *repo,
361 const char *path,
362 git_blame_options *options)
363 {
364 int error = -1;
365 git_blame_options normOptions = GIT_BLAME_OPTIONS_INIT;
366 git_blame *blame = NULL;
367
368 assert(out && repo && path);
369 if ((error = normalize_options(&normOptions, options, repo)) < 0)
370 goto on_error;
371
372 blame = git_blame__alloc(repo, normOptions, path);
373 GITERR_CHECK_ALLOC(blame);
374
375 if ((error = load_blob(blame)) < 0)
376 goto on_error;
377
378 if ((error = blame_internal(blame)) < 0)
379 goto on_error;
380
381 *out = blame;
382 return 0;
383
384 on_error:
385 git_blame_free(blame);
386 return error;
387 }
388
389 /*******************************************************************************
390 * Buffer blaming
391 *******************************************************************************/
392
393 static bool hunk_is_bufferblame(git_blame_hunk *hunk)
394 {
395 return git_oid_iszero(&hunk->final_commit_id);
396 }
397
398 static int buffer_hunk_cb(
399 const git_diff_delta *delta,
400 const git_diff_hunk *hunk,
401 void *payload)
402 {
403 git_blame *blame = (git_blame*)payload;
404 uint32_t wedge_line;
405
406 GIT_UNUSED(delta);
407
408 wedge_line = (hunk->old_lines == 0) ? hunk->new_start : hunk->old_start;
409 blame->current_diff_line = wedge_line;
410
411 blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byline(blame, wedge_line);
412 if (!blame->current_hunk) {
413 /* Line added at the end of the file */
414 blame->current_hunk = new_hunk(wedge_line, 0, wedge_line, blame->path);
415 GITERR_CHECK_ALLOC(blame->current_hunk);
416
417 git_vector_insert(&blame->hunks, blame->current_hunk);
418 } else if (!hunk_starts_at_or_after_line(blame->current_hunk, wedge_line)){
419 /* If this hunk doesn't start between existing hunks, split a hunk up so it does */
420 blame->current_hunk = split_hunk_in_vector(&blame->hunks, blame->current_hunk,
421 wedge_line - blame->current_hunk->orig_start_line_number, true);
422 GITERR_CHECK_ALLOC(blame->current_hunk);
423 }
424
425 return 0;
426 }
427
/* Comparator on the pointer values themselves: -1, 0 or 1 by address. */
static int ptrs_equal_cmp(const void *a, const void *b)
{
	if (a < b)
		return -1;
	if (a > b)
		return 1;
	return 0;
}
429 static int buffer_line_cb(
430 const git_diff_delta *delta,
431 const git_diff_hunk *hunk,
432 const git_diff_line *line,
433 void *payload)
434 {
435 git_blame *blame = (git_blame*)payload;
436
437 GIT_UNUSED(delta);
438 GIT_UNUSED(hunk);
439 GIT_UNUSED(line);
440
441 if (line->origin == GIT_DIFF_LINE_ADDITION) {
442 if (hunk_is_bufferblame(blame->current_hunk) &&
443 hunk_ends_at_or_before_line(blame->current_hunk, blame->current_diff_line)) {
444 /* Append to the current buffer-blame hunk */
445 blame->current_hunk->lines_in_hunk++;
446 shift_hunks_by(&blame->hunks, blame->current_diff_line+1, 1);
447 } else {
448 /* Create a new buffer-blame hunk with this line */
449 shift_hunks_by(&blame->hunks, blame->current_diff_line, 1);
450 blame->current_hunk = new_hunk(blame->current_diff_line, 1, 0, blame->path);
451 GITERR_CHECK_ALLOC(blame->current_hunk);
452
453 git_vector_insert_sorted(&blame->hunks, blame->current_hunk, NULL);
454 }
455 blame->current_diff_line++;
456 }
457
458 if (line->origin == GIT_DIFF_LINE_DELETION) {
459 /* Trim the line from the current hunk; remove it if it's now empty */
460 size_t shift_base = blame->current_diff_line + blame->current_hunk->lines_in_hunk+1;
461
462 if (--(blame->current_hunk->lines_in_hunk) == 0) {
463 size_t i;
464 shift_base--;
465 if (!git_vector_search2(&i, &blame->hunks, ptrs_equal_cmp, blame->current_hunk)) {
466 git_vector_remove(&blame->hunks, i);
467 free_hunk(blame->current_hunk);
468 blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byindex(blame, (uint32_t)i);
469 }
470 }
471 shift_hunks_by(&blame->hunks, shift_base, -1);
472 }
473 return 0;
474 }
475
476 int git_blame_buffer(
477 git_blame **out,
478 git_blame *reference,
479 const char *buffer,
480 size_t buffer_len)
481 {
482 git_blame *blame;
483 git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
484 size_t i;
485 git_blame_hunk *hunk;
486
487 diffopts.context_lines = 0;
488
489 assert(out && reference && buffer && buffer_len);
490
491 blame = git_blame__alloc(reference->repository, reference->options, reference->path);
492 GITERR_CHECK_ALLOC(blame);
493
494 /* Duplicate all of the hunk structures in the reference blame */
495 git_vector_foreach(&reference->hunks, i, hunk) {
496 git_blame_hunk *h = dup_hunk(hunk);
497 GITERR_CHECK_ALLOC(h);
498
499 git_vector_insert(&blame->hunks, h);
500 }
501
502 /* Diff to the reference blob */
503 git_diff_blob_to_buffer(reference->final_blob, blame->path,
504 buffer, buffer_len, blame->path, &diffopts,
505 NULL, NULL, buffer_hunk_cb, buffer_line_cb, blame);
506
507 *out = blame;
508 return 0;
509 }
510
511 int git_blame_init_options(git_blame_options *opts, unsigned int version)
512 {
513 GIT_INIT_STRUCTURE_FROM_TEMPLATE(
514 opts, version, git_blame_options, GIT_BLAME_OPTIONS_INIT);
515 return 0;
516 }