]> git.proxmox.com Git - libgit2.git/blob - src/blame_git.c
Merge pull request #4183 from pks-t/pks/coverity
[libgit2.git] / src / blame_git.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "blame_git.h"
9 #include "commit.h"
10 #include "blob.h"
11 #include "xdiff/xinclude.h"
12 #include "diff_xdiff.h"
13
14 /*
15 * Origin is refcounted and usually we keep the blob contents to be
16 * reused.
17 */
18 static git_blame__origin *origin_incref(git_blame__origin *o)
19 {
20 if (o)
21 o->refcnt++;
22 return o;
23 }
24
25 static void origin_decref(git_blame__origin *o)
26 {
27 if (o && --o->refcnt <= 0) {
28 if (o->previous)
29 origin_decref(o->previous);
30 git_blob_free(o->blob);
31 git_commit_free(o->commit);
32 git__free(o);
33 }
34 }
35
36 /* Given a commit and a path in it, create a new origin structure. */
37 static int make_origin(git_blame__origin **out, git_commit *commit, const char *path)
38 {
39 git_blame__origin *o;
40 git_object *blob;
41 size_t path_len = strlen(path), alloc_len;
42 int error = 0;
43
44 if ((error = git_object_lookup_bypath(&blob, (git_object*)commit,
45 path, GIT_OBJ_BLOB)) < 0)
46 return error;
47
48 GITERR_CHECK_ALLOC_ADD(&alloc_len, sizeof(*o), path_len);
49 GITERR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 1);
50 o = git__calloc(1, alloc_len);
51 GITERR_CHECK_ALLOC(o);
52
53 o->commit = commit;
54 o->blob = (git_blob *) blob;
55 o->refcnt = 1;
56 strcpy(o->path, path);
57
58 *out = o;
59
60 return 0;
61 }
62
63 /* Locate an existing origin or create a new one. */
64 int git_blame__get_origin(
65 git_blame__origin **out,
66 git_blame *blame,
67 git_commit *commit,
68 const char *path)
69 {
70 git_blame__entry *e;
71
72 for (e = blame->ent; e; e = e->next) {
73 if (e->suspect->commit == commit && !strcmp(e->suspect->path, path)) {
74 *out = origin_incref(e->suspect);
75 }
76 }
77 return make_origin(out, commit, path);
78 }
79
80 typedef struct blame_chunk_cb_data {
81 git_blame *blame;
82 git_blame__origin *target;
83 git_blame__origin *parent;
84 long tlno;
85 long plno;
86 }blame_chunk_cb_data;
87
88 static bool same_suspect(git_blame__origin *a, git_blame__origin *b)
89 {
90 if (a == b)
91 return true;
92 if (git_oid_cmp(git_commit_id(a->commit), git_commit_id(b->commit)))
93 return false;
94 return 0 == strcmp(a->path, b->path);
95 }
96
97 /* find the line number of the last line the target is suspected for */
98 static bool find_last_in_target(size_t *out, git_blame *blame, git_blame__origin *target)
99 {
100 git_blame__entry *e;
101 size_t last_in_target = 0;
102 bool found = false;
103
104 *out = 0;
105
106 for (e=blame->ent; e; e=e->next) {
107 if (e->guilty || !same_suspect(e->suspect, target))
108 continue;
109 if (last_in_target < e->s_lno + e->num_lines) {
110 found = true;
111 last_in_target = e->s_lno + e->num_lines;
112 }
113 }
114
115 *out = last_in_target;
116 return found;
117 }
118
119 /*
120 * It is known that lines between tlno to same came from parent, and e
121 * has an overlap with that range. it also is known that parent's
122 * line plno corresponds to e's line tlno.
123 *
124 * <---- e ----->
125 * <------> (entirely within)
126 * <------------> (extends past)
127 * <------------> (starts before)
128 * <------------------> (entirely encloses)
129 *
130 * Split e into potentially three parts; before this chunk, the chunk
131 * to be blamed for the parent, and after that portion.
132 */
133 static void split_overlap(git_blame__entry *split, git_blame__entry *e,
134 size_t tlno, size_t plno, size_t same, git_blame__origin *parent)
135 {
136 size_t chunk_end_lno;
137
138 if (e->s_lno < tlno) {
139 /* there is a pre-chunk part not blamed on the parent */
140 split[0].suspect = origin_incref(e->suspect);
141 split[0].lno = e->lno;
142 split[0].s_lno = e->s_lno;
143 split[0].num_lines = tlno - e->s_lno;
144 split[1].lno = e->lno + tlno - e->s_lno;
145 split[1].s_lno = plno;
146 } else {
147 split[1].lno = e->lno;
148 split[1].s_lno = plno + (e->s_lno - tlno);
149 }
150
151 if (same < e->s_lno + e->num_lines) {
152 /* there is a post-chunk part not blamed on parent */
153 split[2].suspect = origin_incref(e->suspect);
154 split[2].lno = e->lno + (same - e->s_lno);
155 split[2].s_lno = e->s_lno + (same - e->s_lno);
156 split[2].num_lines = e->s_lno + e->num_lines - same;
157 chunk_end_lno = split[2].lno;
158 } else {
159 chunk_end_lno = e->lno + e->num_lines;
160 }
161 split[1].num_lines = chunk_end_lno - split[1].lno;
162
163 /*
164 * if it turns out there is nothing to blame the parent for, forget about
165 * the splitting. !split[1].suspect signals this.
166 */
167 if (split[1].num_lines < 1)
168 return;
169 split[1].suspect = origin_incref(parent);
170 }
171
172 /*
173 * Link in a new blame entry to the scoreboard. Entries that cover the same
174 * line range have been removed from the scoreboard previously.
175 */
176 static void add_blame_entry(git_blame *blame, git_blame__entry *e)
177 {
178 git_blame__entry *ent, *prev = NULL;
179
180 origin_incref(e->suspect);
181
182 for (ent = blame->ent; ent && ent->lno < e->lno; ent = ent->next)
183 prev = ent;
184
185 /* prev, if not NULL, is the last one that is below e */
186 e->prev = prev;
187 if (prev) {
188 e->next = prev->next;
189 prev->next = e;
190 } else {
191 e->next = blame->ent;
192 blame->ent = e;
193 }
194 if (e->next)
195 e->next->prev = e;
196 }
197
198 /*
199 * src typically is on-stack; we want to copy the information in it to
200 * a malloced blame_entry that is already on the linked list of the scoreboard.
201 * The origin of dst loses a refcnt while the origin of src gains one.
202 */
203 static void dup_entry(git_blame__entry *dst, git_blame__entry *src)
204 {
205 git_blame__entry *p, *n;
206
207 p = dst->prev;
208 n = dst->next;
209 origin_incref(src->suspect);
210 origin_decref(dst->suspect);
211 memcpy(dst, src, sizeof(*src));
212 dst->prev = p;
213 dst->next = n;
214 dst->score = 0;
215 }
216
217 /*
218 * split_overlap() divided an existing blame e into up to three parts in split.
219 * Adjust the linked list of blames in the scoreboard to reflect the split.
220 */
221 static void split_blame(git_blame *blame, git_blame__entry *split, git_blame__entry *e)
222 {
223 git_blame__entry *new_entry;
224
225 if (split[0].suspect && split[2].suspect) {
226 /* The first part (reuse storage for the existing entry e */
227 dup_entry(e, &split[0]);
228
229 /* The last part -- me */
230 new_entry = git__malloc(sizeof(*new_entry));
231 memcpy(new_entry, &(split[2]), sizeof(git_blame__entry));
232 add_blame_entry(blame, new_entry);
233
234 /* ... and the middle part -- parent */
235 new_entry = git__malloc(sizeof(*new_entry));
236 memcpy(new_entry, &(split[1]), sizeof(git_blame__entry));
237 add_blame_entry(blame, new_entry);
238 } else if (!split[0].suspect && !split[2].suspect) {
239 /*
240 * The parent covers the entire area; reuse storage for e and replace it
241 * with the parent
242 */
243 dup_entry(e, &split[1]);
244 } else if (split[0].suspect) {
245 /* me and then parent */
246 dup_entry(e, &split[0]);
247 new_entry = git__malloc(sizeof(*new_entry));
248 memcpy(new_entry, &(split[1]), sizeof(git_blame__entry));
249 add_blame_entry(blame, new_entry);
250 } else {
251 /* parent and then me */
252 dup_entry(e, &split[1]);
253 new_entry = git__malloc(sizeof(*new_entry));
254 memcpy(new_entry, &(split[2]), sizeof(git_blame__entry));
255 add_blame_entry(blame, new_entry);
256 }
257 }
258
259 /*
260 * After splitting the blame, the origins used by the on-stack blame_entry
261 * should lose one refcnt each.
262 */
263 static void decref_split(git_blame__entry *split)
264 {
265 int i;
266 for (i=0; i<3; i++)
267 origin_decref(split[i].suspect);
268 }
269
270 /*
271 * Helper for blame_chunk(). blame_entry e is known to overlap with the patch
272 * hunk; split it and pass blame to the parent.
273 */
274 static void blame_overlap(
275 git_blame *blame,
276 git_blame__entry *e,
277 size_t tlno,
278 size_t plno,
279 size_t same,
280 git_blame__origin *parent)
281 {
282 git_blame__entry split[3] = {{0}};
283
284 split_overlap(split, e, tlno, plno, same, parent);
285 if (split[1].suspect)
286 split_blame(blame, split, e);
287 decref_split(split);
288 }
289
290 /*
291 * Process one hunk from the patch between the current suspect for blame_entry
292 * e and its parent. Find and split the overlap, and pass blame to the
293 * overlapping part to the parent.
294 */
295 static void blame_chunk(
296 git_blame *blame,
297 size_t tlno,
298 size_t plno,
299 size_t same,
300 git_blame__origin *target,
301 git_blame__origin *parent)
302 {
303 git_blame__entry *e;
304
305 for (e = blame->ent; e; e = e->next) {
306 if (e->guilty || !same_suspect(e->suspect, target))
307 continue;
308 if (same <= e->s_lno)
309 continue;
310 if (tlno < e->s_lno + e->num_lines) {
311 blame_overlap(blame, e, tlno, plno, same, parent);
312 }
313 }
314 }
315
316 static int my_emit(
317 long start_a, long count_a,
318 long start_b, long count_b,
319 void *cb_data)
320 {
321 blame_chunk_cb_data *d = (blame_chunk_cb_data *)cb_data;
322
323 blame_chunk(d->blame, d->tlno, d->plno, start_b, d->target, d->parent);
324 d->plno = start_a + count_a;
325 d->tlno = start_b + count_b;
326
327 return 0;
328 }
329
330 static void trim_common_tail(mmfile_t *a, mmfile_t *b, long ctx)
331 {
332 const int blk = 1024;
333 long trimmed = 0, recovered = 0;
334 char *ap = a->ptr + a->size;
335 char *bp = b->ptr + b->size;
336 long smaller = (long)((a->size < b->size) ? a->size : b->size);
337
338 if (ctx)
339 return;
340
341 while (blk + trimmed <= smaller && !memcmp(ap - blk, bp - blk, blk)) {
342 trimmed += blk;
343 ap -= blk;
344 bp -= blk;
345 }
346
347 while (recovered < trimmed)
348 if (ap[recovered++] == '\n')
349 break;
350 a->size -= trimmed - recovered;
351 b->size -= trimmed - recovered;
352 }
353
354 static int diff_hunks(mmfile_t file_a, mmfile_t file_b, void *cb_data)
355 {
356 xpparam_t xpp = {0};
357 xdemitconf_t xecfg = {0};
358 xdemitcb_t ecb = {0};
359
360 xecfg.hunk_func = my_emit;
361 ecb.priv = cb_data;
362
363 trim_common_tail(&file_a, &file_b, 0);
364
365 if (file_a.size > GIT_XDIFF_MAX_SIZE ||
366 file_b.size > GIT_XDIFF_MAX_SIZE) {
367 giterr_set(GITERR_INVALID, "file too large to blame");
368 return -1;
369 }
370
371 return xdl_diff(&file_a, &file_b, &xpp, &xecfg, &ecb);
372 }
373
374 static void fill_origin_blob(git_blame__origin *o, mmfile_t *file)
375 {
376 memset(file, 0, sizeof(*file));
377 if (o->blob) {
378 file->ptr = (char*)git_blob_rawcontent(o->blob);
379 file->size = (size_t)git_blob_rawsize(o->blob);
380 }
381 }
382
383 static int pass_blame_to_parent(
384 git_blame *blame,
385 git_blame__origin *target,
386 git_blame__origin *parent)
387 {
388 size_t last_in_target;
389 mmfile_t file_p, file_o;
390 blame_chunk_cb_data d = { blame, target, parent, 0, 0 };
391
392 if (!find_last_in_target(&last_in_target, blame, target))
393 return 1; /* nothing remains for this target */
394
395 fill_origin_blob(parent, &file_p);
396 fill_origin_blob(target, &file_o);
397
398 if (diff_hunks(file_p, file_o, &d) < 0)
399 return -1;
400
401 /* The reset (i.e. anything after tlno) are the same as the parent */
402 blame_chunk(blame, d.tlno, d.plno, last_in_target, target, parent);
403
404 return 0;
405 }
406
/*
 * Duplicate handler used when inserting into the sorted list of blamed
 * paths: the existing element is left alone, the incoming string is
 * freed, and -1 is returned (presumably telling the vector not to insert
 * it -- confirm against git_vector_insert_sorted()).
 */
static int paths_on_dup(void **old, void *new)
{
	git__free(new);
	GIT_UNUSED(old);

	return -1;
}
413
414 static git_blame__origin* find_origin(
415 git_blame *blame,
416 git_commit *parent,
417 git_blame__origin *origin)
418 {
419 git_blame__origin *porigin = NULL;
420 git_diff *difflist = NULL;
421 git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
422 git_tree *otree=NULL, *ptree=NULL;
423
424 /* Get the trees from this commit and its parent */
425 if (0 != git_commit_tree(&otree, origin->commit) ||
426 0 != git_commit_tree(&ptree, parent))
427 goto cleanup;
428
429 /* Configure the diff */
430 diffopts.context_lines = 0;
431 diffopts.flags = GIT_DIFF_SKIP_BINARY_CHECK;
432
433 /* Check to see if files we're interested have changed */
434 diffopts.pathspec.count = blame->paths.length;
435 diffopts.pathspec.strings = (char**)blame->paths.contents;
436 if (0 != git_diff_tree_to_tree(&difflist, blame->repository, ptree, otree, &diffopts))
437 goto cleanup;
438
439 if (!git_diff_num_deltas(difflist)) {
440 /* No changes; copy data */
441 git_blame__get_origin(&porigin, blame, parent, origin->path);
442 } else {
443 git_diff_find_options findopts = GIT_DIFF_FIND_OPTIONS_INIT;
444 int i;
445
446 /* Generate a full diff between the two trees */
447 git_diff_free(difflist);
448 diffopts.pathspec.count = 0;
449 if (0 != git_diff_tree_to_tree(&difflist, blame->repository, ptree, otree, &diffopts))
450 goto cleanup;
451
452 /* Let diff find renames */
453 findopts.flags = GIT_DIFF_FIND_RENAMES;
454 if (0 != git_diff_find_similar(difflist, &findopts))
455 goto cleanup;
456
457 /* Find one that matches */
458 for (i=0; i<(int)git_diff_num_deltas(difflist); i++) {
459 const git_diff_delta *delta = git_diff_get_delta(difflist, i);
460
461 if (!git_vector_bsearch(NULL, &blame->paths, delta->new_file.path))
462 {
463 git_vector_insert_sorted(&blame->paths, (void*)git__strdup(delta->old_file.path),
464 paths_on_dup);
465 make_origin(&porigin, parent, delta->old_file.path);
466 }
467 }
468 }
469
470 cleanup:
471 git_diff_free(difflist);
472 git_tree_free(otree);
473 git_tree_free(ptree);
474 return porigin;
475 }
476
477 /*
478 * The blobs of origin and porigin exactly match, so everything origin is
479 * suspected for can be blamed on the parent.
480 */
481 static int pass_whole_blame(git_blame *blame,
482 git_blame__origin *origin, git_blame__origin *porigin)
483 {
484 git_blame__entry *e;
485
486 if (!porigin->blob &&
487 git_object_lookup((git_object**)&porigin->blob, blame->repository,
488 git_blob_id(origin->blob), GIT_OBJ_BLOB) < 0)
489 return -1;
490 for (e=blame->ent; e; e=e->next) {
491 if (!same_suspect(e->suspect, origin))
492 continue;
493 origin_incref(porigin);
494 origin_decref(e->suspect);
495 e->suspect = porigin;
496 }
497
498 return 0;
499 }
500
501 static int pass_blame(git_blame *blame, git_blame__origin *origin, uint32_t opt)
502 {
503 git_commit *commit = origin->commit;
504 int i, num_parents;
505 git_blame__origin *sg_buf[16];
506 git_blame__origin *porigin, **sg_origin = sg_buf;
507 int ret, error = 0;
508
509 num_parents = git_commit_parentcount(commit);
510 if (!git_oid_cmp(git_commit_id(commit), &blame->options.oldest_commit))
511 /* Stop at oldest specified commit */
512 num_parents = 0;
513 else if (opt & GIT_BLAME_FIRST_PARENT && num_parents > 1)
514 /* Limit search to the first parent */
515 num_parents = 1;
516
517 if (!num_parents) {
518 git_oid_cpy(&blame->options.oldest_commit, git_commit_id(commit));
519 goto finish;
520 } else if (num_parents < (int)ARRAY_SIZE(sg_buf))
521 memset(sg_buf, 0, sizeof(sg_buf));
522 else {
523 sg_origin = git__calloc(num_parents, sizeof(*sg_origin));
524 GITERR_CHECK_ALLOC(sg_origin);
525 }
526
527 for (i=0; i<num_parents; i++) {
528 git_commit *p;
529 int j, same;
530
531 if (sg_origin[i])
532 continue;
533
534 if ((error = git_commit_parent(&p, origin->commit, i)) < 0)
535 goto finish;
536 porigin = find_origin(blame, p, origin);
537
538 if (!porigin) {
539 /*
540 * We only have to decrement the parent's
541 * reference count when no porigin has
542 * been created, as otherwise the commit
543 * is assigned to the created object.
544 */
545 git_commit_free(p);
546 continue;
547 }
548 if (porigin->blob && origin->blob &&
549 !git_oid_cmp(git_blob_id(porigin->blob), git_blob_id(origin->blob))) {
550 error = pass_whole_blame(blame, origin, porigin);
551 origin_decref(porigin);
552 goto finish;
553 }
554 for (j = same = 0; j<i; j++)
555 if (sg_origin[j] &&
556 !git_oid_cmp(git_blob_id(sg_origin[j]->blob), git_blob_id(porigin->blob))) {
557 same = 1;
558 break;
559 }
560 if (!same)
561 sg_origin[i] = porigin;
562 else
563 origin_decref(porigin);
564 }
565
566 /* Standard blame */
567 for (i=0; i<num_parents; i++) {
568 git_blame__origin *porigin = sg_origin[i];
569 if (!porigin)
570 continue;
571 if (!origin->previous) {
572 origin_incref(porigin);
573 origin->previous = porigin;
574 }
575
576 if ((ret = pass_blame_to_parent(blame, origin, porigin)) != 0) {
577 if (ret < 0)
578 error = -1;
579
580 goto finish;
581 }
582 }
583
584 /* TODO: optionally find moves in parents' files */
585
586 /* TODO: optionally find copies in parents' files */
587
588 finish:
589 for (i=0; i<num_parents; i++)
590 if (sg_origin[i])
591 origin_decref(sg_origin[i]);
592 if (sg_origin != sg_buf)
593 git__free(sg_origin);
594 return error;
595 }
596
597 /*
598 * If two blame entries that are next to each other came from
599 * contiguous lines in the same origin (i.e. <commit, path> pair),
600 * merge them together.
601 */
602 static void coalesce(git_blame *blame)
603 {
604 git_blame__entry *ent, *next;
605
606 for (ent=blame->ent; ent && (next = ent->next); ent = next) {
607 if (same_suspect(ent->suspect, next->suspect) &&
608 ent->guilty == next->guilty &&
609 ent->s_lno + ent->num_lines == next->s_lno)
610 {
611 ent->num_lines += next->num_lines;
612 ent->next = next->next;
613 if (ent->next)
614 ent->next->prev = ent;
615 origin_decref(next->suspect);
616 git__free(next);
617 ent->score = 0;
618 next = ent; /* again */
619 }
620 }
621 }
622
623 int git_blame__like_git(git_blame *blame, uint32_t opt)
624 {
625 while (true) {
626 git_blame__entry *ent;
627 git_blame__origin *suspect = NULL;
628
629 /* Find a suspect to break down */
630 for (ent = blame->ent; !suspect && ent; ent = ent->next)
631 if (!ent->guilty)
632 suspect = ent->suspect;
633 if (!suspect)
634 return 0; /* all done */
635
636 /* We'll use this suspect later in the loop, so hold on to it for now. */
637 origin_incref(suspect);
638
639 if (pass_blame(blame, suspect, opt) < 0)
640 return -1;
641
642 /* Take responsibility for the remaining entries */
643 for (ent = blame->ent; ent; ent = ent->next) {
644 if (same_suspect(ent->suspect, suspect)) {
645 ent->guilty = true;
646 ent->is_boundary = !git_oid_cmp(
647 git_commit_id(suspect->commit),
648 &blame->options.oldest_commit);
649 }
650 }
651 origin_decref(suspect);
652 }
653
654 coalesce(blame);
655
656 return 0;
657 }
658
659 void git_blame__free_entry(git_blame__entry *ent)
660 {
661 if (!ent) return;
662 origin_decref(ent->suspect);
663 git__free(ent);
664 }