]> git.proxmox.com Git - libgit2.git/blob - src/blame_git.c
Add BD on ca-certificates
[libgit2.git] / src / blame_git.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "blame_git.h"
9
10 #include "commit.h"
11 #include "blob.h"
12 #include "xdiff/xinclude.h"
13 #include "diff_xdiff.h"
14
15 /*
16 * Origin is refcounted and usually we keep the blob contents to be
17 * reused.
18 */
19 static git_blame__origin *origin_incref(git_blame__origin *o)
20 {
21 if (o)
22 o->refcnt++;
23 return o;
24 }
25
26 static void origin_decref(git_blame__origin *o)
27 {
28 if (o && --o->refcnt <= 0) {
29 if (o->previous)
30 origin_decref(o->previous);
31 git_blob_free(o->blob);
32 git_commit_free(o->commit);
33 git__free(o);
34 }
35 }
36
37 /* Given a commit and a path in it, create a new origin structure. */
38 static int make_origin(git_blame__origin **out, git_commit *commit, const char *path)
39 {
40 git_blame__origin *o;
41 git_object *blob;
42 size_t path_len = strlen(path), alloc_len;
43 int error = 0;
44
45 if ((error = git_object_lookup_bypath(&blob, (git_object*)commit,
46 path, GIT_OBJECT_BLOB)) < 0)
47 return error;
48
49 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, sizeof(*o), path_len);
50 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 1);
51 o = git__calloc(1, alloc_len);
52 GIT_ERROR_CHECK_ALLOC(o);
53
54 o->commit = commit;
55 o->blob = (git_blob *) blob;
56 o->refcnt = 1;
57 strcpy(o->path, path);
58
59 *out = o;
60
61 return 0;
62 }
63
64 /* Locate an existing origin or create a new one. */
65 int git_blame__get_origin(
66 git_blame__origin **out,
67 git_blame *blame,
68 git_commit *commit,
69 const char *path)
70 {
71 git_blame__entry *e;
72
73 for (e = blame->ent; e; e = e->next) {
74 if (e->suspect->commit == commit && !strcmp(e->suspect->path, path)) {
75 *out = origin_incref(e->suspect);
76 }
77 }
78 return make_origin(out, commit, path);
79 }
80
81 typedef struct blame_chunk_cb_data {
82 git_blame *blame;
83 git_blame__origin *target;
84 git_blame__origin *parent;
85 long tlno;
86 long plno;
87 }blame_chunk_cb_data;
88
89 static bool same_suspect(git_blame__origin *a, git_blame__origin *b)
90 {
91 if (a == b)
92 return true;
93 if (git_oid_cmp(git_commit_id(a->commit), git_commit_id(b->commit)))
94 return false;
95 return 0 == strcmp(a->path, b->path);
96 }
97
98 /* find the line number of the last line the target is suspected for */
99 static bool find_last_in_target(size_t *out, git_blame *blame, git_blame__origin *target)
100 {
101 git_blame__entry *e;
102 size_t last_in_target = 0;
103 bool found = false;
104
105 *out = 0;
106
107 for (e=blame->ent; e; e=e->next) {
108 if (e->guilty || !same_suspect(e->suspect, target))
109 continue;
110 if (last_in_target < e->s_lno + e->num_lines) {
111 found = true;
112 last_in_target = e->s_lno + e->num_lines;
113 }
114 }
115
116 *out = last_in_target;
117 return found;
118 }
119
120 /*
121 * It is known that lines between tlno to same came from parent, and e
122 * has an overlap with that range. it also is known that parent's
123 * line plno corresponds to e's line tlno.
124 *
125 * <---- e ----->
126 * <------> (entirely within)
127 * <------------> (extends past)
128 * <------------> (starts before)
129 * <------------------> (entirely encloses)
130 *
131 * Split e into potentially three parts; before this chunk, the chunk
132 * to be blamed for the parent, and after that portion.
133 */
134 static void split_overlap(git_blame__entry *split, git_blame__entry *e,
135 size_t tlno, size_t plno, size_t same, git_blame__origin *parent)
136 {
137 size_t chunk_end_lno;
138
139 if (e->s_lno < tlno) {
140 /* there is a pre-chunk part not blamed on the parent */
141 split[0].suspect = origin_incref(e->suspect);
142 split[0].lno = e->lno;
143 split[0].s_lno = e->s_lno;
144 split[0].num_lines = tlno - e->s_lno;
145 split[1].lno = e->lno + tlno - e->s_lno;
146 split[1].s_lno = plno;
147 } else {
148 split[1].lno = e->lno;
149 split[1].s_lno = plno + (e->s_lno - tlno);
150 }
151
152 if (same < e->s_lno + e->num_lines) {
153 /* there is a post-chunk part not blamed on parent */
154 split[2].suspect = origin_incref(e->suspect);
155 split[2].lno = e->lno + (same - e->s_lno);
156 split[2].s_lno = e->s_lno + (same - e->s_lno);
157 split[2].num_lines = e->s_lno + e->num_lines - same;
158 chunk_end_lno = split[2].lno;
159 } else {
160 chunk_end_lno = e->lno + e->num_lines;
161 }
162 split[1].num_lines = chunk_end_lno - split[1].lno;
163
164 /*
165 * if it turns out there is nothing to blame the parent for, forget about
166 * the splitting. !split[1].suspect signals this.
167 */
168 if (split[1].num_lines < 1)
169 return;
170 split[1].suspect = origin_incref(parent);
171 }
172
173 /*
174 * Link in a new blame entry to the scoreboard. Entries that cover the same
175 * line range have been removed from the scoreboard previously.
176 */
177 static void add_blame_entry(git_blame *blame, git_blame__entry *e)
178 {
179 git_blame__entry *ent, *prev = NULL;
180
181 origin_incref(e->suspect);
182
183 for (ent = blame->ent; ent && ent->lno < e->lno; ent = ent->next)
184 prev = ent;
185
186 /* prev, if not NULL, is the last one that is below e */
187 e->prev = prev;
188 if (prev) {
189 e->next = prev->next;
190 prev->next = e;
191 } else {
192 e->next = blame->ent;
193 blame->ent = e;
194 }
195 if (e->next)
196 e->next->prev = e;
197 }
198
199 /*
200 * src typically is on-stack; we want to copy the information in it to
201 * a malloced blame_entry that is already on the linked list of the scoreboard.
202 * The origin of dst loses a refcnt while the origin of src gains one.
203 */
204 static void dup_entry(git_blame__entry *dst, git_blame__entry *src)
205 {
206 git_blame__entry *p, *n;
207
208 p = dst->prev;
209 n = dst->next;
210 origin_incref(src->suspect);
211 origin_decref(dst->suspect);
212 memcpy(dst, src, sizeof(*src));
213 dst->prev = p;
214 dst->next = n;
215 dst->score = 0;
216 }
217
218 /*
219 * split_overlap() divided an existing blame e into up to three parts in split.
220 * Adjust the linked list of blames in the scoreboard to reflect the split.
221 */
222 static void split_blame(git_blame *blame, git_blame__entry *split, git_blame__entry *e)
223 {
224 git_blame__entry *new_entry;
225
226 if (split[0].suspect && split[2].suspect) {
227 /* The first part (reuse storage for the existing entry e */
228 dup_entry(e, &split[0]);
229
230 /* The last part -- me */
231 new_entry = git__malloc(sizeof(*new_entry));
232 memcpy(new_entry, &(split[2]), sizeof(git_blame__entry));
233 add_blame_entry(blame, new_entry);
234
235 /* ... and the middle part -- parent */
236 new_entry = git__malloc(sizeof(*new_entry));
237 memcpy(new_entry, &(split[1]), sizeof(git_blame__entry));
238 add_blame_entry(blame, new_entry);
239 } else if (!split[0].suspect && !split[2].suspect) {
240 /*
241 * The parent covers the entire area; reuse storage for e and replace it
242 * with the parent
243 */
244 dup_entry(e, &split[1]);
245 } else if (split[0].suspect) {
246 /* me and then parent */
247 dup_entry(e, &split[0]);
248 new_entry = git__malloc(sizeof(*new_entry));
249 memcpy(new_entry, &(split[1]), sizeof(git_blame__entry));
250 add_blame_entry(blame, new_entry);
251 } else {
252 /* parent and then me */
253 dup_entry(e, &split[1]);
254 new_entry = git__malloc(sizeof(*new_entry));
255 memcpy(new_entry, &(split[2]), sizeof(git_blame__entry));
256 add_blame_entry(blame, new_entry);
257 }
258 }
259
260 /*
261 * After splitting the blame, the origins used by the on-stack blame_entry
262 * should lose one refcnt each.
263 */
264 static void decref_split(git_blame__entry *split)
265 {
266 int i;
267 for (i=0; i<3; i++)
268 origin_decref(split[i].suspect);
269 }
270
271 /*
272 * Helper for blame_chunk(). blame_entry e is known to overlap with the patch
273 * hunk; split it and pass blame to the parent.
274 */
275 static void blame_overlap(
276 git_blame *blame,
277 git_blame__entry *e,
278 size_t tlno,
279 size_t plno,
280 size_t same,
281 git_blame__origin *parent)
282 {
283 git_blame__entry split[3] = {{0}};
284
285 split_overlap(split, e, tlno, plno, same, parent);
286 if (split[1].suspect)
287 split_blame(blame, split, e);
288 decref_split(split);
289 }
290
291 /*
292 * Process one hunk from the patch between the current suspect for blame_entry
293 * e and its parent. Find and split the overlap, and pass blame to the
294 * overlapping part to the parent.
295 */
296 static void blame_chunk(
297 git_blame *blame,
298 size_t tlno,
299 size_t plno,
300 size_t same,
301 git_blame__origin *target,
302 git_blame__origin *parent)
303 {
304 git_blame__entry *e;
305
306 for (e = blame->ent; e; e = e->next) {
307 if (e->guilty || !same_suspect(e->suspect, target))
308 continue;
309 if (same <= e->s_lno)
310 continue;
311 if (tlno < e->s_lno + e->num_lines) {
312 blame_overlap(blame, e, tlno, plno, same, parent);
313 }
314 }
315 }
316
317 static int my_emit(
318 long start_a, long count_a,
319 long start_b, long count_b,
320 void *cb_data)
321 {
322 blame_chunk_cb_data *d = (blame_chunk_cb_data *)cb_data;
323
324 blame_chunk(d->blame, d->tlno, d->plno, start_b, d->target, d->parent);
325 d->plno = start_a + count_a;
326 d->tlno = start_b + count_b;
327
328 return 0;
329 }
330
331 static void trim_common_tail(mmfile_t *a, mmfile_t *b, long ctx)
332 {
333 const int blk = 1024;
334 long trimmed = 0, recovered = 0;
335 char *ap = a->ptr + a->size;
336 char *bp = b->ptr + b->size;
337 long smaller = (long)((a->size < b->size) ? a->size : b->size);
338
339 if (ctx)
340 return;
341
342 while (blk + trimmed <= smaller && !memcmp(ap - blk, bp - blk, blk)) {
343 trimmed += blk;
344 ap -= blk;
345 bp -= blk;
346 }
347
348 while (recovered < trimmed)
349 if (ap[recovered++] == '\n')
350 break;
351 a->size -= trimmed - recovered;
352 b->size -= trimmed - recovered;
353 }
354
355 static int diff_hunks(mmfile_t file_a, mmfile_t file_b, void *cb_data)
356 {
357 xpparam_t xpp = {0};
358 xdemitconf_t xecfg = {0};
359 xdemitcb_t ecb = {0};
360
361 xecfg.hunk_func = my_emit;
362 ecb.priv = cb_data;
363
364 trim_common_tail(&file_a, &file_b, 0);
365
366 if (file_a.size > GIT_XDIFF_MAX_SIZE ||
367 file_b.size > GIT_XDIFF_MAX_SIZE) {
368 git_error_set(GIT_ERROR_INVALID, "file too large to blame");
369 return -1;
370 }
371
372 return xdl_diff(&file_a, &file_b, &xpp, &xecfg, &ecb);
373 }
374
375 static void fill_origin_blob(git_blame__origin *o, mmfile_t *file)
376 {
377 memset(file, 0, sizeof(*file));
378 if (o->blob) {
379 file->ptr = (char*)git_blob_rawcontent(o->blob);
380 file->size = (size_t)git_blob_rawsize(o->blob);
381 }
382 }
383
384 static int pass_blame_to_parent(
385 git_blame *blame,
386 git_blame__origin *target,
387 git_blame__origin *parent)
388 {
389 size_t last_in_target;
390 mmfile_t file_p, file_o;
391 blame_chunk_cb_data d = { blame, target, parent, 0, 0 };
392
393 if (!find_last_in_target(&last_in_target, blame, target))
394 return 1; /* nothing remains for this target */
395
396 fill_origin_blob(parent, &file_p);
397 fill_origin_blob(target, &file_o);
398
399 if (diff_hunks(file_p, file_o, &d) < 0)
400 return -1;
401
402 /* The reset (i.e. anything after tlno) are the same as the parent */
403 blame_chunk(blame, d.tlno, d.plno, last_in_target, target, parent);
404
405 return 0;
406 }
407
408 static int paths_on_dup(void **old, void *new)
409 {
410 GIT_UNUSED(old);
411 git__free(new);
412 return -1;
413 }
414
415 static git_blame__origin* find_origin(
416 git_blame *blame,
417 git_commit *parent,
418 git_blame__origin *origin)
419 {
420 git_blame__origin *porigin = NULL;
421 git_diff *difflist = NULL;
422 git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
423 git_tree *otree=NULL, *ptree=NULL;
424
425 /* Get the trees from this commit and its parent */
426 if (0 != git_commit_tree(&otree, origin->commit) ||
427 0 != git_commit_tree(&ptree, parent))
428 goto cleanup;
429
430 /* Configure the diff */
431 diffopts.context_lines = 0;
432 diffopts.flags = GIT_DIFF_SKIP_BINARY_CHECK;
433
434 /* Check to see if files we're interested have changed */
435 diffopts.pathspec.count = blame->paths.length;
436 diffopts.pathspec.strings = (char**)blame->paths.contents;
437 if (0 != git_diff_tree_to_tree(&difflist, blame->repository, ptree, otree, &diffopts))
438 goto cleanup;
439
440 if (!git_diff_num_deltas(difflist)) {
441 /* No changes; copy data */
442 git_blame__get_origin(&porigin, blame, parent, origin->path);
443 } else {
444 git_diff_find_options findopts = GIT_DIFF_FIND_OPTIONS_INIT;
445 int i;
446
447 /* Generate a full diff between the two trees */
448 git_diff_free(difflist);
449 diffopts.pathspec.count = 0;
450 if (0 != git_diff_tree_to_tree(&difflist, blame->repository, ptree, otree, &diffopts))
451 goto cleanup;
452
453 /* Let diff find renames */
454 findopts.flags = GIT_DIFF_FIND_RENAMES;
455 if (0 != git_diff_find_similar(difflist, &findopts))
456 goto cleanup;
457
458 /* Find one that matches */
459 for (i=0; i<(int)git_diff_num_deltas(difflist); i++) {
460 const git_diff_delta *delta = git_diff_get_delta(difflist, i);
461
462 if (!git_vector_bsearch(NULL, &blame->paths, delta->new_file.path))
463 {
464 git_vector_insert_sorted(&blame->paths, (void*)git__strdup(delta->old_file.path),
465 paths_on_dup);
466 make_origin(&porigin, parent, delta->old_file.path);
467 }
468 }
469 }
470
471 cleanup:
472 git_diff_free(difflist);
473 git_tree_free(otree);
474 git_tree_free(ptree);
475 return porigin;
476 }
477
478 /*
479 * The blobs of origin and porigin exactly match, so everything origin is
480 * suspected for can be blamed on the parent.
481 */
482 static int pass_whole_blame(git_blame *blame,
483 git_blame__origin *origin, git_blame__origin *porigin)
484 {
485 git_blame__entry *e;
486
487 if (!porigin->blob &&
488 git_object_lookup((git_object**)&porigin->blob, blame->repository,
489 git_blob_id(origin->blob), GIT_OBJECT_BLOB) < 0)
490 return -1;
491 for (e=blame->ent; e; e=e->next) {
492 if (!same_suspect(e->suspect, origin))
493 continue;
494 origin_incref(porigin);
495 origin_decref(e->suspect);
496 e->suspect = porigin;
497 }
498
499 return 0;
500 }
501
502 static int pass_blame(git_blame *blame, git_blame__origin *origin, uint32_t opt)
503 {
504 git_commit *commit = origin->commit;
505 int i, num_parents;
506 git_blame__origin *sg_buf[16];
507 git_blame__origin *porigin, **sg_origin = sg_buf;
508 int ret, error = 0;
509
510 num_parents = git_commit_parentcount(commit);
511 if (!git_oid_cmp(git_commit_id(commit), &blame->options.oldest_commit))
512 /* Stop at oldest specified commit */
513 num_parents = 0;
514 else if (opt & GIT_BLAME_FIRST_PARENT && num_parents > 1)
515 /* Limit search to the first parent */
516 num_parents = 1;
517
518 if (!num_parents) {
519 git_oid_cpy(&blame->options.oldest_commit, git_commit_id(commit));
520 goto finish;
521 } else if (num_parents < (int)ARRAY_SIZE(sg_buf))
522 memset(sg_buf, 0, sizeof(sg_buf));
523 else {
524 sg_origin = git__calloc(num_parents, sizeof(*sg_origin));
525 GIT_ERROR_CHECK_ALLOC(sg_origin);
526 }
527
528 for (i=0; i<num_parents; i++) {
529 git_commit *p;
530 int j, same;
531
532 if (sg_origin[i])
533 continue;
534
535 if ((error = git_commit_parent(&p, origin->commit, i)) < 0)
536 goto finish;
537 porigin = find_origin(blame, p, origin);
538
539 if (!porigin) {
540 /*
541 * We only have to decrement the parent's
542 * reference count when no porigin has
543 * been created, as otherwise the commit
544 * is assigned to the created object.
545 */
546 git_commit_free(p);
547 continue;
548 }
549 if (porigin->blob && origin->blob &&
550 !git_oid_cmp(git_blob_id(porigin->blob), git_blob_id(origin->blob))) {
551 error = pass_whole_blame(blame, origin, porigin);
552 origin_decref(porigin);
553 goto finish;
554 }
555 for (j = same = 0; j<i; j++)
556 if (sg_origin[j] &&
557 !git_oid_cmp(git_blob_id(sg_origin[j]->blob), git_blob_id(porigin->blob))) {
558 same = 1;
559 break;
560 }
561 if (!same)
562 sg_origin[i] = porigin;
563 else
564 origin_decref(porigin);
565 }
566
567 /* Standard blame */
568 for (i=0; i<num_parents; i++) {
569 git_blame__origin *porigin = sg_origin[i];
570 if (!porigin)
571 continue;
572 if (!origin->previous) {
573 origin_incref(porigin);
574 origin->previous = porigin;
575 }
576
577 if ((ret = pass_blame_to_parent(blame, origin, porigin)) != 0) {
578 if (ret < 0)
579 error = -1;
580
581 goto finish;
582 }
583 }
584
585 /* TODO: optionally find moves in parents' files */
586
587 /* TODO: optionally find copies in parents' files */
588
589 finish:
590 for (i=0; i<num_parents; i++)
591 if (sg_origin[i])
592 origin_decref(sg_origin[i]);
593 if (sg_origin != sg_buf)
594 git__free(sg_origin);
595 return error;
596 }
597
598 /*
599 * If two blame entries that are next to each other came from
600 * contiguous lines in the same origin (i.e. <commit, path> pair),
601 * merge them together.
602 */
603 static void coalesce(git_blame *blame)
604 {
605 git_blame__entry *ent, *next;
606
607 for (ent=blame->ent; ent && (next = ent->next); ent = next) {
608 if (same_suspect(ent->suspect, next->suspect) &&
609 ent->guilty == next->guilty &&
610 ent->s_lno + ent->num_lines == next->s_lno)
611 {
612 ent->num_lines += next->num_lines;
613 ent->next = next->next;
614 if (ent->next)
615 ent->next->prev = ent;
616 origin_decref(next->suspect);
617 git__free(next);
618 ent->score = 0;
619 next = ent; /* again */
620 }
621 }
622 }
623
624 int git_blame__like_git(git_blame *blame, uint32_t opt)
625 {
626 int error = 0;
627
628 while (true) {
629 git_blame__entry *ent;
630 git_blame__origin *suspect = NULL;
631
632 /* Find a suspect to break down */
633 for (ent = blame->ent; !suspect && ent; ent = ent->next)
634 if (!ent->guilty)
635 suspect = ent->suspect;
636 if (!suspect)
637 break;
638
639 /* We'll use this suspect later in the loop, so hold on to it for now. */
640 origin_incref(suspect);
641
642 if ((error = pass_blame(blame, suspect, opt)) < 0)
643 break;
644
645 /* Take responsibility for the remaining entries */
646 for (ent = blame->ent; ent; ent = ent->next) {
647 if (same_suspect(ent->suspect, suspect)) {
648 ent->guilty = true;
649 ent->is_boundary = !git_oid_cmp(
650 git_commit_id(suspect->commit),
651 &blame->options.oldest_commit);
652 }
653 }
654 origin_decref(suspect);
655 }
656
657 if (!error)
658 coalesce(blame);
659
660 return error;
661 }
662
663 void git_blame__free_entry(git_blame__entry *ent)
664 {
665 if (!ent) return;
666 origin_decref(ent->suspect);
667 git__free(ent);
668 }