]> git.proxmox.com Git - libgit2.git/blob - src/blame_git.c
Merge pull request #2144 from linquize/branch-f-current
[libgit2.git] / src / blame_git.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "blame_git.h"
9 #include "commit.h"
10 #include "blob.h"
11 #include "xdiff/xinclude.h"
12
13 /*
14 * Origin is refcounted and usually we keep the blob contents to be
15 * reused.
16 */
17 static git_blame__origin *origin_incref(git_blame__origin *o)
18 {
19 if (o)
20 o->refcnt++;
21 return o;
22 }
23
24 static void origin_decref(git_blame__origin *o)
25 {
26 if (o && --o->refcnt <= 0) {
27 if (o->previous)
28 origin_decref(o->previous);
29 git_blob_free(o->blob);
30 git_commit_free(o->commit);
31 git__free(o);
32 }
33 }
34
35 /* Given a commit and a path in it, create a new origin structure. */
36 static int make_origin(git_blame__origin **out, git_commit *commit, const char *path)
37 {
38 int error = 0;
39 git_blame__origin *o;
40
41 o = git__calloc(1, sizeof(*o) + strlen(path) + 1);
42 GITERR_CHECK_ALLOC(o);
43 o->commit = commit;
44 o->refcnt = 1;
45 strcpy(o->path, path);
46
47 if (!(error = git_object_lookup_bypath((git_object**)&o->blob, (git_object*)commit,
48 path, GIT_OBJ_BLOB))) {
49 *out = o;
50 } else {
51 origin_decref(o);
52 }
53 return error;
54 }
55
56 /* Locate an existing origin or create a new one. */
57 int git_blame__get_origin(
58 git_blame__origin **out,
59 git_blame *blame,
60 git_commit *commit,
61 const char *path)
62 {
63 git_blame__entry *e;
64
65 for (e = blame->ent; e; e = e->next) {
66 if (e->suspect->commit == commit && !strcmp(e->suspect->path, path)) {
67 *out = origin_incref(e->suspect);
68 }
69 }
70 return make_origin(out, commit, path);
71 }
72
73 typedef struct blame_chunk_cb_data {
74 git_blame *blame;
75 git_blame__origin *target;
76 git_blame__origin *parent;
77 long tlno;
78 long plno;
79 }blame_chunk_cb_data;
80
81 static bool same_suspect(git_blame__origin *a, git_blame__origin *b)
82 {
83 if (a == b)
84 return true;
85 if (git_oid_cmp(git_commit_id(a->commit), git_commit_id(b->commit)))
86 return false;
87 return 0 == strcmp(a->path, b->path);
88 }
89
90 /* find the line number of the last line the target is suspected for */
91 static int find_last_in_target(git_blame *blame, git_blame__origin *target)
92 {
93 git_blame__entry *e;
94 int last_in_target = -1;
95
96 for (e=blame->ent; e; e=e->next) {
97 if (e->guilty || !same_suspect(e->suspect, target))
98 continue;
99 if (last_in_target < e->s_lno + e->num_lines)
100 last_in_target = e->s_lno + e->num_lines;
101 }
102 return last_in_target;
103 }
104
105 /*
106 * It is known that lines between tlno to same came from parent, and e
107 * has an overlap with that range. it also is known that parent's
108 * line plno corresponds to e's line tlno.
109 *
110 * <---- e ----->
111 * <------> (entirely within)
112 * <------------> (extends past)
113 * <------------> (starts before)
114 * <------------------> (entirely encloses)
115 *
116 * Split e into potentially three parts; before this chunk, the chunk
117 * to be blamed for the parent, and after that portion.
118 */
119 static void split_overlap(git_blame__entry *split, git_blame__entry *e,
120 int tlno, int plno, int same, git_blame__origin *parent)
121 {
122 int chunk_end_lno;
123
124 if (e->s_lno < tlno) {
125 /* there is a pre-chunk part not blamed on the parent */
126 split[0].suspect = origin_incref(e->suspect);
127 split[0].lno = e->lno;
128 split[0].s_lno = e->s_lno;
129 split[0].num_lines = tlno - e->s_lno;
130 split[1].lno = e->lno + tlno - e->s_lno;
131 split[1].s_lno = plno;
132 } else {
133 split[1].lno = e->lno;
134 split[1].s_lno = plno + (e->s_lno - tlno);
135 }
136
137 if (same < e->s_lno + e->num_lines) {
138 /* there is a post-chunk part not blamed on parent */
139 split[2].suspect = origin_incref(e->suspect);
140 split[2].lno = e->lno + (same - e->s_lno);
141 split[2].s_lno = e->s_lno + (same - e->s_lno);
142 split[2].num_lines = e->s_lno + e->num_lines - same;
143 chunk_end_lno = split[2].lno;
144 } else {
145 chunk_end_lno = e->lno + e->num_lines;
146 }
147 split[1].num_lines = chunk_end_lno - split[1].lno;
148
149 /*
150 * if it turns out there is nothing to blame the parent for, forget about
151 * the splitting. !split[1].suspect signals this.
152 */
153 if (split[1].num_lines < 1)
154 return;
155 split[1].suspect = origin_incref(parent);
156 }
157
158 /*
159 * Link in a new blame entry to the scoreboard. Entries that cover the same
160 * line range have been removed from the scoreboard previously.
161 */
162 static void add_blame_entry(git_blame *blame, git_blame__entry *e)
163 {
164 git_blame__entry *ent, *prev = NULL;
165
166 origin_incref(e->suspect);
167
168 for (ent = blame->ent; ent && ent->lno < e->lno; ent = ent->next)
169 prev = ent;
170
171 /* prev, if not NULL, is the last one that is below e */
172 e->prev = prev;
173 if (prev) {
174 e->next = prev->next;
175 prev->next = e;
176 } else {
177 e->next = blame->ent;
178 blame->ent = e;
179 }
180 if (e->next)
181 e->next->prev = e;
182 }
183
184 /*
185 * src typically is on-stack; we want to copy the information in it to
186 * a malloced blame_entry that is already on the linked list of the scoreboard.
187 * The origin of dst loses a refcnt while the origin of src gains one.
188 */
189 static void dup_entry(git_blame__entry *dst, git_blame__entry *src)
190 {
191 git_blame__entry *p, *n;
192
193 p = dst->prev;
194 n = dst->next;
195 origin_incref(src->suspect);
196 origin_decref(dst->suspect);
197 memcpy(dst, src, sizeof(*src));
198 dst->prev = p;
199 dst->next = n;
200 dst->score = 0;
201 }
202
203 /*
204 * split_overlap() divided an existing blame e into up to three parts in split.
205 * Adjust the linked list of blames in the scoreboard to reflect the split.
206 */
207 static void split_blame(git_blame *blame, git_blame__entry *split, git_blame__entry *e)
208 {
209 git_blame__entry *new_entry;
210
211 if (split[0].suspect && split[2].suspect) {
212 /* The first part (reuse storage for the existing entry e */
213 dup_entry(e, &split[0]);
214
215 /* The last part -- me */
216 new_entry = git__malloc(sizeof(*new_entry));
217 memcpy(new_entry, &(split[2]), sizeof(git_blame__entry));
218 add_blame_entry(blame, new_entry);
219
220 /* ... and the middle part -- parent */
221 new_entry = git__malloc(sizeof(*new_entry));
222 memcpy(new_entry, &(split[1]), sizeof(git_blame__entry));
223 add_blame_entry(blame, new_entry);
224 } else if (!split[0].suspect && !split[2].suspect) {
225 /*
226 * The parent covers the entire area; reuse storage for e and replace it
227 * with the parent
228 */
229 dup_entry(e, &split[1]);
230 } else if (split[0].suspect) {
231 /* me and then parent */
232 dup_entry(e, &split[0]);
233 new_entry = git__malloc(sizeof(*new_entry));
234 memcpy(new_entry, &(split[1]), sizeof(git_blame__entry));
235 add_blame_entry(blame, new_entry);
236 } else {
237 /* parent and then me */
238 dup_entry(e, &split[1]);
239 new_entry = git__malloc(sizeof(*new_entry));
240 memcpy(new_entry, &(split[2]), sizeof(git_blame__entry));
241 add_blame_entry(blame, new_entry);
242 }
243 }
244
245 /*
246 * After splitting the blame, the origins used by the on-stack blame_entry
247 * should lose one refcnt each.
248 */
249 static void decref_split(git_blame__entry *split)
250 {
251 int i;
252 for (i=0; i<3; i++)
253 origin_decref(split[i].suspect);
254 }
255
256 /*
257 * Helper for blame_chunk(). blame_entry e is known to overlap with the patch
258 * hunk; split it and pass blame to the parent.
259 */
260 static void blame_overlap(
261 git_blame *blame,
262 git_blame__entry *e,
263 int tlno,
264 int plno,
265 int same,
266 git_blame__origin *parent)
267 {
268 git_blame__entry split[3] = {{0}};
269
270 split_overlap(split, e, tlno, plno, same, parent);
271 if (split[1].suspect)
272 split_blame(blame, split, e);
273 decref_split(split);
274 }
275
276 /*
277 * Process one hunk from the patch between the current suspect for blame_entry
278 * e and its parent. Find and split the overlap, and pass blame to the
279 * overlapping part to the parent.
280 */
281 static void blame_chunk(
282 git_blame *blame,
283 int tlno,
284 int plno,
285 int same,
286 git_blame__origin *target,
287 git_blame__origin *parent)
288 {
289 git_blame__entry *e;
290
291 for (e = blame->ent; e; e = e->next) {
292 if (e->guilty || !same_suspect(e->suspect, target))
293 continue;
294 if (same <= e->s_lno)
295 continue;
296 if (tlno < e->s_lno + e->num_lines) {
297 blame_overlap(blame, e, tlno, plno, same, parent);
298 }
299 }
300 }
301
302 static int my_emit(
303 xdfenv_t *xe,
304 xdchange_t *xscr,
305 xdemitcb_t *ecb,
306 xdemitconf_t const *xecfg)
307 {
308 xdchange_t *xch = xscr;
309 GIT_UNUSED(xe);
310 GIT_UNUSED(xecfg);
311 while (xch) {
312 blame_chunk_cb_data *d = ecb->priv;
313 blame_chunk(d->blame, d->tlno, d->plno, xch->i2, d->target, d->parent);
314 d->plno = xch->i1 + xch->chg1;
315 d->tlno = xch->i2 + xch->chg2;
316 xch = xch->next;
317 }
318 return 0;
319 }
320
321 static void trim_common_tail(mmfile_t *a, mmfile_t *b, long ctx)
322 {
323 const int blk = 1024;
324 long trimmed = 0, recovered = 0;
325 char *ap = a->ptr + a->size;
326 char *bp = b->ptr + b->size;
327 long smaller = (long)((a->size < b->size) ? a->size : b->size);
328
329 if (ctx)
330 return;
331
332 while (blk + trimmed <= smaller && !memcmp(ap - blk, bp - blk, blk)) {
333 trimmed += blk;
334 ap -= blk;
335 bp -= blk;
336 }
337
338 while (recovered < trimmed)
339 if (ap[recovered++] == '\n')
340 break;
341 a->size -= trimmed - recovered;
342 b->size -= trimmed - recovered;
343 }
344
345 static int diff_hunks(mmfile_t file_a, mmfile_t file_b, void *cb_data)
346 {
347 xpparam_t xpp = {0};
348 xdemitconf_t xecfg = {0};
349 xdemitcb_t ecb = {0};
350
351 xecfg.emit_func = (void(*)(void))my_emit;
352 ecb.priv = cb_data;
353
354 trim_common_tail(&file_a, &file_b, 0);
355 return xdl_diff(&file_a, &file_b, &xpp, &xecfg, &ecb);
356 }
357
358 static void fill_origin_blob(git_blame__origin *o, mmfile_t *file)
359 {
360 memset(file, 0, sizeof(*file));
361 if (o->blob) {
362 file->ptr = (char*)git_blob_rawcontent(o->blob);
363 file->size = (size_t)git_blob_rawsize(o->blob);
364 }
365 }
366
367 static int pass_blame_to_parent(
368 git_blame *blame,
369 git_blame__origin *target,
370 git_blame__origin *parent)
371 {
372 int last_in_target;
373 mmfile_t file_p, file_o;
374 blame_chunk_cb_data d = { blame, target, parent, 0, 0 };
375
376 last_in_target = find_last_in_target(blame, target);
377 if (last_in_target < 0)
378 return 1; /* nothing remains for this target */
379
380 fill_origin_blob(parent, &file_p);
381 fill_origin_blob(target, &file_o);
382
383 diff_hunks(file_p, file_o, &d);
384 /* The reset (i.e. anything after tlno) are the same as the parent */
385 blame_chunk(blame, d.tlno, d.plno, last_in_target, target, parent);
386
387 return 0;
388 }
389
390 static int paths_on_dup(void **old, void *new)
391 {
392 GIT_UNUSED(old);
393 git__free(new);
394 return -1;
395 }
396
397 static git_blame__origin* find_origin(
398 git_blame *blame,
399 git_commit *parent,
400 git_blame__origin *origin)
401 {
402 git_blame__origin *porigin = NULL;
403 git_diff *difflist = NULL;
404 git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
405 git_tree *otree=NULL, *ptree=NULL;
406
407 /* Get the trees from this commit and its parent */
408 if (0 != git_commit_tree(&otree, origin->commit) ||
409 0 != git_commit_tree(&ptree, parent))
410 goto cleanup;
411
412 /* Configure the diff */
413 diffopts.context_lines = 0;
414 diffopts.flags = GIT_DIFF_SKIP_BINARY_CHECK;
415
416 /* Check to see if files we're interested have changed */
417 diffopts.pathspec.count = blame->paths.length;
418 diffopts.pathspec.strings = (char**)blame->paths.contents;
419 if (0 != git_diff_tree_to_tree(&difflist, blame->repository, ptree, otree, &diffopts))
420 goto cleanup;
421
422 if (!git_diff_num_deltas(difflist)) {
423 /* No changes; copy data */
424 git_blame__get_origin(&porigin, blame, parent, origin->path);
425 } else {
426 git_diff_find_options findopts = GIT_DIFF_FIND_OPTIONS_INIT;
427 int i;
428
429 /* Generate a full diff between the two trees */
430 git_diff_free(difflist);
431 diffopts.pathspec.count = 0;
432 if (0 != git_diff_tree_to_tree(&difflist, blame->repository, ptree, otree, &diffopts))
433 goto cleanup;
434
435 /* Let diff find renames */
436 findopts.flags = GIT_DIFF_FIND_RENAMES;
437 if (0 != git_diff_find_similar(difflist, &findopts))
438 goto cleanup;
439
440 /* Find one that matches */
441 for (i=0; i<(int)git_diff_num_deltas(difflist); i++) {
442 const git_diff_delta *delta = git_diff_get_delta(difflist, i);
443
444 if (!git_vector_bsearch(NULL, &blame->paths, delta->new_file.path))
445 {
446 git_vector_insert_sorted(&blame->paths, (void*)git__strdup(delta->old_file.path),
447 paths_on_dup);
448 make_origin(&porigin, parent, delta->old_file.path);
449 }
450 }
451 }
452
453 cleanup:
454 git_diff_free(difflist);
455 git_tree_free(otree);
456 git_tree_free(ptree);
457 return porigin;
458 }
459
460 /*
461 * The blobs of origin and porigin exactly match, so everything origin is
462 * suspected for can be blamed on the parent.
463 */
464 static void pass_whole_blame(git_blame *blame,
465 git_blame__origin *origin, git_blame__origin *porigin)
466 {
467 git_blame__entry *e;
468
469 if (!porigin->blob)
470 git_object_lookup((git_object**)&porigin->blob, blame->repository,
471 git_blob_id(origin->blob), GIT_OBJ_BLOB);
472 for (e=blame->ent; e; e=e->next) {
473 if (!same_suspect(e->suspect, origin))
474 continue;
475 origin_incref(porigin);
476 origin_decref(e->suspect);
477 e->suspect = porigin;
478 }
479 }
480
481 static void pass_blame(git_blame *blame, git_blame__origin *origin, uint32_t opt)
482 {
483 git_commit *commit = origin->commit;
484 int i, num_parents;
485 git_blame__origin *sg_buf[16];
486 git_blame__origin *porigin, **sg_origin = sg_buf;
487
488 num_parents = git_commit_parentcount(commit);
489 if (!git_oid_cmp(git_commit_id(commit), &blame->options.oldest_commit))
490 /* Stop at oldest specified commit */
491 num_parents = 0;
492 else if (opt & GIT_BLAME_FIRST_PARENT && num_parents > 1)
493 /* Limit search to the first parent */
494 num_parents = 1;
495
496 if (!num_parents) {
497 git_oid_cpy(&blame->options.oldest_commit, git_commit_id(commit));
498 goto finish;
499 }
500 else if (num_parents < (int)ARRAY_SIZE(sg_buf))
501 memset(sg_buf, 0, sizeof(sg_buf));
502 else
503 sg_origin = git__calloc(num_parents, sizeof(*sg_origin));
504
505 for (i=0; i<num_parents; i++) {
506 git_commit *p;
507 int j, same;
508
509 if (sg_origin[i])
510 continue;
511
512 git_commit_parent(&p, origin->commit, i);
513 porigin = find_origin(blame, p, origin);
514
515 if (!porigin)
516 continue;
517 if (porigin->blob && origin->blob &&
518 !git_oid_cmp(git_blob_id(porigin->blob), git_blob_id(origin->blob))) {
519 pass_whole_blame(blame, origin, porigin);
520 origin_decref(porigin);
521 goto finish;
522 }
523 for (j = same = 0; j<i; j++)
524 if (sg_origin[j] &&
525 !git_oid_cmp(git_blob_id(sg_origin[j]->blob), git_blob_id(porigin->blob))) {
526 same = 1;
527 break;
528 }
529 if (!same)
530 sg_origin[i] = porigin;
531 else
532 origin_decref(porigin);
533 }
534
535 /* Standard blame */
536 for (i=0; i<num_parents; i++) {
537 git_blame__origin *porigin = sg_origin[i];
538 if (!porigin)
539 continue;
540 if (!origin->previous) {
541 origin_incref(porigin);
542 origin->previous = porigin;
543 }
544 if (pass_blame_to_parent(blame, origin, porigin))
545 goto finish;
546 }
547
548 /* TODO: optionally find moves in parents' files */
549
550 /* TODO: optionally find copies in parents' files */
551
552 finish:
553 for (i=0; i<num_parents; i++)
554 if (sg_origin[i])
555 origin_decref(sg_origin[i]);
556 if (sg_origin != sg_buf)
557 git__free(sg_origin);
558 return;
559 }
560
561 /*
562 * If two blame entries that are next to each other came from
563 * contiguous lines in the same origin (i.e. <commit, path> pair),
564 * merge them together.
565 */
566 static void coalesce(git_blame *blame)
567 {
568 git_blame__entry *ent, *next;
569
570 for (ent=blame->ent; ent && (next = ent->next); ent = next) {
571 if (same_suspect(ent->suspect, next->suspect) &&
572 ent->guilty == next->guilty &&
573 ent->s_lno + ent->num_lines == next->s_lno)
574 {
575 ent->num_lines += next->num_lines;
576 ent->next = next->next;
577 if (ent->next)
578 ent->next->prev = ent;
579 origin_decref(next->suspect);
580 git__free(next);
581 ent->score = 0;
582 next = ent; /* again */
583 }
584 }
585 }
586
587 void git_blame__like_git(git_blame *blame, uint32_t opt)
588 {
589 while (true) {
590 git_blame__entry *ent;
591 git_blame__origin *suspect = NULL;
592
593 /* Find a suspect to break down */
594 for (ent = blame->ent; !suspect && ent; ent = ent->next)
595 if (!ent->guilty)
596 suspect = ent->suspect;
597 if (!suspect)
598 return; /* all done */
599
600 /* We'll use this suspect later in the loop, so hold on to it for now. */
601 origin_incref(suspect);
602 pass_blame(blame, suspect, opt);
603
604 /* Take responsibility for the remaining entries */
605 for (ent = blame->ent; ent; ent = ent->next) {
606 if (same_suspect(ent->suspect, suspect)) {
607 ent->guilty = true;
608 ent->is_boundary = !git_oid_cmp(
609 git_commit_id(suspect->commit),
610 &blame->options.oldest_commit);
611 }
612 }
613 origin_decref(suspect);
614 }
615
616 coalesce(blame);
617 }
618
619 void git_blame__free_entry(git_blame__entry *ent)
620 {
621 if (!ent) return;
622 origin_decref(ent->suspect);
623 git__free(ent);
624 }