]> git.proxmox.com Git - libgit2.git/blob - src/blame_git.c
Merge pull request #3311 from Fallso/MacroRedefinition
[libgit2.git] / src / blame_git.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "blame_git.h"
9 #include "commit.h"
10 #include "blob.h"
11 #include "xdiff/xinclude.h"
12
13 /*
14 * Origin is refcounted and usually we keep the blob contents to be
15 * reused.
16 */
17 static git_blame__origin *origin_incref(git_blame__origin *o)
18 {
19 if (o)
20 o->refcnt++;
21 return o;
22 }
23
24 static void origin_decref(git_blame__origin *o)
25 {
26 if (o && --o->refcnt <= 0) {
27 if (o->previous)
28 origin_decref(o->previous);
29 git_blob_free(o->blob);
30 git_commit_free(o->commit);
31 git__free(o);
32 }
33 }
34
35 /* Given a commit and a path in it, create a new origin structure. */
36 static int make_origin(git_blame__origin **out, git_commit *commit, const char *path)
37 {
38 git_blame__origin *o;
39 size_t path_len = strlen(path), alloc_len;
40 int error = 0;
41
42 GITERR_CHECK_ALLOC_ADD(&alloc_len, sizeof(*o), path_len);
43 GITERR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 1);
44 o = git__calloc(1, alloc_len);
45 GITERR_CHECK_ALLOC(o);
46
47 o->commit = commit;
48 o->refcnt = 1;
49 strcpy(o->path, path);
50
51 if (!(error = git_object_lookup_bypath((git_object**)&o->blob, (git_object*)commit,
52 path, GIT_OBJ_BLOB))) {
53 *out = o;
54 } else {
55 origin_decref(o);
56 }
57 return error;
58 }
59
60 /* Locate an existing origin or create a new one. */
61 int git_blame__get_origin(
62 git_blame__origin **out,
63 git_blame *blame,
64 git_commit *commit,
65 const char *path)
66 {
67 git_blame__entry *e;
68
69 for (e = blame->ent; e; e = e->next) {
70 if (e->suspect->commit == commit && !strcmp(e->suspect->path, path)) {
71 *out = origin_incref(e->suspect);
72 }
73 }
74 return make_origin(out, commit, path);
75 }
76
77 typedef struct blame_chunk_cb_data {
78 git_blame *blame;
79 git_blame__origin *target;
80 git_blame__origin *parent;
81 long tlno;
82 long plno;
83 }blame_chunk_cb_data;
84
85 static bool same_suspect(git_blame__origin *a, git_blame__origin *b)
86 {
87 if (a == b)
88 return true;
89 if (git_oid_cmp(git_commit_id(a->commit), git_commit_id(b->commit)))
90 return false;
91 return 0 == strcmp(a->path, b->path);
92 }
93
94 /* find the line number of the last line the target is suspected for */
95 static int find_last_in_target(git_blame *blame, git_blame__origin *target)
96 {
97 git_blame__entry *e;
98 int last_in_target = -1;
99
100 for (e=blame->ent; e; e=e->next) {
101 if (e->guilty || !same_suspect(e->suspect, target))
102 continue;
103 if (last_in_target < e->s_lno + e->num_lines)
104 last_in_target = e->s_lno + e->num_lines;
105 }
106 return last_in_target;
107 }
108
109 /*
110 * It is known that lines between tlno to same came from parent, and e
111 * has an overlap with that range. it also is known that parent's
112 * line plno corresponds to e's line tlno.
113 *
114 * <---- e ----->
115 * <------> (entirely within)
116 * <------------> (extends past)
117 * <------------> (starts before)
118 * <------------------> (entirely encloses)
119 *
120 * Split e into potentially three parts; before this chunk, the chunk
121 * to be blamed for the parent, and after that portion.
122 */
123 static void split_overlap(git_blame__entry *split, git_blame__entry *e,
124 int tlno, int plno, int same, git_blame__origin *parent)
125 {
126 int chunk_end_lno;
127
128 if (e->s_lno < tlno) {
129 /* there is a pre-chunk part not blamed on the parent */
130 split[0].suspect = origin_incref(e->suspect);
131 split[0].lno = e->lno;
132 split[0].s_lno = e->s_lno;
133 split[0].num_lines = tlno - e->s_lno;
134 split[1].lno = e->lno + tlno - e->s_lno;
135 split[1].s_lno = plno;
136 } else {
137 split[1].lno = e->lno;
138 split[1].s_lno = plno + (e->s_lno - tlno);
139 }
140
141 if (same < e->s_lno + e->num_lines) {
142 /* there is a post-chunk part not blamed on parent */
143 split[2].suspect = origin_incref(e->suspect);
144 split[2].lno = e->lno + (same - e->s_lno);
145 split[2].s_lno = e->s_lno + (same - e->s_lno);
146 split[2].num_lines = e->s_lno + e->num_lines - same;
147 chunk_end_lno = split[2].lno;
148 } else {
149 chunk_end_lno = e->lno + e->num_lines;
150 }
151 split[1].num_lines = chunk_end_lno - split[1].lno;
152
153 /*
154 * if it turns out there is nothing to blame the parent for, forget about
155 * the splitting. !split[1].suspect signals this.
156 */
157 if (split[1].num_lines < 1)
158 return;
159 split[1].suspect = origin_incref(parent);
160 }
161
162 /*
163 * Link in a new blame entry to the scoreboard. Entries that cover the same
164 * line range have been removed from the scoreboard previously.
165 */
166 static void add_blame_entry(git_blame *blame, git_blame__entry *e)
167 {
168 git_blame__entry *ent, *prev = NULL;
169
170 origin_incref(e->suspect);
171
172 for (ent = blame->ent; ent && ent->lno < e->lno; ent = ent->next)
173 prev = ent;
174
175 /* prev, if not NULL, is the last one that is below e */
176 e->prev = prev;
177 if (prev) {
178 e->next = prev->next;
179 prev->next = e;
180 } else {
181 e->next = blame->ent;
182 blame->ent = e;
183 }
184 if (e->next)
185 e->next->prev = e;
186 }
187
188 /*
189 * src typically is on-stack; we want to copy the information in it to
190 * a malloced blame_entry that is already on the linked list of the scoreboard.
191 * The origin of dst loses a refcnt while the origin of src gains one.
192 */
193 static void dup_entry(git_blame__entry *dst, git_blame__entry *src)
194 {
195 git_blame__entry *p, *n;
196
197 p = dst->prev;
198 n = dst->next;
199 origin_incref(src->suspect);
200 origin_decref(dst->suspect);
201 memcpy(dst, src, sizeof(*src));
202 dst->prev = p;
203 dst->next = n;
204 dst->score = 0;
205 }
206
207 /*
208 * split_overlap() divided an existing blame e into up to three parts in split.
209 * Adjust the linked list of blames in the scoreboard to reflect the split.
210 */
211 static void split_blame(git_blame *blame, git_blame__entry *split, git_blame__entry *e)
212 {
213 git_blame__entry *new_entry;
214
215 if (split[0].suspect && split[2].suspect) {
216 /* The first part (reuse storage for the existing entry e */
217 dup_entry(e, &split[0]);
218
219 /* The last part -- me */
220 new_entry = git__malloc(sizeof(*new_entry));
221 memcpy(new_entry, &(split[2]), sizeof(git_blame__entry));
222 add_blame_entry(blame, new_entry);
223
224 /* ... and the middle part -- parent */
225 new_entry = git__malloc(sizeof(*new_entry));
226 memcpy(new_entry, &(split[1]), sizeof(git_blame__entry));
227 add_blame_entry(blame, new_entry);
228 } else if (!split[0].suspect && !split[2].suspect) {
229 /*
230 * The parent covers the entire area; reuse storage for e and replace it
231 * with the parent
232 */
233 dup_entry(e, &split[1]);
234 } else if (split[0].suspect) {
235 /* me and then parent */
236 dup_entry(e, &split[0]);
237 new_entry = git__malloc(sizeof(*new_entry));
238 memcpy(new_entry, &(split[1]), sizeof(git_blame__entry));
239 add_blame_entry(blame, new_entry);
240 } else {
241 /* parent and then me */
242 dup_entry(e, &split[1]);
243 new_entry = git__malloc(sizeof(*new_entry));
244 memcpy(new_entry, &(split[2]), sizeof(git_blame__entry));
245 add_blame_entry(blame, new_entry);
246 }
247 }
248
249 /*
250 * After splitting the blame, the origins used by the on-stack blame_entry
251 * should lose one refcnt each.
252 */
253 static void decref_split(git_blame__entry *split)
254 {
255 int i;
256 for (i=0; i<3; i++)
257 origin_decref(split[i].suspect);
258 }
259
260 /*
261 * Helper for blame_chunk(). blame_entry e is known to overlap with the patch
262 * hunk; split it and pass blame to the parent.
263 */
264 static void blame_overlap(
265 git_blame *blame,
266 git_blame__entry *e,
267 int tlno,
268 int plno,
269 int same,
270 git_blame__origin *parent)
271 {
272 git_blame__entry split[3] = {{0}};
273
274 split_overlap(split, e, tlno, plno, same, parent);
275 if (split[1].suspect)
276 split_blame(blame, split, e);
277 decref_split(split);
278 }
279
280 /*
281 * Process one hunk from the patch between the current suspect for blame_entry
282 * e and its parent. Find and split the overlap, and pass blame to the
283 * overlapping part to the parent.
284 */
285 static void blame_chunk(
286 git_blame *blame,
287 int tlno,
288 int plno,
289 int same,
290 git_blame__origin *target,
291 git_blame__origin *parent)
292 {
293 git_blame__entry *e;
294
295 for (e = blame->ent; e; e = e->next) {
296 if (e->guilty || !same_suspect(e->suspect, target))
297 continue;
298 if (same <= e->s_lno)
299 continue;
300 if (tlno < e->s_lno + e->num_lines) {
301 blame_overlap(blame, e, tlno, plno, same, parent);
302 }
303 }
304 }
305
306 static int my_emit(
307 long start_a, long count_a,
308 long start_b, long count_b,
309 void *cb_data)
310 {
311 blame_chunk_cb_data *d = (blame_chunk_cb_data *)cb_data;
312
313 blame_chunk(d->blame, d->tlno, d->plno, start_b, d->target, d->parent);
314 d->plno = start_a + count_a;
315 d->tlno = start_b + count_b;
316
317 return 0;
318 }
319
320 static void trim_common_tail(mmfile_t *a, mmfile_t *b, long ctx)
321 {
322 const int blk = 1024;
323 long trimmed = 0, recovered = 0;
324 char *ap = a->ptr + a->size;
325 char *bp = b->ptr + b->size;
326 long smaller = (long)((a->size < b->size) ? a->size : b->size);
327
328 if (ctx)
329 return;
330
331 while (blk + trimmed <= smaller && !memcmp(ap - blk, bp - blk, blk)) {
332 trimmed += blk;
333 ap -= blk;
334 bp -= blk;
335 }
336
337 while (recovered < trimmed)
338 if (ap[recovered++] == '\n')
339 break;
340 a->size -= trimmed - recovered;
341 b->size -= trimmed - recovered;
342 }
343
344 static int diff_hunks(mmfile_t file_a, mmfile_t file_b, void *cb_data)
345 {
346 xpparam_t xpp = {0};
347 xdemitconf_t xecfg = {0};
348 xdemitcb_t ecb = {0};
349
350 xecfg.hunk_func = my_emit;
351 ecb.priv = cb_data;
352
353 trim_common_tail(&file_a, &file_b, 0);
354 return xdl_diff(&file_a, &file_b, &xpp, &xecfg, &ecb);
355 }
356
357 static void fill_origin_blob(git_blame__origin *o, mmfile_t *file)
358 {
359 memset(file, 0, sizeof(*file));
360 if (o->blob) {
361 file->ptr = (char*)git_blob_rawcontent(o->blob);
362 file->size = (size_t)git_blob_rawsize(o->blob);
363 }
364 }
365
366 static int pass_blame_to_parent(
367 git_blame *blame,
368 git_blame__origin *target,
369 git_blame__origin *parent)
370 {
371 int last_in_target;
372 mmfile_t file_p, file_o;
373 blame_chunk_cb_data d = { blame, target, parent, 0, 0 };
374
375 last_in_target = find_last_in_target(blame, target);
376 if (last_in_target < 0)
377 return 1; /* nothing remains for this target */
378
379 fill_origin_blob(parent, &file_p);
380 fill_origin_blob(target, &file_o);
381
382 diff_hunks(file_p, file_o, &d);
383 /* The reset (i.e. anything after tlno) are the same as the parent */
384 blame_chunk(blame, d.tlno, d.plno, last_in_target, target, parent);
385
386 return 0;
387 }
388
/*
 * Duplicate handler passed to git_vector_insert_sorted() for the path list:
 * the entry already stored is kept, the newly offered string is freed, and
 * -1 is returned.
 */
static int paths_on_dup(void **old, void *new)
{
	git__free(new);
	GIT_UNUSED(old);

	return -1;
}
395
396 static git_blame__origin* find_origin(
397 git_blame *blame,
398 git_commit *parent,
399 git_blame__origin *origin)
400 {
401 git_blame__origin *porigin = NULL;
402 git_diff *difflist = NULL;
403 git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
404 git_tree *otree=NULL, *ptree=NULL;
405
406 /* Get the trees from this commit and its parent */
407 if (0 != git_commit_tree(&otree, origin->commit) ||
408 0 != git_commit_tree(&ptree, parent))
409 goto cleanup;
410
411 /* Configure the diff */
412 diffopts.context_lines = 0;
413 diffopts.flags = GIT_DIFF_SKIP_BINARY_CHECK;
414
415 /* Check to see if files we're interested have changed */
416 diffopts.pathspec.count = blame->paths.length;
417 diffopts.pathspec.strings = (char**)blame->paths.contents;
418 if (0 != git_diff_tree_to_tree(&difflist, blame->repository, ptree, otree, &diffopts))
419 goto cleanup;
420
421 if (!git_diff_num_deltas(difflist)) {
422 /* No changes; copy data */
423 git_blame__get_origin(&porigin, blame, parent, origin->path);
424 } else {
425 git_diff_find_options findopts = GIT_DIFF_FIND_OPTIONS_INIT;
426 int i;
427
428 /* Generate a full diff between the two trees */
429 git_diff_free(difflist);
430 diffopts.pathspec.count = 0;
431 if (0 != git_diff_tree_to_tree(&difflist, blame->repository, ptree, otree, &diffopts))
432 goto cleanup;
433
434 /* Let diff find renames */
435 findopts.flags = GIT_DIFF_FIND_RENAMES;
436 if (0 != git_diff_find_similar(difflist, &findopts))
437 goto cleanup;
438
439 /* Find one that matches */
440 for (i=0; i<(int)git_diff_num_deltas(difflist); i++) {
441 const git_diff_delta *delta = git_diff_get_delta(difflist, i);
442
443 if (!git_vector_bsearch(NULL, &blame->paths, delta->new_file.path))
444 {
445 git_vector_insert_sorted(&blame->paths, (void*)git__strdup(delta->old_file.path),
446 paths_on_dup);
447 make_origin(&porigin, parent, delta->old_file.path);
448 }
449 }
450 }
451
452 cleanup:
453 git_diff_free(difflist);
454 git_tree_free(otree);
455 git_tree_free(ptree);
456 return porigin;
457 }
458
459 /*
460 * The blobs of origin and porigin exactly match, so everything origin is
461 * suspected for can be blamed on the parent.
462 */
463 static void pass_whole_blame(git_blame *blame,
464 git_blame__origin *origin, git_blame__origin *porigin)
465 {
466 git_blame__entry *e;
467
468 if (!porigin->blob)
469 git_object_lookup((git_object**)&porigin->blob, blame->repository,
470 git_blob_id(origin->blob), GIT_OBJ_BLOB);
471 for (e=blame->ent; e; e=e->next) {
472 if (!same_suspect(e->suspect, origin))
473 continue;
474 origin_incref(porigin);
475 origin_decref(e->suspect);
476 e->suspect = porigin;
477 }
478 }
479
480 static void pass_blame(git_blame *blame, git_blame__origin *origin, uint32_t opt)
481 {
482 git_commit *commit = origin->commit;
483 int i, num_parents;
484 git_blame__origin *sg_buf[16];
485 git_blame__origin *porigin, **sg_origin = sg_buf;
486
487 num_parents = git_commit_parentcount(commit);
488 if (!git_oid_cmp(git_commit_id(commit), &blame->options.oldest_commit))
489 /* Stop at oldest specified commit */
490 num_parents = 0;
491 else if (opt & GIT_BLAME_FIRST_PARENT && num_parents > 1)
492 /* Limit search to the first parent */
493 num_parents = 1;
494
495 if (!num_parents) {
496 git_oid_cpy(&blame->options.oldest_commit, git_commit_id(commit));
497 goto finish;
498 }
499 else if (num_parents < (int)ARRAY_SIZE(sg_buf))
500 memset(sg_buf, 0, sizeof(sg_buf));
501 else
502 sg_origin = git__calloc(num_parents, sizeof(*sg_origin));
503
504 for (i=0; i<num_parents; i++) {
505 git_commit *p;
506 int j, same;
507
508 if (sg_origin[i])
509 continue;
510
511 git_commit_parent(&p, origin->commit, i);
512 porigin = find_origin(blame, p, origin);
513
514 if (!porigin)
515 continue;
516 if (porigin->blob && origin->blob &&
517 !git_oid_cmp(git_blob_id(porigin->blob), git_blob_id(origin->blob))) {
518 pass_whole_blame(blame, origin, porigin);
519 origin_decref(porigin);
520 goto finish;
521 }
522 for (j = same = 0; j<i; j++)
523 if (sg_origin[j] &&
524 !git_oid_cmp(git_blob_id(sg_origin[j]->blob), git_blob_id(porigin->blob))) {
525 same = 1;
526 break;
527 }
528 if (!same)
529 sg_origin[i] = porigin;
530 else
531 origin_decref(porigin);
532 }
533
534 /* Standard blame */
535 for (i=0; i<num_parents; i++) {
536 git_blame__origin *porigin = sg_origin[i];
537 if (!porigin)
538 continue;
539 if (!origin->previous) {
540 origin_incref(porigin);
541 origin->previous = porigin;
542 }
543 if (pass_blame_to_parent(blame, origin, porigin))
544 goto finish;
545 }
546
547 /* TODO: optionally find moves in parents' files */
548
549 /* TODO: optionally find copies in parents' files */
550
551 finish:
552 for (i=0; i<num_parents; i++)
553 if (sg_origin[i])
554 origin_decref(sg_origin[i]);
555 if (sg_origin != sg_buf)
556 git__free(sg_origin);
557 return;
558 }
559
560 /*
561 * If two blame entries that are next to each other came from
562 * contiguous lines in the same origin (i.e. <commit, path> pair),
563 * merge them together.
564 */
565 static void coalesce(git_blame *blame)
566 {
567 git_blame__entry *ent, *next;
568
569 for (ent=blame->ent; ent && (next = ent->next); ent = next) {
570 if (same_suspect(ent->suspect, next->suspect) &&
571 ent->guilty == next->guilty &&
572 ent->s_lno + ent->num_lines == next->s_lno)
573 {
574 ent->num_lines += next->num_lines;
575 ent->next = next->next;
576 if (ent->next)
577 ent->next->prev = ent;
578 origin_decref(next->suspect);
579 git__free(next);
580 ent->score = 0;
581 next = ent; /* again */
582 }
583 }
584 }
585
586 void git_blame__like_git(git_blame *blame, uint32_t opt)
587 {
588 while (true) {
589 git_blame__entry *ent;
590 git_blame__origin *suspect = NULL;
591
592 /* Find a suspect to break down */
593 for (ent = blame->ent; !suspect && ent; ent = ent->next)
594 if (!ent->guilty)
595 suspect = ent->suspect;
596 if (!suspect)
597 return; /* all done */
598
599 /* We'll use this suspect later in the loop, so hold on to it for now. */
600 origin_incref(suspect);
601 pass_blame(blame, suspect, opt);
602
603 /* Take responsibility for the remaining entries */
604 for (ent = blame->ent; ent; ent = ent->next) {
605 if (same_suspect(ent->suspect, suspect)) {
606 ent->guilty = true;
607 ent->is_boundary = !git_oid_cmp(
608 git_commit_id(suspect->commit),
609 &blame->options.oldest_commit);
610 }
611 }
612 origin_decref(suspect);
613 }
614
615 coalesce(blame);
616 }
617
618 void git_blame__free_entry(git_blame__entry *ent)
619 {
620 if (!ent) return;
621 origin_decref(ent->suspect);
622 git__free(ent);
623 }