]>
Commit | Line | Data |
---|---|---|
ceab4e26 BS |
1 | /* |
2 | * Copyright (C) the libgit2 contributors. All rights reserved. | |
3 | * | |
4 | * This file is part of libgit2, distributed under the GNU GPL v2 with | |
5 | * a Linking Exception. For full terms see the included COPYING file. | |
6 | */ | |
7 | ||
8 | #include "blame.h" | |
9 | #include "git2/commit.h" | |
10 | #include "git2/revparse.h" | |
11 | #include "git2/revwalk.h" | |
12 | #include "git2/tree.h" | |
13 | #include "git2/diff.h" | |
14 | #include "git2/blob.h" | |
15 | #include "util.h" | |
16 | #include "repository.h" | |
17 | #include "blame_git.h" | |
18 | ||
19 | ||
20 | static int hunk_search_cmp_helper(const void *key, size_t start_line, size_t num_lines) | |
21 | { | |
22 | uint32_t lineno = *(size_t*)key; | |
23 | if (lineno < start_line) | |
24 | return -1; | |
25 | if (lineno >= ((uint32_t)start_line + num_lines)) | |
26 | return 1; | |
27 | return 0; | |
28 | } | |
29 | static int hunk_byfinalline_search_cmp(const void *key, const void *entry) | |
30 | { | |
31 | git_blame_hunk *hunk = (git_blame_hunk*)entry; | |
32 | return hunk_search_cmp_helper(key, hunk->final_start_line_number, hunk->lines_in_hunk); | |
33 | } | |
34 | static int hunk_sort_cmp_by_start_line(const void *_a, const void *_b) | |
35 | { | |
36 | git_blame_hunk *a = (git_blame_hunk*)_a, | |
37 | *b = (git_blame_hunk*)_b; | |
38 | ||
39 | return a->final_start_line_number - b->final_start_line_number; | |
40 | } | |
41 | ||
42 | static bool hunk_ends_at_or_before_line(git_blame_hunk *hunk, size_t line) | |
43 | { | |
44 | return line >= (size_t)(hunk->final_start_line_number + hunk->lines_in_hunk - 1); | |
45 | } | |
46 | ||
47 | static bool hunk_starts_at_or_after_line(git_blame_hunk *hunk, size_t line) | |
48 | { | |
49 | return line <= hunk->final_start_line_number; | |
50 | } | |
51 | ||
52 | static git_blame_hunk* new_hunk(uint16_t start, uint16_t lines, uint16_t orig_start, const char *path) | |
53 | { | |
54 | git_blame_hunk *hunk = git__calloc(1, sizeof(git_blame_hunk)); | |
55 | if (!hunk) return NULL; | |
56 | ||
57 | hunk->lines_in_hunk = lines; | |
58 | hunk->final_start_line_number = start; | |
59 | hunk->orig_start_line_number = orig_start; | |
60 | hunk->orig_path = path ? git__strdup(path) : NULL; | |
61 | ||
62 | return hunk; | |
63 | } | |
64 | ||
65 | git_blame_hunk* git_blame__alloc_hunk() | |
66 | { | |
67 | return new_hunk(0,0,0,NULL); | |
68 | } | |
69 | ||
70 | static git_blame_hunk* dup_hunk(git_blame_hunk *hunk) | |
71 | { | |
72 | git_blame_hunk *newhunk = new_hunk(hunk->final_start_line_number, hunk->lines_in_hunk, hunk->orig_start_line_number, hunk->orig_path); | |
73 | git_oid_cpy(&newhunk->orig_commit_id, &hunk->orig_commit_id); | |
74 | git_oid_cpy(&newhunk->final_commit_id, &hunk->final_commit_id); | |
75 | return newhunk; | |
76 | } | |
77 | ||
78 | static void free_hunk(git_blame_hunk *hunk) | |
79 | { | |
80 | git__free((void*)hunk->orig_path); | |
81 | git__free(hunk); | |
82 | } | |
83 | ||
84 | /* Starting with the hunk that includes start_line, shift all following hunks' | |
85 | * final_start_line by shift_by lines */ | |
86 | static void shift_hunks_by_final(git_vector *v, size_t start_line, int shift_by) | |
87 | { | |
88 | size_t i; | |
89 | ||
90 | if (!git_vector_bsearch2( &i, v, hunk_byfinalline_search_cmp, &start_line)) { | |
91 | for (; i < v->length; i++) { | |
92 | git_blame_hunk *hunk = (git_blame_hunk*)v->contents[i]; | |
93 | hunk->final_start_line_number += shift_by; | |
94 | } | |
95 | } | |
96 | } | |
97 | static int paths_cmp(const void *a, const void *b) { return git__strcmp((char*)a, (char*)b); } | |
98 | git_blame* git_blame__alloc( | |
99 | git_repository *repo, | |
100 | git_blame_options opts, | |
101 | const char *path) | |
102 | { | |
103 | git_blame *gbr = (git_blame*)git__calloc(1, sizeof(git_blame)); | |
104 | if (!gbr) { | |
105 | giterr_set_oom(); | |
106 | return NULL; | |
107 | } | |
108 | git_vector_init(&gbr->hunks, 8, hunk_sort_cmp_by_start_line); | |
109 | git_vector_init(&gbr->unclaimed_hunks, 8, hunk_sort_cmp_by_start_line); | |
110 | git_vector_init(&gbr->paths, 8, paths_cmp); | |
111 | gbr->repository = repo; | |
112 | gbr->options = opts; | |
113 | gbr->path = git__strdup(path); | |
114 | git_vector_insert(&gbr->paths, git__strdup(path)); | |
115 | gbr->final_blob = NULL; | |
116 | return gbr; | |
117 | } | |
118 | ||
119 | void git_blame_free(git_blame *blame) | |
120 | { | |
121 | size_t i; | |
122 | git_blame_hunk *hunk; | |
123 | char *path; | |
124 | ||
125 | if (!blame) return; | |
126 | ||
127 | git_vector_foreach(&blame->hunks, i, hunk) | |
128 | free_hunk(hunk); | |
129 | git_vector_free(&blame->hunks); | |
130 | ||
131 | git_vector_foreach(&blame->unclaimed_hunks, i, hunk) | |
132 | free_hunk(hunk); | |
133 | git_vector_free(&blame->unclaimed_hunks); | |
134 | ||
135 | git_vector_foreach(&blame->paths, i, path) | |
136 | git__free(path); | |
137 | git_vector_free(&blame->paths); | |
138 | ||
139 | git__free((void*)blame->path); | |
140 | git_blob_free(blame->final_blob); | |
141 | git__free(blame); | |
142 | } | |
143 | ||
144 | uint32_t git_blame_get_hunk_count(git_blame *blame) | |
145 | { | |
146 | assert(blame); | |
147 | return blame->hunks.length; | |
148 | } | |
149 | ||
150 | const git_blame_hunk *git_blame_get_hunk_byindex(git_blame *blame, uint32_t index) | |
151 | { | |
152 | assert(blame); | |
153 | return (git_blame_hunk*)git_vector_get(&blame->hunks, index); | |
154 | } | |
155 | ||
156 | const git_blame_hunk *git_blame_get_hunk_byline(git_blame *blame, uint32_t lineno) | |
157 | { | |
158 | size_t i; | |
159 | assert(blame); | |
160 | ||
161 | if (!git_vector_bsearch2( &i, &blame->hunks, hunk_byfinalline_search_cmp, &lineno)) { | |
162 | return git_blame_get_hunk_byindex(blame, i); | |
163 | } | |
164 | ||
165 | return NULL; | |
166 | } | |
167 | ||
168 | static void normalize_options( | |
169 | git_blame_options *out, | |
170 | const git_blame_options *in, | |
171 | git_repository *repo) | |
172 | { | |
173 | git_blame_options dummy = GIT_BLAME_OPTIONS_INIT; | |
174 | if (!in) in = &dummy; | |
175 | ||
176 | memcpy(out, in, sizeof(git_blame_options)); | |
177 | ||
178 | /* No newest_commit => HEAD */ | |
179 | if (git_oid_iszero(&out->newest_commit)) { | |
180 | git_reference_name_to_id(&out->newest_commit, repo, "HEAD"); | |
181 | } | |
182 | } | |
183 | ||
184 | static git_blame_hunk *split_hunk_in_vector( | |
185 | git_vector *vec, | |
186 | git_blame_hunk *hunk, | |
187 | size_t rel_line, | |
188 | bool return_new) | |
189 | { | |
190 | size_t new_line_count; | |
191 | git_blame_hunk *nh; | |
192 | ||
193 | /* Don't split if already at a boundary */ | |
194 | if (rel_line <= 0 || | |
195 | rel_line >= hunk->lines_in_hunk) | |
196 | { | |
197 | return hunk; | |
198 | } | |
199 | ||
200 | new_line_count = hunk->lines_in_hunk - rel_line; | |
201 | nh = new_hunk(hunk->final_start_line_number+rel_line, new_line_count, | |
202 | hunk->orig_start_line_number+rel_line, hunk->orig_path); | |
203 | git_oid_cpy(&nh->final_commit_id, &hunk->final_commit_id); | |
204 | git_oid_cpy(&nh->orig_commit_id, &hunk->orig_commit_id); | |
205 | ||
206 | /* Adjust hunk that was split */ | |
207 | hunk->lines_in_hunk -= new_line_count; | |
208 | git_vector_insert_sorted(vec, nh, NULL); | |
209 | { | |
210 | git_blame_hunk *ret = return_new ? nh : hunk; | |
211 | return ret; | |
212 | } | |
213 | } | |
214 | ||
215 | ||
216 | /* | |
217 | * To allow quick access to the contents of nth line in the | |
218 | * final image, prepare an index in the scoreboard. | |
219 | */ | |
220 | static int prepare_lines(struct scoreboard *sb) | |
221 | { | |
222 | const char *buf = sb->final_buf; | |
223 | git_off_t len = sb->final_buf_size; | |
224 | int num = 0, incomplete = 0, bol = 1; | |
225 | ||
226 | if (len && buf[len-1] != '\n') | |
227 | incomplete++; /* incomplete line at the end */ | |
228 | while (len--) { | |
229 | if (bol) { | |
230 | bol = 0; | |
231 | } | |
232 | if (*buf++ == '\n') { | |
233 | num++; | |
234 | bol = 1; | |
235 | } | |
236 | } | |
237 | sb->num_lines = num + incomplete; | |
238 | return sb->num_lines; | |
239 | } | |
240 | ||
241 | static git_blame_hunk* hunk_from_entry(struct blame_entry *e) | |
242 | { | |
243 | git_blame_hunk *h = new_hunk( | |
244 | e->lno+1, e->num_lines, e->s_lno+1, e->suspect->path); | |
245 | git_oid_cpy(&h->final_commit_id, git_commit_id(e->suspect->commit)); | |
246 | return h; | |
247 | } | |
248 | ||
249 | static void free_if_not_already_freed(git_vector *already, struct origin *o) | |
250 | { | |
251 | size_t i; | |
252 | ||
253 | if (!o) return; | |
254 | if (!git_vector_search(&i, already, o)) | |
255 | return; | |
256 | ||
257 | git_vector_insert(already, o); | |
258 | free_if_not_already_freed(already, o->previous); | |
259 | git_blob_free(o->blob); | |
260 | git_commit_free(o->commit); | |
261 | git__free(o); | |
262 | } | |
263 | ||
264 | static int walk_and_mark(git_blame *blame) | |
265 | { | |
266 | int error; | |
267 | ||
268 | struct scoreboard sb = {0}; | |
269 | struct blame_entry *ent = NULL; | |
270 | git_blob *blob = NULL; | |
271 | struct origin *o; | |
272 | git_vector already = GIT_VECTOR_INIT; | |
273 | ||
274 | if ((error = git_commit_lookup(&sb.final, blame->repository, &blame->options.newest_commit)) < 0 || | |
275 | (error = git_object_lookup_bypath((git_object**)&blob, (git_object*)sb.final, blame->path, GIT_OBJ_BLOB)) < 0) | |
276 | goto cleanup; | |
277 | sb.final_buf = git_blob_rawcontent(blob); | |
278 | sb.final_buf_size = git_blob_rawsize(blob); | |
279 | if ((error = get_origin(&o, &sb, sb.final, blame->path)) < 0) | |
280 | goto cleanup; | |
281 | ||
282 | ent = git__calloc(1, sizeof(*ent)); | |
283 | ent->num_lines = prepare_lines(&sb); | |
284 | ent->suspect = o; | |
285 | sb.ent = ent; | |
286 | sb.path = blame->path; | |
287 | sb.blame = blame; | |
288 | ||
289 | assign_blame(&sb, blame->options.flags); | |
290 | coalesce(&sb); | |
291 | ||
292 | for (ent = sb.ent; ent; ) { | |
293 | git_vector_insert(&blame->hunks, hunk_from_entry(ent)); | |
294 | ent = ent->next; | |
295 | } | |
296 | ||
297 | cleanup: | |
298 | for (ent = sb.ent; ent; ) { | |
299 | struct blame_entry *e = ent->next; | |
300 | struct origin *o = ent->suspect; | |
301 | ||
302 | /* Linkages might not be ordered, so we only free pointers we haven't | |
303 | * seen before. */ | |
304 | free_if_not_already_freed(&already, o); | |
305 | ||
306 | git__free(ent); | |
307 | ent = e; | |
308 | } | |
309 | ||
310 | git_vector_free(&already); | |
311 | git_commit_free(sb.final); | |
312 | git_blob_free(blob); | |
313 | return error; | |
314 | } | |
315 | ||
316 | static int load_blob(git_blame *blame, git_repository *repo, git_oid *commit_id, const char *path) | |
317 | { | |
318 | int retval = -1; | |
319 | git_commit *commit = NULL; | |
320 | git_tree *tree = NULL; | |
321 | git_tree_entry *tree_entry = NULL; | |
322 | git_object *obj = NULL; | |
323 | ||
324 | if (((retval = git_commit_lookup(&commit, repo, commit_id)) < 0) || | |
325 | ((retval = git_object_lookup_bypath(&obj, (git_object*)commit, path, GIT_OBJ_BLOB)) < 0) || | |
326 | ((retval = git_object_type(obj)) != GIT_OBJ_BLOB)) | |
327 | goto cleanup; | |
328 | blame->final_blob = (git_blob*)obj; | |
329 | ||
330 | cleanup: | |
331 | git_tree_entry_free(tree_entry); | |
332 | git_tree_free(tree); | |
333 | git_commit_free(commit); | |
334 | return retval; | |
335 | } | |
336 | ||
337 | /******************************************************************************* | |
338 | * File blaming | |
339 | ******************************************************************************/ | |
340 | ||
341 | int git_blame_file( | |
342 | git_blame **out, | |
343 | git_repository *repo, | |
344 | const char *path, | |
345 | git_blame_options *options) | |
346 | { | |
347 | int error = -1; | |
348 | git_blame_options normOptions = GIT_BLAME_OPTIONS_INIT; | |
349 | git_blame *blame = NULL; | |
350 | ||
351 | assert(out && repo && path); | |
352 | normalize_options(&normOptions, options, repo); | |
353 | ||
354 | blame = git_blame__alloc(repo, normOptions, path); | |
355 | GITERR_CHECK_ALLOC(blame); | |
356 | ||
357 | if ((error = load_blob(blame, repo, &normOptions.newest_commit, path)) < 0) | |
358 | goto on_error; | |
359 | ||
360 | if ((error = walk_and_mark(blame)) < 0) | |
361 | goto on_error; | |
362 | ||
363 | *out = blame; | |
364 | return 0; | |
365 | ||
366 | on_error: | |
367 | git_blame_free(blame); | |
368 | return error; | |
369 | } | |
370 | ||
371 | /******************************************************************************* | |
372 | * Buffer blaming | |
373 | *******************************************************************************/ | |
374 | ||
375 | static bool hunk_is_bufferblame(git_blame_hunk *hunk) | |
376 | { | |
377 | return git_oid_iszero(&hunk->final_commit_id); | |
378 | } | |
379 | ||
380 | static int buffer_hunk_cb( | |
381 | const git_diff_delta *delta, | |
382 | const git_diff_range *range, | |
383 | const char *header, | |
384 | size_t header_len, | |
385 | void *payload) | |
386 | { | |
387 | git_blame *blame = (git_blame*)payload; | |
388 | size_t wedge_line; | |
389 | ||
390 | GIT_UNUSED(delta); | |
391 | GIT_UNUSED(header); | |
392 | GIT_UNUSED(header_len); | |
393 | ||
394 | wedge_line = (range->old_lines == 0) ? range->new_start : range->old_start; | |
395 | blame->current_diff_line = wedge_line; | |
396 | ||
397 | /* If this hunk doesn't start between existing hunks, split a hunk up so it does */ | |
398 | blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byline(blame, wedge_line); | |
399 | if (!hunk_starts_at_or_after_line(blame->current_hunk, wedge_line)){ | |
400 | blame->current_hunk = split_hunk_in_vector(&blame->hunks, blame->current_hunk, | |
401 | wedge_line - blame->current_hunk->orig_start_line_number, true); | |
402 | } | |
403 | ||
404 | return 0; | |
405 | } | |
406 | ||
407 | static int ptrs_equal_cmp(const void *a, const void *b) { return a<b ? -1 : a>b ? 1 : 0; } | |
408 | static int buffer_line_cb( | |
409 | const git_diff_delta *delta, | |
410 | const git_diff_range *range, | |
411 | char line_origin, | |
412 | const char *content, | |
413 | size_t content_len, | |
414 | void *payload) | |
415 | { | |
416 | git_blame *blame = (git_blame*)payload; | |
417 | ||
418 | GIT_UNUSED(delta); | |
419 | GIT_UNUSED(range); | |
420 | GIT_UNUSED(content); | |
421 | GIT_UNUSED(content_len); | |
422 | ||
423 | #ifdef DO_DEBUG | |
424 | { | |
425 | char *str = git__substrdup(content, content_len); | |
426 | git__free(str); | |
427 | } | |
428 | #endif | |
429 | ||
430 | if (line_origin == GIT_DIFF_LINE_ADDITION) { | |
431 | if (hunk_is_bufferblame(blame->current_hunk) && | |
432 | hunk_ends_at_or_before_line(blame->current_hunk, blame->current_diff_line)) { | |
433 | /* Append to the current buffer-blame hunk */ | |
434 | blame->current_hunk->lines_in_hunk++; | |
435 | shift_hunks_by_final(&blame->hunks, blame->current_diff_line+1, 1); | |
436 | } else { | |
437 | /* Create a new buffer-blame hunk with this line */ | |
438 | shift_hunks_by_final(&blame->hunks, blame->current_diff_line, 1); | |
439 | blame->current_hunk = new_hunk(blame->current_diff_line, 1, 0, blame->path); | |
440 | git_vector_insert_sorted(&blame->hunks, blame->current_hunk, NULL); | |
441 | } | |
442 | blame->current_diff_line++; | |
443 | } | |
444 | ||
445 | if (line_origin == GIT_DIFF_LINE_DELETION) { | |
446 | /* Trim the line from the current hunk; remove it if it's now empty */ | |
447 | size_t shift_base = blame->current_diff_line + blame->current_hunk->lines_in_hunk+1; | |
448 | ||
449 | if (--(blame->current_hunk->lines_in_hunk) == 0) { | |
450 | size_t i; | |
451 | shift_base--; | |
452 | if (!git_vector_search2(&i, &blame->hunks, ptrs_equal_cmp, blame->current_hunk)) { | |
453 | git_vector_remove(&blame->hunks, i); | |
454 | free_hunk(blame->current_hunk); | |
455 | blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byindex(blame, i); | |
456 | } | |
457 | } | |
458 | shift_hunks_by_final(&blame->hunks, shift_base, -1); | |
459 | } | |
460 | return 0; | |
461 | } | |
462 | ||
463 | int git_blame_buffer( | |
464 | git_blame **out, | |
465 | git_blame *reference, | |
466 | const char *buffer, | |
467 | size_t buffer_len) | |
468 | { | |
469 | git_blame *blame; | |
470 | git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT; | |
471 | size_t i; | |
472 | git_blame_hunk *hunk; | |
473 | ||
474 | diffopts.context_lines = 0; | |
475 | ||
476 | assert(out && reference && buffer && buffer_len); | |
477 | ||
478 | blame = git_blame__alloc(reference->repository, reference->options, reference->path); | |
479 | ||
480 | /* Duplicate all of the hunk structures in the reference blame */ | |
481 | git_vector_foreach(&reference->hunks, i, hunk) { | |
482 | git_vector_insert(&blame->hunks, dup_hunk(hunk)); | |
483 | } | |
484 | ||
485 | /* Diff to the reference blob */ | |
486 | git_diff_blob_to_buffer(reference->final_blob, blame->path, | |
487 | buffer, buffer_len, blame->path, | |
488 | &diffopts, NULL, buffer_hunk_cb, buffer_line_cb, blame); | |
489 | ||
490 | *out = blame; | |
491 | return 0; | |
492 | } |