]> git.proxmox.com Git - libgit2.git/blob - src/indexer.c
khash: avoid using `kh_foreach`/`kh_foreach_value` directly
[libgit2.git] / src / indexer.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "git2/indexer.h"
9 #include "git2/object.h"
10
11 #include "common.h"
12 #include "pack.h"
13 #include "mwindow.h"
14 #include "posix.h"
15 #include "pack.h"
16 #include "filebuf.h"
17 #include "oid.h"
18 #include "oidmap.h"
19 #include "zstream.h"
20
21 GIT__USE_OIDMAP
22
23 extern git_mutex git__mwindow_mutex;
24
25 #define UINT31_MAX (0x7FFFFFFF)
26
27 struct entry {
28 git_oid oid;
29 uint32_t crc;
30 uint32_t offset;
31 uint64_t offset_long;
32 };
33
34 struct git_indexer {
35 unsigned int parsed_header :1,
36 pack_committed :1,
37 have_stream :1,
38 have_delta :1;
39 struct git_pack_header hdr;
40 struct git_pack_file *pack;
41 unsigned int mode;
42 git_off_t off;
43 git_off_t entry_start;
44 git_packfile_stream stream;
45 size_t nr_objects;
46 git_vector objects;
47 git_vector deltas;
48 unsigned int fanout[256];
49 git_hash_ctx hash_ctx;
50 git_oid hash;
51 git_transfer_progress_cb progress_cb;
52 void *progress_payload;
53 char objbuf[8*1024];
54
55 /* Needed to look up objects which we want to inject to fix a thin pack */
56 git_odb *odb;
57
58 /* Fields for calculating the packfile trailer (hash of everything before it) */
59 char inbuf[GIT_OID_RAWSZ];
60 size_t inbuf_len;
61 git_hash_ctx trailer;
62 };
63
64 struct delta_info {
65 git_off_t delta_off;
66 };
67
68 const git_oid *git_indexer_hash(const git_indexer *idx)
69 {
70 return &idx->hash;
71 }
72
73 static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack)
74 {
75 int error;
76 git_map map;
77
78 if ((error = p_mmap(&map, sizeof(*hdr), GIT_PROT_READ, GIT_MAP_SHARED, pack->mwf.fd, 0)) < 0)
79 return error;
80
81 memcpy(hdr, map.data, sizeof(*hdr));
82 p_munmap(&map);
83
84 /* Verify we recognize this pack file format. */
85 if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) {
86 giterr_set(GITERR_INDEXER, "wrong pack signature");
87 return -1;
88 }
89
90 if (!pack_version_ok(hdr->hdr_version)) {
91 giterr_set(GITERR_INDEXER, "wrong pack version");
92 return -1;
93 }
94
95 return 0;
96 }
97
98 static int objects_cmp(const void *a, const void *b)
99 {
100 const struct entry *entrya = a;
101 const struct entry *entryb = b;
102
103 return git_oid__cmp(&entrya->oid, &entryb->oid);
104 }
105
106 int git_indexer_new(
107 git_indexer **out,
108 const char *prefix,
109 unsigned int mode,
110 git_odb *odb,
111 git_transfer_progress_cb progress_cb,
112 void *progress_payload)
113 {
114 git_indexer *idx;
115 git_buf path = GIT_BUF_INIT, tmp_path = GIT_BUF_INIT;
116 static const char suff[] = "/pack";
117 int error, fd = -1;
118
119 idx = git__calloc(1, sizeof(git_indexer));
120 GITERR_CHECK_ALLOC(idx);
121 idx->odb = odb;
122 idx->progress_cb = progress_cb;
123 idx->progress_payload = progress_payload;
124 idx->mode = mode ? mode : GIT_PACK_FILE_MODE;
125 git_hash_ctx_init(&idx->hash_ctx);
126 git_hash_ctx_init(&idx->trailer);
127
128 error = git_buf_joinpath(&path, prefix, suff);
129 if (error < 0)
130 goto cleanup;
131
132 fd = git_futils_mktmp(&tmp_path, git_buf_cstr(&path), idx->mode);
133 git_buf_free(&path);
134 if (fd < 0)
135 goto cleanup;
136
137 error = git_packfile_alloc(&idx->pack, git_buf_cstr(&tmp_path));
138 git_buf_free(&tmp_path);
139
140 if (error < 0)
141 goto cleanup;
142
143 idx->pack->mwf.fd = fd;
144 if ((error = git_mwindow_file_register(&idx->pack->mwf)) < 0)
145 goto cleanup;
146
147 *out = idx;
148 return 0;
149
150 cleanup:
151 if (fd != -1)
152 p_close(fd);
153
154 if (git_buf_len(&tmp_path) > 0)
155 p_unlink(git_buf_cstr(&tmp_path));
156
157 if (idx->pack != NULL)
158 p_unlink(idx->pack->pack_name);
159
160 git_buf_free(&path);
161 git_buf_free(&tmp_path);
162 git__free(idx);
163 return -1;
164 }
165
166 /* Try to store the delta so we can try to resolve it later */
167 static int store_delta(git_indexer *idx)
168 {
169 struct delta_info *delta;
170
171 delta = git__calloc(1, sizeof(struct delta_info));
172 GITERR_CHECK_ALLOC(delta);
173 delta->delta_off = idx->entry_start;
174
175 if (git_vector_insert(&idx->deltas, delta) < 0)
176 return -1;
177
178 return 0;
179 }
180
181 static void hash_header(git_hash_ctx *ctx, git_off_t len, git_otype type)
182 {
183 char buffer[64];
184 size_t hdrlen;
185
186 hdrlen = git_odb__format_object_header(buffer, sizeof(buffer), (size_t)len, type);
187 git_hash_update(ctx, buffer, hdrlen);
188 }
189
190 static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream)
191 {
192 ssize_t read;
193
194 assert(idx && stream);
195
196 do {
197 if ((read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf))) < 0)
198 break;
199
200 git_hash_update(&idx->hash_ctx, idx->objbuf, read);
201 } while (read > 0);
202
203 if (read < 0)
204 return (int)read;
205
206 return 0;
207 }
208
209 /* In order to create the packfile stream, we need to skip over the delta base description */
210 static int advance_delta_offset(git_indexer *idx, git_otype type)
211 {
212 git_mwindow *w = NULL;
213
214 assert(type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA);
215
216 if (type == GIT_OBJ_REF_DELTA) {
217 idx->off += GIT_OID_RAWSZ;
218 } else {
219 git_off_t base_off = get_delta_base(idx->pack, &w, &idx->off, type, idx->entry_start);
220 git_mwindow_close(&w);
221 if (base_off < 0)
222 return (int)base_off;
223 }
224
225 return 0;
226 }
227
228 /* Read from the stream and discard any output */
229 static int read_object_stream(git_indexer *idx, git_packfile_stream *stream)
230 {
231 ssize_t read;
232
233 assert(stream);
234
235 do {
236 read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf));
237 } while (read > 0);
238
239 if (read < 0)
240 return (int)read;
241
242 return 0;
243 }
244
245 static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start, git_off_t size)
246 {
247 void *ptr;
248 uint32_t crc;
249 unsigned int left, len;
250 git_mwindow *w = NULL;
251
252 crc = crc32(0L, Z_NULL, 0);
253 while (size) {
254 ptr = git_mwindow_open(mwf, &w, start, (size_t)size, &left);
255 if (ptr == NULL)
256 return -1;
257
258 len = min(left, (unsigned int)size);
259 crc = crc32(crc, ptr, len);
260 size -= len;
261 start += len;
262 git_mwindow_close(&w);
263 }
264
265 *crc_out = htonl(crc);
266 return 0;
267 }
268
269 static int store_object(git_indexer *idx)
270 {
271 int i, error;
272 khiter_t k;
273 git_oid oid;
274 struct entry *entry;
275 git_off_t entry_size;
276 struct git_pack_entry *pentry;
277 git_off_t entry_start = idx->entry_start;
278
279 entry = git__calloc(1, sizeof(*entry));
280 GITERR_CHECK_ALLOC(entry);
281
282 pentry = git__calloc(1, sizeof(struct git_pack_entry));
283 GITERR_CHECK_ALLOC(pentry);
284
285 git_hash_final(&oid, &idx->hash_ctx);
286 entry_size = idx->off - entry_start;
287 if (entry_start > UINT31_MAX) {
288 entry->offset = UINT32_MAX;
289 entry->offset_long = entry_start;
290 } else {
291 entry->offset = (uint32_t)entry_start;
292 }
293
294 git_oid_cpy(&pentry->sha1, &oid);
295 pentry->offset = entry_start;
296
297 k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error);
298 if (error == -1) {
299 git__free(pentry);
300 giterr_set_oom();
301 goto on_error;
302 }
303
304 if (error == 0) {
305 giterr_set(GITERR_INDEXER, "duplicate object %s found in pack", git_oid_tostr_s(&pentry->sha1));
306 git__free(pentry);
307 goto on_error;
308 }
309
310
311 kh_value(idx->pack->idx_cache, k) = pentry;
312
313 git_oid_cpy(&entry->oid, &oid);
314
315 if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
316 goto on_error;
317
318 /* Add the object to the list */
319 if (git_vector_insert(&idx->objects, entry) < 0)
320 goto on_error;
321
322 for (i = oid.id[0]; i < 256; ++i) {
323 idx->fanout[i]++;
324 }
325
326 return 0;
327
328 on_error:
329 git__free(entry);
330
331 return -1;
332 }
333
334 GIT_INLINE(bool) has_entry(git_indexer *idx, git_oid *id)
335 {
336 khiter_t k;
337 k = kh_get(oid, idx->pack->idx_cache, id);
338 return (k != kh_end(idx->pack->idx_cache));
339 }
340
341 static int save_entry(git_indexer *idx, struct entry *entry, struct git_pack_entry *pentry, git_off_t entry_start)
342 {
343 int i, error;
344 khiter_t k;
345
346 if (entry_start > UINT31_MAX) {
347 entry->offset = UINT32_MAX;
348 entry->offset_long = entry_start;
349 } else {
350 entry->offset = (uint32_t)entry_start;
351 }
352
353 pentry->offset = entry_start;
354 k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error);
355
356 if (error <= 0) {
357 giterr_set(GITERR_INDEXER, "cannot insert object into pack");
358 return -1;
359 }
360
361 kh_value(idx->pack->idx_cache, k) = pentry;
362
363 /* Add the object to the list */
364 if (git_vector_insert(&idx->objects, entry) < 0)
365 return -1;
366
367 for (i = entry->oid.id[0]; i < 256; ++i) {
368 idx->fanout[i]++;
369 }
370
371 return 0;
372 }
373
374 static int hash_and_save(git_indexer *idx, git_rawobj *obj, git_off_t entry_start)
375 {
376 git_oid oid;
377 size_t entry_size;
378 struct entry *entry;
379 struct git_pack_entry *pentry = NULL;
380
381 entry = git__calloc(1, sizeof(*entry));
382 GITERR_CHECK_ALLOC(entry);
383
384 if (git_odb__hashobj(&oid, obj) < 0) {
385 giterr_set(GITERR_INDEXER, "failed to hash object");
386 goto on_error;
387 }
388
389 pentry = git__calloc(1, sizeof(struct git_pack_entry));
390 GITERR_CHECK_ALLOC(pentry);
391
392 git_oid_cpy(&pentry->sha1, &oid);
393 git_oid_cpy(&entry->oid, &oid);
394 entry->crc = crc32(0L, Z_NULL, 0);
395
396 entry_size = (size_t)(idx->off - entry_start);
397 if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
398 goto on_error;
399
400 return save_entry(idx, entry, pentry, entry_start);
401
402 on_error:
403 git__free(pentry);
404 git__free(entry);
405 git__free(obj->data);
406 return -1;
407 }
408
409 static int do_progress_callback(git_indexer *idx, git_transfer_progress *stats)
410 {
411 if (idx->progress_cb)
412 return giterr_set_after_callback_function(
413 idx->progress_cb(stats, idx->progress_payload),
414 "indexer progress");
415 return 0;
416 }
417
418 /* Hash everything but the last 20B of input */
419 static void hash_partially(git_indexer *idx, const uint8_t *data, size_t size)
420 {
421 size_t to_expell, to_keep;
422
423 if (size == 0)
424 return;
425
426 /* Easy case, dump the buffer and the data minus the last 20 bytes */
427 if (size >= GIT_OID_RAWSZ) {
428 git_hash_update(&idx->trailer, idx->inbuf, idx->inbuf_len);
429 git_hash_update(&idx->trailer, data, size - GIT_OID_RAWSZ);
430
431 data += size - GIT_OID_RAWSZ;
432 memcpy(idx->inbuf, data, GIT_OID_RAWSZ);
433 idx->inbuf_len = GIT_OID_RAWSZ;
434 return;
435 }
436
437 /* We can just append */
438 if (idx->inbuf_len + size <= GIT_OID_RAWSZ) {
439 memcpy(idx->inbuf + idx->inbuf_len, data, size);
440 idx->inbuf_len += size;
441 return;
442 }
443
444 /* We need to partially drain the buffer and then append */
445 to_keep = GIT_OID_RAWSZ - size;
446 to_expell = idx->inbuf_len - to_keep;
447
448 git_hash_update(&idx->trailer, idx->inbuf, to_expell);
449
450 memmove(idx->inbuf, idx->inbuf + to_expell, to_keep);
451 memcpy(idx->inbuf + to_keep, data, size);
452 idx->inbuf_len += size - to_expell;
453 }
454
455 static int write_at(git_indexer *idx, const void *data, git_off_t offset, size_t size)
456 {
457 git_file fd = idx->pack->mwf.fd;
458 size_t mmap_alignment;
459 size_t page_offset;
460 git_off_t page_start;
461 unsigned char *map_data;
462 git_map map;
463 int error;
464
465 assert(data && size);
466
467 if ((error = git__mmap_alignment(&mmap_alignment)) < 0)
468 return error;
469
470 /* the offset needs to be at the mmap boundary for the platform */
471 page_offset = offset % mmap_alignment;
472 page_start = offset - page_offset;
473
474 if ((error = p_mmap(&map, page_offset + size, GIT_PROT_WRITE, GIT_MAP_SHARED, fd, page_start)) < 0)
475 return error;
476
477 map_data = (unsigned char *)map.data;
478 memcpy(map_data + page_offset, data, size);
479 p_munmap(&map);
480
481 return 0;
482 }
483
484 static int append_to_pack(git_indexer *idx, const void *data, size_t size)
485 {
486 git_off_t new_size;
487 size_t mmap_alignment;
488 size_t page_offset;
489 git_off_t page_start;
490 git_off_t current_size = idx->pack->mwf.size;
491 int fd = idx->pack->mwf.fd;
492 int error;
493
494 if (!size)
495 return 0;
496
497 if ((error = git__mmap_alignment(&mmap_alignment)) < 0)
498 return error;
499
500 /* Write a single byte to force the file system to allocate space now or
501 * report an error, since we can't report errors when writing using mmap.
502 * Round the size up to the nearest page so that we only need to perform file
503 * I/O when we add a page, instead of whenever we write even a single byte. */
504 new_size = current_size + size;
505 page_offset = new_size % mmap_alignment;
506 page_start = new_size - page_offset;
507
508 if (p_lseek(fd, page_start + mmap_alignment - 1, SEEK_SET) < 0 ||
509 p_write(idx->pack->mwf.fd, data, 1) < 0) {
510 giterr_set(GITERR_OS, "cannot extend packfile '%s'", idx->pack->pack_name);
511 return -1;
512 }
513
514 return write_at(idx, data, idx->pack->mwf.size, size);
515 }
516
517 int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_transfer_progress *stats)
518 {
519 int error = -1;
520 size_t processed;
521 struct git_pack_header *hdr = &idx->hdr;
522 git_mwindow_file *mwf = &idx->pack->mwf;
523
524 assert(idx && data && stats);
525
526 processed = stats->indexed_objects;
527
528 if ((error = append_to_pack(idx, data, size)) < 0)
529 return error;
530
531 hash_partially(idx, data, (int)size);
532
533 /* Make sure we set the new size of the pack */
534 idx->pack->mwf.size += size;
535
536 if (!idx->parsed_header) {
537 unsigned int total_objects;
538
539 if ((unsigned)idx->pack->mwf.size < sizeof(struct git_pack_header))
540 return 0;
541
542 if ((error = parse_header(&idx->hdr, idx->pack)) < 0)
543 return error;
544
545 idx->parsed_header = 1;
546 idx->nr_objects = ntohl(hdr->hdr_entries);
547 idx->off = sizeof(struct git_pack_header);
548
549 /* for now, limit to 2^32 objects */
550 assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));
551 if (idx->nr_objects == (size_t)((unsigned int)idx->nr_objects))
552 total_objects = (unsigned int)idx->nr_objects;
553 else
554 total_objects = UINT_MAX;
555
556 idx->pack->idx_cache = git_oidmap_alloc();
557 GITERR_CHECK_ALLOC(idx->pack->idx_cache);
558
559 idx->pack->has_cache = 1;
560 if (git_vector_init(&idx->objects, total_objects, objects_cmp) < 0)
561 return -1;
562
563 if (git_vector_init(&idx->deltas, total_objects / 2, NULL) < 0)
564 return -1;
565
566 stats->received_objects = 0;
567 stats->local_objects = 0;
568 stats->total_deltas = 0;
569 stats->indexed_deltas = 0;
570 processed = stats->indexed_objects = 0;
571 stats->total_objects = total_objects;
572
573 if ((error = do_progress_callback(idx, stats)) != 0)
574 return error;
575 }
576
577 /* Now that we have data in the pack, let's try to parse it */
578
579 /* As the file grows any windows we try to use will be out of date */
580 git_mwindow_free_all(mwf);
581
582 while (processed < idx->nr_objects) {
583 git_packfile_stream *stream = &idx->stream;
584 git_off_t entry_start = idx->off;
585 size_t entry_size;
586 git_otype type;
587 git_mwindow *w = NULL;
588
589 if (idx->pack->mwf.size <= idx->off + 20)
590 return 0;
591
592 if (!idx->have_stream) {
593 error = git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off);
594 if (error == GIT_EBUFS) {
595 idx->off = entry_start;
596 return 0;
597 }
598 if (error < 0)
599 goto on_error;
600
601 git_mwindow_close(&w);
602 idx->entry_start = entry_start;
603 git_hash_init(&idx->hash_ctx);
604
605 if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) {
606 error = advance_delta_offset(idx, type);
607 if (error == GIT_EBUFS) {
608 idx->off = entry_start;
609 return 0;
610 }
611 if (error < 0)
612 goto on_error;
613
614 idx->have_delta = 1;
615 } else {
616 idx->have_delta = 0;
617 hash_header(&idx->hash_ctx, entry_size, type);
618 }
619
620 idx->have_stream = 1;
621
622 error = git_packfile_stream_open(stream, idx->pack, idx->off);
623 if (error < 0)
624 goto on_error;
625 }
626
627 if (idx->have_delta) {
628 error = read_object_stream(idx, stream);
629 } else {
630 error = hash_object_stream(idx, stream);
631 }
632
633 idx->off = stream->curpos;
634 if (error == GIT_EBUFS)
635 return 0;
636
637 /* We want to free the stream reasorces no matter what here */
638 idx->have_stream = 0;
639 git_packfile_stream_free(stream);
640
641 if (error < 0)
642 goto on_error;
643
644 if (idx->have_delta) {
645 error = store_delta(idx);
646 } else {
647 error = store_object(idx);
648 }
649
650 if (error < 0)
651 goto on_error;
652
653 if (!idx->have_delta) {
654 stats->indexed_objects = (unsigned int)++processed;
655 }
656 stats->received_objects++;
657
658 if ((error = do_progress_callback(idx, stats)) != 0)
659 goto on_error;
660 }
661
662 return 0;
663
664 on_error:
665 git_mwindow_free_all(mwf);
666 return error;
667 }
668
669 static int index_path(git_buf *path, git_indexer *idx, const char *suffix)
670 {
671 const char prefix[] = "pack-";
672 size_t slash = (size_t)path->size;
673
674 /* search backwards for '/' */
675 while (slash > 0 && path->ptr[slash - 1] != '/')
676 slash--;
677
678 if (git_buf_grow(path, slash + 1 + strlen(prefix) +
679 GIT_OID_HEXSZ + strlen(suffix) + 1) < 0)
680 return -1;
681
682 git_buf_truncate(path, slash);
683 git_buf_puts(path, prefix);
684 git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
685 path->size += GIT_OID_HEXSZ;
686 git_buf_puts(path, suffix);
687
688 return git_buf_oom(path) ? -1 : 0;
689 }
690
691 /**
692 * Rewind the packfile by the trailer, as we might need to fix the
693 * packfile by injecting objects at the tail and must overwrite it.
694 */
695 static void seek_back_trailer(git_indexer *idx)
696 {
697 idx->pack->mwf.size -= GIT_OID_RAWSZ;
698 git_mwindow_free_all(&idx->pack->mwf);
699 }
700
701 static int inject_object(git_indexer *idx, git_oid *id)
702 {
703 git_odb_object *obj;
704 struct entry *entry;
705 struct git_pack_entry *pentry = NULL;
706 git_oid foo = {{0}};
707 unsigned char hdr[64];
708 git_buf buf = GIT_BUF_INIT;
709 git_off_t entry_start;
710 const void *data;
711 size_t len, hdr_len;
712 int error;
713
714 seek_back_trailer(idx);
715 entry_start = idx->pack->mwf.size;
716
717 if (git_odb_read(&obj, idx->odb, id) < 0) {
718 giterr_set(GITERR_INDEXER, "missing delta bases");
719 return -1;
720 }
721
722 data = git_odb_object_data(obj);
723 len = git_odb_object_size(obj);
724
725 entry = git__calloc(1, sizeof(*entry));
726 GITERR_CHECK_ALLOC(entry);
727
728 entry->crc = crc32(0L, Z_NULL, 0);
729
730 /* Write out the object header */
731 hdr_len = git_packfile__object_header(hdr, len, git_odb_object_type(obj));
732 if ((error = append_to_pack(idx, hdr, hdr_len)) < 0)
733 goto cleanup;
734
735 idx->pack->mwf.size += hdr_len;
736 entry->crc = crc32(entry->crc, hdr, (uInt)hdr_len);
737
738 if ((error = git_zstream_deflatebuf(&buf, data, len)) < 0)
739 goto cleanup;
740
741 /* And then the compressed object */
742 if ((error = append_to_pack(idx, buf.ptr, buf.size)) < 0)
743 goto cleanup;
744
745 idx->pack->mwf.size += buf.size;
746 entry->crc = htonl(crc32(entry->crc, (unsigned char *)buf.ptr, (uInt)buf.size));
747 git_buf_free(&buf);
748
749 /* Write a fake trailer so the pack functions play ball */
750
751 if ((error = append_to_pack(idx, &foo, GIT_OID_RAWSZ)) < 0)
752 goto cleanup;
753
754 idx->pack->mwf.size += GIT_OID_RAWSZ;
755
756 pentry = git__calloc(1, sizeof(struct git_pack_entry));
757 GITERR_CHECK_ALLOC(pentry);
758
759 git_oid_cpy(&pentry->sha1, id);
760 git_oid_cpy(&entry->oid, id);
761 idx->off = entry_start + hdr_len + len;
762
763 error = save_entry(idx, entry, pentry, entry_start);
764
765 cleanup:
766 if (error) {
767 git__free(entry);
768 git__free(pentry);
769 }
770
771 git_odb_object_free(obj);
772 return error;
773 }
774
775 static int fix_thin_pack(git_indexer *idx, git_transfer_progress *stats)
776 {
777 int error, found_ref_delta = 0;
778 unsigned int i;
779 struct delta_info *delta;
780 size_t size;
781 git_otype type;
782 git_mwindow *w = NULL;
783 git_off_t curpos = 0;
784 unsigned char *base_info;
785 unsigned int left = 0;
786 git_oid base;
787
788 assert(git_vector_length(&idx->deltas) > 0);
789
790 if (idx->odb == NULL) {
791 giterr_set(GITERR_INDEXER, "cannot fix a thin pack without an ODB");
792 return -1;
793 }
794
795 /* Loop until we find the first REF delta */
796 git_vector_foreach(&idx->deltas, i, delta) {
797 if (!delta)
798 continue;
799
800 curpos = delta->delta_off;
801 error = git_packfile_unpack_header(&size, &type, &idx->pack->mwf, &w, &curpos);
802 if (error < 0)
803 return error;
804
805 if (type == GIT_OBJ_REF_DELTA) {
806 found_ref_delta = 1;
807 break;
808 }
809 }
810
811 if (!found_ref_delta) {
812 giterr_set(GITERR_INDEXER, "no REF_DELTA found, cannot inject object");
813 return -1;
814 }
815
816 /* curpos now points to the base information, which is an OID */
817 base_info = git_mwindow_open(&idx->pack->mwf, &w, curpos, GIT_OID_RAWSZ, &left);
818 if (base_info == NULL) {
819 giterr_set(GITERR_INDEXER, "failed to map delta information");
820 return -1;
821 }
822
823 git_oid_fromraw(&base, base_info);
824 git_mwindow_close(&w);
825
826 if (has_entry(idx, &base))
827 return 0;
828
829 if (inject_object(idx, &base) < 0)
830 return -1;
831
832 stats->local_objects++;
833
834 return 0;
835 }
836
837 static int resolve_deltas(git_indexer *idx, git_transfer_progress *stats)
838 {
839 unsigned int i;
840 struct delta_info *delta;
841 int progressed = 0, non_null = 0, progress_cb_result;
842
843 while (idx->deltas.length > 0) {
844 progressed = 0;
845 non_null = 0;
846 git_vector_foreach(&idx->deltas, i, delta) {
847 git_rawobj obj = {NULL};
848
849 if (!delta)
850 continue;
851
852 non_null = 1;
853 idx->off = delta->delta_off;
854 if (git_packfile_unpack(&obj, idx->pack, &idx->off) < 0)
855 continue;
856
857 if (hash_and_save(idx, &obj, delta->delta_off) < 0)
858 continue;
859
860 git__free(obj.data);
861 stats->indexed_objects++;
862 stats->indexed_deltas++;
863 progressed = 1;
864 if ((progress_cb_result = do_progress_callback(idx, stats)) < 0)
865 return progress_cb_result;
866
867 /* remove from the list */
868 git_vector_set(NULL, &idx->deltas, i, NULL);
869 git__free(delta);
870 }
871
872 /* if none were actually set, we're done */
873 if (!non_null)
874 break;
875
876 if (!progressed && (fix_thin_pack(idx, stats) < 0)) {
877 return -1;
878 }
879 }
880
881 return 0;
882 }
883
884 static int update_header_and_rehash(git_indexer *idx, git_transfer_progress *stats)
885 {
886 void *ptr;
887 size_t chunk = 1024*1024;
888 git_off_t hashed = 0;
889 git_mwindow *w = NULL;
890 git_mwindow_file *mwf;
891 unsigned int left;
892
893 mwf = &idx->pack->mwf;
894
895 git_hash_init(&idx->trailer);
896
897
898 /* Update the header to include the numer of local objects we injected */
899 idx->hdr.hdr_entries = htonl(stats->total_objects + stats->local_objects);
900 if (write_at(idx, &idx->hdr, 0, sizeof(struct git_pack_header)) < 0)
901 return -1;
902
903 /*
904 * We now use the same technique as before to determine the
905 * hash. We keep reading up to the end and let
906 * hash_partially() keep the existing trailer out of the
907 * calculation.
908 */
909 git_mwindow_free_all(mwf);
910 idx->inbuf_len = 0;
911 while (hashed < mwf->size) {
912 ptr = git_mwindow_open(mwf, &w, hashed, chunk, &left);
913 if (ptr == NULL)
914 return -1;
915
916 hash_partially(idx, ptr, left);
917 hashed += left;
918
919 git_mwindow_close(&w);
920 }
921
922 return 0;
923 }
924
925 int git_indexer_commit(git_indexer *idx, git_transfer_progress *stats)
926 {
927 git_mwindow *w = NULL;
928 unsigned int i, long_offsets = 0, left;
929 int error;
930 struct git_pack_idx_header hdr;
931 git_buf filename = GIT_BUF_INIT;
932 struct entry *entry;
933 git_oid trailer_hash, file_hash;
934 git_hash_ctx ctx;
935 git_filebuf index_file = {0};
936 void *packfile_trailer;
937
938 if (!idx->parsed_header) {
939 giterr_set(GITERR_INDEXER, "incomplete pack header");
940 return -1;
941 }
942
943 if (git_hash_ctx_init(&ctx) < 0)
944 return -1;
945
946 /* Test for this before resolve_deltas(), as it plays with idx->off */
947 if (idx->off + 20 < idx->pack->mwf.size) {
948 giterr_set(GITERR_INDEXER, "unexpected data at the end of the pack");
949 return -1;
950 }
951
952 packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
953 if (packfile_trailer == NULL) {
954 git_mwindow_close(&w);
955 goto on_error;
956 }
957
958 /* Compare the packfile trailer as it was sent to us and what we calculated */
959 git_oid_fromraw(&file_hash, packfile_trailer);
960 git_mwindow_close(&w);
961
962 git_hash_final(&trailer_hash, &idx->trailer);
963 if (git_oid_cmp(&file_hash, &trailer_hash)) {
964 giterr_set(GITERR_INDEXER, "packfile trailer mismatch");
965 return -1;
966 }
967
968 /* Freeze the number of deltas */
969 stats->total_deltas = stats->total_objects - stats->indexed_objects;
970
971 if ((error = resolve_deltas(idx, stats)) < 0)
972 return error;
973
974 if (stats->indexed_objects != stats->total_objects) {
975 giterr_set(GITERR_INDEXER, "early EOF");
976 return -1;
977 }
978
979 if (stats->local_objects > 0) {
980 if (update_header_and_rehash(idx, stats) < 0)
981 return -1;
982
983 git_hash_final(&trailer_hash, &idx->trailer);
984 write_at(idx, &trailer_hash, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ);
985 }
986
987 git_vector_sort(&idx->objects);
988
989 git_buf_sets(&filename, idx->pack->pack_name);
990 git_buf_shorten(&filename, strlen("pack"));
991 git_buf_puts(&filename, "idx");
992 if (git_buf_oom(&filename))
993 return -1;
994
995 if (git_filebuf_open(&index_file, filename.ptr,
996 GIT_FILEBUF_HASH_CONTENTS, idx->mode) < 0)
997 goto on_error;
998
999 /* Write out the header */
1000 hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
1001 hdr.idx_version = htonl(2);
1002 git_filebuf_write(&index_file, &hdr, sizeof(hdr));
1003
1004 /* Write out the fanout table */
1005 for (i = 0; i < 256; ++i) {
1006 uint32_t n = htonl(idx->fanout[i]);
1007 git_filebuf_write(&index_file, &n, sizeof(n));
1008 }
1009
1010 /* Write out the object names (SHA-1 hashes) */
1011 git_vector_foreach(&idx->objects, i, entry) {
1012 git_filebuf_write(&index_file, &entry->oid, sizeof(git_oid));
1013 git_hash_update(&ctx, &entry->oid, GIT_OID_RAWSZ);
1014 }
1015 git_hash_final(&idx->hash, &ctx);
1016
1017 /* Write out the CRC32 values */
1018 git_vector_foreach(&idx->objects, i, entry) {
1019 git_filebuf_write(&index_file, &entry->crc, sizeof(uint32_t));
1020 }
1021
1022 /* Write out the offsets */
1023 git_vector_foreach(&idx->objects, i, entry) {
1024 uint32_t n;
1025
1026 if (entry->offset == UINT32_MAX)
1027 n = htonl(0x80000000 | long_offsets++);
1028 else
1029 n = htonl(entry->offset);
1030
1031 git_filebuf_write(&index_file, &n, sizeof(uint32_t));
1032 }
1033
1034 /* Write out the long offsets */
1035 git_vector_foreach(&idx->objects, i, entry) {
1036 uint32_t split[2];
1037
1038 if (entry->offset != UINT32_MAX)
1039 continue;
1040
1041 split[0] = htonl(entry->offset_long >> 32);
1042 split[1] = htonl(entry->offset_long & 0xffffffff);
1043
1044 git_filebuf_write(&index_file, &split, sizeof(uint32_t) * 2);
1045 }
1046
1047 /* Write out the packfile trailer to the index */
1048 if (git_filebuf_write(&index_file, &trailer_hash, GIT_OID_RAWSZ) < 0)
1049 goto on_error;
1050
1051 /* Write out the hash of the idx */
1052 if (git_filebuf_hash(&trailer_hash, &index_file) < 0)
1053 goto on_error;
1054
1055 git_filebuf_write(&index_file, &trailer_hash, sizeof(git_oid));
1056
1057 /* Figure out what the final name should be */
1058 if (index_path(&filename, idx, ".idx") < 0)
1059 goto on_error;
1060
1061 /* Commit file */
1062 if (git_filebuf_commit_at(&index_file, filename.ptr) < 0)
1063 goto on_error;
1064
1065 git_mwindow_free_all(&idx->pack->mwf);
1066
1067 /* Truncate file to undo rounding up to next page_size in append_to_pack */
1068 if (p_ftruncate(idx->pack->mwf.fd, idx->pack->mwf.size) < 0) {
1069 giterr_set(GITERR_OS, "failed to truncate pack file '%s'", idx->pack->pack_name);
1070 return -1;
1071 }
1072
1073 /* We need to close the descriptor here so Windows doesn't choke on commit_at */
1074 if (p_close(idx->pack->mwf.fd) < 0) {
1075 giterr_set(GITERR_OS, "failed to close packfile");
1076 goto on_error;
1077 }
1078
1079 idx->pack->mwf.fd = -1;
1080
1081 if (index_path(&filename, idx, ".pack") < 0)
1082 goto on_error;
1083
1084 /* And don't forget to rename the packfile to its new place. */
1085 p_rename(idx->pack->pack_name, git_buf_cstr(&filename));
1086 idx->pack_committed = 1;
1087
1088 git_buf_free(&filename);
1089 git_hash_ctx_cleanup(&ctx);
1090 return 0;
1091
1092 on_error:
1093 git_mwindow_free_all(&idx->pack->mwf);
1094 git_filebuf_cleanup(&index_file);
1095 git_buf_free(&filename);
1096 git_hash_ctx_cleanup(&ctx);
1097 return -1;
1098 }
1099
1100 void git_indexer_free(git_indexer *idx)
1101 {
1102 if (idx == NULL)
1103 return;
1104
1105 git_vector_free_deep(&idx->objects);
1106
1107 if (idx->pack->idx_cache) {
1108 struct git_pack_entry *pentry;
1109 git_oidmap_foreach_value(idx->pack->idx_cache, pentry, {
1110 git__free(pentry);
1111 });
1112
1113 git_oidmap_free(idx->pack->idx_cache);
1114 }
1115
1116 git_vector_free_deep(&idx->deltas);
1117
1118 if (!git_mutex_lock(&git__mwindow_mutex)) {
1119 if (!idx->pack_committed)
1120 git_packfile_close(idx->pack, true);
1121
1122 git_packfile_free(idx->pack);
1123 git_mutex_unlock(&git__mwindow_mutex);
1124 }
1125
1126 git_hash_ctx_cleanup(&idx->trailer);
1127 git_hash_ctx_cleanup(&idx->hash_ctx);
1128 git__free(idx);
1129 }