]> git.proxmox.com Git - libgit2.git/blob - src/indexer.c
Refresh patches
[libgit2.git] / src / indexer.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "indexer.h"
9
10 #include "git2/indexer.h"
11 #include "git2/object.h"
12
13 #include "commit.h"
14 #include "tree.h"
15 #include "tag.h"
16 #include "pack.h"
17 #include "mwindow.h"
18 #include "posix.h"
19 #include "pack.h"
20 #include "filebuf.h"
21 #include "oid.h"
22 #include "oidarray.h"
23 #include "oidmap.h"
24 #include "zstream.h"
25 #include "object.h"
26
27 size_t git_indexer__max_objects = UINT32_MAX;
28
29 #define UINT31_MAX (0x7FFFFFFF)
30
31 struct entry {
32 git_oid oid;
33 uint32_t crc;
34 uint32_t offset;
35 uint64_t offset_long;
36 };
37
38 struct git_indexer {
39 unsigned int parsed_header :1,
40 pack_committed :1,
41 have_stream :1,
42 have_delta :1,
43 do_fsync :1,
44 do_verify :1;
45 struct git_pack_header hdr;
46 struct git_pack_file *pack;
47 unsigned int mode;
48 off64_t off;
49 off64_t entry_start;
50 git_object_t entry_type;
51 git_buf entry_data;
52 git_packfile_stream stream;
53 size_t nr_objects;
54 git_vector objects;
55 git_vector deltas;
56 unsigned int fanout[256];
57 git_hash_ctx hash_ctx;
58 git_oid hash;
59 git_indexer_progress_cb progress_cb;
60 void *progress_payload;
61 char objbuf[8*1024];
62
63 /* OIDs referenced from pack objects. Used for verification. */
64 git_oidmap *expected_oids;
65
66 /* Needed to look up objects which we want to inject to fix a thin pack */
67 git_odb *odb;
68
69 /* Fields for calculating the packfile trailer (hash of everything before it) */
70 char inbuf[GIT_OID_RAWSZ];
71 size_t inbuf_len;
72 git_hash_ctx trailer;
73 };
74
75 struct delta_info {
76 off64_t delta_off;
77 };
78
79 const git_oid *git_indexer_hash(const git_indexer *idx)
80 {
81 return &idx->hash;
82 }
83
84 static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack)
85 {
86 int error;
87 git_map map;
88
89 if ((error = p_mmap(&map, sizeof(*hdr), GIT_PROT_READ, GIT_MAP_SHARED, pack->mwf.fd, 0)) < 0)
90 return error;
91
92 memcpy(hdr, map.data, sizeof(*hdr));
93 p_munmap(&map);
94
95 /* Verify we recognize this pack file format. */
96 if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) {
97 git_error_set(GIT_ERROR_INDEXER, "wrong pack signature");
98 return -1;
99 }
100
101 if (!pack_version_ok(hdr->hdr_version)) {
102 git_error_set(GIT_ERROR_INDEXER, "wrong pack version");
103 return -1;
104 }
105
106 return 0;
107 }
108
109 static int objects_cmp(const void *a, const void *b)
110 {
111 const struct entry *entrya = a;
112 const struct entry *entryb = b;
113
114 return git_oid__cmp(&entrya->oid, &entryb->oid);
115 }
116
117 int git_indexer_options_init(git_indexer_options *opts, unsigned int version)
118 {
119 GIT_INIT_STRUCTURE_FROM_TEMPLATE(
120 opts, version, git_indexer_options, GIT_INDEXER_OPTIONS_INIT);
121 return 0;
122 }
123
124 #ifndef GIT_DEPRECATE_HARD
125 int git_indexer_init_options(git_indexer_options *opts, unsigned int version)
126 {
127 return git_indexer_options_init(opts, version);
128 }
129 #endif
130
131 int git_indexer_new(
132 git_indexer **out,
133 const char *prefix,
134 unsigned int mode,
135 git_odb *odb,
136 git_indexer_options *in_opts)
137 {
138 git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT;
139 git_indexer *idx;
140 git_buf path = GIT_BUF_INIT, tmp_path = GIT_BUF_INIT;
141 static const char suff[] = "/pack";
142 int error, fd = -1;
143
144 if (in_opts)
145 memcpy(&opts, in_opts, sizeof(opts));
146
147 idx = git__calloc(1, sizeof(git_indexer));
148 GIT_ERROR_CHECK_ALLOC(idx);
149 idx->odb = odb;
150 idx->progress_cb = opts.progress_cb;
151 idx->progress_payload = opts.progress_cb_payload;
152 idx->mode = mode ? mode : GIT_PACK_FILE_MODE;
153 git_buf_init(&idx->entry_data, 0);
154
155 if ((error = git_hash_ctx_init(&idx->hash_ctx)) < 0 ||
156 (error = git_hash_ctx_init(&idx->trailer)) < 0 ||
157 (error = git_oidmap_new(&idx->expected_oids)) < 0)
158 goto cleanup;
159
160 idx->do_verify = opts.verify;
161
162 if (git_repository__fsync_gitdir)
163 idx->do_fsync = 1;
164
165 error = git_buf_joinpath(&path, prefix, suff);
166 if (error < 0)
167 goto cleanup;
168
169 fd = git_futils_mktmp(&tmp_path, git_buf_cstr(&path), idx->mode);
170 git_buf_dispose(&path);
171 if (fd < 0)
172 goto cleanup;
173
174 error = git_packfile_alloc(&idx->pack, git_buf_cstr(&tmp_path));
175 git_buf_dispose(&tmp_path);
176
177 if (error < 0)
178 goto cleanup;
179
180 idx->pack->mwf.fd = fd;
181 if ((error = git_mwindow_file_register(&idx->pack->mwf)) < 0)
182 goto cleanup;
183
184 *out = idx;
185 return 0;
186
187 cleanup:
188 if (fd != -1)
189 p_close(fd);
190
191 if (git_buf_len(&tmp_path) > 0)
192 p_unlink(git_buf_cstr(&tmp_path));
193
194 if (idx->pack != NULL)
195 p_unlink(idx->pack->pack_name);
196
197 git_buf_dispose(&path);
198 git_buf_dispose(&tmp_path);
199 git__free(idx);
200 return -1;
201 }
202
203 void git_indexer__set_fsync(git_indexer *idx, int do_fsync)
204 {
205 idx->do_fsync = !!do_fsync;
206 }
207
208 /* Try to store the delta so we can try to resolve it later */
209 static int store_delta(git_indexer *idx)
210 {
211 struct delta_info *delta;
212
213 delta = git__calloc(1, sizeof(struct delta_info));
214 GIT_ERROR_CHECK_ALLOC(delta);
215 delta->delta_off = idx->entry_start;
216
217 if (git_vector_insert(&idx->deltas, delta) < 0)
218 return -1;
219
220 return 0;
221 }
222
223 static int hash_header(git_hash_ctx *ctx, off64_t len, git_object_t type)
224 {
225 char buffer[64];
226 size_t hdrlen;
227 int error;
228
229 if ((error = git_odb__format_object_header(&hdrlen,
230 buffer, sizeof(buffer), (size_t)len, type)) < 0)
231 return error;
232
233 return git_hash_update(ctx, buffer, hdrlen);
234 }
235
236 static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream)
237 {
238 ssize_t read;
239
240 GIT_ASSERT_ARG(idx);
241 GIT_ASSERT_ARG(stream);
242
243 do {
244 if ((read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf))) < 0)
245 break;
246
247 if (idx->do_verify)
248 git_buf_put(&idx->entry_data, idx->objbuf, read);
249
250 git_hash_update(&idx->hash_ctx, idx->objbuf, read);
251 } while (read > 0);
252
253 if (read < 0)
254 return (int)read;
255
256 return 0;
257 }
258
259 /* In order to create the packfile stream, we need to skip over the delta base description */
260 static int advance_delta_offset(git_indexer *idx, git_object_t type)
261 {
262 git_mwindow *w = NULL;
263
264 GIT_ASSERT_ARG(type == GIT_OBJECT_REF_DELTA || type == GIT_OBJECT_OFS_DELTA);
265
266 if (type == GIT_OBJECT_REF_DELTA) {
267 idx->off += GIT_OID_RAWSZ;
268 } else {
269 off64_t base_off;
270 int error = get_delta_base(&base_off, idx->pack, &w, &idx->off, type, idx->entry_start);
271 git_mwindow_close(&w);
272 if (error < 0)
273 return error;
274 }
275
276 return 0;
277 }
278
279 /* Read from the stream and discard any output */
280 static int read_object_stream(git_indexer *idx, git_packfile_stream *stream)
281 {
282 ssize_t read;
283
284 GIT_ASSERT_ARG(stream);
285
286 do {
287 read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf));
288 } while (read > 0);
289
290 if (read < 0)
291 return (int)read;
292
293 return 0;
294 }
295
296 static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, off64_t start, off64_t size)
297 {
298 void *ptr;
299 uint32_t crc;
300 unsigned int left, len;
301 git_mwindow *w = NULL;
302
303 crc = crc32(0L, Z_NULL, 0);
304 while (size) {
305 ptr = git_mwindow_open(mwf, &w, start, (size_t)size, &left);
306 if (ptr == NULL)
307 return -1;
308
309 len = min(left, (unsigned int)size);
310 crc = crc32(crc, ptr, len);
311 size -= len;
312 start += len;
313 git_mwindow_close(&w);
314 }
315
316 *crc_out = htonl(crc);
317 return 0;
318 }
319
320 static int add_expected_oid(git_indexer *idx, const git_oid *oid)
321 {
322 /*
323 * If we know about that object because it is stored in our ODB or
324 * because we have already processed it as part of our pack file, we do
325 * not have to expect it.
326 */
327 if ((!idx->odb || !git_odb_exists(idx->odb, oid)) &&
328 !git_oidmap_exists(idx->pack->idx_cache, oid) &&
329 !git_oidmap_exists(idx->expected_oids, oid)) {
330 git_oid *dup = git__malloc(sizeof(*oid));
331 GIT_ERROR_CHECK_ALLOC(dup);
332 git_oid_cpy(dup, oid);
333 return git_oidmap_set(idx->expected_oids, dup, dup);
334 }
335
336 return 0;
337 }
338
339 static int check_object_connectivity(git_indexer *idx, const git_rawobj *obj)
340 {
341 git_object *object;
342 git_oid *expected;
343 int error;
344
345 if (obj->type != GIT_OBJECT_BLOB &&
346 obj->type != GIT_OBJECT_TREE &&
347 obj->type != GIT_OBJECT_COMMIT &&
348 obj->type != GIT_OBJECT_TAG)
349 return 0;
350
351 if ((error = git_object__from_raw(&object, obj->data, obj->len, obj->type)) < 0)
352 goto out;
353
354 if ((expected = git_oidmap_get(idx->expected_oids, &object->cached.oid)) != NULL) {
355 git_oidmap_delete(idx->expected_oids, &object->cached.oid);
356 git__free(expected);
357 }
358
359 /*
360 * Check whether this is a known object. If so, we can just continue as
361 * we assume that the ODB has a complete graph.
362 */
363 if (idx->odb && git_odb_exists(idx->odb, &object->cached.oid))
364 return 0;
365
366 switch (obj->type) {
367 case GIT_OBJECT_TREE:
368 {
369 git_tree *tree = (git_tree *) object;
370 git_tree_entry *entry;
371 size_t i;
372
373 git_array_foreach(tree->entries, i, entry)
374 if (add_expected_oid(idx, entry->oid) < 0)
375 goto out;
376
377 break;
378 }
379 case GIT_OBJECT_COMMIT:
380 {
381 git_commit *commit = (git_commit *) object;
382 git_oid *parent_oid;
383 size_t i;
384
385 git_array_foreach(commit->parent_ids, i, parent_oid)
386 if (add_expected_oid(idx, parent_oid) < 0)
387 goto out;
388
389 if (add_expected_oid(idx, &commit->tree_id) < 0)
390 goto out;
391
392 break;
393 }
394 case GIT_OBJECT_TAG:
395 {
396 git_tag *tag = (git_tag *) object;
397
398 if (add_expected_oid(idx, &tag->target) < 0)
399 goto out;
400
401 break;
402 }
403 case GIT_OBJECT_BLOB:
404 default:
405 break;
406 }
407
408 out:
409 git_object_free(object);
410
411 return error;
412 }
413
414 static int store_object(git_indexer *idx)
415 {
416 int i, error;
417 git_oid oid;
418 struct entry *entry;
419 off64_t entry_size;
420 struct git_pack_entry *pentry;
421 off64_t entry_start = idx->entry_start;
422
423 entry = git__calloc(1, sizeof(*entry));
424 GIT_ERROR_CHECK_ALLOC(entry);
425
426 pentry = git__calloc(1, sizeof(struct git_pack_entry));
427 GIT_ERROR_CHECK_ALLOC(pentry);
428
429 if (git_hash_final(&oid, &idx->hash_ctx)) {
430 git__free(pentry);
431 goto on_error;
432 }
433 entry_size = idx->off - entry_start;
434 if (entry_start > UINT31_MAX) {
435 entry->offset = UINT32_MAX;
436 entry->offset_long = entry_start;
437 } else {
438 entry->offset = (uint32_t)entry_start;
439 }
440
441 if (idx->do_verify) {
442 git_rawobj rawobj = {
443 idx->entry_data.ptr,
444 idx->entry_data.size,
445 idx->entry_type
446 };
447
448 if ((error = check_object_connectivity(idx, &rawobj)) < 0)
449 goto on_error;
450 }
451
452 git_oid_cpy(&pentry->sha1, &oid);
453 pentry->offset = entry_start;
454
455 if (git_oidmap_exists(idx->pack->idx_cache, &pentry->sha1)) {
456 git_error_set(GIT_ERROR_INDEXER, "duplicate object %s found in pack", git_oid_tostr_s(&pentry->sha1));
457 git__free(pentry);
458 goto on_error;
459 }
460
461 if ((error = git_oidmap_set(idx->pack->idx_cache, &pentry->sha1, pentry)) < 0) {
462 git__free(pentry);
463 git_error_set_oom();
464 goto on_error;
465 }
466
467 git_oid_cpy(&entry->oid, &oid);
468
469 if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
470 goto on_error;
471
472 /* Add the object to the list */
473 if (git_vector_insert(&idx->objects, entry) < 0)
474 goto on_error;
475
476 for (i = oid.id[0]; i < 256; ++i) {
477 idx->fanout[i]++;
478 }
479
480 return 0;
481
482 on_error:
483 git__free(entry);
484
485 return -1;
486 }
487
488 GIT_INLINE(bool) has_entry(git_indexer *idx, git_oid *id)
489 {
490 return git_oidmap_exists(idx->pack->idx_cache, id);
491 }
492
493 static int save_entry(git_indexer *idx, struct entry *entry, struct git_pack_entry *pentry, off64_t entry_start)
494 {
495 int i;
496
497 if (entry_start > UINT31_MAX) {
498 entry->offset = UINT32_MAX;
499 entry->offset_long = entry_start;
500 } else {
501 entry->offset = (uint32_t)entry_start;
502 }
503
504 pentry->offset = entry_start;
505
506 if (git_oidmap_exists(idx->pack->idx_cache, &pentry->sha1) ||
507 git_oidmap_set(idx->pack->idx_cache, &pentry->sha1, pentry) < 0) {
508 git_error_set(GIT_ERROR_INDEXER, "cannot insert object into pack");
509 return -1;
510 }
511
512 /* Add the object to the list */
513 if (git_vector_insert(&idx->objects, entry) < 0)
514 return -1;
515
516 for (i = entry->oid.id[0]; i < 256; ++i) {
517 idx->fanout[i]++;
518 }
519
520 return 0;
521 }
522
523 static int hash_and_save(git_indexer *idx, git_rawobj *obj, off64_t entry_start)
524 {
525 git_oid oid;
526 size_t entry_size;
527 struct entry *entry;
528 struct git_pack_entry *pentry = NULL;
529
530 entry = git__calloc(1, sizeof(*entry));
531 GIT_ERROR_CHECK_ALLOC(entry);
532
533 if (git_odb__hashobj(&oid, obj) < 0) {
534 git_error_set(GIT_ERROR_INDEXER, "failed to hash object");
535 goto on_error;
536 }
537
538 pentry = git__calloc(1, sizeof(struct git_pack_entry));
539 GIT_ERROR_CHECK_ALLOC(pentry);
540
541 git_oid_cpy(&pentry->sha1, &oid);
542 git_oid_cpy(&entry->oid, &oid);
543 entry->crc = crc32(0L, Z_NULL, 0);
544
545 entry_size = (size_t)(idx->off - entry_start);
546 if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
547 goto on_error;
548
549 return save_entry(idx, entry, pentry, entry_start);
550
551 on_error:
552 git__free(pentry);
553 git__free(entry);
554 git__free(obj->data);
555 return -1;
556 }
557
558 static int do_progress_callback(git_indexer *idx, git_indexer_progress *stats)
559 {
560 if (idx->progress_cb)
561 return git_error_set_after_callback_function(
562 idx->progress_cb(stats, idx->progress_payload),
563 "indexer progress");
564 return 0;
565 }
566
567 /* Hash everything but the last 20B of input */
568 static void hash_partially(git_indexer *idx, const uint8_t *data, size_t size)
569 {
570 size_t to_expell, to_keep;
571
572 if (size == 0)
573 return;
574
575 /* Easy case, dump the buffer and the data minus the last 20 bytes */
576 if (size >= GIT_OID_RAWSZ) {
577 git_hash_update(&idx->trailer, idx->inbuf, idx->inbuf_len);
578 git_hash_update(&idx->trailer, data, size - GIT_OID_RAWSZ);
579
580 data += size - GIT_OID_RAWSZ;
581 memcpy(idx->inbuf, data, GIT_OID_RAWSZ);
582 idx->inbuf_len = GIT_OID_RAWSZ;
583 return;
584 }
585
586 /* We can just append */
587 if (idx->inbuf_len + size <= GIT_OID_RAWSZ) {
588 memcpy(idx->inbuf + idx->inbuf_len, data, size);
589 idx->inbuf_len += size;
590 return;
591 }
592
593 /* We need to partially drain the buffer and then append */
594 to_keep = GIT_OID_RAWSZ - size;
595 to_expell = idx->inbuf_len - to_keep;
596
597 git_hash_update(&idx->trailer, idx->inbuf, to_expell);
598
599 memmove(idx->inbuf, idx->inbuf + to_expell, to_keep);
600 memcpy(idx->inbuf + to_keep, data, size);
601 idx->inbuf_len += size - to_expell;
602 }
603
604 #if defined(NO_MMAP) || !defined(GIT_WIN32)
605
606 static int write_at(git_indexer *idx, const void *data, off64_t offset, size_t size)
607 {
608 size_t remaining_size = size;
609 const char *ptr = (const char *)data;
610
611 /* Handle data size larger that ssize_t */
612 while (remaining_size > 0) {
613 ssize_t nb;
614 HANDLE_EINTR(nb, p_pwrite(idx->pack->mwf.fd, (void *)ptr,
615 remaining_size, offset));
616 if (nb <= 0)
617 return -1;
618
619 ptr += nb;
620 offset += nb;
621 remaining_size -= nb;
622 }
623
624 return 0;
625 }
626
627 static int append_to_pack(git_indexer *idx, const void *data, size_t size)
628 {
629 if (write_at(idx, data, idx->pack->mwf.size, size) < 0) {
630 git_error_set(GIT_ERROR_OS, "cannot extend packfile '%s'", idx->pack->pack_name);
631 return -1;
632 }
633
634 return 0;
635 }
636
637 #else
638
639 /*
640 * Windows may keep different views to a networked file for the mmap- and
641 * open-accessed versions of a file, so any writes done through
642 * `write(2)`/`pwrite(2)` may not be reflected on the data that `mmap(2)` is
643 * able to read.
644 */
645
646 static int write_at(git_indexer *idx, const void *data, off64_t offset, size_t size)
647 {
648 git_file fd = idx->pack->mwf.fd;
649 size_t mmap_alignment;
650 size_t page_offset;
651 off64_t page_start;
652 unsigned char *map_data;
653 git_map map;
654 int error;
655
656 GIT_ASSERT_ARG(data);
657 GIT_ASSERT_ARG(size);
658
659 if ((error = git__mmap_alignment(&mmap_alignment)) < 0)
660 return error;
661
662 /* the offset needs to be at the mmap boundary for the platform */
663 page_offset = offset % mmap_alignment;
664 page_start = offset - page_offset;
665
666 if ((error = p_mmap(&map, page_offset + size, GIT_PROT_WRITE, GIT_MAP_SHARED, fd, page_start)) < 0)
667 return error;
668
669 map_data = (unsigned char *)map.data;
670 memcpy(map_data + page_offset, data, size);
671 p_munmap(&map);
672
673 return 0;
674 }
675
676 static int append_to_pack(git_indexer *idx, const void *data, size_t size)
677 {
678 off64_t new_size;
679 size_t mmap_alignment;
680 size_t page_offset;
681 off64_t page_start;
682 off64_t current_size = idx->pack->mwf.size;
683 int error;
684
685 if (!size)
686 return 0;
687
688 if ((error = git__mmap_alignment(&mmap_alignment)) < 0)
689 return error;
690
691 /* Write a single byte to force the file system to allocate space now or
692 * report an error, since we can't report errors when writing using mmap.
693 * Round the size up to the nearest page so that we only need to perform file
694 * I/O when we add a page, instead of whenever we write even a single byte. */
695 new_size = current_size + size;
696 page_offset = new_size % mmap_alignment;
697 page_start = new_size - page_offset;
698
699 if (p_pwrite(idx->pack->mwf.fd, data, 1, page_start + mmap_alignment - 1) < 0) {
700 git_error_set(GIT_ERROR_OS, "cannot extend packfile '%s'", idx->pack->pack_name);
701 return -1;
702 }
703
704 return write_at(idx, data, idx->pack->mwf.size, size);
705 }
706
707 #endif
708
709 static int read_stream_object(git_indexer *idx, git_indexer_progress *stats)
710 {
711 git_packfile_stream *stream = &idx->stream;
712 off64_t entry_start = idx->off;
713 size_t entry_size;
714 git_object_t type;
715 git_mwindow *w = NULL;
716 int error;
717
718 if (idx->pack->mwf.size <= idx->off + 20)
719 return GIT_EBUFS;
720
721 if (!idx->have_stream) {
722 error = git_packfile_unpack_header(&entry_size, &type, idx->pack, &w, &idx->off);
723 if (error == GIT_EBUFS) {
724 idx->off = entry_start;
725 return error;
726 }
727 if (error < 0)
728 return error;
729
730 git_mwindow_close(&w);
731 idx->entry_start = entry_start;
732 git_hash_init(&idx->hash_ctx);
733 git_buf_clear(&idx->entry_data);
734
735 if (type == GIT_OBJECT_REF_DELTA || type == GIT_OBJECT_OFS_DELTA) {
736 error = advance_delta_offset(idx, type);
737 if (error == GIT_EBUFS) {
738 idx->off = entry_start;
739 return error;
740 }
741 if (error < 0)
742 return error;
743
744 idx->have_delta = 1;
745 } else {
746 idx->have_delta = 0;
747
748 error = hash_header(&idx->hash_ctx, entry_size, type);
749 if (error < 0)
750 return error;
751 }
752
753 idx->have_stream = 1;
754 idx->entry_type = type;
755
756 error = git_packfile_stream_open(stream, idx->pack, idx->off);
757 if (error < 0)
758 return error;
759 }
760
761 if (idx->have_delta) {
762 error = read_object_stream(idx, stream);
763 } else {
764 error = hash_object_stream(idx, stream);
765 }
766
767 idx->off = stream->curpos;
768 if (error == GIT_EBUFS)
769 return error;
770
771 /* We want to free the stream reasorces no matter what here */
772 idx->have_stream = 0;
773 git_packfile_stream_dispose(stream);
774
775 if (error < 0)
776 return error;
777
778 if (idx->have_delta) {
779 error = store_delta(idx);
780 } else {
781 error = store_object(idx);
782 }
783
784 if (error < 0)
785 return error;
786
787 if (!idx->have_delta) {
788 stats->indexed_objects++;
789 }
790 stats->received_objects++;
791
792 if ((error = do_progress_callback(idx, stats)) != 0)
793 return error;
794
795 return 0;
796 }
797
798 int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_indexer_progress *stats)
799 {
800 int error = -1;
801 struct git_pack_header *hdr = &idx->hdr;
802 git_mwindow_file *mwf = &idx->pack->mwf;
803
804 GIT_ASSERT_ARG(idx);
805 GIT_ASSERT_ARG(data);
806 GIT_ASSERT_ARG(stats);
807
808 if ((error = append_to_pack(idx, data, size)) < 0)
809 return error;
810
811 hash_partially(idx, data, (int)size);
812
813 /* Make sure we set the new size of the pack */
814 idx->pack->mwf.size += size;
815
816 if (!idx->parsed_header) {
817 unsigned int total_objects;
818
819 if ((unsigned)idx->pack->mwf.size < sizeof(struct git_pack_header))
820 return 0;
821
822 if ((error = parse_header(&idx->hdr, idx->pack)) < 0)
823 return error;
824
825 idx->parsed_header = 1;
826 idx->nr_objects = ntohl(hdr->hdr_entries);
827 idx->off = sizeof(struct git_pack_header);
828
829 if (idx->nr_objects <= git_indexer__max_objects) {
830 total_objects = (unsigned int)idx->nr_objects;
831 } else {
832 git_error_set(GIT_ERROR_INDEXER, "too many objects");
833 return -1;
834 }
835
836 if (git_oidmap_new(&idx->pack->idx_cache) < 0)
837 return -1;
838
839 idx->pack->has_cache = 1;
840 if (git_vector_init(&idx->objects, total_objects, objects_cmp) < 0)
841 return -1;
842
843 if (git_vector_init(&idx->deltas, total_objects / 2, NULL) < 0)
844 return -1;
845
846 stats->received_objects = 0;
847 stats->local_objects = 0;
848 stats->total_deltas = 0;
849 stats->indexed_deltas = 0;
850 stats->indexed_objects = 0;
851 stats->total_objects = total_objects;
852
853 if ((error = do_progress_callback(idx, stats)) != 0)
854 return error;
855 }
856
857 /* Now that we have data in the pack, let's try to parse it */
858
859 /* As the file grows any windows we try to use will be out of date */
860 if ((error = git_mwindow_free_all(mwf)) < 0)
861 goto on_error;
862
863 while (stats->indexed_objects < idx->nr_objects) {
864 if ((error = read_stream_object(idx, stats)) != 0) {
865 if (error == GIT_EBUFS)
866 break;
867 else
868 goto on_error;
869 }
870 }
871
872 return 0;
873
874 on_error:
875 git_mwindow_free_all(mwf);
876 return error;
877 }
878
879 static int index_path(git_buf *path, git_indexer *idx, const char *suffix)
880 {
881 const char prefix[] = "pack-";
882 size_t slash = (size_t)path->size;
883
884 /* search backwards for '/' */
885 while (slash > 0 && path->ptr[slash - 1] != '/')
886 slash--;
887
888 if (git_buf_grow(path, slash + 1 + strlen(prefix) +
889 GIT_OID_HEXSZ + strlen(suffix) + 1) < 0)
890 return -1;
891
892 git_buf_truncate(path, slash);
893 git_buf_puts(path, prefix);
894 git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
895 path->size += GIT_OID_HEXSZ;
896 git_buf_puts(path, suffix);
897
898 return git_buf_oom(path) ? -1 : 0;
899 }
900
901 /**
902 * Rewind the packfile by the trailer, as we might need to fix the
903 * packfile by injecting objects at the tail and must overwrite it.
904 */
905 static int seek_back_trailer(git_indexer *idx)
906 {
907 idx->pack->mwf.size -= GIT_OID_RAWSZ;
908 return git_mwindow_free_all(&idx->pack->mwf);
909 }
910
911 static int inject_object(git_indexer *idx, git_oid *id)
912 {
913 git_odb_object *obj = NULL;
914 struct entry *entry = NULL;
915 struct git_pack_entry *pentry = NULL;
916 git_oid foo = {{0}};
917 unsigned char hdr[64];
918 git_buf buf = GIT_BUF_INIT;
919 off64_t entry_start;
920 const void *data;
921 size_t len, hdr_len;
922 int error;
923
924 if ((error = seek_back_trailer(idx)) < 0)
925 goto cleanup;
926
927 entry_start = idx->pack->mwf.size;
928
929 if ((error = git_odb_read(&obj, idx->odb, id)) < 0) {
930 git_error_set(GIT_ERROR_INDEXER, "missing delta bases");
931 goto cleanup;
932 }
933
934 data = git_odb_object_data(obj);
935 len = git_odb_object_size(obj);
936
937 entry = git__calloc(1, sizeof(*entry));
938 GIT_ERROR_CHECK_ALLOC(entry);
939
940 entry->crc = crc32(0L, Z_NULL, 0);
941
942 /* Write out the object header */
943 if ((error = git_packfile__object_header(&hdr_len, hdr, len, git_odb_object_type(obj))) < 0 ||
944 (error = append_to_pack(idx, hdr, hdr_len)) < 0)
945 goto cleanup;
946
947 idx->pack->mwf.size += hdr_len;
948 entry->crc = crc32(entry->crc, hdr, (uInt)hdr_len);
949
950 if ((error = git_zstream_deflatebuf(&buf, data, len)) < 0)
951 goto cleanup;
952
953 /* And then the compressed object */
954 if ((error = append_to_pack(idx, buf.ptr, buf.size)) < 0)
955 goto cleanup;
956
957 idx->pack->mwf.size += buf.size;
958 entry->crc = htonl(crc32(entry->crc, (unsigned char *)buf.ptr, (uInt)buf.size));
959 git_buf_dispose(&buf);
960
961 /* Write a fake trailer so the pack functions play ball */
962
963 if ((error = append_to_pack(idx, &foo, GIT_OID_RAWSZ)) < 0)
964 goto cleanup;
965
966 idx->pack->mwf.size += GIT_OID_RAWSZ;
967
968 pentry = git__calloc(1, sizeof(struct git_pack_entry));
969 GIT_ERROR_CHECK_ALLOC(pentry);
970
971 git_oid_cpy(&pentry->sha1, id);
972 git_oid_cpy(&entry->oid, id);
973 idx->off = entry_start + hdr_len + len;
974
975 error = save_entry(idx, entry, pentry, entry_start);
976
977 cleanup:
978 if (error) {
979 git__free(entry);
980 git__free(pentry);
981 }
982
983 git_odb_object_free(obj);
984 return error;
985 }
986
987 static int fix_thin_pack(git_indexer *idx, git_indexer_progress *stats)
988 {
989 int error, found_ref_delta = 0;
990 unsigned int i;
991 struct delta_info *delta;
992 size_t size;
993 git_object_t type;
994 git_mwindow *w = NULL;
995 off64_t curpos = 0;
996 unsigned char *base_info;
997 unsigned int left = 0;
998 git_oid base;
999
1000 GIT_ASSERT(git_vector_length(&idx->deltas) > 0);
1001
1002 if (idx->odb == NULL) {
1003 git_error_set(GIT_ERROR_INDEXER, "cannot fix a thin pack without an ODB");
1004 return -1;
1005 }
1006
1007 /* Loop until we find the first REF delta */
1008 git_vector_foreach(&idx->deltas, i, delta) {
1009 if (!delta)
1010 continue;
1011
1012 curpos = delta->delta_off;
1013 error = git_packfile_unpack_header(&size, &type, idx->pack, &w, &curpos);
1014 if (error < 0)
1015 return error;
1016
1017 if (type == GIT_OBJECT_REF_DELTA) {
1018 found_ref_delta = 1;
1019 break;
1020 }
1021 }
1022
1023 if (!found_ref_delta) {
1024 git_error_set(GIT_ERROR_INDEXER, "no REF_DELTA found, cannot inject object");
1025 return -1;
1026 }
1027
1028 /* curpos now points to the base information, which is an OID */
1029 base_info = git_mwindow_open(&idx->pack->mwf, &w, curpos, GIT_OID_RAWSZ, &left);
1030 if (base_info == NULL) {
1031 git_error_set(GIT_ERROR_INDEXER, "failed to map delta information");
1032 return -1;
1033 }
1034
1035 git_oid_fromraw(&base, base_info);
1036 git_mwindow_close(&w);
1037
1038 if (has_entry(idx, &base))
1039 return 0;
1040
1041 if (inject_object(idx, &base) < 0)
1042 return -1;
1043
1044 stats->local_objects++;
1045
1046 return 0;
1047 }
1048
1049 static int resolve_deltas(git_indexer *idx, git_indexer_progress *stats)
1050 {
1051 unsigned int i;
1052 int error;
1053 struct delta_info *delta;
1054 int progressed = 0, non_null = 0, progress_cb_result;
1055
1056 while (idx->deltas.length > 0) {
1057 progressed = 0;
1058 non_null = 0;
1059 git_vector_foreach(&idx->deltas, i, delta) {
1060 git_rawobj obj = {0};
1061
1062 if (!delta)
1063 continue;
1064
1065 non_null = 1;
1066 idx->off = delta->delta_off;
1067 if ((error = git_packfile_unpack(&obj, idx->pack, &idx->off)) < 0) {
1068 if (error == GIT_PASSTHROUGH) {
1069 /* We have not seen the base object, we'll try again later. */
1070 continue;
1071 }
1072 return -1;
1073 }
1074
1075 if (idx->do_verify && check_object_connectivity(idx, &obj) < 0)
1076 /* TODO: error? continue? */
1077 continue;
1078
1079 if (hash_and_save(idx, &obj, delta->delta_off) < 0)
1080 continue;
1081
1082 git__free(obj.data);
1083 stats->indexed_objects++;
1084 stats->indexed_deltas++;
1085 progressed = 1;
1086 if ((progress_cb_result = do_progress_callback(idx, stats)) < 0)
1087 return progress_cb_result;
1088
1089 /* remove from the list */
1090 git_vector_set(NULL, &idx->deltas, i, NULL);
1091 git__free(delta);
1092 }
1093
1094 /* if none were actually set, we're done */
1095 if (!non_null)
1096 break;
1097
1098 if (!progressed && (fix_thin_pack(idx, stats) < 0)) {
1099 return -1;
1100 }
1101 }
1102
1103 return 0;
1104 }
1105
1106 static int update_header_and_rehash(git_indexer *idx, git_indexer_progress *stats)
1107 {
1108 void *ptr;
1109 size_t chunk = 1024*1024;
1110 off64_t hashed = 0;
1111 git_mwindow *w = NULL;
1112 git_mwindow_file *mwf;
1113 unsigned int left;
1114
1115 mwf = &idx->pack->mwf;
1116
1117 git_hash_init(&idx->trailer);
1118
1119
1120 /* Update the header to include the numer of local objects we injected */
1121 idx->hdr.hdr_entries = htonl(stats->total_objects + stats->local_objects);
1122 if (write_at(idx, &idx->hdr, 0, sizeof(struct git_pack_header)) < 0)
1123 return -1;
1124
1125 /*
1126 * We now use the same technique as before to determine the
1127 * hash. We keep reading up to the end and let
1128 * hash_partially() keep the existing trailer out of the
1129 * calculation.
1130 */
1131 if (git_mwindow_free_all(mwf) < 0)
1132 return -1;
1133
1134 idx->inbuf_len = 0;
1135 while (hashed < mwf->size) {
1136 ptr = git_mwindow_open(mwf, &w, hashed, chunk, &left);
1137 if (ptr == NULL)
1138 return -1;
1139
1140 hash_partially(idx, ptr, left);
1141 hashed += left;
1142
1143 git_mwindow_close(&w);
1144 }
1145
1146 return 0;
1147 }
1148
1149 int git_indexer_commit(git_indexer *idx, git_indexer_progress *stats)
1150 {
1151 git_mwindow *w = NULL;
1152 unsigned int i, long_offsets = 0, left;
1153 int error;
1154 struct git_pack_idx_header hdr;
1155 git_buf filename = GIT_BUF_INIT;
1156 struct entry *entry;
1157 git_oid trailer_hash, file_hash;
1158 git_filebuf index_file = {0};
1159 void *packfile_trailer;
1160
1161 if (!idx->parsed_header) {
1162 git_error_set(GIT_ERROR_INDEXER, "incomplete pack header");
1163 return -1;
1164 }
1165
1166 /* Test for this before resolve_deltas(), as it plays with idx->off */
1167 if (idx->off + 20 < idx->pack->mwf.size) {
1168 git_error_set(GIT_ERROR_INDEXER, "unexpected data at the end of the pack");
1169 return -1;
1170 }
1171 if (idx->off + 20 > idx->pack->mwf.size) {
1172 git_error_set(GIT_ERROR_INDEXER, "missing trailer at the end of the pack");
1173 return -1;
1174 }
1175
1176 packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
1177 if (packfile_trailer == NULL) {
1178 git_mwindow_close(&w);
1179 goto on_error;
1180 }
1181
1182 /* Compare the packfile trailer as it was sent to us and what we calculated */
1183 git_oid_fromraw(&file_hash, packfile_trailer);
1184 git_mwindow_close(&w);
1185
1186 git_hash_final(&trailer_hash, &idx->trailer);
1187 if (git_oid_cmp(&file_hash, &trailer_hash)) {
1188 git_error_set(GIT_ERROR_INDEXER, "packfile trailer mismatch");
1189 return -1;
1190 }
1191
1192 /* Freeze the number of deltas */
1193 stats->total_deltas = stats->total_objects - stats->indexed_objects;
1194
1195 if ((error = resolve_deltas(idx, stats)) < 0)
1196 return error;
1197
1198 if (stats->indexed_objects != stats->total_objects) {
1199 git_error_set(GIT_ERROR_INDEXER, "early EOF");
1200 return -1;
1201 }
1202
1203 if (stats->local_objects > 0) {
1204 if (update_header_and_rehash(idx, stats) < 0)
1205 return -1;
1206
1207 git_hash_final(&trailer_hash, &idx->trailer);
1208 write_at(idx, &trailer_hash, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ);
1209 }
1210
1211 /*
1212 * Is the resulting graph fully connected or are we still
1213 * missing some objects? In the second case, we can
1214 * bail out due to an incomplete and thus corrupt
1215 * packfile.
1216 */
1217 if (git_oidmap_size(idx->expected_oids) > 0) {
1218 git_error_set(GIT_ERROR_INDEXER, "packfile is missing %"PRIuZ" objects",
1219 git_oidmap_size(idx->expected_oids));
1220 return -1;
1221 }
1222
1223 git_vector_sort(&idx->objects);
1224
1225 /* Use the trailer hash as the pack file name to ensure
1226 * files with different contents have different names */
1227 git_oid_cpy(&idx->hash, &trailer_hash);
1228
1229 git_buf_sets(&filename, idx->pack->pack_name);
1230 git_buf_shorten(&filename, strlen("pack"));
1231 git_buf_puts(&filename, "idx");
1232 if (git_buf_oom(&filename))
1233 return -1;
1234
1235 if (git_filebuf_open(&index_file, filename.ptr,
1236 GIT_FILEBUF_HASH_CONTENTS |
1237 (idx->do_fsync ? GIT_FILEBUF_FSYNC : 0),
1238 idx->mode) < 0)
1239 goto on_error;
1240
1241 /* Write out the header */
1242 hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
1243 hdr.idx_version = htonl(2);
1244 git_filebuf_write(&index_file, &hdr, sizeof(hdr));
1245
1246 /* Write out the fanout table */
1247 for (i = 0; i < 256; ++i) {
1248 uint32_t n = htonl(idx->fanout[i]);
1249 git_filebuf_write(&index_file, &n, sizeof(n));
1250 }
1251
1252 /* Write out the object names (SHA-1 hashes) */
1253 git_vector_foreach(&idx->objects, i, entry) {
1254 git_filebuf_write(&index_file, &entry->oid, sizeof(git_oid));
1255 }
1256
1257 /* Write out the CRC32 values */
1258 git_vector_foreach(&idx->objects, i, entry) {
1259 git_filebuf_write(&index_file, &entry->crc, sizeof(uint32_t));
1260 }
1261
1262 /* Write out the offsets */
1263 git_vector_foreach(&idx->objects, i, entry) {
1264 uint32_t n;
1265
1266 if (entry->offset == UINT32_MAX)
1267 n = htonl(0x80000000 | long_offsets++);
1268 else
1269 n = htonl(entry->offset);
1270
1271 git_filebuf_write(&index_file, &n, sizeof(uint32_t));
1272 }
1273
1274 /* Write out the long offsets */
1275 git_vector_foreach(&idx->objects, i, entry) {
1276 uint32_t split[2];
1277
1278 if (entry->offset != UINT32_MAX)
1279 continue;
1280
1281 split[0] = htonl(entry->offset_long >> 32);
1282 split[1] = htonl(entry->offset_long & 0xffffffff);
1283
1284 git_filebuf_write(&index_file, &split, sizeof(uint32_t) * 2);
1285 }
1286
1287 /* Write out the packfile trailer to the index */
1288 if (git_filebuf_write(&index_file, &trailer_hash, GIT_OID_RAWSZ) < 0)
1289 goto on_error;
1290
1291 /* Write out the hash of the idx */
1292 if (git_filebuf_hash(&trailer_hash, &index_file) < 0)
1293 goto on_error;
1294
1295 git_filebuf_write(&index_file, &trailer_hash, sizeof(git_oid));
1296
1297 /* Figure out what the final name should be */
1298 if (index_path(&filename, idx, ".idx") < 0)
1299 goto on_error;
1300
1301 /* Commit file */
1302 if (git_filebuf_commit_at(&index_file, filename.ptr) < 0)
1303 goto on_error;
1304
1305 if (git_mwindow_free_all(&idx->pack->mwf) < 0)
1306 goto on_error;
1307
1308 #if !defined(NO_MMAP) && defined(GIT_WIN32)
1309 /*
1310 * Some non-Windows remote filesystems fail when truncating files if the
1311 * file permissions change after opening the file (done by p_mkstemp).
1312 *
1313 * Truncation is only needed when mmap is used to undo rounding up to next
1314 * page_size in append_to_pack.
1315 */
1316 if (p_ftruncate(idx->pack->mwf.fd, idx->pack->mwf.size) < 0) {
1317 git_error_set(GIT_ERROR_OS, "failed to truncate pack file '%s'", idx->pack->pack_name);
1318 return -1;
1319 }
1320 #endif
1321
1322 if (idx->do_fsync && p_fsync(idx->pack->mwf.fd) < 0) {
1323 git_error_set(GIT_ERROR_OS, "failed to fsync packfile");
1324 goto on_error;
1325 }
1326
1327 /* We need to close the descriptor here so Windows doesn't choke on commit_at */
1328 if (p_close(idx->pack->mwf.fd) < 0) {
1329 git_error_set(GIT_ERROR_OS, "failed to close packfile");
1330 goto on_error;
1331 }
1332
1333 idx->pack->mwf.fd = -1;
1334
1335 if (index_path(&filename, idx, ".pack") < 0)
1336 goto on_error;
1337
1338 /* And don't forget to rename the packfile to its new place. */
1339 if (p_rename(idx->pack->pack_name, git_buf_cstr(&filename)) < 0)
1340 goto on_error;
1341
1342 /* And fsync the parent directory if we're asked to. */
1343 if (idx->do_fsync &&
1344 git_futils_fsync_parent(git_buf_cstr(&filename)) < 0)
1345 goto on_error;
1346
1347 idx->pack_committed = 1;
1348
1349 git_buf_dispose(&filename);
1350 return 0;
1351
1352 on_error:
1353 git_mwindow_free_all(&idx->pack->mwf);
1354 git_filebuf_cleanup(&index_file);
1355 git_buf_dispose(&filename);
1356 return -1;
1357 }
1358
1359 void git_indexer_free(git_indexer *idx)
1360 {
1361 const git_oid *key;
1362 git_oid *value;
1363 size_t iter;
1364
1365 if (idx == NULL)
1366 return;
1367
1368 if (idx->have_stream)
1369 git_packfile_stream_dispose(&idx->stream);
1370
1371 git_vector_free_deep(&idx->objects);
1372
1373 if (idx->pack->idx_cache) {
1374 struct git_pack_entry *pentry;
1375 git_oidmap_foreach_value(idx->pack->idx_cache, pentry, {
1376 git__free(pentry);
1377 });
1378
1379 git_oidmap_free(idx->pack->idx_cache);
1380 }
1381
1382 git_vector_free_deep(&idx->deltas);
1383
1384 git_packfile_free(idx->pack, !idx->pack_committed);
1385
1386 iter = 0;
1387 while (git_oidmap_iterate((void **) &value, idx->expected_oids, &iter, &key) == 0)
1388 git__free(value);
1389
1390 git_hash_ctx_cleanup(&idx->trailer);
1391 git_hash_ctx_cleanup(&idx->hash_ctx);
1392 git_buf_dispose(&idx->entry_data);
1393 git_oidmap_free(idx->expected_oids);
1394 git__free(idx);
1395 }