2 * Copyright (C) the libgit2 contributors. All rights reserved.
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
10 #include "git2/indexer.h"
11 #include "git2/object.h"
26 extern git_mutex git__mwindow_mutex
;
28 size_t git_indexer__max_objects
= UINT32_MAX
;
30 #define UINT31_MAX (0x7FFFFFFF)
40 unsigned int parsed_header
:1,
46 struct git_pack_header hdr
;
47 struct git_pack_file
*pack
;
50 git_off_t entry_start
;
51 git_object_t entry_type
;
53 git_packfile_stream stream
;
57 unsigned int fanout
[256];
58 git_hash_ctx hash_ctx
;
60 git_transfer_progress_cb progress_cb
;
61 void *progress_payload
;
64 /* OIDs referenced from pack objects. Used for verification. */
65 git_oidmap
*expected_oids
;
67 /* Needed to look up objects which we want to inject to fix a thin pack */
70 /* Fields for calculating the packfile trailer (hash of everything before it) */
71 char inbuf
[GIT_OID_RAWSZ
];
80 const git_oid
*git_indexer_hash(const git_indexer
*idx
)
85 static int parse_header(struct git_pack_header
*hdr
, struct git_pack_file
*pack
)
90 if ((error
= p_mmap(&map
, sizeof(*hdr
), GIT_PROT_READ
, GIT_MAP_SHARED
, pack
->mwf
.fd
, 0)) < 0)
93 memcpy(hdr
, map
.data
, sizeof(*hdr
));
96 /* Verify we recognize this pack file format. */
97 if (hdr
->hdr_signature
!= ntohl(PACK_SIGNATURE
)) {
98 git_error_set(GIT_ERROR_INDEXER
, "wrong pack signature");
102 if (!pack_version_ok(hdr
->hdr_version
)) {
103 git_error_set(GIT_ERROR_INDEXER
, "wrong pack version");
110 static int objects_cmp(const void *a
, const void *b
)
112 const struct entry
*entrya
= a
;
113 const struct entry
*entryb
= b
;
115 return git_oid__cmp(&entrya
->oid
, &entryb
->oid
);
118 int git_indexer_init_options(git_indexer_options
*opts
, unsigned int version
)
120 GIT_INIT_STRUCTURE_FROM_TEMPLATE(
121 opts
, version
, git_indexer_options
, GIT_INDEXER_OPTIONS_INIT
);
130 git_indexer_options
*in_opts
)
132 git_indexer_options opts
= GIT_INDEXER_OPTIONS_INIT
;
134 git_buf path
= GIT_BUF_INIT
, tmp_path
= GIT_BUF_INIT
;
135 static const char suff
[] = "/pack";
139 memcpy(&opts
, in_opts
, sizeof(opts
));
141 idx
= git__calloc(1, sizeof(git_indexer
));
142 GIT_ERROR_CHECK_ALLOC(idx
);
144 idx
->progress_cb
= opts
.progress_cb
;
145 idx
->progress_payload
= opts
.progress_cb_payload
;
146 idx
->mode
= mode
? mode
: GIT_PACK_FILE_MODE
;
147 git_hash_ctx_init(&idx
->hash_ctx
);
148 git_hash_ctx_init(&idx
->trailer
);
149 git_buf_init(&idx
->entry_data
, 0);
150 idx
->expected_oids
= git_oidmap_alloc();
151 GIT_ERROR_CHECK_ALLOC(idx
->expected_oids
);
153 idx
->do_verify
= opts
.verify
;
155 if (git_repository__fsync_gitdir
)
158 error
= git_buf_joinpath(&path
, prefix
, suff
);
162 fd
= git_futils_mktmp(&tmp_path
, git_buf_cstr(&path
), idx
->mode
);
163 git_buf_dispose(&path
);
167 error
= git_packfile_alloc(&idx
->pack
, git_buf_cstr(&tmp_path
));
168 git_buf_dispose(&tmp_path
);
173 idx
->pack
->mwf
.fd
= fd
;
174 if ((error
= git_mwindow_file_register(&idx
->pack
->mwf
)) < 0)
184 if (git_buf_len(&tmp_path
) > 0)
185 p_unlink(git_buf_cstr(&tmp_path
));
187 if (idx
->pack
!= NULL
)
188 p_unlink(idx
->pack
->pack_name
);
190 git_buf_dispose(&path
);
191 git_buf_dispose(&tmp_path
);
196 void git_indexer__set_fsync(git_indexer
*idx
, int do_fsync
)
198 idx
->do_fsync
= !!do_fsync
;
201 /* Try to store the delta so we can try to resolve it later */
202 static int store_delta(git_indexer
*idx
)
204 struct delta_info
*delta
;
206 delta
= git__calloc(1, sizeof(struct delta_info
));
207 GIT_ERROR_CHECK_ALLOC(delta
);
208 delta
->delta_off
= idx
->entry_start
;
210 if (git_vector_insert(&idx
->deltas
, delta
) < 0)
216 static int hash_header(git_hash_ctx
*ctx
, git_off_t len
, git_object_t type
)
222 if ((error
= git_odb__format_object_header(&hdrlen
,
223 buffer
, sizeof(buffer
), (size_t)len
, type
)) < 0)
226 return git_hash_update(ctx
, buffer
, hdrlen
);
229 static int hash_object_stream(git_indexer
*idx
, git_packfile_stream
*stream
)
233 assert(idx
&& stream
);
236 if ((read
= git_packfile_stream_read(stream
, idx
->objbuf
, sizeof(idx
->objbuf
))) < 0)
240 git_buf_put(&idx
->entry_data
, idx
->objbuf
, read
);
242 git_hash_update(&idx
->hash_ctx
, idx
->objbuf
, read
);
251 /* In order to create the packfile stream, we need to skip over the delta base description */
252 static int advance_delta_offset(git_indexer
*idx
, git_object_t type
)
254 git_mwindow
*w
= NULL
;
256 assert(type
== GIT_OBJECT_REF_DELTA
|| type
== GIT_OBJECT_OFS_DELTA
);
258 if (type
== GIT_OBJECT_REF_DELTA
) {
259 idx
->off
+= GIT_OID_RAWSZ
;
261 git_off_t base_off
= get_delta_base(idx
->pack
, &w
, &idx
->off
, type
, idx
->entry_start
);
262 git_mwindow_close(&w
);
264 return (int)base_off
;
270 /* Read from the stream and discard any output */
271 static int read_object_stream(git_indexer
*idx
, git_packfile_stream
*stream
)
278 read
= git_packfile_stream_read(stream
, idx
->objbuf
, sizeof(idx
->objbuf
));
287 static int crc_object(uint32_t *crc_out
, git_mwindow_file
*mwf
, git_off_t start
, git_off_t size
)
291 unsigned int left
, len
;
292 git_mwindow
*w
= NULL
;
294 crc
= crc32(0L, Z_NULL
, 0);
296 ptr
= git_mwindow_open(mwf
, &w
, start
, (size_t)size
, &left
);
300 len
= min(left
, (unsigned int)size
);
301 crc
= crc32(crc
, ptr
, len
);
304 git_mwindow_close(&w
);
307 *crc_out
= htonl(crc
);
311 static void add_expected_oid(git_indexer
*idx
, const git_oid
*oid
)
316 * If we know about that object because it is stored in our ODB or
317 * because we have already processed it as part of our pack file, we do
318 * not have to expect it.
320 if ((!idx
->odb
|| !git_odb_exists(idx
->odb
, oid
)) &&
321 !git_oidmap_exists(idx
->pack
->idx_cache
, oid
) &&
322 !git_oidmap_exists(idx
->expected_oids
, oid
)) {
323 git_oid
*dup
= git__malloc(sizeof(*oid
));
324 git_oid_cpy(dup
, oid
);
325 git_oidmap_put(idx
->expected_oids
, dup
, &ret
);
329 static int check_object_connectivity(git_indexer
*idx
, const git_rawobj
*obj
)
335 if (obj
->type
!= GIT_OBJECT_BLOB
&&
336 obj
->type
!= GIT_OBJECT_TREE
&&
337 obj
->type
!= GIT_OBJECT_COMMIT
&&
338 obj
->type
!= GIT_OBJECT_TAG
)
341 if ((error
= git_object__from_raw(&object
, obj
->data
, obj
->len
, obj
->type
)) < 0)
344 keyidx
= git_oidmap_lookup_index(idx
->expected_oids
, &object
->cached
.oid
);
345 if (git_oidmap_valid_index(idx
->expected_oids
, keyidx
)) {
346 const git_oid
*key
= git_oidmap_key(idx
->expected_oids
, keyidx
);
347 git__free((git_oid
*) key
);
348 git_oidmap_delete_at(idx
->expected_oids
, keyidx
);
352 * Check whether this is a known object. If so, we can just continue as
353 * we assume that the ODB has a complete graph.
355 if (idx
->odb
&& git_odb_exists(idx
->odb
, &object
->cached
.oid
))
359 case GIT_OBJECT_TREE
:
361 git_tree
*tree
= (git_tree
*) object
;
362 git_tree_entry
*entry
;
365 git_array_foreach(tree
->entries
, i
, entry
)
366 add_expected_oid(idx
, entry
->oid
);
370 case GIT_OBJECT_COMMIT
:
372 git_commit
*commit
= (git_commit
*) object
;
376 git_array_foreach(commit
->parent_ids
, i
, parent_oid
)
377 add_expected_oid(idx
, parent_oid
);
379 add_expected_oid(idx
, &commit
->tree_id
);
385 git_tag
*tag
= (git_tag
*) object
;
387 add_expected_oid(idx
, &tag
->target
);
391 case GIT_OBJECT_BLOB
:
397 git_object_free(object
);
402 static int store_object(git_indexer
*idx
)
408 git_off_t entry_size
;
409 struct git_pack_entry
*pentry
;
410 git_off_t entry_start
= idx
->entry_start
;
412 entry
= git__calloc(1, sizeof(*entry
));
413 GIT_ERROR_CHECK_ALLOC(entry
);
415 pentry
= git__calloc(1, sizeof(struct git_pack_entry
));
416 GIT_ERROR_CHECK_ALLOC(pentry
);
418 git_hash_final(&oid
, &idx
->hash_ctx
);
419 entry_size
= idx
->off
- entry_start
;
420 if (entry_start
> UINT31_MAX
) {
421 entry
->offset
= UINT32_MAX
;
422 entry
->offset_long
= entry_start
;
424 entry
->offset
= (uint32_t)entry_start
;
427 if (idx
->do_verify
) {
428 git_rawobj rawobj
= {
430 idx
->entry_data
.size
,
434 if ((error
= check_object_connectivity(idx
, &rawobj
)) < 0)
438 git_oid_cpy(&pentry
->sha1
, &oid
);
439 pentry
->offset
= entry_start
;
441 k
= git_oidmap_put(idx
->pack
->idx_cache
, &pentry
->sha1
, &error
);
449 git_error_set(GIT_ERROR_INDEXER
, "duplicate object %s found in pack", git_oid_tostr_s(&pentry
->sha1
));
455 git_oidmap_set_value_at(idx
->pack
->idx_cache
, k
, pentry
);
457 git_oid_cpy(&entry
->oid
, &oid
);
459 if (crc_object(&entry
->crc
, &idx
->pack
->mwf
, entry_start
, entry_size
) < 0)
462 /* Add the object to the list */
463 if (git_vector_insert(&idx
->objects
, entry
) < 0)
466 for (i
= oid
.id
[0]; i
< 256; ++i
) {
478 GIT_INLINE(bool) has_entry(git_indexer
*idx
, git_oid
*id
)
480 return git_oidmap_exists(idx
->pack
->idx_cache
, id
);
483 static int save_entry(git_indexer
*idx
, struct entry
*entry
, struct git_pack_entry
*pentry
, git_off_t entry_start
)
488 if (entry_start
> UINT31_MAX
) {
489 entry
->offset
= UINT32_MAX
;
490 entry
->offset_long
= entry_start
;
492 entry
->offset
= (uint32_t)entry_start
;
495 pentry
->offset
= entry_start
;
496 k
= git_oidmap_put(idx
->pack
->idx_cache
, &pentry
->sha1
, &error
);
499 git_error_set(GIT_ERROR_INDEXER
, "cannot insert object into pack");
503 git_oidmap_set_value_at(idx
->pack
->idx_cache
, k
, pentry
);
505 /* Add the object to the list */
506 if (git_vector_insert(&idx
->objects
, entry
) < 0)
509 for (i
= entry
->oid
.id
[0]; i
< 256; ++i
) {
516 static int hash_and_save(git_indexer
*idx
, git_rawobj
*obj
, git_off_t entry_start
)
521 struct git_pack_entry
*pentry
= NULL
;
523 entry
= git__calloc(1, sizeof(*entry
));
524 GIT_ERROR_CHECK_ALLOC(entry
);
526 if (git_odb__hashobj(&oid
, obj
) < 0) {
527 git_error_set(GIT_ERROR_INDEXER
, "failed to hash object");
531 pentry
= git__calloc(1, sizeof(struct git_pack_entry
));
532 GIT_ERROR_CHECK_ALLOC(pentry
);
534 git_oid_cpy(&pentry
->sha1
, &oid
);
535 git_oid_cpy(&entry
->oid
, &oid
);
536 entry
->crc
= crc32(0L, Z_NULL
, 0);
538 entry_size
= (size_t)(idx
->off
- entry_start
);
539 if (crc_object(&entry
->crc
, &idx
->pack
->mwf
, entry_start
, entry_size
) < 0)
542 return save_entry(idx
, entry
, pentry
, entry_start
);
547 git__free(obj
->data
);
551 static int do_progress_callback(git_indexer
*idx
, git_transfer_progress
*stats
)
553 if (idx
->progress_cb
)
554 return git_error_set_after_callback_function(
555 idx
->progress_cb(stats
, idx
->progress_payload
),
560 /* Hash everything but the last 20B of input */
561 static void hash_partially(git_indexer
*idx
, const uint8_t *data
, size_t size
)
563 size_t to_expell
, to_keep
;
568 /* Easy case, dump the buffer and the data minus the last 20 bytes */
569 if (size
>= GIT_OID_RAWSZ
) {
570 git_hash_update(&idx
->trailer
, idx
->inbuf
, idx
->inbuf_len
);
571 git_hash_update(&idx
->trailer
, data
, size
- GIT_OID_RAWSZ
);
573 data
+= size
- GIT_OID_RAWSZ
;
574 memcpy(idx
->inbuf
, data
, GIT_OID_RAWSZ
);
575 idx
->inbuf_len
= GIT_OID_RAWSZ
;
579 /* We can just append */
580 if (idx
->inbuf_len
+ size
<= GIT_OID_RAWSZ
) {
581 memcpy(idx
->inbuf
+ idx
->inbuf_len
, data
, size
);
582 idx
->inbuf_len
+= size
;
586 /* We need to partially drain the buffer and then append */
587 to_keep
= GIT_OID_RAWSZ
- size
;
588 to_expell
= idx
->inbuf_len
- to_keep
;
590 git_hash_update(&idx
->trailer
, idx
->inbuf
, to_expell
);
592 memmove(idx
->inbuf
, idx
->inbuf
+ to_expell
, to_keep
);
593 memcpy(idx
->inbuf
+ to_keep
, data
, size
);
594 idx
->inbuf_len
+= size
- to_expell
;
597 static int write_at(git_indexer
*idx
, const void *data
, git_off_t offset
, size_t size
)
599 git_file fd
= idx
->pack
->mwf
.fd
;
600 size_t mmap_alignment
;
602 git_off_t page_start
;
603 unsigned char *map_data
;
607 assert(data
&& size
);
609 if ((error
= git__mmap_alignment(&mmap_alignment
)) < 0)
612 /* the offset needs to be at the mmap boundary for the platform */
613 page_offset
= offset
% mmap_alignment
;
614 page_start
= offset
- page_offset
;
616 if ((error
= p_mmap(&map
, page_offset
+ size
, GIT_PROT_WRITE
, GIT_MAP_SHARED
, fd
, page_start
)) < 0)
619 map_data
= (unsigned char *)map
.data
;
620 memcpy(map_data
+ page_offset
, data
, size
);
626 static int append_to_pack(git_indexer
*idx
, const void *data
, size_t size
)
629 size_t mmap_alignment
;
631 git_off_t page_start
;
632 git_off_t current_size
= idx
->pack
->mwf
.size
;
633 int fd
= idx
->pack
->mwf
.fd
;
639 if ((error
= git__mmap_alignment(&mmap_alignment
)) < 0)
642 /* Write a single byte to force the file system to allocate space now or
643 * report an error, since we can't report errors when writing using mmap.
644 * Round the size up to the nearest page so that we only need to perform file
645 * I/O when we add a page, instead of whenever we write even a single byte. */
646 new_size
= current_size
+ size
;
647 page_offset
= new_size
% mmap_alignment
;
648 page_start
= new_size
- page_offset
;
650 if (p_lseek(fd
, page_start
+ mmap_alignment
- 1, SEEK_SET
) < 0 ||
651 p_write(idx
->pack
->mwf
.fd
, data
, 1) < 0) {
652 git_error_set(GIT_ERROR_OS
, "cannot extend packfile '%s'", idx
->pack
->pack_name
);
656 return write_at(idx
, data
, idx
->pack
->mwf
.size
, size
);
659 static int read_stream_object(git_indexer
*idx
, git_transfer_progress
*stats
)
661 git_packfile_stream
*stream
= &idx
->stream
;
662 git_off_t entry_start
= idx
->off
;
665 git_mwindow
*w
= NULL
;
668 if (idx
->pack
->mwf
.size
<= idx
->off
+ 20)
671 if (!idx
->have_stream
) {
672 error
= git_packfile_unpack_header(&entry_size
, &type
, &idx
->pack
->mwf
, &w
, &idx
->off
);
673 if (error
== GIT_EBUFS
) {
674 idx
->off
= entry_start
;
680 git_mwindow_close(&w
);
681 idx
->entry_start
= entry_start
;
682 git_hash_init(&idx
->hash_ctx
);
683 git_buf_clear(&idx
->entry_data
);
685 if (type
== GIT_OBJECT_REF_DELTA
|| type
== GIT_OBJECT_OFS_DELTA
) {
686 error
= advance_delta_offset(idx
, type
);
687 if (error
== GIT_EBUFS
) {
688 idx
->off
= entry_start
;
698 error
= hash_header(&idx
->hash_ctx
, entry_size
, type
);
703 idx
->have_stream
= 1;
704 idx
->entry_type
= type
;
706 error
= git_packfile_stream_open(stream
, idx
->pack
, idx
->off
);
711 if (idx
->have_delta
) {
712 error
= read_object_stream(idx
, stream
);
714 error
= hash_object_stream(idx
, stream
);
717 idx
->off
= stream
->curpos
;
718 if (error
== GIT_EBUFS
)
721 /* We want to free the stream resources no matter what here */
722 idx
->have_stream
= 0;
723 git_packfile_stream_dispose(stream
);
728 if (idx
->have_delta
) {
729 error
= store_delta(idx
);
731 error
= store_object(idx
);
737 if (!idx
->have_delta
) {
738 stats
->indexed_objects
++;
740 stats
->received_objects
++;
742 if ((error
= do_progress_callback(idx
, stats
)) != 0)
748 int git_indexer_append(git_indexer
*idx
, const void *data
, size_t size
, git_transfer_progress
*stats
)
751 struct git_pack_header
*hdr
= &idx
->hdr
;
752 git_mwindow_file
*mwf
= &idx
->pack
->mwf
;
754 assert(idx
&& data
&& stats
);
756 if ((error
= append_to_pack(idx
, data
, size
)) < 0)
759 hash_partially(idx
, data
, (int)size
);
761 /* Make sure we set the new size of the pack */
762 idx
->pack
->mwf
.size
+= size
;
764 if (!idx
->parsed_header
) {
765 unsigned int total_objects
;
767 if ((unsigned)idx
->pack
->mwf
.size
< sizeof(struct git_pack_header
))
770 if ((error
= parse_header(&idx
->hdr
, idx
->pack
)) < 0)
773 idx
->parsed_header
= 1;
774 idx
->nr_objects
= ntohl(hdr
->hdr_entries
);
775 idx
->off
= sizeof(struct git_pack_header
);
777 if (idx
->nr_objects
<= git_indexer__max_objects
) {
778 total_objects
= (unsigned int)idx
->nr_objects
;
780 git_error_set(GIT_ERROR_INDEXER
, "too many objects");
784 idx
->pack
->idx_cache
= git_oidmap_alloc();
785 GIT_ERROR_CHECK_ALLOC(idx
->pack
->idx_cache
);
787 idx
->pack
->has_cache
= 1;
788 if (git_vector_init(&idx
->objects
, total_objects
, objects_cmp
) < 0)
791 if (git_vector_init(&idx
->deltas
, total_objects
/ 2, NULL
) < 0)
794 stats
->received_objects
= 0;
795 stats
->local_objects
= 0;
796 stats
->total_deltas
= 0;
797 stats
->indexed_deltas
= 0;
798 stats
->indexed_objects
= 0;
799 stats
->total_objects
= total_objects
;
801 if ((error
= do_progress_callback(idx
, stats
)) != 0)
805 /* Now that we have data in the pack, let's try to parse it */
807 /* As the file grows any windows we try to use will be out of date */
808 git_mwindow_free_all(mwf
);
810 while (stats
->indexed_objects
< idx
->nr_objects
) {
811 if ((error
= read_stream_object(idx
, stats
)) != 0) {
812 if (error
== GIT_EBUFS
)
822 git_mwindow_free_all(mwf
);
826 static int index_path(git_buf
*path
, git_indexer
*idx
, const char *suffix
)
828 const char prefix
[] = "pack-";
829 size_t slash
= (size_t)path
->size
;
831 /* search backwards for '/' */
832 while (slash
> 0 && path
->ptr
[slash
- 1] != '/')
835 if (git_buf_grow(path
, slash
+ 1 + strlen(prefix
) +
836 GIT_OID_HEXSZ
+ strlen(suffix
) + 1) < 0)
839 git_buf_truncate(path
, slash
);
840 git_buf_puts(path
, prefix
);
841 git_oid_fmt(path
->ptr
+ git_buf_len(path
), &idx
->hash
);
842 path
->size
+= GIT_OID_HEXSZ
;
843 git_buf_puts(path
, suffix
);
845 return git_buf_oom(path
) ? -1 : 0;
849 * Rewind the packfile by the trailer, as we might need to fix the
850 * packfile by injecting objects at the tail and must overwrite it.
852 static void seek_back_trailer(git_indexer
*idx
)
854 idx
->pack
->mwf
.size
-= GIT_OID_RAWSZ
;
855 git_mwindow_free_all(&idx
->pack
->mwf
);
858 static int inject_object(git_indexer
*idx
, git_oid
*id
)
862 struct git_pack_entry
*pentry
= NULL
;
864 unsigned char hdr
[64];
865 git_buf buf
= GIT_BUF_INIT
;
866 git_off_t entry_start
;
871 seek_back_trailer(idx
);
872 entry_start
= idx
->pack
->mwf
.size
;
874 if (git_odb_read(&obj
, idx
->odb
, id
) < 0) {
875 git_error_set(GIT_ERROR_INDEXER
, "missing delta bases");
879 data
= git_odb_object_data(obj
);
880 len
= git_odb_object_size(obj
);
882 entry
= git__calloc(1, sizeof(*entry
));
883 GIT_ERROR_CHECK_ALLOC(entry
);
885 entry
->crc
= crc32(0L, Z_NULL
, 0);
887 /* Write out the object header */
888 hdr_len
= git_packfile__object_header(hdr
, len
, git_odb_object_type(obj
));
889 if ((error
= append_to_pack(idx
, hdr
, hdr_len
)) < 0)
892 idx
->pack
->mwf
.size
+= hdr_len
;
893 entry
->crc
= crc32(entry
->crc
, hdr
, (uInt
)hdr_len
);
895 if ((error
= git_zstream_deflatebuf(&buf
, data
, len
)) < 0)
898 /* And then the compressed object */
899 if ((error
= append_to_pack(idx
, buf
.ptr
, buf
.size
)) < 0)
902 idx
->pack
->mwf
.size
+= buf
.size
;
903 entry
->crc
= htonl(crc32(entry
->crc
, (unsigned char *)buf
.ptr
, (uInt
)buf
.size
));
904 git_buf_dispose(&buf
);
906 /* Write a fake trailer so the pack functions play ball */
908 if ((error
= append_to_pack(idx
, &foo
, GIT_OID_RAWSZ
)) < 0)
911 idx
->pack
->mwf
.size
+= GIT_OID_RAWSZ
;
913 pentry
= git__calloc(1, sizeof(struct git_pack_entry
));
914 GIT_ERROR_CHECK_ALLOC(pentry
);
916 git_oid_cpy(&pentry
->sha1
, id
);
917 git_oid_cpy(&entry
->oid
, id
);
918 idx
->off
= entry_start
+ hdr_len
+ len
;
920 error
= save_entry(idx
, entry
, pentry
, entry_start
);
928 git_odb_object_free(obj
);
932 static int fix_thin_pack(git_indexer
*idx
, git_transfer_progress
*stats
)
934 int error
, found_ref_delta
= 0;
936 struct delta_info
*delta
;
939 git_mwindow
*w
= NULL
;
940 git_off_t curpos
= 0;
941 unsigned char *base_info
;
942 unsigned int left
= 0;
945 assert(git_vector_length(&idx
->deltas
) > 0);
947 if (idx
->odb
== NULL
) {
948 git_error_set(GIT_ERROR_INDEXER
, "cannot fix a thin pack without an ODB");
952 /* Loop until we find the first REF delta */
953 git_vector_foreach(&idx
->deltas
, i
, delta
) {
957 curpos
= delta
->delta_off
;
958 error
= git_packfile_unpack_header(&size
, &type
, &idx
->pack
->mwf
, &w
, &curpos
);
962 if (type
== GIT_OBJECT_REF_DELTA
) {
968 if (!found_ref_delta
) {
969 git_error_set(GIT_ERROR_INDEXER
, "no REF_DELTA found, cannot inject object");
973 /* curpos now points to the base information, which is an OID */
974 base_info
= git_mwindow_open(&idx
->pack
->mwf
, &w
, curpos
, GIT_OID_RAWSZ
, &left
);
975 if (base_info
== NULL
) {
976 git_error_set(GIT_ERROR_INDEXER
, "failed to map delta information");
980 git_oid_fromraw(&base
, base_info
);
981 git_mwindow_close(&w
);
983 if (has_entry(idx
, &base
))
986 if (inject_object(idx
, &base
) < 0)
989 stats
->local_objects
++;
994 static int resolve_deltas(git_indexer
*idx
, git_transfer_progress
*stats
)
998 struct delta_info
*delta
;
999 int progressed
= 0, non_null
= 0, progress_cb_result
;
1001 while (idx
->deltas
.length
> 0) {
1004 git_vector_foreach(&idx
->deltas
, i
, delta
) {
1005 git_rawobj obj
= {0};
1011 idx
->off
= delta
->delta_off
;
1012 if ((error
= git_packfile_unpack(&obj
, idx
->pack
, &idx
->off
)) < 0) {
1013 if (error
== GIT_PASSTHROUGH
) {
1014 /* We have not seen the base object, we'll try again later. */
1020 if (idx
->do_verify
&& check_object_connectivity(idx
, &obj
) < 0)
1021 /* TODO: error? continue? */
1024 if (hash_and_save(idx
, &obj
, delta
->delta_off
) < 0)
1027 git__free(obj
.data
);
1028 stats
->indexed_objects
++;
1029 stats
->indexed_deltas
++;
1031 if ((progress_cb_result
= do_progress_callback(idx
, stats
)) < 0)
1032 return progress_cb_result
;
1034 /* remove from the list */
1035 git_vector_set(NULL
, &idx
->deltas
, i
, NULL
);
1039 /* if none were actually set, we're done */
1043 if (!progressed
&& (fix_thin_pack(idx
, stats
) < 0)) {
1051 static int update_header_and_rehash(git_indexer
*idx
, git_transfer_progress
*stats
)
1054 size_t chunk
= 1024*1024;
1055 git_off_t hashed
= 0;
1056 git_mwindow
*w
= NULL
;
1057 git_mwindow_file
*mwf
;
1060 mwf
= &idx
->pack
->mwf
;
1062 git_hash_init(&idx
->trailer
);
1065 /* Update the header to include the number of local objects we injected */
1066 idx
->hdr
.hdr_entries
= htonl(stats
->total_objects
+ stats
->local_objects
);
1067 if (write_at(idx
, &idx
->hdr
, 0, sizeof(struct git_pack_header
)) < 0)
1071 * We now use the same technique as before to determine the
1072 * hash. We keep reading up to the end and let
1073 * hash_partially() keep the existing trailer out of the
1076 git_mwindow_free_all(mwf
);
1078 while (hashed
< mwf
->size
) {
1079 ptr
= git_mwindow_open(mwf
, &w
, hashed
, chunk
, &left
);
1083 hash_partially(idx
, ptr
, left
);
1086 git_mwindow_close(&w
);
1092 int git_indexer_commit(git_indexer
*idx
, git_transfer_progress
*stats
)
1094 git_mwindow
*w
= NULL
;
1095 unsigned int i
, long_offsets
= 0, left
;
1097 struct git_pack_idx_header hdr
;
1098 git_buf filename
= GIT_BUF_INIT
;
1099 struct entry
*entry
;
1100 git_oid trailer_hash
, file_hash
;
1101 git_filebuf index_file
= {0};
1102 void *packfile_trailer
;
1104 if (!idx
->parsed_header
) {
1105 git_error_set(GIT_ERROR_INDEXER
, "incomplete pack header");
1109 /* Test for this before resolve_deltas(), as it plays with idx->off */
1110 if (idx
->off
+ 20 < idx
->pack
->mwf
.size
) {
1111 git_error_set(GIT_ERROR_INDEXER
, "unexpected data at the end of the pack");
1114 if (idx
->off
+ 20 > idx
->pack
->mwf
.size
) {
1115 git_error_set(GIT_ERROR_INDEXER
, "missing trailer at the end of the pack");
1119 packfile_trailer
= git_mwindow_open(&idx
->pack
->mwf
, &w
, idx
->pack
->mwf
.size
- GIT_OID_RAWSZ
, GIT_OID_RAWSZ
, &left
);
1120 if (packfile_trailer
== NULL
) {
1121 git_mwindow_close(&w
);
1125 /* Compare the packfile trailer as it was sent to us and what we calculated */
1126 git_oid_fromraw(&file_hash
, packfile_trailer
);
1127 git_mwindow_close(&w
);
1129 git_hash_final(&trailer_hash
, &idx
->trailer
);
1130 if (git_oid_cmp(&file_hash
, &trailer_hash
)) {
1131 git_error_set(GIT_ERROR_INDEXER
, "packfile trailer mismatch");
1135 /* Freeze the number of deltas */
1136 stats
->total_deltas
= stats
->total_objects
- stats
->indexed_objects
;
1138 if ((error
= resolve_deltas(idx
, stats
)) < 0)
1141 if (stats
->indexed_objects
!= stats
->total_objects
) {
1142 git_error_set(GIT_ERROR_INDEXER
, "early EOF");
1146 if (stats
->local_objects
> 0) {
1147 if (update_header_and_rehash(idx
, stats
) < 0)
1150 git_hash_final(&trailer_hash
, &idx
->trailer
);
1151 write_at(idx
, &trailer_hash
, idx
->pack
->mwf
.size
- GIT_OID_RAWSZ
, GIT_OID_RAWSZ
);
1155 * Is the resulting graph fully connected or are we still
1156 * missing some objects? In the second case, we can
1157 * bail out due to an incomplete and thus corrupt
1160 if (git_oidmap_size(idx
->expected_oids
) > 0) {
1161 git_error_set(GIT_ERROR_INDEXER
, "packfile is missing %"PRIuZ
" objects",
1162 git_oidmap_size(idx
->expected_oids
));
1166 git_vector_sort(&idx
->objects
);
1168 /* Use the trailer hash as the pack file name to ensure
1169 * files with different contents have different names */
1170 git_oid_cpy(&idx
->hash
, &trailer_hash
);
1172 git_buf_sets(&filename
, idx
->pack
->pack_name
);
1173 git_buf_shorten(&filename
, strlen("pack"));
1174 git_buf_puts(&filename
, "idx");
1175 if (git_buf_oom(&filename
))
1178 if (git_filebuf_open(&index_file
, filename
.ptr
,
1179 GIT_FILEBUF_HASH_CONTENTS
|
1180 (idx
->do_fsync
? GIT_FILEBUF_FSYNC
: 0),
1184 /* Write out the header */
1185 hdr
.idx_signature
= htonl(PACK_IDX_SIGNATURE
);
1186 hdr
.idx_version
= htonl(2);
1187 git_filebuf_write(&index_file
, &hdr
, sizeof(hdr
));
1189 /* Write out the fanout table */
1190 for (i
= 0; i
< 256; ++i
) {
1191 uint32_t n
= htonl(idx
->fanout
[i
]);
1192 git_filebuf_write(&index_file
, &n
, sizeof(n
));
1195 /* Write out the object names (SHA-1 hashes) */
1196 git_vector_foreach(&idx
->objects
, i
, entry
) {
1197 git_filebuf_write(&index_file
, &entry
->oid
, sizeof(git_oid
));
1200 /* Write out the CRC32 values */
1201 git_vector_foreach(&idx
->objects
, i
, entry
) {
1202 git_filebuf_write(&index_file
, &entry
->crc
, sizeof(uint32_t));
1205 /* Write out the offsets */
1206 git_vector_foreach(&idx
->objects
, i
, entry
) {
1209 if (entry
->offset
== UINT32_MAX
)
1210 n
= htonl(0x80000000 | long_offsets
++);
1212 n
= htonl(entry
->offset
);
1214 git_filebuf_write(&index_file
, &n
, sizeof(uint32_t));
1217 /* Write out the long offsets */
1218 git_vector_foreach(&idx
->objects
, i
, entry
) {
1221 if (entry
->offset
!= UINT32_MAX
)
1224 split
[0] = htonl(entry
->offset_long
>> 32);
1225 split
[1] = htonl(entry
->offset_long
& 0xffffffff);
1227 git_filebuf_write(&index_file
, &split
, sizeof(uint32_t) * 2);
1230 /* Write out the packfile trailer to the index */
1231 if (git_filebuf_write(&index_file
, &trailer_hash
, GIT_OID_RAWSZ
) < 0)
1234 /* Write out the hash of the idx */
1235 if (git_filebuf_hash(&trailer_hash
, &index_file
) < 0)
1238 git_filebuf_write(&index_file
, &trailer_hash
, sizeof(git_oid
));
1240 /* Figure out what the final name should be */
1241 if (index_path(&filename
, idx
, ".idx") < 0)
1245 if (git_filebuf_commit_at(&index_file
, filename
.ptr
) < 0)
1248 git_mwindow_free_all(&idx
->pack
->mwf
);
1250 /* Truncate file to undo rounding up to next page_size in append_to_pack */
1251 if (p_ftruncate(idx
->pack
->mwf
.fd
, idx
->pack
->mwf
.size
) < 0) {
1252 git_error_set(GIT_ERROR_OS
, "failed to truncate pack file '%s'", idx
->pack
->pack_name
);
1256 if (idx
->do_fsync
&& p_fsync(idx
->pack
->mwf
.fd
) < 0) {
1257 git_error_set(GIT_ERROR_OS
, "failed to fsync packfile");
1261 /* We need to close the descriptor here so Windows doesn't choke on commit_at */
1262 if (p_close(idx
->pack
->mwf
.fd
) < 0) {
1263 git_error_set(GIT_ERROR_OS
, "failed to close packfile");
1267 idx
->pack
->mwf
.fd
= -1;
1269 if (index_path(&filename
, idx
, ".pack") < 0)
1272 /* And don't forget to rename the packfile to its new place. */
1273 if (p_rename(idx
->pack
->pack_name
, git_buf_cstr(&filename
)) < 0)
1276 /* And fsync the parent directory if we're asked to. */
1277 if (idx
->do_fsync
&&
1278 git_futils_fsync_parent(git_buf_cstr(&filename
)) < 0)
1281 idx
->pack_committed
= 1;
1283 git_buf_dispose(&filename
);
1287 git_mwindow_free_all(&idx
->pack
->mwf
);
1288 git_filebuf_cleanup(&index_file
);
1289 git_buf_dispose(&filename
);
1293 void git_indexer_free(git_indexer
*idx
)
1300 if (idx
->have_stream
)
1301 git_packfile_stream_dispose(&idx
->stream
);
1303 git_vector_free_deep(&idx
->objects
);
1305 if (idx
->pack
->idx_cache
) {
1306 struct git_pack_entry
*pentry
;
1307 git_oidmap_foreach_value(idx
->pack
->idx_cache
, pentry
, {
1311 git_oidmap_free(idx
->pack
->idx_cache
);
1314 git_vector_free_deep(&idx
->deltas
);
1316 if (!git_mutex_lock(&git__mwindow_mutex
)) {
1317 if (!idx
->pack_committed
)
1318 git_packfile_close(idx
->pack
, true);
1320 git_packfile_free(idx
->pack
);
1321 git_mutex_unlock(&git__mwindow_mutex
);
1324 for (pos
= git_oidmap_begin(idx
->expected_oids
);
1325 pos
!= git_oidmap_end(idx
->expected_oids
); pos
++)
1327 if (git_oidmap_has_data(idx
->expected_oids
, pos
)) {
1328 git__free((git_oid
*) git_oidmap_key(idx
->expected_oids
, pos
));
1329 git_oidmap_delete_at(idx
->expected_oids
, pos
);
1333 git_hash_ctx_cleanup(&idx
->trailer
);
1334 git_hash_ctx_cleanup(&idx
->hash_ctx
);
1335 git_buf_dispose(&idx
->entry_data
);
1336 git_oidmap_free(idx
->expected_oids
);