2 * Copyright (C) the libgit2 contributors. All rights reserved.
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
10 #include "git2/indexer.h"
11 #include "git2/object.h"
27 size_t git_indexer__max_objects
= UINT32_MAX
;
29 #define UINT31_MAX (0x7FFFFFFF)
39 unsigned int parsed_header
:1,
45 struct git_pack_header hdr
;
46 struct git_pack_file
*pack
;
50 git_object_t entry_type
;
52 git_packfile_stream stream
;
56 unsigned int fanout
[256];
57 git_hash_ctx hash_ctx
;
59 git_indexer_progress_cb progress_cb
;
60 void *progress_payload
;
63 /* OIDs referenced from pack objects. Used for verification. */
64 git_oidmap
*expected_oids
;
66 /* Needed to look up objects which we want to inject to fix a thin pack */
69 /* Fields for calculating the packfile trailer (hash of everything before it) */
70 char inbuf
[GIT_OID_RAWSZ
];
79 const git_oid
*git_indexer_hash(const git_indexer
*idx
)
84 static int parse_header(struct git_pack_header
*hdr
, struct git_pack_file
*pack
)
89 if ((error
= p_mmap(&map
, sizeof(*hdr
), GIT_PROT_READ
, GIT_MAP_SHARED
, pack
->mwf
.fd
, 0)) < 0)
92 memcpy(hdr
, map
.data
, sizeof(*hdr
));
95 /* Verify we recognize this pack file format. */
96 if (hdr
->hdr_signature
!= ntohl(PACK_SIGNATURE
)) {
97 git_error_set(GIT_ERROR_INDEXER
, "wrong pack signature");
101 if (!pack_version_ok(hdr
->hdr_version
)) {
102 git_error_set(GIT_ERROR_INDEXER
, "wrong pack version");
109 static int objects_cmp(const void *a
, const void *b
)
111 const struct entry
*entrya
= a
;
112 const struct entry
*entryb
= b
;
114 return git_oid__cmp(&entrya
->oid
, &entryb
->oid
);
117 int git_indexer_options_init(git_indexer_options
*opts
, unsigned int version
)
119 GIT_INIT_STRUCTURE_FROM_TEMPLATE(
120 opts
, version
, git_indexer_options
, GIT_INDEXER_OPTIONS_INIT
);
124 #ifndef GIT_DEPRECATE_HARD
125 int git_indexer_init_options(git_indexer_options
*opts
, unsigned int version
)
127 return git_indexer_options_init(opts
, version
);
136 git_indexer_options
*in_opts
)
138 git_indexer_options opts
= GIT_INDEXER_OPTIONS_INIT
;
140 git_buf path
= GIT_BUF_INIT
, tmp_path
= GIT_BUF_INIT
;
141 static const char suff
[] = "/pack";
145 memcpy(&opts
, in_opts
, sizeof(opts
));
147 idx
= git__calloc(1, sizeof(git_indexer
));
148 GIT_ERROR_CHECK_ALLOC(idx
);
150 idx
->progress_cb
= opts
.progress_cb
;
151 idx
->progress_payload
= opts
.progress_cb_payload
;
152 idx
->mode
= mode
? mode
: GIT_PACK_FILE_MODE
;
153 git_buf_init(&idx
->entry_data
, 0);
155 if ((error
= git_hash_ctx_init(&idx
->hash_ctx
)) < 0 ||
156 (error
= git_hash_ctx_init(&idx
->trailer
)) < 0 ||
157 (error
= git_oidmap_new(&idx
->expected_oids
)) < 0)
160 idx
->do_verify
= opts
.verify
;
162 if (git_repository__fsync_gitdir
)
165 error
= git_buf_joinpath(&path
, prefix
, suff
);
169 fd
= git_futils_mktmp(&tmp_path
, git_buf_cstr(&path
), idx
->mode
);
170 git_buf_dispose(&path
);
174 error
= git_packfile_alloc(&idx
->pack
, git_buf_cstr(&tmp_path
));
175 git_buf_dispose(&tmp_path
);
180 idx
->pack
->mwf
.fd
= fd
;
181 if ((error
= git_mwindow_file_register(&idx
->pack
->mwf
)) < 0)
191 if (git_buf_len(&tmp_path
) > 0)
192 p_unlink(git_buf_cstr(&tmp_path
));
194 if (idx
->pack
!= NULL
)
195 p_unlink(idx
->pack
->pack_name
);
197 git_buf_dispose(&path
);
198 git_buf_dispose(&tmp_path
);
203 void git_indexer__set_fsync(git_indexer
*idx
, int do_fsync
)
205 idx
->do_fsync
= !!do_fsync
;
208 /* Try to store the delta so we can try to resolve it later */
209 static int store_delta(git_indexer
*idx
)
211 struct delta_info
*delta
;
213 delta
= git__calloc(1, sizeof(struct delta_info
));
214 GIT_ERROR_CHECK_ALLOC(delta
);
215 delta
->delta_off
= idx
->entry_start
;
217 if (git_vector_insert(&idx
->deltas
, delta
) < 0)
223 static int hash_header(git_hash_ctx
*ctx
, off64_t len
, git_object_t type
)
229 if ((error
= git_odb__format_object_header(&hdrlen
,
230 buffer
, sizeof(buffer
), (size_t)len
, type
)) < 0)
233 return git_hash_update(ctx
, buffer
, hdrlen
);
236 static int hash_object_stream(git_indexer
*idx
, git_packfile_stream
*stream
)
241 GIT_ASSERT_ARG(stream
);
244 if ((read
= git_packfile_stream_read(stream
, idx
->objbuf
, sizeof(idx
->objbuf
))) < 0)
248 git_buf_put(&idx
->entry_data
, idx
->objbuf
, read
);
250 git_hash_update(&idx
->hash_ctx
, idx
->objbuf
, read
);
259 /* In order to create the packfile stream, we need to skip over the delta base description */
260 static int advance_delta_offset(git_indexer
*idx
, git_object_t type
)
262 git_mwindow
*w
= NULL
;
264 GIT_ASSERT_ARG(type
== GIT_OBJECT_REF_DELTA
|| type
== GIT_OBJECT_OFS_DELTA
);
266 if (type
== GIT_OBJECT_REF_DELTA
) {
267 idx
->off
+= GIT_OID_RAWSZ
;
270 int error
= get_delta_base(&base_off
, idx
->pack
, &w
, &idx
->off
, type
, idx
->entry_start
);
271 git_mwindow_close(&w
);
279 /* Read from the stream and discard any output */
280 static int read_object_stream(git_indexer
*idx
, git_packfile_stream
*stream
)
284 GIT_ASSERT_ARG(stream
);
287 read
= git_packfile_stream_read(stream
, idx
->objbuf
, sizeof(idx
->objbuf
));
296 static int crc_object(uint32_t *crc_out
, git_mwindow_file
*mwf
, off64_t start
, off64_t size
)
300 unsigned int left
, len
;
301 git_mwindow
*w
= NULL
;
303 crc
= crc32(0L, Z_NULL
, 0);
305 ptr
= git_mwindow_open(mwf
, &w
, start
, (size_t)size
, &left
);
309 len
= min(left
, (unsigned int)size
);
310 crc
= crc32(crc
, ptr
, len
);
313 git_mwindow_close(&w
);
316 *crc_out
= htonl(crc
);
320 static int add_expected_oid(git_indexer
*idx
, const git_oid
*oid
)
323 * If we know about that object because it is stored in our ODB or
324 * because we have already processed it as part of our pack file, we do
325 * not have to expect it.
327 if ((!idx
->odb
|| !git_odb_exists(idx
->odb
, oid
)) &&
328 !git_oidmap_exists(idx
->pack
->idx_cache
, oid
) &&
329 !git_oidmap_exists(idx
->expected_oids
, oid
)) {
330 git_oid
*dup
= git__malloc(sizeof(*oid
));
331 GIT_ERROR_CHECK_ALLOC(dup
);
332 git_oid_cpy(dup
, oid
);
333 return git_oidmap_set(idx
->expected_oids
, dup
, dup
);
339 static int check_object_connectivity(git_indexer
*idx
, const git_rawobj
*obj
)
345 if (obj
->type
!= GIT_OBJECT_BLOB
&&
346 obj
->type
!= GIT_OBJECT_TREE
&&
347 obj
->type
!= GIT_OBJECT_COMMIT
&&
348 obj
->type
!= GIT_OBJECT_TAG
)
351 if ((error
= git_object__from_raw(&object
, obj
->data
, obj
->len
, obj
->type
)) < 0)
354 if ((expected
= git_oidmap_get(idx
->expected_oids
, &object
->cached
.oid
)) != NULL
) {
355 git_oidmap_delete(idx
->expected_oids
, &object
->cached
.oid
);
360 * Check whether this is a known object. If so, we can just continue as
361 * we assume that the ODB has a complete graph.
363 if (idx
->odb
&& git_odb_exists(idx
->odb
, &object
->cached
.oid
))
367 case GIT_OBJECT_TREE
:
369 git_tree
*tree
= (git_tree
*) object
;
370 git_tree_entry
*entry
;
373 git_array_foreach(tree
->entries
, i
, entry
)
374 if (add_expected_oid(idx
, entry
->oid
) < 0)
379 case GIT_OBJECT_COMMIT
:
381 git_commit
*commit
= (git_commit
*) object
;
385 git_array_foreach(commit
->parent_ids
, i
, parent_oid
)
386 if (add_expected_oid(idx
, parent_oid
) < 0)
389 if (add_expected_oid(idx
, &commit
->tree_id
) < 0)
396 git_tag
*tag
= (git_tag
*) object
;
398 if (add_expected_oid(idx
, &tag
->target
) < 0)
403 case GIT_OBJECT_BLOB
:
409 git_object_free(object
);
414 static int store_object(git_indexer
*idx
)
420 struct git_pack_entry
*pentry
;
421 off64_t entry_start
= idx
->entry_start
;
423 entry
= git__calloc(1, sizeof(*entry
));
424 GIT_ERROR_CHECK_ALLOC(entry
);
426 pentry
= git__calloc(1, sizeof(struct git_pack_entry
));
427 GIT_ERROR_CHECK_ALLOC(pentry
);
429 if (git_hash_final(&oid
, &idx
->hash_ctx
)) {
433 entry_size
= idx
->off
- entry_start
;
434 if (entry_start
> UINT31_MAX
) {
435 entry
->offset
= UINT32_MAX
;
436 entry
->offset_long
= entry_start
;
438 entry
->offset
= (uint32_t)entry_start
;
441 if (idx
->do_verify
) {
442 git_rawobj rawobj
= {
444 idx
->entry_data
.size
,
448 if ((error
= check_object_connectivity(idx
, &rawobj
)) < 0)
452 git_oid_cpy(&pentry
->sha1
, &oid
);
453 pentry
->offset
= entry_start
;
455 if (git_oidmap_exists(idx
->pack
->idx_cache
, &pentry
->sha1
)) {
456 git_error_set(GIT_ERROR_INDEXER
, "duplicate object %s found in pack", git_oid_tostr_s(&pentry
->sha1
));
461 if ((error
= git_oidmap_set(idx
->pack
->idx_cache
, &pentry
->sha1
, pentry
)) < 0) {
467 git_oid_cpy(&entry
->oid
, &oid
);
469 if (crc_object(&entry
->crc
, &idx
->pack
->mwf
, entry_start
, entry_size
) < 0)
472 /* Add the object to the list */
473 if (git_vector_insert(&idx
->objects
, entry
) < 0)
476 for (i
= oid
.id
[0]; i
< 256; ++i
) {
488 GIT_INLINE(bool) has_entry(git_indexer
*idx
, git_oid
*id
)
490 return git_oidmap_exists(idx
->pack
->idx_cache
, id
);
493 static int save_entry(git_indexer
*idx
, struct entry
*entry
, struct git_pack_entry
*pentry
, off64_t entry_start
)
497 if (entry_start
> UINT31_MAX
) {
498 entry
->offset
= UINT32_MAX
;
499 entry
->offset_long
= entry_start
;
501 entry
->offset
= (uint32_t)entry_start
;
504 pentry
->offset
= entry_start
;
506 if (git_oidmap_exists(idx
->pack
->idx_cache
, &pentry
->sha1
) ||
507 git_oidmap_set(idx
->pack
->idx_cache
, &pentry
->sha1
, pentry
) < 0) {
508 git_error_set(GIT_ERROR_INDEXER
, "cannot insert object into pack");
512 /* Add the object to the list */
513 if (git_vector_insert(&idx
->objects
, entry
) < 0)
516 for (i
= entry
->oid
.id
[0]; i
< 256; ++i
) {
523 static int hash_and_save(git_indexer
*idx
, git_rawobj
*obj
, off64_t entry_start
)
528 struct git_pack_entry
*pentry
= NULL
;
530 entry
= git__calloc(1, sizeof(*entry
));
531 GIT_ERROR_CHECK_ALLOC(entry
);
533 if (git_odb__hashobj(&oid
, obj
) < 0) {
534 git_error_set(GIT_ERROR_INDEXER
, "failed to hash object");
538 pentry
= git__calloc(1, sizeof(struct git_pack_entry
));
539 GIT_ERROR_CHECK_ALLOC(pentry
);
541 git_oid_cpy(&pentry
->sha1
, &oid
);
542 git_oid_cpy(&entry
->oid
, &oid
);
543 entry
->crc
= crc32(0L, Z_NULL
, 0);
545 entry_size
= (size_t)(idx
->off
- entry_start
);
546 if (crc_object(&entry
->crc
, &idx
->pack
->mwf
, entry_start
, entry_size
) < 0)
549 return save_entry(idx
, entry
, pentry
, entry_start
);
554 git__free(obj
->data
);
558 static int do_progress_callback(git_indexer
*idx
, git_indexer_progress
*stats
)
560 if (idx
->progress_cb
)
561 return git_error_set_after_callback_function(
562 idx
->progress_cb(stats
, idx
->progress_payload
),
567 /* Hash everything but the last 20B of input */
568 static void hash_partially(git_indexer
*idx
, const uint8_t *data
, size_t size
)
570 size_t to_expell
, to_keep
;
575 /* Easy case, dump the buffer and the data minus the last 20 bytes */
576 if (size
>= GIT_OID_RAWSZ
) {
577 git_hash_update(&idx
->trailer
, idx
->inbuf
, idx
->inbuf_len
);
578 git_hash_update(&idx
->trailer
, data
, size
- GIT_OID_RAWSZ
);
580 data
+= size
- GIT_OID_RAWSZ
;
581 memcpy(idx
->inbuf
, data
, GIT_OID_RAWSZ
);
582 idx
->inbuf_len
= GIT_OID_RAWSZ
;
586 /* We can just append */
587 if (idx
->inbuf_len
+ size
<= GIT_OID_RAWSZ
) {
588 memcpy(idx
->inbuf
+ idx
->inbuf_len
, data
, size
);
589 idx
->inbuf_len
+= size
;
593 /* We need to partially drain the buffer and then append */
594 to_keep
= GIT_OID_RAWSZ
- size
;
595 to_expell
= idx
->inbuf_len
- to_keep
;
597 git_hash_update(&idx
->trailer
, idx
->inbuf
, to_expell
);
599 memmove(idx
->inbuf
, idx
->inbuf
+ to_expell
, to_keep
);
600 memcpy(idx
->inbuf
+ to_keep
, data
, size
);
601 idx
->inbuf_len
+= size
- to_expell
;
604 #if defined(NO_MMAP) || !defined(GIT_WIN32)
606 static int write_at(git_indexer
*idx
, const void *data
, off64_t offset
, size_t size
)
608 size_t remaining_size
= size
;
609 const char *ptr
= (const char *)data
;
611 /* Handle data size larger than ssize_t */
612 while (remaining_size
> 0) {
614 HANDLE_EINTR(nb
, p_pwrite(idx
->pack
->mwf
.fd
, (void *)ptr
,
615 remaining_size
, offset
));
621 remaining_size
-= nb
;
627 static int append_to_pack(git_indexer
*idx
, const void *data
, size_t size
)
629 if (write_at(idx
, data
, idx
->pack
->mwf
.size
, size
) < 0) {
630 git_error_set(GIT_ERROR_OS
, "cannot extend packfile '%s'", idx
->pack
->pack_name
);
640 * Windows may keep different views to a networked file for the mmap- and
641 * open-accessed versions of a file, so any writes done through
642 * `write(2)`/`pwrite(2)` may not be reflected on the data that `mmap(2)` is
646 static int write_at(git_indexer
*idx
, const void *data
, off64_t offset
, size_t size
)
648 git_file fd
= idx
->pack
->mwf
.fd
;
649 size_t mmap_alignment
;
652 unsigned char *map_data
;
656 GIT_ASSERT_ARG(data
);
657 GIT_ASSERT_ARG(size
);
659 if ((error
= git__mmap_alignment(&mmap_alignment
)) < 0)
662 /* the offset needs to be at the mmap boundary for the platform */
663 page_offset
= offset
% mmap_alignment
;
664 page_start
= offset
- page_offset
;
666 if ((error
= p_mmap(&map
, page_offset
+ size
, GIT_PROT_WRITE
, GIT_MAP_SHARED
, fd
, page_start
)) < 0)
669 map_data
= (unsigned char *)map
.data
;
670 memcpy(map_data
+ page_offset
, data
, size
);
676 static int append_to_pack(git_indexer
*idx
, const void *data
, size_t size
)
679 size_t mmap_alignment
;
682 off64_t current_size
= idx
->pack
->mwf
.size
;
688 if ((error
= git__mmap_alignment(&mmap_alignment
)) < 0)
691 /* Write a single byte to force the file system to allocate space now or
692 * report an error, since we can't report errors when writing using mmap.
693 * Round the size up to the nearest page so that we only need to perform file
694 * I/O when we add a page, instead of whenever we write even a single byte. */
695 new_size
= current_size
+ size
;
696 page_offset
= new_size
% mmap_alignment
;
697 page_start
= new_size
- page_offset
;
699 if (p_pwrite(idx
->pack
->mwf
.fd
, data
, 1, page_start
+ mmap_alignment
- 1) < 0) {
700 git_error_set(GIT_ERROR_OS
, "cannot extend packfile '%s'", idx
->pack
->pack_name
);
704 return write_at(idx
, data
, idx
->pack
->mwf
.size
, size
);
709 static int read_stream_object(git_indexer
*idx
, git_indexer_progress
*stats
)
711 git_packfile_stream
*stream
= &idx
->stream
;
712 off64_t entry_start
= idx
->off
;
715 git_mwindow
*w
= NULL
;
718 if (idx
->pack
->mwf
.size
<= idx
->off
+ 20)
721 if (!idx
->have_stream
) {
722 error
= git_packfile_unpack_header(&entry_size
, &type
, idx
->pack
, &w
, &idx
->off
);
723 if (error
== GIT_EBUFS
) {
724 idx
->off
= entry_start
;
730 git_mwindow_close(&w
);
731 idx
->entry_start
= entry_start
;
732 git_hash_init(&idx
->hash_ctx
);
733 git_buf_clear(&idx
->entry_data
);
735 if (type
== GIT_OBJECT_REF_DELTA
|| type
== GIT_OBJECT_OFS_DELTA
) {
736 error
= advance_delta_offset(idx
, type
);
737 if (error
== GIT_EBUFS
) {
738 idx
->off
= entry_start
;
748 error
= hash_header(&idx
->hash_ctx
, entry_size
, type
);
753 idx
->have_stream
= 1;
754 idx
->entry_type
= type
;
756 error
= git_packfile_stream_open(stream
, idx
->pack
, idx
->off
);
761 if (idx
->have_delta
) {
762 error
= read_object_stream(idx
, stream
);
764 error
= hash_object_stream(idx
, stream
);
767 idx
->off
= stream
->curpos
;
768 if (error
== GIT_EBUFS
)
771 /* We want to free the stream resources no matter what here */
772 idx
->have_stream
= 0;
773 git_packfile_stream_dispose(stream
);
778 if (idx
->have_delta
) {
779 error
= store_delta(idx
);
781 error
= store_object(idx
);
787 if (!idx
->have_delta
) {
788 stats
->indexed_objects
++;
790 stats
->received_objects
++;
792 if ((error
= do_progress_callback(idx
, stats
)) != 0)
798 int git_indexer_append(git_indexer
*idx
, const void *data
, size_t size
, git_indexer_progress
*stats
)
801 struct git_pack_header
*hdr
= &idx
->hdr
;
802 git_mwindow_file
*mwf
= &idx
->pack
->mwf
;
805 GIT_ASSERT_ARG(data
);
806 GIT_ASSERT_ARG(stats
);
808 if ((error
= append_to_pack(idx
, data
, size
)) < 0)
811 hash_partially(idx
, data
, (int)size
);
813 /* Make sure we set the new size of the pack */
814 idx
->pack
->mwf
.size
+= size
;
816 if (!idx
->parsed_header
) {
817 unsigned int total_objects
;
819 if ((unsigned)idx
->pack
->mwf
.size
< sizeof(struct git_pack_header
))
822 if ((error
= parse_header(&idx
->hdr
, idx
->pack
)) < 0)
825 idx
->parsed_header
= 1;
826 idx
->nr_objects
= ntohl(hdr
->hdr_entries
);
827 idx
->off
= sizeof(struct git_pack_header
);
829 if (idx
->nr_objects
<= git_indexer__max_objects
) {
830 total_objects
= (unsigned int)idx
->nr_objects
;
832 git_error_set(GIT_ERROR_INDEXER
, "too many objects");
836 if (git_oidmap_new(&idx
->pack
->idx_cache
) < 0)
839 idx
->pack
->has_cache
= 1;
840 if (git_vector_init(&idx
->objects
, total_objects
, objects_cmp
) < 0)
843 if (git_vector_init(&idx
->deltas
, total_objects
/ 2, NULL
) < 0)
846 stats
->received_objects
= 0;
847 stats
->local_objects
= 0;
848 stats
->total_deltas
= 0;
849 stats
->indexed_deltas
= 0;
850 stats
->indexed_objects
= 0;
851 stats
->total_objects
= total_objects
;
853 if ((error
= do_progress_callback(idx
, stats
)) != 0)
857 /* Now that we have data in the pack, let's try to parse it */
859 /* As the file grows any windows we try to use will be out of date */
860 if ((error
= git_mwindow_free_all(mwf
)) < 0)
863 while (stats
->indexed_objects
< idx
->nr_objects
) {
864 if ((error
= read_stream_object(idx
, stats
)) != 0) {
865 if (error
== GIT_EBUFS
)
875 git_mwindow_free_all(mwf
);
879 static int index_path(git_buf
*path
, git_indexer
*idx
, const char *suffix
)
881 const char prefix
[] = "pack-";
882 size_t slash
= (size_t)path
->size
;
884 /* search backwards for '/' */
885 while (slash
> 0 && path
->ptr
[slash
- 1] != '/')
888 if (git_buf_grow(path
, slash
+ 1 + strlen(prefix
) +
889 GIT_OID_HEXSZ
+ strlen(suffix
) + 1) < 0)
892 git_buf_truncate(path
, slash
);
893 git_buf_puts(path
, prefix
);
894 git_oid_fmt(path
->ptr
+ git_buf_len(path
), &idx
->hash
);
895 path
->size
+= GIT_OID_HEXSZ
;
896 git_buf_puts(path
, suffix
);
898 return git_buf_oom(path
) ? -1 : 0;
902 * Rewind the packfile by the trailer, as we might need to fix the
903 * packfile by injecting objects at the tail and must overwrite it.
905 static int seek_back_trailer(git_indexer
*idx
)
907 idx
->pack
->mwf
.size
-= GIT_OID_RAWSZ
;
908 return git_mwindow_free_all(&idx
->pack
->mwf
);
911 static int inject_object(git_indexer
*idx
, git_oid
*id
)
913 git_odb_object
*obj
= NULL
;
914 struct entry
*entry
= NULL
;
915 struct git_pack_entry
*pentry
= NULL
;
917 unsigned char hdr
[64];
918 git_buf buf
= GIT_BUF_INIT
;
924 if ((error
= seek_back_trailer(idx
)) < 0)
927 entry_start
= idx
->pack
->mwf
.size
;
929 if ((error
= git_odb_read(&obj
, idx
->odb
, id
)) < 0) {
930 git_error_set(GIT_ERROR_INDEXER
, "missing delta bases");
934 data
= git_odb_object_data(obj
);
935 len
= git_odb_object_size(obj
);
937 entry
= git__calloc(1, sizeof(*entry
));
938 GIT_ERROR_CHECK_ALLOC(entry
);
940 entry
->crc
= crc32(0L, Z_NULL
, 0);
942 /* Write out the object header */
943 if ((error
= git_packfile__object_header(&hdr_len
, hdr
, len
, git_odb_object_type(obj
))) < 0 ||
944 (error
= append_to_pack(idx
, hdr
, hdr_len
)) < 0)
947 idx
->pack
->mwf
.size
+= hdr_len
;
948 entry
->crc
= crc32(entry
->crc
, hdr
, (uInt
)hdr_len
);
950 if ((error
= git_zstream_deflatebuf(&buf
, data
, len
)) < 0)
953 /* And then the compressed object */
954 if ((error
= append_to_pack(idx
, buf
.ptr
, buf
.size
)) < 0)
957 idx
->pack
->mwf
.size
+= buf
.size
;
958 entry
->crc
= htonl(crc32(entry
->crc
, (unsigned char *)buf
.ptr
, (uInt
)buf
.size
));
959 git_buf_dispose(&buf
);
961 /* Write a fake trailer so the pack functions play ball */
963 if ((error
= append_to_pack(idx
, &foo
, GIT_OID_RAWSZ
)) < 0)
966 idx
->pack
->mwf
.size
+= GIT_OID_RAWSZ
;
968 pentry
= git__calloc(1, sizeof(struct git_pack_entry
));
969 GIT_ERROR_CHECK_ALLOC(pentry
);
971 git_oid_cpy(&pentry
->sha1
, id
);
972 git_oid_cpy(&entry
->oid
, id
);
973 idx
->off
= entry_start
+ hdr_len
+ len
;
975 error
= save_entry(idx
, entry
, pentry
, entry_start
);
983 git_odb_object_free(obj
);
987 static int fix_thin_pack(git_indexer
*idx
, git_indexer_progress
*stats
)
989 int error
, found_ref_delta
= 0;
991 struct delta_info
*delta
;
994 git_mwindow
*w
= NULL
;
996 unsigned char *base_info
;
997 unsigned int left
= 0;
1000 GIT_ASSERT(git_vector_length(&idx
->deltas
) > 0);
1002 if (idx
->odb
== NULL
) {
1003 git_error_set(GIT_ERROR_INDEXER
, "cannot fix a thin pack without an ODB");
1007 /* Loop until we find the first REF delta */
1008 git_vector_foreach(&idx
->deltas
, i
, delta
) {
1012 curpos
= delta
->delta_off
;
1013 error
= git_packfile_unpack_header(&size
, &type
, idx
->pack
, &w
, &curpos
);
1017 if (type
== GIT_OBJECT_REF_DELTA
) {
1018 found_ref_delta
= 1;
1023 if (!found_ref_delta
) {
1024 git_error_set(GIT_ERROR_INDEXER
, "no REF_DELTA found, cannot inject object");
1028 /* curpos now points to the base information, which is an OID */
1029 base_info
= git_mwindow_open(&idx
->pack
->mwf
, &w
, curpos
, GIT_OID_RAWSZ
, &left
);
1030 if (base_info
== NULL
) {
1031 git_error_set(GIT_ERROR_INDEXER
, "failed to map delta information");
1035 git_oid_fromraw(&base
, base_info
);
1036 git_mwindow_close(&w
);
1038 if (has_entry(idx
, &base
))
1041 if (inject_object(idx
, &base
) < 0)
1044 stats
->local_objects
++;
1049 static int resolve_deltas(git_indexer
*idx
, git_indexer_progress
*stats
)
1053 struct delta_info
*delta
;
1054 int progressed
= 0, non_null
= 0, progress_cb_result
;
1056 while (idx
->deltas
.length
> 0) {
1059 git_vector_foreach(&idx
->deltas
, i
, delta
) {
1060 git_rawobj obj
= {0};
1066 idx
->off
= delta
->delta_off
;
1067 if ((error
= git_packfile_unpack(&obj
, idx
->pack
, &idx
->off
)) < 0) {
1068 if (error
== GIT_PASSTHROUGH
) {
1069 /* We have not seen the base object, we'll try again later. */
1075 if (idx
->do_verify
&& check_object_connectivity(idx
, &obj
) < 0)
1076 /* TODO: error? continue? */
1079 if (hash_and_save(idx
, &obj
, delta
->delta_off
) < 0)
1082 git__free(obj
.data
);
1083 stats
->indexed_objects
++;
1084 stats
->indexed_deltas
++;
1086 if ((progress_cb_result
= do_progress_callback(idx
, stats
)) < 0)
1087 return progress_cb_result
;
1089 /* remove from the list */
1090 git_vector_set(NULL
, &idx
->deltas
, i
, NULL
);
1094 /* if none were actually set, we're done */
1098 if (!progressed
&& (fix_thin_pack(idx
, stats
) < 0)) {
1106 static int update_header_and_rehash(git_indexer
*idx
, git_indexer_progress
*stats
)
1109 size_t chunk
= 1024*1024;
1111 git_mwindow
*w
= NULL
;
1112 git_mwindow_file
*mwf
;
1115 mwf
= &idx
->pack
->mwf
;
1117 git_hash_init(&idx
->trailer
);
1120 /* Update the header to include the number of local objects we injected */
1121 idx
->hdr
.hdr_entries
= htonl(stats
->total_objects
+ stats
->local_objects
);
1122 if (write_at(idx
, &idx
->hdr
, 0, sizeof(struct git_pack_header
)) < 0)
1126 * We now use the same technique as before to determine the
1127 * hash. We keep reading up to the end and let
1128 * hash_partially() keep the existing trailer out of the
1131 if (git_mwindow_free_all(mwf
) < 0)
1135 while (hashed
< mwf
->size
) {
1136 ptr
= git_mwindow_open(mwf
, &w
, hashed
, chunk
, &left
);
1140 hash_partially(idx
, ptr
, left
);
1143 git_mwindow_close(&w
);
1149 int git_indexer_commit(git_indexer
*idx
, git_indexer_progress
*stats
)
1151 git_mwindow
*w
= NULL
;
1152 unsigned int i
, long_offsets
= 0, left
;
1154 struct git_pack_idx_header hdr
;
1155 git_buf filename
= GIT_BUF_INIT
;
1156 struct entry
*entry
;
1157 git_oid trailer_hash
, file_hash
;
1158 git_filebuf index_file
= {0};
1159 void *packfile_trailer
;
1161 if (!idx
->parsed_header
) {
1162 git_error_set(GIT_ERROR_INDEXER
, "incomplete pack header");
1166 /* Test for this before resolve_deltas(), as it plays with idx->off */
1167 if (idx
->off
+ 20 < idx
->pack
->mwf
.size
) {
1168 git_error_set(GIT_ERROR_INDEXER
, "unexpected data at the end of the pack");
1171 if (idx
->off
+ 20 > idx
->pack
->mwf
.size
) {
1172 git_error_set(GIT_ERROR_INDEXER
, "missing trailer at the end of the pack");
1176 packfile_trailer
= git_mwindow_open(&idx
->pack
->mwf
, &w
, idx
->pack
->mwf
.size
- GIT_OID_RAWSZ
, GIT_OID_RAWSZ
, &left
);
1177 if (packfile_trailer
== NULL
) {
1178 git_mwindow_close(&w
);
1182 /* Compare the packfile trailer as it was sent to us and what we calculated */
1183 git_oid_fromraw(&file_hash
, packfile_trailer
);
1184 git_mwindow_close(&w
);
1186 git_hash_final(&trailer_hash
, &idx
->trailer
);
1187 if (git_oid_cmp(&file_hash
, &trailer_hash
)) {
1188 git_error_set(GIT_ERROR_INDEXER
, "packfile trailer mismatch");
1192 /* Freeze the number of deltas */
1193 stats
->total_deltas
= stats
->total_objects
- stats
->indexed_objects
;
1195 if ((error
= resolve_deltas(idx
, stats
)) < 0)
1198 if (stats
->indexed_objects
!= stats
->total_objects
) {
1199 git_error_set(GIT_ERROR_INDEXER
, "early EOF");
1203 if (stats
->local_objects
> 0) {
1204 if (update_header_and_rehash(idx
, stats
) < 0)
1207 git_hash_final(&trailer_hash
, &idx
->trailer
);
1208 write_at(idx
, &trailer_hash
, idx
->pack
->mwf
.size
- GIT_OID_RAWSZ
, GIT_OID_RAWSZ
);
1212 * Is the resulting graph fully connected or are we still
1213 * missing some objects? In the second case, we can
1214 * bail out due to an incomplete and thus corrupt
1217 if (git_oidmap_size(idx
->expected_oids
) > 0) {
1218 git_error_set(GIT_ERROR_INDEXER
, "packfile is missing %"PRIuZ
" objects",
1219 git_oidmap_size(idx
->expected_oids
));
1223 git_vector_sort(&idx
->objects
);
1225 /* Use the trailer hash as the pack file name to ensure
1226 * files with different contents have different names */
1227 git_oid_cpy(&idx
->hash
, &trailer_hash
);
1229 git_buf_sets(&filename
, idx
->pack
->pack_name
);
1230 git_buf_shorten(&filename
, strlen("pack"));
1231 git_buf_puts(&filename
, "idx");
1232 if (git_buf_oom(&filename
))
1235 if (git_filebuf_open(&index_file
, filename
.ptr
,
1236 GIT_FILEBUF_HASH_CONTENTS
|
1237 (idx
->do_fsync
? GIT_FILEBUF_FSYNC
: 0),
1241 /* Write out the header */
1242 hdr
.idx_signature
= htonl(PACK_IDX_SIGNATURE
);
1243 hdr
.idx_version
= htonl(2);
1244 git_filebuf_write(&index_file
, &hdr
, sizeof(hdr
));
1246 /* Write out the fanout table */
1247 for (i
= 0; i
< 256; ++i
) {
1248 uint32_t n
= htonl(idx
->fanout
[i
]);
1249 git_filebuf_write(&index_file
, &n
, sizeof(n
));
1252 /* Write out the object names (SHA-1 hashes) */
1253 git_vector_foreach(&idx
->objects
, i
, entry
) {
1254 git_filebuf_write(&index_file
, &entry
->oid
, sizeof(git_oid
));
1257 /* Write out the CRC32 values */
1258 git_vector_foreach(&idx
->objects
, i
, entry
) {
1259 git_filebuf_write(&index_file
, &entry
->crc
, sizeof(uint32_t));
1262 /* Write out the offsets */
1263 git_vector_foreach(&idx
->objects
, i
, entry
) {
1266 if (entry
->offset
== UINT32_MAX
)
1267 n
= htonl(0x80000000 | long_offsets
++);
1269 n
= htonl(entry
->offset
);
1271 git_filebuf_write(&index_file
, &n
, sizeof(uint32_t));
1274 /* Write out the long offsets */
1275 git_vector_foreach(&idx
->objects
, i
, entry
) {
1278 if (entry
->offset
!= UINT32_MAX
)
1281 split
[0] = htonl(entry
->offset_long
>> 32);
1282 split
[1] = htonl(entry
->offset_long
& 0xffffffff);
1284 git_filebuf_write(&index_file
, &split
, sizeof(uint32_t) * 2);
1287 /* Write out the packfile trailer to the index */
1288 if (git_filebuf_write(&index_file
, &trailer_hash
, GIT_OID_RAWSZ
) < 0)
1291 /* Write out the hash of the idx */
1292 if (git_filebuf_hash(&trailer_hash
, &index_file
) < 0)
1295 git_filebuf_write(&index_file
, &trailer_hash
, sizeof(git_oid
));
1297 /* Figure out what the final name should be */
1298 if (index_path(&filename
, idx
, ".idx") < 0)
1302 if (git_filebuf_commit_at(&index_file
, filename
.ptr
) < 0)
1305 if (git_mwindow_free_all(&idx
->pack
->mwf
) < 0)
1308 #if !defined(NO_MMAP) && defined(GIT_WIN32)
1310 * Some non-Windows remote filesystems fail when truncating files if the
1311 * file permissions change after opening the file (done by p_mkstemp).
1313 * Truncation is only needed when mmap is used to undo rounding up to next
1314 * page_size in append_to_pack.
1316 if (p_ftruncate(idx
->pack
->mwf
.fd
, idx
->pack
->mwf
.size
) < 0) {
1317 git_error_set(GIT_ERROR_OS
, "failed to truncate pack file '%s'", idx
->pack
->pack_name
);
1322 if (idx
->do_fsync
&& p_fsync(idx
->pack
->mwf
.fd
) < 0) {
1323 git_error_set(GIT_ERROR_OS
, "failed to fsync packfile");
1327 /* We need to close the descriptor here so Windows doesn't choke on commit_at */
1328 if (p_close(idx
->pack
->mwf
.fd
) < 0) {
1329 git_error_set(GIT_ERROR_OS
, "failed to close packfile");
1333 idx
->pack
->mwf
.fd
= -1;
1335 if (index_path(&filename
, idx
, ".pack") < 0)
1338 /* And don't forget to rename the packfile to its new place. */
1339 if (p_rename(idx
->pack
->pack_name
, git_buf_cstr(&filename
)) < 0)
1342 /* And fsync the parent directory if we're asked to. */
1343 if (idx
->do_fsync
&&
1344 git_futils_fsync_parent(git_buf_cstr(&filename
)) < 0)
1347 idx
->pack_committed
= 1;
1349 git_buf_dispose(&filename
);
1353 git_mwindow_free_all(&idx
->pack
->mwf
);
1354 git_filebuf_cleanup(&index_file
);
1355 git_buf_dispose(&filename
);
1359 void git_indexer_free(git_indexer
*idx
)
1368 if (idx
->have_stream
)
1369 git_packfile_stream_dispose(&idx
->stream
);
1371 git_vector_free_deep(&idx
->objects
);
1373 if (idx
->pack
->idx_cache
) {
1374 struct git_pack_entry
*pentry
;
1375 git_oidmap_foreach_value(idx
->pack
->idx_cache
, pentry
, {
1379 git_oidmap_free(idx
->pack
->idx_cache
);
1382 git_vector_free_deep(&idx
->deltas
);
1384 git_packfile_free(idx
->pack
, !idx
->pack_committed
);
1387 while (git_oidmap_iterate((void **) &value
, idx
->expected_oids
, &iter
, &key
) == 0)
1390 git_hash_ctx_cleanup(&idx
->trailer
);
1391 git_hash_ctx_cleanup(&idx
->hash_ctx
);
1392 git_buf_dispose(&idx
->entry_data
);
1393 git_oidmap_free(idx
->expected_oids
);