2 * Copyright (C) the libgit2 contributors. All rights reserved.
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
15 #include "sha1_lookup.h"
17 /* Option to bypass checking existence of '.keep' files */
18 bool git_disable_pack_keep_file_checks
= false;
20 static int packfile_open(struct git_pack_file
*p
);
21 static off64_t
nth_packed_object_offset(const struct git_pack_file
*p
, uint32_t n
);
22 static int packfile_unpack_compressed(
24 struct git_pack_file
*p
,
30 /* Can find the offset of an object given
31 * a prefix of an identifier.
32 * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
33 * is ambiguous within the pack.
34 * This method assumes that len is between
35 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
37 static int pack_entry_find_offset(
40 struct git_pack_file
*p
,
41 const git_oid
*short_oid
,
44 static int packfile_error(const char *message
)
46 git_error_set(GIT_ERROR_ODB
, "invalid pack file - %s", message
);
54 static git_pack_cache_entry
*new_cache_object(git_rawobj
*source
)
56 git_pack_cache_entry
*e
= git__calloc(1, sizeof(git_pack_cache_entry
));
60 git_atomic_inc(&e
->refcount
);
61 memcpy(&e
->raw
, source
, sizeof(git_rawobj
));
66 static void free_cache_object(void *o
)
68 git_pack_cache_entry
*e
= (git_pack_cache_entry
*)o
;
71 assert(e
->refcount
.val
== 0);
72 git__free(e
->raw
.data
);
77 static void cache_free(git_pack_cache
*cache
)
79 git_pack_cache_entry
*entry
;
82 git_offmap_foreach_value(cache
->entries
, entry
, {
83 free_cache_object(entry
);
86 git_offmap_free(cache
->entries
);
87 cache
->entries
= NULL
;
91 static int cache_init(git_pack_cache
*cache
)
93 if (git_offmap_new(&cache
->entries
) < 0)
96 cache
->memory_limit
= GIT_PACK_CACHE_MEMORY_LIMIT
;
98 if (git_mutex_init(&cache
->lock
)) {
99 git_error_set(GIT_ERROR_OS
, "failed to initialize pack cache mutex");
101 git__free(cache
->entries
);
102 cache
->entries
= NULL
;
110 static git_pack_cache_entry
*cache_get(git_pack_cache
*cache
, off64_t offset
)
112 git_pack_cache_entry
*entry
;
114 if (git_mutex_lock(&cache
->lock
) < 0)
117 if ((entry
= git_offmap_get(cache
->entries
, offset
)) != NULL
) {
118 git_atomic_inc(&entry
->refcount
);
119 entry
->last_usage
= cache
->use_ctr
++;
121 git_mutex_unlock(&cache
->lock
);
126 /* Run with the cache lock held */
127 static void free_lowest_entry(git_pack_cache
*cache
)
130 git_pack_cache_entry
*entry
;
132 git_offmap_foreach(cache
->entries
, offset
, entry
, {
133 if (entry
&& entry
->refcount
.val
== 0) {
134 cache
->memory_used
-= entry
->raw
.len
;
135 git_offmap_delete(cache
->entries
, offset
);
136 free_cache_object(entry
);
141 static int cache_add(
142 git_pack_cache_entry
**cached_out
,
143 git_pack_cache
*cache
,
147 git_pack_cache_entry
*entry
;
150 if (base
->len
> GIT_PACK_CACHE_SIZE_LIMIT
)
153 entry
= new_cache_object(base
);
155 if (git_mutex_lock(&cache
->lock
) < 0) {
156 git_error_set(GIT_ERROR_OS
, "failed to lock cache");
160 /* Add it to the cache if nobody else has */
161 exists
= git_offmap_exists(cache
->entries
, offset
);
163 while (cache
->memory_used
+ base
->len
> cache
->memory_limit
)
164 free_lowest_entry(cache
);
166 git_offmap_set(cache
->entries
, offset
, entry
);
167 cache
->memory_used
+= entry
->raw
.len
;
171 git_mutex_unlock(&cache
->lock
);
172 /* Somebody beat us to adding it into the cache */
182 /***********************************************************
186 ***********************************************************/
188 static void pack_index_free(struct git_pack_file
*p
)
194 if (p
->index_map
.data
) {
195 git_futils_mmap_free(&p
->index_map
);
196 p
->index_map
.data
= NULL
;
200 static int pack_index_check(const char *path
, struct git_pack_file
*p
)
202 struct git_pack_idx_header
*hdr
;
203 uint32_t version
, nr
, i
, *index
;
208 /* TODO: properly open the file without access time using O_NOATIME */
209 git_file fd
= git_futils_open_ro(path
);
213 if (p_fstat(fd
, &st
) < 0) {
215 git_error_set(GIT_ERROR_OS
, "unable to stat pack index '%s'", path
);
219 if (!S_ISREG(st
.st_mode
) ||
220 !git__is_sizet(st
.st_size
) ||
221 (idx_size
= (size_t)st
.st_size
) < 4 * 256 + 20 + 20)
224 git_error_set(GIT_ERROR_ODB
, "invalid pack index '%s'", path
);
228 error
= git_futils_mmap_ro(&p
->index_map
, fd
, 0, idx_size
);
235 hdr
= idx_map
= p
->index_map
.data
;
237 if (hdr
->idx_signature
== htonl(PACK_IDX_SIGNATURE
)) {
238 version
= ntohl(hdr
->idx_version
);
240 if (version
< 2 || version
> 2) {
241 git_futils_mmap_free(&p
->index_map
);
242 return packfile_error("unsupported index version");
252 index
+= 2; /* skip index header */
254 for (i
= 0; i
< 256; i
++) {
255 uint32_t n
= ntohl(index
[i
]);
257 git_futils_mmap_free(&p
->index_map
);
258 return packfile_error("index is non-monotonic");
266 * - 256 index entries 4 bytes each
267 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
268 * - 20-byte SHA1 of the packfile
269 * - 20-byte SHA1 file checksum
271 if (idx_size
!= 4*256 + nr
* 24 + 20 + 20) {
272 git_futils_mmap_free(&p
->index_map
);
273 return packfile_error("index is corrupted");
275 } else if (version
== 2) {
278 * - 8 bytes of header
279 * - 256 index entries 4 bytes each
280 * - 20-byte sha1 entry * nr
281 * - 4-byte crc entry * nr
282 * - 4-byte offset entry * nr
283 * - 20-byte SHA1 of the packfile
284 * - 20-byte SHA1 file checksum
285 * And after the 4-byte offset table might be a
286 * variable sized table containing 8-byte entries
287 * for offsets larger than 2^31.
289 unsigned long min_size
= 8 + 4*256 + nr
*(20 + 4 + 4) + 20 + 20;
290 unsigned long max_size
= min_size
;
293 max_size
+= (nr
- 1)*8;
295 if (idx_size
< min_size
|| idx_size
> max_size
) {
296 git_futils_mmap_free(&p
->index_map
);
297 return packfile_error("wrong index size");
302 p
->index_version
= version
;
306 static int pack_index_open(struct git_pack_file
*p
)
312 if (p
->index_version
> -1)
315 name_len
= strlen(p
->pack_name
);
316 assert(name_len
> strlen(".pack")); /* checked by git_pack_file alloc */
318 if (git_buf_init(&idx_name
, name_len
) < 0)
321 git_buf_put(&idx_name
, p
->pack_name
, name_len
- strlen(".pack"));
322 git_buf_puts(&idx_name
, ".idx");
323 if (git_buf_oom(&idx_name
)) {
324 git_buf_dispose(&idx_name
);
328 if ((error
= git_mutex_lock(&p
->lock
)) < 0) {
329 git_buf_dispose(&idx_name
);
333 if (p
->index_version
== -1)
334 error
= pack_index_check(idx_name
.ptr
, p
);
336 git_buf_dispose(&idx_name
);
338 git_mutex_unlock(&p
->lock
);
343 static unsigned char *pack_window_open(
344 struct git_pack_file
*p
,
345 git_mwindow
**w_cursor
,
349 if (p
->mwf
.fd
== -1 && packfile_open(p
) < 0)
352 /* Since packfiles end in a hash of their content and it's
353 * pointless to ask for an offset into the middle of that
354 * hash, and the pack_window_contains function above wouldn't match
355 * don't allow an offset too close to the end of the file.
357 * Don't allow a negative offset, as that means we've wrapped
360 if (offset
> (p
->mwf
.size
- 20))
365 return git_mwindow_open(&p
->mwf
, w_cursor
, offset
, 20, left
);
369 * The per-object header is a pretty dense thing, which is
370 * - first byte: low four bits are "size",
371 * then three bits of "type",
372 * with the high bit being "size continues".
373 * - each byte afterwards: low seven bits are size continuation,
374 * with the high bit being "size continues"
376 size_t git_packfile__object_header(unsigned char *hdr
, size_t size
, git_object_t type
)
378 unsigned char *hdr_base
;
381 assert(type
>= GIT_OBJECT_COMMIT
&& type
<= GIT_OBJECT_REF_DELTA
);
383 /* TODO: add support for chunked objects; see git.git 6c0d19b1 */
385 c
= (unsigned char)((type
<< 4) | (size
& 15));
396 return (hdr
- hdr_base
);
400 static int packfile_unpack_header1(
401 unsigned long *usedp
,
404 const unsigned char *buf
,
408 unsigned long size
, c
;
409 unsigned long used
= 0;
412 *type
= (c
>> 4) & 7;
417 git_error_set(GIT_ERROR_ODB
, "buffer too small");
421 if (bitsizeof(long) <= shift
) {
423 git_error_set(GIT_ERROR_ODB
, "packfile corrupted");
428 size
+= (c
& 0x7f) << shift
;
432 *sizep
= (size_t)size
;
437 int git_packfile_unpack_header(
439 git_object_t
*type_p
,
440 git_mwindow_file
*mwf
,
441 git_mwindow
**w_curs
,
449 /* pack_window_open() assures us we have [base, base + 20) available
450 * as a range that we can look at at. (Its actually the hash
451 * size that is assured.) With our object header encoding
452 * the maximum deflated object size is 2^137, which is just
453 * insane, so we know won't exceed what we have been given.
455 /* base = pack_window_open(p, w_curs, *curpos, &left); */
456 base
= git_mwindow_open(mwf
, w_curs
, *curpos
, 20, &left
);
460 ret
= packfile_unpack_header1(&used
, size_p
, type_p
, base
, left
);
461 git_mwindow_close(w_curs
);
462 if (ret
== GIT_EBUFS
)
465 return packfile_error("header length is zero");
471 int git_packfile_resolve_header(
473 git_object_t
*type_p
,
474 struct git_pack_file
*p
,
477 git_mwindow
*w_curs
= NULL
;
478 off64_t curpos
= offset
;
484 error
= git_packfile_unpack_header(&size
, &type
, &p
->mwf
, &w_curs
, &curpos
);
488 if (type
== GIT_OBJECT_OFS_DELTA
|| type
== GIT_OBJECT_REF_DELTA
) {
490 git_packfile_stream stream
;
492 base_offset
= get_delta_base(p
, &w_curs
, &curpos
, type
, offset
);
493 git_mwindow_close(&w_curs
);
494 if ((error
= git_packfile_stream_open(&stream
, p
, curpos
)) < 0)
496 error
= git_delta_read_header_fromstream(&base_size
, size_p
, &stream
);
497 git_packfile_stream_dispose(&stream
);
505 while (type
== GIT_OBJECT_OFS_DELTA
|| type
== GIT_OBJECT_REF_DELTA
) {
506 curpos
= base_offset
;
507 error
= git_packfile_unpack_header(&size
, &type
, &p
->mwf
, &w_curs
, &curpos
);
510 if (type
!= GIT_OBJECT_OFS_DELTA
&& type
!= GIT_OBJECT_REF_DELTA
)
512 base_offset
= get_delta_base(p
, &w_curs
, &curpos
, type
, base_offset
);
513 git_mwindow_close(&w_curs
);
520 #define SMALL_STACK_SIZE 64
523 * Generate the chain of dependencies which we need to get to the
524 * object at `off`. `chain` is used a stack, popping gives the right
525 * order to apply deltas on. If an object is found in the pack's base
526 * cache, we stop calculating there.
528 static int pack_dependency_chain(git_dependency_chain
*chain_out
,
529 git_pack_cache_entry
**cached_out
, off64_t
*cached_off
,
530 struct pack_chain_elem
*small_stack
, size_t *stack_sz
,
531 struct git_pack_file
*p
, off64_t obj_offset
)
533 git_dependency_chain chain
= GIT_ARRAY_INIT
;
534 git_mwindow
*w_curs
= NULL
;
535 off64_t curpos
= obj_offset
, base_offset
;
536 int error
= 0, use_heap
= 0;
537 size_t size
, elem_pos
;
542 struct pack_chain_elem
*elem
;
543 git_pack_cache_entry
*cached
= NULL
;
545 /* if we have a base cached, we can stop here instead */
546 if ((cached
= cache_get(&p
->bases
, obj_offset
)) != NULL
) {
547 *cached_out
= cached
;
548 *cached_off
= obj_offset
;
552 /* if we run out of space on the small stack, use the array */
553 if (elem_pos
== SMALL_STACK_SIZE
) {
554 git_array_init_to_size(chain
, elem_pos
);
555 GIT_ERROR_CHECK_ARRAY(chain
);
556 memcpy(chain
.ptr
, small_stack
, elem_pos
* sizeof(struct pack_chain_elem
));
557 chain
.size
= elem_pos
;
563 elem
= &small_stack
[elem_pos
];
565 elem
= git_array_alloc(chain
);
572 elem
->base_key
= obj_offset
;
574 error
= git_packfile_unpack_header(&size
, &type
, &p
->mwf
, &w_curs
, &curpos
);
579 elem
->offset
= curpos
;
582 elem
->base_key
= obj_offset
;
584 if (type
!= GIT_OBJECT_OFS_DELTA
&& type
!= GIT_OBJECT_REF_DELTA
)
587 base_offset
= get_delta_base(p
, &w_curs
, &curpos
, type
, obj_offset
);
588 git_mwindow_close(&w_curs
);
590 if (base_offset
== 0) {
591 error
= packfile_error("delta offset is zero");
594 if (base_offset
< 0) { /* must actually be an error code */
595 error
= (int)base_offset
;
599 /* we need to pass the pos *after* the delta-base bit */
600 elem
->offset
= curpos
;
602 /* go through the loop again, but with the new object */
603 obj_offset
= base_offset
;
608 *stack_sz
= elem_pos
+ 1;
613 git_array_clear(chain
);
617 int git_packfile_unpack(
619 struct git_pack_file
*p
,
622 git_mwindow
*w_curs
= NULL
;
623 off64_t curpos
= *obj_offset
;
624 int error
, free_base
= 0;
625 git_dependency_chain chain
= GIT_ARRAY_INIT
;
626 struct pack_chain_elem
*elem
= NULL
, *stack
;
627 git_pack_cache_entry
*cached
= NULL
;
628 struct pack_chain_elem small_stack
[SMALL_STACK_SIZE
];
629 size_t stack_size
= 0, elem_pos
, alloclen
;
630 git_object_t base_type
;
633 * TODO: optionally check the CRC on the packfile
636 error
= pack_dependency_chain(&chain
, &cached
, obj_offset
, small_stack
, &stack_size
, p
, *obj_offset
);
642 obj
->type
= GIT_OBJECT_INVALID
;
644 /* let's point to the right stack */
645 stack
= chain
.ptr
? chain
.ptr
: small_stack
;
647 elem_pos
= stack_size
;
649 memcpy(obj
, &cached
->raw
, sizeof(git_rawobj
));
650 base_type
= obj
->type
;
651 elem_pos
--; /* stack_size includes the base, which isn't actually there */
653 elem
= &stack
[--elem_pos
];
654 base_type
= elem
->type
;
658 case GIT_OBJECT_COMMIT
:
659 case GIT_OBJECT_TREE
:
660 case GIT_OBJECT_BLOB
:
663 curpos
= elem
->offset
;
664 error
= packfile_unpack_compressed(obj
, p
, &w_curs
, &curpos
, elem
->size
, elem
->type
);
665 git_mwindow_close(&w_curs
);
666 base_type
= elem
->type
;
671 case GIT_OBJECT_OFS_DELTA
:
672 case GIT_OBJECT_REF_DELTA
:
673 error
= packfile_error("dependency chain ends in a delta");
676 error
= packfile_error("invalid packfile type in header");
681 * Finding the object we want a cached base element is
682 * problematic, as we need to make sure we don't accidentally
683 * give the caller the cached object, which it would then feel
684 * free to free, so we need to copy the data.
686 if (cached
&& stack_size
== 1) {
687 void *data
= obj
->data
;
689 GIT_ERROR_CHECK_ALLOC_ADD(&alloclen
, obj
->len
, 1);
690 obj
->data
= git__malloc(alloclen
);
691 GIT_ERROR_CHECK_ALLOC(obj
->data
);
693 memcpy(obj
->data
, data
, obj
->len
+ 1);
694 git_atomic_dec(&cached
->refcount
);
698 /* we now apply each consecutive delta until we run out */
699 while (elem_pos
> 0 && !error
) {
700 git_rawobj base
, delta
;
703 * We can now try to add the base to the cache, as
704 * long as it's not already the cached one.
707 free_base
= !!cache_add(&cached
, &p
->bases
, obj
, elem
->base_key
);
709 elem
= &stack
[elem_pos
- 1];
710 curpos
= elem
->offset
;
711 error
= packfile_unpack_compressed(&delta
, p
, &w_curs
, &curpos
, elem
->size
, elem
->type
);
712 git_mwindow_close(&w_curs
);
715 /* We have transferred ownership of the data to the cache. */
720 /* the current object becomes the new base, on which we apply the delta */
724 obj
->type
= GIT_OBJECT_INVALID
;
726 error
= git_delta_apply(&obj
->data
, &obj
->len
, base
.data
, base
.len
, delta
.data
, delta
.len
);
727 obj
->type
= base_type
;
730 * We usually don't want to free the base at this
731 * point, as we put it into the cache in the previous
732 * iteration. free_base lets us know that we got the
733 * base object directly from the packfile, so we can free it.
735 git__free(delta
.data
);
738 git__free(base
.data
);
742 git_atomic_dec(&cached
->refcount
);
754 git__free(obj
->data
);
756 git_atomic_dec(&cached
->refcount
);
760 *obj_offset
= curpos
;
762 git_array_clear(chain
);
766 int git_packfile_stream_open(git_packfile_stream
*obj
, struct git_pack_file
*p
, off64_t curpos
)
768 memset(obj
, 0, sizeof(git_packfile_stream
));
769 obj
->curpos
= curpos
;
772 if (git_zstream_init(&obj
->zstream
, GIT_ZSTREAM_INFLATE
) < 0) {
773 git_error_set(GIT_ERROR_ZLIB
, "failed to init packfile stream");
780 ssize_t
git_packfile_stream_read(git_packfile_stream
*obj
, void *buffer
, size_t len
)
782 unsigned int window_len
;
789 if ((in
= pack_window_open(obj
->p
, &obj
->mw
, obj
->curpos
, &window_len
)) == NULL
)
792 if ((error
= git_zstream_set_input(&obj
->zstream
, in
, window_len
)) < 0 ||
793 (error
= git_zstream_get_output_chunk(buffer
, &len
, &obj
->zstream
)) < 0) {
794 git_mwindow_close(&obj
->mw
);
795 git_error_set(GIT_ERROR_ZLIB
, "error reading from the zlib stream");
799 git_mwindow_close(&obj
->mw
);
801 obj
->curpos
+= window_len
- obj
->zstream
.in_len
;
803 if (git_zstream_eos(&obj
->zstream
))
806 /* If we didn't write anything out but we're not done, we need more data */
807 if (!len
&& !git_zstream_eos(&obj
->zstream
))
814 void git_packfile_stream_dispose(git_packfile_stream
*obj
)
816 git_zstream_free(&obj
->zstream
);
819 static int packfile_unpack_compressed(
821 struct git_pack_file
*p
,
822 git_mwindow
**mwindow
,
827 git_zstream zstream
= GIT_ZSTREAM_INIT
;
828 size_t buffer_len
, total
= 0;
832 GIT_ERROR_CHECK_ALLOC_ADD(&buffer_len
, size
, 1);
833 data
= git__calloc(1, buffer_len
);
834 GIT_ERROR_CHECK_ALLOC(data
);
836 if ((error
= git_zstream_init(&zstream
, GIT_ZSTREAM_INFLATE
)) < 0) {
837 git_error_set(GIT_ERROR_ZLIB
, "failed to init zlib stream on unpack");
842 size_t bytes
= buffer_len
- total
;
843 unsigned int window_len
;
846 in
= pack_window_open(p
, mwindow
, *position
, &window_len
);
848 if ((error
= git_zstream_set_input(&zstream
, in
, window_len
)) < 0 ||
849 (error
= git_zstream_get_output_chunk(data
+ total
, &bytes
, &zstream
)) < 0) {
850 git_mwindow_close(mwindow
);
854 git_mwindow_close(mwindow
);
856 *position
+= window_len
- zstream
.in_len
;
858 } while (total
< size
);
860 if (total
!= size
|| !git_zstream_eos(&zstream
)) {
861 git_error_set(GIT_ERROR_ZLIB
, "error inflating zlib stream");
871 git_zstream_free(&zstream
);
879 * curpos is where the data starts, delta_obj_offset is the where the
882 off64_t
get_delta_base(
883 struct git_pack_file
*p
,
884 git_mwindow
**w_curs
,
887 off64_t delta_obj_offset
)
889 unsigned int left
= 0;
890 unsigned char *base_info
;
894 base_info
= pack_window_open(p
, w_curs
, *curpos
, &left
);
895 /* Assumption: the only reason this would fail is because the file is too small */
896 if (base_info
== NULL
)
898 /* pack_window_open() assured us we have [base_info, base_info + 20)
899 * as a range that we can look at without walking off the
900 * end of the mapped window. Its actually the hash size
901 * that is assured. An OFS_DELTA longer than the hash size
902 * is stupid, as then a REF_DELTA would be smaller to store.
904 if (type
== GIT_OBJECT_OFS_DELTA
) {
906 unsigned char c
= base_info
[used
++];
907 size_t unsigned_base_offset
= c
& 127;
911 unsigned_base_offset
+= 1;
912 if (!unsigned_base_offset
|| MSB(unsigned_base_offset
, 7))
913 return 0; /* overflow */
914 c
= base_info
[used
++];
915 unsigned_base_offset
= (unsigned_base_offset
<< 7) + (c
& 127);
917 if (unsigned_base_offset
== 0 || (size_t)delta_obj_offset
<= unsigned_base_offset
)
918 return 0; /* out of bound */
919 base_offset
= delta_obj_offset
- unsigned_base_offset
;
921 } else if (type
== GIT_OBJECT_REF_DELTA
) {
922 /* If we have the cooperative cache, search in it first */
924 struct git_pack_entry
*entry
;
927 git_oid_fromraw(&oid
, base_info
);
928 if ((entry
= git_oidmap_get(p
->idx_cache
, &oid
)) != NULL
) {
930 return entry
->offset
;
932 /* If we're building an index, don't try to find the pack
933 * entry; we just haven't seen it yet. We'll make
934 * progress again in the next loop.
936 return GIT_PASSTHROUGH
;
940 /* The base entry _must_ be in the same pack */
941 if (pack_entry_find_offset(&base_offset
, &unused
, p
, (git_oid
*)base_info
, GIT_OID_HEXSZ
) < 0)
942 return packfile_error("base entry delta is not in the same pack");
950 /***********************************************************
954 ***********************************************************/
956 void git_packfile_close(struct git_pack_file
*p
, bool unlink_packfile
)
958 if (p
->mwf
.fd
>= 0) {
959 git_mwindow_free_all_locked(&p
->mwf
);
965 p_unlink(p
->pack_name
);
968 void git_packfile_free(struct git_pack_file
*p
)
973 cache_free(&p
->bases
);
975 git_packfile_close(p
, false);
979 git__free(p
->bad_object_sha1
);
981 git_mutex_free(&p
->lock
);
982 git_mutex_free(&p
->bases
.lock
);
986 static int packfile_open(struct git_pack_file
*p
)
989 struct git_pack_header hdr
;
991 unsigned char *idx_sha1
;
993 if (p
->index_version
== -1 && pack_index_open(p
) < 0)
994 return git_odb__error_notfound("failed to open packfile", NULL
, 0);
996 /* if mwf opened by another thread, return now */
997 if (git_mutex_lock(&p
->lock
) < 0)
998 return packfile_error("failed to get lock for open");
1000 if (p
->mwf
.fd
>= 0) {
1001 git_mutex_unlock(&p
->lock
);
1005 /* TODO: open with noatime */
1006 p
->mwf
.fd
= git_futils_open_ro(p
->pack_name
);
1010 if (p_fstat(p
->mwf
.fd
, &st
) < 0 ||
1011 git_mwindow_file_register(&p
->mwf
) < 0)
1014 /* If we created the struct before we had the pack we lack size. */
1016 if (!S_ISREG(st
.st_mode
))
1018 p
->mwf
.size
= (off64_t
)st
.st_size
;
1019 } else if (p
->mwf
.size
!= st
.st_size
)
1023 /* We leave these file descriptors open with sliding mmap;
1024 * there is no point keeping them open across exec(), though.
1026 fd_flag
= fcntl(p
->mwf
.fd
, F_GETFD
, 0);
1030 fd_flag
|= FD_CLOEXEC
;
1031 if (fcntl(p
->pack_fd
, F_SETFD
, fd_flag
) == -1)
1035 /* Verify we recognize this pack file format. */
1036 if (p_read(p
->mwf
.fd
, &hdr
, sizeof(hdr
)) < 0 ||
1037 hdr
.hdr_signature
!= htonl(PACK_SIGNATURE
) ||
1038 !pack_version_ok(hdr
.hdr_version
))
1041 /* Verify the pack matches its index. */
1042 if (p
->num_objects
!= ntohl(hdr
.hdr_entries
) ||
1043 p_lseek(p
->mwf
.fd
, p
->mwf
.size
- GIT_OID_RAWSZ
, SEEK_SET
) == -1 ||
1044 p_read(p
->mwf
.fd
, sha1
.id
, GIT_OID_RAWSZ
) < 0)
1047 idx_sha1
= ((unsigned char *)p
->index_map
.data
) + p
->index_map
.len
- 40;
1049 if (git_oid__cmp(&sha1
, (git_oid
*)idx_sha1
) != 0)
1052 git_mutex_unlock(&p
->lock
);
1056 git_error_set(GIT_ERROR_OS
, "invalid packfile '%s'", p
->pack_name
);
1062 git_mutex_unlock(&p
->lock
);
1067 int git_packfile__name(char **out
, const char *path
)
1070 git_buf buf
= GIT_BUF_INIT
;
1072 path_len
= strlen(path
);
1074 if (path_len
< strlen(".idx"))
1075 return git_odb__error_notfound("invalid packfile path", NULL
, 0);
1077 if (git_buf_printf(&buf
, "%.*s.pack", (int)(path_len
- strlen(".idx")), path
) < 0)
1080 *out
= git_buf_detach(&buf
);
1084 int git_packfile_alloc(struct git_pack_file
**pack_out
, const char *path
)
1087 struct git_pack_file
*p
;
1088 size_t path_len
= path
? strlen(path
) : 0, alloc_len
;
1092 if (path_len
< strlen(".idx"))
1093 return git_odb__error_notfound("invalid packfile path", NULL
, 0);
1095 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len
, sizeof(*p
), path_len
);
1096 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len
, alloc_len
, 2);
1098 p
= git__calloc(1, alloc_len
);
1099 GIT_ERROR_CHECK_ALLOC(p
);
1101 memcpy(p
->pack_name
, path
, path_len
+ 1);
1104 * Make sure a corresponding .pack file exists and that
1105 * the index looks sane.
1107 if (git__suffixcmp(path
, ".idx") == 0) {
1108 size_t root_len
= path_len
- strlen(".idx");
1110 if (!git_disable_pack_keep_file_checks
) {
1111 memcpy(p
->pack_name
+ root_len
, ".keep", sizeof(".keep"));
1112 if (git_path_exists(p
->pack_name
) == true)
1116 memcpy(p
->pack_name
+ root_len
, ".pack", sizeof(".pack"));
1119 if (p_stat(p
->pack_name
, &st
) < 0 || !S_ISREG(st
.st_mode
)) {
1121 return git_odb__error_notfound("packfile not found", NULL
, 0);
1124 /* ok, it looks sane as far as we can check without
1125 * actually mapping the pack file.
1128 p
->mwf
.size
= st
.st_size
;
1130 p
->mtime
= (git_time_t
)st
.st_mtime
;
1131 p
->index_version
= -1;
1133 if (git_mutex_init(&p
->lock
)) {
1134 git_error_set(GIT_ERROR_OS
, "failed to initialize packfile mutex");
1139 if (cache_init(&p
->bases
) < 0) {
1149 /***********************************************************
1151 * PACKFILE ENTRY SEARCH INTERNALS
1153 ***********************************************************/
1155 static off64_t
nth_packed_object_offset(const struct git_pack_file
*p
, uint32_t n
)
1157 const unsigned char *index
= p
->index_map
.data
;
1158 const unsigned char *end
= index
+ p
->index_map
.len
;
1160 if (p
->index_version
== 1) {
1161 return ntohl(*((uint32_t *)(index
+ 24 * n
)));
1164 index
+= 8 + p
->num_objects
* (20 + 4);
1165 off
= ntohl(*((uint32_t *)(index
+ 4 * n
)));
1166 if (!(off
& 0x80000000))
1168 index
+= p
->num_objects
* 4 + (off
& 0x7fffffff) * 8;
1170 /* Make sure we're not being sent out of bounds */
1171 if (index
>= end
- 8)
1174 return (((uint64_t)ntohl(*((uint32_t *)(index
+ 0)))) << 32) |
1175 ntohl(*((uint32_t *)(index
+ 4)));
/*
 * Comparison callback ordering two items by their first four bytes,
 * compared as raw memory.
 */
static int git__memcmp4(const void *a, const void *b)
{
	return memcmp(a, b, 4);
}
1183 int git_pack_foreach_entry(
1184 struct git_pack_file
*p
,
1185 git_odb_foreach_cb cb
,
1188 const unsigned char *index
= p
->index_map
.data
, *current
;
1192 if (index
== NULL
) {
1193 if ((error
= pack_index_open(p
)) < 0)
1196 assert(p
->index_map
.data
);
1198 index
= p
->index_map
.data
;
1201 if (p
->index_version
> 1) {
1207 if (p
->oids
== NULL
) {
1208 git_vector offsets
, oids
;
1210 if ((error
= git_vector_init(&oids
, p
->num_objects
, NULL
)))
1213 if ((error
= git_vector_init(&offsets
, p
->num_objects
, git__memcmp4
)))
1216 if (p
->index_version
> 1) {
1217 const unsigned char *off
= index
+ 24 * p
->num_objects
;
1218 for (i
= 0; i
< p
->num_objects
; i
++)
1219 git_vector_insert(&offsets
, (void*)&off
[4 * i
]);
1220 git_vector_sort(&offsets
);
1221 git_vector_foreach(&offsets
, i
, current
)
1222 git_vector_insert(&oids
, (void*)&index
[5 * (current
- off
)]);
1224 for (i
= 0; i
< p
->num_objects
; i
++)
1225 git_vector_insert(&offsets
, (void*)&index
[24 * i
]);
1226 git_vector_sort(&offsets
);
1227 git_vector_foreach(&offsets
, i
, current
)
1228 git_vector_insert(&oids
, (void*)¤t
[4]);
1231 git_vector_free(&offsets
);
1232 p
->oids
= (git_oid
**)git_vector_detach(NULL
, NULL
, &oids
);
1235 for (i
= 0; i
< p
->num_objects
; i
++)
1236 if ((error
= cb(p
->oids
[i
], data
)) != 0)
1237 return git_error_set_after_callback(error
);
1242 static int pack_entry_find_offset(
1243 off64_t
*offset_out
,
1245 struct git_pack_file
*p
,
1246 const git_oid
*short_oid
,
1249 const uint32_t *level1_ofs
;
1250 const unsigned char *index
;
1251 unsigned hi
, lo
, stride
;
1254 const unsigned char *current
= 0;
1258 if (p
->index_version
== -1) {
1261 if ((error
= pack_index_open(p
)) < 0)
1263 assert(p
->index_map
.data
);
1266 index
= p
->index_map
.data
;
1267 level1_ofs
= p
->index_map
.data
;
1269 if (p
->index_version
> 1) {
1275 hi
= ntohl(level1_ofs
[(int)short_oid
->id
[0]]);
1276 lo
= ((short_oid
->id
[0] == 0x0) ? 0 : ntohl(level1_ofs
[(int)short_oid
->id
[0] - 1]));
1278 if (p
->index_version
> 1) {
1285 #ifdef INDEX_DEBUG_LOOKUP
1286 printf("%02x%02x%02x... lo %u hi %u nr %d\n",
1287 short_oid
->id
[0], short_oid
->id
[1], short_oid
->id
[2], lo
, hi
, p
->num_objects
);
1290 pos
= sha1_position(index
, stride
, lo
, hi
, short_oid
->id
);
1293 /* An object matching exactly the oid was found */
1295 current
= index
+ pos
* stride
;
1297 /* No object was found */
1298 /* pos refers to the object with the "closest" oid to short_oid */
1300 if (pos
< (int)p
->num_objects
) {
1301 current
= index
+ pos
* stride
;
1303 if (!git_oid_ncmp(short_oid
, (const git_oid
*)current
, len
))
1308 if (found
&& len
!= GIT_OID_HEXSZ
&& pos
+ 1 < (int)p
->num_objects
) {
1309 /* Check for ambiguousity */
1310 const unsigned char *next
= current
+ stride
;
1312 if (!git_oid_ncmp(short_oid
, (const git_oid
*)next
, len
)) {
1318 return git_odb__error_notfound("failed to find offset for pack entry", short_oid
, len
);
1320 return git_odb__error_ambiguous("found multiple offsets for pack entry");
1322 if ((offset
= nth_packed_object_offset(p
, pos
)) < 0) {
1323 git_error_set(GIT_ERROR_ODB
, "packfile index is corrupt");
1327 *offset_out
= offset
;
1328 git_oid_fromraw(found_oid
, current
);
1330 #ifdef INDEX_DEBUG_LOOKUP
1332 unsigned char hex_sha1
[GIT_OID_HEXSZ
+ 1];
1333 git_oid_fmt(hex_sha1
, found_oid
);
1334 hex_sha1
[GIT_OID_HEXSZ
] = '\0';
1335 printf("found lo=%d %s\n", lo
, hex_sha1
);
1342 int git_pack_entry_find(
1343 struct git_pack_entry
*e
,
1344 struct git_pack_file
*p
,
1345 const git_oid
*short_oid
,
1354 if (len
== GIT_OID_HEXSZ
&& p
->num_bad_objects
) {
1356 for (i
= 0; i
< p
->num_bad_objects
; i
++)
1357 if (git_oid__cmp(short_oid
, &p
->bad_object_sha1
[i
]) == 0)
1358 return packfile_error("bad object found in packfile");
1361 error
= pack_entry_find_offset(&offset
, &found_oid
, p
, short_oid
, len
);
1365 /* we found a unique entry in the index;
1366 * make sure the packfile backing the index
1367 * still exists on disk */
1368 if (p
->mwf
.fd
== -1 && (error
= packfile_open(p
)) < 0)
1374 git_oid_cpy(&e
->sha1
, &found_oid
);