2 * Copyright (C) the libgit2 contributors. All rights reserved.
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
16 /* Option to bypass checking existence of '.keep' files */
17 bool git_disable_pack_keep_file_checks
= false;
19 static int packfile_open(struct git_pack_file
*p
);
20 static off64_t
nth_packed_object_offset(const struct git_pack_file
*p
, uint32_t n
);
21 static int packfile_unpack_compressed(
23 struct git_pack_file
*p
,
29 /* Can find the offset of an object given
30 * a prefix of an identifier.
31 * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
32 * is ambiguous within the pack.
33 * This method assumes that len is between
34 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
36 static int pack_entry_find_offset(
39 struct git_pack_file
*p
,
40 const git_oid
*short_oid
,
43 static int packfile_error(const char *message
)
45 git_error_set(GIT_ERROR_ODB
, "invalid pack file - %s", message
);
53 static git_pack_cache_entry
*new_cache_object(git_rawobj
*source
)
55 git_pack_cache_entry
*e
= git__calloc(1, sizeof(git_pack_cache_entry
));
59 git_atomic_inc(&e
->refcount
);
60 memcpy(&e
->raw
, source
, sizeof(git_rawobj
));
65 static void free_cache_object(void *o
)
67 git_pack_cache_entry
*e
= (git_pack_cache_entry
*)o
;
70 assert(e
->refcount
.val
== 0);
71 git__free(e
->raw
.data
);
76 static void cache_free(git_pack_cache
*cache
)
78 git_pack_cache_entry
*entry
;
81 git_offmap_foreach_value(cache
->entries
, entry
, {
82 free_cache_object(entry
);
85 git_offmap_free(cache
->entries
);
86 cache
->entries
= NULL
;
90 static int cache_init(git_pack_cache
*cache
)
92 if (git_offmap_new(&cache
->entries
) < 0)
95 cache
->memory_limit
= GIT_PACK_CACHE_MEMORY_LIMIT
;
97 if (git_mutex_init(&cache
->lock
)) {
98 git_error_set(GIT_ERROR_OS
, "failed to initialize pack cache mutex");
100 git__free(cache
->entries
);
101 cache
->entries
= NULL
;
109 static git_pack_cache_entry
*cache_get(git_pack_cache
*cache
, off64_t offset
)
111 git_pack_cache_entry
*entry
;
113 if (git_mutex_lock(&cache
->lock
) < 0)
116 if ((entry
= git_offmap_get(cache
->entries
, offset
)) != NULL
) {
117 git_atomic_inc(&entry
->refcount
);
118 entry
->last_usage
= cache
->use_ctr
++;
120 git_mutex_unlock(&cache
->lock
);
125 /* Run with the cache lock held */
126 static void free_lowest_entry(git_pack_cache
*cache
)
129 git_pack_cache_entry
*entry
;
131 git_offmap_foreach(cache
->entries
, offset
, entry
, {
132 if (entry
&& entry
->refcount
.val
== 0) {
133 cache
->memory_used
-= entry
->raw
.len
;
134 git_offmap_delete(cache
->entries
, offset
);
135 free_cache_object(entry
);
140 static int cache_add(
141 git_pack_cache_entry
**cached_out
,
142 git_pack_cache
*cache
,
146 git_pack_cache_entry
*entry
;
149 if (base
->len
> GIT_PACK_CACHE_SIZE_LIMIT
)
152 entry
= new_cache_object(base
);
154 if (git_mutex_lock(&cache
->lock
) < 0) {
155 git_error_set(GIT_ERROR_OS
, "failed to lock cache");
159 /* Add it to the cache if nobody else has */
160 exists
= git_offmap_exists(cache
->entries
, offset
);
162 while (cache
->memory_used
+ base
->len
> cache
->memory_limit
)
163 free_lowest_entry(cache
);
165 git_offmap_set(cache
->entries
, offset
, entry
);
166 cache
->memory_used
+= entry
->raw
.len
;
170 git_mutex_unlock(&cache
->lock
);
171 /* Somebody beat us to adding it into the cache */
181 /***********************************************************
185 ***********************************************************/
187 static void pack_index_free(struct git_pack_file
*p
)
193 if (p
->index_map
.data
) {
194 git_futils_mmap_free(&p
->index_map
);
195 p
->index_map
.data
= NULL
;
199 static int pack_index_check(const char *path
, struct git_pack_file
*p
)
201 struct git_pack_idx_header
*hdr
;
202 uint32_t version
, nr
, i
, *index
;
207 /* TODO: properly open the file without access time using O_NOATIME */
208 git_file fd
= git_futils_open_ro(path
);
212 if (p_fstat(fd
, &st
) < 0) {
214 git_error_set(GIT_ERROR_OS
, "unable to stat pack index '%s'", path
);
218 if (!S_ISREG(st
.st_mode
) ||
219 !git__is_sizet(st
.st_size
) ||
220 (idx_size
= (size_t)st
.st_size
) < 4 * 256 + 20 + 20)
223 git_error_set(GIT_ERROR_ODB
, "invalid pack index '%s'", path
);
227 error
= git_futils_mmap_ro(&p
->index_map
, fd
, 0, idx_size
);
234 hdr
= idx_map
= p
->index_map
.data
;
236 if (hdr
->idx_signature
== htonl(PACK_IDX_SIGNATURE
)) {
237 version
= ntohl(hdr
->idx_version
);
239 if (version
< 2 || version
> 2) {
240 git_futils_mmap_free(&p
->index_map
);
241 return packfile_error("unsupported index version");
251 index
+= 2; /* skip index header */
253 for (i
= 0; i
< 256; i
++) {
254 uint32_t n
= ntohl(index
[i
]);
256 git_futils_mmap_free(&p
->index_map
);
257 return packfile_error("index is non-monotonic");
265 * - 256 index entries 4 bytes each
266 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
267 * - 20-byte SHA1 of the packfile
268 * - 20-byte SHA1 file checksum
270 if (idx_size
!= 4*256 + nr
* 24 + 20 + 20) {
271 git_futils_mmap_free(&p
->index_map
);
272 return packfile_error("index is corrupted");
274 } else if (version
== 2) {
277 * - 8 bytes of header
278 * - 256 index entries 4 bytes each
279 * - 20-byte sha1 entry * nr
280 * - 4-byte crc entry * nr
281 * - 4-byte offset entry * nr
282 * - 20-byte SHA1 of the packfile
283 * - 20-byte SHA1 file checksum
284 * And after the 4-byte offset table might be a
285 * variable sized table containing 8-byte entries
286 * for offsets larger than 2^31.
288 unsigned long min_size
= 8 + 4*256 + nr
*(20 + 4 + 4) + 20 + 20;
289 unsigned long max_size
= min_size
;
292 max_size
+= (nr
- 1)*8;
294 if (idx_size
< min_size
|| idx_size
> max_size
) {
295 git_futils_mmap_free(&p
->index_map
);
296 return packfile_error("wrong index size");
301 p
->index_version
= version
;
305 static int pack_index_open(struct git_pack_file
*p
)
311 if (p
->index_version
> -1)
314 name_len
= strlen(p
->pack_name
);
315 assert(name_len
> strlen(".pack")); /* checked by git_pack_file alloc */
317 if (git_buf_init(&idx_name
, name_len
) < 0)
320 git_buf_put(&idx_name
, p
->pack_name
, name_len
- strlen(".pack"));
321 git_buf_puts(&idx_name
, ".idx");
322 if (git_buf_oom(&idx_name
)) {
323 git_buf_dispose(&idx_name
);
327 if ((error
= git_mutex_lock(&p
->lock
)) < 0) {
328 git_buf_dispose(&idx_name
);
332 if (p
->index_version
== -1)
333 error
= pack_index_check(idx_name
.ptr
, p
);
335 git_buf_dispose(&idx_name
);
337 git_mutex_unlock(&p
->lock
);
342 static unsigned char *pack_window_open(
343 struct git_pack_file
*p
,
344 git_mwindow
**w_cursor
,
348 if (p
->mwf
.fd
== -1 && packfile_open(p
) < 0)
351 /* Since packfiles end in a hash of their content and it's
352 * pointless to ask for an offset into the middle of that
353 * hash, and the pack_window_contains function above wouldn't match,
354 * don't allow an offset too close to the end of the file.
356 * Don't allow a negative offset, as that means we've wrapped
359 if (offset
> (p
->mwf
.size
- 20))
364 return git_mwindow_open(&p
->mwf
, w_cursor
, offset
, 20, left
);
368 * The per-object header is a pretty dense thing, which is
369 * - first byte: low four bits are "size",
370 * then three bits of "type",
371 * with the high bit being "size continues".
372 * - each byte afterwards: low seven bits are size continuation,
373 * with the high bit being "size continues"
375 size_t git_packfile__object_header(unsigned char *hdr
, size_t size
, git_object_t type
)
377 unsigned char *hdr_base
;
380 assert(type
>= GIT_OBJECT_COMMIT
&& type
<= GIT_OBJECT_REF_DELTA
);
382 /* TODO: add support for chunked objects; see git.git 6c0d19b1 */
384 c
= (unsigned char)((type
<< 4) | (size
& 15));
395 return (hdr
- hdr_base
);
399 static int packfile_unpack_header1(
400 unsigned long *usedp
,
403 const unsigned char *buf
,
407 unsigned long size
, c
;
408 unsigned long used
= 0;
411 *type
= (c
>> 4) & 7;
416 git_error_set(GIT_ERROR_ODB
, "buffer too small");
420 if (bitsizeof(long) <= shift
) {
422 git_error_set(GIT_ERROR_ODB
, "packfile corrupted");
427 size
+= (c
& 0x7f) << shift
;
431 *sizep
= (size_t)size
;
436 int git_packfile_unpack_header(
438 git_object_t
*type_p
,
439 git_mwindow_file
*mwf
,
440 git_mwindow
**w_curs
,
448 /* pack_window_open() assures us we have [base, base + 20) available
449 * as a range that we can look at. (It's actually the hash
450 * size that is assured.) With our object header encoding
451 * the maximum deflated object size is 2^137, which is just
452 * insane, so we know we won't exceed what we have been given.
454 /* base = pack_window_open(p, w_curs, *curpos, &left); */
455 base
= git_mwindow_open(mwf
, w_curs
, *curpos
, 20, &left
);
459 ret
= packfile_unpack_header1(&used
, size_p
, type_p
, base
, left
);
460 git_mwindow_close(w_curs
);
461 if (ret
== GIT_EBUFS
)
464 return packfile_error("header length is zero");
470 int git_packfile_resolve_header(
472 git_object_t
*type_p
,
473 struct git_pack_file
*p
,
476 git_mwindow
*w_curs
= NULL
;
477 off64_t curpos
= offset
;
483 error
= git_packfile_unpack_header(&size
, &type
, &p
->mwf
, &w_curs
, &curpos
);
487 if (type
== GIT_OBJECT_OFS_DELTA
|| type
== GIT_OBJECT_REF_DELTA
) {
489 git_packfile_stream stream
;
491 error
= get_delta_base(&base_offset
, p
, &w_curs
, &curpos
, type
, offset
);
492 git_mwindow_close(&w_curs
);
497 if ((error
= git_packfile_stream_open(&stream
, p
, curpos
)) < 0)
499 error
= git_delta_read_header_fromstream(&base_size
, size_p
, &stream
);
500 git_packfile_stream_dispose(&stream
);
508 while (type
== GIT_OBJECT_OFS_DELTA
|| type
== GIT_OBJECT_REF_DELTA
) {
509 curpos
= base_offset
;
510 error
= git_packfile_unpack_header(&size
, &type
, &p
->mwf
, &w_curs
, &curpos
);
513 if (type
!= GIT_OBJECT_OFS_DELTA
&& type
!= GIT_OBJECT_REF_DELTA
)
516 error
= get_delta_base(&base_offset
, p
, &w_curs
, &curpos
, type
, base_offset
);
517 git_mwindow_close(&w_curs
);
527 #define SMALL_STACK_SIZE 64
530 * Generate the chain of dependencies which we need to get to the
531 * object at `obj_offset`. `chain_out` is used as a stack; popping gives the right
532 * order to apply deltas on. If an object is found in the pack's base
533 * cache, we stop calculating there.
535 static int pack_dependency_chain(git_dependency_chain
*chain_out
,
536 git_pack_cache_entry
**cached_out
, off64_t
*cached_off
,
537 struct pack_chain_elem
*small_stack
, size_t *stack_sz
,
538 struct git_pack_file
*p
, off64_t obj_offset
)
540 git_dependency_chain chain
= GIT_ARRAY_INIT
;
541 git_mwindow
*w_curs
= NULL
;
542 off64_t curpos
= obj_offset
, base_offset
;
543 int error
= 0, use_heap
= 0;
544 size_t size
, elem_pos
;
549 struct pack_chain_elem
*elem
;
550 git_pack_cache_entry
*cached
= NULL
;
552 /* if we have a base cached, we can stop here instead */
553 if ((cached
= cache_get(&p
->bases
, obj_offset
)) != NULL
) {
554 *cached_out
= cached
;
555 *cached_off
= obj_offset
;
559 /* if we run out of space on the small stack, use the array */
560 if (elem_pos
== SMALL_STACK_SIZE
) {
561 git_array_init_to_size(chain
, elem_pos
);
562 GIT_ERROR_CHECK_ARRAY(chain
);
563 memcpy(chain
.ptr
, small_stack
, elem_pos
* sizeof(struct pack_chain_elem
));
564 chain
.size
= elem_pos
;
570 elem
= &small_stack
[elem_pos
];
572 elem
= git_array_alloc(chain
);
579 elem
->base_key
= obj_offset
;
581 error
= git_packfile_unpack_header(&size
, &type
, &p
->mwf
, &w_curs
, &curpos
);
586 elem
->offset
= curpos
;
589 elem
->base_key
= obj_offset
;
591 if (type
!= GIT_OBJECT_OFS_DELTA
&& type
!= GIT_OBJECT_REF_DELTA
)
594 error
= get_delta_base(&base_offset
, p
, &w_curs
, &curpos
, type
, obj_offset
);
595 git_mwindow_close(&w_curs
);
600 /* we need to pass the pos *after* the delta-base bit */
601 elem
->offset
= curpos
;
603 /* go through the loop again, but with the new object */
604 obj_offset
= base_offset
;
609 *stack_sz
= elem_pos
+ 1;
614 git_array_clear(chain
);
618 int git_packfile_unpack(
620 struct git_pack_file
*p
,
623 git_mwindow
*w_curs
= NULL
;
624 off64_t curpos
= *obj_offset
;
625 int error
, free_base
= 0;
626 git_dependency_chain chain
= GIT_ARRAY_INIT
;
627 struct pack_chain_elem
*elem
= NULL
, *stack
;
628 git_pack_cache_entry
*cached
= NULL
;
629 struct pack_chain_elem small_stack
[SMALL_STACK_SIZE
];
630 size_t stack_size
= 0, elem_pos
, alloclen
;
631 git_object_t base_type
;
634 * TODO: optionally check the CRC on the packfile
637 error
= pack_dependency_chain(&chain
, &cached
, obj_offset
, small_stack
, &stack_size
, p
, *obj_offset
);
643 obj
->type
= GIT_OBJECT_INVALID
;
645 /* let's point to the right stack */
646 stack
= chain
.ptr
? chain
.ptr
: small_stack
;
648 elem_pos
= stack_size
;
650 memcpy(obj
, &cached
->raw
, sizeof(git_rawobj
));
651 base_type
= obj
->type
;
652 elem_pos
--; /* stack_size includes the base, which isn't actually there */
654 elem
= &stack
[--elem_pos
];
655 base_type
= elem
->type
;
659 case GIT_OBJECT_COMMIT
:
660 case GIT_OBJECT_TREE
:
661 case GIT_OBJECT_BLOB
:
664 curpos
= elem
->offset
;
665 error
= packfile_unpack_compressed(obj
, p
, &w_curs
, &curpos
, elem
->size
, elem
->type
);
666 git_mwindow_close(&w_curs
);
667 base_type
= elem
->type
;
672 case GIT_OBJECT_OFS_DELTA
:
673 case GIT_OBJECT_REF_DELTA
:
674 error
= packfile_error("dependency chain ends in a delta");
677 error
= packfile_error("invalid packfile type in header");
682 * Finding the object we want as a cached base element is
683 * problematic, as we need to make sure we don't accidentally
684 * give the caller the cached object, which it would then feel
685 * free to free, so we need to copy the data.
687 if (cached
&& stack_size
== 1) {
688 void *data
= obj
->data
;
690 GIT_ERROR_CHECK_ALLOC_ADD(&alloclen
, obj
->len
, 1);
691 obj
->data
= git__malloc(alloclen
);
692 GIT_ERROR_CHECK_ALLOC(obj
->data
);
694 memcpy(obj
->data
, data
, obj
->len
+ 1);
695 git_atomic_dec(&cached
->refcount
);
699 /* we now apply each consecutive delta until we run out */
700 while (elem_pos
> 0 && !error
) {
701 git_rawobj base
, delta
;
704 * We can now try to add the base to the cache, as
705 * long as it's not already the cached one.
708 free_base
= !!cache_add(&cached
, &p
->bases
, obj
, elem
->base_key
);
710 elem
= &stack
[elem_pos
- 1];
711 curpos
= elem
->offset
;
712 error
= packfile_unpack_compressed(&delta
, p
, &w_curs
, &curpos
, elem
->size
, elem
->type
);
713 git_mwindow_close(&w_curs
);
716 /* We have transferred ownership of the data to the cache. */
721 /* the current object becomes the new base, on which we apply the delta */
725 obj
->type
= GIT_OBJECT_INVALID
;
727 error
= git_delta_apply(&obj
->data
, &obj
->len
, base
.data
, base
.len
, delta
.data
, delta
.len
);
728 obj
->type
= base_type
;
731 * We usually don't want to free the base at this
732 * point, as we put it into the cache in the previous
733 * iteration. free_base lets us know that we got the
734 * base object directly from the packfile, so we can free it.
736 git__free(delta
.data
);
739 git__free(base
.data
);
743 git_atomic_dec(&cached
->refcount
);
755 git__free(obj
->data
);
757 git_atomic_dec(&cached
->refcount
);
761 *obj_offset
= curpos
;
763 git_array_clear(chain
);
767 int git_packfile_stream_open(git_packfile_stream
*obj
, struct git_pack_file
*p
, off64_t curpos
)
769 memset(obj
, 0, sizeof(git_packfile_stream
));
770 obj
->curpos
= curpos
;
773 if (git_zstream_init(&obj
->zstream
, GIT_ZSTREAM_INFLATE
) < 0) {
774 git_error_set(GIT_ERROR_ZLIB
, "failed to init packfile stream");
781 ssize_t
git_packfile_stream_read(git_packfile_stream
*obj
, void *buffer
, size_t len
)
783 unsigned int window_len
;
790 if ((in
= pack_window_open(obj
->p
, &obj
->mw
, obj
->curpos
, &window_len
)) == NULL
)
793 if ((error
= git_zstream_set_input(&obj
->zstream
, in
, window_len
)) < 0 ||
794 (error
= git_zstream_get_output_chunk(buffer
, &len
, &obj
->zstream
)) < 0) {
795 git_mwindow_close(&obj
->mw
);
796 git_error_set(GIT_ERROR_ZLIB
, "error reading from the zlib stream");
800 git_mwindow_close(&obj
->mw
);
802 obj
->curpos
+= window_len
- obj
->zstream
.in_len
;
804 if (git_zstream_eos(&obj
->zstream
))
807 /* If we didn't write anything out but we're not done, we need more data */
808 if (!len
&& !git_zstream_eos(&obj
->zstream
))
815 void git_packfile_stream_dispose(git_packfile_stream
*obj
)
817 git_zstream_free(&obj
->zstream
);
820 static int packfile_unpack_compressed(
822 struct git_pack_file
*p
,
823 git_mwindow
**mwindow
,
828 git_zstream zstream
= GIT_ZSTREAM_INIT
;
829 size_t buffer_len
, total
= 0;
833 GIT_ERROR_CHECK_ALLOC_ADD(&buffer_len
, size
, 1);
834 data
= git__calloc(1, buffer_len
);
835 GIT_ERROR_CHECK_ALLOC(data
);
837 if ((error
= git_zstream_init(&zstream
, GIT_ZSTREAM_INFLATE
)) < 0) {
838 git_error_set(GIT_ERROR_ZLIB
, "failed to init zlib stream on unpack");
843 size_t bytes
= buffer_len
- total
;
844 unsigned int window_len
;
847 if ((in
= pack_window_open(p
, mwindow
, *position
, &window_len
)) == NULL
) {
852 if ((error
= git_zstream_set_input(&zstream
, in
, window_len
)) < 0 ||
853 (error
= git_zstream_get_output_chunk(data
+ total
, &bytes
, &zstream
)) < 0) {
854 git_mwindow_close(mwindow
);
858 git_mwindow_close(mwindow
);
863 *position
+= window_len
- zstream
.in_len
;
865 } while (!git_zstream_eos(&zstream
));
867 if (total
!= size
|| !git_zstream_eos(&zstream
)) {
868 git_error_set(GIT_ERROR_ZLIB
, "error inflating zlib stream");
878 git_zstream_free(&zstream
);
886 * curpos is where the data starts, delta_obj_offset is where the
890 off64_t
*delta_base_out
,
891 struct git_pack_file
*p
,
892 git_mwindow
**w_curs
,
895 off64_t delta_obj_offset
)
897 unsigned int left
= 0;
898 unsigned char *base_info
;
902 assert(delta_base_out
);
904 base_info
= pack_window_open(p
, w_curs
, *curpos
, &left
);
905 /* Assumption: the only reason this would fail is because the file is too small */
906 if (base_info
== NULL
)
908 /* pack_window_open() assured us we have [base_info, base_info + 20)
909 * as a range that we can look at without walking off the
910 * end of the mapped window. It's actually the hash size
911 * that is assured. An OFS_DELTA longer than the hash size
912 * is stupid, as then a REF_DELTA would be smaller to store.
914 if (type
== GIT_OBJECT_OFS_DELTA
) {
916 unsigned char c
= base_info
[used
++];
917 size_t unsigned_base_offset
= c
& 127;
921 unsigned_base_offset
+= 1;
922 if (!unsigned_base_offset
|| MSB(unsigned_base_offset
, 7))
923 return packfile_error("overflow");
924 c
= base_info
[used
++];
925 unsigned_base_offset
= (unsigned_base_offset
<< 7) + (c
& 127);
927 if (unsigned_base_offset
== 0 || (size_t)delta_obj_offset
<= unsigned_base_offset
)
928 return packfile_error("out of bounds");
929 base_offset
= delta_obj_offset
- unsigned_base_offset
;
931 } else if (type
== GIT_OBJECT_REF_DELTA
) {
932 /* If we have the cooperative cache, search in it first */
934 struct git_pack_entry
*entry
;
937 git_oid_fromraw(&oid
, base_info
);
938 if ((entry
= git_oidmap_get(p
->idx_cache
, &oid
)) != NULL
) {
939 if (entry
->offset
== 0)
940 return packfile_error("delta offset is zero");
943 *delta_base_out
= entry
->offset
;
946 /* If we're building an index, don't try to find the pack
947 * entry; we just haven't seen it yet. We'll make
948 * progress again in the next loop.
950 return GIT_PASSTHROUGH
;
954 /* The base entry _must_ be in the same pack */
955 if (pack_entry_find_offset(&base_offset
, &unused
, p
, (git_oid
*)base_info
, GIT_OID_HEXSZ
) < 0)
956 return packfile_error("base entry delta is not in the same pack");
959 return packfile_error("unknown object type");
961 if (base_offset
== 0)
962 return packfile_error("delta offset is zero");
964 *delta_base_out
= base_offset
;
968 /***********************************************************
972 ***********************************************************/
974 void git_packfile_close(struct git_pack_file
*p
, bool unlink_packfile
)
976 if (p
->mwf
.fd
>= 0) {
977 git_mwindow_free_all_locked(&p
->mwf
);
983 p_unlink(p
->pack_name
);
986 void git_packfile_free(struct git_pack_file
*p
)
991 cache_free(&p
->bases
);
993 git_packfile_close(p
, false);
997 git__free(p
->bad_object_sha1
);
999 git_mutex_free(&p
->lock
);
1000 git_mutex_free(&p
->bases
.lock
);
1004 static int packfile_open(struct git_pack_file
*p
)
1007 struct git_pack_header hdr
;
1009 unsigned char *idx_sha1
;
1011 if (p
->index_version
== -1 && pack_index_open(p
) < 0)
1012 return git_odb__error_notfound("failed to open packfile", NULL
, 0);
1014 /* if mwf opened by another thread, return now */
1015 if (git_mutex_lock(&p
->lock
) < 0)
1016 return packfile_error("failed to get lock for open");
1018 if (p
->mwf
.fd
>= 0) {
1019 git_mutex_unlock(&p
->lock
);
1023 /* TODO: open with noatime */
1024 p
->mwf
.fd
= git_futils_open_ro(p
->pack_name
);
1028 if (p_fstat(p
->mwf
.fd
, &st
) < 0 ||
1029 git_mwindow_file_register(&p
->mwf
) < 0)
1032 /* If we created the struct before we had the pack we lack size. */
1034 if (!S_ISREG(st
.st_mode
))
1036 p
->mwf
.size
= (off64_t
)st
.st_size
;
1037 } else if (p
->mwf
.size
!= st
.st_size
)
1041 /* We leave these file descriptors open with sliding mmap;
1042 * there is no point keeping them open across exec(), though.
1044 fd_flag
= fcntl(p
->mwf
.fd
, F_GETFD
, 0);
1048 fd_flag
|= FD_CLOEXEC
;
1049 if (fcntl(p
->pack_fd
, F_SETFD
, fd_flag
) == -1)
1053 /* Verify we recognize this pack file format. */
1054 if (p_read(p
->mwf
.fd
, &hdr
, sizeof(hdr
)) < 0 ||
1055 hdr
.hdr_signature
!= htonl(PACK_SIGNATURE
) ||
1056 !pack_version_ok(hdr
.hdr_version
))
1059 /* Verify the pack matches its index. */
1060 if (p
->num_objects
!= ntohl(hdr
.hdr_entries
) ||
1061 p_lseek(p
->mwf
.fd
, p
->mwf
.size
- GIT_OID_RAWSZ
, SEEK_SET
) == -1 ||
1062 p_read(p
->mwf
.fd
, sha1
.id
, GIT_OID_RAWSZ
) < 0)
1065 idx_sha1
= ((unsigned char *)p
->index_map
.data
) + p
->index_map
.len
- 40;
1067 if (git_oid__cmp(&sha1
, (git_oid
*)idx_sha1
) != 0)
1070 git_mutex_unlock(&p
->lock
);
1074 git_error_set(GIT_ERROR_OS
, "invalid packfile '%s'", p
->pack_name
);
1080 git_mutex_unlock(&p
->lock
);
1085 int git_packfile__name(char **out
, const char *path
)
1088 git_buf buf
= GIT_BUF_INIT
;
1090 path_len
= strlen(path
);
1092 if (path_len
< strlen(".idx"))
1093 return git_odb__error_notfound("invalid packfile path", NULL
, 0);
1095 if (git_buf_printf(&buf
, "%.*s.pack", (int)(path_len
- strlen(".idx")), path
) < 0)
1098 *out
= git_buf_detach(&buf
);
1102 int git_packfile_alloc(struct git_pack_file
**pack_out
, const char *path
)
1105 struct git_pack_file
*p
;
1106 size_t path_len
= path
? strlen(path
) : 0, alloc_len
;
1110 if (path_len
< strlen(".idx"))
1111 return git_odb__error_notfound("invalid packfile path", NULL
, 0);
1113 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len
, sizeof(*p
), path_len
);
1114 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len
, alloc_len
, 2);
1116 p
= git__calloc(1, alloc_len
);
1117 GIT_ERROR_CHECK_ALLOC(p
);
1119 memcpy(p
->pack_name
, path
, path_len
+ 1);
1122 * Make sure a corresponding .pack file exists and that
1123 * the index looks sane.
1125 if (git__suffixcmp(path
, ".idx") == 0) {
1126 size_t root_len
= path_len
- strlen(".idx");
1128 if (!git_disable_pack_keep_file_checks
) {
1129 memcpy(p
->pack_name
+ root_len
, ".keep", sizeof(".keep"));
1130 if (git_path_exists(p
->pack_name
) == true)
1134 memcpy(p
->pack_name
+ root_len
, ".pack", sizeof(".pack"));
1137 if (p_stat(p
->pack_name
, &st
) < 0 || !S_ISREG(st
.st_mode
)) {
1139 return git_odb__error_notfound("packfile not found", NULL
, 0);
1142 /* ok, it looks sane as far as we can check without
1143 * actually mapping the pack file.
1146 p
->mwf
.size
= st
.st_size
;
1148 p
->mtime
= (git_time_t
)st
.st_mtime
;
1149 p
->index_version
= -1;
1151 if (git_mutex_init(&p
->lock
)) {
1152 git_error_set(GIT_ERROR_OS
, "failed to initialize packfile mutex");
1157 if (cache_init(&p
->bases
) < 0) {
1167 /***********************************************************
1169 * PACKFILE ENTRY SEARCH INTERNALS
1171 ***********************************************************/
1173 static off64_t
nth_packed_object_offset(const struct git_pack_file
*p
, uint32_t n
)
1175 const unsigned char *index
= p
->index_map
.data
;
1176 const unsigned char *end
= index
+ p
->index_map
.len
;
1178 if (p
->index_version
== 1) {
1179 return ntohl(*((uint32_t *)(index
+ 24 * n
)));
1182 index
+= 8 + p
->num_objects
* (20 + 4);
1183 off
= ntohl(*((uint32_t *)(index
+ 4 * n
)));
1184 if (!(off
& 0x80000000))
1186 index
+= p
->num_objects
* 4 + (off
& 0x7fffffff) * 8;
1188 /* Make sure we're not being sent out of bounds */
1189 if (index
>= end
- 8)
1192 return (((uint64_t)ntohl(*((uint32_t *)(index
+ 0)))) << 32) |
1193 ntohl(*((uint32_t *)(index
+ 4)));
1197 static int git__memcmp4(const void *a
, const void *b
) {
1198 return memcmp(a
, b
, 4);
1201 int git_pack_foreach_entry(
1202 struct git_pack_file
*p
,
1203 git_odb_foreach_cb cb
,
1206 const unsigned char *index
= p
->index_map
.data
, *current
;
1210 if (index
== NULL
) {
1211 if ((error
= pack_index_open(p
)) < 0)
1214 assert(p
->index_map
.data
);
1216 index
= p
->index_map
.data
;
1219 if (p
->index_version
> 1) {
1225 if (p
->oids
== NULL
) {
1226 git_vector offsets
, oids
;
1228 if ((error
= git_vector_init(&oids
, p
->num_objects
, NULL
)))
1231 if ((error
= git_vector_init(&offsets
, p
->num_objects
, git__memcmp4
)))
1234 if (p
->index_version
> 1) {
1235 const unsigned char *off
= index
+ 24 * p
->num_objects
;
1236 for (i
= 0; i
< p
->num_objects
; i
++)
1237 git_vector_insert(&offsets
, (void*)&off
[4 * i
]);
1238 git_vector_sort(&offsets
);
1239 git_vector_foreach(&offsets
, i
, current
)
1240 git_vector_insert(&oids
, (void*)&index
[5 * (current
- off
)]);
1242 for (i
= 0; i
< p
->num_objects
; i
++)
1243 git_vector_insert(&offsets
, (void*)&index
[24 * i
]);
1244 git_vector_sort(&offsets
);
1245 git_vector_foreach(&offsets
, i
, current
)
1246 git_vector_insert(&oids
, (void*)¤t
[4]);
1249 git_vector_free(&offsets
);
1250 p
->oids
= (git_oid
**)git_vector_detach(NULL
, NULL
, &oids
);
1253 for (i
= 0; i
< p
->num_objects
; i
++)
1254 if ((error
= cb(p
->oids
[i
], data
)) != 0)
1255 return git_error_set_after_callback(error
);
1260 int git_pack__lookup_sha1(const void *oid_lookup_table
, size_t stride
, unsigned lo
,
1261 unsigned hi
, const unsigned char *oid_prefix
)
1263 const unsigned char *base
= oid_lookup_table
;
1266 unsigned mi
= (lo
+ hi
) / 2;
1267 int cmp
= git_oid__hashcmp(base
+ mi
* stride
, oid_prefix
);
1278 return -((int)lo
)-1;
1281 static int pack_entry_find_offset(
1282 off64_t
*offset_out
,
1284 struct git_pack_file
*p
,
1285 const git_oid
*short_oid
,
1288 const uint32_t *level1_ofs
;
1289 const unsigned char *index
;
1290 unsigned hi
, lo
, stride
;
1293 const unsigned char *current
= 0;
1297 if (p
->index_version
== -1) {
1300 if ((error
= pack_index_open(p
)) < 0)
1302 assert(p
->index_map
.data
);
1305 index
= p
->index_map
.data
;
1306 level1_ofs
= p
->index_map
.data
;
1308 if (p
->index_version
> 1) {
1314 hi
= ntohl(level1_ofs
[(int)short_oid
->id
[0]]);
1315 lo
= ((short_oid
->id
[0] == 0x0) ? 0 : ntohl(level1_ofs
[(int)short_oid
->id
[0] - 1]));
1317 if (p
->index_version
> 1) {
1324 #ifdef INDEX_DEBUG_LOOKUP
1325 printf("%02x%02x%02x... lo %u hi %u nr %d\n",
1326 short_oid
->id
[0], short_oid
->id
[1], short_oid
->id
[2], lo
, hi
, p
->num_objects
);
1329 pos
= git_pack__lookup_sha1(index
, stride
, lo
, hi
, short_oid
->id
);
1332 /* An object matching exactly the oid was found */
1334 current
= index
+ pos
* stride
;
1336 /* No object was found */
1337 /* pos refers to the object with the "closest" oid to short_oid */
1339 if (pos
< (int)p
->num_objects
) {
1340 current
= index
+ pos
* stride
;
1342 if (!git_oid_ncmp(short_oid
, (const git_oid
*)current
, len
))
1347 if (found
&& len
!= GIT_OID_HEXSZ
&& pos
+ 1 < (int)p
->num_objects
) {
1348 /* Check for ambiguity */
1349 const unsigned char *next
= current
+ stride
;
1351 if (!git_oid_ncmp(short_oid
, (const git_oid
*)next
, len
)) {
1357 return git_odb__error_notfound("failed to find offset for pack entry", short_oid
, len
);
1359 return git_odb__error_ambiguous("found multiple offsets for pack entry");
1361 if ((offset
= nth_packed_object_offset(p
, pos
)) < 0) {
1362 git_error_set(GIT_ERROR_ODB
, "packfile index is corrupt");
1366 *offset_out
= offset
;
1367 git_oid_fromraw(found_oid
, current
);
1369 #ifdef INDEX_DEBUG_LOOKUP
1371 unsigned char hex_sha1
[GIT_OID_HEXSZ
+ 1];
1372 git_oid_fmt(hex_sha1
, found_oid
);
1373 hex_sha1
[GIT_OID_HEXSZ
] = '\0';
1374 printf("found lo=%d %s\n", lo
, hex_sha1
);
1381 int git_pack_entry_find(
1382 struct git_pack_entry
*e
,
1383 struct git_pack_file
*p
,
1384 const git_oid
*short_oid
,
1393 if (len
== GIT_OID_HEXSZ
&& p
->num_bad_objects
) {
1395 for (i
= 0; i
< p
->num_bad_objects
; i
++)
1396 if (git_oid__cmp(short_oid
, &p
->bad_object_sha1
[i
]) == 0)
1397 return packfile_error("bad object found in packfile");
1400 error
= pack_entry_find_offset(&offset
, &found_oid
, p
, short_oid
, len
);
1404 /* we found a unique entry in the index;
1405 * make sure the packfile backing the index
1406 * still exists on disk */
1407 if (p
->mwf
.fd
== -1 && (error
= packfile_open(p
)) < 0)
1413 git_oid_cpy(&e
->sha1
, &found_oid
);