2 * This file is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2,
4 * as published by the Free Software Foundation.
6 * In addition to the permissions in the GNU General Public License,
7 * the authors give you unlimited permission to link the compiled
8 * version of this file into combinations with other programs,
9 * and to distribute those combinations without any restriction
10 * coming from the use of this file. (The General Public License
11 * restrictions do apply in other respects; for example, they cover
12 * modification of the file, and distribution when not linked into
13 * a combined executable.)
15 * This file is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; see the file COPYING. If not, write to
22 * the Free Software Foundation, 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
27 #include "git2/zlib.h"
28 #include "git2/repository.h"
33 #include "delta-apply.h"
34 #include "sha1_lookup.h"
36 #include "git2/odb_backend.h"
38 #define DEFAULT_WINDOW_SIZE \
40 ? 1 * 1024 * 1024 * 1024 \
43 #define DEFAULT_MAPPED_LIMIT \
44 ((1024L * 1024L) * (sizeof(void*) >= 8 ? 8192 : 256))
46 #define PACK_SIGNATURE 0x5041434b /* "PACK" */
47 #define PACK_VERSION 2
48 #define pack_version_ok(v) ((v) == htonl(2) || (v) == htonl(3))
50 uint32_t hdr_signature
;
56 * The first four bytes of index formats later than version 1 should
57 * start with this signature, as all older git binaries would find this
58 * value illegal and abort reading the file.
60 * This is the case because the number of objects in a packfile
61 * cannot exceed 1,431,660,000 as every object would need at least
62 * 3 bytes of data and the overall packfile cannot exceed 4 GiB with
63 * version 1 of the index file due to the offsets limited to 32 bits.
64 * Clearly the signature exceeds this maximum.
66 * Very old git binaries will also compare the first 4 bytes to the
67 * next 4 bytes in the index and abort with a "non-monotonic index"
68 * error if the second 4 byte word is smaller than the first 4
69 * byte word. This would be true in the proposed future index
70 * format as idx_signature would be greater than idx_version.
72 #define PACK_IDX_SIGNATURE 0xff744f63 /* "\377tOc" */
74 struct pack_idx_header
{
75 uint32_t idx_signature
;
80 struct pack_window
*next
;
83 unsigned int last_used
;
84 unsigned int inuse_cnt
;
88 struct pack_window
*windows
;
94 uint32_t num_bad_objects
;
95 git_oid
*bad_object_sha1
; /* array of git_oid */
100 unsigned pack_local
:1, pack_keep
:1;
103 /* something like ".git/objects/pack/xxxxx.pack" */
104 char pack_name
[GIT_FLEX_ARRAY
]; /* more */
113 struct pack_backend
{
114 git_odb_backend parent
;
116 struct pack_file
*last_found
;
118 time_t pack_folder_mtime
;
120 size_t window_size
; /* needs default value */
122 size_t mapped_limit
; /* needs default value */
128 unsigned int peak_open_windows
;
129 unsigned int open_windows
;
131 unsigned int mmap_calls
;
135 * The wonderful tale of a Packed Object lookup query
136 * ===================================================
137 * A riveting and epic story of epicness and ASCII
138 * art, presented by yours truly,
139 * Sir Vicent of Marti
142 * Chapter 1: Once upon a time...
143 * Initialization of the Pack Backend
144 * --------------------------------------------------
146 * # git_odb_backend_pack
147 * | Creates the pack backend structure, initializes the
148 * | callback pointers to our default read() and exist() methods,
149 * | and tries to preload all the known packfiles in the ODB.
151 * |-# packfile_load_all
152 * | Tries to find the `pack` folder, if it exists. ODBs without
153 * | a pack folder are ignored altogether. If there's a `pack` folder
154 * | we run a `dirent` callback through every file in the pack folder
155 * | to find our packfiles. The packfiles are then sorted according
156 * | to a sorting callback.
158 * |-# packfile_load__cb
159 * | | This callback is called from `dirent` with every single file
160 * | | inside the pack folder. We find the packs by actually locating
161 * | | their index (ends in ".idx"). From that index, we verify that
162 * | | the corresponding packfile exists and is valid, and if so, we
163 * | | add it to the pack list.
165 * | |-# packfile_check
166 * | Make sure that there's a packfile to back this index, and store
167 * | some very basic information regarding the packfile itself,
168 * | such as the full path, the size, and the modification time.
169 * | We don't actually open the packfile to check for internal consistency.
171 * |-# packfile_sort__cb
172 * Sort all the preloaded packs according to some specific criteria:
173 * we prioritize the "newer" packs because it's more likely they
174 * contain the objects we are looking for, and we prioritize local
175 * packs over remote ones.
179 * Chapter 2: To be, or not to be...
180 * A standard packed `exist` query for an OID
181 * --------------------------------------------------
183 * # pack_backend__exists
184 * | Check if the given SHA1 oid exists in any of the packs
185 * | that have been loaded for our ODB.
187 * |-# pack_entry_find
188 * | Iterate through all the packs that have been preloaded
189 * | (starting by the pack where the latest object was found)
190 * | to try to find the OID in one of them.
192 * |-# pack_entry_find1
193 * | Check the index of an individual pack to see if the SHA1
194 * | OID can be found. If we can find the offset to that SHA1
195 * | inside of the index, that means the object is contained
196 * | inside of the packfile and we can stop searching.
197 * | Before returning, we verify that the packfile behing the
198 * | index we are searching still exists on disk.
200 * |-# pack_entry_find_offset
201 * | | Mmap the actual index file to disk if it hasn't been opened
202 * | | yet, and run a binary search through it to find the OID.
203 * | | See <http://book.git-scm.com/7_the_packfile.html> for specifics
204 * | | on the Packfile Index format and how do we find entries in it.
206 * | |-# pack_index_open
207 * | | Guess the name of the index based on the full path to the
208 * | | packfile, open it and verify its contents. Only if the index
209 * | | has not been opened already.
211 * | |-# pack_index_check
212 * | Mmap the index file and do a quick run through the header
213 * | to guess the index version (right now we support v1 and v2),
214 * | and to verify that the size of the index makes sense.
217 * See `packfile_open` in Chapter 3
221 * Chapter 3: The neverending story...
222 * A standard packed `lookup` query for an OID
223 * --------------------------------------------------
231 /***********************************************************
233 * FORWARD DECLARATIONS
235 ***********************************************************/
237 static void pack_window_free_all(struct pack_backend
*backend
, struct pack_file
*p
);
238 static int pack_window_contains(struct pack_window
*win
, off_t offset
);
240 static void pack_window_scan_lru(struct pack_file
*p
, struct pack_file
**lru_p
,
241 struct pack_window
**lru_w
, struct pack_window
**lru_l
);
243 static int pack_window_close_lru( struct pack_backend
*backend
,
244 struct pack_file
*current
, git_file keep_fd
);
246 static void pack_window_close(struct pack_window
**w_cursor
);
248 static unsigned char *pack_window_open( struct pack_backend
*backend
,
249 struct pack_file
*p
, struct pack_window
**w_cursor
, off_t offset
,
252 static int packfile_sort__cb(const void *a_
, const void *b_
);
254 static void pack_index_free(struct pack_file
*p
);
256 static int pack_index_check(const char *path
, struct pack_file
*p
);
257 static int pack_index_open(struct pack_file
*p
);
259 static struct pack_file
*packfile_alloc(int extra
);
260 static int packfile_open(struct pack_file
*p
);
261 static int packfile_check(struct pack_file
**pack_out
, const char *path
);
262 static int packfile_load__cb(void *_data
, char *path
);
263 static int packfile_refresh_all(struct pack_backend
*backend
);
265 static off_t
nth_packed_object_offset(const struct pack_file
*p
, uint32_t n
);
267 /* Can find the offset of an object given
268 * a prefix of an identifier.
269 * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
270 * is ambiguous within the pack.
271 * This method assumes that len is between
272 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
274 static int pack_entry_find_offset(
278 const git_oid
*short_oid
,
281 static int pack_entry_find1(
282 struct pack_entry
*e
,
284 const git_oid
*short_oid
,
287 static int pack_entry_find(struct pack_entry
*e
,
288 struct pack_backend
*backend
, const git_oid
*oid
);
290 /* Can find the offset of an object given
291 * a prefix of an identifier.
292 * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
294 * This method assumes that len is between
295 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
297 static int pack_entry_find_prefix(struct pack_entry
*e
,
298 struct pack_backend
*backend
,
299 const git_oid
*short_oid
,
302 static off_t
get_delta_base(struct pack_backend
*backend
,
303 struct pack_file
*p
, struct pack_window
**w_curs
,
304 off_t
*curpos
, git_otype type
,
305 off_t delta_obj_offset
);
307 static unsigned long packfile_unpack_header1(
310 const unsigned char *buf
,
313 static int packfile_unpack_header(
316 struct pack_backend
*backend
,
318 struct pack_window
**w_curs
,
321 static int packfile_unpack_compressed(
323 struct pack_backend
*backend
,
325 struct pack_window
**w_curs
,
330 static int packfile_unpack_delta(
332 struct pack_backend
*backend
,
334 struct pack_window
**w_curs
,
337 git_otype delta_type
,
340 static int packfile_unpack(git_rawobj
*obj
, struct pack_backend
*backend
,
341 struct pack_file
*p
, off_t obj_offset
);
347 /***********************************************************
349 * PACK WINDOW MANAGEMENT
351 ***********************************************************/
353 void pack_window_free_all(struct pack_backend
*backend
, struct pack_file
*p
)
356 struct pack_window
*w
= p
->windows
;
357 assert(w
->inuse_cnt
== 0);
359 backend
->mapped
-= w
->window_map
.len
;
360 backend
->open_windows
--;
362 git_futils_mmap_free(&w
->window_map
);
364 p
->windows
= w
->next
;
369 GIT_INLINE(int) pack_window_contains(struct pack_window
*win
, off_t offset
)
371 /* We must promise at least 20 bytes (one hash) after the
372 * offset is available from this window, otherwise the offset
373 * is not actually in this window and a different window (which
374 * has that one hash excess) must be used. This is to support
375 * the object header and delta base parsing routines below.
377 off_t win_off
= win
->offset
;
378 return win_off
<= offset
379 && (offset
+ 20) <= (off_t
)(win_off
+ win
->window_map
.len
);
382 static void pack_window_scan_lru(
384 struct pack_file
**lru_p
,
385 struct pack_window
**lru_w
,
386 struct pack_window
**lru_l
)
388 struct pack_window
*w
, *w_l
;
390 for (w_l
= NULL
, w
= p
->windows
; w
; w
= w
->next
) {
392 if (!*lru_w
|| w
->last_used
< (*lru_w
)->last_used
) {
402 static int pack_window_close_lru(
403 struct pack_backend
*backend
,
404 struct pack_file
*current
,
407 struct pack_file
*lru_p
= NULL
;
408 struct pack_window
*lru_w
= NULL
, *lru_l
= NULL
;
412 pack_window_scan_lru(current
, &lru_p
, &lru_w
, &lru_l
);
414 for (i
= 0; i
< backend
->packs
.length
; ++i
)
415 pack_window_scan_lru(git_vector_get(&backend
->packs
, i
), &lru_p
, &lru_w
, &lru_l
);
418 backend
->mapped
-= lru_w
->window_map
.len
;
419 git_futils_mmap_free(&lru_w
->window_map
);
422 lru_l
->next
= lru_w
->next
;
424 lru_p
->windows
= lru_w
->next
;
425 if (!lru_p
->windows
&& lru_p
->pack_fd
!= keep_fd
) {
426 p_close(lru_p
->pack_fd
);
432 backend
->open_windows
--;
436 return git__throw(GIT_ERROR
, "Failed to close pack window");
439 static void pack_window_close(struct pack_window
**w_cursor
)
441 struct pack_window
*w
= *w_cursor
;
448 static unsigned char *pack_window_open(
449 struct pack_backend
*backend
,
451 struct pack_window
**w_cursor
,
455 struct pack_window
*win
= *w_cursor
;
457 if (p
->pack_fd
== -1 && packfile_open(p
) < GIT_SUCCESS
)
460 /* Since packfiles end in a hash of their content and it's
461 * pointless to ask for an offset into the middle of that
462 * hash, and the pack_window_contains function above wouldn't match
463 * don't allow an offset too close to the end of the file.
465 if (offset
> (p
->pack_size
- 20))
468 if (!win
|| !pack_window_contains(win
, offset
)) {
473 for (win
= p
->windows
; win
; win
= win
->next
) {
474 if (pack_window_contains(win
, offset
))
479 size_t window_align
= backend
->window_size
/ 2;
482 win
= git__calloc(1, sizeof(*win
));
486 win
->offset
= (offset
/ window_align
) * window_align
;
488 len
= (size_t)(p
->pack_size
- win
->offset
);
489 if (len
> backend
->window_size
)
490 len
= backend
->window_size
;
492 backend
->mapped
+= len
;
494 while (backend
->mapped_limit
< backend
->mapped
&&
495 pack_window_close_lru(backend
, p
, p
->pack_fd
) == GIT_SUCCESS
) {}
497 if (git_futils_mmap_ro(&win
->window_map
, p
->pack_fd
,
498 win
->offset
, len
) < GIT_SUCCESS
) {
503 backend
->mmap_calls
++;
504 backend
->open_windows
++;
506 if (backend
->mapped
> backend
->peak_mapped
)
507 backend
->peak_mapped
= backend
->mapped
;
509 if (backend
->open_windows
> backend
->peak_open_windows
)
510 backend
->peak_open_windows
= backend
->open_windows
;
512 win
->next
= p
->windows
;
517 if (win
!= *w_cursor
) {
518 win
->last_used
= backend
->used_ctr
++;
523 offset
-= win
->offset
;
524 assert(git__is_sizet(offset
));
527 *left
= win
->window_map
.len
- (size_t)offset
;
529 return (unsigned char *)win
->window_map
.data
+ offset
;
538 /***********************************************************
542 ***********************************************************/
544 static void pack_index_free(struct pack_file
*p
)
546 if (p
->index_map
.data
) {
547 git_futils_mmap_free(&p
->index_map
);
548 p
->index_map
.data
= NULL
;
552 static int pack_index_check(const char *path
, struct pack_file
*p
)
554 struct pack_idx_header
*hdr
;
555 uint32_t version
, nr
, i
, *index
;
562 /* TODO: properly open the file without access time */
563 git_file fd
= p_open(path
, O_RDONLY
/*| O_NOATIME */);
568 return git__throw(GIT_EOSERR
, "Failed to check index. File missing or corrupted");
570 if (p_fstat(fd
, &st
) < GIT_SUCCESS
) {
572 return git__throw(GIT_EOSERR
, "Failed to check index. File appears to be corrupted");
575 if (!git__is_sizet(st
.st_size
))
578 idx_size
= (size_t)st
.st_size
;
580 if (idx_size
< 4 * 256 + 20 + 20) {
582 return git__throw(GIT_EOBJCORRUPTED
, "Failed to check index. Object is corrupted");
585 error
= git_futils_mmap_ro(&p
->index_map
, fd
, 0, idx_size
);
588 if (error
< GIT_SUCCESS
)
589 return git__rethrow(error
, "Failed to check index");
591 hdr
= idx_map
= p
->index_map
.data
;
593 if (hdr
->idx_signature
== htonl(PACK_IDX_SIGNATURE
)) {
594 version
= ntohl(hdr
->idx_version
);
596 if (version
< 2 || version
> 2) {
597 git_futils_mmap_free(&p
->index_map
);
598 return git__throw(GIT_EOBJCORRUPTED
, "Failed to check index. Unsupported index version");
608 index
+= 2; /* skip index header */
610 for (i
= 0; i
< 256; i
++) {
611 uint32_t n
= ntohl(index
[i
]);
613 git_futils_mmap_free(&p
->index_map
);
614 return git__throw(GIT_EOBJCORRUPTED
, "Failed to check index. Index is non-monotonic");
622 * - 256 index entries 4 bytes each
623 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
624 * - 20-byte SHA1 of the packfile
625 * - 20-byte SHA1 file checksum
627 if (idx_size
!= 4*256 + nr
* 24 + 20 + 20) {
628 git_futils_mmap_free(&p
->index_map
);
629 return git__throw(GIT_EOBJCORRUPTED
, "Failed to check index. Object is corrupted");
631 } else if (version
== 2) {
634 * - 8 bytes of header
635 * - 256 index entries 4 bytes each
636 * - 20-byte sha1 entry * nr
637 * - 4-byte crc entry * nr
638 * - 4-byte offset entry * nr
639 * - 20-byte SHA1 of the packfile
640 * - 20-byte SHA1 file checksum
641 * And after the 4-byte offset table might be a
642 * variable sized table containing 8-byte entries
643 * for offsets larger than 2^31.
645 unsigned long min_size
= 8 + 4*256 + nr
*(20 + 4 + 4) + 20 + 20;
646 unsigned long max_size
= min_size
;
649 max_size
+= (nr
- 1)*8;
651 if (idx_size
< min_size
|| idx_size
> max_size
) {
652 git_futils_mmap_free(&p
->index_map
);
653 return git__throw(GIT_EOBJCORRUPTED
, "Failed to check index. Wrong index size");
656 /* Make sure that off_t is big enough to access the whole pack...
657 * Is this an issue in libgit2? It shouldn't. */
658 if (idx_size
!= min_size
&& (sizeof(off_t
) <= 4)) {
659 git_futils_mmap_free(&p
->index_map
);
660 return git__throw(GIT_EOSERR
, "Failed to check index. off_t not big enough to access the whole pack");
664 p
->index_version
= version
;
669 static int pack_index_open(struct pack_file
*p
)
674 if (p
->index_map
.data
)
677 idx_name
= git__strdup(p
->pack_name
);
678 strcpy(idx_name
+ strlen(idx_name
) - STRLEN(".pack"), ".idx");
680 error
= pack_index_check(idx_name
, p
);
683 return error
== GIT_SUCCESS
? GIT_SUCCESS
: git__rethrow(error
, "Failed to open index");
694 /***********************************************************
698 ***********************************************************/
700 static int packfile_sort__cb(const void *a_
, const void *b_
)
702 struct pack_file
*a
= *((struct pack_file
**)a_
);
703 struct pack_file
*b
= *((struct pack_file
**)b_
);
707 * Local packs tend to contain objects specific to our
708 * variant of the project than remote ones. In addition,
709 * remote ones could be on a network mounted filesystem.
710 * Favor local ones for these reasons.
712 st
= a
->pack_local
- b
->pack_local
;
717 * Younger packs tend to contain more recent objects,
718 * and more recent objects tend to get accessed more
721 if (a
->mtime
< b
->mtime
)
723 else if (a
->mtime
== b
->mtime
)
729 static struct pack_file
*packfile_alloc(int extra
)
731 struct pack_file
*p
= git__malloc(sizeof(*p
) + extra
);
732 memset(p
, 0, sizeof(*p
));
738 static void packfile_free(struct pack_backend
*backend
, struct pack_file
*p
)
742 /* clear_delta_base_cache(); */
743 pack_window_free_all(backend
, p
);
745 if (p
->pack_fd
!= -1)
750 free(p
->bad_object_sha1
);
754 static int packfile_open(struct pack_file
*p
)
757 struct pack_header hdr
;
759 unsigned char *idx_sha1
;
761 if (!p
->index_map
.data
&& pack_index_open(p
) < GIT_SUCCESS
)
762 return git__throw(GIT_ENOTFOUND
, "Failed to open packfile. File not found");
764 /* TODO: open with noatime */
765 p
->pack_fd
= p_open(p
->pack_name
, O_RDONLY
);
766 if (p
->pack_fd
< 0 || p_fstat(p
->pack_fd
, &st
) < GIT_SUCCESS
)
767 return git__throw(GIT_EOSERR
, "Failed to open packfile. File appears to be corrupted");
769 /* If we created the struct before we had the pack we lack size. */
771 if (!S_ISREG(st
.st_mode
))
773 p
->pack_size
= (off_t
)st
.st_size
;
774 } else if (p
->pack_size
!= st
.st_size
)
778 /* We leave these file descriptors open with sliding mmap;
779 * there is no point keeping them open across exec(), though.
781 fd_flag
= fcntl(p
->pack_fd
, F_GETFD
, 0);
783 return error("cannot determine file descriptor flags");
785 fd_flag
|= FD_CLOEXEC
;
786 if (fcntl(p
->pack_fd
, F_SETFD
, fd_flag
) == -1)
790 /* Verify we recognize this pack file format. */
791 if (p_read(p
->pack_fd
, &hdr
, sizeof(hdr
)) < GIT_SUCCESS
)
794 if (hdr
.hdr_signature
!= htonl(PACK_SIGNATURE
))
797 if (!pack_version_ok(hdr
.hdr_version
))
800 /* Verify the pack matches its index. */
801 if (p
->num_objects
!= ntohl(hdr
.hdr_entries
))
804 if (p_lseek(p
->pack_fd
, p
->pack_size
- GIT_OID_RAWSZ
, SEEK_SET
) == -1)
807 if (p_read(p
->pack_fd
, sha1
.id
, GIT_OID_RAWSZ
) < GIT_SUCCESS
)
810 idx_sha1
= ((unsigned char *)p
->index_map
.data
) + p
->index_map
.len
- 40;
812 if (git_oid_cmp(&sha1
, (git_oid
*)idx_sha1
) != 0)
820 return git__throw(GIT_EPACKCORRUPTED
, "Failed to packfile. Pack is corrupted");
823 static int packfile_check(struct pack_file
**pack_out
, const char *path
)
830 path_len
= strlen(path
);
831 p
= packfile_alloc(path_len
+ 2);
834 * Make sure a corresponding .pack file exists and that
835 * the index looks sane.
837 path_len
-= STRLEN(".idx");
840 return git__throw(GIT_ENOTFOUND
, "Failed to check packfile. Wrong path name");
843 memcpy(p
->pack_name
, path
, path_len
);
845 strcpy(p
->pack_name
+ path_len
, ".keep");
846 if (git_futils_exists(p
->pack_name
) == GIT_SUCCESS
)
849 strcpy(p
->pack_name
+ path_len
, ".pack");
850 if (p_stat(p
->pack_name
, &st
) < GIT_SUCCESS
|| !S_ISREG(st
.st_mode
)) {
852 return git__throw(GIT_ENOTFOUND
, "Failed to check packfile. File not found");
855 /* ok, it looks sane as far as we can check without
856 * actually mapping the pack file.
858 p
->pack_size
= (off_t
)st
.st_size
;
860 p
->mtime
= (git_time_t
)st
.st_mtime
;
862 /* see if we can parse the sha1 oid in the packfile name */
864 git_oid_fromstr(&p
->sha1
, path
+ path_len
- GIT_OID_HEXSZ
) < GIT_SUCCESS
)
865 memset(&p
->sha1
, 0x0, GIT_OID_RAWSZ
);
871 static int packfile_load__cb(void *_data
, char *path
)
873 struct pack_backend
*backend
= (struct pack_backend
*)_data
;
874 struct pack_file
*pack
;
878 if (git__suffixcmp(path
, ".idx") != 0)
879 return GIT_SUCCESS
; /* not an index */
881 for (i
= 0; i
< backend
->packs
.length
; ++i
) {
882 struct pack_file
*p
= git_vector_get(&backend
->packs
, i
);
883 if (memcmp(p
->pack_name
, path
, strlen(path
) - STRLEN(".idx")) == 0)
887 error
= packfile_check(&pack
, path
);
888 if (error
< GIT_SUCCESS
)
889 return git__rethrow(error
, "Failed to load packfile");
891 if (git_vector_insert(&backend
->packs
, pack
) < GIT_SUCCESS
) {
899 static int packfile_refresh_all(struct pack_backend
*backend
)
904 if (backend
->pack_folder
== NULL
)
907 if (p_stat(backend
->pack_folder
, &st
) < 0 || !S_ISDIR(st
.st_mode
))
908 return git__throw(GIT_ENOTFOUND
, "Failed to refresh packfiles. Backend not found");
910 if (st
.st_mtime
!= backend
->pack_folder_mtime
) {
911 char path
[GIT_PATH_MAX
];
912 strcpy(path
, backend
->pack_folder
);
914 /* reload all packs */
915 error
= git_futils_direach(path
, GIT_PATH_MAX
, packfile_load__cb
, (void *)backend
);
916 if (error
< GIT_SUCCESS
)
917 return git__rethrow(error
, "Failed to refresh packfiles");
919 git_vector_sort(&backend
->packs
);
920 backend
->pack_folder_mtime
= st
.st_mtime
;
933 /***********************************************************
935 * PACKFILE ENTRY SEARCH INTERNALS
937 ***********************************************************/
939 static off_t
nth_packed_object_offset(const struct pack_file
*p
, uint32_t n
)
941 const unsigned char *index
= p
->index_map
.data
;
943 if (p
->index_version
== 1) {
944 return ntohl(*((uint32_t *)(index
+ 24 * n
)));
947 index
+= 8 + p
->num_objects
* (20 + 4);
948 off
= ntohl(*((uint32_t *)(index
+ 4 * n
)));
949 if (!(off
& 0x80000000))
951 index
+= p
->num_objects
* 4 + (off
& 0x7fffffff) * 8;
952 return (((uint64_t)ntohl(*((uint32_t *)(index
+ 0)))) << 32) |
953 ntohl(*((uint32_t *)(index
+ 4)));
957 static int pack_entry_find_offset(
961 const git_oid
*short_oid
,
964 const uint32_t *level1_ofs
= p
->index_map
.data
;
965 const unsigned char *index
= p
->index_map
.data
;
966 unsigned hi
, lo
, stride
;
968 const unsigned char *current
= 0;
975 if ((error
= pack_index_open(p
)) < GIT_SUCCESS
)
976 return git__rethrow(error
, "Failed to find offset for pack entry");
978 assert(p
->index_map
.data
);
980 index
= p
->index_map
.data
;
981 level1_ofs
= p
->index_map
.data
;
984 if (p
->index_version
> 1) {
990 hi
= ntohl(level1_ofs
[(int)short_oid
->id
[0]]);
991 lo
= ((short_oid
->id
[0] == 0x0) ? 0 : ntohl(level1_ofs
[(int)short_oid
->id
[0] - 1]));
993 if (p
->index_version
> 1) {
1000 #ifdef INDEX_DEBUG_LOOKUP
1001 printf("%02x%02x%02x... lo %u hi %u nr %d\n",
1002 short_oid
->id
[0], short_oid
->id
[1], short_oid
->id
[2], lo
, hi
, p
->num_objects
);
1005 /* Use git.git lookup code */
1006 pos
= sha1_entry_pos(index
, stride
, 0, lo
, hi
, p
->num_objects
, short_oid
->id
);
1009 /* An object matching exactly the oid was found */
1011 current
= index
+ pos
* stride
;
1013 /* No object was found */
1014 /* pos refers to the object with the "closest" oid to short_oid */
1016 if (pos
< (int)p
->num_objects
) {
1017 current
= index
+ pos
* stride
;
1019 if (!git_oid_ncmp(short_oid
, (const git_oid
*)current
, len
)) {
1025 if (found
&& pos
+ 1 < (int)p
->num_objects
) {
1026 /* Check for ambiguousity */
1027 const unsigned char *next
= current
+ stride
;
1029 if (!git_oid_ncmp(short_oid
, (const git_oid
*)next
, len
)) {
1035 return git__throw(GIT_ENOTFOUND
, "Failed to find offset for pack entry. Entry not found");
1036 } else if (found
> 1) {
1037 return git__throw(GIT_EAMBIGUOUSOIDPREFIX
, "Failed to find offset for pack entry. Ambiguous sha1 prefix within pack");
1039 *offset_out
= nth_packed_object_offset(p
, pos
);
1040 git_oid_fromraw(found_oid
, current
);
1042 #ifdef INDEX_DEBUG_LOOKUP
1043 unsigned char hex_sha1
[GIT_OID_HEXSZ
+ 1];
1044 git_oid_fmt(hex_sha1
, found_oid
);
1045 hex_sha1
[GIT_OID_HEXSZ
] = '\0';
1046 printf("found lo=%d %s\n", lo
, hex_sha1
);
1052 static int pack_entry_find1(
1053 struct pack_entry
*e
,
1054 struct pack_file
*p
,
1055 const git_oid
*short_oid
,
1064 if (len
== GIT_OID_HEXSZ
&& p
->num_bad_objects
) {
1066 for (i
= 0; i
< p
->num_bad_objects
; i
++)
1067 if (git_oid_cmp(short_oid
, &p
->bad_object_sha1
[i
]) == 0)
1068 return git__throw(GIT_ERROR
, "Failed to find pack entry. Bad object found");
1071 error
= pack_entry_find_offset(&offset
, &found_oid
, p
, short_oid
, len
);
1072 if (error
< GIT_SUCCESS
)
1073 return git__rethrow(error
, "Failed to find pack entry. Couldn't find offset");
1075 /* we found a unique entry in the index;
1076 * make sure the packfile backing the index
1077 * still exists on disk */
1078 if (p
->pack_fd
== -1 && packfile_open(p
) < GIT_SUCCESS
)
1079 return git__throw(GIT_EOSERR
, "Failed to find pack entry. Packfile doesn't exist on disk");
1084 git_oid_cpy(&e
->sha1
, &found_oid
);
1088 static int pack_entry_find(struct pack_entry
*e
, struct pack_backend
*backend
, const git_oid
*oid
)
1093 if ((error
= packfile_refresh_all(backend
)) < GIT_SUCCESS
)
1094 return git__rethrow(error
, "Failed to find pack entry");
1096 if (backend
->last_found
&&
1097 pack_entry_find1(e
, backend
->last_found
, oid
, GIT_OID_HEXSZ
) == GIT_SUCCESS
)
1100 for (i
= 0; i
< backend
->packs
.length
; ++i
) {
1101 struct pack_file
*p
;
1103 p
= git_vector_get(&backend
->packs
, i
);
1104 if (p
== backend
->last_found
)
1107 if (pack_entry_find1(e
, p
, oid
, GIT_OID_HEXSZ
) == GIT_SUCCESS
) {
1108 backend
->last_found
= p
;
1113 return git__throw(GIT_ENOTFOUND
, "Failed to find pack entry");
1116 static int pack_entry_find_prefix(
1117 struct pack_entry
*e
,
1118 struct pack_backend
*backend
,
1119 const git_oid
*short_oid
,
1126 if ((error
= packfile_refresh_all(backend
)) < GIT_SUCCESS
)
1127 return git__rethrow(error
, "Failed to find pack entry");
1129 if (backend
->last_found
) {
1130 error
= pack_entry_find1(e
, backend
->last_found
, short_oid
, len
);
1131 if (error
== GIT_EAMBIGUOUSOIDPREFIX
) {
1132 return git__rethrow(error
, "Failed to find pack entry. Ambiguous sha1 prefix");
1133 } else if (error
== GIT_SUCCESS
) {
1138 for (i
= 0; i
< backend
->packs
.length
; ++i
) {
1139 struct pack_file
*p
;
1141 p
= git_vector_get(&backend
->packs
, i
);
1142 if (p
== backend
->last_found
)
1145 error
= pack_entry_find1(e
, p
, short_oid
, len
);
1146 if (error
== GIT_EAMBIGUOUSOIDPREFIX
) {
1147 return git__rethrow(error
, "Failed to find pack entry. Ambiguous sha1 prefix");
1148 } else if (error
== GIT_SUCCESS
) {
1152 backend
->last_found
= p
;
1157 return git__rethrow(GIT_ENOTFOUND
, "Failed to find pack entry");
1158 } else if (found
> 1) {
1159 return git__rethrow(GIT_EAMBIGUOUSOIDPREFIX
, "Failed to find pack entry. Ambiguous sha1 prefix");
1177 /***********************************************************
1179 * PACKFILE ENTRY UNPACK INTERNALS
1181 ***********************************************************/
1183 static unsigned long packfile_unpack_header1(
1186 const unsigned char *buf
,
1190 unsigned long size
, c
;
1191 unsigned long used
= 0;
1194 *type
= (c
>> 4) & 7;
1198 if (len
<= used
|| bitsizeof(long) <= shift
)
1202 size
+= (c
& 0x7f) << shift
;
1206 *sizep
= (size_t)size
;
1210 static int packfile_unpack_header(
1213 struct pack_backend
*backend
,
1214 struct pack_file
*p
,
1215 struct pack_window
**w_curs
,
1218 unsigned char *base
;
1222 /* pack_window_open() assures us we have [base, base + 20) available
1223 * as a range that we can look at at. (Its actually the hash
1224 * size that is assured.) With our object header encoding
1225 * the maximum deflated object size is 2^137, which is just
1226 * insane, so we know won't exceed what we have been given.
1228 base
= pack_window_open(backend
, p
, w_curs
, *curpos
, &left
);
1232 used
= packfile_unpack_header1(size_p
, type_p
, base
, left
);
1235 return git__throw(GIT_EOBJCORRUPTED
, "Header length is zero");
1241 static int packfile_unpack_compressed(
1243 struct pack_backend
*backend
,
1244 struct pack_file
*p
,
1245 struct pack_window
**w_curs
,
1252 unsigned char *buffer
, *in
;
1254 buffer
= git__malloc(size
+ 1);
1255 memset(buffer
, 0x0, size
+ 1);
1257 memset(&stream
, 0, sizeof(stream
));
1258 stream
.next_out
= buffer
;
1259 stream
.avail_out
= size
+ 1;
1261 st
= inflateInit(&stream
);
1264 return git__throw(GIT_EZLIB
, "Error in zlib");
1268 in
= pack_window_open(backend
, p
, w_curs
, curpos
, &stream
.avail_in
);
1269 stream
.next_in
= in
;
1270 st
= inflate(&stream
, Z_FINISH
);
1272 if (!stream
.avail_out
)
1273 break; /* the payload is larger than it should be */
1275 curpos
+= stream
.next_in
- in
;
1276 } while (st
== Z_OK
|| st
== Z_BUF_ERROR
);
1278 inflateEnd(&stream
);
1280 if ((st
!= Z_STREAM_END
) || stream
.total_out
!= size
) {
1282 return git__throw(GIT_EZLIB
, "Error in zlib");
1291 static off_t
get_delta_base(
1292 struct pack_backend
*backend
,
1293 struct pack_file
*p
,
1294 struct pack_window
**w_curs
,
1297 off_t delta_obj_offset
)
1299 unsigned char *base_info
= pack_window_open(backend
, p
, w_curs
, *curpos
, NULL
);
1303 /* pack_window_open() assured us we have [base_info, base_info + 20)
1304 * as a range that we can look at without walking off the
1305 * end of the mapped window. Its actually the hash size
1306 * that is assured. An OFS_DELTA longer than the hash size
1307 * is stupid, as then a REF_DELTA would be smaller to store.
1309 if (type
== GIT_OBJ_OFS_DELTA
) {
1311 unsigned char c
= base_info
[used
++];
1312 base_offset
= c
& 127;
1315 if (!base_offset
|| MSB(base_offset
, 7))
1316 return 0; /* overflow */
1317 c
= base_info
[used
++];
1318 base_offset
= (base_offset
<< 7) + (c
& 127);
1320 base_offset
= delta_obj_offset
- base_offset
;
1321 if (base_offset
<= 0 || base_offset
>= delta_obj_offset
)
1322 return 0; /* out of bound */
1324 } else if (type
== GIT_OBJ_REF_DELTA
) {
1325 /* The base entry _must_ be in the same pack */
1326 if (pack_entry_find_offset(&base_offset
, &unused
, p
, (git_oid
*)base_info
, GIT_OID_HEXSZ
) < GIT_SUCCESS
)
1327 return git__throw(GIT_EPACKCORRUPTED
, "Base entry delta is not in the same pack");
1335 static int packfile_unpack_delta(
1337 struct pack_backend
*backend
,
1338 struct pack_file
*p
,
1339 struct pack_window
**w_curs
,
1342 git_otype delta_type
,
1346 git_rawobj base
, delta
;
1349 base_offset
= get_delta_base(backend
, p
, w_curs
, &curpos
, delta_type
, obj_offset
);
1350 if (base_offset
== 0)
1351 return git__throw(GIT_EOBJCORRUPTED
, "Delta offset is zero");
1353 pack_window_close(w_curs
);
1354 error
= packfile_unpack(&base
, backend
, p
, base_offset
);
1356 /* TODO: git.git tries to load the base from other packfiles
1357 * or loose objects */
1358 if (error
< GIT_SUCCESS
)
1359 return git__rethrow(error
, "Corrupted delta");
1361 error
= packfile_unpack_compressed(&delta
, backend
, p
, w_curs
, curpos
, delta_size
, delta_type
);
1362 if (error
< GIT_SUCCESS
) {
1364 return git__rethrow(error
, "Corrupted delta");
1367 obj
->type
= base
.type
;
1368 error
= git__delta_apply(obj
,
1369 base
.data
, base
.len
,
1370 delta
.data
, delta
.len
);
1375 /* TODO: we might want to cache this shit. eventually */
1376 //add_delta_base_cache(p, base_offset, base, base_size, *type);
1377 return error
; /* error set by git__delta_apply */
1380 static int packfile_unpack(
1382 struct pack_backend
*backend
,
1383 struct pack_file
*p
,
1386 struct pack_window
*w_curs
= NULL
;
1387 off_t curpos
= obj_offset
;
1394 * TODO: optionally check the CRC on the packfile
1399 obj
->type
= GIT_OBJ_BAD
;
1401 error
= packfile_unpack_header(&size
, &type
, backend
, p
, &w_curs
, &curpos
);
1402 if (error
< GIT_SUCCESS
)
1403 return git__rethrow(error
, "Failed to unpack packfile");
1406 case GIT_OBJ_OFS_DELTA
:
1407 case GIT_OBJ_REF_DELTA
:
1408 error
= packfile_unpack_delta(
1409 obj
, backend
, p
, &w_curs
, curpos
,
1410 size
, type
, obj_offset
);
1413 case GIT_OBJ_COMMIT
:
1417 error
= packfile_unpack_compressed(
1418 obj
, backend
, p
, &w_curs
, curpos
,
1423 error
= GIT_EOBJCORRUPTED
;
1427 pack_window_close(&w_curs
);
1428 return error
== GIT_SUCCESS
? GIT_SUCCESS
: git__rethrow(error
, "Failed to unpack packfile");
1435 /***********************************************************
1437 * PACKED BACKEND PUBLIC API
1439 * Implement the git_odb_backend API calls
1441 ***********************************************************/
1444 int pack_backend__read_header(git_rawobj *obj, git_odb_backend *backend, const git_oid *oid)
1446 pack_location location;
1448 assert(obj && backend && oid);
1450 if (locate_packfile(&location, (struct pack_backend *)backend, oid) < 0)
1451 return GIT_ENOTFOUND;
1453 return read_header_packed(obj, &location);
1457 int pack_backend__read(void **buffer_p
, size_t *len_p
, git_otype
*type_p
, git_odb_backend
*backend
, const git_oid
*oid
)
1459 struct pack_entry e
;
1463 if ((error
= pack_entry_find(&e
, (struct pack_backend
*)backend
, oid
)) < GIT_SUCCESS
)
1464 return git__rethrow(error
, "Failed to read pack backend");
1466 if ((error
= packfile_unpack(&raw
, (struct pack_backend
*)backend
, e
.p
, e
.offset
)) < GIT_SUCCESS
)
1467 return git__rethrow(error
, "Failed to read pack backend");
1469 *buffer_p
= raw
.data
;
1476 int pack_backend__read_prefix(
1481 git_odb_backend
*backend
,
1482 const git_oid
*short_oid
,
1485 if (len
< GIT_OID_MINPREFIXLEN
)
1486 return git__throw(GIT_EAMBIGUOUSOIDPREFIX
, "Failed to read pack backend. Prefix length is lower than %d.", GIT_OID_MINPREFIXLEN
);
1488 if (len
>= GIT_OID_HEXSZ
) {
1489 /* We can fall back to regular read method */
1490 int error
= pack_backend__read(buffer_p
, len_p
, type_p
, backend
, short_oid
);
1491 if (error
== GIT_SUCCESS
)
1492 git_oid_cpy(out_oid
, short_oid
);
1496 struct pack_entry e
;
1500 if ((error
= pack_entry_find_prefix(&e
, (struct pack_backend
*)backend
, short_oid
, len
)) < GIT_SUCCESS
)
1501 return git__rethrow(error
, "Failed to read pack backend");
1503 if ((error
= packfile_unpack(&raw
, (struct pack_backend
*)backend
, e
.p
, e
.offset
)) < GIT_SUCCESS
)
1504 return git__rethrow(error
, "Failed to read pack backend");
1506 *buffer_p
= raw
.data
;
1509 git_oid_cpy(out_oid
, &e
.sha1
);
1515 int pack_backend__exists(git_odb_backend
*backend
, const git_oid
*oid
)
1517 struct pack_entry e
;
1518 return pack_entry_find(&e
, (struct pack_backend
*)backend
, oid
) == GIT_SUCCESS
;
1521 void pack_backend__free(git_odb_backend
*_backend
)
1523 struct pack_backend
*backend
;
1528 backend
= (struct pack_backend
*)_backend
;
1530 for (i
= 0; i
< backend
->packs
.length
; ++i
) {
1531 struct pack_file
*p
= git_vector_get(&backend
->packs
, i
);
1532 packfile_free(backend
, p
);
1535 git_vector_free(&backend
->packs
);
1536 free(backend
->pack_folder
);
1540 int git_odb_backend_pack(git_odb_backend
**backend_out
, const char *objects_dir
)
1542 struct pack_backend
*backend
;
1543 char path
[GIT_PATH_MAX
];
1545 backend
= git__calloc(1, sizeof(struct pack_backend
));
1546 if (backend
== NULL
)
1549 if (git_vector_init(&backend
->packs
, 8, packfile_sort__cb
) < GIT_SUCCESS
) {
1554 backend
->window_size
= DEFAULT_WINDOW_SIZE
;
1555 backend
->mapped_limit
= DEFAULT_MAPPED_LIMIT
;
1557 git_path_join(path
, objects_dir
, "pack");
1558 if (git_futils_isdir(path
) == GIT_SUCCESS
) {
1559 backend
->pack_folder
= git__strdup(path
);
1560 backend
->pack_folder_mtime
= 0;
1562 if (backend
->pack_folder
== NULL
) {
1568 backend
->parent
.read
= &pack_backend__read
;
1569 backend
->parent
.read_prefix
= &pack_backend__read_prefix
;
1570 backend
->parent
.read_header
= NULL
;
1571 backend
->parent
.exists
= &pack_backend__exists
;
1572 backend
->parent
.free
= &pack_backend__free
;
1574 *backend_out
= (git_odb_backend
*)backend
;