2 * This file is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2,
4 * as published by the Free Software Foundation.
6 * In addition to the permissions in the GNU General Public License,
7 * the authors give you unlimited permission to link the compiled
8 * version of this file into combinations with other programs,
9 * and to distribute those combinations without any restriction
10 * coming from the use of this file. (The General Public License
11 * restrictions do apply in other respects; for example, they cover
12 * modification of the file, and distribution when not linked into
13 * a combined executable.)
15 * This file is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; see the file COPYING. If not, write to
22 * the Free Software Foundation, 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
27 #include "git2/zlib.h"
28 #include "git2/repository.h"
33 #include "delta-apply.h"
34 #include "sha1_lookup.h"
36 #include "git2/odb_backend.h"
38 #define DEFAULT_WINDOW_SIZE \
40 ? 1 * 1024 * 1024 * 1024 \
43 #define DEFAULT_MAPPED_LIMIT \
44 ((1024L * 1024L) * (sizeof(void*) >= 8 ? 8192 : 256))
46 #define PACK_SIGNATURE 0x5041434b /* "PACK" */
47 #define PACK_VERSION 2
48 #define pack_version_ok(v) ((v) == htonl(2) || (v) == htonl(3))
50 uint32_t hdr_signature
;
56 * The first four bytes of index formats later than version 1 should
57 * start with this signature, as all older git binaries would find this
58 * value illegal and abort reading the file.
60 * This is the case because the number of objects in a packfile
61 * cannot exceed 1,431,660,000 as every object would need at least
62 * 3 bytes of data and the overall packfile cannot exceed 4 GiB with
63 * version 1 of the index file due to the offsets limited to 32 bits.
64 * Clearly the signature exceeds this maximum.
66 * Very old git binaries will also compare the first 4 bytes to the
67 * next 4 bytes in the index and abort with a "non-monotonic index"
68 * error if the second 4 byte word is smaller than the first 4
69 * byte word. This would be true in the proposed future index
70 * format as idx_signature would be greater than idx_version.
72 #define PACK_IDX_SIGNATURE 0xff744f63 /* "\377tOc" */
74 struct pack_idx_header
{
75 uint32_t idx_signature
;
80 struct pack_window
*next
;
83 unsigned int last_used
;
84 unsigned int inuse_cnt
;
88 struct pack_window
*windows
;
94 uint32_t num_bad_objects
;
95 git_oid
*bad_object_sha1
; /* array of git_oid */
100 unsigned pack_local
:1, pack_keep
:1;
103 /* something like ".git/objects/pack/xxxxx.pack" */
104 char pack_name
[GIT_FLEX_ARRAY
]; /* more */
113 struct pack_backend
{
114 git_odb_backend parent
;
116 struct pack_file
*last_found
;
118 time_t pack_folder_mtime
;
120 size_t window_size
; /* needs default value */
122 size_t mapped_limit
; /* needs default value */
128 unsigned int peak_open_windows
;
129 unsigned int open_windows
;
131 unsigned int mmap_calls
;
135 * The wonderful tale of a Packed Object lookup query
136 * ===================================================
137 * A riveting and epic story of epicness and ASCII
138 * art, presented by yours truly,
139 * Sir Vicent of Marti
142 * Chapter 1: Once upon a time...
143 * Initialization of the Pack Backend
144 * --------------------------------------------------
146 * # git_odb_backend_pack
147 * | Creates the pack backend structure, initializes the
148 * | callback pointers to our default read() and exist() methods,
149 * | and tries to preload all the known packfiles in the ODB.
151 * |-# packfile_load_all
152 * | Tries to find the `pack` folder, if it exists. ODBs without
153 * | a pack folder are ignored altogether. If there's a `pack` folder
154 * | we run a `dirent` callback through every file in the pack folder
155 * | to find our packfiles. The packfiles are then sorted according
156 * | to a sorting callback.
158 * |-# packfile_load__cb
159 * | | This callback is called from `dirent` with every single file
160 * | | inside the pack folder. We find the packs by actually locating
161 * | | their index (ends in ".idx"). From that index, we verify that
162 * | | the corresponding packfile exists and is valid, and if so, we
163 * | | add it to the pack list.
165 * | |-# packfile_check
166 * | Make sure that there's a packfile to back this index, and store
167 * | some very basic information regarding the packfile itself,
168 * | such as the full path, the size, and the modification time.
169 * | We don't actually open the packfile to check for internal consistency.
171 * |-# packfile_sort__cb
172 * Sort all the preloaded packs according to some specific criteria:
173 * we prioritize the "newer" packs because it's more likely they
174 * contain the objects we are looking for, and we prioritize local
175 * packs over remote ones.
179 * Chapter 2: To be, or not to be...
180 * A standard packed `exist` query for an OID
181 * --------------------------------------------------
183 * # pack_backend__exists
184 * | Check if the given SHA1 oid exists in any of the packs
185 * | that have been loaded for our ODB.
187 * |-# pack_entry_find
188 * | Iterate through all the packs that have been preloaded
189 * | (starting by the pack where the latest object was found)
190 * | to try to find the OID in one of them.
192 * |-# pack_entry_find1
193 * | Check the index of an individual pack to see if the SHA1
194 * | OID can be found. If we can find the offset to that SHA1
195 * | inside of the index, that means the object is contained
196 * | inside of the packfile and we can stop searching.
197 * | Before returning, we verify that the packfile behing the
198 * | index we are searching still exists on disk.
200 * |-# pack_entry_find_offset
201 * | | Mmap the actual index file to disk if it hasn't been opened
202 * | | yet, and run a binary search through it to find the OID.
203 * | | See <http://book.git-scm.com/7_the_packfile.html> for specifics
204 * | | on the Packfile Index format and how do we find entries in it.
206 * | |-# pack_index_open
207 * | | Guess the name of the index based on the full path to the
208 * | | packfile, open it and verify its contents. Only if the index
209 * | | has not been opened already.
211 * | |-# pack_index_check
212 * | Mmap the index file and do a quick run through the header
213 * | to guess the index version (right now we support v1 and v2),
214 * | and to verify that the size of the index makes sense.
217 * See `packfile_open` in Chapter 3
221 * Chapter 3: The neverending story...
222 * A standard packed `lookup` query for an OID
223 * --------------------------------------------------
231 /***********************************************************
233 * FORWARD DECLARATIONS
235 ***********************************************************/
237 static void pack_window_free_all(struct pack_backend
*backend
, struct pack_file
*p
);
238 static int pack_window_contains(struct pack_window
*win
, off_t offset
);
240 static void pack_window_scan_lru(struct pack_file
*p
, struct pack_file
**lru_p
,
241 struct pack_window
**lru_w
, struct pack_window
**lru_l
);
243 static int pack_window_close_lru( struct pack_backend
*backend
,
244 struct pack_file
*current
, git_file keep_fd
);
246 static void pack_window_close(struct pack_window
**w_cursor
);
248 static unsigned char *pack_window_open( struct pack_backend
*backend
,
249 struct pack_file
*p
, struct pack_window
**w_cursor
, off_t offset
,
252 static int packfile_sort__cb(const void *a_
, const void *b_
);
254 static void pack_index_free(struct pack_file
*p
);
256 static int pack_index_check(const char *path
, struct pack_file
*p
);
257 static int pack_index_open(struct pack_file
*p
);
259 static struct pack_file
*packfile_alloc(int extra
);
260 static int packfile_open(struct pack_file
*p
);
261 static int packfile_check(struct pack_file
**pack_out
, const char *path
);
262 static int packfile_load__cb(void *_data
, char *path
);
263 static int packfile_refresh_all(struct pack_backend
*backend
);
265 static off_t
nth_packed_object_offset(const struct pack_file
*p
, uint32_t n
);
267 /* Can find the offset of an object given
268 * a prefix of an identifier.
269 * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
270 * is ambiguous within the pack.
271 * This method assumes that len is between
272 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
274 static int pack_entry_find_offset(
278 const git_oid
*short_oid
,
281 static int pack_entry_find1(
282 struct pack_entry
*e
,
284 const git_oid
*short_oid
,
287 static int pack_entry_find(struct pack_entry
*e
,
288 struct pack_backend
*backend
, const git_oid
*oid
);
290 /* Can find the offset of an object given
291 * a prefix of an identifier.
292 * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
294 * This method assumes that len is between
295 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
297 static int pack_entry_find_prefix(struct pack_entry
*e
,
298 struct pack_backend
*backend
,
299 const git_oid
*short_oid
,
302 static off_t
get_delta_base(struct pack_backend
*backend
,
303 struct pack_file
*p
, struct pack_window
**w_curs
,
304 off_t
*curpos
, git_otype type
,
305 off_t delta_obj_offset
);
307 static unsigned long packfile_unpack_header1(
310 const unsigned char *buf
,
313 static int packfile_unpack_header(
316 struct pack_backend
*backend
,
318 struct pack_window
**w_curs
,
321 static int packfile_unpack_compressed(
323 struct pack_backend
*backend
,
325 struct pack_window
**w_curs
,
330 static int packfile_unpack_delta(
332 struct pack_backend
*backend
,
334 struct pack_window
**w_curs
,
337 git_otype delta_type
,
340 static int packfile_unpack(git_rawobj
*obj
, struct pack_backend
*backend
,
341 struct pack_file
*p
, off_t obj_offset
);
347 /***********************************************************
349 * PACK WINDOW MANAGEMENT
351 ***********************************************************/
353 void pack_window_free_all(struct pack_backend
*backend
, struct pack_file
*p
)
356 struct pack_window
*w
= p
->windows
;
357 assert(w
->inuse_cnt
== 0);
359 backend
->mapped
-= w
->window_map
.len
;
360 backend
->open_windows
--;
362 gitfo_free_map(&w
->window_map
);
364 p
->windows
= w
->next
;
369 GIT_INLINE(int) pack_window_contains(struct pack_window
*win
, off_t offset
)
371 /* We must promise at least 20 bytes (one hash) after the
372 * offset is available from this window, otherwise the offset
373 * is not actually in this window and a different window (which
374 * has that one hash excess) must be used. This is to support
375 * the object header and delta base parsing routines below.
377 off_t win_off
= win
->offset
;
378 return win_off
<= offset
379 && (offset
+ 20) <= (off_t
)(win_off
+ win
->window_map
.len
);
382 static void pack_window_scan_lru(
384 struct pack_file
**lru_p
,
385 struct pack_window
**lru_w
,
386 struct pack_window
**lru_l
)
388 struct pack_window
*w
, *w_l
;
390 for (w_l
= NULL
, w
= p
->windows
; w
; w
= w
->next
) {
392 if (!*lru_w
|| w
->last_used
< (*lru_w
)->last_used
) {
402 static int pack_window_close_lru(
403 struct pack_backend
*backend
,
404 struct pack_file
*current
,
407 struct pack_file
*lru_p
= NULL
;
408 struct pack_window
*lru_w
= NULL
, *lru_l
= NULL
;
412 pack_window_scan_lru(current
, &lru_p
, &lru_w
, &lru_l
);
414 for (i
= 0; i
< backend
->packs
.length
; ++i
)
415 pack_window_scan_lru(git_vector_get(&backend
->packs
, i
), &lru_p
, &lru_w
, &lru_l
);
418 backend
->mapped
-= lru_w
->window_map
.len
;
419 gitfo_free_map(&lru_w
->window_map
);
422 lru_l
->next
= lru_w
->next
;
424 lru_p
->windows
= lru_w
->next
;
425 if (!lru_p
->windows
&& lru_p
->pack_fd
!= keep_fd
) {
426 gitfo_close(lru_p
->pack_fd
);
432 backend
->open_windows
--;
436 return git__throw(GIT_ERROR
, "Failed to close pack window");
439 static void pack_window_close(struct pack_window
**w_cursor
)
441 struct pack_window
*w
= *w_cursor
;
448 static unsigned char *pack_window_open(
449 struct pack_backend
*backend
,
451 struct pack_window
**w_cursor
,
455 struct pack_window
*win
= *w_cursor
;
457 if (p
->pack_fd
== -1 && packfile_open(p
) < GIT_SUCCESS
)
460 /* Since packfiles end in a hash of their content and it's
461 * pointless to ask for an offset into the middle of that
462 * hash, and the pack_window_contains function above wouldn't match
463 * don't allow an offset too close to the end of the file.
465 if (offset
> (p
->pack_size
- 20))
468 if (!win
|| !pack_window_contains(win
, offset
)) {
473 for (win
= p
->windows
; win
; win
= win
->next
) {
474 if (pack_window_contains(win
, offset
))
479 size_t window_align
= backend
->window_size
/ 2;
482 win
= git__calloc(1, sizeof(*win
));
483 win
->offset
= (offset
/ window_align
) * window_align
;
485 len
= (size_t)(p
->pack_size
- win
->offset
);
486 if (len
> backend
->window_size
)
487 len
= backend
->window_size
;
489 backend
->mapped
+= len
;
491 while (backend
->mapped_limit
< backend
->mapped
&&
492 pack_window_close_lru(backend
, p
, p
->pack_fd
) == GIT_SUCCESS
) {}
494 if (gitfo_map_ro(&win
->window_map
, p
->pack_fd
,
495 win
->offset
, len
) < GIT_SUCCESS
)
498 backend
->mmap_calls
++;
499 backend
->open_windows
++;
501 if (backend
->mapped
> backend
->peak_mapped
)
502 backend
->peak_mapped
= backend
->mapped
;
504 if (backend
->open_windows
> backend
->peak_open_windows
)
505 backend
->peak_open_windows
= backend
->open_windows
;
507 win
->next
= p
->windows
;
512 if (win
!= *w_cursor
) {
513 win
->last_used
= backend
->used_ctr
++;
518 offset
-= win
->offset
;
519 assert(git__is_sizet(offset
));
522 *left
= win
->window_map
.len
- (size_t)offset
;
524 return (unsigned char *)win
->window_map
.data
+ offset
;
533 /***********************************************************
537 ***********************************************************/
539 static void pack_index_free(struct pack_file
*p
)
541 if (p
->index_map
.data
) {
542 gitfo_free_map(&p
->index_map
);
543 p
->index_map
.data
= NULL
;
547 static int pack_index_check(const char *path
, struct pack_file
*p
)
549 struct pack_idx_header
*hdr
;
550 uint32_t version
, nr
, i
, *index
;
557 /* TODO: properly open the file without access time */
558 git_file fd
= gitfo_open(path
, O_RDONLY
/*| O_NOATIME */);
563 return git__throw(GIT_EOSERR
, "Failed to check index. File missing or corrupted");
565 if (gitfo_fstat(fd
, &st
) < GIT_SUCCESS
) {
567 return git__throw(GIT_EOSERR
, "Failed to check index. File appears to be corrupted");
570 if (!git__is_sizet(st
.st_size
))
573 idx_size
= (size_t)st
.st_size
;
575 if (idx_size
< 4 * 256 + 20 + 20) {
577 return git__throw(GIT_EOBJCORRUPTED
, "Failed to check index. Object is corrupted");
580 error
= gitfo_map_ro(&p
->index_map
, fd
, 0, idx_size
);
583 if (error
< GIT_SUCCESS
)
584 return git__rethrow(error
, "Failed to check index");
586 hdr
= idx_map
= p
->index_map
.data
;
588 if (hdr
->idx_signature
== htonl(PACK_IDX_SIGNATURE
)) {
589 version
= ntohl(hdr
->idx_version
);
591 if (version
< 2 || version
> 2) {
592 gitfo_free_map(&p
->index_map
);
593 return git__throw(GIT_EOBJCORRUPTED
, "Failed to check index. Unsupported index version");
603 index
+= 2; /* skip index header */
605 for (i
= 0; i
< 256; i
++) {
606 uint32_t n
= ntohl(index
[i
]);
608 gitfo_free_map(&p
->index_map
);
609 return git__throw(GIT_EOBJCORRUPTED
, "Failed to check index. Index is non-monotonic");
617 * - 256 index entries 4 bytes each
618 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
619 * - 20-byte SHA1 of the packfile
620 * - 20-byte SHA1 file checksum
622 if (idx_size
!= 4*256 + nr
* 24 + 20 + 20) {
623 gitfo_free_map(&p
->index_map
);
624 return git__throw(GIT_EOBJCORRUPTED
, "Failed to check index. Object is corrupted");
626 } else if (version
== 2) {
629 * - 8 bytes of header
630 * - 256 index entries 4 bytes each
631 * - 20-byte sha1 entry * nr
632 * - 4-byte crc entry * nr
633 * - 4-byte offset entry * nr
634 * - 20-byte SHA1 of the packfile
635 * - 20-byte SHA1 file checksum
636 * And after the 4-byte offset table might be a
637 * variable sized table containing 8-byte entries
638 * for offsets larger than 2^31.
640 unsigned long min_size
= 8 + 4*256 + nr
*(20 + 4 + 4) + 20 + 20;
641 unsigned long max_size
= min_size
;
644 max_size
+= (nr
- 1)*8;
646 if (idx_size
< min_size
|| idx_size
> max_size
) {
647 gitfo_free_map(&p
->index_map
);
648 return git__throw(GIT_EOBJCORRUPTED
, "Failed to check index. Wrong index size");
651 /* Make sure that off_t is big enough to access the whole pack...
652 * Is this an issue in libgit2? It shouldn't. */
653 if (idx_size
!= min_size
&& (sizeof(off_t
) <= 4)) {
654 gitfo_free_map(&p
->index_map
);
655 return git__throw(GIT_EOSERR
, "Failed to check index. off_t not big enough to access the whole pack");
659 p
->index_version
= version
;
664 static int pack_index_open(struct pack_file
*p
)
669 if (p
->index_map
.data
)
672 idx_name
= git__strdup(p
->pack_name
);
673 strcpy(idx_name
+ strlen(idx_name
) - STRLEN(".pack"), ".idx");
675 error
= pack_index_check(idx_name
, p
);
678 return error
== GIT_SUCCESS
? GIT_SUCCESS
: git__rethrow(error
, "Failed to open index");
689 /***********************************************************
693 ***********************************************************/
695 static int packfile_sort__cb(const void *a_
, const void *b_
)
697 struct pack_file
*a
= *((struct pack_file
**)a_
);
698 struct pack_file
*b
= *((struct pack_file
**)b_
);
702 * Local packs tend to contain objects specific to our
703 * variant of the project than remote ones. In addition,
704 * remote ones could be on a network mounted filesystem.
705 * Favor local ones for these reasons.
707 st
= a
->pack_local
- b
->pack_local
;
712 * Younger packs tend to contain more recent objects,
713 * and more recent objects tend to get accessed more
716 if (a
->mtime
< b
->mtime
)
718 else if (a
->mtime
== b
->mtime
)
724 static struct pack_file
*packfile_alloc(int extra
)
726 struct pack_file
*p
= git__malloc(sizeof(*p
) + extra
);
727 memset(p
, 0, sizeof(*p
));
733 static void packfile_free(struct pack_backend
*backend
, struct pack_file
*p
)
737 /* clear_delta_base_cache(); */
738 pack_window_free_all(backend
, p
);
740 if (p
->pack_fd
!= -1)
741 gitfo_close(p
->pack_fd
);
745 free(p
->bad_object_sha1
);
749 static int packfile_open(struct pack_file
*p
)
752 struct pack_header hdr
;
754 unsigned char *idx_sha1
;
756 if (!p
->index_map
.data
&& pack_index_open(p
) < GIT_SUCCESS
)
757 return git__throw(GIT_ENOTFOUND
, "Failed to open packfile. File not found");
759 /* TODO: open with noatime */
760 p
->pack_fd
= gitfo_open(p
->pack_name
, O_RDONLY
);
761 if (p
->pack_fd
< 0 || gitfo_fstat(p
->pack_fd
, &st
) < GIT_SUCCESS
)
762 return git__throw(GIT_EOSERR
, "Failed to open packfile. File appears to be corrupted");
764 /* If we created the struct before we had the pack we lack size. */
766 if (!S_ISREG(st
.st_mode
))
768 p
->pack_size
= (off_t
)st
.st_size
;
769 } else if (p
->pack_size
!= st
.st_size
)
773 /* We leave these file descriptors open with sliding mmap;
774 * there is no point keeping them open across exec(), though.
776 fd_flag
= fcntl(p
->pack_fd
, F_GETFD
, 0);
778 return error("cannot determine file descriptor flags");
780 fd_flag
|= FD_CLOEXEC
;
781 if (fcntl(p
->pack_fd
, F_SETFD
, fd_flag
) == -1)
785 /* Verify we recognize this pack file format. */
786 if (gitfo_read(p
->pack_fd
, &hdr
, sizeof(hdr
)) < GIT_SUCCESS
)
789 if (hdr
.hdr_signature
!= htonl(PACK_SIGNATURE
))
792 if (!pack_version_ok(hdr
.hdr_version
))
795 /* Verify the pack matches its index. */
796 if (p
->num_objects
!= ntohl(hdr
.hdr_entries
))
799 if (gitfo_lseek(p
->pack_fd
, p
->pack_size
- GIT_OID_RAWSZ
, SEEK_SET
) == -1)
802 if (gitfo_read(p
->pack_fd
, sha1
.id
, GIT_OID_RAWSZ
) < GIT_SUCCESS
)
805 idx_sha1
= ((unsigned char *)p
->index_map
.data
) + p
->index_map
.len
- 40;
807 if (git_oid_cmp(&sha1
, (git_oid
*)idx_sha1
) != 0)
813 gitfo_close(p
->pack_fd
);
815 return git__throw(GIT_EPACKCORRUPTED
, "Failed to packfile. Pack is corrupted");
818 static int packfile_check(struct pack_file
**pack_out
, const char *path
)
825 path_len
= strlen(path
);
826 p
= packfile_alloc(path_len
+ 2);
829 * Make sure a corresponding .pack file exists and that
830 * the index looks sane.
832 path_len
-= STRLEN(".idx");
835 return git__throw(GIT_ENOTFOUND
, "Failed to check packfile. Wrong path name");
838 memcpy(p
->pack_name
, path
, path_len
);
840 strcpy(p
->pack_name
+ path_len
, ".keep");
841 if (gitfo_exists(p
->pack_name
) == GIT_SUCCESS
)
844 strcpy(p
->pack_name
+ path_len
, ".pack");
845 if (gitfo_stat(p
->pack_name
, &st
) < GIT_SUCCESS
|| !S_ISREG(st
.st_mode
)) {
847 return git__throw(GIT_ENOTFOUND
, "Failed to check packfile. File not found");
850 /* ok, it looks sane as far as we can check without
851 * actually mapping the pack file.
853 p
->pack_size
= (off_t
)st
.st_size
;
855 p
->mtime
= (git_time_t
)st
.st_mtime
;
857 /* see if we can parse the sha1 oid in the packfile name */
859 git_oid_fromstr(&p
->sha1
, path
+ path_len
- GIT_OID_HEXSZ
) < GIT_SUCCESS
)
860 memset(&p
->sha1
, 0x0, GIT_OID_RAWSZ
);
866 static int packfile_load__cb(void *_data
, char *path
)
868 struct pack_backend
*backend
= (struct pack_backend
*)_data
;
869 struct pack_file
*pack
;
873 if (git__suffixcmp(path
, ".idx") != 0)
874 return GIT_SUCCESS
; /* not an index */
876 for (i
= 0; i
< backend
->packs
.length
; ++i
) {
877 struct pack_file
*p
= git_vector_get(&backend
->packs
, i
);
878 if (memcmp(p
->pack_name
, path
, strlen(path
) - STRLEN(".idx")) == 0)
882 error
= packfile_check(&pack
, path
);
883 if (error
< GIT_SUCCESS
)
884 return git__rethrow(error
, "Failed to load packfile");
886 if (git_vector_insert(&backend
->packs
, pack
) < GIT_SUCCESS
) {
894 static int packfile_refresh_all(struct pack_backend
*backend
)
899 if (backend
->pack_folder
== NULL
)
902 if (gitfo_stat(backend
->pack_folder
, &st
) < 0 || !S_ISDIR(st
.st_mode
))
903 return git__throw(GIT_ENOTFOUND
, "Failed to refresh packfiles. Backend not found");
905 if (st
.st_mtime
!= backend
->pack_folder_mtime
) {
906 char path
[GIT_PATH_MAX
];
907 strcpy(path
, backend
->pack_folder
);
909 /* reload all packs */
910 error
= gitfo_dirent(path
, GIT_PATH_MAX
, packfile_load__cb
, (void *)backend
);
911 if (error
< GIT_SUCCESS
)
912 return git__rethrow(error
, "Failed to refresh packfiles");
914 git_vector_sort(&backend
->packs
);
915 backend
->pack_folder_mtime
= st
.st_mtime
;
928 /***********************************************************
930 * PACKFILE ENTRY SEARCH INTERNALS
932 ***********************************************************/
934 static off_t
nth_packed_object_offset(const struct pack_file
*p
, uint32_t n
)
936 const unsigned char *index
= p
->index_map
.data
;
938 if (p
->index_version
== 1) {
939 return ntohl(*((uint32_t *)(index
+ 24 * n
)));
942 index
+= 8 + p
->num_objects
* (20 + 4);
943 off
= ntohl(*((uint32_t *)(index
+ 4 * n
)));
944 if (!(off
& 0x80000000))
946 index
+= p
->num_objects
* 4 + (off
& 0x7fffffff) * 8;
947 return (((uint64_t)ntohl(*((uint32_t *)(index
+ 0)))) << 32) |
948 ntohl(*((uint32_t *)(index
+ 4)));
952 static int pack_entry_find_offset(
956 const git_oid
*short_oid
,
959 const uint32_t *level1_ofs
= p
->index_map
.data
;
960 const unsigned char *index
= p
->index_map
.data
;
961 unsigned hi
, lo
, stride
;
963 const unsigned char *current
= 0;
970 if ((error
= pack_index_open(p
)) < GIT_SUCCESS
)
971 return git__rethrow(error
, "Failed to find offset for pack entry");
973 assert(p
->index_map
.data
);
975 index
= p
->index_map
.data
;
976 level1_ofs
= p
->index_map
.data
;
979 if (p
->index_version
> 1) {
985 hi
= ntohl(level1_ofs
[(int)short_oid
->id
[0]]);
986 lo
= ((short_oid
->id
[0] == 0x0) ? 0 : ntohl(level1_ofs
[(int)short_oid
->id
[0] - 1]));
988 if (p
->index_version
> 1) {
995 #ifdef INDEX_DEBUG_LOOKUP
996 printf("%02x%02x%02x... lo %u hi %u nr %d\n",
997 short_oid
->id
[0], short_oid
->id
[1], short_oid
->id
[2], lo
, hi
, p
->num_objects
);
1000 /* Use git.git lookup code */
1001 pos
= sha1_entry_pos(index
, stride
, 0, lo
, hi
, p
->num_objects
, short_oid
->id
);
1004 /* An object matching exactly the oid was found */
1006 current
= index
+ pos
* stride
;
1008 /* No object was found */
1009 /* pos refers to the object with the "closest" oid to short_oid */
1011 if (pos
< (int)p
->num_objects
) {
1012 current
= index
+ pos
* stride
;
1014 if (!git_oid_ncmp_raw(len
, short_oid
->id
, current
)) {
1020 if (found
&& pos
+ 1 < (int)p
->num_objects
) {
1021 /* Check for ambiguousity */
1022 const unsigned char *next
= current
+ stride
;
1024 if (!git_oid_ncmp_raw(len
, short_oid
->id
, next
)) {
1030 return git__throw(GIT_ENOTFOUND
, "Failed to find offset for pack entry. Entry not found");
1031 } else if (found
> 1) {
1032 return git__throw(GIT_EAMBIGUOUSOIDPREFIX
, "Failed to find offset for pack entry. Ambiguous sha1 prefix within pack");
1034 *offset_out
= nth_packed_object_offset(p
, pos
);
1035 git_oid_fromraw(found_oid
, current
);
1037 #ifdef INDEX_DEBUG_LOOKUP
1038 unsigned char hex_sha1
[GIT_OID_HEXSZ
+ 1];
1039 git_oid_fmt(hex_sha1
, found_oid
);
1040 hex_sha1
[GIT_OID_HEXSZ
] = '\0';
1041 printf("found lo=%d %s\n", lo
, hex_sha1
);
1047 static int pack_entry_find1(
1048 struct pack_entry
*e
,
1049 struct pack_file
*p
,
1050 const git_oid
*short_oid
,
1059 if (len
== GIT_OID_HEXSZ
&& p
->num_bad_objects
) {
1061 for (i
= 0; i
< p
->num_bad_objects
; i
++)
1062 if (git_oid_cmp(short_oid
, &p
->bad_object_sha1
[i
]) == 0)
1063 return git__throw(GIT_ERROR
, "Failed to find pack entry. Bad object found");
1066 error
= pack_entry_find_offset(&offset
, &found_oid
, p
, short_oid
, len
);
1067 if (error
< GIT_SUCCESS
)
1068 return git__rethrow(error
, "Failed to find pack entry. Couldn't find offset");
1070 /* we found a unique entry in the index;
1071 * make sure the packfile backing the index
1072 * still exists on disk */
1073 if (p
->pack_fd
== -1 && packfile_open(p
) < GIT_SUCCESS
)
1074 return git__throw(GIT_EOSERR
, "Failed to find pack entry. Packfile doesn't exist on disk");
1079 git_oid_cpy(&e
->sha1
, &found_oid
);
1083 static int pack_entry_find(struct pack_entry
*e
, struct pack_backend
*backend
, const git_oid
*oid
)
1088 if ((error
= packfile_refresh_all(backend
)) < GIT_SUCCESS
)
1089 return git__rethrow(error
, "Failed to find pack entry");
1091 if (backend
->last_found
&&
1092 pack_entry_find1(e
, backend
->last_found
, oid
, GIT_OID_HEXSZ
) == GIT_SUCCESS
)
1095 for (i
= 0; i
< backend
->packs
.length
; ++i
) {
1096 struct pack_file
*p
;
1098 p
= git_vector_get(&backend
->packs
, i
);
1099 if (p
== backend
->last_found
)
1102 if (pack_entry_find1(e
, p
, oid
, GIT_OID_HEXSZ
) == GIT_SUCCESS
) {
1103 backend
->last_found
= p
;
1108 return git__throw(GIT_ENOTFOUND
, "Failed to find pack entry");
1111 static int pack_entry_find_prefix(
1112 struct pack_entry
*e
,
1113 struct pack_backend
*backend
,
1114 const git_oid
*short_oid
,
1121 if ((error
= packfile_refresh_all(backend
)) < GIT_SUCCESS
)
1122 return git__rethrow(error
, "Failed to find pack entry");
1124 if (backend
->last_found
) {
1125 error
= pack_entry_find1(e
, backend
->last_found
, short_oid
, len
);
1126 if (error
== GIT_EAMBIGUOUSOIDPREFIX
) {
1127 return git__rethrow(error
, "Failed to find pack entry. Ambiguous sha1 prefix");
1128 } else if (error
== GIT_SUCCESS
) {
1133 for (i
= 0; i
< backend
->packs
.length
; ++i
) {
1134 struct pack_file
*p
;
1136 p
= git_vector_get(&backend
->packs
, i
);
1137 if (p
== backend
->last_found
)
1140 error
= pack_entry_find1(e
, p
, short_oid
, len
);
1141 if (error
== GIT_EAMBIGUOUSOIDPREFIX
) {
1142 return git__rethrow(error
, "Failed to find pack entry. Ambiguous sha1 prefix");
1143 } else if (error
== GIT_SUCCESS
) {
1147 backend
->last_found
= p
;
1152 return git__rethrow(GIT_ENOTFOUND
, "Failed to find pack entry");
1153 } else if (found
> 1) {
1154 return git__rethrow(GIT_EAMBIGUOUSOIDPREFIX
, "Failed to find pack entry. Ambiguous sha1 prefix");
1172 /***********************************************************
1174 * PACKFILE ENTRY UNPACK INTERNALS
1176 ***********************************************************/
1178 static unsigned long packfile_unpack_header1(
1181 const unsigned char *buf
,
1185 unsigned long size
, c
;
1186 unsigned long used
= 0;
1189 *type
= (c
>> 4) & 7;
1193 if (len
<= used
|| bitsizeof(long) <= shift
)
1197 size
+= (c
& 0x7f) << shift
;
1201 *sizep
= (size_t)size
;
1205 static int packfile_unpack_header(
1208 struct pack_backend
*backend
,
1209 struct pack_file
*p
,
1210 struct pack_window
**w_curs
,
1213 unsigned char *base
;
1217 /* pack_window_open() assures us we have [base, base + 20) available
1218 * as a range that we can look at at. (Its actually the hash
1219 * size that is assured.) With our object header encoding
1220 * the maximum deflated object size is 2^137, which is just
1221 * insane, so we know won't exceed what we have been given.
1223 base
= pack_window_open(backend
, p
, w_curs
, *curpos
, &left
);
1227 used
= packfile_unpack_header1(size_p
, type_p
, base
, left
);
1230 return git__throw(GIT_EOBJCORRUPTED
, "Header length is zero");
1236 static int packfile_unpack_compressed(
1238 struct pack_backend
*backend
,
1239 struct pack_file
*p
,
1240 struct pack_window
**w_curs
,
1247 unsigned char *buffer
, *in
;
1249 buffer
= git__malloc(size
);
1251 memset(&stream
, 0, sizeof(stream
));
1252 stream
.next_out
= buffer
;
1253 stream
.avail_out
= size
+ 1;
1255 st
= inflateInit(&stream
);
1258 return git__throw(GIT_EZLIB
, "Error in zlib");
1262 in
= pack_window_open(backend
, p
, w_curs
, curpos
, &stream
.avail_in
);
1263 stream
.next_in
= in
;
1264 st
= inflate(&stream
, Z_FINISH
);
1266 if (!stream
.avail_out
)
1267 break; /* the payload is larger than it should be */
1269 curpos
+= stream
.next_in
- in
;
1270 } while (st
== Z_OK
|| st
== Z_BUF_ERROR
);
1272 inflateEnd(&stream
);
1274 if ((st
!= Z_STREAM_END
) || stream
.total_out
!= size
) {
1276 return git__throw(GIT_EZLIB
, "Error in zlib");
1285 static off_t
get_delta_base(
1286 struct pack_backend
*backend
,
1287 struct pack_file
*p
,
1288 struct pack_window
**w_curs
,
1291 off_t delta_obj_offset
)
1293 unsigned char *base_info
= pack_window_open(backend
, p
, w_curs
, *curpos
, NULL
);
1297 /* pack_window_open() assured us we have [base_info, base_info + 20)
1298 * as a range that we can look at without walking off the
1299 * end of the mapped window. Its actually the hash size
1300 * that is assured. An OFS_DELTA longer than the hash size
1301 * is stupid, as then a REF_DELTA would be smaller to store.
1303 if (type
== GIT_OBJ_OFS_DELTA
) {
1305 unsigned char c
= base_info
[used
++];
1306 base_offset
= c
& 127;
1309 if (!base_offset
|| MSB(base_offset
, 7))
1310 return 0; /* overflow */
1311 c
= base_info
[used
++];
1312 base_offset
= (base_offset
<< 7) + (c
& 127);
1314 base_offset
= delta_obj_offset
- base_offset
;
1315 if (base_offset
<= 0 || base_offset
>= delta_obj_offset
)
1316 return 0; /* out of bound */
1318 } else if (type
== GIT_OBJ_REF_DELTA
) {
1319 /* The base entry _must_ be in the same pack */
1320 if (pack_entry_find_offset(&base_offset
, &unused
, p
, (git_oid
*)base_info
, GIT_OID_HEXSZ
) < GIT_SUCCESS
)
1321 return git__throw(GIT_EPACKCORRUPTED
, "Base entry delta is not in the same pack");
1329 static int packfile_unpack_delta(
1331 struct pack_backend
*backend
,
1332 struct pack_file
*p
,
1333 struct pack_window
**w_curs
,
1336 git_otype delta_type
,
1340 git_rawobj base
, delta
;
1343 base_offset
= get_delta_base(backend
, p
, w_curs
, &curpos
, delta_type
, obj_offset
);
1344 if (base_offset
== 0)
1345 return git__throw(GIT_EOBJCORRUPTED
, "Delta offset is zero");
1347 pack_window_close(w_curs
);
1348 error
= packfile_unpack(&base
, backend
, p
, base_offset
);
1350 /* TODO: git.git tries to load the base from other packfiles
1351 * or loose objects */
1352 if (error
< GIT_SUCCESS
)
1353 return git__rethrow(error
, "Corrupted delta");
1355 error
= packfile_unpack_compressed(&delta
, backend
, p
, w_curs
, curpos
, delta_size
, delta_type
);
1356 if (error
< GIT_SUCCESS
) {
1358 return git__rethrow(error
, "Corrupted delta");
1361 obj
->type
= base
.type
;
1362 error
= git__delta_apply(obj
,
1363 base
.data
, base
.len
,
1364 delta
.data
, delta
.len
);
1369 /* TODO: we might want to cache this shit. eventually */
1370 //add_delta_base_cache(p, base_offset, base, base_size, *type);
1371 return error
; /* error set by git__delta_apply */
1374 static int packfile_unpack(
1376 struct pack_backend
*backend
,
1377 struct pack_file
*p
,
1380 struct pack_window
*w_curs
= NULL
;
1381 off_t curpos
= obj_offset
;
1388 * TODO: optionally check the CRC on the packfile
1393 obj
->type
= GIT_OBJ_BAD
;
1395 error
= packfile_unpack_header(&size
, &type
, backend
, p
, &w_curs
, &curpos
);
1396 if (error
< GIT_SUCCESS
)
1397 return git__rethrow(error
, "Failed to unpack packfile");
1400 case GIT_OBJ_OFS_DELTA
:
1401 case GIT_OBJ_REF_DELTA
:
1402 error
= packfile_unpack_delta(
1403 obj
, backend
, p
, &w_curs
, curpos
,
1404 size
, type
, obj_offset
);
1407 case GIT_OBJ_COMMIT
:
1411 error
= packfile_unpack_compressed(
1412 obj
, backend
, p
, &w_curs
, curpos
,
1417 error
= GIT_EOBJCORRUPTED
;
1421 pack_window_close(&w_curs
);
1422 return error
== GIT_SUCCESS
? GIT_SUCCESS
: git__rethrow(error
, "Failed to unpack packfile");
1429 /***********************************************************
1431 * PACKED BACKEND PUBLIC API
1433 * Implement the git_odb_backend API calls
1435 ***********************************************************/
1438 int pack_backend__read_header(git_rawobj *obj, git_odb_backend *backend, const git_oid *oid)
1440 pack_location location;
1442 assert(obj && backend && oid);
1444 if (locate_packfile(&location, (struct pack_backend *)backend, oid) < 0)
1445 return GIT_ENOTFOUND;
1447 return read_header_packed(obj, &location);
1451 int pack_backend__read(void **buffer_p
, size_t *len_p
, git_otype
*type_p
, git_odb_backend
*backend
, const git_oid
*oid
)
1453 struct pack_entry e
;
1457 if ((error
= pack_entry_find(&e
, (struct pack_backend
*)backend
, oid
)) < GIT_SUCCESS
)
1458 return git__rethrow(error
, "Failed to read pack backend");
1460 if ((error
= packfile_unpack(&raw
, (struct pack_backend
*)backend
, e
.p
, e
.offset
)) < GIT_SUCCESS
)
1461 return git__rethrow(error
, "Failed to read pack backend");
1463 *buffer_p
= raw
.data
;
1470 int pack_backend__read_prefix(
1475 git_odb_backend
*backend
,
1476 const git_oid
*short_oid
,
1479 if (len
< GIT_OID_MINPREFIXLEN
)
1480 return git__throw(GIT_EAMBIGUOUSOIDPREFIX
, "Failed to read pack backend. Prefix length is lower than %d.", GIT_OID_MINPREFIXLEN
);
1482 if (len
>= GIT_OID_HEXSZ
) {
1483 /* We can fall back to regular read method */
1484 int error
= pack_backend__read(buffer_p
, len_p
, type_p
, backend
, short_oid
);
1485 if (error
== GIT_SUCCESS
)
1486 git_oid_cpy(out_oid
, short_oid
);
1490 struct pack_entry e
;
1494 if ((error
= pack_entry_find_prefix(&e
, (struct pack_backend
*)backend
, short_oid
, len
)) < GIT_SUCCESS
)
1495 return git__rethrow(error
, "Failed to read pack backend");
1497 if ((error
= packfile_unpack(&raw
, (struct pack_backend
*)backend
, e
.p
, e
.offset
)) < GIT_SUCCESS
)
1498 return git__rethrow(error
, "Failed to read pack backend");
1500 *buffer_p
= raw
.data
;
1503 git_oid_cpy(out_oid
, &e
.sha1
);
1509 int pack_backend__exists(git_odb_backend
*backend
, const git_oid
*oid
)
1511 struct pack_entry e
;
1512 return pack_entry_find(&e
, (struct pack_backend
*)backend
, oid
) == GIT_SUCCESS
;
1515 void pack_backend__free(git_odb_backend
*_backend
)
1517 struct pack_backend
*backend
;
1522 backend
= (struct pack_backend
*)_backend
;
1524 for (i
= 0; i
< backend
->packs
.length
; ++i
) {
1525 struct pack_file
*p
= git_vector_get(&backend
->packs
, i
);
1526 packfile_free(backend
, p
);
1529 git_vector_free(&backend
->packs
);
1530 free(backend
->pack_folder
);
1534 int git_odb_backend_pack(git_odb_backend
**backend_out
, const char *objects_dir
)
1536 struct pack_backend
*backend
;
1537 char path
[GIT_PATH_MAX
];
1539 backend
= git__calloc(1, sizeof(struct pack_backend
));
1540 if (backend
== NULL
)
1543 if (git_vector_init(&backend
->packs
, 8, packfile_sort__cb
) < GIT_SUCCESS
) {
1548 backend
->window_size
= DEFAULT_WINDOW_SIZE
;
1549 backend
->mapped_limit
= DEFAULT_MAPPED_LIMIT
;
1551 git__joinpath(path
, objects_dir
, "pack");
1552 if (gitfo_isdir(path
) == GIT_SUCCESS
) {
1553 backend
->pack_folder
= git__strdup(path
);
1554 backend
->pack_folder_mtime
= 0;
1556 if (backend
->pack_folder
== NULL
) {
1562 backend
->parent
.read
= &pack_backend__read
;
1563 backend
->parent
.read_prefix
= &pack_backend__read_prefix
;
1564 backend
->parent
.read_header
= NULL
;
1565 backend
->parent
.exists
= &pack_backend__exists
;
1566 backend
->parent
.free
= &pack_backend__free
;
1568 *backend_out
= (git_odb_backend
*)backend
;