2 * Copyright (C) 2009-2012 the libgit2 contributors
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
10 #include "git2/repository.h"
15 #include "delta-apply.h"
16 #include "sha1_lookup.h"
20 #include "git2/odb_backend.h"
23 git_odb_backend parent
;
25 struct git_pack_file
*last_found
;
29 struct pack_writepack
{
30 struct git_odb_writepack parent
;
31 git_indexer_stream
*indexer_stream
;
35 * The wonderful tale of a Packed Object lookup query
36 * ===================================================
37 * A riveting and epic story of epicness and ASCII
38 * art, presented by yours truly,
42 * Chapter 1: Once upon a time...
43 * Initialization of the Pack Backend
44 * --------------------------------------------------
46 * # git_odb_backend_pack
47 * | Creates the pack backend structure, initializes the
48 * | callback pointers to our default read() and exist() methods,
49 * | and tries to preload all the known packfiles in the ODB.
51 * |-# packfile_load_all
52 * | Tries to find the `pack` folder, if it exists. ODBs without
53 * | a pack folder are ignored altogether. If there's a `pack` folder
54 * | we run a `dirent` callback through every file in the pack folder
55 * | to find our packfiles. The packfiles are then sorted according
56 * | to a sorting callback.
58 * |-# packfile_load__cb
59 * | | This callback is called from `dirent` with every single file
60 * | | inside the pack folder. We find the packs by actually locating
61 * | | their index (ends in ".idx"). From that index, we verify that
62 * | | the corresponding packfile exists and is valid, and if so, we
63 * | | add it to the pack list.
65 * | |-# packfile_check
66 * | Make sure that there's a packfile to back this index, and store
67 * | some very basic information regarding the packfile itself,
68 * | such as the full path, the size, and the modification time.
69 * | We don't actually open the packfile to check for internal consistency.
71 * |-# packfile_sort__cb
72 * Sort all the preloaded packs according to some specific criteria:
73 * we prioritize the "newer" packs because it's more likely they
74 * contain the objects we are looking for, and we prioritize local
75 * packs over remote ones.
79 * Chapter 2: To be, or not to be...
80 * A standard packed `exist` query for an OID
81 * --------------------------------------------------
83 * # pack_backend__exists
84 * | Check if the given SHA1 oid exists in any of the packs
85 * | that have been loaded for our ODB.
88 * | Iterate through all the packs that have been preloaded
89 * | (starting by the pack where the latest object was found)
90 * | to try to find the OID in one of them.
92 * |-# pack_entry_find1
93 * | Check the index of an individual pack to see if the SHA1
94 * | OID can be found. If we can find the offset to that SHA1
95 * | inside of the index, that means the object is contained
96 * | inside of the packfile and we can stop searching.
97 * | Before returning, we verify that the packfile behing the
98 * | index we are searching still exists on disk.
100 * |-# pack_entry_find_offset
101 * | | Mmap the actual index file to disk if it hasn't been opened
102 * | | yet, and run a binary search through it to find the OID.
103 * | | See <http://book.git-scm.com/7_the_packfile.html> for specifics
104 * | | on the Packfile Index format and how do we find entries in it.
106 * | |-# pack_index_open
107 * | | Guess the name of the index based on the full path to the
108 * | | packfile, open it and verify its contents. Only if the index
109 * | | has not been opened already.
111 * | |-# pack_index_check
112 * | Mmap the index file and do a quick run through the header
113 * | to guess the index version (right now we support v1 and v2),
114 * | and to verify that the size of the index makes sense.
117 * See `packfile_open` in Chapter 3
121 * Chapter 3: The neverending story...
122 * A standard packed `lookup` query for an OID
123 * --------------------------------------------------
129 /***********************************************************
131 * FORWARD DECLARATIONS
133 ***********************************************************/
135 static void pack_window_free_all(struct pack_backend
*backend
, struct git_pack_file
*p
);
136 static int pack_window_contains(git_mwindow
*win
, off_t offset
);
138 static int packfile_sort__cb(const void *a_
, const void *b_
);
140 static int packfile_load__cb(void *_data
, git_buf
*path
);
141 static int packfile_refresh_all(struct pack_backend
*backend
);
143 static int pack_entry_find(struct git_pack_entry
*e
,
144 struct pack_backend
*backend
, const git_oid
*oid
);
146 /* Can find the offset of an object given
147 * a prefix of an identifier.
148 * Sets GIT_EAMBIGUOUS if short oid is ambiguous.
149 * This method assumes that len is between
150 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
152 static int pack_entry_find_prefix(
153 struct git_pack_entry
*e
,
154 struct pack_backend
*backend
,
155 const git_oid
*short_oid
,
160 /***********************************************************
162 * PACK WINDOW MANAGEMENT
164 ***********************************************************/
166 GIT_INLINE(void) pack_window_free_all(struct pack_backend
*backend
, struct git_pack_file
*p
)
169 git_mwindow_free_all(&p
->mwf
);
172 GIT_INLINE(int) pack_window_contains(git_mwindow
*win
, off_t offset
)
174 /* We must promise at least 20 bytes (one hash) after the
175 * offset is available from this window, otherwise the offset
176 * is not actually in this window and a different window (which
177 * has that one hash excess) must be used. This is to support
178 * the object header and delta base parsing routines below.
180 return git_mwindow_contains(win
, offset
+ 20);
183 static int packfile_sort__cb(const void *a_
, const void *b_
)
185 const struct git_pack_file
*a
= a_
;
186 const struct git_pack_file
*b
= b_
;
190 * Local packs tend to contain objects specific to our
191 * variant of the project than remote ones. In addition,
192 * remote ones could be on a network mounted filesystem.
193 * Favor local ones for these reasons.
195 st
= a
->pack_local
- b
->pack_local
;
200 * Younger packs tend to contain more recent objects,
201 * and more recent objects tend to get accessed more
204 if (a
->mtime
< b
->mtime
)
206 else if (a
->mtime
== b
->mtime
)
214 static int packfile_load__cb(void *_data
, git_buf
*path
)
216 struct pack_backend
*backend
= (struct pack_backend
*)_data
;
217 struct git_pack_file
*pack
;
221 if (git__suffixcmp(path
->ptr
, ".idx") != 0)
222 return 0; /* not an index */
224 for (i
= 0; i
< backend
->packs
.length
; ++i
) {
225 struct git_pack_file
*p
= git_vector_get(&backend
->packs
, i
);
226 if (memcmp(p
->pack_name
, git_buf_cstr(path
), git_buf_len(path
) - strlen(".idx")) == 0)
230 error
= git_packfile_check(&pack
, path
->ptr
);
231 if (error
== GIT_ENOTFOUND
)
232 /* ignore missing .pack file as git does */
237 return git_vector_insert(&backend
->packs
, pack
);
240 static int packfile_refresh_all(struct pack_backend
*backend
)
244 git_buf path
= GIT_BUF_INIT
;
246 if (backend
->pack_folder
== NULL
)
249 if (p_stat(backend
->pack_folder
, &st
) < 0 || !S_ISDIR(st
.st_mode
))
250 return git_odb__error_notfound("failed to refresh packfiles", NULL
);
252 git_buf_sets(&path
, backend
->pack_folder
);
254 /* reload all packs */
255 error
= git_path_direach(&path
, packfile_load__cb
, (void *)backend
);
262 git_vector_sort(&backend
->packs
);
267 static int pack_entry_find_inner(
268 struct git_pack_entry
*e
,
269 struct pack_backend
*backend
,
271 struct git_pack_file
*last_found
)
276 git_pack_entry_find(e
, last_found
, oid
, GIT_OID_HEXSZ
) == 0)
279 for (i
= 0; i
< backend
->packs
.length
; ++i
) {
280 struct git_pack_file
*p
;
282 p
= git_vector_get(&backend
->packs
, i
);
286 if (git_pack_entry_find(e
, p
, oid
, GIT_OID_HEXSZ
) == 0) {
287 backend
->last_found
= p
;
295 static int pack_entry_find(struct git_pack_entry
*e
, struct pack_backend
*backend
, const git_oid
*oid
)
298 struct git_pack_file
*last_found
= backend
->last_found
;
300 if (backend
->last_found
&&
301 git_pack_entry_find(e
, backend
->last_found
, oid
, GIT_OID_HEXSZ
) == 0)
304 if (!pack_entry_find_inner(e
, backend
, oid
, last_found
))
306 if ((error
= packfile_refresh_all(backend
)) < 0)
308 if (!pack_entry_find_inner(e
, backend
, oid
, last_found
))
311 return git_odb__error_notfound("failed to find pack entry", oid
);
314 static unsigned pack_entry_find_prefix_inner(
315 struct git_pack_entry
*e
,
316 struct pack_backend
*backend
,
317 const git_oid
*short_oid
,
319 struct git_pack_file
*last_found
)
326 error
= git_pack_entry_find(e
, last_found
, short_oid
, len
);
327 if (error
== GIT_EAMBIGUOUS
)
333 for (i
= 0; i
< backend
->packs
.length
; ++i
) {
334 struct git_pack_file
*p
;
336 p
= git_vector_get(&backend
->packs
, i
);
340 error
= git_pack_entry_find(e
, p
, short_oid
, len
);
341 if (error
== GIT_EAMBIGUOUS
)
346 backend
->last_found
= p
;
353 static int pack_entry_find_prefix(
354 struct git_pack_entry
*e
,
355 struct pack_backend
*backend
,
356 const git_oid
*short_oid
,
361 struct git_pack_file
*last_found
= backend
->last_found
;
363 if ((found
= pack_entry_find_prefix_inner(e
, backend
, short_oid
, len
, last_found
)) > 0)
365 if ((error
= packfile_refresh_all(backend
)) < 0)
367 found
= pack_entry_find_prefix_inner(e
, backend
, short_oid
, len
, last_found
);
371 return git_odb__error_notfound("no matching pack entry for prefix", short_oid
);
373 return git_odb__error_ambiguous("found multiple pack entries");
379 /***********************************************************
381 * PACKED BACKEND PUBLIC API
383 * Implement the git_odb_backend API calls
385 ***********************************************************/
387 static int pack_backend__read_header(size_t *len_p
, git_otype
*type_p
, struct git_odb_backend
*backend
, const git_oid
*oid
)
389 struct git_pack_entry e
;
392 assert(len_p
&& type_p
&& backend
&& oid
);
394 if ((error
= pack_entry_find(&e
, (struct pack_backend
*)backend
, oid
)) < 0)
397 return git_packfile_resolve_header(len_p
, type_p
, e
.p
, e
.offset
);
400 static int pack_backend__read(void **buffer_p
, size_t *len_p
, git_otype
*type_p
, git_odb_backend
*backend
, const git_oid
*oid
)
402 struct git_pack_entry e
;
406 if ((error
= pack_entry_find(&e
, (struct pack_backend
*)backend
, oid
)) < 0 ||
407 (error
= git_packfile_unpack(&raw
, e
.p
, &e
.offset
)) < 0)
410 *buffer_p
= raw
.data
;
417 static int pack_backend__read_prefix(
422 git_odb_backend
*backend
,
423 const git_oid
*short_oid
,
428 if (len
< GIT_OID_MINPREFIXLEN
)
429 error
= git_odb__error_ambiguous("prefix length too short");
431 else if (len
>= GIT_OID_HEXSZ
) {
432 /* We can fall back to regular read method */
433 error
= pack_backend__read(buffer_p
, len_p
, type_p
, backend
, short_oid
);
435 git_oid_cpy(out_oid
, short_oid
);
437 struct git_pack_entry e
;
440 if ((error
= pack_entry_find_prefix(
441 &e
, (struct pack_backend
*)backend
, short_oid
, len
)) == 0 &&
442 (error
= git_packfile_unpack(&raw
, e
.p
, &e
.offset
)) == 0)
444 *buffer_p
= raw
.data
;
447 git_oid_cpy(out_oid
, &e
.sha1
);
454 static int pack_backend__exists(git_odb_backend
*backend
, const git_oid
*oid
)
456 struct git_pack_entry e
;
457 return pack_entry_find(&e
, (struct pack_backend
*)backend
, oid
) == 0;
460 static int pack_backend__foreach(git_odb_backend
*_backend
, git_odb_foreach_cb cb
, void *data
)
463 struct git_pack_file
*p
;
464 struct pack_backend
*backend
;
467 assert(_backend
&& cb
);
468 backend
= (struct pack_backend
*)_backend
;
470 /* Make sure we know about the packfiles */
471 if ((error
= packfile_refresh_all(backend
)) < 0)
474 git_vector_foreach(&backend
->packs
, i
, p
) {
475 if ((error
= git_pack_foreach_entry(p
, cb
, data
)) < 0)
482 static int pack_backend__writepack_add(struct git_odb_writepack
*_writepack
, const void *data
, size_t size
, git_transfer_progress
*stats
)
484 struct pack_writepack
*writepack
= (struct pack_writepack
*)_writepack
;
488 return git_indexer_stream_add(writepack
->indexer_stream
, data
, size
, stats
);
491 static int pack_backend__writepack_commit(struct git_odb_writepack
*_writepack
, git_transfer_progress
*stats
)
493 struct pack_writepack
*writepack
= (struct pack_writepack
*)_writepack
;
497 return git_indexer_stream_finalize(writepack
->indexer_stream
, stats
);
500 static void pack_backend__writepack_free(struct git_odb_writepack
*_writepack
)
502 struct pack_writepack
*writepack
= (struct pack_writepack
*)_writepack
;
506 git_indexer_stream_free(writepack
->indexer_stream
);
507 git__free(writepack
);
510 static int pack_backend__writepack(struct git_odb_writepack
**out
,
511 git_odb_backend
*_backend
,
512 git_transfer_progress_callback progress_cb
,
513 void *progress_payload
)
515 struct pack_backend
*backend
;
516 struct pack_writepack
*writepack
;
518 assert(out
&& _backend
);
522 backend
= (struct pack_backend
*)_backend
;
524 writepack
= git__calloc(1, sizeof(struct pack_writepack
));
525 GITERR_CHECK_ALLOC(writepack
);
527 if (git_indexer_stream_new(&writepack
->indexer_stream
,
528 backend
->pack_folder
, progress_cb
, progress_payload
) < 0) {
529 git__free(writepack
);
533 writepack
->parent
.backend
= _backend
;
534 writepack
->parent
.add
= pack_backend__writepack_add
;
535 writepack
->parent
.commit
= pack_backend__writepack_commit
;
536 writepack
->parent
.free
= pack_backend__writepack_free
;
538 *out
= (git_odb_writepack
*)writepack
;
543 static void pack_backend__free(git_odb_backend
*_backend
)
545 struct pack_backend
*backend
;
550 backend
= (struct pack_backend
*)_backend
;
552 for (i
= 0; i
< backend
->packs
.length
; ++i
) {
553 struct git_pack_file
*p
= git_vector_get(&backend
->packs
, i
);
557 git_vector_free(&backend
->packs
);
558 git__free(backend
->pack_folder
);
562 int git_odb_backend_one_pack(git_odb_backend
**backend_out
, const char *idx
)
564 struct pack_backend
*backend
= NULL
;
565 struct git_pack_file
*packfile
= NULL
;
567 if (git_packfile_check(&packfile
, idx
) < 0)
570 backend
= git__calloc(1, sizeof(struct pack_backend
));
571 GITERR_CHECK_ALLOC(backend
);
572 backend
->parent
.version
= GIT_ODB_BACKEND_VERSION
;
574 if (git_vector_init(&backend
->packs
, 1, NULL
) < 0)
577 if (git_vector_insert(&backend
->packs
, packfile
) < 0)
580 backend
->parent
.read
= &pack_backend__read
;
581 backend
->parent
.read_prefix
= &pack_backend__read_prefix
;
582 backend
->parent
.read_header
= &pack_backend__read_header
;
583 backend
->parent
.exists
= &pack_backend__exists
;
584 backend
->parent
.foreach
= &pack_backend__foreach
;
585 backend
->parent
.free
= &pack_backend__free
;
587 *backend_out
= (git_odb_backend
*)backend
;
592 git_vector_free(&backend
->packs
);
598 int git_odb_backend_pack(git_odb_backend
**backend_out
, const char *objects_dir
)
600 struct pack_backend
*backend
= NULL
;
601 git_buf path
= GIT_BUF_INIT
;
603 backend
= git__calloc(1, sizeof(struct pack_backend
));
604 GITERR_CHECK_ALLOC(backend
);
605 backend
->parent
.version
= GIT_ODB_BACKEND_VERSION
;
607 if (git_vector_init(&backend
->packs
, 8, packfile_sort__cb
) < 0 ||
608 git_buf_joinpath(&path
, objects_dir
, "pack") < 0)
614 if (git_path_isdir(git_buf_cstr(&path
)) == true) {
615 backend
->pack_folder
= git_buf_detach(&path
);
618 backend
->parent
.read
= &pack_backend__read
;
619 backend
->parent
.read_prefix
= &pack_backend__read_prefix
;
620 backend
->parent
.read_header
= &pack_backend__read_header
;
621 backend
->parent
.exists
= &pack_backend__exists
;
622 backend
->parent
.foreach
= &pack_backend__foreach
;
623 backend
->parent
.writepack
= &pack_backend__writepack
;
624 backend
->parent
.free
= &pack_backend__free
;
626 *backend_out
= (git_odb_backend
*)backend
;