2 * Copyright (C) the libgit2 contributors. All rights reserved.
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
11 #include "git2/repository.h"
12 #include "git2/indexer.h"
13 #include "git2/sys/odb_backend.h"
18 #include "sha1_lookup.h"
22 #include "git2/odb_backend.h"
24 /* re-freshen pack files no more than every 2 seconds */
25 #define FRESHEN_FREQUENCY 2
/*
 * Members that the functions in this file reach through a
 * `struct pack_backend *` (`backend->parent.*`, `backend->last_found`).
 *
 * NOTE(review): the struct header and the lines numbered 29 and 31 are not
 * visible in this listing. `packs` and `pack_folder` are also dereferenced
 * through the same pointer type further down, so they presumably belong
 * here as well -- confirm against the complete file.
 */
28 git_odb_backend parent
;
/*
 * Updated to the pack that satisfied a lookup; pack_entry_find() tries
 * this pack before any other.
 */
30 struct git_pack_file
*last_found
;
/*
 * Writepack object used by this backend. The writepack callbacks in this
 * file cast a `struct git_odb_writepack *` back to this type.
 *
 * NOTE(review): those callbacks also use `writepack->indexer`; its
 * declaration and the closing brace are not visible in this listing.
 */
34 struct pack_writepack
{
35 struct git_odb_writepack parent
;
40 * The wonderful tale of a Packed Object lookup query
41 * ===================================================
42 * A riveting and epic story of epicness and ASCII
43 * art, presented by yours truly,
47 * Chapter 1: Once upon a time...
48 * Initialization of the Pack Backend
49 * --------------------------------------------------
51 * # git_odb_backend_pack
52 * | Creates the pack backend structure, initializes the
53 * | callback pointers to our default read() and exist() methods,
54 * | and tries to preload all the known packfiles in the ODB.
56 * |-# packfile_load_all
57 * | Tries to find the `pack` folder, if it exists. ODBs without
58 * | a pack folder are ignored altogether. If there's a `pack` folder
59 * | we run a `dirent` callback through every file in the pack folder
60 * | to find our packfiles. The packfiles are then sorted according
61 * | to a sorting callback.
63 * |-# packfile_load__cb
64 * | | This callback is called from `dirent` with every single file
65 * | | inside the pack folder. We find the packs by actually locating
66 * | | their index (ends in ".idx"). From that index, we verify that
67 * | | the corresponding packfile exists and is valid, and if so, we
68 * | | add it to the pack list.
70 * | |-# packfile_check
71 * | Make sure that there's a packfile to back this index, and store
72 * | some very basic information regarding the packfile itself,
73 * | such as the full path, the size, and the modification time.
74 * | We don't actually open the packfile to check for internal consistency.
76 * |-# packfile_sort__cb
77 * Sort all the preloaded packs according to some specific criteria:
78 * we prioritize the "newer" packs because it's more likely they
79 * contain the objects we are looking for, and we prioritize local
80 * packs over remote ones.
84 * Chapter 2: To be, or not to be...
85 * A standard packed `exist` query for an OID
86 * --------------------------------------------------
88 * # pack_backend__exists
89 * | Check if the given SHA1 oid exists in any of the packs
90 * | that have been loaded for our ODB.
93 * | Iterate through all the packs that have been preloaded
94 * | (starting by the pack where the latest object was found)
95 * | to try to find the OID in one of them.
97 * |-# pack_entry_find1
98 * | Check the index of an individual pack to see if the SHA1
99 * | OID can be found. If we can find the offset to that SHA1
100 * | inside of the index, that means the object is contained
101 * | inside of the packfile and we can stop searching.
102 * | Before returning, we verify that the packfile behind the
103 * | index we are searching still exists on disk.
105 * |-# pack_entry_find_offset
106 * | | Mmap the actual index file from disk if it hasn't been opened
107 * | | yet, and run a binary search through it to find the OID.
108 * | | See <http://book.git-scm.com/7_the_packfile.html> for specifics
109 * | | on the Packfile Index format and how we find entries in it.
111 * | |-# pack_index_open
112 * | | Guess the name of the index based on the full path to the
113 * | | packfile, open it and verify its contents. Only if the index
114 * | | has not been opened already.
116 * | |-# pack_index_check
117 * | Mmap the index file and do a quick run through the header
118 * | to guess the index version (right now we support v1 and v2),
119 * | and to verify that the size of the index makes sense.
122 * See `packfile_open` in Chapter 3
126 * Chapter 3: The neverending story...
127 * A standard packed `lookup` query for an OID
128 * --------------------------------------------------
134 /***********************************************************
136 * FORWARD DECLARATIONS
138 ***********************************************************/
/* Comparison callback handed to git_vector_init() for the backend's pack list. */
140 static int packfile_sort__cb(const void *a_
, const void *b_
);
/* Per-entry callback given to git_path_direach() when the pack folder is scanned. */
142 static int packfile_load__cb(void *_data
, git_buf
*path
);
/* Lookup of a full-length OID (GIT_OID_HEXSZ) across the backend's packs. */
144 static int pack_entry_find(struct git_pack_entry
*e
,
145 struct pack_backend
*backend
, const git_oid
*oid
);
147 /* Can find the offset of an object given
148 * a prefix of an identifier.
149 * Sets GIT_EAMBIGUOUS if short oid is ambiguous.
150 * This method assumes that len is between
151 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
153 static int pack_entry_find_prefix(
154 struct git_pack_entry
*e
,
155 struct pack_backend
*backend
,
156 const git_oid
*short_oid
,
161 /***********************************************************
163 * PACK WINDOW MANAGEMENT
165 ***********************************************************/
/*
 * Ordering callback for the backend's pack vector (it is the comparator
 * passed to git_vector_init() in pack_backend__alloc()).
 *
 * Both arguments are treated as `const struct git_pack_file *`. The visible
 * code first takes the difference of the two `pack_local` values and then
 * compares the `mtime` fields.
 *
 * NOTE(review): the declaration of `st`, the test on `st` and every
 * `return` statement are missing from this listing (the embedded line
 * numbers jump), so the sign of each result cannot be confirmed from here.
 * The comments state the intent: favor local packs, then younger packs.
 */
167 static int packfile_sort__cb(const void *a_
, const void *b_
)
169 const struct git_pack_file
*a
= a_
;
170 const struct git_pack_file
*b
= b_
;
174 * Local packs tend to contain objects specific to our
175 * variant of the project than remote ones. In addition,
176 * remote ones could be on a network mounted filesystem.
177 * Favor local ones for these reasons.
179 st
= a
->pack_local
- b
->pack_local
;
184 * Younger packs tend to contain more recent objects,
185 * and more recent objects tend to get accessed more
188 if (a
->mtime
< b
->mtime
)
190 else if (a
->mtime
== b
->mtime
)
/*
 * Directory-walk callback (passed to git_path_direach() by
 * pack_backend__refresh()): examines one path and, for a pack index, obtains
 * the pack object and inserts it into `backend->packs`.
 *
 * data: the `struct pack_backend` being populated.
 * path: candidate path; anything that is not longer than ".idx" or does not
 *       end in ".idx" makes the callback return 0 immediately.
 *
 * NOTE(review): the declaration of `error`, the statement executed when the
 * strncmp() test matches, the body of the GIT_ENOTFOUND branch, any guard in
 * front of git_vector_insert() and the final return are not visible in this
 * listing.
 */
197 static int packfile_load__cb(void *data
, git_buf
*path
)
199 struct pack_backend
*backend
= data
;
200 struct git_pack_file
*pack
;
201 const char *path_str
= git_buf_cstr(path
);
202 size_t i
, cmp_len
= git_buf_len(path
);
205 if (cmp_len
<= strlen(".idx") || git__suffixcmp(path_str
, ".idx") != 0)
206 return 0; /* not an index */
/* from here on only the part of the path in front of ".idx" is compared */
208 cmp_len
-= strlen(".idx");
/* check the packs already in the vector for one whose name starts with that stem */
210 for (i
= 0; i
< backend
->packs
.length
; ++i
) {
211 struct git_pack_file
*p
= git_vector_get(&backend
->packs
, i
);
213 if (strncmp(p
->pack_name
, path_str
, cmp_len
) == 0)
/* ask the mwindow layer for the pack object that belongs to this index path */
217 error
= git_mwindow_get_pack(&pack
, path
->ptr
);
219 /* ignore missing .pack file as git does */
220 if (error
== GIT_ENOTFOUND
) {
226 error
= git_vector_insert(&backend
->packs
, pack
);
/*
 * Search for a full-length OID (length GIT_OID_HEXSZ) with
 * git_pack_entry_find(), first in `last_found` and then in the packs stored
 * in `backend->packs`. When a pack from the vector yields the entry, that
 * pack is recorded in `backend->last_found`.
 *
 * e: receives the entry located by git_pack_entry_find().
 *
 * The caller, pack_entry_find(), treats a zero result as "found".
 *
 * NOTE(review): the `oid` parameter, the declaration of `i`, the beginning
 * of the first condition, whatever sits between fetching `p` and probing it,
 * and all return statements are missing from this listing.
 */
232 static int pack_entry_find_inner(
233 struct git_pack_entry
*e
,
234 struct pack_backend
*backend
,
236 struct git_pack_file
*last_found
)
241 git_pack_entry_find(e
, last_found
, oid
, GIT_OID_HEXSZ
) == 0)
244 for (i
= 0; i
< backend
->packs
.length
; ++i
) {
245 struct git_pack_file
*p
;
247 p
= git_vector_get(&backend
->packs
, i
);
251 if (git_pack_entry_find(e
, p
, oid
, GIT_OID_HEXSZ
) == 0) {
/* remember the pack so that the next lookup can try it first */
252 backend
->last_found
= p
;
/*
 * Locate the full-length `oid` in one of the backend's packs and describe
 * the hit in `*e`.
 *
 * The pack remembered in `backend->last_found` is probed first, then the
 * search is handed to pack_entry_find_inner(). If that reports failure
 * (non-zero), the value of git_odb__error_notfound() is returned.
 *
 * NOTE(review): pack_entry_find_inner() receives the same `last_found`
 * pointer and also calls git_pack_entry_find() on it, so on a miss that pack
 * looks like it is probed twice -- confirm whether this is intentional.
 * NOTE(review): the statements following the two `if` conditions (lines
 * numbered 266 and 269, presumably the success returns) are not visible in
 * this listing.
 */
260 static int pack_entry_find(struct git_pack_entry
*e
, struct pack_backend
*backend
, const git_oid
*oid
)
262 struct git_pack_file
*last_found
= backend
->last_found
;
264 if (backend
->last_found
&&
265 git_pack_entry_find(e
, backend
->last_found
, oid
, GIT_OID_HEXSZ
) == 0)
268 if (!pack_entry_find_inner(e
, backend
, oid
, last_found
))
271 return git_odb__error_notfound(
272 "failed to find pack entry", oid
, GIT_OID_HEXSZ
);
/*
 * Resolve an abbreviated OID (`short_oid`, prefix length `len`) to a pack
 * entry.
 *
 * Visible steps:
 *  - query `last_found` (the backend's most recently used pack) and copy the
 *    id of the entry into `found_full_oid`;
 *  - query every pack in `backend->packs`;
 *  - if `found` is set and git_oid_cmp() returns non-zero for the new entry
 *    versus `found_full_oid`, return
 *    git_odb__error_ambiguous("found multiple pack entries");
 *  - record the pack in `backend->last_found`;
 *  - finish with
 *    git_odb__error_notfound("no matching pack entry for prefix", ...).
 *
 * NOTE(review): the `len` parameter, the declarations of `error`, `i` and
 * `found`, the statements that follow each GIT_EAMBIGUOUS test, the
 * conditions guarding the copies, the success return and the remaining
 * arguments of the final call are all missing from this listing.
 */
275 static int pack_entry_find_prefix(
276 struct git_pack_entry
*e
,
277 struct pack_backend
*backend
,
278 const git_oid
*short_oid
,
283 git_oid found_full_oid
= {{0}};
285 struct git_pack_file
*last_found
= backend
->last_found
;
288 error
= git_pack_entry_find(e
, last_found
, short_oid
, len
);
289 if (error
== GIT_EAMBIGUOUS
)
292 git_oid_cpy(&found_full_oid
, &e
->sha1
);
297 for (i
= 0; i
< backend
->packs
.length
; ++i
) {
298 struct git_pack_file
*p
;
300 p
= git_vector_get(&backend
->packs
, i
);
304 error
= git_pack_entry_find(e
, p
, short_oid
, len
);
305 if (error
== GIT_EAMBIGUOUS
)
308 if (found
&& git_oid_cmp(&e
->sha1
, &found_full_oid
))
309 return git_odb__error_ambiguous("found multiple pack entries");
310 git_oid_cpy(&found_full_oid
, &e
->sha1
);
312 backend
->last_found
= p
;
317 return git_odb__error_notfound("no matching pack entry for prefix",
324 /***********************************************************
326 * PACKED BACKEND PUBLIC API
328 * Implement the git_odb_backend API calls
330 ***********************************************************/
/*
 * Backend `refresh` callback: walk the pack folder again and re-sort the
 * pack list.
 *
 * When `pack_folder` cannot be stat'ed or is not a directory, the value of
 * git_odb__error_notfound("failed to refresh packfiles", NULL, 0) is
 * returned. Otherwise every directory entry is passed to
 * packfile_load__cb() through git_path_direach(), the temporary path buffer
 * is disposed and `backend->packs` is sorted.
 *
 * NOTE(review): the declarations of `error` and `st`, the statement taken
 * when `pack_folder` is NULL and the final return are not visible in this
 * listing.
 * NOTE(review): the result of git_buf_sets() does not appear to be checked.
 */
331 static int pack_backend__refresh(git_odb_backend
*backend_
)
335 git_buf path
= GIT_BUF_INIT
;
336 struct pack_backend
*backend
= (struct pack_backend
*)backend_
;
338 if (backend
->pack_folder
== NULL
)
341 if (p_stat(backend
->pack_folder
, &st
) < 0 || !S_ISDIR(st
.st_mode
))
342 return git_odb__error_notfound("failed to refresh packfiles", NULL
, 0);
344 git_buf_sets(&path
, backend
->pack_folder
);
346 /* reload all packs */
347 error
= git_path_direach(&path
, 0, packfile_load__cb
, backend
);
349 git_buf_dispose(&path
);
350 git_vector_sort(&backend
->packs
);
/*
 * Backend `read_header` callback: find `oid` with pack_entry_find() and
 * return whatever git_packfile_resolve_header() yields when given `len_p`,
 * `type_p` and the pack and offset of the located entry.
 *
 * All four arguments are asserted to be non-NULL.
 *
 * NOTE(review): the declaration of `error` and the statement executed when
 * the lookup fails (line numbered 365) are not visible in this listing.
 */
355 static int pack_backend__read_header(
356 size_t *len_p
, git_object_t
*type_p
,
357 struct git_odb_backend
*backend
, const git_oid
*oid
)
359 struct git_pack_entry e
;
362 assert(len_p
&& type_p
&& backend
&& oid
);
364 if ((error
= pack_entry_find(&e
, (struct pack_backend
*)backend
, oid
)) < 0)
367 return git_packfile_resolve_header(len_p
, type_p
, e
.p
, e
.offset
);
/*
 * Backend `freshen` callback: touch the pack file that contains `oid`,
 * rate-limited per pack by FRESHEN_FREQUENCY (documented at its definition
 * as "no more than every 2 seconds").
 *
 * After locating the entry, the pack's `last_freshen` is compared with
 * `now - FRESHEN_FREQUENCY`; git_futils_touch() is called on `pack_name`
 * with `now`, and `last_freshen` is then set to `now`.
 *
 * NOTE(review): the declarations of `now` and `error`, the place where `now`
 * receives its value, and the statements following each condition
 * (including all returns) are not visible in this listing.
 */
370 static int pack_backend__freshen(
371 git_odb_backend
*backend
, const git_oid
*oid
)
373 struct git_pack_entry e
;
377 if ((error
= pack_entry_find(&e
, (struct pack_backend
*)backend
, oid
)) < 0)
382 if (e
.p
->last_freshen
> now
- FRESHEN_FREQUENCY
)
385 if ((error
= git_futils_touch(e
.p
->pack_name
, &now
)) < 0)
388 e
.p
->last_freshen
= now
;
/*
 * Backend `read` callback: look up `oid` with pack_entry_find(), unpack the
 * object at the entry's offset with git_packfile_unpack() and hand the
 * resulting data pointer out through `*buffer_p`.
 *
 * NOTE(review): the declaration of `error`, the statement executed when the
 * lookup or the unpack fails, the assignments to `*len_p` and `*type_p` and
 * the final return are not visible in this listing.
 */
392 static int pack_backend__read(
393 void **buffer_p
, size_t *len_p
, git_object_t
*type_p
,
394 git_odb_backend
*backend
, const git_oid
*oid
)
396 struct git_pack_entry e
;
397 git_rawobj raw
= {NULL
};
400 if ((error
= pack_entry_find(&e
, (struct pack_backend
*)backend
, oid
)) < 0 ||
401 (error
= git_packfile_unpack(&raw
, e
.p
, &e
.offset
)) < 0)
404 *buffer_p
= raw
.data
;
/*
 * Backend `read_prefix` callback: read an object identified by an
 * abbreviated OID of length `len`.
 *
 *  - len < GIT_OID_MINPREFIXLEN: `error` is set from
 *    git_odb__error_ambiguous("prefix length too short").
 *  - len >= GIT_OID_HEXSZ: the call is forwarded to pack_backend__read() and
 *    `short_oid` is copied to `out_oid`.
 *  - remaining code: pack_entry_find_prefix() followed by
 *    git_packfile_unpack(); the data pointer goes to `*buffer_p` and the
 *    entry's id is copied to `out_oid`.
 *
 * NOTE(review): several parameters (`out_oid`, `buffer_p`, `len_p`, `len`),
 * the declaration of `error`, the `else` that presumably introduces the last
 * case, the assignments to `*len_p` and `*type_p` and the final return are
 * not visible in this listing.
 */
411 static int pack_backend__read_prefix(
415 git_object_t
*type_p
,
416 git_odb_backend
*backend
,
417 const git_oid
*short_oid
,
422 if (len
< GIT_OID_MINPREFIXLEN
)
423 error
= git_odb__error_ambiguous("prefix length too short");
425 else if (len
>= GIT_OID_HEXSZ
) {
426 /* We can fall back to regular read method */
427 error
= pack_backend__read(buffer_p
, len_p
, type_p
, backend
, short_oid
);
429 git_oid_cpy(out_oid
, short_oid
);
431 struct git_pack_entry e
;
432 git_rawobj raw
= {NULL
};
434 if ((error
= pack_entry_find_prefix(
435 &e
, (struct pack_backend
*)backend
, short_oid
, len
)) == 0 &&
436 (error
= git_packfile_unpack(&raw
, e
.p
, &e
.offset
)) == 0)
438 *buffer_p
= raw
.data
;
441 git_oid_cpy(out_oid
, &e
.sha1
);
/*
 * Backend `exists` callback: evaluates to 1 when pack_entry_find() returns 0
 * for `oid` and to 0 for any other result. The located entry is only a
 * scratch value and is discarded.
 */
448 static int pack_backend__exists(git_odb_backend
*backend
, const git_oid
*oid
)
450 struct git_pack_entry e
;
451 return pack_entry_find(&e
, (struct pack_backend
*)backend
, oid
) == 0;
/*
 * Backend `exists_prefix` callback: resolve the abbreviated id `short_id`
 * (prefix length `len`) with pack_entry_find_prefix() and copy the id of the
 * resulting entry into `out`.
 *
 * `e` is zero-initialized, and in the visible code the copy into `out` is
 * not guarded by a test of `error`.
 * NOTE(review): confirm that callers ignore `*out` when the lookup fails.
 * NOTE(review): the declaration of `error` and the final return are not
 * visible in this listing.
 */
454 static int pack_backend__exists_prefix(
455 git_oid
*out
, git_odb_backend
*backend
, const git_oid
*short_id
, size_t len
)
458 struct pack_backend
*pb
= (struct pack_backend
*)backend
;
459 struct git_pack_entry e
= {0};
461 error
= pack_entry_find_prefix(&e
, pb
, short_id
, len
);
462 git_oid_cpy(out
, &e
.sha1
);
/*
 * Backend `foreach` callback: refresh the pack list through
 * pack_backend__refresh(), then call git_pack_foreach_entry() with `cb` and
 * `data` for every pack in `backend->packs`.
 *
 * `_backend` and `cb` are asserted to be non-NULL.
 *
 * NOTE(review): the declarations of `error` and `i`, the statements executed
 * when the refresh fails or when git_pack_foreach_entry() returns non-zero,
 * and the final return are not visible in this listing.
 */
466 static int pack_backend__foreach(git_odb_backend
*_backend
, git_odb_foreach_cb cb
, void *data
)
469 struct git_pack_file
*p
;
470 struct pack_backend
*backend
;
473 assert(_backend
&& cb
);
474 backend
= (struct pack_backend
*)_backend
;
476 /* Make sure we know about the packfiles */
477 if ((error
= pack_backend__refresh(_backend
)) < 0)
480 git_vector_foreach(&backend
->packs
, i
, p
) {
481 if ((error
= git_pack_foreach_entry(p
, cb
, data
)) != 0)
/*
 * Writepack `append` callback: forwards `data`, `size` and `stats` to
 * git_indexer_append() on the writepack's indexer and returns its result.
 *
 * NOTE(review): the lines numbered 491-493 are not visible in this listing.
 */
488 static int pack_backend__writepack_append(struct git_odb_writepack
*_writepack
, const void *data
, size_t size
, git_transfer_progress
*stats
)
490 struct pack_writepack
*writepack
= (struct pack_writepack
*)_writepack
;
494 return git_indexer_append(writepack
->indexer
, data
, size
, stats
);
/*
 * Writepack `commit` callback: returns the result of git_indexer_commit()
 * called on the writepack's indexer with `stats`.
 *
 * NOTE(review): the lines numbered 500-502 are not visible in this listing.
 */
497 static int pack_backend__writepack_commit(struct git_odb_writepack
*_writepack
, git_transfer_progress
*stats
)
499 struct pack_writepack
*writepack
= (struct pack_writepack
*)_writepack
;
503 return git_indexer_commit(writepack
->indexer
, stats
);
/*
 * Writepack `free` callback: releases the indexer with git_indexer_free()
 * and then the writepack object itself with git__free().
 *
 * NOTE(review): the lines numbered 509-511 are not visible in this listing.
 */
506 static void pack_backend__writepack_free(struct git_odb_writepack
*_writepack
)
508 struct pack_writepack
*writepack
= (struct pack_writepack
*)_writepack
;
512 git_indexer_free(writepack
->indexer
);
513 git__free(writepack
);
/*
 * Backend `writepack` callback: build a `struct pack_writepack` whose
 * indexer is created by git_indexer_new() for `backend->pack_folder`, and
 * return it through `*out`.
 *
 * progress_cb, progress_payload: stored in the indexer options as
 *     `progress_cb` and `progress_cb_payload`.
 *
 * `out` and `_backend` are asserted to be non-NULL. The writepack is
 * allocated zeroed; if git_indexer_new() fails it is freed again. On the
 * success path its `parent` is wired to this backend and to the
 * append/commit/free callbacks defined in this file.
 *
 * NOTE(review): the declaration of the `odb` parameter (line numbered 518),
 * the statement at line 528 and all return statements are not visible in
 * this listing.
 */
516 static int pack_backend__writepack(struct git_odb_writepack
**out
,
517 git_odb_backend
*_backend
,
519 git_transfer_progress_cb progress_cb
,
520 void *progress_payload
)
522 git_indexer_options opts
= GIT_INDEXER_OPTIONS_INIT
;
523 struct pack_backend
*backend
;
524 struct pack_writepack
*writepack
;
526 assert(out
&& _backend
);
530 opts
.progress_cb
= progress_cb
;
531 opts
.progress_cb_payload
= progress_payload
;
533 backend
= (struct pack_backend
*)_backend
;
535 writepack
= git__calloc(1, sizeof(struct pack_writepack
));
536 GIT_ERROR_CHECK_ALLOC(writepack
);
538 if (git_indexer_new(&writepack
->indexer
,
539 backend
->pack_folder
, 0, odb
, &opts
) < 0) {
540 git__free(writepack
);
544 writepack
->parent
.backend
= _backend
;
545 writepack
->parent
.append
= pack_backend__writepack_append
;
546 writepack
->parent
.commit
= pack_backend__writepack_commit
;
547 writepack
->parent
.free
= pack_backend__writepack_free
;
549 *out
= (git_odb_writepack
*)writepack
;
/*
 * Backend `free` callback: hands every pack in `backend->packs` back through
 * git_mwindow_put_pack(), frees the vector and frees the `pack_folder`
 * string.
 *
 * NOTE(review): the declaration of `i`, the lines numbered 557-560 and the
 * lines after 569 (where the backend object itself is presumably released)
 * are not visible in this listing.
 */
554 static void pack_backend__free(git_odb_backend
*_backend
)
556 struct pack_backend
*backend
;
561 backend
= (struct pack_backend
*)_backend
;
563 for (i
= 0; i
< backend
->packs
.length
; ++i
) {
564 struct git_pack_file
*p
= git_vector_get(&backend
->packs
, i
);
565 git_mwindow_put_pack(p
);
568 git_vector_free(&backend
->packs
);
569 git__free(backend
->pack_folder
);
/*
 * Allocate a zeroed `struct pack_backend`, initialize its pack vector with
 * `initial_size` and packfile_sort__cb() as comparator, and install the
 * backend version plus all callbacks implemented in this file.
 *
 * NOTE(review): the body of the git_vector_init() failure branch, the
 * assignment to `*out` and the return statements are not visible in this
 * listing.
 */
573 static int pack_backend__alloc(struct pack_backend
**out
, size_t initial_size
)
575 struct pack_backend
*backend
= git__calloc(1, sizeof(struct pack_backend
));
576 GIT_ERROR_CHECK_ALLOC(backend
);
578 if (git_vector_init(&backend
->packs
, initial_size
, packfile_sort__cb
) < 0) {
583 backend
->parent
.version
= GIT_ODB_BACKEND_VERSION
;
585 backend
->parent
.read
= &pack_backend__read
;
586 backend
->parent
.read_prefix
= &pack_backend__read_prefix
;
587 backend
->parent
.read_header
= &pack_backend__read_header
;
588 backend
->parent
.exists
= &pack_backend__exists
;
589 backend
->parent
.exists_prefix
= &pack_backend__exists_prefix
;
590 backend
->parent
.refresh
= &pack_backend__refresh
;
591 backend
->parent
.foreach
= &pack_backend__foreach
;
592 backend
->parent
.writepack
= &pack_backend__writepack
;
593 backend
->parent
.freshen
= &pack_backend__freshen
;
594 backend
->parent
.free
= &pack_backend__free
;
/*
 * Create a pack backend whose pack list holds the single pack obtained from
 * git_mwindow_get_pack() for `idx`.
 *
 * The backend is allocated with an initial vector size of 1. The visible
 * code calls pack_backend__free() on the backend in connection with a
 * failure of the pack lookup or of the vector insert, and stores the backend
 * in `*backend_out`.
 *
 * NOTE(review): the statements following each condition (including every
 * return) are not visible in this listing.
 */
600 int git_odb_backend_one_pack(git_odb_backend
**backend_out
, const char *idx
)
602 struct pack_backend
*backend
= NULL
;
603 struct git_pack_file
*packfile
= NULL
;
605 if (pack_backend__alloc(&backend
, 1) < 0)
608 if (git_mwindow_get_pack(&packfile
, idx
) < 0 ||
609 git_vector_insert(&backend
->packs
, packfile
) < 0)
611 pack_backend__free((git_odb_backend
*)backend
);
615 *backend_out
= (git_odb_backend
*)backend
;
/*
 * Create a pack backend for the object directory `objects_dir`.
 *
 * The backend is allocated with an initial vector size of 8. If joining
 * `objects_dir` with "pack" succeeds and the result is a directory, the
 * buffer is detached into `backend->pack_folder` and the packs are loaded
 * through pack_backend__refresh(). The visible code also contains a
 * pack_backend__free() call, the assignment of the backend to
 * `*backend_out`, and the disposal of the path buffer.
 *
 * NOTE(review): the declaration of `error`, the conditions around the
 * pack_backend__free() call and around the `*backend_out` assignment, and
 * the return statements are not visible in this listing.
 */
619 int git_odb_backend_pack(git_odb_backend
**backend_out
, const char *objects_dir
)
622 struct pack_backend
*backend
= NULL
;
623 git_buf path
= GIT_BUF_INIT
;
625 if (pack_backend__alloc(&backend
, 8) < 0)
628 if (!(error
= git_buf_joinpath(&path
, objects_dir
, "pack")) &&
629 git_path_isdir(git_buf_cstr(&path
)))
631 backend
->pack_folder
= git_buf_detach(&path
);
632 error
= pack_backend__refresh((git_odb_backend
*)backend
);
636 pack_backend__free((git_odb_backend
*)backend
);
640 *backend_out
= (git_odb_backend
*)backend
;
642 git_buf_dispose(&path
);