2 * This file is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2,
4 * as published by the Free Software Foundation.
6 * In addition to the permissions in the GNU General Public License,
7 * the authors give you unlimited permission to link the compiled
8 * version of this file into combinations with other programs,
9 * and to distribute those combinations without any restriction
10 * coming from the use of this file. (The General Public License
11 * restrictions do apply in other respects; for example, they cover
12 * modification of the file, and distribution when not linked into
13 * a combined executable.)
15 * This file is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; see the file COPYING. If not, write to
22 * the Free Software Foundation, 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
27 #include "git2/zlib.h"
28 #include "git2/repository.h"
33 #include "delta-apply.h"
34 #include "sha1_lookup.h"
38 #include "git2/odb_backend.h"
41 git_odb_backend parent
;
43 struct git_pack_file
*last_found
;
45 time_t pack_folder_mtime
;
49 * The wonderful tale of a Packed Object lookup query
50 * ===================================================
51 * A riveting and epic story of epicness and ASCII
52 * art, presented by yours truly,
56 * Chapter 1: Once upon a time...
57 * Initialization of the Pack Backend
58 * --------------------------------------------------
60 * # git_odb_backend_pack
61 * | Creates the pack backend structure, initializes the
62 * | callback pointers to our default read() and exist() methods,
63 * | and tries to preload all the known packfiles in the ODB.
65 * |-# packfile_load_all
66 * | Tries to find the `pack` folder, if it exists. ODBs without
67 * | a pack folder are ignored altogether. If there's a `pack` folder
68 * | we run a `dirent` callback through every file in the pack folder
69 * | to find our packfiles. The packfiles are then sorted according
70 * | to a sorting callback.
72 * |-# packfile_load__cb
73 * | | This callback is called from `dirent` with every single file
74 * | | inside the pack folder. We find the packs by actually locating
75 * | | their index (ends in ".idx"). From that index, we verify that
76 * | | the corresponding packfile exists and is valid, and if so, we
77 * | | add it to the pack list.
79 * | |-# packfile_check
80 * | Make sure that there's a packfile to back this index, and store
81 * | some very basic information regarding the packfile itself,
82 * | such as the full path, the size, and the modification time.
83 * | We don't actually open the packfile to check for internal consistency.
85 * |-# packfile_sort__cb
86 * Sort all the preloaded packs according to some specific criteria:
87 * we prioritize the "newer" packs because it's more likely they
88 * contain the objects we are looking for, and we prioritize local
89 * packs over remote ones.
93 * Chapter 2: To be, or not to be...
94 * A standard packed `exist` query for an OID
95 * --------------------------------------------------
97 * # pack_backend__exists
98 * | Check if the given SHA1 oid exists in any of the packs
99 * | that have been loaded for our ODB.
101 * |-# pack_entry_find
102 * | Iterate through all the packs that have been preloaded
103 * | (starting by the pack where the latest object was found)
104 * | to try to find the OID in one of them.
106 * |-# pack_entry_find1
107 * | Check the index of an individual pack to see if the SHA1
108 * | OID can be found. If we can find the offset to that SHA1
109 * | inside of the index, that means the object is contained
110 * | inside of the packfile and we can stop searching.
111 * | Before returning, we verify that the packfile behind the
112 * | index we are searching still exists on disk.
114 * |-# pack_entry_find_offset
115 * | | Mmap the actual index file to disk if it hasn't been opened
116 * | | yet, and run a binary search through it to find the OID.
117 * | | See <http://book.git-scm.com/7_the_packfile.html> for specifics
118 * | | on the Packfile Index format and how we find entries in it.
120 * | |-# pack_index_open
121 * | | Guess the name of the index based on the full path to the
122 * | | packfile, open it and verify its contents. Only if the index
123 * | | has not been opened already.
125 * | |-# pack_index_check
126 * | Mmap the index file and do a quick run through the header
127 * | to guess the index version (right now we support v1 and v2),
128 * | and to verify that the size of the index makes sense.
131 * See `packfile_open` in Chapter 3
135 * Chapter 3: The neverending story...
136 * A standard packed `lookup` query for an OID
137 * --------------------------------------------------
143 /***********************************************************
145 * FORWARD DECLARATIONS
147 ***********************************************************/
149 static void pack_window_free_all(struct pack_backend
*backend
, struct git_pack_file
*p
);
150 static int pack_window_contains(git_mwindow
*win
, off_t offset
);
152 static int packfile_sort__cb(const void *a_
, const void *b_
);
154 static int packfile_load__cb(void *_data
, char *path
);
155 static int packfile_refresh_all(struct pack_backend
*backend
);
157 static int pack_entry_find(struct git_pack_entry
*e
,
158 struct pack_backend
*backend
, const git_oid
*oid
);
160 /* Can find the offset of an object given
161 * a prefix of an identifier.
162 * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
164 * This method assumes that len is between
165 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
167 static int pack_entry_find_prefix(struct git_pack_entry
*e
,
168 struct pack_backend
*backend
,
169 const git_oid
*short_oid
,
174 /***********************************************************
176 * PACK WINDOW MANAGEMENT
178 ***********************************************************/
180 GIT_INLINE(void) pack_window_free_all(struct pack_backend
*GIT_UNUSED(backend
), struct git_pack_file
*p
)
182 GIT_UNUSED_ARG(backend
);
183 git_mwindow_free_all(&p
->mwf
);
186 GIT_INLINE(int) pack_window_contains(git_mwindow
*win
, off_t offset
)
188 /* We must promise at least 20 bytes (one hash) after the
189 * offset is available from this window, otherwise the offset
190 * is not actually in this window and a different window (which
191 * has that one hash excess) must be used. This is to support
192 * the object header and delta base parsing routines below.
194 return git_mwindow_contains(win
, offset
+ 20);
197 static int packfile_sort__cb(const void *a_
, const void *b_
)
199 const struct git_pack_file
*a
= a_
;
200 const struct git_pack_file
*b
= b_
;
204 * Local packs tend to contain objects specific to our
205 * variant of the project than remote ones. In addition,
206 * remote ones could be on a network mounted filesystem.
207 * Favor local ones for these reasons.
209 st
= a
->pack_local
- b
->pack_local
;
214 * Younger packs tend to contain more recent objects,
215 * and more recent objects tend to get accessed more
218 if (a
->mtime
< b
->mtime
)
220 else if (a
->mtime
== b
->mtime
)
228 static int packfile_load__cb(void *_data
, char *path
)
230 struct pack_backend
*backend
= (struct pack_backend
*)_data
;
231 struct git_pack_file
*pack
;
235 if (git__suffixcmp(path
, ".idx") != 0)
236 return GIT_SUCCESS
; /* not an index */
238 for (i
= 0; i
< backend
->packs
.length
; ++i
) {
239 struct git_pack_file
*p
= git_vector_get(&backend
->packs
, i
);
240 if (memcmp(p
->pack_name
, path
, strlen(path
) - strlen(".idx")) == 0)
244 error
= git_packfile_check(&pack
, path
);
245 if (error
< GIT_SUCCESS
)
246 return git__rethrow(error
, "Failed to load packfile");
248 if (git_vector_insert(&backend
->packs
, pack
) < GIT_SUCCESS
) {
/*
 * Bring backend->packs up to date with the contents of the pack folder.
 *
 * Nothing is done when the backend has no pack folder. Otherwise the
 * folder is stat()ed and, only if its mtime differs from the one recorded
 * at the previous scan, every entry is fed to packfile_load__cb, the
 * vector is re-sorted and the new mtime is remembered.
 *
 * Returns GIT_SUCCESS, GIT_ENOTFOUND when the folder is missing or is not
 * a directory, or the rethrown error of the directory walk.
 */
static int packfile_refresh_all(struct pack_backend *backend)
{
	int error;
	struct stat st;

	if (backend->pack_folder == NULL)
		return GIT_SUCCESS;

	if (p_stat(backend->pack_folder, &st) < 0 || !S_ISDIR(st.st_mode))
		return git__throw(GIT_ENOTFOUND, "Failed to refresh packfiles. Backend not found");

	if (st.st_mtime != backend->pack_folder_mtime) {
		/* The walk is given a GIT_PATH_MAX-sized scratch buffer rather
		 * than the stored string. pack_folder is duplicated from a
		 * GIT_PATH_MAX-sized buffer in git_odb_backend_pack(), so the
		 * copy fits. */
		char path[GIT_PATH_MAX];
		strcpy(path, backend->pack_folder);

		/* reload all packs */
		error = git_futils_direach(path, GIT_PATH_MAX, packfile_load__cb, (void *)backend);
		if (error < GIT_SUCCESS)
			return git__rethrow(error, "Failed to refresh packfiles");

		git_vector_sort(&backend->packs);
		backend->pack_folder_mtime = st.st_mtime;
	}

	return GIT_SUCCESS;
}
/*
 * Locate the pack entry for a full object id.
 *
 * e        filled in by git_pack_entry_find() on success
 * backend  backend whose packs are searched (refreshed first)
 * oid      full-length object id to look for
 *
 * The pack that answered the previous query is tried first; after that
 * every other loaded pack is tried in vector order, and the pack that
 * matches becomes the new `last_found`.
 *
 * Returns GIT_SUCCESS, GIT_ENOTFOUND when no pack has the object, or the
 * rethrown error of the refresh.
 */
static int pack_entry_find(struct git_pack_entry *e, struct pack_backend *backend, const git_oid *oid)
{
	int error;
	size_t i;

	if ((error = packfile_refresh_all(backend)) < GIT_SUCCESS)
		return git__rethrow(error, "Failed to find pack entry");

	if (backend->last_found &&
		git_pack_entry_find(e, backend->last_found, oid, GIT_OID_HEXSZ) == GIT_SUCCESS)
		return GIT_SUCCESS;

	for (i = 0; i < backend->packs.length; ++i) {
		struct git_pack_file *p;

		p = git_vector_get(&backend->packs, i);
		if (p == backend->last_found)
			continue; /* already probed above */

		if (git_pack_entry_find(e, p, oid, GIT_OID_HEXSZ) == GIT_SUCCESS) {
			backend->last_found = p;
			return GIT_SUCCESS;
		}
	}

	return git__throw(GIT_ENOTFOUND, "Failed to find pack entry");
}
/*
 * Locate the pack entry for an abbreviated object id.
 *
 * e          filled in by git_pack_entry_find() on a match
 * backend    backend whose packs are searched (refreshed first)
 * short_oid  object id of which only the first `len` hex digits count
 * len        prefix length; callers guarantee it lies between
 *            GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ
 *
 * Unlike the full-id lookup, the search cannot stop at the first hit:
 * every pack has to be consulted so that a prefix matching in more than
 * one pack is reported as ambiguous.
 *
 * Returns GIT_SUCCESS on exactly one match, GIT_ENOTFOUND on none,
 * GIT_EAMBIGUOUSOIDPREFIX when a pack reports ambiguity or more than one
 * pack matches, or the rethrown error of the refresh.
 */
static int pack_entry_find_prefix(
	struct git_pack_entry *e,
	struct pack_backend *backend,
	const git_oid *short_oid,
	unsigned int len)
{
	int error;
	size_t i;
	unsigned found = 0;

	if ((error = packfile_refresh_all(backend)) < GIT_SUCCESS)
		return git__rethrow(error, "Failed to find pack entry");

	if (backend->last_found) {
		error = git_pack_entry_find(e, backend->last_found, short_oid, len);
		if (error == GIT_EAMBIGUOUSOIDPREFIX) {
			return git__rethrow(error, "Failed to find pack entry. Ambiguous sha1 prefix");
		} else if (error == GIT_SUCCESS) {
			found = 1;
		}
	}

	for (i = 0; i < backend->packs.length; ++i) {
		struct git_pack_file *p;

		p = git_vector_get(&backend->packs, i);
		if (p == backend->last_found)
			continue; /* already probed above */

		error = git_pack_entry_find(e, p, short_oid, len);
		if (error == GIT_EAMBIGUOUSOIDPREFIX) {
			return git__rethrow(error, "Failed to find pack entry. Ambiguous sha1 prefix");
		} else if (error == GIT_SUCCESS) {
			found++;
			if (found > 1)
				break; /* a second pack matched: ambiguous */
			backend->last_found = p;
		}
	}

	if (!found) {
		return git__rethrow(GIT_ENOTFOUND, "Failed to find pack entry");
	} else if (found > 1) {
		return git__rethrow(GIT_EAMBIGUOUSOIDPREFIX, "Failed to find pack entry. Ambiguous sha1 prefix");
	} else {
		return GIT_SUCCESS;
	}
}
362 /***********************************************************
364 * PACKED BACKEND PUBLIC API
366 * Implement the git_odb_backend API calls
368 ***********************************************************/
/*
 * NOTE(review): header-only lookup of a packed object. It locates the
 * object with locate_packfile() and returns GIT_ENOTFOUND when that call
 * reports a negative value; otherwise it returns the result of
 * read_header_packed().
 *
 * Nothing visible in this file installs this function:
 * git_odb_backend_pack() sets parent.read_header to NULL. The names
 * pack_location, locate_packfile and read_header_packed are not defined
 * anywhere in view, so this looks like disabled legacy code. The braces
 * of the body are missing in this copy; confirm against the upstream
 * file before reviving it.
 */
371 int pack_backend__read_header(git_rawobj *obj, git_odb_backend *backend, const git_oid *oid)
373 pack_location location;
375 assert(obj && backend && oid);
377 if (locate_packfile(&location, (struct pack_backend *)backend, oid) < 0)
378 return GIT_ENOTFOUND;
380 return read_header_packed(obj, &location);
384 static int pack_backend__read(void **buffer_p
, size_t *len_p
, git_otype
*type_p
, git_odb_backend
*backend
, const git_oid
*oid
)
386 struct git_pack_entry e
;
390 if ((error
= pack_entry_find(&e
, (struct pack_backend
*)backend
, oid
)) < GIT_SUCCESS
)
391 return git__rethrow(error
, "Failed to read pack backend");
393 if ((error
= git_packfile_unpack(&raw
, e
.p
, &e
.offset
)) < GIT_SUCCESS
)
394 return git__rethrow(error
, "Failed to read pack backend");
396 *buffer_p
= raw
.data
;
403 static int pack_backend__read_prefix(
408 git_odb_backend
*backend
,
409 const git_oid
*short_oid
,
412 if (len
< GIT_OID_MINPREFIXLEN
)
413 return git__throw(GIT_EAMBIGUOUSOIDPREFIX
, "Failed to read pack backend. Prefix length is lower than %d.", GIT_OID_MINPREFIXLEN
);
415 if (len
>= GIT_OID_HEXSZ
) {
416 /* We can fall back to regular read method */
417 int error
= pack_backend__read(buffer_p
, len_p
, type_p
, backend
, short_oid
);
418 if (error
== GIT_SUCCESS
)
419 git_oid_cpy(out_oid
, short_oid
);
423 struct git_pack_entry e
;
427 if ((error
= pack_entry_find_prefix(&e
, (struct pack_backend
*)backend
, short_oid
, len
)) < GIT_SUCCESS
)
428 return git__rethrow(error
, "Failed to read pack backend");
430 if ((error
= git_packfile_unpack(&raw
, e
.p
, &e
.offset
)) < GIT_SUCCESS
)
431 return git__rethrow(error
, "Failed to read pack backend");
433 *buffer_p
= raw
.data
;
436 git_oid_cpy(out_oid
, &e
.sha1
);
442 static int pack_backend__exists(git_odb_backend
*backend
, const git_oid
*oid
)
444 struct git_pack_entry e
;
445 return pack_entry_find(&e
, (struct pack_backend
*)backend
, oid
) == GIT_SUCCESS
;
/*
 * `free` callback of the pack backend: walks every pack stored in
 * backend->packs, then releases the vector's storage and the pack_folder
 * string.
 *
 * NOTE(review): this copy of the function is incomplete. The braces, the
 * declaration of `i`, the statement(s) inside the loop that deal with each
 * `p`, and anything after free(backend->pack_folder) are missing. Whether
 * `backend` itself is released cannot be told from here; restore the
 * block from the upstream file instead of guessing the per-pack call.
 */
448 static void pack_backend__free(git_odb_backend
*_backend
)
450 struct pack_backend
*backend
;
/* The generic handle is the first member of struct pack_backend, hence the cast. */
455 backend
= (struct pack_backend
*)_backend
;
457 for (i
= 0; i
< backend
->packs
.length
; ++i
) {
458 struct git_pack_file
*p
= git_vector_get(&backend
->packs
, i
);
/* NOTE(review): the per-pack release call and the closing brace of the loop are missing here. */
462 git_vector_free(&backend
->packs
);
463 free(backend
->pack_folder
);
467 int git_odb_backend_pack(git_odb_backend
**backend_out
, const char *objects_dir
)
469 struct pack_backend
*backend
;
470 char path
[GIT_PATH_MAX
];
472 backend
= git__calloc(1, sizeof(struct pack_backend
));
476 if (git_vector_init(&backend
->packs
, 8, packfile_sort__cb
) < GIT_SUCCESS
) {
481 git_path_join(path
, objects_dir
, "pack");
482 if (git_futils_isdir(path
) == GIT_SUCCESS
) {
483 backend
->pack_folder
= git__strdup(path
);
484 backend
->pack_folder_mtime
= 0;
486 if (backend
->pack_folder
== NULL
) {
492 backend
->parent
.read
= &pack_backend__read
;
493 backend
->parent
.read_prefix
= &pack_backend__read_prefix
;
494 backend
->parent
.read_header
= NULL
;
495 backend
->parent
.exists
= &pack_backend__exists
;
496 backend
->parent
.free
= &pack_backend__free
;
498 *backend_out
= (git_odb_backend
*)backend
;