]> git.proxmox.com Git - libgit2.git/blob - src/odb_pack.c
CMakefile: add -Wmissing-prototypes and fix warnings
[libgit2.git] / src / odb_pack.c
1 /*
2 * This file is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2,
4 * as published by the Free Software Foundation.
5 *
6 * In addition to the permissions in the GNU General Public License,
7 * the authors give you unlimited permission to link the compiled
8 * version of this file into combinations with other programs,
9 * and to distribute those combinations without any restriction
10 * coming from the use of this file. (The General Public License
11 * restrictions do apply in other respects; for example, they cover
12 * modification of the file, and distribution when not linked into
13 * a combined executable.)
14 *
15 * This file is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; see the file COPYING. If not, write to
22 * the Free Software Foundation, 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
24 */
25
26 #include "common.h"
27 #include "git2/zlib.h"
28 #include "git2/repository.h"
29 #include "git2/oid.h"
30 #include "fileops.h"
31 #include "hash.h"
32 #include "odb.h"
33 #include "delta-apply.h"
34 #include "sha1_lookup.h"
35 #include "mwindow.h"
36 #include "pack.h"
37
38 #include "git2/odb_backend.h"
39
40 struct pack_backend {
41 git_odb_backend parent;
42 git_vector packs;
43 struct git_pack_file *last_found;
44 char *pack_folder;
45 time_t pack_folder_mtime;
46 };
47
48 /**
49 * The wonderful tale of a Packed Object lookup query
50 * ===================================================
51 * A riveting and epic story of epicness and ASCII
52 * art, presented by yours truly,
53 * Sir Vicent of Marti
54 *
55 *
56 * Chapter 1: Once upon a time...
57 * Initialization of the Pack Backend
58 * --------------------------------------------------
59 *
60 * # git_odb_backend_pack
61 * | Creates the pack backend structure, initializes the
62 * | callback pointers to our default read() and exist() methods,
63 * | and tries to preload all the known packfiles in the ODB.
64 * |
65 * |-# packfile_load_all
66 * | Tries to find the `pack` folder, if it exists. ODBs without
67 * | a pack folder are ignored altogether. If there's a `pack` folder
68 * | we run a `dirent` callback through every file in the pack folder
69 * | to find our packfiles. The packfiles are then sorted according
70 * | to a sorting callback.
71 * |
72 * |-# packfile_load__cb
73 * | | This callback is called from `dirent` with every single file
74 * | | inside the pack folder. We find the packs by actually locating
75 * | | their index (ends in ".idx"). From that index, we verify that
76 * | | the corresponding packfile exists and is valid, and if so, we
77 * | | add it to the pack list.
78 * | |
79 * | |-# packfile_check
80 * | Make sure that there's a packfile to back this index, and store
81 * | some very basic information regarding the packfile itself,
82 * | such as the full path, the size, and the modification time.
83 * | We don't actually open the packfile to check for internal consistency.
84 * |
85 * |-# packfile_sort__cb
86 * Sort all the preloaded packs according to some specific criteria:
87 * we prioritize the "newer" packs because it's more likely they
88 * contain the objects we are looking for, and we prioritize local
89 * packs over remote ones.
90 *
91 *
92 *
93 * Chapter 2: To be, or not to be...
94 * A standard packed `exist` query for an OID
95 * --------------------------------------------------
96 *
97 * # pack_backend__exists
98 * | Check if the given SHA1 oid exists in any of the packs
99 * | that have been loaded for our ODB.
100 * |
101 * |-# pack_entry_find
102 * | Iterate through all the packs that have been preloaded
103 * | (starting by the pack where the latest object was found)
104 * | to try to find the OID in one of them.
105 * |
106 * |-# pack_entry_find1
107 * | Check the index of an individual pack to see if the SHA1
108 * | OID can be found. If we can find the offset to that SHA1
109 * | inside of the index, that means the object is contained
110 * | inside of the packfile and we can stop searching.
111 * | Before returning, we verify that the packfile behind the
112 * | index we are searching still exists on disk.
113 * |
114 * |-# pack_entry_find_offset
115 * | | Mmap the actual index file to disk if it hasn't been opened
116 * | | yet, and run a binary search through it to find the OID.
117 * | | See <http://book.git-scm.com/7_the_packfile.html> for specifics
118 * | | on the Packfile Index format and how we find entries in it.
119 * | |
120 * | |-# pack_index_open
121 * | | Guess the name of the index based on the full path to the
122 * | | packfile, open it and verify its contents. Only if the index
123 * | | has not been opened already.
124 * | |
125 * | |-# pack_index_check
126 * | Mmap the index file and do a quick run through the header
127 * | to guess the index version (right now we support v1 and v2),
128 * | and to verify that the size of the index makes sense.
129 * |
130 * |-# packfile_open
131 * See `packfile_open` in Chapter 3
132 *
133 *
134 *
135 * Chapter 3: The neverending story...
136 * A standard packed `lookup` query for an OID
137 * --------------------------------------------------
138 * TODO
139 *
140 */
141
142
143 /***********************************************************
144 *
145 * FORWARD DECLARATIONS
146 *
147 ***********************************************************/
148
149 static void pack_window_free_all(struct pack_backend *backend, struct git_pack_file *p);
150 static int pack_window_contains(git_mwindow *win, off_t offset);
151
152 static int packfile_sort__cb(const void *a_, const void *b_);
153
154 static int packfile_load__cb(void *_data, char *path);
155 static int packfile_refresh_all(struct pack_backend *backend);
156
157 static int pack_entry_find(struct git_pack_entry *e,
158 struct pack_backend *backend, const git_oid *oid);
159
160 /* Can find the offset of an object given
161 * a prefix of an identifier.
162 * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
163 * is ambiguous.
164 * This method assumes that len is between
165 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
166 */
167 static int pack_entry_find_prefix(struct git_pack_entry *e,
168 struct pack_backend *backend,
169 const git_oid *short_oid,
170 unsigned int len);
171
172
173
174 /***********************************************************
175 *
176 * PACK WINDOW MANAGEMENT
177 *
178 ***********************************************************/
179
180 GIT_INLINE(void) pack_window_free_all(struct pack_backend *GIT_UNUSED(backend), struct git_pack_file *p)
181 {
182 GIT_UNUSED_ARG(backend);
183 git_mwindow_free_all(&p->mwf);
184 }
185
186 GIT_INLINE(int) pack_window_contains(git_mwindow *win, off_t offset)
187 {
188 /* We must promise at least 20 bytes (one hash) after the
189 * offset is available from this window, otherwise the offset
190 * is not actually in this window and a different window (which
191 * has that one hash excess) must be used. This is to support
192 * the object header and delta base parsing routines below.
193 */
194 return git_mwindow_contains(win, offset + 20);
195 }
196
197 static int packfile_sort__cb(const void *a_, const void *b_)
198 {
199 const struct git_pack_file *a = a_;
200 const struct git_pack_file *b = b_;
201 int st;
202
203 /*
204 * Local packs tend to contain objects specific to our
205 * variant of the project than remote ones. In addition,
206 * remote ones could be on a network mounted filesystem.
207 * Favor local ones for these reasons.
208 */
209 st = a->pack_local - b->pack_local;
210 if (st)
211 return -st;
212
213 /*
214 * Younger packs tend to contain more recent objects,
215 * and more recent objects tend to get accessed more
216 * often.
217 */
218 if (a->mtime < b->mtime)
219 return 1;
220 else if (a->mtime == b->mtime)
221 return 0;
222
223 return -1;
224 }
225
226
227
228 static int packfile_load__cb(void *_data, char *path)
229 {
230 struct pack_backend *backend = (struct pack_backend *)_data;
231 struct git_pack_file *pack;
232 int error;
233 size_t i;
234
235 if (git__suffixcmp(path, ".idx") != 0)
236 return GIT_SUCCESS; /* not an index */
237
238 for (i = 0; i < backend->packs.length; ++i) {
239 struct git_pack_file *p = git_vector_get(&backend->packs, i);
240 if (memcmp(p->pack_name, path, strlen(path) - strlen(".idx")) == 0)
241 return GIT_SUCCESS;
242 }
243
244 error = git_packfile_check(&pack, path);
245 if (error < GIT_SUCCESS)
246 return git__rethrow(error, "Failed to load packfile");
247
248 if (git_vector_insert(&backend->packs, pack) < GIT_SUCCESS) {
249 free(pack);
250 return GIT_ENOMEM;
251 }
252
253 return GIT_SUCCESS;
254 }
255
/*
 * Rescan the pack folder when its modification time has changed.
 *
 * Nothing is done for a backend without a pack folder, or when the
 * folder's mtime equals the one recorded at the previous scan. Otherwise
 * every entry of the folder is passed to packfile_load__cb, the pack
 * vector is sorted again and the new mtime is recorded.
 *
 * Returns GIT_SUCCESS, GIT_ENOTFOUND when the folder cannot be stat'ed or
 * is not a directory, or the rethrown error of the directory walk.
 */
static int packfile_refresh_all(struct pack_backend *backend)
{
	struct stat folder_stat;
	char scan_path[GIT_PATH_MAX];
	int error;

	if (backend->pack_folder == NULL)
		return GIT_SUCCESS;

	if (p_stat(backend->pack_folder, &folder_stat) < 0 || !S_ISDIR(folder_stat.st_mode))
		return git__throw(GIT_ENOTFOUND, "Failed to refresh packfiles. Backend not found");

	/* Folder untouched since the last scan: the pack list is current. */
	if (folder_stat.st_mtime == backend->pack_folder_mtime)
		return GIT_SUCCESS;

	/* The directory walk receives its own buffer holding the folder path. */
	strcpy(scan_path, backend->pack_folder);

	/* reload all packs */
	error = git_futils_direach(scan_path, GIT_PATH_MAX, packfile_load__cb, (void *)backend);
	if (error < GIT_SUCCESS)
		return git__rethrow(error, "Failed to refresh packfiles");

	git_vector_sort(&backend->packs);
	backend->pack_folder_mtime = folder_stat.st_mtime;

	return GIT_SUCCESS;
}
282
/*
 * Locate the object with full identifier `oid` in one of the known packs.
 *
 * The pack list is refreshed first. The pack that served the previous
 * successful lookup is tried before all others; on a hit in a different
 * pack, that pack becomes the new `last_found`.
 *
 * On success `e` describes the entry and GIT_SUCCESS is returned;
 * otherwise GIT_ENOTFOUND (or the rethrown refresh error) is returned.
 */
static int pack_entry_find(struct git_pack_entry *e, struct pack_backend *backend, const git_oid *oid)
{
	struct git_pack_file *candidate;
	size_t idx;
	int error;

	error = packfile_refresh_all(backend);
	if (error < GIT_SUCCESS)
		return git__rethrow(error, "Failed to find pack entry");

	/* Fast path: the pack that answered the previous query. */
	if (backend->last_found != NULL) {
		if (git_pack_entry_find(e, backend->last_found, oid, GIT_OID_HEXSZ) == GIT_SUCCESS)
			return GIT_SUCCESS;
	}

	for (idx = 0; idx < backend->packs.length; ++idx) {
		candidate = git_vector_get(&backend->packs, idx);

		/* Already probed on the fast path above. */
		if (candidate == backend->last_found)
			continue;

		if (git_pack_entry_find(e, candidate, oid, GIT_OID_HEXSZ) != GIT_SUCCESS)
			continue;

		backend->last_found = candidate;
		return GIT_SUCCESS;
	}

	return git__throw(GIT_ENOTFOUND, "Failed to find pack entry");
}
310
/*
 * Locate the object whose identifier starts with the first `len` hex
 * digits of `short_oid`.
 *
 * The pack list is refreshed first, then every known pack is probed,
 * starting with the one that served the previous successful lookup.
 * All packs have to be visited, because the prefix is ambiguous as soon
 * as two packs resolve it to *different* objects. The same object stored
 * in several packs is not an ambiguity: hits are compared by their full
 * oid and duplicates are ignored.
 *
 * `e` is written only for the first hit, and `backend->last_found` is
 * updated to the pack of that hit unless it already was that pack.
 *
 * `len` is assumed to lie between GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
 *
 * Returns GIT_SUCCESS, GIT_ENOTFOUND when no pack knows the prefix,
 * GIT_EAMBIGUOUSOIDPREFIX when the prefix matches several objects, or
 * the rethrown error of the pack list refresh.
 */
static int pack_entry_find_prefix(
	struct git_pack_entry *e,
	struct pack_backend *backend,
	const git_oid *short_oid,
	unsigned int len)
{
	struct git_pack_entry candidate;
	struct git_pack_file *cached;
	size_t i;
	int error;
	int found = 0;

	if ((error = packfile_refresh_all(backend)) < GIT_SUCCESS)
		return git__rethrow(error, "Failed to find pack entry");

	/* Remember which pack is probed up front so the loop can skip it. */
	cached = backend->last_found;

	if (cached != NULL) {
		error = git_pack_entry_find(&candidate, cached, short_oid, len);
		if (error == GIT_EAMBIGUOUSOIDPREFIX)
			return git__rethrow(error, "Failed to find pack entry. Ambiguous sha1 prefix");

		if (error == GIT_SUCCESS) {
			*e = candidate;
			found = 1;
		}
	}

	for (i = 0; i < backend->packs.length; ++i) {
		struct git_pack_file *p = git_vector_get(&backend->packs, i);

		if (p == cached)
			continue;

		error = git_pack_entry_find(&candidate, p, short_oid, len);
		if (error == GIT_EAMBIGUOUSOIDPREFIX)
			return git__rethrow(error, "Failed to find pack entry. Ambiguous sha1 prefix");

		if (error != GIT_SUCCESS)
			continue;

		if (found) {
			/*
			 * A second hit is only a conflict when it names another
			 * object; a copy of the same object in a further pack is
			 * harmless and the first hit is kept.
			 */
			if (git_oid_cmp(&candidate.sha1, &e->sha1) != 0)
				return git__throw(GIT_EAMBIGUOUSOIDPREFIX, "Failed to find pack entry. Ambiguous sha1 prefix");

			continue;
		}

		*e = candidate;
		backend->last_found = p;
		found = 1;
	}

	/* No underlying error to chain here, hence throw rather than rethrow. */
	if (!found)
		return git__throw(GIT_ENOTFOUND, "Failed to find pack entry");

	return GIT_SUCCESS;
}
360
361
362 /***********************************************************
363 *
364 * PACKED BACKEND PUBLIC API
365 *
366 * Implement the git_odb_backend API calls
367 *
368 ***********************************************************/
369
370 /*
371 int pack_backend__read_header(git_rawobj *obj, git_odb_backend *backend, const git_oid *oid)
372 {
373 pack_location location;
374
375 assert(obj && backend && oid);
376
377 if (locate_packfile(&location, (struct pack_backend *)backend, oid) < 0)
378 return GIT_ENOTFOUND;
379
380 return read_header_packed(obj, &location);
381 }
382 */
383
384 static int pack_backend__read(void **buffer_p, size_t *len_p, git_otype *type_p, git_odb_backend *backend, const git_oid *oid)
385 {
386 struct git_pack_entry e;
387 git_rawobj raw;
388 int error;
389
390 if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < GIT_SUCCESS)
391 return git__rethrow(error, "Failed to read pack backend");
392
393 if ((error = git_packfile_unpack(&raw, e.p, &e.offset)) < GIT_SUCCESS)
394 return git__rethrow(error, "Failed to read pack backend");
395
396 *buffer_p = raw.data;
397 *len_p = raw.len;
398 *type_p = raw.type;
399
400 return GIT_SUCCESS;
401 }
402
403 static int pack_backend__read_prefix(
404 git_oid *out_oid,
405 void **buffer_p,
406 size_t *len_p,
407 git_otype *type_p,
408 git_odb_backend *backend,
409 const git_oid *short_oid,
410 unsigned int len)
411 {
412 if (len < GIT_OID_MINPREFIXLEN)
413 return git__throw(GIT_EAMBIGUOUSOIDPREFIX, "Failed to read pack backend. Prefix length is lower than %d.", GIT_OID_MINPREFIXLEN);
414
415 if (len >= GIT_OID_HEXSZ) {
416 /* We can fall back to regular read method */
417 int error = pack_backend__read(buffer_p, len_p, type_p, backend, short_oid);
418 if (error == GIT_SUCCESS)
419 git_oid_cpy(out_oid, short_oid);
420
421 return error;
422 } else {
423 struct git_pack_entry e;
424 git_rawobj raw;
425 int error;
426
427 if ((error = pack_entry_find_prefix(&e, (struct pack_backend *)backend, short_oid, len)) < GIT_SUCCESS)
428 return git__rethrow(error, "Failed to read pack backend");
429
430 if ((error = git_packfile_unpack(&raw, e.p, &e.offset)) < GIT_SUCCESS)
431 return git__rethrow(error, "Failed to read pack backend");
432
433 *buffer_p = raw.data;
434 *len_p = raw.len;
435 *type_p = raw.type;
436 git_oid_cpy(out_oid, &e.sha1);
437 }
438
439 return GIT_SUCCESS;
440 }
441
442 static int pack_backend__exists(git_odb_backend *backend, const git_oid *oid)
443 {
444 struct git_pack_entry e;
445 return pack_entry_find(&e, (struct pack_backend *)backend, oid) == GIT_SUCCESS;
446 }
447
448 static void pack_backend__free(git_odb_backend *_backend)
449 {
450 struct pack_backend *backend;
451 size_t i;
452
453 assert(_backend);
454
455 backend = (struct pack_backend *)_backend;
456
457 for (i = 0; i < backend->packs.length; ++i) {
458 struct git_pack_file *p = git_vector_get(&backend->packs, i);
459 packfile_free(p);
460 }
461
462 git_vector_free(&backend->packs);
463 free(backend->pack_folder);
464 free(backend);
465 }
466
467 int git_odb_backend_pack(git_odb_backend **backend_out, const char *objects_dir)
468 {
469 struct pack_backend *backend;
470 char path[GIT_PATH_MAX];
471
472 backend = git__calloc(1, sizeof(struct pack_backend));
473 if (backend == NULL)
474 return GIT_ENOMEM;
475
476 if (git_vector_init(&backend->packs, 8, packfile_sort__cb) < GIT_SUCCESS) {
477 free(backend);
478 return GIT_ENOMEM;
479 }
480
481 git_path_join(path, objects_dir, "pack");
482 if (git_futils_isdir(path) == GIT_SUCCESS) {
483 backend->pack_folder = git__strdup(path);
484 backend->pack_folder_mtime = 0;
485
486 if (backend->pack_folder == NULL) {
487 free(backend);
488 return GIT_ENOMEM;
489 }
490 }
491
492 backend->parent.read = &pack_backend__read;
493 backend->parent.read_prefix = &pack_backend__read_prefix;
494 backend->parent.read_header = NULL;
495 backend->parent.exists = &pack_backend__exists;
496 backend->parent.free = &pack_backend__free;
497
498 *backend_out = (git_odb_backend *)backend;
499 return GIT_SUCCESS;
500 }