]> git.proxmox.com Git - libgit2.git/blame - src/odb_pack.c
Fix the build on Emscripten
[libgit2.git] / src / odb_pack.c
CommitLineData
7d7cd885 1/*
bb742ede 2 * Copyright (C) 2009-2011 the libgit2 contributors
7d7cd885 3 *
bb742ede
VM
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
7d7cd885
VM
6 */
7
8#include "common.h"
44908fe7
VM
9#include "git2/zlib.h"
10#include "git2/repository.h"
f1d01851 11#include "git2/oid.h"
7d7cd885
VM
12#include "fileops.h"
13#include "hash.h"
14#include "odb.h"
15#include "delta-apply.h"
dd453c4d 16#include "sha1_lookup.h"
7bfdb3d2 17#include "mwindow.h"
c7c9e183 18#include "pack.h"
7d7cd885 19
44908fe7 20#include "git2/odb_backend.h"
d12299fe 21
58d06cf1
VM
22struct pack_backend {
23 git_odb_backend parent;
24 git_vector packs;
a070f152 25 struct git_pack_file *last_found;
90d743cd 26 char *pack_folder;
c7b79af3 27 time_t pack_folder_mtime;
58d06cf1
VM
28};
29
30/**
31 * The wonderful tale of a Packed Object lookup query
32 * ===================================================
87d9869f
VM
33 * A riveting and epic story of epicness and ASCII
34 * art, presented by yours truly,
35 * Sir Vicent of Marti
58d06cf1
VM
36 *
37 *
38 * Chapter 1: Once upon a time...
39 * Initialization of the Pack Backend
40 * --------------------------------------------------
41 *
42 * # git_odb_backend_pack
43 * | Creates the pack backend structure, initializes the
44 * | callback pointers to our default read() and exist() methods,
45 * | and tries to preload all the known packfiles in the ODB.
87d9869f 46 * |
58d06cf1 47 * |-# packfile_load_all
87d9869f
VM
48 * | Tries to find the `pack` folder, if it exists. ODBs without
49 * | a pack folder are ignored altogether. If there's a `pack` folder
50 * | we run a `dirent` callback through every file in the pack folder
51 * | to find our packfiles. The packfiles are then sorted according
52 * | to a sorting callback.
53 * |
54 * |-# packfile_load__cb
55 * | | This callback is called from `dirent` with every single file
56 * | | inside the pack folder. We find the packs by actually locating
57 * | | their index (ends in ".idx"). From that index, we verify that
58 * | | the corresponding packfile exists and is valid, and if so, we
59 * | | add it to the pack list.
60 * | |
61 * | |-# packfile_check
62 * | Make sure that there's a packfile to back this index, and store
63 * | some very basic information regarding the packfile itself,
64 * | such as the full path, the size, and the modification time.
65 * | We don't actually open the packfile to check for internal consistency.
66 * |
67 * |-# packfile_sort__cb
68 * Sort all the preloaded packs according to some specific criteria:
69 * we prioritize the "newer" packs because it's more likely they
70 * contain the objects we are looking for, and we prioritize local
71 * packs over remote ones.
58d06cf1
VM
72 *
73 *
74 *
75 * Chapter 2: To be, or not to be...
76 * A standard packed `exist` query for an OID
77 * --------------------------------------------------
78 *
87d9869f
VM
79 * # pack_backend__exists
80 * | Check if the given SHA1 oid exists in any of the packs
81 * | that have been loaded for our ODB.
82 * |
83 * |-# pack_entry_find
84 * | Iterate through all the packs that have been preloaded
85 * | (starting by the pack where the latest object was found)
86 * | to try to find the OID in one of them.
87 * |
88 * |-# pack_entry_find1
89 * | Check the index of an individual pack to see if the SHA1
90 * | OID can be found. If we can find the offset to that SHA1
91 * | inside of the index, that means the object is contained
92 * | inside of the packfile and we can stop searching.
93 * | Before returning, we verify that the packfile behind the
94 * | index we are searching still exists on disk.
95 * |
96 * |-# pack_entry_find_offset
97 * | | Mmap the actual index file to disk if it hasn't been opened
98 * | | yet, and run a binary search through it to find the OID.
99 * | | See <http://book.git-scm.com/7_the_packfile.html> for specifics
100 * | | on the Packfile Index format and how we find entries in it.
101 * | |
102 * | |-# pack_index_open
103 * | | Guess the name of the index based on the full path to the
104 * | | packfile, open it and verify its contents. Only if the index
105 * | | has not been opened already.
106 * | |
107 * | |-# pack_index_check
108 * | Mmap the index file and do a quick run through the header
109 * | to guess the index version (right now we support v1 and v2),
110 * | and to verify that the size of the index makes sense.
111 * |
112 * |-# packfile_open
113 * See `packfile_open` in Chapter 3
58d06cf1
VM
114 *
115 *
116 *
117 * Chapter 3: The neverending story...
118 * A standard packed `lookup` query for an OID
119 * --------------------------------------------------
120 * TODO
121 *
122 */
7d7cd885
VM
123
124
7d7cd885
VM
125/***********************************************************
126 *
58d06cf1 127 * FORWARD DECLARATIONS
7d7cd885
VM
128 *
129 ***********************************************************/
130
a070f152 131static void pack_window_free_all(struct pack_backend *backend, struct git_pack_file *p);
7bfdb3d2 132static int pack_window_contains(git_mwindow *win, off_t offset);
7d7cd885 133
58d06cf1 134static int packfile_sort__cb(const void *a_, const void *b_);
7d7cd885 135
97769280 136static int packfile_load__cb(void *_data, git_buf *path);
90d743cd 137static int packfile_refresh_all(struct pack_backend *backend);
7d7cd885 138
a070f152 139static int pack_entry_find(struct git_pack_entry *e,
58d06cf1 140 struct pack_backend *backend, const git_oid *oid);
7d7cd885 141
dd453c4d
MP
142/* Can find the offset of an object given
143 * a prefix of an identifier.
144 * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
145 * is ambiguous.
ac2b94ad
MP
146 * This method assumes that len is between
147 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
dd453c4d 148 */
a070f152 149static int pack_entry_find_prefix(struct git_pack_entry *e,
dd453c4d
MP
150 struct pack_backend *backend,
151 const git_oid *short_oid,
152 unsigned int len);
153
58d06cf1
VM
154
155
156/***********************************************************
157 *
158 * PACK WINDOW MANAGEMENT
159 *
160 ***********************************************************/
7d7cd885 161
a070f152 162GIT_INLINE(void) pack_window_free_all(struct pack_backend *GIT_UNUSED(backend), struct git_pack_file *p)
7d7cd885 163{
f6867e63 164 GIT_UNUSED_ARG(backend);
7bfdb3d2 165 git_mwindow_free_all(&p->mwf);
7d7cd885
VM
166}
167
7bfdb3d2 168GIT_INLINE(int) pack_window_contains(git_mwindow *win, off_t offset)
7d7cd885 169{
58d06cf1
VM
170 /* We must promise at least 20 bytes (one hash) after the
171 * offset is available from this window, otherwise the offset
172 * is not actually in this window and a different window (which
87d9869f 173 * has that one hash excess) must be used. This is to support
58d06cf1
VM
174 * the object header and delta base parsing routines below.
175 */
7bfdb3d2 176 return git_mwindow_contains(win, offset + 20);
7d7cd885
VM
177}
178
58d06cf1 179static int packfile_sort__cb(const void *a_, const void *b_)
7d7cd885 180{
a070f152
CMN
181 const struct git_pack_file *a = a_;
182 const struct git_pack_file *b = b_;
58d06cf1 183 int st;
7d7cd885 184
58d06cf1
VM
185 /*
186 * Local packs tend to contain objects specific to our
87d9869f 187 * variant of the project than remote ones. In addition,
58d06cf1
VM
188 * remote ones could be on a network mounted filesystem.
189 * Favor local ones for these reasons.
190 */
191 st = a->pack_local - b->pack_local;
192 if (st)
193 return -st;
194
195 /*
196 * Younger packs tend to contain more recent objects,
197 * and more recent objects tend to get accessed more
198 * often.
199 */
200 if (a->mtime < b->mtime)
201 return 1;
202 else if (a->mtime == b->mtime)
203 return 0;
204
205 return -1;
7d7cd885
VM
206}
207
58d06cf1 208
7d7cd885 209
97769280 210static int packfile_load__cb(void *_data, git_buf *path)
7d7cd885 211{
90d743cd 212 struct pack_backend *backend = (struct pack_backend *)_data;
a070f152 213 struct git_pack_file *pack;
58d06cf1 214 int error;
90d743cd 215 size_t i;
7d7cd885 216
97769280 217 if (git__suffixcmp(path->ptr, ".idx") != 0)
58d06cf1 218 return GIT_SUCCESS; /* not an index */
7d7cd885 219
90d743cd 220 for (i = 0; i < backend->packs.length; ++i) {
a070f152 221 struct git_pack_file *p = git_vector_get(&backend->packs, i);
97769280 222 if (memcmp(p->pack_name, path->ptr, path->size - strlen(".idx")) == 0)
90d743cd
VM
223 return GIT_SUCCESS;
224 }
7d7cd885 225
97769280 226 error = git_packfile_check(&pack, path->ptr);
599297fd
SS
227 if (error == GIT_ENOTFOUND) {
228 /* ignore missing .pack file as git does */
229 return GIT_SUCCESS;
230 } else if (error < GIT_SUCCESS)
267d539f 231 return git__rethrow(error, "Failed to load packfile");
7d7cd885 232
90d743cd 233 if (git_vector_insert(&backend->packs, pack) < GIT_SUCCESS) {
3286c408 234 git__free(pack);
58d06cf1 235 return GIT_ENOMEM;
7d7cd885 236 }
58d06cf1
VM
237
238 return GIT_SUCCESS;
7d7cd885
VM
239}
240
/*
 * Rescan the pack folder when its modification time differs from the
 * one recorded by the previous successful scan.
 *
 * Returns GIT_SUCCESS when the backend has no pack folder, when the
 * folder is unchanged, or when the rescan completed. Returns
 * GIT_ENOTFOUND when the folder can no longer be stat'ed as a
 * directory, or the error raised while building the path or walking
 * the directory.
 */
static int packfile_refresh_all(struct pack_backend *backend)
{
	git_buf path = GIT_BUF_INIT;
	struct stat st;
	int error;

	if (backend->pack_folder == NULL)
		return GIT_SUCCESS;

	if (p_stat(backend->pack_folder, &st) < 0 || !S_ISDIR(st.st_mode))
		return git__throw(GIT_ENOTFOUND, "Failed to refresh packfiles. Backend not found");

	if (st.st_mtime == backend->pack_folder_mtime)
		return GIT_SUCCESS; /* nothing changed since the last scan */

	/*
	 * Reload all packs. The walk is only started when the path buffer
	 * was actually populated; a failure there used to go unnoticed.
	 */
	error = git_buf_sets(&path, backend->pack_folder);
	if (error >= GIT_SUCCESS)
		error = git_path_direach(&path, packfile_load__cb, (void *)backend);

	git_buf_free(&path);

	if (error < GIT_SUCCESS)
		return git__rethrow(error, "Failed to refresh packfiles");

	git_vector_sort(&backend->packs);

	/* Recorded only after a complete scan, so a failed one is retried */
	backend->pack_folder_mtime = st.st_mtime;

	return GIT_SUCCESS;
}
269
/*
 * Locate the pack entry for the full object id `oid`.
 *
 * The pack folder is refreshed first. The pack that satisfied the
 * previous lookup is probed before the others; on success `e` is filled
 * in and the matching pack is remembered in `backend->last_found`.
 *
 * Returns GIT_SUCCESS when found, GIT_ENOTFOUND when no pack contains
 * the object, or the error raised by the refresh.
 */
static int pack_entry_find(struct git_pack_entry *e, struct pack_backend *backend, const git_oid *oid)
{
	struct git_pack_file *cached;
	size_t idx;
	int error = packfile_refresh_all(backend);

	if (error < GIT_SUCCESS)
		return git__rethrow(error, "Failed to find pack entry");

	cached = backend->last_found;
	if (cached != NULL &&
		git_pack_entry_find(e, cached, oid, GIT_OID_HEXSZ) == GIT_SUCCESS)
		return GIT_SUCCESS;

	for (idx = 0; idx < backend->packs.length; ++idx) {
		struct git_pack_file *candidate = git_vector_get(&backend->packs, idx);

		/* already probed above */
		if (candidate == cached)
			continue;

		if (git_pack_entry_find(e, candidate, oid, GIT_OID_HEXSZ) != GIT_SUCCESS)
			continue;

		backend->last_found = candidate;
		return GIT_SUCCESS;
	}

	return git__throw(GIT_ENOTFOUND, "Failed to find pack entry");
}
297
/*
 * Locate the pack entry whose object id starts with the first `len`
 * hex digits of `short_oid`.
 *
 * The pack folder is refreshed first, then every known pack is probed
 * (the pack that satisfied the previous lookup goes first). The same
 * object may legitimately be stored in more than one pack, so hits are
 * compared by their full object id: only hits that resolve to
 * *different* objects make the prefix ambiguous.
 *
 * On success `*e` describes the first hit and `backend->last_found`
 * points at the pack it came from.
 *
 * Returns GIT_SUCCESS when exactly one object matches, GIT_ENOTFOUND
 * when none does, GIT_EAMBIGUOUSOIDPREFIX when the prefix matches
 * several objects (within one pack or across packs), or the error
 * raised by the refresh.
 */
static int pack_entry_find_prefix(
	struct git_pack_entry *e,
	struct pack_backend *backend,
	const git_oid *short_oid,
	unsigned int len)
{
	struct git_pack_file *cached;
	struct git_pack_entry hit;
	int found = 0;
	int error;
	size_t i;

	if ((error = packfile_refresh_all(backend)) < GIT_SUCCESS)
		return git__rethrow(error, "Failed to find pack entry");

	/*
	 * Snapshot: `backend->last_found` may be reassigned inside the loop
	 * below, but the pack to skip is the one probed right here.
	 */
	cached = backend->last_found;

	if (cached != NULL) {
		error = git_pack_entry_find(&hit, cached, short_oid, len);
		if (error == GIT_EAMBIGUOUSOIDPREFIX)
			return git__rethrow(error, "Failed to find pack entry. Ambiguous sha1 prefix");

		if (error == GIT_SUCCESS) {
			*e = hit;
			found = 1;
		}
	}

	for (i = 0; i < backend->packs.length; ++i) {
		struct git_pack_file *p = git_vector_get(&backend->packs, i);

		if (p == cached)
			continue;

		/* Probe into a scratch entry so `*e` keeps the first hit */
		error = git_pack_entry_find(&hit, p, short_oid, len);
		if (error == GIT_EAMBIGUOUSOIDPREFIX)
			return git__rethrow(error, "Failed to find pack entry. Ambiguous sha1 prefix");

		if (error != GIT_SUCCESS)
			continue; /* not in this pack */

		if (!found) {
			*e = hit;
			backend->last_found = p;
			found = 1;
		} else if (git_oid_cmp(&hit.sha1, &e->sha1) != 0) {
			/* A second, different object shares the prefix */
			return git__throw(GIT_EAMBIGUOUSOIDPREFIX, "Failed to find pack entry. Ambiguous sha1 prefix");
		}
		/* else: another copy of the object already found; ignore it */
	}

	if (!found)
		return git__throw(GIT_ENOTFOUND, "Failed to find pack entry");

	return GIT_SUCCESS;
}
347
58d06cf1 348
7d7cd885
VM
349/***********************************************************
350 *
351 * PACKED BACKEND PUBLIC API
352 *
353 * Implement the git_odb_backend API calls
354 *
355 ***********************************************************/
356
58d06cf1 357/*
7d7cd885
VM
358int pack_backend__read_header(git_rawobj *obj, git_odb_backend *backend, const git_oid *oid)
359{
360 pack_location location;
361
362 assert(obj && backend && oid);
363
58d06cf1 364 if (locate_packfile(&location, (struct pack_backend *)backend, oid) < 0)
7d7cd885
VM
365 return GIT_ENOTFOUND;
366
367 return read_header_packed(obj, &location);
368}
58d06cf1 369*/
7d7cd885 370
d568d585 371static int pack_backend__read(void **buffer_p, size_t *len_p, git_otype *type_p, git_odb_backend *backend, const git_oid *oid)
7d7cd885 372{
a070f152 373 struct git_pack_entry e;
72a3fe42 374 git_rawobj raw;
58d06cf1 375 int error;
7d7cd885 376
58d06cf1 377 if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < GIT_SUCCESS)
267d539f 378 return git__rethrow(error, "Failed to read pack backend");
7d7cd885 379
b5b474dd 380 if ((error = git_packfile_unpack(&raw, e.p, &e.offset)) < GIT_SUCCESS)
267d539f 381 return git__rethrow(error, "Failed to read pack backend");
72a3fe42
VM
382
383 *buffer_p = raw.data;
384 *len_p = raw.len;
385 *type_p = raw.type;
386
387 return GIT_SUCCESS;
7d7cd885
VM
388}
389
d568d585 390static int pack_backend__read_prefix(
d0323a5f
VM
391 git_oid *out_oid,
392 void **buffer_p,
393 size_t *len_p,
394 git_otype *type_p,
395 git_odb_backend *backend,
396 const git_oid *short_oid,
397 unsigned int len)
ecd6fdf1 398{
6c8ca697
MP
399 if (len < GIT_OID_MINPREFIXLEN)
400 return git__throw(GIT_EAMBIGUOUSOIDPREFIX, "Failed to read pack backend. Prefix length is lower than %d.", GIT_OID_MINPREFIXLEN);
dd453c4d 401
6c8ca697
MP
402 if (len >= GIT_OID_HEXSZ) {
403 /* We can fall back to regular read method */
404 int error = pack_backend__read(buffer_p, len_p, type_p, backend, short_oid);
405 if (error == GIT_SUCCESS)
406 git_oid_cpy(out_oid, short_oid);
dd453c4d 407
6c8ca697
MP
408 return error;
409 } else {
a070f152 410 struct git_pack_entry e;
6c8ca697
MP
411 git_rawobj raw;
412 int error;
dd453c4d 413
d0323a5f 414 if ((error = pack_entry_find_prefix(&e, (struct pack_backend *)backend, short_oid, len)) < GIT_SUCCESS)
6c8ca697
MP
415 return git__rethrow(error, "Failed to read pack backend");
416
b5b474dd 417 if ((error = git_packfile_unpack(&raw, e.p, &e.offset)) < GIT_SUCCESS)
6c8ca697 418 return git__rethrow(error, "Failed to read pack backend");
dd453c4d 419
6c8ca697
MP
420 *buffer_p = raw.data;
421 *len_p = raw.len;
422 *type_p = raw.type;
423 git_oid_cpy(out_oid, &e.sha1);
424 }
dd453c4d
MP
425
426 return GIT_SUCCESS;
ecd6fdf1
MP
427}
428
d568d585 429static int pack_backend__exists(git_odb_backend *backend, const git_oid *oid)
7d7cd885 430{
a070f152 431 struct git_pack_entry e;
58d06cf1 432 return pack_entry_find(&e, (struct pack_backend *)backend, oid) == GIT_SUCCESS;
7d7cd885
VM
433}
434
d568d585 435static void pack_backend__free(git_odb_backend *_backend)
7d7cd885 436{
58d06cf1
VM
437 struct pack_backend *backend;
438 size_t i;
7d7cd885
VM
439
440 assert(_backend);
441
58d06cf1 442 backend = (struct pack_backend *)_backend;
7d7cd885 443
58d06cf1 444 for (i = 0; i < backend->packs.length; ++i) {
a070f152
CMN
445 struct git_pack_file *p = git_vector_get(&backend->packs, i);
446 packfile_free(p);
58d06cf1 447 }
7d7cd885 448
58d06cf1 449 git_vector_free(&backend->packs);
3286c408
VM
450 git__free(backend->pack_folder);
451 git__free(backend);
7d7cd885
VM
452}
453
454int git_odb_backend_pack(git_odb_backend **backend_out, const char *objects_dir)
455{
97769280
RB
456 struct pack_backend *backend = NULL;
457 git_buf path = GIT_BUF_INIT;
458 int error = GIT_SUCCESS;
7d7cd885 459
58d06cf1 460 backend = git__calloc(1, sizeof(struct pack_backend));
7d7cd885
VM
461 if (backend == NULL)
462 return GIT_ENOMEM;
463
97769280
RB
464 error = git_vector_init(&backend->packs, 8, packfile_sort__cb);
465 if (error < GIT_SUCCESS)
466 goto cleanup;
7d7cd885 467
97769280
RB
468 error = git_buf_joinpath(&path, objects_dir, "pack");
469 if (error < GIT_SUCCESS)
470 goto cleanup;
90d743cd 471
1744fafe 472 if (git_path_isdir(git_buf_cstr(&path)) == GIT_SUCCESS) {
97769280
RB
473 backend->pack_folder = git_buf_detach(&path);
474 backend->pack_folder_mtime = 0;
58d06cf1 475 }
7d7cd885
VM
476
477 backend->parent.read = &pack_backend__read;
d0323a5f 478 backend->parent.read_prefix = &pack_backend__read_prefix;
58d06cf1 479 backend->parent.read_header = NULL;
7d7cd885
VM
480 backend->parent.exists = &pack_backend__exists;
481 backend->parent.free = &pack_backend__free;
482
7d7cd885 483 *backend_out = (git_odb_backend *)backend;
97769280
RB
484
485cleanup:
486 if (error < GIT_SUCCESS)
487 git__free(backend);
488 git_buf_free(&path);
489
490 return error;
7d7cd885 491}