]> git.proxmox.com Git - libgit2.git/blob - src/odb_pack.c
86c858df1116f0a9ae8b2506059d1e4aeb25a929
[libgit2.git] / src / odb_pack.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "common.h"
9
10 #include <zlib.h>
11 #include "git2/repository.h"
12 #include "git2/indexer.h"
13 #include "git2/sys/odb_backend.h"
14 #include "futils.h"
15 #include "hash.h"
16 #include "odb.h"
17 #include "delta.h"
18 #include "mwindow.h"
19 #include "pack.h"
20
21 #include "git2/odb_backend.h"
22
23 /* re-freshen pack files no more than every 2 seconds */
24 #define FRESHEN_FREQUENCY 2
25
26 struct pack_backend {
27 git_odb_backend parent;
28 git_vector packs;
29 struct git_pack_file *last_found;
30 char *pack_folder;
31 };
32
33 struct pack_writepack {
34 struct git_odb_writepack parent;
35 git_indexer *indexer;
36 };
37
38 /**
39 * The wonderful tale of a Packed Object lookup query
40 * ===================================================
41 * A riveting and epic story of epicness and ASCII
42 * art, presented by yours truly,
43 * Sir Vicent of Marti
44 *
45 *
46 * Chapter 1: Once upon a time...
47 * Initialization of the Pack Backend
48 * --------------------------------------------------
49 *
50 * # git_odb_backend_pack
51 * | Creates the pack backend structure, initializes the
52 * | callback pointers to our default read() and exist() methods,
53 * | and tries to preload all the known packfiles in the ODB.
54 * |
55 * |-# packfile_load_all
56 * | Tries to find the `pack` folder, if it exists. ODBs without
57 * | a pack folder are ignored altogether. If there's a `pack` folder
58 * | we run a `dirent` callback through every file in the pack folder
59 * | to find our packfiles. The packfiles are then sorted according
60 * | to a sorting callback.
61 * |
62 * |-# packfile_load__cb
63 * | | This callback is called from `dirent` with every single file
64 * | | inside the pack folder. We find the packs by actually locating
65 * | | their index (ends in ".idx"). From that index, we verify that
66 * | | the corresponding packfile exists and is valid, and if so, we
67 * | | add it to the pack list.
68 * | |
69 * | |-# packfile_check
70 * | Make sure that there's a packfile to back this index, and store
71 * | some very basic information regarding the packfile itself,
72 * | such as the full path, the size, and the modification time.
73 * | We don't actually open the packfile to check for internal consistency.
74 * |
75 * |-# packfile_sort__cb
76 * Sort all the preloaded packs according to some specific criteria:
77 * we prioritize the "newer" packs because it's more likely they
78 * contain the objects we are looking for, and we prioritize local
79 * packs over remote ones.
80 *
81 *
82 *
83 * Chapter 2: To be, or not to be...
84 * A standard packed `exist` query for an OID
85 * --------------------------------------------------
86 *
87 * # pack_backend__exists
88 * | Check if the given SHA1 oid exists in any of the packs
89 * | that have been loaded for our ODB.
90 * |
91 * |-# pack_entry_find
92 * | Iterate through all the packs that have been preloaded
93 * | (starting by the pack where the latest object was found)
94 * | to try to find the OID in one of them.
95 * |
96 * |-# pack_entry_find1
97 * | Check the index of an individual pack to see if the SHA1
98 * | OID can be found. If we can find the offset to that SHA1
99 * | inside of the index, that means the object is contained
100 * | inside of the packfile and we can stop searching.
101 * | Before returning, we verify that the packfile behind the
102 * | index we are searching still exists on disk.
103 * |
104 * |-# pack_entry_find_offset
105 * | | Mmap the actual index file from disk if it hasn't been opened
106 * | | yet, and run a binary search through it to find the OID.
107 * | | See <http://book.git-scm.com/7_the_packfile.html> for specifics
108 * | | on the Packfile Index format and how do we find entries in it.
109 * | |
110 * | |-# pack_index_open
111 * | | Guess the name of the index based on the full path to the
112 * | | packfile, open it and verify its contents. Only if the index
113 * | | has not been opened already.
114 * | |
115 * | |-# pack_index_check
116 * | Mmap the index file and do a quick run through the header
117 * | to guess the index version (right now we support v1 and v2),
118 * | and to verify that the size of the index makes sense.
119 * |
120 * |-# packfile_open
121 * See `packfile_open` in Chapter 3
122 *
123 *
124 *
125 * Chapter 3: The neverending story...
126 * A standard packed `lookup` query for an OID
127 * --------------------------------------------------
128 * TODO
129 *
130 */
131
132
133 /***********************************************************
134 *
135 * FORWARD DECLARATIONS
136 *
137 ***********************************************************/
138
139 static int packfile_sort__cb(const void *a_, const void *b_);
140
141 static int packfile_load__cb(void *_data, git_buf *path);
142
143 static int pack_entry_find(struct git_pack_entry *e,
144 struct pack_backend *backend, const git_oid *oid);
145
146 /* Can find the offset of an object given
147 * a prefix of an identifier.
148 * Sets GIT_EAMBIGUOUS if short oid is ambiguous.
149 * This method assumes that len is between
150 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
151 */
152 static int pack_entry_find_prefix(
153 struct git_pack_entry *e,
154 struct pack_backend *backend,
155 const git_oid *short_oid,
156 size_t len);
157
158
159
160 /***********************************************************
161 *
162 * PACK WINDOW MANAGEMENT
163 *
164 ***********************************************************/
165
166 static int packfile_sort__cb(const void *a_, const void *b_)
167 {
168 const struct git_pack_file *a = a_;
169 const struct git_pack_file *b = b_;
170 int st;
171
172 /*
173 * Local packs tend to contain objects specific to our
174 * variant of the project than remote ones. In addition,
175 * remote ones could be on a network mounted filesystem.
176 * Favor local ones for these reasons.
177 */
178 st = a->pack_local - b->pack_local;
179 if (st)
180 return -st;
181
182 /*
183 * Younger packs tend to contain more recent objects,
184 * and more recent objects tend to get accessed more
185 * often.
186 */
187 if (a->mtime < b->mtime)
188 return 1;
189 else if (a->mtime == b->mtime)
190 return 0;
191
192 return -1;
193 }
194
195
196 static int packfile_load__cb(void *data, git_buf *path)
197 {
198 struct pack_backend *backend = data;
199 struct git_pack_file *pack;
200 const char *path_str = git_buf_cstr(path);
201 size_t i, cmp_len = git_buf_len(path);
202 int error;
203
204 if (cmp_len <= strlen(".idx") || git__suffixcmp(path_str, ".idx") != 0)
205 return 0; /* not an index */
206
207 cmp_len -= strlen(".idx");
208
209 for (i = 0; i < backend->packs.length; ++i) {
210 struct git_pack_file *p = git_vector_get(&backend->packs, i);
211
212 if (strncmp(p->pack_name, path_str, cmp_len) == 0)
213 return 0;
214 }
215
216 error = git_mwindow_get_pack(&pack, path->ptr);
217
218 /* ignore missing .pack file as git does */
219 if (error == GIT_ENOTFOUND) {
220 git_error_clear();
221 return 0;
222 }
223
224 if (!error)
225 error = git_vector_insert(&backend->packs, pack);
226
227 return error;
228
229 }
230
231 static int pack_entry_find_inner(
232 struct git_pack_entry *e,
233 struct pack_backend *backend,
234 const git_oid *oid,
235 struct git_pack_file *last_found)
236 {
237 size_t i;
238
239 if (last_found &&
240 git_pack_entry_find(e, last_found, oid, GIT_OID_HEXSZ) == 0)
241 return 0;
242
243 for (i = 0; i < backend->packs.length; ++i) {
244 struct git_pack_file *p;
245
246 p = git_vector_get(&backend->packs, i);
247 if (p == last_found)
248 continue;
249
250 if (git_pack_entry_find(e, p, oid, GIT_OID_HEXSZ) == 0) {
251 backend->last_found = p;
252 return 0;
253 }
254 }
255
256 return -1;
257 }
258
259 static int pack_entry_find(struct git_pack_entry *e, struct pack_backend *backend, const git_oid *oid)
260 {
261 struct git_pack_file *last_found = backend->last_found;
262
263 if (backend->last_found &&
264 git_pack_entry_find(e, backend->last_found, oid, GIT_OID_HEXSZ) == 0)
265 return 0;
266
267 if (!pack_entry_find_inner(e, backend, oid, last_found))
268 return 0;
269
270 return git_odb__error_notfound(
271 "failed to find pack entry", oid, GIT_OID_HEXSZ);
272 }
273
274 static int pack_entry_find_prefix(
275 struct git_pack_entry *e,
276 struct pack_backend *backend,
277 const git_oid *short_oid,
278 size_t len)
279 {
280 int error;
281 size_t i;
282 git_oid found_full_oid = {{0}};
283 bool found = false;
284 struct git_pack_file *last_found = backend->last_found;
285
286 if (last_found) {
287 error = git_pack_entry_find(e, last_found, short_oid, len);
288 if (error == GIT_EAMBIGUOUS)
289 return error;
290 if (!error) {
291 git_oid_cpy(&found_full_oid, &e->sha1);
292 found = true;
293 }
294 }
295
296 for (i = 0; i < backend->packs.length; ++i) {
297 struct git_pack_file *p;
298
299 p = git_vector_get(&backend->packs, i);
300 if (p == last_found)
301 continue;
302
303 error = git_pack_entry_find(e, p, short_oid, len);
304 if (error == GIT_EAMBIGUOUS)
305 return error;
306 if (!error) {
307 if (found && git_oid_cmp(&e->sha1, &found_full_oid))
308 return git_odb__error_ambiguous("found multiple pack entries");
309 git_oid_cpy(&found_full_oid, &e->sha1);
310 found = true;
311 backend->last_found = p;
312 }
313 }
314
315 if (!found)
316 return git_odb__error_notfound("no matching pack entry for prefix",
317 short_oid, len);
318 else
319 return 0;
320 }
321
322
323 /***********************************************************
324 *
325 * PACKED BACKEND PUBLIC API
326 *
327 * Implement the git_odb_backend API calls
328 *
329 ***********************************************************/
330 static int pack_backend__refresh(git_odb_backend *backend_)
331 {
332 int error;
333 struct stat st;
334 git_buf path = GIT_BUF_INIT;
335 struct pack_backend *backend = (struct pack_backend *)backend_;
336
337 if (backend->pack_folder == NULL)
338 return 0;
339
340 if (p_stat(backend->pack_folder, &st) < 0 || !S_ISDIR(st.st_mode))
341 return git_odb__error_notfound("failed to refresh packfiles", NULL, 0);
342
343 git_buf_sets(&path, backend->pack_folder);
344
345 /* reload all packs */
346 error = git_path_direach(&path, 0, packfile_load__cb, backend);
347
348 git_buf_dispose(&path);
349 git_vector_sort(&backend->packs);
350
351 return error;
352 }
353
354 static int pack_backend__read_header(
355 size_t *len_p, git_object_t *type_p,
356 struct git_odb_backend *backend, const git_oid *oid)
357 {
358 struct git_pack_entry e;
359 int error;
360
361 assert(len_p && type_p && backend && oid);
362
363 if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < 0)
364 return error;
365
366 return git_packfile_resolve_header(len_p, type_p, e.p, e.offset);
367 }
368
369 static int pack_backend__freshen(
370 git_odb_backend *backend, const git_oid *oid)
371 {
372 struct git_pack_entry e;
373 time_t now;
374 int error;
375
376 if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < 0)
377 return error;
378
379 now = time(NULL);
380
381 if (e.p->last_freshen > now - FRESHEN_FREQUENCY)
382 return 0;
383
384 if ((error = git_futils_touch(e.p->pack_name, &now)) < 0)
385 return error;
386
387 e.p->last_freshen = now;
388 return 0;
389 }
390
391 static int pack_backend__read(
392 void **buffer_p, size_t *len_p, git_object_t *type_p,
393 git_odb_backend *backend, const git_oid *oid)
394 {
395 struct git_pack_entry e;
396 git_rawobj raw = {NULL};
397 int error;
398
399 if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < 0 ||
400 (error = git_packfile_unpack(&raw, e.p, &e.offset)) < 0)
401 return error;
402
403 *buffer_p = raw.data;
404 *len_p = raw.len;
405 *type_p = raw.type;
406
407 return 0;
408 }
409
410 static int pack_backend__read_prefix(
411 git_oid *out_oid,
412 void **buffer_p,
413 size_t *len_p,
414 git_object_t *type_p,
415 git_odb_backend *backend,
416 const git_oid *short_oid,
417 size_t len)
418 {
419 int error = 0;
420
421 if (len < GIT_OID_MINPREFIXLEN)
422 error = git_odb__error_ambiguous("prefix length too short");
423
424 else if (len >= GIT_OID_HEXSZ) {
425 /* We can fall back to regular read method */
426 error = pack_backend__read(buffer_p, len_p, type_p, backend, short_oid);
427 if (!error)
428 git_oid_cpy(out_oid, short_oid);
429 } else {
430 struct git_pack_entry e;
431 git_rawobj raw = {NULL};
432
433 if ((error = pack_entry_find_prefix(
434 &e, (struct pack_backend *)backend, short_oid, len)) == 0 &&
435 (error = git_packfile_unpack(&raw, e.p, &e.offset)) == 0)
436 {
437 *buffer_p = raw.data;
438 *len_p = raw.len;
439 *type_p = raw.type;
440 git_oid_cpy(out_oid, &e.sha1);
441 }
442 }
443
444 return error;
445 }
446
447 static int pack_backend__exists(git_odb_backend *backend, const git_oid *oid)
448 {
449 struct git_pack_entry e;
450 return pack_entry_find(&e, (struct pack_backend *)backend, oid) == 0;
451 }
452
453 static int pack_backend__exists_prefix(
454 git_oid *out, git_odb_backend *backend, const git_oid *short_id, size_t len)
455 {
456 int error;
457 struct pack_backend *pb = (struct pack_backend *)backend;
458 struct git_pack_entry e = {0};
459
460 error = pack_entry_find_prefix(&e, pb, short_id, len);
461 git_oid_cpy(out, &e.sha1);
462 return error;
463 }
464
465 static int pack_backend__foreach(git_odb_backend *_backend, git_odb_foreach_cb cb, void *data)
466 {
467 int error;
468 struct git_pack_file *p;
469 struct pack_backend *backend;
470 unsigned int i;
471
472 assert(_backend && cb);
473 backend = (struct pack_backend *)_backend;
474
475 /* Make sure we know about the packfiles */
476 if ((error = pack_backend__refresh(_backend)) < 0)
477 return error;
478
479 git_vector_foreach(&backend->packs, i, p) {
480 if ((error = git_pack_foreach_entry(p, cb, data)) != 0)
481 return error;
482 }
483
484 return 0;
485 }
486
487 static int pack_backend__writepack_append(struct git_odb_writepack *_writepack, const void *data, size_t size, git_indexer_progress *stats)
488 {
489 struct pack_writepack *writepack = (struct pack_writepack *)_writepack;
490
491 assert(writepack);
492
493 return git_indexer_append(writepack->indexer, data, size, stats);
494 }
495
496 static int pack_backend__writepack_commit(struct git_odb_writepack *_writepack, git_indexer_progress *stats)
497 {
498 struct pack_writepack *writepack = (struct pack_writepack *)_writepack;
499
500 assert(writepack);
501
502 return git_indexer_commit(writepack->indexer, stats);
503 }
504
505 static void pack_backend__writepack_free(struct git_odb_writepack *_writepack)
506 {
507 struct pack_writepack *writepack = (struct pack_writepack *)_writepack;
508
509 assert(writepack);
510
511 git_indexer_free(writepack->indexer);
512 git__free(writepack);
513 }
514
515 static int pack_backend__writepack(struct git_odb_writepack **out,
516 git_odb_backend *_backend,
517 git_odb *odb,
518 git_indexer_progress_cb progress_cb,
519 void *progress_payload)
520 {
521 git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT;
522 struct pack_backend *backend;
523 struct pack_writepack *writepack;
524
525 assert(out && _backend);
526
527 *out = NULL;
528
529 opts.progress_cb = progress_cb;
530 opts.progress_cb_payload = progress_payload;
531
532 backend = (struct pack_backend *)_backend;
533
534 writepack = git__calloc(1, sizeof(struct pack_writepack));
535 GIT_ERROR_CHECK_ALLOC(writepack);
536
537 if (git_indexer_new(&writepack->indexer,
538 backend->pack_folder, 0, odb, &opts) < 0) {
539 git__free(writepack);
540 return -1;
541 }
542
543 writepack->parent.backend = _backend;
544 writepack->parent.append = pack_backend__writepack_append;
545 writepack->parent.commit = pack_backend__writepack_commit;
546 writepack->parent.free = pack_backend__writepack_free;
547
548 *out = (git_odb_writepack *)writepack;
549
550 return 0;
551 }
552
553 static void pack_backend__free(git_odb_backend *_backend)
554 {
555 struct pack_backend *backend;
556 size_t i;
557
558 assert(_backend);
559
560 backend = (struct pack_backend *)_backend;
561
562 for (i = 0; i < backend->packs.length; ++i) {
563 struct git_pack_file *p = git_vector_get(&backend->packs, i);
564 git_mwindow_put_pack(p);
565 }
566
567 git_vector_free(&backend->packs);
568 git__free(backend->pack_folder);
569 git__free(backend);
570 }
571
572 static int pack_backend__alloc(struct pack_backend **out, size_t initial_size)
573 {
574 struct pack_backend *backend = git__calloc(1, sizeof(struct pack_backend));
575 GIT_ERROR_CHECK_ALLOC(backend);
576
577 if (git_vector_init(&backend->packs, initial_size, packfile_sort__cb) < 0) {
578 git__free(backend);
579 return -1;
580 }
581
582 backend->parent.version = GIT_ODB_BACKEND_VERSION;
583
584 backend->parent.read = &pack_backend__read;
585 backend->parent.read_prefix = &pack_backend__read_prefix;
586 backend->parent.read_header = &pack_backend__read_header;
587 backend->parent.exists = &pack_backend__exists;
588 backend->parent.exists_prefix = &pack_backend__exists_prefix;
589 backend->parent.refresh = &pack_backend__refresh;
590 backend->parent.foreach = &pack_backend__foreach;
591 backend->parent.writepack = &pack_backend__writepack;
592 backend->parent.freshen = &pack_backend__freshen;
593 backend->parent.free = &pack_backend__free;
594
595 *out = backend;
596 return 0;
597 }
598
599 int git_odb_backend_one_pack(git_odb_backend **backend_out, const char *idx)
600 {
601 struct pack_backend *backend = NULL;
602 struct git_pack_file *packfile = NULL;
603
604 if (pack_backend__alloc(&backend, 1) < 0)
605 return -1;
606
607 if (git_mwindow_get_pack(&packfile, idx) < 0 ||
608 git_vector_insert(&backend->packs, packfile) < 0)
609 {
610 pack_backend__free((git_odb_backend *)backend);
611 return -1;
612 }
613
614 *backend_out = (git_odb_backend *)backend;
615 return 0;
616 }
617
618 int git_odb_backend_pack(git_odb_backend **backend_out, const char *objects_dir)
619 {
620 int error = 0;
621 struct pack_backend *backend = NULL;
622 git_buf path = GIT_BUF_INIT;
623
624 if (pack_backend__alloc(&backend, 8) < 0)
625 return -1;
626
627 if (!(error = git_buf_joinpath(&path, objects_dir, "pack")) &&
628 git_path_isdir(git_buf_cstr(&path)))
629 {
630 backend->pack_folder = git_buf_detach(&path);
631 error = pack_backend__refresh((git_odb_backend *)backend);
632 }
633
634 if (error < 0) {
635 pack_backend__free((git_odb_backend *)backend);
636 backend = NULL;
637 }
638
639 *backend_out = (git_odb_backend *)backend;
640
641 git_buf_dispose(&path);
642
643 return error;
644 }