]> git.proxmox.com Git - libgit2.git/blob - src/odb_pack.c
beab37b7f7836c5e0e2cdfcd4cb5364ccc653072
[libgit2.git] / src / odb_pack.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "common.h"
9
10 #include <zlib.h>
11 #include "git2/repository.h"
12 #include "git2/indexer.h"
13 #include "git2/sys/odb_backend.h"
14 #include "fileops.h"
15 #include "hash.h"
16 #include "odb.h"
17 #include "delta.h"
18 #include "sha1_lookup.h"
19 #include "mwindow.h"
20 #include "pack.h"
21
22 #include "git2/odb_backend.h"
23
/*
 * Minimum number of seconds that must elapse between two freshen
 * operations on the same pack file; more frequent requests are
 * treated as already satisfied.
 */
#define FRESHEN_FREQUENCY 2
26
27 struct pack_backend {
28 git_odb_backend parent;
29 git_vector packs;
30 struct git_pack_file *last_found;
31 char *pack_folder;
32 };
33
34 struct pack_writepack {
35 struct git_odb_writepack parent;
36 git_indexer *indexer;
37 };
38
39 /**
40 * The wonderful tale of a Packed Object lookup query
41 * ===================================================
42 * A riveting and epic story of epicness and ASCII
43 * art, presented by yours truly,
44 * Sir Vicent of Marti
45 *
46 *
47 * Chapter 1: Once upon a time...
48 * Initialization of the Pack Backend
49 * --------------------------------------------------
50 *
51 * # git_odb_backend_pack
52 * | Creates the pack backend structure, initializes the
53 * | callback pointers to our default read() and exist() methods,
54 * | and tries to preload all the known packfiles in the ODB.
55 * |
56 * |-# packfile_load_all
57 * | Tries to find the `pack` folder, if it exists. ODBs without
58 * | a pack folder are ignored altogether. If there's a `pack` folder
59 * | we run a `dirent` callback through every file in the pack folder
60 * | to find our packfiles. The packfiles are then sorted according
61 * | to a sorting callback.
62 * |
63 * |-# packfile_load__cb
64 * | | This callback is called from `dirent` with every single file
65 * | | inside the pack folder. We find the packs by actually locating
66 * | | their index (ends in ".idx"). From that index, we verify that
67 * | | the corresponding packfile exists and is valid, and if so, we
68 * | | add it to the pack list.
69 * | |
70 * | |-# packfile_check
71 * | Make sure that there's a packfile to back this index, and store
72 * | some very basic information regarding the packfile itself,
73 * | such as the full path, the size, and the modification time.
74 * | We don't actually open the packfile to check for internal consistency.
75 * |
76 * |-# packfile_sort__cb
77 * Sort all the preloaded packs according to some specific criteria:
78 * we prioritize the "newer" packs because it's more likely they
79 * contain the objects we are looking for, and we prioritize local
80 * packs over remote ones.
81 *
82 *
83 *
84 * Chapter 2: To be, or not to be...
85 * A standard packed `exist` query for an OID
86 * --------------------------------------------------
87 *
88 * # pack_backend__exists
89 * | Check if the given SHA1 oid exists in any of the packs
90 * | that have been loaded for our ODB.
91 * |
92 * |-# pack_entry_find
93 * | Iterate through all the packs that have been preloaded
94 * | (starting by the pack where the latest object was found)
95 * | to try to find the OID in one of them.
96 * |
97 * |-# pack_entry_find1
98 * | Check the index of an individual pack to see if the SHA1
99 * | OID can be found. If we can find the offset to that SHA1
100 * | inside of the index, that means the object is contained
101 * | inside of the packfile and we can stop searching.
102 * | Before returning, we verify that the packfile behind the
103 * | index we are searching still exists on disk.
104 * |
105 * |-# pack_entry_find_offset
106 * | | Mmap the actual index file from disk if it hasn't been opened
107 * | | yet, and run a binary search through it to find the OID.
108 * | | See <http://book.git-scm.com/7_the_packfile.html> for specifics
109 * | | on the Packfile Index format and how we find entries in it.
110 * | |
111 * | |-# pack_index_open
112 * | | Guess the name of the index based on the full path to the
113 * | | packfile, open it and verify its contents. Only if the index
114 * | | has not been opened already.
115 * | |
116 * | |-# pack_index_check
117 * | Mmap the index file and do a quick run through the header
118 * | to guess the index version (right now we support v1 and v2),
119 * | and to verify that the size of the index makes sense.
120 * |
121 * |-# packfile_open
122 * See `packfile_open` in Chapter 3
123 *
124 *
125 *
126 * Chapter 3: The neverending story...
127 * A standard packed `lookup` query for an OID
128 * --------------------------------------------------
129 * TODO
130 *
131 */
132
133
134 /***********************************************************
135 *
136 * FORWARD DECLARATIONS
137 *
138 ***********************************************************/
139
140 static int packfile_sort__cb(const void *a_, const void *b_);
141
142 static int packfile_load__cb(void *_data, git_buf *path);
143
144 static int pack_entry_find(struct git_pack_entry *e,
145 struct pack_backend *backend, const git_oid *oid);
146
147 /* Can find the offset of an object given
148 * a prefix of an identifier.
149 * Sets GIT_EAMBIGUOUS if short oid is ambiguous.
150 * This method assumes that len is between
151 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
152 */
153 static int pack_entry_find_prefix(
154 struct git_pack_entry *e,
155 struct pack_backend *backend,
156 const git_oid *short_oid,
157 size_t len);
158
159
160
161 /***********************************************************
162 *
163 * PACK WINDOW MANAGEMENT
164 *
165 ***********************************************************/
166
167 static int packfile_sort__cb(const void *a_, const void *b_)
168 {
169 const struct git_pack_file *a = a_;
170 const struct git_pack_file *b = b_;
171 int st;
172
173 /*
174 * Local packs tend to contain objects specific to our
175 * variant of the project than remote ones. In addition,
176 * remote ones could be on a network mounted filesystem.
177 * Favor local ones for these reasons.
178 */
179 st = a->pack_local - b->pack_local;
180 if (st)
181 return -st;
182
183 /*
184 * Younger packs tend to contain more recent objects,
185 * and more recent objects tend to get accessed more
186 * often.
187 */
188 if (a->mtime < b->mtime)
189 return 1;
190 else if (a->mtime == b->mtime)
191 return 0;
192
193 return -1;
194 }
195
196
197 static int packfile_load__cb(void *data, git_buf *path)
198 {
199 struct pack_backend *backend = data;
200 struct git_pack_file *pack;
201 const char *path_str = git_buf_cstr(path);
202 size_t i, cmp_len = git_buf_len(path);
203 int error;
204
205 if (cmp_len <= strlen(".idx") || git__suffixcmp(path_str, ".idx") != 0)
206 return 0; /* not an index */
207
208 cmp_len -= strlen(".idx");
209
210 for (i = 0; i < backend->packs.length; ++i) {
211 struct git_pack_file *p = git_vector_get(&backend->packs, i);
212
213 if (strncmp(p->pack_name, path_str, cmp_len) == 0)
214 return 0;
215 }
216
217 error = git_mwindow_get_pack(&pack, path->ptr);
218
219 /* ignore missing .pack file as git does */
220 if (error == GIT_ENOTFOUND) {
221 git_error_clear();
222 return 0;
223 }
224
225 if (!error)
226 error = git_vector_insert(&backend->packs, pack);
227
228 return error;
229
230 }
231
232 static int pack_entry_find_inner(
233 struct git_pack_entry *e,
234 struct pack_backend *backend,
235 const git_oid *oid,
236 struct git_pack_file *last_found)
237 {
238 size_t i;
239
240 if (last_found &&
241 git_pack_entry_find(e, last_found, oid, GIT_OID_HEXSZ) == 0)
242 return 0;
243
244 for (i = 0; i < backend->packs.length; ++i) {
245 struct git_pack_file *p;
246
247 p = git_vector_get(&backend->packs, i);
248 if (p == last_found)
249 continue;
250
251 if (git_pack_entry_find(e, p, oid, GIT_OID_HEXSZ) == 0) {
252 backend->last_found = p;
253 return 0;
254 }
255 }
256
257 return -1;
258 }
259
260 static int pack_entry_find(struct git_pack_entry *e, struct pack_backend *backend, const git_oid *oid)
261 {
262 struct git_pack_file *last_found = backend->last_found;
263
264 if (backend->last_found &&
265 git_pack_entry_find(e, backend->last_found, oid, GIT_OID_HEXSZ) == 0)
266 return 0;
267
268 if (!pack_entry_find_inner(e, backend, oid, last_found))
269 return 0;
270
271 return git_odb__error_notfound(
272 "failed to find pack entry", oid, GIT_OID_HEXSZ);
273 }
274
275 static int pack_entry_find_prefix(
276 struct git_pack_entry *e,
277 struct pack_backend *backend,
278 const git_oid *short_oid,
279 size_t len)
280 {
281 int error;
282 size_t i;
283 git_oid found_full_oid = {{0}};
284 bool found = false;
285 struct git_pack_file *last_found = backend->last_found;
286
287 if (last_found) {
288 error = git_pack_entry_find(e, last_found, short_oid, len);
289 if (error == GIT_EAMBIGUOUS)
290 return error;
291 if (!error) {
292 git_oid_cpy(&found_full_oid, &e->sha1);
293 found = true;
294 }
295 }
296
297 for (i = 0; i < backend->packs.length; ++i) {
298 struct git_pack_file *p;
299
300 p = git_vector_get(&backend->packs, i);
301 if (p == last_found)
302 continue;
303
304 error = git_pack_entry_find(e, p, short_oid, len);
305 if (error == GIT_EAMBIGUOUS)
306 return error;
307 if (!error) {
308 if (found && git_oid_cmp(&e->sha1, &found_full_oid))
309 return git_odb__error_ambiguous("found multiple pack entries");
310 git_oid_cpy(&found_full_oid, &e->sha1);
311 found = true;
312 backend->last_found = p;
313 }
314 }
315
316 if (!found)
317 return git_odb__error_notfound("no matching pack entry for prefix",
318 short_oid, len);
319 else
320 return 0;
321 }
322
323
324 /***********************************************************
325 *
326 * PACKED BACKEND PUBLIC API
327 *
328 * Implement the git_odb_backend API calls
329 *
330 ***********************************************************/
331 static int pack_backend__refresh(git_odb_backend *backend_)
332 {
333 int error;
334 struct stat st;
335 git_buf path = GIT_BUF_INIT;
336 struct pack_backend *backend = (struct pack_backend *)backend_;
337
338 if (backend->pack_folder == NULL)
339 return 0;
340
341 if (p_stat(backend->pack_folder, &st) < 0 || !S_ISDIR(st.st_mode))
342 return git_odb__error_notfound("failed to refresh packfiles", NULL, 0);
343
344 git_buf_sets(&path, backend->pack_folder);
345
346 /* reload all packs */
347 error = git_path_direach(&path, 0, packfile_load__cb, backend);
348
349 git_buf_dispose(&path);
350 git_vector_sort(&backend->packs);
351
352 return error;
353 }
354
355 static int pack_backend__read_header(
356 size_t *len_p, git_object_t *type_p,
357 struct git_odb_backend *backend, const git_oid *oid)
358 {
359 struct git_pack_entry e;
360 int error;
361
362 assert(len_p && type_p && backend && oid);
363
364 if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < 0)
365 return error;
366
367 return git_packfile_resolve_header(len_p, type_p, e.p, e.offset);
368 }
369
370 static int pack_backend__freshen(
371 git_odb_backend *backend, const git_oid *oid)
372 {
373 struct git_pack_entry e;
374 time_t now;
375 int error;
376
377 if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < 0)
378 return error;
379
380 now = time(NULL);
381
382 if (e.p->last_freshen > now - FRESHEN_FREQUENCY)
383 return 0;
384
385 if ((error = git_futils_touch(e.p->pack_name, &now)) < 0)
386 return error;
387
388 e.p->last_freshen = now;
389 return 0;
390 }
391
392 static int pack_backend__read(
393 void **buffer_p, size_t *len_p, git_object_t *type_p,
394 git_odb_backend *backend, const git_oid *oid)
395 {
396 struct git_pack_entry e;
397 git_rawobj raw = {NULL};
398 int error;
399
400 if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < 0 ||
401 (error = git_packfile_unpack(&raw, e.p, &e.offset)) < 0)
402 return error;
403
404 *buffer_p = raw.data;
405 *len_p = raw.len;
406 *type_p = raw.type;
407
408 return 0;
409 }
410
411 static int pack_backend__read_prefix(
412 git_oid *out_oid,
413 void **buffer_p,
414 size_t *len_p,
415 git_object_t *type_p,
416 git_odb_backend *backend,
417 const git_oid *short_oid,
418 size_t len)
419 {
420 int error = 0;
421
422 if (len < GIT_OID_MINPREFIXLEN)
423 error = git_odb__error_ambiguous("prefix length too short");
424
425 else if (len >= GIT_OID_HEXSZ) {
426 /* We can fall back to regular read method */
427 error = pack_backend__read(buffer_p, len_p, type_p, backend, short_oid);
428 if (!error)
429 git_oid_cpy(out_oid, short_oid);
430 } else {
431 struct git_pack_entry e;
432 git_rawobj raw = {NULL};
433
434 if ((error = pack_entry_find_prefix(
435 &e, (struct pack_backend *)backend, short_oid, len)) == 0 &&
436 (error = git_packfile_unpack(&raw, e.p, &e.offset)) == 0)
437 {
438 *buffer_p = raw.data;
439 *len_p = raw.len;
440 *type_p = raw.type;
441 git_oid_cpy(out_oid, &e.sha1);
442 }
443 }
444
445 return error;
446 }
447
448 static int pack_backend__exists(git_odb_backend *backend, const git_oid *oid)
449 {
450 struct git_pack_entry e;
451 return pack_entry_find(&e, (struct pack_backend *)backend, oid) == 0;
452 }
453
454 static int pack_backend__exists_prefix(
455 git_oid *out, git_odb_backend *backend, const git_oid *short_id, size_t len)
456 {
457 int error;
458 struct pack_backend *pb = (struct pack_backend *)backend;
459 struct git_pack_entry e = {0};
460
461 error = pack_entry_find_prefix(&e, pb, short_id, len);
462 git_oid_cpy(out, &e.sha1);
463 return error;
464 }
465
466 static int pack_backend__foreach(git_odb_backend *_backend, git_odb_foreach_cb cb, void *data)
467 {
468 int error;
469 struct git_pack_file *p;
470 struct pack_backend *backend;
471 unsigned int i;
472
473 assert(_backend && cb);
474 backend = (struct pack_backend *)_backend;
475
476 /* Make sure we know about the packfiles */
477 if ((error = pack_backend__refresh(_backend)) < 0)
478 return error;
479
480 git_vector_foreach(&backend->packs, i, p) {
481 if ((error = git_pack_foreach_entry(p, cb, data)) != 0)
482 return error;
483 }
484
485 return 0;
486 }
487
488 static int pack_backend__writepack_append(struct git_odb_writepack *_writepack, const void *data, size_t size, git_transfer_progress *stats)
489 {
490 struct pack_writepack *writepack = (struct pack_writepack *)_writepack;
491
492 assert(writepack);
493
494 return git_indexer_append(writepack->indexer, data, size, stats);
495 }
496
497 static int pack_backend__writepack_commit(struct git_odb_writepack *_writepack, git_transfer_progress *stats)
498 {
499 struct pack_writepack *writepack = (struct pack_writepack *)_writepack;
500
501 assert(writepack);
502
503 return git_indexer_commit(writepack->indexer, stats);
504 }
505
506 static void pack_backend__writepack_free(struct git_odb_writepack *_writepack)
507 {
508 struct pack_writepack *writepack = (struct pack_writepack *)_writepack;
509
510 assert(writepack);
511
512 git_indexer_free(writepack->indexer);
513 git__free(writepack);
514 }
515
516 static int pack_backend__writepack(struct git_odb_writepack **out,
517 git_odb_backend *_backend,
518 git_odb *odb,
519 git_transfer_progress_cb progress_cb,
520 void *progress_payload)
521 {
522 git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT;
523 struct pack_backend *backend;
524 struct pack_writepack *writepack;
525
526 assert(out && _backend);
527
528 *out = NULL;
529
530 opts.progress_cb = progress_cb;
531 opts.progress_cb_payload = progress_payload;
532
533 backend = (struct pack_backend *)_backend;
534
535 writepack = git__calloc(1, sizeof(struct pack_writepack));
536 GIT_ERROR_CHECK_ALLOC(writepack);
537
538 if (git_indexer_new(&writepack->indexer,
539 backend->pack_folder, 0, odb, &opts) < 0) {
540 git__free(writepack);
541 return -1;
542 }
543
544 writepack->parent.backend = _backend;
545 writepack->parent.append = pack_backend__writepack_append;
546 writepack->parent.commit = pack_backend__writepack_commit;
547 writepack->parent.free = pack_backend__writepack_free;
548
549 *out = (git_odb_writepack *)writepack;
550
551 return 0;
552 }
553
554 static void pack_backend__free(git_odb_backend *_backend)
555 {
556 struct pack_backend *backend;
557 size_t i;
558
559 assert(_backend);
560
561 backend = (struct pack_backend *)_backend;
562
563 for (i = 0; i < backend->packs.length; ++i) {
564 struct git_pack_file *p = git_vector_get(&backend->packs, i);
565 git_mwindow_put_pack(p);
566 }
567
568 git_vector_free(&backend->packs);
569 git__free(backend->pack_folder);
570 git__free(backend);
571 }
572
573 static int pack_backend__alloc(struct pack_backend **out, size_t initial_size)
574 {
575 struct pack_backend *backend = git__calloc(1, sizeof(struct pack_backend));
576 GIT_ERROR_CHECK_ALLOC(backend);
577
578 if (git_vector_init(&backend->packs, initial_size, packfile_sort__cb) < 0) {
579 git__free(backend);
580 return -1;
581 }
582
583 backend->parent.version = GIT_ODB_BACKEND_VERSION;
584
585 backend->parent.read = &pack_backend__read;
586 backend->parent.read_prefix = &pack_backend__read_prefix;
587 backend->parent.read_header = &pack_backend__read_header;
588 backend->parent.exists = &pack_backend__exists;
589 backend->parent.exists_prefix = &pack_backend__exists_prefix;
590 backend->parent.refresh = &pack_backend__refresh;
591 backend->parent.foreach = &pack_backend__foreach;
592 backend->parent.writepack = &pack_backend__writepack;
593 backend->parent.freshen = &pack_backend__freshen;
594 backend->parent.free = &pack_backend__free;
595
596 *out = backend;
597 return 0;
598 }
599
600 int git_odb_backend_one_pack(git_odb_backend **backend_out, const char *idx)
601 {
602 struct pack_backend *backend = NULL;
603 struct git_pack_file *packfile = NULL;
604
605 if (pack_backend__alloc(&backend, 1) < 0)
606 return -1;
607
608 if (git_mwindow_get_pack(&packfile, idx) < 0 ||
609 git_vector_insert(&backend->packs, packfile) < 0)
610 {
611 pack_backend__free((git_odb_backend *)backend);
612 return -1;
613 }
614
615 *backend_out = (git_odb_backend *)backend;
616 return 0;
617 }
618
619 int git_odb_backend_pack(git_odb_backend **backend_out, const char *objects_dir)
620 {
621 int error = 0;
622 struct pack_backend *backend = NULL;
623 git_buf path = GIT_BUF_INIT;
624
625 if (pack_backend__alloc(&backend, 8) < 0)
626 return -1;
627
628 if (!(error = git_buf_joinpath(&path, objects_dir, "pack")) &&
629 git_path_isdir(git_buf_cstr(&path)))
630 {
631 backend->pack_folder = git_buf_detach(&path);
632 error = pack_backend__refresh((git_odb_backend *)backend);
633 }
634
635 if (error < 0) {
636 pack_backend__free((git_odb_backend *)backend);
637 backend = NULL;
638 }
639
640 *backend_out = (git_odb_backend *)backend;
641
642 git_buf_dispose(&path);
643
644 return error;
645 }