]> git.proxmox.com Git - libgit2.git/blob - src/odb_pack.c
Move `url` to last place in parameter list
[libgit2.git] / src / odb_pack.c
1 /*
2 * Copyright (C) 2009-2012 the libgit2 contributors
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "common.h"
9 #include <zlib.h>
10 #include "git2/repository.h"
11 #include "git2/oid.h"
12 #include "fileops.h"
13 #include "hash.h"
14 #include "odb.h"
15 #include "delta-apply.h"
16 #include "sha1_lookup.h"
17 #include "mwindow.h"
18 #include "pack.h"
19
20 #include "git2/odb_backend.h"
21
/*
 * ODB backend that serves objects out of packfiles.
 */
struct pack_backend {
	/* Base backend vtable. Kept as the first member because this file
	 * casts between `git_odb_backend *` and `struct pack_backend *`. */
	git_odb_backend parent;
	/* Loaded packs, stored as `struct git_pack_file *` entries. */
	git_vector packs;
	/* Pack that satisfied the most recent lookup; it is tried first on
	 * the next lookup. */
	struct git_pack_file *last_found;
	/* Path of the pack directory, or NULL when the backend has none. */
	char *pack_folder;
};
28
/*
 * Writepack handle handed out by pack_backend__writepack().
 */
struct pack_writepack {
	/* Base writepack vtable. Kept as the first member because this file
	 * casts between `git_odb_writepack *` and `struct pack_writepack *`. */
	struct git_odb_writepack parent;
	/* Indexer stream that receives the incoming pack data. */
	git_indexer_stream *indexer_stream;
};
33
34 /**
35 * The wonderful tale of a Packed Object lookup query
36 * ===================================================
37 * A riveting and epic story of epicness and ASCII
38 * art, presented by yours truly,
39 * Sir Vicent of Marti
40 *
41 *
42 * Chapter 1: Once upon a time...
43 * Initialization of the Pack Backend
44 * --------------------------------------------------
45 *
46 * # git_odb_backend_pack
47 * | Creates the pack backend structure, initializes the
48 * | callback pointers to our default read() and exist() methods,
49 * | and tries to preload all the known packfiles in the ODB.
50 * |
51 * |-# packfile_load_all
52 * | Tries to find the `pack` folder, if it exists. ODBs without
53 * | a pack folder are ignored altogether. If there's a `pack` folder
54 * | we run a `dirent` callback through every file in the pack folder
55 * | to find our packfiles. The packfiles are then sorted according
56 * | to a sorting callback.
57 * |
58 * |-# packfile_load__cb
59 * | | This callback is called from `dirent` with every single file
60 * | | inside the pack folder. We find the packs by actually locating
61 * | | their index (ends in ".idx"). From that index, we verify that
62 * | | the corresponding packfile exists and is valid, and if so, we
63 * | | add it to the pack list.
64 * | |
65 * | |-# packfile_check
66 * | Make sure that there's a packfile to back this index, and store
67 * | some very basic information regarding the packfile itself,
68 * | such as the full path, the size, and the modification time.
69 * | We don't actually open the packfile to check for internal consistency.
70 * |
71 * |-# packfile_sort__cb
72 * Sort all the preloaded packs according to some specific criteria:
73 * we prioritize the "newer" packs because it's more likely they
74 * contain the objects we are looking for, and we prioritize local
75 * packs over remote ones.
76 *
77 *
78 *
79 * Chapter 2: To be, or not to be...
80 * A standard packed `exist` query for an OID
81 * --------------------------------------------------
82 *
83 * # pack_backend__exists
84 * | Check if the given SHA1 oid exists in any of the packs
85 * | that have been loaded for our ODB.
86 * |
87 * |-# pack_entry_find
88 * | Iterate through all the packs that have been preloaded
89 * | (starting by the pack where the latest object was found)
90 * | to try to find the OID in one of them.
91 * |
92 * |-# pack_entry_find1
93 * | Check the index of an individual pack to see if the SHA1
94 * | OID can be found. If we can find the offset to that SHA1
95 * | inside of the index, that means the object is contained
96 * | inside of the packfile and we can stop searching.
97 * | Before returning, we verify that the packfile behind the
98 * | index we are searching still exists on disk.
99 * |
100 * |-# pack_entry_find_offset
101 * | | Mmap the actual index file from disk if it hasn't been opened
102 * | | yet, and run a binary search through it to find the OID.
103 * | | See <http://book.git-scm.com/7_the_packfile.html> for specifics
104 * | | on the Packfile Index format and how we find entries in it.
105 * | |
106 * | |-# pack_index_open
107 * | | Guess the name of the index based on the full path to the
108 * | | packfile, open it and verify its contents. Only if the index
109 * | | has not been opened already.
110 * | |
111 * | |-# pack_index_check
112 * | Mmap the index file and do a quick run through the header
113 * | to guess the index version (right now we support v1 and v2),
114 * | and to verify that the size of the index makes sense.
115 * |
116 * |-# packfile_open
117 * See `packfile_open` in Chapter 3
118 *
119 *
120 *
121 * Chapter 3: The neverending story...
122 * A standard packed `lookup` query for an OID
123 * --------------------------------------------------
124 * TODO
125 *
126 */
127
128
129 /***********************************************************
130 *
131 * FORWARD DECLARATIONS
132 *
133 ***********************************************************/
134
/* Memory-window helpers for a single packfile. */
static void pack_window_free_all(struct pack_backend *backend, struct git_pack_file *p);
static int pack_window_contains(git_mwindow *win, off_t offset);

/* Ordering callback used for the backend's `packs` vector. */
static int packfile_sort__cb(const void *a_, const void *b_);

/* Per-file directory callback and the routine that rescans the pack folder. */
static int packfile_load__cb(void *_data, git_buf *path);
static int packfile_refresh_all(struct pack_backend *backend);

/* Find the pack entry for a full OID among the backend's packs. */
static int pack_entry_find(struct git_pack_entry *e,
	struct pack_backend *backend, const git_oid *oid);

/* Can find the offset of an object given
 * a prefix of an identifier.
 * Returns an ambiguity error if the short oid matches
 * more than one object.
 * This method assumes that len is between
 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
 */
static int pack_entry_find_prefix(
	struct git_pack_entry *e,
	struct pack_backend *backend,
	const git_oid *short_oid,
	size_t len);
157
158
159
160 /***********************************************************
161 *
162 * PACK WINDOW MANAGEMENT
163 *
164 ***********************************************************/
165
166 GIT_INLINE(void) pack_window_free_all(struct pack_backend *backend, struct git_pack_file *p)
167 {
168 GIT_UNUSED(backend);
169 git_mwindow_free_all(&p->mwf);
170 }
171
172 GIT_INLINE(int) pack_window_contains(git_mwindow *win, off_t offset)
173 {
174 /* We must promise at least 20 bytes (one hash) after the
175 * offset is available from this window, otherwise the offset
176 * is not actually in this window and a different window (which
177 * has that one hash excess) must be used. This is to support
178 * the object header and delta base parsing routines below.
179 */
180 return git_mwindow_contains(win, offset + 20);
181 }
182
183 static int packfile_sort__cb(const void *a_, const void *b_)
184 {
185 const struct git_pack_file *a = a_;
186 const struct git_pack_file *b = b_;
187 int st;
188
189 /*
190 * Local packs tend to contain objects specific to our
191 * variant of the project than remote ones. In addition,
192 * remote ones could be on a network mounted filesystem.
193 * Favor local ones for these reasons.
194 */
195 st = a->pack_local - b->pack_local;
196 if (st)
197 return -st;
198
199 /*
200 * Younger packs tend to contain more recent objects,
201 * and more recent objects tend to get accessed more
202 * often.
203 */
204 if (a->mtime < b->mtime)
205 return 1;
206 else if (a->mtime == b->mtime)
207 return 0;
208
209 return -1;
210 }
211
212
213
214 static int packfile_load__cb(void *_data, git_buf *path)
215 {
216 struct pack_backend *backend = (struct pack_backend *)_data;
217 struct git_pack_file *pack;
218 int error;
219 unsigned int i;
220
221 if (git__suffixcmp(path->ptr, ".idx") != 0)
222 return 0; /* not an index */
223
224 for (i = 0; i < backend->packs.length; ++i) {
225 struct git_pack_file *p = git_vector_get(&backend->packs, i);
226 if (memcmp(p->pack_name, git_buf_cstr(path), git_buf_len(path) - strlen(".idx")) == 0)
227 return 0;
228 }
229
230 error = git_packfile_check(&pack, path->ptr);
231 if (error == GIT_ENOTFOUND)
232 /* ignore missing .pack file as git does */
233 return 0;
234 else if (error < 0)
235 return error;
236
237 return git_vector_insert(&backend->packs, pack);
238 }
239
240 static int packfile_refresh_all(struct pack_backend *backend)
241 {
242 int error;
243 struct stat st;
244 git_buf path = GIT_BUF_INIT;
245
246 if (backend->pack_folder == NULL)
247 return 0;
248
249 if (p_stat(backend->pack_folder, &st) < 0 || !S_ISDIR(st.st_mode))
250 return git_odb__error_notfound("failed to refresh packfiles", NULL);
251
252 git_buf_sets(&path, backend->pack_folder);
253
254 /* reload all packs */
255 error = git_path_direach(&path, packfile_load__cb, (void *)backend);
256
257 git_buf_free(&path);
258
259 if (error < 0)
260 return error;
261
262 git_vector_sort(&backend->packs);
263
264 return 0;
265 }
266
267 static int pack_entry_find_inner(
268 struct git_pack_entry *e,
269 struct pack_backend *backend,
270 const git_oid *oid,
271 struct git_pack_file *last_found)
272 {
273 unsigned int i;
274
275 if (last_found &&
276 git_pack_entry_find(e, last_found, oid, GIT_OID_HEXSZ) == 0)
277 return 0;
278
279 for (i = 0; i < backend->packs.length; ++i) {
280 struct git_pack_file *p;
281
282 p = git_vector_get(&backend->packs, i);
283 if (p == last_found)
284 continue;
285
286 if (git_pack_entry_find(e, p, oid, GIT_OID_HEXSZ) == 0) {
287 backend->last_found = p;
288 return 0;
289 }
290 }
291
292 return -1;
293 }
294
295 static int pack_entry_find(struct git_pack_entry *e, struct pack_backend *backend, const git_oid *oid)
296 {
297 int error;
298 struct git_pack_file *last_found = backend->last_found;
299
300 if (backend->last_found &&
301 git_pack_entry_find(e, backend->last_found, oid, GIT_OID_HEXSZ) == 0)
302 return 0;
303
304 if (!pack_entry_find_inner(e, backend, oid, last_found))
305 return 0;
306 if ((error = packfile_refresh_all(backend)) < 0)
307 return error;
308 if (!pack_entry_find_inner(e, backend, oid, last_found))
309 return 0;
310
311 return git_odb__error_notfound("failed to find pack entry", oid);
312 }
313
314 static unsigned pack_entry_find_prefix_inner(
315 struct git_pack_entry *e,
316 struct pack_backend *backend,
317 const git_oid *short_oid,
318 size_t len,
319 struct git_pack_file *last_found)
320 {
321 int error;
322 unsigned int i;
323 unsigned found = 0;
324
325 if (last_found) {
326 error = git_pack_entry_find(e, last_found, short_oid, len);
327 if (error == GIT_EAMBIGUOUS)
328 return error;
329 if (!error)
330 found = 1;
331 }
332
333 for (i = 0; i < backend->packs.length; ++i) {
334 struct git_pack_file *p;
335
336 p = git_vector_get(&backend->packs, i);
337 if (p == last_found)
338 continue;
339
340 error = git_pack_entry_find(e, p, short_oid, len);
341 if (error == GIT_EAMBIGUOUS)
342 return error;
343 if (!error) {
344 if (++found > 1)
345 break;
346 backend->last_found = p;
347 }
348 }
349
350 return found;
351 }
352
353 static int pack_entry_find_prefix(
354 struct git_pack_entry *e,
355 struct pack_backend *backend,
356 const git_oid *short_oid,
357 size_t len)
358 {
359 unsigned found = 0;
360 int error;
361 struct git_pack_file *last_found = backend->last_found;
362
363 if ((found = pack_entry_find_prefix_inner(e, backend, short_oid, len, last_found)) > 0)
364 goto cleanup;
365 if ((error = packfile_refresh_all(backend)) < 0)
366 return error;
367 found = pack_entry_find_prefix_inner(e, backend, short_oid, len, last_found);
368
369 cleanup:
370 if (!found)
371 return git_odb__error_notfound("no matching pack entry for prefix", short_oid);
372 else if (found > 1)
373 return git_odb__error_ambiguous("found multiple pack entries");
374 else
375 return 0;
376 }
377
378
379 /***********************************************************
380 *
381 * PACKED BACKEND PUBLIC API
382 *
383 * Implement the git_odb_backend API calls
384 *
385 ***********************************************************/
386
387 static int pack_backend__read_header(size_t *len_p, git_otype *type_p, struct git_odb_backend *backend, const git_oid *oid)
388 {
389 struct git_pack_entry e;
390 int error;
391
392 assert(len_p && type_p && backend && oid);
393
394 if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < 0)
395 return error;
396
397 return git_packfile_resolve_header(len_p, type_p, e.p, e.offset);
398 }
399
400 static int pack_backend__read(void **buffer_p, size_t *len_p, git_otype *type_p, git_odb_backend *backend, const git_oid *oid)
401 {
402 struct git_pack_entry e;
403 git_rawobj raw;
404 int error;
405
406 if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < 0 ||
407 (error = git_packfile_unpack(&raw, e.p, &e.offset)) < 0)
408 return error;
409
410 *buffer_p = raw.data;
411 *len_p = raw.len;
412 *type_p = raw.type;
413
414 return 0;
415 }
416
417 static int pack_backend__read_prefix(
418 git_oid *out_oid,
419 void **buffer_p,
420 size_t *len_p,
421 git_otype *type_p,
422 git_odb_backend *backend,
423 const git_oid *short_oid,
424 size_t len)
425 {
426 int error = 0;
427
428 if (len < GIT_OID_MINPREFIXLEN)
429 error = git_odb__error_ambiguous("prefix length too short");
430
431 else if (len >= GIT_OID_HEXSZ) {
432 /* We can fall back to regular read method */
433 error = pack_backend__read(buffer_p, len_p, type_p, backend, short_oid);
434 if (!error)
435 git_oid_cpy(out_oid, short_oid);
436 } else {
437 struct git_pack_entry e;
438 git_rawobj raw;
439
440 if ((error = pack_entry_find_prefix(
441 &e, (struct pack_backend *)backend, short_oid, len)) == 0 &&
442 (error = git_packfile_unpack(&raw, e.p, &e.offset)) == 0)
443 {
444 *buffer_p = raw.data;
445 *len_p = raw.len;
446 *type_p = raw.type;
447 git_oid_cpy(out_oid, &e.sha1);
448 }
449 }
450
451 return error;
452 }
453
454 static int pack_backend__exists(git_odb_backend *backend, const git_oid *oid)
455 {
456 struct git_pack_entry e;
457 return pack_entry_find(&e, (struct pack_backend *)backend, oid) == 0;
458 }
459
460 static int pack_backend__foreach(git_odb_backend *_backend, git_odb_foreach_cb cb, void *data)
461 {
462 int error;
463 struct git_pack_file *p;
464 struct pack_backend *backend;
465 unsigned int i;
466
467 assert(_backend && cb);
468 backend = (struct pack_backend *)_backend;
469
470 /* Make sure we know about the packfiles */
471 if ((error = packfile_refresh_all(backend)) < 0)
472 return error;
473
474 git_vector_foreach(&backend->packs, i, p) {
475 if ((error = git_pack_foreach_entry(p, cb, data)) < 0)
476 return error;
477 }
478
479 return 0;
480 }
481
482 static int pack_backend__writepack_add(struct git_odb_writepack *_writepack, const void *data, size_t size, git_transfer_progress *stats)
483 {
484 struct pack_writepack *writepack = (struct pack_writepack *)_writepack;
485
486 assert(writepack);
487
488 return git_indexer_stream_add(writepack->indexer_stream, data, size, stats);
489 }
490
491 static int pack_backend__writepack_commit(struct git_odb_writepack *_writepack, git_transfer_progress *stats)
492 {
493 struct pack_writepack *writepack = (struct pack_writepack *)_writepack;
494
495 assert(writepack);
496
497 return git_indexer_stream_finalize(writepack->indexer_stream, stats);
498 }
499
500 static void pack_backend__writepack_free(struct git_odb_writepack *_writepack)
501 {
502 struct pack_writepack *writepack = (struct pack_writepack *)_writepack;
503
504 assert(writepack);
505
506 git_indexer_stream_free(writepack->indexer_stream);
507 git__free(writepack);
508 }
509
510 static int pack_backend__writepack(struct git_odb_writepack **out,
511 git_odb_backend *_backend,
512 git_transfer_progress_callback progress_cb,
513 void *progress_payload)
514 {
515 struct pack_backend *backend;
516 struct pack_writepack *writepack;
517
518 assert(out && _backend);
519
520 *out = NULL;
521
522 backend = (struct pack_backend *)_backend;
523
524 writepack = git__calloc(1, sizeof(struct pack_writepack));
525 GITERR_CHECK_ALLOC(writepack);
526
527 if (git_indexer_stream_new(&writepack->indexer_stream,
528 backend->pack_folder, progress_cb, progress_payload) < 0) {
529 git__free(writepack);
530 return -1;
531 }
532
533 writepack->parent.backend = _backend;
534 writepack->parent.add = pack_backend__writepack_add;
535 writepack->parent.commit = pack_backend__writepack_commit;
536 writepack->parent.free = pack_backend__writepack_free;
537
538 *out = (git_odb_writepack *)writepack;
539
540 return 0;
541 }
542
543 static void pack_backend__free(git_odb_backend *_backend)
544 {
545 struct pack_backend *backend;
546 unsigned int i;
547
548 assert(_backend);
549
550 backend = (struct pack_backend *)_backend;
551
552 for (i = 0; i < backend->packs.length; ++i) {
553 struct git_pack_file *p = git_vector_get(&backend->packs, i);
554 packfile_free(p);
555 }
556
557 git_vector_free(&backend->packs);
558 git__free(backend->pack_folder);
559 git__free(backend);
560 }
561
562 int git_odb_backend_one_pack(git_odb_backend **backend_out, const char *idx)
563 {
564 struct pack_backend *backend = NULL;
565 struct git_pack_file *packfile = NULL;
566
567 if (git_packfile_check(&packfile, idx) < 0)
568 return -1;
569
570 backend = git__calloc(1, sizeof(struct pack_backend));
571 GITERR_CHECK_ALLOC(backend);
572 backend->parent.version = GIT_ODB_BACKEND_VERSION;
573
574 if (git_vector_init(&backend->packs, 1, NULL) < 0)
575 goto on_error;
576
577 if (git_vector_insert(&backend->packs, packfile) < 0)
578 goto on_error;
579
580 backend->parent.read = &pack_backend__read;
581 backend->parent.read_prefix = &pack_backend__read_prefix;
582 backend->parent.read_header = &pack_backend__read_header;
583 backend->parent.exists = &pack_backend__exists;
584 backend->parent.foreach = &pack_backend__foreach;
585 backend->parent.free = &pack_backend__free;
586
587 *backend_out = (git_odb_backend *)backend;
588
589 return 0;
590
591 on_error:
592 git_vector_free(&backend->packs);
593 git__free(backend);
594 git__free(packfile);
595 return -1;
596 }
597
598 int git_odb_backend_pack(git_odb_backend **backend_out, const char *objects_dir)
599 {
600 struct pack_backend *backend = NULL;
601 git_buf path = GIT_BUF_INIT;
602
603 backend = git__calloc(1, sizeof(struct pack_backend));
604 GITERR_CHECK_ALLOC(backend);
605 backend->parent.version = GIT_ODB_BACKEND_VERSION;
606
607 if (git_vector_init(&backend->packs, 8, packfile_sort__cb) < 0 ||
608 git_buf_joinpath(&path, objects_dir, "pack") < 0)
609 {
610 git__free(backend);
611 return -1;
612 }
613
614 if (git_path_isdir(git_buf_cstr(&path)) == true) {
615 backend->pack_folder = git_buf_detach(&path);
616 }
617
618 backend->parent.read = &pack_backend__read;
619 backend->parent.read_prefix = &pack_backend__read_prefix;
620 backend->parent.read_header = &pack_backend__read_header;
621 backend->parent.exists = &pack_backend__exists;
622 backend->parent.foreach = &pack_backend__foreach;
623 backend->parent.writepack = &pack_backend__writepack;
624 backend->parent.free = &pack_backend__free;
625
626 *backend_out = (git_odb_backend *)backend;
627
628 git_buf_free(&path);
629
630 return 0;
631 }