]> git.proxmox.com Git - libgit2.git/blob - src/odb_pack.c
Merge pull request #968 from arrbee/diff-support-typechange
[libgit2.git] / src / odb_pack.c
1 /*
2 * Copyright (C) 2009-2012 the libgit2 contributors
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "common.h"
9 #include <zlib.h>
10 #include "git2/repository.h"
11 #include "git2/oid.h"
12 #include "fileops.h"
13 #include "hash.h"
14 #include "odb.h"
15 #include "delta-apply.h"
16 #include "sha1_lookup.h"
17 #include "mwindow.h"
18 #include "pack.h"
19
20 #include "git2/odb_backend.h"
21
22 struct pack_backend {
23 git_odb_backend parent;
24 git_vector packs;
25 struct git_pack_file *last_found;
26 char *pack_folder;
27 };
28
29 /**
30 * The wonderful tale of a Packed Object lookup query
31 * ===================================================
32 * A riveting and epic story of epicness and ASCII
33 * art, presented by yours truly,
34 * Sir Vicent of Marti
35 *
36 *
37 * Chapter 1: Once upon a time...
38 * Initialization of the Pack Backend
39 * --------------------------------------------------
40 *
41 * # git_odb_backend_pack
42 * | Creates the pack backend structure, initializes the
43 * | callback pointers to our default read() and exist() methods,
44 * | and tries to preload all the known packfiles in the ODB.
45 * |
46 * |-# packfile_load_all
47 * | Tries to find the `pack` folder, if it exists. ODBs without
48 * | a pack folder are ignored altogether. If there's a `pack` folder
49 * | we run a `dirent` callback through every file in the pack folder
50 * | to find our packfiles. The packfiles are then sorted according
51 * | to a sorting callback.
52 * |
53 * |-# packfile_load__cb
54 * | | This callback is called from `dirent` with every single file
55 * | | inside the pack folder. We find the packs by actually locating
56 * | | their index (ends in ".idx"). From that index, we verify that
57 * | | the corresponding packfile exists and is valid, and if so, we
58 * | | add it to the pack list.
59 * | |
60 * | |-# packfile_check
61 * | Make sure that there's a packfile to back this index, and store
62 * | some very basic information regarding the packfile itself,
63 * | such as the full path, the size, and the modification time.
64 * | We don't actually open the packfile to check for internal consistency.
65 * |
66 * |-# packfile_sort__cb
67 * Sort all the preloaded packs according to some specific criteria:
68 * we prioritize the "newer" packs because it's more likely they
69 * contain the objects we are looking for, and we prioritize local
70 * packs over remote ones.
71 *
72 *
73 *
74 * Chapter 2: To be, or not to be...
75 * A standard packed `exist` query for an OID
76 * --------------------------------------------------
77 *
78 * # pack_backend__exists
79 * | Check if the given SHA1 oid exists in any of the packs
80 * | that have been loaded for our ODB.
81 * |
82 * |-# pack_entry_find
83 * | Iterate through all the packs that have been preloaded
84 * | (starting by the pack where the latest object was found)
85 * | to try to find the OID in one of them.
86 * |
87 * |-# pack_entry_find1
88 * | Check the index of an individual pack to see if the SHA1
89 * | OID can be found. If we can find the offset to that SHA1
90 * | inside of the index, that means the object is contained
91 * | inside of the packfile and we can stop searching.
92 * | Before returning, we verify that the packfile behing the
93 * | index we are searching still exists on disk.
94 * |
95 * |-# pack_entry_find_offset
96 * | | Mmap the actual index file to disk if it hasn't been opened
97 * | | yet, and run a binary search through it to find the OID.
98 * | | See <http://book.git-scm.com/7_the_packfile.html> for specifics
99 * | | on the Packfile Index format and how do we find entries in it.
100 * | |
101 * | |-# pack_index_open
102 * | | Guess the name of the index based on the full path to the
103 * | | packfile, open it and verify its contents. Only if the index
104 * | | has not been opened already.
105 * | |
106 * | |-# pack_index_check
107 * | Mmap the index file and do a quick run through the header
108 * | to guess the index version (right now we support v1 and v2),
109 * | and to verify that the size of the index makes sense.
110 * |
111 * |-# packfile_open
112 * See `packfile_open` in Chapter 3
113 *
114 *
115 *
116 * Chapter 3: The neverending story...
117 * A standard packed `lookup` query for an OID
118 * --------------------------------------------------
119 * TODO
120 *
121 */
122
123
124 /***********************************************************
125 *
126 * FORWARD DECLARATIONS
127 *
128 ***********************************************************/
129
130 static void pack_window_free_all(struct pack_backend *backend, struct git_pack_file *p);
131 static int pack_window_contains(git_mwindow *win, off_t offset);
132
133 static int packfile_sort__cb(const void *a_, const void *b_);
134
135 static int packfile_load__cb(void *_data, git_buf *path);
136 static int packfile_refresh_all(struct pack_backend *backend);
137
138 static int pack_entry_find(struct git_pack_entry *e,
139 struct pack_backend *backend, const git_oid *oid);
140
141 /* Can find the offset of an object given
142 * a prefix of an identifier.
143 * Sets GIT_EAMBIGUOUS if short oid is ambiguous.
144 * This method assumes that len is between
145 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
146 */
147 static int pack_entry_find_prefix(
148 struct git_pack_entry *e,
149 struct pack_backend *backend,
150 const git_oid *short_oid,
151 size_t len);
152
153
154
155 /***********************************************************
156 *
157 * PACK WINDOW MANAGEMENT
158 *
159 ***********************************************************/
160
161 GIT_INLINE(void) pack_window_free_all(struct pack_backend *backend, struct git_pack_file *p)
162 {
163 GIT_UNUSED(backend);
164 git_mwindow_free_all(&p->mwf);
165 }
166
167 GIT_INLINE(int) pack_window_contains(git_mwindow *win, off_t offset)
168 {
169 /* We must promise at least 20 bytes (one hash) after the
170 * offset is available from this window, otherwise the offset
171 * is not actually in this window and a different window (which
172 * has that one hash excess) must be used. This is to support
173 * the object header and delta base parsing routines below.
174 */
175 return git_mwindow_contains(win, offset + 20);
176 }
177
178 static int packfile_sort__cb(const void *a_, const void *b_)
179 {
180 const struct git_pack_file *a = a_;
181 const struct git_pack_file *b = b_;
182 int st;
183
184 /*
185 * Local packs tend to contain objects specific to our
186 * variant of the project than remote ones. In addition,
187 * remote ones could be on a network mounted filesystem.
188 * Favor local ones for these reasons.
189 */
190 st = a->pack_local - b->pack_local;
191 if (st)
192 return -st;
193
194 /*
195 * Younger packs tend to contain more recent objects,
196 * and more recent objects tend to get accessed more
197 * often.
198 */
199 if (a->mtime < b->mtime)
200 return 1;
201 else if (a->mtime == b->mtime)
202 return 0;
203
204 return -1;
205 }
206
207
208
209 static int packfile_load__cb(void *_data, git_buf *path)
210 {
211 struct pack_backend *backend = (struct pack_backend *)_data;
212 struct git_pack_file *pack;
213 int error;
214 unsigned int i;
215
216 if (git__suffixcmp(path->ptr, ".idx") != 0)
217 return 0; /* not an index */
218
219 for (i = 0; i < backend->packs.length; ++i) {
220 struct git_pack_file *p = git_vector_get(&backend->packs, i);
221 if (memcmp(p->pack_name, git_buf_cstr(path), git_buf_len(path) - strlen(".idx")) == 0)
222 return 0;
223 }
224
225 error = git_packfile_check(&pack, path->ptr);
226 if (error == GIT_ENOTFOUND)
227 /* ignore missing .pack file as git does */
228 return 0;
229 else if (error < 0)
230 return error;
231
232 return git_vector_insert(&backend->packs, pack);
233 }
234
235 static int packfile_refresh_all(struct pack_backend *backend)
236 {
237 int error;
238 struct stat st;
239 git_buf path = GIT_BUF_INIT;
240
241 if (backend->pack_folder == NULL)
242 return 0;
243
244 if (p_stat(backend->pack_folder, &st) < 0 || !S_ISDIR(st.st_mode))
245 return git_odb__error_notfound("failed to refresh packfiles", NULL);
246
247 git_buf_sets(&path, backend->pack_folder);
248
249 /* reload all packs */
250 error = git_path_direach(&path, packfile_load__cb, (void *)backend);
251
252 git_buf_free(&path);
253
254 if (error < 0)
255 return error;
256
257 git_vector_sort(&backend->packs);
258
259 return 0;
260 }
261
262 static int pack_entry_find_inner(
263 struct git_pack_entry *e,
264 struct pack_backend *backend,
265 const git_oid *oid,
266 struct git_pack_file *last_found)
267 {
268 unsigned int i;
269
270 if (last_found &&
271 git_pack_entry_find(e, last_found, oid, GIT_OID_HEXSZ) == 0)
272 return 0;
273
274 for (i = 0; i < backend->packs.length; ++i) {
275 struct git_pack_file *p;
276
277 p = git_vector_get(&backend->packs, i);
278 if (p == last_found)
279 continue;
280
281 if (git_pack_entry_find(e, p, oid, GIT_OID_HEXSZ) == 0) {
282 backend->last_found = p;
283 return 0;
284 }
285 }
286
287 return -1;
288 }
289
290 static int pack_entry_find(struct git_pack_entry *e, struct pack_backend *backend, const git_oid *oid)
291 {
292 int error;
293 struct git_pack_file *last_found = backend->last_found;
294
295 if (backend->last_found &&
296 git_pack_entry_find(e, backend->last_found, oid, GIT_OID_HEXSZ) == 0)
297 return 0;
298
299 if (!pack_entry_find_inner(e, backend, oid, last_found))
300 return 0;
301 if ((error = packfile_refresh_all(backend)) < 0)
302 return error;
303 if (!pack_entry_find_inner(e, backend, oid, last_found))
304 return 0;
305
306 return git_odb__error_notfound("failed to find pack entry", oid);
307 }
308
309 static unsigned pack_entry_find_prefix_inner(
310 struct git_pack_entry *e,
311 struct pack_backend *backend,
312 const git_oid *short_oid,
313 size_t len,
314 struct git_pack_file *last_found)
315 {
316 int error;
317 unsigned int i;
318 unsigned found = 0;
319
320 if (last_found) {
321 error = git_pack_entry_find(e, last_found, short_oid, len);
322 if (error == GIT_EAMBIGUOUS)
323 return error;
324 if (!error)
325 found = 1;
326 }
327
328 for (i = 0; i < backend->packs.length; ++i) {
329 struct git_pack_file *p;
330
331 p = git_vector_get(&backend->packs, i);
332 if (p == last_found)
333 continue;
334
335 error = git_pack_entry_find(e, p, short_oid, len);
336 if (error == GIT_EAMBIGUOUS)
337 return error;
338 if (!error) {
339 if (++found > 1)
340 break;
341 backend->last_found = p;
342 }
343 }
344
345 return found;
346 }
347
348 static int pack_entry_find_prefix(
349 struct git_pack_entry *e,
350 struct pack_backend *backend,
351 const git_oid *short_oid,
352 size_t len)
353 {
354 unsigned found = 0;
355 int error;
356 struct git_pack_file *last_found = backend->last_found;
357
358 if ((found = pack_entry_find_prefix_inner(e, backend, short_oid, len, last_found)) > 0)
359 goto cleanup;
360 if ((error = packfile_refresh_all(backend)) < 0)
361 return error;
362 found = pack_entry_find_prefix_inner(e, backend, short_oid, len, last_found);
363
364 cleanup:
365 if (!found)
366 return git_odb__error_notfound("no matching pack entry for prefix", short_oid);
367 else if (found > 1)
368 return git_odb__error_ambiguous("found multiple pack entries");
369 else
370 return 0;
371 }
372
373
374 /***********************************************************
375 *
376 * PACKED BACKEND PUBLIC API
377 *
378 * Implement the git_odb_backend API calls
379 *
380 ***********************************************************/
381
382 /*
383 int pack_backend__read_header(git_rawobj *obj, git_odb_backend *backend, const git_oid *oid)
384 {
385 pack_location location;
386
387 assert(obj && backend && oid);
388
389 if (locate_packfile(&location, (struct pack_backend *)backend, oid) < 0)
390 return GIT_ENOTFOUND;
391
392 return read_header_packed(obj, &location);
393 }
394 */
395
396 static int pack_backend__read(void **buffer_p, size_t *len_p, git_otype *type_p, git_odb_backend *backend, const git_oid *oid)
397 {
398 struct git_pack_entry e;
399 git_rawobj raw;
400 int error;
401
402 if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < 0 ||
403 (error = git_packfile_unpack(&raw, e.p, &e.offset)) < 0)
404 return error;
405
406 *buffer_p = raw.data;
407 *len_p = raw.len;
408 *type_p = raw.type;
409
410 return 0;
411 }
412
413 static int pack_backend__read_prefix(
414 git_oid *out_oid,
415 void **buffer_p,
416 size_t *len_p,
417 git_otype *type_p,
418 git_odb_backend *backend,
419 const git_oid *short_oid,
420 size_t len)
421 {
422 int error = 0;
423
424 if (len < GIT_OID_MINPREFIXLEN)
425 error = git_odb__error_ambiguous("prefix length too short");
426
427 else if (len >= GIT_OID_HEXSZ) {
428 /* We can fall back to regular read method */
429 error = pack_backend__read(buffer_p, len_p, type_p, backend, short_oid);
430 if (!error)
431 git_oid_cpy(out_oid, short_oid);
432 } else {
433 struct git_pack_entry e;
434 git_rawobj raw;
435
436 if ((error = pack_entry_find_prefix(
437 &e, (struct pack_backend *)backend, short_oid, len)) == 0 &&
438 (error = git_packfile_unpack(&raw, e.p, &e.offset)) == 0)
439 {
440 *buffer_p = raw.data;
441 *len_p = raw.len;
442 *type_p = raw.type;
443 git_oid_cpy(out_oid, &e.sha1);
444 }
445 }
446
447 return error;
448 }
449
450 static int pack_backend__exists(git_odb_backend *backend, const git_oid *oid)
451 {
452 struct git_pack_entry e;
453 return pack_entry_find(&e, (struct pack_backend *)backend, oid) == 0;
454 }
455
456 static int pack_backend__foreach(git_odb_backend *_backend, int (*cb)(git_oid *oid, void *data), void *data)
457 {
458 int error;
459 struct git_pack_file *p;
460 struct pack_backend *backend;
461 unsigned int i;
462
463 assert(_backend && cb);
464 backend = (struct pack_backend *)_backend;
465
466 /* Make sure we know about the packfiles */
467 if ((error = packfile_refresh_all(backend)) < 0)
468 return error;
469
470 git_vector_foreach(&backend->packs, i, p) {
471 if ((error = git_pack_foreach_entry(p, cb, data)) < 0)
472 return error;
473 }
474
475 return 0;
476 }
477
478 static void pack_backend__free(git_odb_backend *_backend)
479 {
480 struct pack_backend *backend;
481 unsigned int i;
482
483 assert(_backend);
484
485 backend = (struct pack_backend *)_backend;
486
487 for (i = 0; i < backend->packs.length; ++i) {
488 struct git_pack_file *p = git_vector_get(&backend->packs, i);
489 packfile_free(p);
490 }
491
492 git_vector_free(&backend->packs);
493 git__free(backend->pack_folder);
494 git__free(backend);
495 }
496
497 int git_odb_backend_one_pack(git_odb_backend **backend_out, const char *idx)
498 {
499 struct pack_backend *backend = NULL;
500 struct git_pack_file *packfile = NULL;
501
502 if (git_packfile_check(&packfile, idx) < 0)
503 return -1;
504
505 backend = git__calloc(1, sizeof(struct pack_backend));
506 GITERR_CHECK_ALLOC(backend);
507
508 if (git_vector_init(&backend->packs, 1, NULL) < 0)
509 goto on_error;
510
511 if (git_vector_insert(&backend->packs, packfile) < 0)
512 goto on_error;
513
514 backend->parent.read = &pack_backend__read;
515 backend->parent.read_prefix = &pack_backend__read_prefix;
516 backend->parent.read_header = NULL;
517 backend->parent.exists = &pack_backend__exists;
518 backend->parent.foreach = &pack_backend__foreach;
519 backend->parent.free = &pack_backend__free;
520
521 *backend_out = (git_odb_backend *)backend;
522
523 return 0;
524
525 on_error:
526 git_vector_free(&backend->packs);
527 git__free(backend);
528 git__free(packfile);
529 return -1;
530 }
531
532 int git_odb_backend_pack(git_odb_backend **backend_out, const char *objects_dir)
533 {
534 struct pack_backend *backend = NULL;
535 git_buf path = GIT_BUF_INIT;
536
537 backend = git__calloc(1, sizeof(struct pack_backend));
538 GITERR_CHECK_ALLOC(backend);
539
540 if (git_vector_init(&backend->packs, 8, packfile_sort__cb) < 0 ||
541 git_buf_joinpath(&path, objects_dir, "pack") < 0)
542 {
543 git__free(backend);
544 return -1;
545 }
546
547 if (git_path_isdir(git_buf_cstr(&path)) == true) {
548 backend->pack_folder = git_buf_detach(&path);
549 }
550
551 backend->parent.read = &pack_backend__read;
552 backend->parent.read_prefix = &pack_backend__read_prefix;
553 backend->parent.read_header = NULL;
554 backend->parent.exists = &pack_backend__exists;
555 backend->parent.foreach = &pack_backend__foreach;
556 backend->parent.free = &pack_backend__free;
557
558 *backend_out = (git_odb_backend *)backend;
559
560 git_buf_free(&path);
561
562 return 0;
563 }