]> git.proxmox.com Git - libgit2.git/blob - src/odb_pack.c
Update Copyright header
[libgit2.git] / src / odb_pack.c
1 /*
2 * Copyright (C) 2009-2012 the libgit2 contributors
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "common.h"
9 #include "git2/zlib.h"
10 #include "git2/repository.h"
11 #include "git2/oid.h"
12 #include "fileops.h"
13 #include "hash.h"
14 #include "odb.h"
15 #include "delta-apply.h"
16 #include "sha1_lookup.h"
17 #include "mwindow.h"
18 #include "pack.h"
19
20 #include "git2/odb_backend.h"
21
22 struct pack_backend {
23 git_odb_backend parent;
24 git_vector packs;
25 struct git_pack_file *last_found;
26 char *pack_folder;
27 time_t pack_folder_mtime;
28 };
29
30 /**
31 * The wonderful tale of a Packed Object lookup query
32 * ===================================================
33 * A riveting and epic story of epicness and ASCII
34 * art, presented by yours truly,
35 * Sir Vicent of Marti
36 *
37 *
38 * Chapter 1: Once upon a time...
39 * Initialization of the Pack Backend
40 * --------------------------------------------------
41 *
42 * # git_odb_backend_pack
43 * | Creates the pack backend structure, initializes the
44 * | callback pointers to our default read() and exist() methods,
45 * | and tries to preload all the known packfiles in the ODB.
46 * |
47 * |-# packfile_load_all
48 * | Tries to find the `pack` folder, if it exists. ODBs without
49 * | a pack folder are ignored altogether. If there's a `pack` folder
50 * | we run a `dirent` callback through every file in the pack folder
51 * | to find our packfiles. The packfiles are then sorted according
52 * | to a sorting callback.
53 * |
54 * |-# packfile_load__cb
55 * | | This callback is called from `dirent` with every single file
56 * | | inside the pack folder. We find the packs by actually locating
57 * | | their index (ends in ".idx"). From that index, we verify that
58 * | | the corresponding packfile exists and is valid, and if so, we
59 * | | add it to the pack list.
60 * | |
61 * | |-# packfile_check
62 * | Make sure that there's a packfile to back this index, and store
63 * | some very basic information regarding the packfile itself,
64 * | such as the full path, the size, and the modification time.
65 * | We don't actually open the packfile to check for internal consistency.
66 * |
67 * |-# packfile_sort__cb
68 * Sort all the preloaded packs according to some specific criteria:
69 * we prioritize the "newer" packs because it's more likely they
70 * contain the objects we are looking for, and we prioritize local
71 * packs over remote ones.
72 *
73 *
74 *
75 * Chapter 2: To be, or not to be...
76 * A standard packed `exist` query for an OID
77 * --------------------------------------------------
78 *
79 * # pack_backend__exists
80 * | Check if the given SHA1 oid exists in any of the packs
81 * | that have been loaded for our ODB.
82 * |
83 * |-# pack_entry_find
84 * | Iterate through all the packs that have been preloaded
85 * | (starting by the pack where the latest object was found)
86 * | to try to find the OID in one of them.
87 * |
88 * |-# pack_entry_find1
89 * | Check the index of an individual pack to see if the SHA1
90 * | OID can be found. If we can find the offset to that SHA1
91 * | inside of the index, that means the object is contained
92 * | inside of the packfile and we can stop searching.
93 * | Before returning, we verify that the packfile behing the
94 * | index we are searching still exists on disk.
95 * |
96 * |-# pack_entry_find_offset
97 * | | Mmap the actual index file to disk if it hasn't been opened
98 * | | yet, and run a binary search through it to find the OID.
99 * | | See <http://book.git-scm.com/7_the_packfile.html> for specifics
100 * | | on the Packfile Index format and how do we find entries in it.
101 * | |
102 * | |-# pack_index_open
103 * | | Guess the name of the index based on the full path to the
104 * | | packfile, open it and verify its contents. Only if the index
105 * | | has not been opened already.
106 * | |
107 * | |-# pack_index_check
108 * | Mmap the index file and do a quick run through the header
109 * | to guess the index version (right now we support v1 and v2),
110 * | and to verify that the size of the index makes sense.
111 * |
112 * |-# packfile_open
113 * See `packfile_open` in Chapter 3
114 *
115 *
116 *
117 * Chapter 3: The neverending story...
118 * A standard packed `lookup` query for an OID
119 * --------------------------------------------------
120 * TODO
121 *
122 */
123
124
125 /***********************************************************
126 *
127 * FORWARD DECLARATIONS
128 *
129 ***********************************************************/
130
131 static void pack_window_free_all(struct pack_backend *backend, struct git_pack_file *p);
132 static int pack_window_contains(git_mwindow *win, off_t offset);
133
134 static int packfile_sort__cb(const void *a_, const void *b_);
135
136 static int packfile_load__cb(void *_data, git_buf *path);
137 static int packfile_refresh_all(struct pack_backend *backend);
138
139 static int pack_entry_find(struct git_pack_entry *e,
140 struct pack_backend *backend, const git_oid *oid);
141
142 /* Can find the offset of an object given
143 * a prefix of an identifier.
144 * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
145 * is ambiguous.
146 * This method assumes that len is between
147 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
148 */
149 static int pack_entry_find_prefix(struct git_pack_entry *e,
150 struct pack_backend *backend,
151 const git_oid *short_oid,
152 unsigned int len);
153
154
155
156 /***********************************************************
157 *
158 * PACK WINDOW MANAGEMENT
159 *
160 ***********************************************************/
161
162 GIT_INLINE(void) pack_window_free_all(struct pack_backend *GIT_UNUSED(backend), struct git_pack_file *p)
163 {
164 GIT_UNUSED_ARG(backend);
165 git_mwindow_free_all(&p->mwf);
166 }
167
168 GIT_INLINE(int) pack_window_contains(git_mwindow *win, off_t offset)
169 {
170 /* We must promise at least 20 bytes (one hash) after the
171 * offset is available from this window, otherwise the offset
172 * is not actually in this window and a different window (which
173 * has that one hash excess) must be used. This is to support
174 * the object header and delta base parsing routines below.
175 */
176 return git_mwindow_contains(win, offset + 20);
177 }
178
179 static int packfile_sort__cb(const void *a_, const void *b_)
180 {
181 const struct git_pack_file *a = a_;
182 const struct git_pack_file *b = b_;
183 int st;
184
185 /*
186 * Local packs tend to contain objects specific to our
187 * variant of the project than remote ones. In addition,
188 * remote ones could be on a network mounted filesystem.
189 * Favor local ones for these reasons.
190 */
191 st = a->pack_local - b->pack_local;
192 if (st)
193 return -st;
194
195 /*
196 * Younger packs tend to contain more recent objects,
197 * and more recent objects tend to get accessed more
198 * often.
199 */
200 if (a->mtime < b->mtime)
201 return 1;
202 else if (a->mtime == b->mtime)
203 return 0;
204
205 return -1;
206 }
207
208
209
210 static int packfile_load__cb(void *_data, git_buf *path)
211 {
212 struct pack_backend *backend = (struct pack_backend *)_data;
213 struct git_pack_file *pack;
214 int error;
215 size_t i;
216
217 if (git__suffixcmp(path->ptr, ".idx") != 0)
218 return GIT_SUCCESS; /* not an index */
219
220 for (i = 0; i < backend->packs.length; ++i) {
221 struct git_pack_file *p = git_vector_get(&backend->packs, i);
222 if (memcmp(p->pack_name, path->ptr, path->size - strlen(".idx")) == 0)
223 return GIT_SUCCESS;
224 }
225
226 error = git_packfile_check(&pack, path->ptr);
227 if (error == GIT_ENOTFOUND) {
228 /* ignore missing .pack file as git does */
229 return GIT_SUCCESS;
230 } else if (error < GIT_SUCCESS)
231 return git__rethrow(error, "Failed to load packfile");
232
233 if (git_vector_insert(&backend->packs, pack) < GIT_SUCCESS) {
234 git__free(pack);
235 return GIT_ENOMEM;
236 }
237
238 return GIT_SUCCESS;
239 }
240
241 static int packfile_refresh_all(struct pack_backend *backend)
242 {
243 int error;
244 struct stat st;
245
246 if (backend->pack_folder == NULL)
247 return GIT_SUCCESS;
248
249 if (p_stat(backend->pack_folder, &st) < 0 || !S_ISDIR(st.st_mode))
250 return git__throw(GIT_ENOTFOUND, "Failed to refresh packfiles. Backend not found");
251
252 if (st.st_mtime != backend->pack_folder_mtime) {
253 git_buf path = GIT_BUF_INIT;
254 git_buf_sets(&path, backend->pack_folder);
255
256 /* reload all packs */
257 error = git_path_direach(&path, packfile_load__cb, (void *)backend);
258
259 git_buf_free(&path);
260 if (error < GIT_SUCCESS)
261 return git__rethrow(error, "Failed to refresh packfiles");
262
263 git_vector_sort(&backend->packs);
264 backend->pack_folder_mtime = st.st_mtime;
265 }
266
267 return GIT_SUCCESS;
268 }
269
270 static int pack_entry_find(struct git_pack_entry *e, struct pack_backend *backend, const git_oid *oid)
271 {
272 int error;
273 size_t i;
274
275 if ((error = packfile_refresh_all(backend)) < GIT_SUCCESS)
276 return git__rethrow(error, "Failed to find pack entry");
277
278 if (backend->last_found &&
279 git_pack_entry_find(e, backend->last_found, oid, GIT_OID_HEXSZ) == GIT_SUCCESS)
280 return GIT_SUCCESS;
281
282 for (i = 0; i < backend->packs.length; ++i) {
283 struct git_pack_file *p;
284
285 p = git_vector_get(&backend->packs, i);
286 if (p == backend->last_found)
287 continue;
288
289 if (git_pack_entry_find(e, p, oid, GIT_OID_HEXSZ) == GIT_SUCCESS) {
290 backend->last_found = p;
291 return GIT_SUCCESS;
292 }
293 }
294
295 return git__throw(GIT_ENOTFOUND, "Failed to find pack entry");
296 }
297
298 static int pack_entry_find_prefix(
299 struct git_pack_entry *e,
300 struct pack_backend *backend,
301 const git_oid *short_oid,
302 unsigned int len)
303 {
304 int error;
305 size_t i;
306 unsigned found = 0;
307
308 if ((error = packfile_refresh_all(backend)) < GIT_SUCCESS)
309 return git__rethrow(error, "Failed to find pack entry");
310
311 if (backend->last_found) {
312 error = git_pack_entry_find(e, backend->last_found, short_oid, len);
313 if (error == GIT_EAMBIGUOUSOIDPREFIX) {
314 return git__rethrow(error, "Failed to find pack entry. Ambiguous sha1 prefix");
315 } else if (error == GIT_SUCCESS) {
316 found = 1;
317 }
318 }
319
320 for (i = 0; i < backend->packs.length; ++i) {
321 struct git_pack_file *p;
322
323 p = git_vector_get(&backend->packs, i);
324 if (p == backend->last_found)
325 continue;
326
327 error = git_pack_entry_find(e, p, short_oid, len);
328 if (error == GIT_EAMBIGUOUSOIDPREFIX) {
329 return git__rethrow(error, "Failed to find pack entry. Ambiguous sha1 prefix");
330 } else if (error == GIT_SUCCESS) {
331 found++;
332 if (found > 1)
333 break;
334 backend->last_found = p;
335 }
336 }
337
338 if (!found) {
339 return git__rethrow(GIT_ENOTFOUND, "Failed to find pack entry");
340 } else if (found > 1) {
341 return git__rethrow(GIT_EAMBIGUOUSOIDPREFIX, "Failed to find pack entry. Ambiguous sha1 prefix");
342 } else {
343 return GIT_SUCCESS;
344 }
345
346 }
347
348
349 /***********************************************************
350 *
351 * PACKED BACKEND PUBLIC API
352 *
353 * Implement the git_odb_backend API calls
354 *
355 ***********************************************************/
356
357 /*
358 int pack_backend__read_header(git_rawobj *obj, git_odb_backend *backend, const git_oid *oid)
359 {
360 pack_location location;
361
362 assert(obj && backend && oid);
363
364 if (locate_packfile(&location, (struct pack_backend *)backend, oid) < 0)
365 return GIT_ENOTFOUND;
366
367 return read_header_packed(obj, &location);
368 }
369 */
370
371 static int pack_backend__read(void **buffer_p, size_t *len_p, git_otype *type_p, git_odb_backend *backend, const git_oid *oid)
372 {
373 struct git_pack_entry e;
374 git_rawobj raw;
375 int error;
376
377 if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < GIT_SUCCESS)
378 return git__rethrow(error, "Failed to read pack backend");
379
380 if ((error = git_packfile_unpack(&raw, e.p, &e.offset)) < GIT_SUCCESS)
381 return git__rethrow(error, "Failed to read pack backend");
382
383 *buffer_p = raw.data;
384 *len_p = raw.len;
385 *type_p = raw.type;
386
387 return GIT_SUCCESS;
388 }
389
390 static int pack_backend__read_prefix(
391 git_oid *out_oid,
392 void **buffer_p,
393 size_t *len_p,
394 git_otype *type_p,
395 git_odb_backend *backend,
396 const git_oid *short_oid,
397 unsigned int len)
398 {
399 if (len < GIT_OID_MINPREFIXLEN)
400 return git__throw(GIT_EAMBIGUOUSOIDPREFIX, "Failed to read pack backend. Prefix length is lower than %d.", GIT_OID_MINPREFIXLEN);
401
402 if (len >= GIT_OID_HEXSZ) {
403 /* We can fall back to regular read method */
404 int error = pack_backend__read(buffer_p, len_p, type_p, backend, short_oid);
405 if (error == GIT_SUCCESS)
406 git_oid_cpy(out_oid, short_oid);
407
408 return error;
409 } else {
410 struct git_pack_entry e;
411 git_rawobj raw;
412 int error;
413
414 if ((error = pack_entry_find_prefix(&e, (struct pack_backend *)backend, short_oid, len)) < GIT_SUCCESS)
415 return git__rethrow(error, "Failed to read pack backend");
416
417 if ((error = git_packfile_unpack(&raw, e.p, &e.offset)) < GIT_SUCCESS)
418 return git__rethrow(error, "Failed to read pack backend");
419
420 *buffer_p = raw.data;
421 *len_p = raw.len;
422 *type_p = raw.type;
423 git_oid_cpy(out_oid, &e.sha1);
424 }
425
426 return GIT_SUCCESS;
427 }
428
429 static int pack_backend__exists(git_odb_backend *backend, const git_oid *oid)
430 {
431 struct git_pack_entry e;
432 return pack_entry_find(&e, (struct pack_backend *)backend, oid) == GIT_SUCCESS;
433 }
434
435 static void pack_backend__free(git_odb_backend *_backend)
436 {
437 struct pack_backend *backend;
438 size_t i;
439
440 assert(_backend);
441
442 backend = (struct pack_backend *)_backend;
443
444 for (i = 0; i < backend->packs.length; ++i) {
445 struct git_pack_file *p = git_vector_get(&backend->packs, i);
446 packfile_free(p);
447 }
448
449 git_vector_free(&backend->packs);
450 git__free(backend->pack_folder);
451 git__free(backend);
452 }
453
454 int git_odb_backend_pack(git_odb_backend **backend_out, const char *objects_dir)
455 {
456 struct pack_backend *backend = NULL;
457 git_buf path = GIT_BUF_INIT;
458 int error = GIT_SUCCESS;
459
460 backend = git__calloc(1, sizeof(struct pack_backend));
461 if (backend == NULL)
462 return GIT_ENOMEM;
463
464 error = git_vector_init(&backend->packs, 8, packfile_sort__cb);
465 if (error < GIT_SUCCESS)
466 goto cleanup;
467
468 error = git_buf_joinpath(&path, objects_dir, "pack");
469 if (error < GIT_SUCCESS)
470 goto cleanup;
471
472 if (git_path_isdir(git_buf_cstr(&path)) == GIT_SUCCESS) {
473 backend->pack_folder = git_buf_detach(&path);
474 backend->pack_folder_mtime = 0;
475 }
476
477 backend->parent.read = &pack_backend__read;
478 backend->parent.read_prefix = &pack_backend__read_prefix;
479 backend->parent.read_header = NULL;
480 backend->parent.exists = &pack_backend__exists;
481 backend->parent.free = &pack_backend__free;
482
483 *backend_out = (git_odb_backend *)backend;
484
485 cleanup:
486 if (error < GIT_SUCCESS)
487 git__free(backend);
488 git_buf_free(&path);
489
490 return error;
491 }