]> git.proxmox.com Git - libgit2.git/blob - src/odb_pack.c
global: Properly use `git__` memory wrappers
[libgit2.git] / src / odb_pack.c
1 /*
2 * Copyright (C) 2009-2011 the libgit2 contributors
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "common.h"
9 #include "git2/zlib.h"
10 #include "git2/repository.h"
11 #include "git2/oid.h"
12 #include "fileops.h"
13 #include "hash.h"
14 #include "odb.h"
15 #include "delta-apply.h"
16 #include "sha1_lookup.h"
17 #include "mwindow.h"
18 #include "pack.h"
19
20 #include "git2/odb_backend.h"
21
22 struct pack_backend {
23 git_odb_backend parent;
24 git_vector packs;
25 struct git_pack_file *last_found;
26 char *pack_folder;
27 time_t pack_folder_mtime;
28 };
29
30 /**
31 * The wonderful tale of a Packed Object lookup query
32 * ===================================================
33 * A riveting and epic story of epicness and ASCII
34 * art, presented by yours truly,
35 * Sir Vicent of Marti
36 *
37 *
38 * Chapter 1: Once upon a time...
39 * Initialization of the Pack Backend
40 * --------------------------------------------------
41 *
42 * # git_odb_backend_pack
43 * | Creates the pack backend structure, initializes the
44 * | callback pointers to our default read() and exist() methods,
45 * | and tries to preload all the known packfiles in the ODB.
46 * |
47 * |-# packfile_load_all
48 * | Tries to find the `pack` folder, if it exists. ODBs without
49 * | a pack folder are ignored altogether. If there's a `pack` folder
50 * | we run a `dirent` callback through every file in the pack folder
51 * | to find our packfiles. The packfiles are then sorted according
52 * | to a sorting callback.
53 * |
54 * |-# packfile_load__cb
55 * | | This callback is called from `dirent` with every single file
56 * | | inside the pack folder. We find the packs by actually locating
57 * | | their index (ends in ".idx"). From that index, we verify that
58 * | | the corresponding packfile exists and is valid, and if so, we
59 * | | add it to the pack list.
60 * | |
61 * | |-# packfile_check
62 * | Make sure that there's a packfile to back this index, and store
63 * | some very basic information regarding the packfile itself,
64 * | such as the full path, the size, and the modification time.
65 * | We don't actually open the packfile to check for internal consistency.
66 * |
67 * |-# packfile_sort__cb
68 * Sort all the preloaded packs according to some specific criteria:
69 * we prioritize the "newer" packs because it's more likely they
70 * contain the objects we are looking for, and we prioritize local
71 * packs over remote ones.
72 *
73 *
74 *
75 * Chapter 2: To be, or not to be...
76 * A standard packed `exist` query for an OID
77 * --------------------------------------------------
78 *
79 * # pack_backend__exists
80 * | Check if the given SHA1 oid exists in any of the packs
81 * | that have been loaded for our ODB.
82 * |
83 * |-# pack_entry_find
84 * | Iterate through all the packs that have been preloaded
85 * | (starting by the pack where the latest object was found)
86 * | to try to find the OID in one of them.
87 * |
88 * |-# pack_entry_find1
89 * | Check the index of an individual pack to see if the SHA1
90 * | OID can be found. If we can find the offset to that SHA1
91 * | inside of the index, that means the object is contained
92 * | inside of the packfile and we can stop searching.
93 * | Before returning, we verify that the packfile behing the
94 * | index we are searching still exists on disk.
95 * |
96 * |-# pack_entry_find_offset
97 * | | Mmap the actual index file to disk if it hasn't been opened
98 * | | yet, and run a binary search through it to find the OID.
99 * | | See <http://book.git-scm.com/7_the_packfile.html> for specifics
100 * | | on the Packfile Index format and how do we find entries in it.
101 * | |
102 * | |-# pack_index_open
103 * | | Guess the name of the index based on the full path to the
104 * | | packfile, open it and verify its contents. Only if the index
105 * | | has not been opened already.
106 * | |
107 * | |-# pack_index_check
108 * | Mmap the index file and do a quick run through the header
109 * | to guess the index version (right now we support v1 and v2),
110 * | and to verify that the size of the index makes sense.
111 * |
112 * |-# packfile_open
113 * See `packfile_open` in Chapter 3
114 *
115 *
116 *
117 * Chapter 3: The neverending story...
118 * A standard packed `lookup` query for an OID
119 * --------------------------------------------------
120 * TODO
121 *
122 */
123
124
125 /***********************************************************
126 *
127 * FORWARD DECLARATIONS
128 *
129 ***********************************************************/
130
131 static void pack_window_free_all(struct pack_backend *backend, struct git_pack_file *p);
132 static int pack_window_contains(git_mwindow *win, off_t offset);
133
134 static int packfile_sort__cb(const void *a_, const void *b_);
135
136 static int packfile_load__cb(void *_data, char *path);
137 static int packfile_refresh_all(struct pack_backend *backend);
138
139 static int pack_entry_find(struct git_pack_entry *e,
140 struct pack_backend *backend, const git_oid *oid);
141
142 /* Can find the offset of an object given
143 * a prefix of an identifier.
144 * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
145 * is ambiguous.
146 * This method assumes that len is between
147 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
148 */
149 static int pack_entry_find_prefix(struct git_pack_entry *e,
150 struct pack_backend *backend,
151 const git_oid *short_oid,
152 unsigned int len);
153
154
155
156 /***********************************************************
157 *
158 * PACK WINDOW MANAGEMENT
159 *
160 ***********************************************************/
161
162 GIT_INLINE(void) pack_window_free_all(struct pack_backend *GIT_UNUSED(backend), struct git_pack_file *p)
163 {
164 GIT_UNUSED_ARG(backend);
165 git_mwindow_free_all(&p->mwf);
166 }
167
168 GIT_INLINE(int) pack_window_contains(git_mwindow *win, off_t offset)
169 {
170 /* We must promise at least 20 bytes (one hash) after the
171 * offset is available from this window, otherwise the offset
172 * is not actually in this window and a different window (which
173 * has that one hash excess) must be used. This is to support
174 * the object header and delta base parsing routines below.
175 */
176 return git_mwindow_contains(win, offset + 20);
177 }
178
179 static int packfile_sort__cb(const void *a_, const void *b_)
180 {
181 const struct git_pack_file *a = a_;
182 const struct git_pack_file *b = b_;
183 int st;
184
185 /*
186 * Local packs tend to contain objects specific to our
187 * variant of the project than remote ones. In addition,
188 * remote ones could be on a network mounted filesystem.
189 * Favor local ones for these reasons.
190 */
191 st = a->pack_local - b->pack_local;
192 if (st)
193 return -st;
194
195 /*
196 * Younger packs tend to contain more recent objects,
197 * and more recent objects tend to get accessed more
198 * often.
199 */
200 if (a->mtime < b->mtime)
201 return 1;
202 else if (a->mtime == b->mtime)
203 return 0;
204
205 return -1;
206 }
207
208
209
210 static int packfile_load__cb(void *_data, char *path)
211 {
212 struct pack_backend *backend = (struct pack_backend *)_data;
213 struct git_pack_file *pack;
214 int error;
215 size_t i;
216
217 if (git__suffixcmp(path, ".idx") != 0)
218 return GIT_SUCCESS; /* not an index */
219
220 for (i = 0; i < backend->packs.length; ++i) {
221 struct git_pack_file *p = git_vector_get(&backend->packs, i);
222 if (memcmp(p->pack_name, path, strlen(path) - strlen(".idx")) == 0)
223 return GIT_SUCCESS;
224 }
225
226 error = git_packfile_check(&pack, path);
227 if (error == GIT_ENOTFOUND) {
228 /* ignore missing .pack file as git does */
229 return GIT_SUCCESS;
230 } else if (error < GIT_SUCCESS)
231 return git__rethrow(error, "Failed to load packfile");
232
233 if (git_vector_insert(&backend->packs, pack) < GIT_SUCCESS) {
234 git__free(pack);
235 return GIT_ENOMEM;
236 }
237
238 return GIT_SUCCESS;
239 }
240
241 static int packfile_refresh_all(struct pack_backend *backend)
242 {
243 int error;
244 struct stat st;
245
246 if (backend->pack_folder == NULL)
247 return GIT_SUCCESS;
248
249 if (p_stat(backend->pack_folder, &st) < 0 || !S_ISDIR(st.st_mode))
250 return git__throw(GIT_ENOTFOUND, "Failed to refresh packfiles. Backend not found");
251
252 if (st.st_mtime != backend->pack_folder_mtime) {
253 char path[GIT_PATH_MAX];
254 strcpy(path, backend->pack_folder);
255
256 /* reload all packs */
257 error = git_futils_direach(path, GIT_PATH_MAX, packfile_load__cb, (void *)backend);
258 if (error < GIT_SUCCESS)
259 return git__rethrow(error, "Failed to refresh packfiles");
260
261 git_vector_sort(&backend->packs);
262 backend->pack_folder_mtime = st.st_mtime;
263 }
264
265 return GIT_SUCCESS;
266 }
267
268 static int pack_entry_find(struct git_pack_entry *e, struct pack_backend *backend, const git_oid *oid)
269 {
270 int error;
271 size_t i;
272
273 if ((error = packfile_refresh_all(backend)) < GIT_SUCCESS)
274 return git__rethrow(error, "Failed to find pack entry");
275
276 if (backend->last_found &&
277 git_pack_entry_find(e, backend->last_found, oid, GIT_OID_HEXSZ) == GIT_SUCCESS)
278 return GIT_SUCCESS;
279
280 for (i = 0; i < backend->packs.length; ++i) {
281 struct git_pack_file *p;
282
283 p = git_vector_get(&backend->packs, i);
284 if (p == backend->last_found)
285 continue;
286
287 if (git_pack_entry_find(e, p, oid, GIT_OID_HEXSZ) == GIT_SUCCESS) {
288 backend->last_found = p;
289 return GIT_SUCCESS;
290 }
291 }
292
293 return git__throw(GIT_ENOTFOUND, "Failed to find pack entry");
294 }
295
296 static int pack_entry_find_prefix(
297 struct git_pack_entry *e,
298 struct pack_backend *backend,
299 const git_oid *short_oid,
300 unsigned int len)
301 {
302 int error;
303 size_t i;
304 unsigned found = 0;
305
306 if ((error = packfile_refresh_all(backend)) < GIT_SUCCESS)
307 return git__rethrow(error, "Failed to find pack entry");
308
309 if (backend->last_found) {
310 error = git_pack_entry_find(e, backend->last_found, short_oid, len);
311 if (error == GIT_EAMBIGUOUSOIDPREFIX) {
312 return git__rethrow(error, "Failed to find pack entry. Ambiguous sha1 prefix");
313 } else if (error == GIT_SUCCESS) {
314 found = 1;
315 }
316 }
317
318 for (i = 0; i < backend->packs.length; ++i) {
319 struct git_pack_file *p;
320
321 p = git_vector_get(&backend->packs, i);
322 if (p == backend->last_found)
323 continue;
324
325 error = git_pack_entry_find(e, p, short_oid, len);
326 if (error == GIT_EAMBIGUOUSOIDPREFIX) {
327 return git__rethrow(error, "Failed to find pack entry. Ambiguous sha1 prefix");
328 } else if (error == GIT_SUCCESS) {
329 found++;
330 if (found > 1)
331 break;
332 backend->last_found = p;
333 }
334 }
335
336 if (!found) {
337 return git__rethrow(GIT_ENOTFOUND, "Failed to find pack entry");
338 } else if (found > 1) {
339 return git__rethrow(GIT_EAMBIGUOUSOIDPREFIX, "Failed to find pack entry. Ambiguous sha1 prefix");
340 } else {
341 return GIT_SUCCESS;
342 }
343
344 }
345
346
347 /***********************************************************
348 *
349 * PACKED BACKEND PUBLIC API
350 *
351 * Implement the git_odb_backend API calls
352 *
353 ***********************************************************/
354
355 /*
356 int pack_backend__read_header(git_rawobj *obj, git_odb_backend *backend, const git_oid *oid)
357 {
358 pack_location location;
359
360 assert(obj && backend && oid);
361
362 if (locate_packfile(&location, (struct pack_backend *)backend, oid) < 0)
363 return GIT_ENOTFOUND;
364
365 return read_header_packed(obj, &location);
366 }
367 */
368
369 static int pack_backend__read(void **buffer_p, size_t *len_p, git_otype *type_p, git_odb_backend *backend, const git_oid *oid)
370 {
371 struct git_pack_entry e;
372 git_rawobj raw;
373 int error;
374
375 if ((error = pack_entry_find(&e, (struct pack_backend *)backend, oid)) < GIT_SUCCESS)
376 return git__rethrow(error, "Failed to read pack backend");
377
378 if ((error = git_packfile_unpack(&raw, e.p, &e.offset)) < GIT_SUCCESS)
379 return git__rethrow(error, "Failed to read pack backend");
380
381 *buffer_p = raw.data;
382 *len_p = raw.len;
383 *type_p = raw.type;
384
385 return GIT_SUCCESS;
386 }
387
388 static int pack_backend__read_prefix(
389 git_oid *out_oid,
390 void **buffer_p,
391 size_t *len_p,
392 git_otype *type_p,
393 git_odb_backend *backend,
394 const git_oid *short_oid,
395 unsigned int len)
396 {
397 if (len < GIT_OID_MINPREFIXLEN)
398 return git__throw(GIT_EAMBIGUOUSOIDPREFIX, "Failed to read pack backend. Prefix length is lower than %d.", GIT_OID_MINPREFIXLEN);
399
400 if (len >= GIT_OID_HEXSZ) {
401 /* We can fall back to regular read method */
402 int error = pack_backend__read(buffer_p, len_p, type_p, backend, short_oid);
403 if (error == GIT_SUCCESS)
404 git_oid_cpy(out_oid, short_oid);
405
406 return error;
407 } else {
408 struct git_pack_entry e;
409 git_rawobj raw;
410 int error;
411
412 if ((error = pack_entry_find_prefix(&e, (struct pack_backend *)backend, short_oid, len)) < GIT_SUCCESS)
413 return git__rethrow(error, "Failed to read pack backend");
414
415 if ((error = git_packfile_unpack(&raw, e.p, &e.offset)) < GIT_SUCCESS)
416 return git__rethrow(error, "Failed to read pack backend");
417
418 *buffer_p = raw.data;
419 *len_p = raw.len;
420 *type_p = raw.type;
421 git_oid_cpy(out_oid, &e.sha1);
422 }
423
424 return GIT_SUCCESS;
425 }
426
427 static int pack_backend__exists(git_odb_backend *backend, const git_oid *oid)
428 {
429 struct git_pack_entry e;
430 return pack_entry_find(&e, (struct pack_backend *)backend, oid) == GIT_SUCCESS;
431 }
432
433 static void pack_backend__free(git_odb_backend *_backend)
434 {
435 struct pack_backend *backend;
436 size_t i;
437
438 assert(_backend);
439
440 backend = (struct pack_backend *)_backend;
441
442 for (i = 0; i < backend->packs.length; ++i) {
443 struct git_pack_file *p = git_vector_get(&backend->packs, i);
444 packfile_free(p);
445 }
446
447 git_vector_free(&backend->packs);
448 git__free(backend->pack_folder);
449 git__free(backend);
450 }
451
452 int git_odb_backend_pack(git_odb_backend **backend_out, const char *objects_dir)
453 {
454 struct pack_backend *backend;
455 char path[GIT_PATH_MAX];
456
457 backend = git__calloc(1, sizeof(struct pack_backend));
458 if (backend == NULL)
459 return GIT_ENOMEM;
460
461 if (git_vector_init(&backend->packs, 8, packfile_sort__cb) < GIT_SUCCESS) {
462 git__free(backend);
463 return GIT_ENOMEM;
464 }
465
466 git_path_join(path, objects_dir, "pack");
467 if (git_futils_isdir(path) == GIT_SUCCESS) {
468 backend->pack_folder = git__strdup(path);
469 backend->pack_folder_mtime = 0;
470
471 if (backend->pack_folder == NULL) {
472 git__free(backend);
473 return GIT_ENOMEM;
474 }
475 }
476
477 backend->parent.read = &pack_backend__read;
478 backend->parent.read_prefix = &pack_backend__read_prefix;
479 backend->parent.read_header = NULL;
480 backend->parent.exists = &pack_backend__exists;
481 backend->parent.free = &pack_backend__free;
482
483 *backend_out = (git_odb_backend *)backend;
484 return GIT_SUCCESS;
485 }