]> git.proxmox.com Git - libgit2.git/blob - src/pack.c
1b5cf670f8d65cc824816ede77c96fbceb95f831
[libgit2.git] / src / pack.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "pack.h"
9
10 #include "delta.h"
11 #include "futils.h"
12 #include "mwindow.h"
13 #include "odb.h"
14 #include "oid.h"
15
16 /* Option to bypass checking existence of '.keep' files */
17 bool git_disable_pack_keep_file_checks = false;
18
19 static int packfile_open(struct git_pack_file *p);
20 static off64_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n);
21 static int packfile_unpack_compressed(
22 git_rawobj *obj,
23 struct git_pack_file *p,
24 git_mwindow **w_curs,
25 off64_t *curpos,
26 size_t size,
27 git_object_t type);
28
29 /* Can find the offset of an object given
30 * a prefix of an identifier.
31 * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
32 * is ambiguous within the pack.
33 * This method assumes that len is between
34 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
35 */
36 static int pack_entry_find_offset(
37 off64_t *offset_out,
38 git_oid *found_oid,
39 struct git_pack_file *p,
40 const git_oid *short_oid,
41 size_t len);
42
43 static int packfile_error(const char *message)
44 {
45 git_error_set(GIT_ERROR_ODB, "invalid pack file - %s", message);
46 return -1;
47 }
48
49 /********************
50 * Delta base cache
51 ********************/
52
53 static git_pack_cache_entry *new_cache_object(git_rawobj *source)
54 {
55 git_pack_cache_entry *e = git__calloc(1, sizeof(git_pack_cache_entry));
56 if (!e)
57 return NULL;
58
59 git_atomic_inc(&e->refcount);
60 memcpy(&e->raw, source, sizeof(git_rawobj));
61
62 return e;
63 }
64
65 static void free_cache_object(void *o)
66 {
67 git_pack_cache_entry *e = (git_pack_cache_entry *)o;
68
69 if (e != NULL) {
70 assert(e->refcount.val == 0);
71 git__free(e->raw.data);
72 git__free(e);
73 }
74 }
75
76 static void cache_free(git_pack_cache *cache)
77 {
78 git_pack_cache_entry *entry;
79
80 if (cache->entries) {
81 git_offmap_foreach_value(cache->entries, entry, {
82 free_cache_object(entry);
83 });
84
85 git_offmap_free(cache->entries);
86 cache->entries = NULL;
87 }
88 }
89
90 static int cache_init(git_pack_cache *cache)
91 {
92 if (git_offmap_new(&cache->entries) < 0)
93 return -1;
94
95 cache->memory_limit = GIT_PACK_CACHE_MEMORY_LIMIT;
96
97 if (git_mutex_init(&cache->lock)) {
98 git_error_set(GIT_ERROR_OS, "failed to initialize pack cache mutex");
99
100 git__free(cache->entries);
101 cache->entries = NULL;
102
103 return -1;
104 }
105
106 return 0;
107 }
108
109 static git_pack_cache_entry *cache_get(git_pack_cache *cache, off64_t offset)
110 {
111 git_pack_cache_entry *entry;
112
113 if (git_mutex_lock(&cache->lock) < 0)
114 return NULL;
115
116 if ((entry = git_offmap_get(cache->entries, offset)) != NULL) {
117 git_atomic_inc(&entry->refcount);
118 entry->last_usage = cache->use_ctr++;
119 }
120 git_mutex_unlock(&cache->lock);
121
122 return entry;
123 }
124
125 /* Run with the cache lock held */
126 static void free_lowest_entry(git_pack_cache *cache)
127 {
128 off64_t offset;
129 git_pack_cache_entry *entry;
130
131 git_offmap_foreach(cache->entries, offset, entry, {
132 if (entry && entry->refcount.val == 0) {
133 cache->memory_used -= entry->raw.len;
134 git_offmap_delete(cache->entries, offset);
135 free_cache_object(entry);
136 }
137 });
138 }
139
140 static int cache_add(
141 git_pack_cache_entry **cached_out,
142 git_pack_cache *cache,
143 git_rawobj *base,
144 off64_t offset)
145 {
146 git_pack_cache_entry *entry;
147 int exists;
148
149 if (base->len > GIT_PACK_CACHE_SIZE_LIMIT)
150 return -1;
151
152 entry = new_cache_object(base);
153 if (entry) {
154 if (git_mutex_lock(&cache->lock) < 0) {
155 git_error_set(GIT_ERROR_OS, "failed to lock cache");
156 git__free(entry);
157 return -1;
158 }
159 /* Add it to the cache if nobody else has */
160 exists = git_offmap_exists(cache->entries, offset);
161 if (!exists) {
162 while (cache->memory_used + base->len > cache->memory_limit)
163 free_lowest_entry(cache);
164
165 git_offmap_set(cache->entries, offset, entry);
166 cache->memory_used += entry->raw.len;
167
168 *cached_out = entry;
169 }
170 git_mutex_unlock(&cache->lock);
171 /* Somebody beat us to adding it into the cache */
172 if (exists) {
173 git__free(entry);
174 return -1;
175 }
176 }
177
178 return 0;
179 }
180
181 /***********************************************************
182 *
183 * PACK INDEX METHODS
184 *
185 ***********************************************************/
186
187 static void pack_index_free(struct git_pack_file *p)
188 {
189 if (p->oids) {
190 git__free(p->oids);
191 p->oids = NULL;
192 }
193 if (p->index_map.data) {
194 git_futils_mmap_free(&p->index_map);
195 p->index_map.data = NULL;
196 }
197 }
198
199 static int pack_index_check(const char *path, struct git_pack_file *p)
200 {
201 struct git_pack_idx_header *hdr;
202 uint32_t version, nr, i, *index;
203 void *idx_map;
204 size_t idx_size;
205 struct stat st;
206 int error;
207 /* TODO: properly open the file without access time using O_NOATIME */
208 git_file fd = git_futils_open_ro(path);
209 if (fd < 0)
210 return fd;
211
212 if (p_fstat(fd, &st) < 0) {
213 p_close(fd);
214 git_error_set(GIT_ERROR_OS, "unable to stat pack index '%s'", path);
215 return -1;
216 }
217
218 if (!S_ISREG(st.st_mode) ||
219 !git__is_sizet(st.st_size) ||
220 (idx_size = (size_t)st.st_size) < 4 * 256 + 20 + 20)
221 {
222 p_close(fd);
223 git_error_set(GIT_ERROR_ODB, "invalid pack index '%s'", path);
224 return -1;
225 }
226
227 error = git_futils_mmap_ro(&p->index_map, fd, 0, idx_size);
228
229 p_close(fd);
230
231 if (error < 0)
232 return error;
233
234 hdr = idx_map = p->index_map.data;
235
236 if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
237 version = ntohl(hdr->idx_version);
238
239 if (version < 2 || version > 2) {
240 git_futils_mmap_free(&p->index_map);
241 return packfile_error("unsupported index version");
242 }
243
244 } else
245 version = 1;
246
247 nr = 0;
248 index = idx_map;
249
250 if (version > 1)
251 index += 2; /* skip index header */
252
253 for (i = 0; i < 256; i++) {
254 uint32_t n = ntohl(index[i]);
255 if (n < nr) {
256 git_futils_mmap_free(&p->index_map);
257 return packfile_error("index is non-monotonic");
258 }
259 nr = n;
260 }
261
262 if (version == 1) {
263 /*
264 * Total size:
265 * - 256 index entries 4 bytes each
266 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
267 * - 20-byte SHA1 of the packfile
268 * - 20-byte SHA1 file checksum
269 */
270 if (idx_size != 4*256 + nr * 24 + 20 + 20) {
271 git_futils_mmap_free(&p->index_map);
272 return packfile_error("index is corrupted");
273 }
274 } else if (version == 2) {
275 /*
276 * Minimum size:
277 * - 8 bytes of header
278 * - 256 index entries 4 bytes each
279 * - 20-byte sha1 entry * nr
280 * - 4-byte crc entry * nr
281 * - 4-byte offset entry * nr
282 * - 20-byte SHA1 of the packfile
283 * - 20-byte SHA1 file checksum
284 * And after the 4-byte offset table might be a
285 * variable sized table containing 8-byte entries
286 * for offsets larger than 2^31.
287 */
288 unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
289 unsigned long max_size = min_size;
290
291 if (nr)
292 max_size += (nr - 1)*8;
293
294 if (idx_size < min_size || idx_size > max_size) {
295 git_futils_mmap_free(&p->index_map);
296 return packfile_error("wrong index size");
297 }
298 }
299
300 p->num_objects = nr;
301 p->index_version = version;
302 return 0;
303 }
304
305 static int pack_index_open(struct git_pack_file *p)
306 {
307 int error = 0;
308 size_t name_len;
309 git_buf idx_name;
310
311 if (p->index_version > -1)
312 return 0;
313
314 name_len = strlen(p->pack_name);
315 assert(name_len > strlen(".pack")); /* checked by git_pack_file alloc */
316
317 if (git_buf_init(&idx_name, name_len) < 0)
318 return -1;
319
320 git_buf_put(&idx_name, p->pack_name, name_len - strlen(".pack"));
321 git_buf_puts(&idx_name, ".idx");
322 if (git_buf_oom(&idx_name)) {
323 git_buf_dispose(&idx_name);
324 return -1;
325 }
326
327 if ((error = git_mutex_lock(&p->lock)) < 0) {
328 git_buf_dispose(&idx_name);
329 return error;
330 }
331
332 if (p->index_version == -1)
333 error = pack_index_check(idx_name.ptr, p);
334
335 git_buf_dispose(&idx_name);
336
337 git_mutex_unlock(&p->lock);
338
339 return error;
340 }
341
342 static unsigned char *pack_window_open(
343 struct git_pack_file *p,
344 git_mwindow **w_cursor,
345 off64_t offset,
346 unsigned int *left)
347 {
348 if (p->mwf.fd == -1 && packfile_open(p) < 0)
349 return NULL;
350
351 /* Since packfiles end in a hash of their content and it's
352 * pointless to ask for an offset into the middle of that
353 * hash, and the pack_window_contains function above wouldn't match
354 * don't allow an offset too close to the end of the file.
355 *
356 * Don't allow a negative offset, as that means we've wrapped
357 * around.
358 */
359 if (offset > (p->mwf.size - 20))
360 return NULL;
361 if (offset < 0)
362 return NULL;
363
364 return git_mwindow_open(&p->mwf, w_cursor, offset, 20, left);
365 }
366
367 /*
368 * The per-object header is a pretty dense thing, which is
369 * - first byte: low four bits are "size",
370 * then three bits of "type",
371 * with the high bit being "size continues".
372 * - each byte afterwards: low seven bits are size continuation,
373 * with the high bit being "size continues"
374 */
375 size_t git_packfile__object_header(unsigned char *hdr, size_t size, git_object_t type)
376 {
377 unsigned char *hdr_base;
378 unsigned char c;
379
380 assert(type >= GIT_OBJECT_COMMIT && type <= GIT_OBJECT_REF_DELTA);
381
382 /* TODO: add support for chunked objects; see git.git 6c0d19b1 */
383
384 c = (unsigned char)((type << 4) | (size & 15));
385 size >>= 4;
386 hdr_base = hdr;
387
388 while (size) {
389 *hdr++ = c | 0x80;
390 c = size & 0x7f;
391 size >>= 7;
392 }
393 *hdr++ = c;
394
395 return (hdr - hdr_base);
396 }
397
398
399 static int packfile_unpack_header1(
400 unsigned long *usedp,
401 size_t *sizep,
402 git_object_t *type,
403 const unsigned char *buf,
404 unsigned long len)
405 {
406 unsigned shift;
407 unsigned long size, c;
408 unsigned long used = 0;
409
410 c = buf[used++];
411 *type = (c >> 4) & 7;
412 size = c & 15;
413 shift = 4;
414 while (c & 0x80) {
415 if (len <= used) {
416 git_error_set(GIT_ERROR_ODB, "buffer too small");
417 return GIT_EBUFS;
418 }
419
420 if (bitsizeof(long) <= shift) {
421 *usedp = 0;
422 git_error_set(GIT_ERROR_ODB, "packfile corrupted");
423 return -1;
424 }
425
426 c = buf[used++];
427 size += (c & 0x7f) << shift;
428 shift += 7;
429 }
430
431 *sizep = (size_t)size;
432 *usedp = used;
433 return 0;
434 }
435
436 int git_packfile_unpack_header(
437 size_t *size_p,
438 git_object_t *type_p,
439 git_mwindow_file *mwf,
440 git_mwindow **w_curs,
441 off64_t *curpos)
442 {
443 unsigned char *base;
444 unsigned int left;
445 unsigned long used;
446 int ret;
447
448 /* pack_window_open() assures us we have [base, base + 20) available
449 * as a range that we can look at at. (Its actually the hash
450 * size that is assured.) With our object header encoding
451 * the maximum deflated object size is 2^137, which is just
452 * insane, so we know won't exceed what we have been given.
453 */
454 /* base = pack_window_open(p, w_curs, *curpos, &left); */
455 base = git_mwindow_open(mwf, w_curs, *curpos, 20, &left);
456 if (base == NULL)
457 return GIT_EBUFS;
458
459 ret = packfile_unpack_header1(&used, size_p, type_p, base, left);
460 git_mwindow_close(w_curs);
461 if (ret == GIT_EBUFS)
462 return ret;
463 else if (ret < 0)
464 return packfile_error("header length is zero");
465
466 *curpos += used;
467 return 0;
468 }
469
470 int git_packfile_resolve_header(
471 size_t *size_p,
472 git_object_t *type_p,
473 struct git_pack_file *p,
474 off64_t offset)
475 {
476 git_mwindow *w_curs = NULL;
477 off64_t curpos = offset;
478 size_t size;
479 git_object_t type;
480 off64_t base_offset;
481 int error;
482
483 error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
484 if (error < 0)
485 return error;
486
487 if (type == GIT_OBJECT_OFS_DELTA || type == GIT_OBJECT_REF_DELTA) {
488 size_t base_size;
489 git_packfile_stream stream;
490
491 error = get_delta_base(&base_offset, p, &w_curs, &curpos, type, offset);
492 git_mwindow_close(&w_curs);
493
494 if (error < 0)
495 return error;
496
497 if ((error = git_packfile_stream_open(&stream, p, curpos)) < 0)
498 return error;
499 error = git_delta_read_header_fromstream(&base_size, size_p, &stream);
500 git_packfile_stream_dispose(&stream);
501 if (error < 0)
502 return error;
503 } else {
504 *size_p = size;
505 base_offset = 0;
506 }
507
508 while (type == GIT_OBJECT_OFS_DELTA || type == GIT_OBJECT_REF_DELTA) {
509 curpos = base_offset;
510 error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
511 if (error < 0)
512 return error;
513 if (type != GIT_OBJECT_OFS_DELTA && type != GIT_OBJECT_REF_DELTA)
514 break;
515
516 error = get_delta_base(&base_offset, p, &w_curs, &curpos, type, base_offset);
517 git_mwindow_close(&w_curs);
518
519 if (error < 0)
520 return error;
521 }
522 *type_p = type;
523
524 return error;
525 }
526
527 #define SMALL_STACK_SIZE 64
528
529 /**
530 * Generate the chain of dependencies which we need to get to the
531 * object at `off`. `chain` is used a stack, popping gives the right
532 * order to apply deltas on. If an object is found in the pack's base
533 * cache, we stop calculating there.
534 */
535 static int pack_dependency_chain(git_dependency_chain *chain_out,
536 git_pack_cache_entry **cached_out, off64_t *cached_off,
537 struct pack_chain_elem *small_stack, size_t *stack_sz,
538 struct git_pack_file *p, off64_t obj_offset)
539 {
540 git_dependency_chain chain = GIT_ARRAY_INIT;
541 git_mwindow *w_curs = NULL;
542 off64_t curpos = obj_offset, base_offset;
543 int error = 0, use_heap = 0;
544 size_t size, elem_pos;
545 git_object_t type;
546
547 elem_pos = 0;
548 while (true) {
549 struct pack_chain_elem *elem;
550 git_pack_cache_entry *cached = NULL;
551
552 /* if we have a base cached, we can stop here instead */
553 if ((cached = cache_get(&p->bases, obj_offset)) != NULL) {
554 *cached_out = cached;
555 *cached_off = obj_offset;
556 break;
557 }
558
559 /* if we run out of space on the small stack, use the array */
560 if (elem_pos == SMALL_STACK_SIZE) {
561 git_array_init_to_size(chain, elem_pos);
562 GIT_ERROR_CHECK_ARRAY(chain);
563 memcpy(chain.ptr, small_stack, elem_pos * sizeof(struct pack_chain_elem));
564 chain.size = elem_pos;
565 use_heap = 1;
566 }
567
568 curpos = obj_offset;
569 if (!use_heap) {
570 elem = &small_stack[elem_pos];
571 } else {
572 elem = git_array_alloc(chain);
573 if (!elem) {
574 error = -1;
575 goto on_error;
576 }
577 }
578
579 elem->base_key = obj_offset;
580
581 error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
582
583 if (error < 0)
584 goto on_error;
585
586 elem->offset = curpos;
587 elem->size = size;
588 elem->type = type;
589 elem->base_key = obj_offset;
590
591 if (type != GIT_OBJECT_OFS_DELTA && type != GIT_OBJECT_REF_DELTA)
592 break;
593
594 error = get_delta_base(&base_offset, p, &w_curs, &curpos, type, obj_offset);
595 git_mwindow_close(&w_curs);
596
597 if (error < 0)
598 goto on_error;
599
600 /* we need to pass the pos *after* the delta-base bit */
601 elem->offset = curpos;
602
603 /* go through the loop again, but with the new object */
604 obj_offset = base_offset;
605 elem_pos++;
606 }
607
608
609 *stack_sz = elem_pos + 1;
610 *chain_out = chain;
611 return error;
612
613 on_error:
614 git_array_clear(chain);
615 return error;
616 }
617
618 int git_packfile_unpack(
619 git_rawobj *obj,
620 struct git_pack_file *p,
621 off64_t *obj_offset)
622 {
623 git_mwindow *w_curs = NULL;
624 off64_t curpos = *obj_offset;
625 int error, free_base = 0;
626 git_dependency_chain chain = GIT_ARRAY_INIT;
627 struct pack_chain_elem *elem = NULL, *stack;
628 git_pack_cache_entry *cached = NULL;
629 struct pack_chain_elem small_stack[SMALL_STACK_SIZE];
630 size_t stack_size = 0, elem_pos, alloclen;
631 git_object_t base_type;
632
633 /*
634 * TODO: optionally check the CRC on the packfile
635 */
636
637 error = pack_dependency_chain(&chain, &cached, obj_offset, small_stack, &stack_size, p, *obj_offset);
638 if (error < 0)
639 return error;
640
641 obj->data = NULL;
642 obj->len = 0;
643 obj->type = GIT_OBJECT_INVALID;
644
645 /* let's point to the right stack */
646 stack = chain.ptr ? chain.ptr : small_stack;
647
648 elem_pos = stack_size;
649 if (cached) {
650 memcpy(obj, &cached->raw, sizeof(git_rawobj));
651 base_type = obj->type;
652 elem_pos--; /* stack_size includes the base, which isn't actually there */
653 } else {
654 elem = &stack[--elem_pos];
655 base_type = elem->type;
656 }
657
658 switch (base_type) {
659 case GIT_OBJECT_COMMIT:
660 case GIT_OBJECT_TREE:
661 case GIT_OBJECT_BLOB:
662 case GIT_OBJECT_TAG:
663 if (!cached) {
664 curpos = elem->offset;
665 error = packfile_unpack_compressed(obj, p, &w_curs, &curpos, elem->size, elem->type);
666 git_mwindow_close(&w_curs);
667 base_type = elem->type;
668 }
669 if (error < 0)
670 goto cleanup;
671 break;
672 case GIT_OBJECT_OFS_DELTA:
673 case GIT_OBJECT_REF_DELTA:
674 error = packfile_error("dependency chain ends in a delta");
675 goto cleanup;
676 default:
677 error = packfile_error("invalid packfile type in header");
678 goto cleanup;
679 }
680
681 /*
682 * Finding the object we want a cached base element is
683 * problematic, as we need to make sure we don't accidentally
684 * give the caller the cached object, which it would then feel
685 * free to free, so we need to copy the data.
686 */
687 if (cached && stack_size == 1) {
688 void *data = obj->data;
689
690 GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, obj->len, 1);
691 obj->data = git__malloc(alloclen);
692 GIT_ERROR_CHECK_ALLOC(obj->data);
693
694 memcpy(obj->data, data, obj->len + 1);
695 git_atomic_dec(&cached->refcount);
696 goto cleanup;
697 }
698
699 /* we now apply each consecutive delta until we run out */
700 while (elem_pos > 0 && !error) {
701 git_rawobj base, delta;
702
703 /*
704 * We can now try to add the base to the cache, as
705 * long as it's not already the cached one.
706 */
707 if (!cached)
708 free_base = !!cache_add(&cached, &p->bases, obj, elem->base_key);
709
710 elem = &stack[elem_pos - 1];
711 curpos = elem->offset;
712 error = packfile_unpack_compressed(&delta, p, &w_curs, &curpos, elem->size, elem->type);
713 git_mwindow_close(&w_curs);
714
715 if (error < 0) {
716 /* We have transferred ownership of the data to the cache. */
717 obj->data = NULL;
718 break;
719 }
720
721 /* the current object becomes the new base, on which we apply the delta */
722 base = *obj;
723 obj->data = NULL;
724 obj->len = 0;
725 obj->type = GIT_OBJECT_INVALID;
726
727 error = git_delta_apply(&obj->data, &obj->len, base.data, base.len, delta.data, delta.len);
728 obj->type = base_type;
729
730 /*
731 * We usually don't want to free the base at this
732 * point, as we put it into the cache in the previous
733 * iteration. free_base lets us know that we got the
734 * base object directly from the packfile, so we can free it.
735 */
736 git__free(delta.data);
737 if (free_base) {
738 free_base = 0;
739 git__free(base.data);
740 }
741
742 if (cached) {
743 git_atomic_dec(&cached->refcount);
744 cached = NULL;
745 }
746
747 if (error < 0)
748 break;
749
750 elem_pos--;
751 }
752
753 cleanup:
754 if (error < 0) {
755 git__free(obj->data);
756 if (cached)
757 git_atomic_dec(&cached->refcount);
758 }
759
760 if (elem)
761 *obj_offset = curpos;
762
763 git_array_clear(chain);
764 return error;
765 }
766
767 int git_packfile_stream_open(git_packfile_stream *obj, struct git_pack_file *p, off64_t curpos)
768 {
769 memset(obj, 0, sizeof(git_packfile_stream));
770 obj->curpos = curpos;
771 obj->p = p;
772
773 if (git_zstream_init(&obj->zstream, GIT_ZSTREAM_INFLATE) < 0) {
774 git_error_set(GIT_ERROR_ZLIB, "failed to init packfile stream");
775 return -1;
776 }
777
778 return 0;
779 }
780
781 ssize_t git_packfile_stream_read(git_packfile_stream *obj, void *buffer, size_t len)
782 {
783 unsigned int window_len;
784 unsigned char *in;
785 int error;
786
787 if (obj->done)
788 return 0;
789
790 if ((in = pack_window_open(obj->p, &obj->mw, obj->curpos, &window_len)) == NULL)
791 return GIT_EBUFS;
792
793 if ((error = git_zstream_set_input(&obj->zstream, in, window_len)) < 0 ||
794 (error = git_zstream_get_output_chunk(buffer, &len, &obj->zstream)) < 0) {
795 git_mwindow_close(&obj->mw);
796 git_error_set(GIT_ERROR_ZLIB, "error reading from the zlib stream");
797 return -1;
798 }
799
800 git_mwindow_close(&obj->mw);
801
802 obj->curpos += window_len - obj->zstream.in_len;
803
804 if (git_zstream_eos(&obj->zstream))
805 obj->done = 1;
806
807 /* If we didn't write anything out but we're not done, we need more data */
808 if (!len && !git_zstream_eos(&obj->zstream))
809 return GIT_EBUFS;
810
811 return len;
812
813 }
814
815 void git_packfile_stream_dispose(git_packfile_stream *obj)
816 {
817 git_zstream_free(&obj->zstream);
818 }
819
820 static int packfile_unpack_compressed(
821 git_rawobj *obj,
822 struct git_pack_file *p,
823 git_mwindow **mwindow,
824 off64_t *position,
825 size_t size,
826 git_object_t type)
827 {
828 git_zstream zstream = GIT_ZSTREAM_INIT;
829 size_t buffer_len, total = 0;
830 char *data = NULL;
831 int error;
832
833 GIT_ERROR_CHECK_ALLOC_ADD(&buffer_len, size, 1);
834 data = git__calloc(1, buffer_len);
835 GIT_ERROR_CHECK_ALLOC(data);
836
837 if ((error = git_zstream_init(&zstream, GIT_ZSTREAM_INFLATE)) < 0) {
838 git_error_set(GIT_ERROR_ZLIB, "failed to init zlib stream on unpack");
839 goto out;
840 }
841
842 do {
843 size_t bytes = buffer_len - total;
844 unsigned int window_len;
845 unsigned char *in;
846
847 if ((in = pack_window_open(p, mwindow, *position, &window_len)) == NULL) {
848 error = -1;
849 goto out;
850 }
851
852 if ((error = git_zstream_set_input(&zstream, in, window_len)) < 0 ||
853 (error = git_zstream_get_output_chunk(data + total, &bytes, &zstream)) < 0) {
854 git_mwindow_close(mwindow);
855 goto out;
856 }
857
858 git_mwindow_close(mwindow);
859
860 if (!bytes)
861 break;
862
863 *position += window_len - zstream.in_len;
864 total += bytes;
865 } while (!git_zstream_eos(&zstream));
866
867 if (total != size || !git_zstream_eos(&zstream)) {
868 git_error_set(GIT_ERROR_ZLIB, "error inflating zlib stream");
869 error = -1;
870 goto out;
871 }
872
873 obj->type = type;
874 obj->len = size;
875 obj->data = data;
876
877 out:
878 git_zstream_free(&zstream);
879 if (error)
880 git__free(data);
881
882 return error;
883 }
884
885 /*
886 * curpos is where the data starts, delta_obj_offset is the where the
887 * header starts
888 */
889 int get_delta_base(
890 off64_t *delta_base_out,
891 struct git_pack_file *p,
892 git_mwindow **w_curs,
893 off64_t *curpos,
894 git_object_t type,
895 off64_t delta_obj_offset)
896 {
897 unsigned int left = 0;
898 unsigned char *base_info;
899 off64_t base_offset;
900 git_oid unused;
901
902 assert(delta_base_out);
903
904 base_info = pack_window_open(p, w_curs, *curpos, &left);
905 /* Assumption: the only reason this would fail is because the file is too small */
906 if (base_info == NULL)
907 return GIT_EBUFS;
908 /* pack_window_open() assured us we have [base_info, base_info + 20)
909 * as a range that we can look at without walking off the
910 * end of the mapped window. Its actually the hash size
911 * that is assured. An OFS_DELTA longer than the hash size
912 * is stupid, as then a REF_DELTA would be smaller to store.
913 */
914 if (type == GIT_OBJECT_OFS_DELTA) {
915 unsigned used = 0;
916 unsigned char c = base_info[used++];
917 size_t unsigned_base_offset = c & 127;
918 while (c & 128) {
919 if (left <= used)
920 return GIT_EBUFS;
921 unsigned_base_offset += 1;
922 if (!unsigned_base_offset || MSB(unsigned_base_offset, 7))
923 return packfile_error("overflow");
924 c = base_info[used++];
925 unsigned_base_offset = (unsigned_base_offset << 7) + (c & 127);
926 }
927 if (unsigned_base_offset == 0 || (size_t)delta_obj_offset <= unsigned_base_offset)
928 return packfile_error("out of bounds");
929 base_offset = delta_obj_offset - unsigned_base_offset;
930 *curpos += used;
931 } else if (type == GIT_OBJECT_REF_DELTA) {
932 /* If we have the cooperative cache, search in it first */
933 if (p->has_cache) {
934 struct git_pack_entry *entry;
935 git_oid oid;
936
937 git_oid_fromraw(&oid, base_info);
938 if ((entry = git_oidmap_get(p->idx_cache, &oid)) != NULL) {
939 if (entry->offset == 0)
940 return packfile_error("delta offset is zero");
941
942 *curpos += 20;
943 *delta_base_out = entry->offset;
944 return 0;
945 } else {
946 /* If we're building an index, don't try to find the pack
947 * entry; we just haven't seen it yet. We'll make
948 * progress again in the next loop.
949 */
950 return GIT_PASSTHROUGH;
951 }
952 }
953
954 /* The base entry _must_ be in the same pack */
955 if (pack_entry_find_offset(&base_offset, &unused, p, (git_oid *)base_info, GIT_OID_HEXSZ) < 0)
956 return packfile_error("base entry delta is not in the same pack");
957 *curpos += 20;
958 } else
959 return packfile_error("unknown object type");
960
961 if (base_offset == 0)
962 return packfile_error("delta offset is zero");
963
964 *delta_base_out = base_offset;
965 return 0;
966 }
967
968 /***********************************************************
969 *
970 * PACKFILE METHODS
971 *
972 ***********************************************************/
973
974 void git_packfile_close(struct git_pack_file *p, bool unlink_packfile)
975 {
976 if (p->mwf.fd >= 0) {
977 git_mwindow_free_all_locked(&p->mwf);
978 p_close(p->mwf.fd);
979 p->mwf.fd = -1;
980 }
981
982 if (unlink_packfile)
983 p_unlink(p->pack_name);
984 }
985
986 void git_packfile_free(struct git_pack_file *p)
987 {
988 if (!p)
989 return;
990
991 cache_free(&p->bases);
992
993 git_packfile_close(p, false);
994
995 pack_index_free(p);
996
997 git__free(p->bad_object_sha1);
998
999 git_mutex_free(&p->lock);
1000 git_mutex_free(&p->bases.lock);
1001 git__free(p);
1002 }
1003
1004 static int packfile_open(struct git_pack_file *p)
1005 {
1006 struct stat st;
1007 struct git_pack_header hdr;
1008 git_oid sha1;
1009 unsigned char *idx_sha1;
1010
1011 if (p->index_version == -1 && pack_index_open(p) < 0)
1012 return git_odb__error_notfound("failed to open packfile", NULL, 0);
1013
1014 /* if mwf opened by another thread, return now */
1015 if (git_mutex_lock(&p->lock) < 0)
1016 return packfile_error("failed to get lock for open");
1017
1018 if (p->mwf.fd >= 0) {
1019 git_mutex_unlock(&p->lock);
1020 return 0;
1021 }
1022
1023 /* TODO: open with noatime */
1024 p->mwf.fd = git_futils_open_ro(p->pack_name);
1025 if (p->mwf.fd < 0)
1026 goto cleanup;
1027
1028 if (p_fstat(p->mwf.fd, &st) < 0 ||
1029 git_mwindow_file_register(&p->mwf) < 0)
1030 goto cleanup;
1031
1032 /* If we created the struct before we had the pack we lack size. */
1033 if (!p->mwf.size) {
1034 if (!S_ISREG(st.st_mode))
1035 goto cleanup;
1036 p->mwf.size = (off64_t)st.st_size;
1037 } else if (p->mwf.size != st.st_size)
1038 goto cleanup;
1039
1040 #if 0
1041 /* We leave these file descriptors open with sliding mmap;
1042 * there is no point keeping them open across exec(), though.
1043 */
1044 fd_flag = fcntl(p->mwf.fd, F_GETFD, 0);
1045 if (fd_flag < 0)
1046 goto cleanup;
1047
1048 fd_flag |= FD_CLOEXEC;
1049 if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
1050 goto cleanup;
1051 #endif
1052
1053 /* Verify we recognize this pack file format. */
1054 if (p_read(p->mwf.fd, &hdr, sizeof(hdr)) < 0 ||
1055 hdr.hdr_signature != htonl(PACK_SIGNATURE) ||
1056 !pack_version_ok(hdr.hdr_version))
1057 goto cleanup;
1058
1059 /* Verify the pack matches its index. */
1060 if (p->num_objects != ntohl(hdr.hdr_entries) ||
1061 p_lseek(p->mwf.fd, p->mwf.size - GIT_OID_RAWSZ, SEEK_SET) == -1 ||
1062 p_read(p->mwf.fd, sha1.id, GIT_OID_RAWSZ) < 0)
1063 goto cleanup;
1064
1065 idx_sha1 = ((unsigned char *)p->index_map.data) + p->index_map.len - 40;
1066
1067 if (git_oid__cmp(&sha1, (git_oid *)idx_sha1) != 0)
1068 goto cleanup;
1069
1070 git_mutex_unlock(&p->lock);
1071 return 0;
1072
1073 cleanup:
1074 git_error_set(GIT_ERROR_OS, "invalid packfile '%s'", p->pack_name);
1075
1076 if (p->mwf.fd >= 0)
1077 p_close(p->mwf.fd);
1078 p->mwf.fd = -1;
1079
1080 git_mutex_unlock(&p->lock);
1081
1082 return -1;
1083 }
1084
1085 int git_packfile__name(char **out, const char *path)
1086 {
1087 size_t path_len;
1088 git_buf buf = GIT_BUF_INIT;
1089
1090 path_len = strlen(path);
1091
1092 if (path_len < strlen(".idx"))
1093 return git_odb__error_notfound("invalid packfile path", NULL, 0);
1094
1095 if (git_buf_printf(&buf, "%.*s.pack", (int)(path_len - strlen(".idx")), path) < 0)
1096 return -1;
1097
1098 *out = git_buf_detach(&buf);
1099 return 0;
1100 }
1101
1102 int git_packfile_alloc(struct git_pack_file **pack_out, const char *path)
1103 {
1104 struct stat st;
1105 struct git_pack_file *p;
1106 size_t path_len = path ? strlen(path) : 0, alloc_len;
1107
1108 *pack_out = NULL;
1109
1110 if (path_len < strlen(".idx"))
1111 return git_odb__error_notfound("invalid packfile path", NULL, 0);
1112
1113 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, sizeof(*p), path_len);
1114 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 2);
1115
1116 p = git__calloc(1, alloc_len);
1117 GIT_ERROR_CHECK_ALLOC(p);
1118
1119 memcpy(p->pack_name, path, path_len + 1);
1120
1121 /*
1122 * Make sure a corresponding .pack file exists and that
1123 * the index looks sane.
1124 */
1125 if (git__suffixcmp(path, ".idx") == 0) {
1126 size_t root_len = path_len - strlen(".idx");
1127
1128 if (!git_disable_pack_keep_file_checks) {
1129 memcpy(p->pack_name + root_len, ".keep", sizeof(".keep"));
1130 if (git_path_exists(p->pack_name) == true)
1131 p->pack_keep = 1;
1132 }
1133
1134 memcpy(p->pack_name + root_len, ".pack", sizeof(".pack"));
1135 }
1136
1137 if (p_stat(p->pack_name, &st) < 0 || !S_ISREG(st.st_mode)) {
1138 git__free(p);
1139 return git_odb__error_notfound("packfile not found", NULL, 0);
1140 }
1141
1142 /* ok, it looks sane as far as we can check without
1143 * actually mapping the pack file.
1144 */
1145 p->mwf.fd = -1;
1146 p->mwf.size = st.st_size;
1147 p->pack_local = 1;
1148 p->mtime = (git_time_t)st.st_mtime;
1149 p->index_version = -1;
1150
1151 if (git_mutex_init(&p->lock)) {
1152 git_error_set(GIT_ERROR_OS, "failed to initialize packfile mutex");
1153 git__free(p);
1154 return -1;
1155 }
1156
1157 if (cache_init(&p->bases) < 0) {
1158 git__free(p);
1159 return -1;
1160 }
1161
1162 *pack_out = p;
1163
1164 return 0;
1165 }
1166
1167 /***********************************************************
1168 *
1169 * PACKFILE ENTRY SEARCH INTERNALS
1170 *
1171 ***********************************************************/
1172
1173 static off64_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n)
1174 {
1175 const unsigned char *index = p->index_map.data;
1176 const unsigned char *end = index + p->index_map.len;
1177 index += 4 * 256;
1178 if (p->index_version == 1) {
1179 return ntohl(*((uint32_t *)(index + 24 * n)));
1180 } else {
1181 uint32_t off;
1182 index += 8 + p->num_objects * (20 + 4);
1183 off = ntohl(*((uint32_t *)(index + 4 * n)));
1184 if (!(off & 0x80000000))
1185 return off;
1186 index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
1187
1188 /* Make sure we're not being sent out of bounds */
1189 if (index >= end - 8)
1190 return -1;
1191
1192 return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
1193 ntohl(*((uint32_t *)(index + 4)));
1194 }
1195 }
1196
/*
 * Comparison callback that orders two buffers by their first four
 * bytes, using memcmp() semantics.
 */
static int git__memcmp4(const void *a, const void *b)
{
	const size_t width = 4;

	return memcmp(a, b, width);
}
1200
1201 int git_pack_foreach_entry(
1202 struct git_pack_file *p,
1203 git_odb_foreach_cb cb,
1204 void *data)
1205 {
1206 const unsigned char *index = p->index_map.data, *current;
1207 uint32_t i;
1208 int error = 0;
1209
1210 if (index == NULL) {
1211 if ((error = pack_index_open(p)) < 0)
1212 return error;
1213
1214 assert(p->index_map.data);
1215
1216 index = p->index_map.data;
1217 }
1218
1219 if (p->index_version > 1) {
1220 index += 8;
1221 }
1222
1223 index += 4 * 256;
1224
1225 if (p->oids == NULL) {
1226 git_vector offsets, oids;
1227
1228 if ((error = git_vector_init(&oids, p->num_objects, NULL)))
1229 return error;
1230
1231 if ((error = git_vector_init(&offsets, p->num_objects, git__memcmp4)))
1232 return error;
1233
1234 if (p->index_version > 1) {
1235 const unsigned char *off = index + 24 * p->num_objects;
1236 for (i = 0; i < p->num_objects; i++)
1237 git_vector_insert(&offsets, (void*)&off[4 * i]);
1238 git_vector_sort(&offsets);
1239 git_vector_foreach(&offsets, i, current)
1240 git_vector_insert(&oids, (void*)&index[5 * (current - off)]);
1241 } else {
1242 for (i = 0; i < p->num_objects; i++)
1243 git_vector_insert(&offsets, (void*)&index[24 * i]);
1244 git_vector_sort(&offsets);
1245 git_vector_foreach(&offsets, i, current)
1246 git_vector_insert(&oids, (void*)&current[4]);
1247 }
1248
1249 git_vector_free(&offsets);
1250 p->oids = (git_oid **)git_vector_detach(NULL, NULL, &oids);
1251 }
1252
1253 for (i = 0; i < p->num_objects; i++)
1254 if ((error = cb(p->oids[i], data)) != 0)
1255 return git_error_set_after_callback(error);
1256
1257 return error;
1258 }
1259
/*
 * Binary search for `oid_prefix` among entries [lo, hi) of
 * `oid_lookup_table`, whose entries are `stride` bytes apart and are
 * compared with git_oid__hashcmp(). The table is assumed to be sorted in
 * ascending order according to that comparison.
 *
 * Returns the index of the entry that compares equal, or
 * -(insertion position) - 1 when there is no such entry.
 */
int git_pack__lookup_sha1(const void *oid_lookup_table, size_t stride, unsigned lo,
		unsigned hi, const unsigned char *oid_prefix)
{
	const unsigned char *base = oid_lookup_table;

	while (lo < hi) {
		/* Unlike (lo + hi) / 2, this midpoint cannot wrap around */
		unsigned mi = lo + (hi - lo) / 2;
		int cmp = git_oid__hashcmp(base + mi * stride, oid_prefix);

		if (!cmp)
			return mi;

		if (cmp > 0)
			hi = mi;
		else
			lo = mi+1;
	}

	return -((int)lo)-1;
}
1280
1281 static int pack_entry_find_offset(
1282 off64_t *offset_out,
1283 git_oid *found_oid,
1284 struct git_pack_file *p,
1285 const git_oid *short_oid,
1286 size_t len)
1287 {
1288 const uint32_t *level1_ofs;
1289 const unsigned char *index;
1290 unsigned hi, lo, stride;
1291 int pos, found = 0;
1292 off64_t offset;
1293 const unsigned char *current = 0;
1294
1295 *offset_out = 0;
1296
1297 if (p->index_version == -1) {
1298 int error;
1299
1300 if ((error = pack_index_open(p)) < 0)
1301 return error;
1302 assert(p->index_map.data);
1303 }
1304
1305 index = p->index_map.data;
1306 level1_ofs = p->index_map.data;
1307
1308 if (p->index_version > 1) {
1309 level1_ofs += 2;
1310 index += 8;
1311 }
1312
1313 index += 4 * 256;
1314 hi = ntohl(level1_ofs[(int)short_oid->id[0]]);
1315 lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(level1_ofs[(int)short_oid->id[0] - 1]));
1316
1317 if (p->index_version > 1) {
1318 stride = 20;
1319 } else {
1320 stride = 24;
1321 index += 4;
1322 }
1323
1324 #ifdef INDEX_DEBUG_LOOKUP
1325 printf("%02x%02x%02x... lo %u hi %u nr %d\n",
1326 short_oid->id[0], short_oid->id[1], short_oid->id[2], lo, hi, p->num_objects);
1327 #endif
1328
1329 pos = git_pack__lookup_sha1(index, stride, lo, hi, short_oid->id);
1330
1331 if (pos >= 0) {
1332 /* An object matching exactly the oid was found */
1333 found = 1;
1334 current = index + pos * stride;
1335 } else {
1336 /* No object was found */
1337 /* pos refers to the object with the "closest" oid to short_oid */
1338 pos = - 1 - pos;
1339 if (pos < (int)p->num_objects) {
1340 current = index + pos * stride;
1341
1342 if (!git_oid_ncmp(short_oid, (const git_oid *)current, len))
1343 found = 1;
1344 }
1345 }
1346
1347 if (found && len != GIT_OID_HEXSZ && pos + 1 < (int)p->num_objects) {
1348 /* Check for ambiguousity */
1349 const unsigned char *next = current + stride;
1350
1351 if (!git_oid_ncmp(short_oid, (const git_oid *)next, len)) {
1352 found = 2;
1353 }
1354 }
1355
1356 if (!found)
1357 return git_odb__error_notfound("failed to find offset for pack entry", short_oid, len);
1358 if (found > 1)
1359 return git_odb__error_ambiguous("found multiple offsets for pack entry");
1360
1361 if ((offset = nth_packed_object_offset(p, pos)) < 0) {
1362 git_error_set(GIT_ERROR_ODB, "packfile index is corrupt");
1363 return -1;
1364 }
1365
1366 *offset_out = offset;
1367 git_oid_fromraw(found_oid, current);
1368
1369 #ifdef INDEX_DEBUG_LOOKUP
1370 {
1371 unsigned char hex_sha1[GIT_OID_HEXSZ + 1];
1372 git_oid_fmt(hex_sha1, found_oid);
1373 hex_sha1[GIT_OID_HEXSZ] = '\0';
1374 printf("found lo=%d %s\n", lo, hex_sha1);
1375 }
1376 #endif
1377
1378 return 0;
1379 }
1380
1381 int git_pack_entry_find(
1382 struct git_pack_entry *e,
1383 struct git_pack_file *p,
1384 const git_oid *short_oid,
1385 size_t len)
1386 {
1387 off64_t offset;
1388 git_oid found_oid;
1389 int error;
1390
1391 assert(p);
1392
1393 if (len == GIT_OID_HEXSZ && p->num_bad_objects) {
1394 unsigned i;
1395 for (i = 0; i < p->num_bad_objects; i++)
1396 if (git_oid__cmp(short_oid, &p->bad_object_sha1[i]) == 0)
1397 return packfile_error("bad object found in packfile");
1398 }
1399
1400 error = pack_entry_find_offset(&offset, &found_oid, p, short_oid, len);
1401 if (error < 0)
1402 return error;
1403
1404 /* we found a unique entry in the index;
1405 * make sure the packfile backing the index
1406 * still exists on disk */
1407 if (p->mwf.fd == -1 && (error = packfile_open(p)) < 0)
1408 return error;
1409
1410 e->offset = offset;
1411 e->p = p;
1412
1413 git_oid_cpy(&e->sha1, &found_oid);
1414 return 0;
1415 }