]> git.proxmox.com Git - libgit2.git/blob - src/pack.c
Prep for unstable release
[libgit2.git] / src / pack.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "pack.h"
9
10 #include "delta.h"
11 #include "futils.h"
12 #include "mwindow.h"
13 #include "odb.h"
14 #include "oid.h"
15 #include "sha1_lookup.h"
16
/*
 * Global switch: when set, git_packfile_alloc() does not probe for a
 * '.keep' file sitting next to the pack.
 */
bool git_disable_pack_keep_file_checks = false;
19
20 static int packfile_open(struct git_pack_file *p);
21 static off64_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n);
22 static int packfile_unpack_compressed(
23 git_rawobj *obj,
24 struct git_pack_file *p,
25 git_mwindow **w_curs,
26 off64_t *curpos,
27 size_t size,
28 git_object_t type);
29
30 /* Can find the offset of an object given
31 * a prefix of an identifier.
32 * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
33 * is ambiguous within the pack.
34 * This method assumes that len is between
35 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
36 */
37 static int pack_entry_find_offset(
38 off64_t *offset_out,
39 git_oid *found_oid,
40 struct git_pack_file *p,
41 const git_oid *short_oid,
42 size_t len);
43
44 static int packfile_error(const char *message)
45 {
46 git_error_set(GIT_ERROR_ODB, "invalid pack file - %s", message);
47 return -1;
48 }
49
50 /********************
51 * Delta base cache
52 ********************/
53
54 static git_pack_cache_entry *new_cache_object(git_rawobj *source)
55 {
56 git_pack_cache_entry *e = git__calloc(1, sizeof(git_pack_cache_entry));
57 if (!e)
58 return NULL;
59
60 git_atomic_inc(&e->refcount);
61 memcpy(&e->raw, source, sizeof(git_rawobj));
62
63 return e;
64 }
65
66 static void free_cache_object(void *o)
67 {
68 git_pack_cache_entry *e = (git_pack_cache_entry *)o;
69
70 if (e != NULL) {
71 assert(e->refcount.val == 0);
72 git__free(e->raw.data);
73 git__free(e);
74 }
75 }
76
77 static void cache_free(git_pack_cache *cache)
78 {
79 git_pack_cache_entry *entry;
80
81 if (cache->entries) {
82 git_offmap_foreach_value(cache->entries, entry, {
83 free_cache_object(entry);
84 });
85
86 git_offmap_free(cache->entries);
87 cache->entries = NULL;
88 }
89 }
90
91 static int cache_init(git_pack_cache *cache)
92 {
93 if (git_offmap_new(&cache->entries) < 0)
94 return -1;
95
96 cache->memory_limit = GIT_PACK_CACHE_MEMORY_LIMIT;
97
98 if (git_mutex_init(&cache->lock)) {
99 git_error_set(GIT_ERROR_OS, "failed to initialize pack cache mutex");
100
101 git__free(cache->entries);
102 cache->entries = NULL;
103
104 return -1;
105 }
106
107 return 0;
108 }
109
110 static git_pack_cache_entry *cache_get(git_pack_cache *cache, off64_t offset)
111 {
112 git_pack_cache_entry *entry;
113
114 if (git_mutex_lock(&cache->lock) < 0)
115 return NULL;
116
117 if ((entry = git_offmap_get(cache->entries, offset)) != NULL) {
118 git_atomic_inc(&entry->refcount);
119 entry->last_usage = cache->use_ctr++;
120 }
121 git_mutex_unlock(&cache->lock);
122
123 return entry;
124 }
125
126 /* Run with the cache lock held */
127 static void free_lowest_entry(git_pack_cache *cache)
128 {
129 off64_t offset;
130 git_pack_cache_entry *entry;
131
132 git_offmap_foreach(cache->entries, offset, entry, {
133 if (entry && entry->refcount.val == 0) {
134 cache->memory_used -= entry->raw.len;
135 git_offmap_delete(cache->entries, offset);
136 free_cache_object(entry);
137 }
138 });
139 }
140
141 static int cache_add(
142 git_pack_cache_entry **cached_out,
143 git_pack_cache *cache,
144 git_rawobj *base,
145 off64_t offset)
146 {
147 git_pack_cache_entry *entry;
148 int exists;
149
150 if (base->len > GIT_PACK_CACHE_SIZE_LIMIT)
151 return -1;
152
153 entry = new_cache_object(base);
154 if (entry) {
155 if (git_mutex_lock(&cache->lock) < 0) {
156 git_error_set(GIT_ERROR_OS, "failed to lock cache");
157 git__free(entry);
158 return -1;
159 }
160 /* Add it to the cache if nobody else has */
161 exists = git_offmap_exists(cache->entries, offset);
162 if (!exists) {
163 while (cache->memory_used + base->len > cache->memory_limit)
164 free_lowest_entry(cache);
165
166 git_offmap_set(cache->entries, offset, entry);
167 cache->memory_used += entry->raw.len;
168
169 *cached_out = entry;
170 }
171 git_mutex_unlock(&cache->lock);
172 /* Somebody beat us to adding it into the cache */
173 if (exists) {
174 git__free(entry);
175 return -1;
176 }
177 }
178
179 return 0;
180 }
181
182 /***********************************************************
183 *
184 * PACK INDEX METHODS
185 *
186 ***********************************************************/
187
188 static void pack_index_free(struct git_pack_file *p)
189 {
190 if (p->oids) {
191 git__free(p->oids);
192 p->oids = NULL;
193 }
194 if (p->index_map.data) {
195 git_futils_mmap_free(&p->index_map);
196 p->index_map.data = NULL;
197 }
198 }
199
200 static int pack_index_check(const char *path, struct git_pack_file *p)
201 {
202 struct git_pack_idx_header *hdr;
203 uint32_t version, nr, i, *index;
204 void *idx_map;
205 size_t idx_size;
206 struct stat st;
207 int error;
208 /* TODO: properly open the file without access time using O_NOATIME */
209 git_file fd = git_futils_open_ro(path);
210 if (fd < 0)
211 return fd;
212
213 if (p_fstat(fd, &st) < 0) {
214 p_close(fd);
215 git_error_set(GIT_ERROR_OS, "unable to stat pack index '%s'", path);
216 return -1;
217 }
218
219 if (!S_ISREG(st.st_mode) ||
220 !git__is_sizet(st.st_size) ||
221 (idx_size = (size_t)st.st_size) < 4 * 256 + 20 + 20)
222 {
223 p_close(fd);
224 git_error_set(GIT_ERROR_ODB, "invalid pack index '%s'", path);
225 return -1;
226 }
227
228 error = git_futils_mmap_ro(&p->index_map, fd, 0, idx_size);
229
230 p_close(fd);
231
232 if (error < 0)
233 return error;
234
235 hdr = idx_map = p->index_map.data;
236
237 if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
238 version = ntohl(hdr->idx_version);
239
240 if (version < 2 || version > 2) {
241 git_futils_mmap_free(&p->index_map);
242 return packfile_error("unsupported index version");
243 }
244
245 } else
246 version = 1;
247
248 nr = 0;
249 index = idx_map;
250
251 if (version > 1)
252 index += 2; /* skip index header */
253
254 for (i = 0; i < 256; i++) {
255 uint32_t n = ntohl(index[i]);
256 if (n < nr) {
257 git_futils_mmap_free(&p->index_map);
258 return packfile_error("index is non-monotonic");
259 }
260 nr = n;
261 }
262
263 if (version == 1) {
264 /*
265 * Total size:
266 * - 256 index entries 4 bytes each
267 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
268 * - 20-byte SHA1 of the packfile
269 * - 20-byte SHA1 file checksum
270 */
271 if (idx_size != 4*256 + nr * 24 + 20 + 20) {
272 git_futils_mmap_free(&p->index_map);
273 return packfile_error("index is corrupted");
274 }
275 } else if (version == 2) {
276 /*
277 * Minimum size:
278 * - 8 bytes of header
279 * - 256 index entries 4 bytes each
280 * - 20-byte sha1 entry * nr
281 * - 4-byte crc entry * nr
282 * - 4-byte offset entry * nr
283 * - 20-byte SHA1 of the packfile
284 * - 20-byte SHA1 file checksum
285 * And after the 4-byte offset table might be a
286 * variable sized table containing 8-byte entries
287 * for offsets larger than 2^31.
288 */
289 unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
290 unsigned long max_size = min_size;
291
292 if (nr)
293 max_size += (nr - 1)*8;
294
295 if (idx_size < min_size || idx_size > max_size) {
296 git_futils_mmap_free(&p->index_map);
297 return packfile_error("wrong index size");
298 }
299 }
300
301 p->num_objects = nr;
302 p->index_version = version;
303 return 0;
304 }
305
306 static int pack_index_open(struct git_pack_file *p)
307 {
308 int error = 0;
309 size_t name_len;
310 git_buf idx_name;
311
312 if (p->index_version > -1)
313 return 0;
314
315 name_len = strlen(p->pack_name);
316 assert(name_len > strlen(".pack")); /* checked by git_pack_file alloc */
317
318 if (git_buf_init(&idx_name, name_len) < 0)
319 return -1;
320
321 git_buf_put(&idx_name, p->pack_name, name_len - strlen(".pack"));
322 git_buf_puts(&idx_name, ".idx");
323 if (git_buf_oom(&idx_name)) {
324 git_buf_dispose(&idx_name);
325 return -1;
326 }
327
328 if ((error = git_mutex_lock(&p->lock)) < 0) {
329 git_buf_dispose(&idx_name);
330 return error;
331 }
332
333 if (p->index_version == -1)
334 error = pack_index_check(idx_name.ptr, p);
335
336 git_buf_dispose(&idx_name);
337
338 git_mutex_unlock(&p->lock);
339
340 return error;
341 }
342
343 static unsigned char *pack_window_open(
344 struct git_pack_file *p,
345 git_mwindow **w_cursor,
346 off64_t offset,
347 unsigned int *left)
348 {
349 if (p->mwf.fd == -1 && packfile_open(p) < 0)
350 return NULL;
351
352 /* Since packfiles end in a hash of their content and it's
353 * pointless to ask for an offset into the middle of that
354 * hash, and the pack_window_contains function above wouldn't match
355 * don't allow an offset too close to the end of the file.
356 *
357 * Don't allow a negative offset, as that means we've wrapped
358 * around.
359 */
360 if (offset > (p->mwf.size - 20))
361 return NULL;
362 if (offset < 0)
363 return NULL;
364
365 return git_mwindow_open(&p->mwf, w_cursor, offset, 20, left);
366 }
367
368 /*
369 * The per-object header is a pretty dense thing, which is
370 * - first byte: low four bits are "size",
371 * then three bits of "type",
372 * with the high bit being "size continues".
373 * - each byte afterwards: low seven bits are size continuation,
374 * with the high bit being "size continues"
375 */
376 size_t git_packfile__object_header(unsigned char *hdr, size_t size, git_object_t type)
377 {
378 unsigned char *hdr_base;
379 unsigned char c;
380
381 assert(type >= GIT_OBJECT_COMMIT && type <= GIT_OBJECT_REF_DELTA);
382
383 /* TODO: add support for chunked objects; see git.git 6c0d19b1 */
384
385 c = (unsigned char)((type << 4) | (size & 15));
386 size >>= 4;
387 hdr_base = hdr;
388
389 while (size) {
390 *hdr++ = c | 0x80;
391 c = size & 0x7f;
392 size >>= 7;
393 }
394 *hdr++ = c;
395
396 return (hdr - hdr_base);
397 }
398
399
400 static int packfile_unpack_header1(
401 unsigned long *usedp,
402 size_t *sizep,
403 git_object_t *type,
404 const unsigned char *buf,
405 unsigned long len)
406 {
407 unsigned shift;
408 unsigned long size, c;
409 unsigned long used = 0;
410
411 c = buf[used++];
412 *type = (c >> 4) & 7;
413 size = c & 15;
414 shift = 4;
415 while (c & 0x80) {
416 if (len <= used) {
417 git_error_set(GIT_ERROR_ODB, "buffer too small");
418 return GIT_EBUFS;
419 }
420
421 if (bitsizeof(long) <= shift) {
422 *usedp = 0;
423 git_error_set(GIT_ERROR_ODB, "packfile corrupted");
424 return -1;
425 }
426
427 c = buf[used++];
428 size += (c & 0x7f) << shift;
429 shift += 7;
430 }
431
432 *sizep = (size_t)size;
433 *usedp = used;
434 return 0;
435 }
436
437 int git_packfile_unpack_header(
438 size_t *size_p,
439 git_object_t *type_p,
440 git_mwindow_file *mwf,
441 git_mwindow **w_curs,
442 off64_t *curpos)
443 {
444 unsigned char *base;
445 unsigned int left;
446 unsigned long used;
447 int ret;
448
449 /* pack_window_open() assures us we have [base, base + 20) available
450 * as a range that we can look at at. (Its actually the hash
451 * size that is assured.) With our object header encoding
452 * the maximum deflated object size is 2^137, which is just
453 * insane, so we know won't exceed what we have been given.
454 */
455 /* base = pack_window_open(p, w_curs, *curpos, &left); */
456 base = git_mwindow_open(mwf, w_curs, *curpos, 20, &left);
457 if (base == NULL)
458 return GIT_EBUFS;
459
460 ret = packfile_unpack_header1(&used, size_p, type_p, base, left);
461 git_mwindow_close(w_curs);
462 if (ret == GIT_EBUFS)
463 return ret;
464 else if (ret < 0)
465 return packfile_error("header length is zero");
466
467 *curpos += used;
468 return 0;
469 }
470
471 int git_packfile_resolve_header(
472 size_t *size_p,
473 git_object_t *type_p,
474 struct git_pack_file *p,
475 off64_t offset)
476 {
477 git_mwindow *w_curs = NULL;
478 off64_t curpos = offset;
479 size_t size;
480 git_object_t type;
481 off64_t base_offset;
482 int error;
483
484 error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
485 if (error < 0)
486 return error;
487
488 if (type == GIT_OBJECT_OFS_DELTA || type == GIT_OBJECT_REF_DELTA) {
489 size_t base_size;
490 git_packfile_stream stream;
491
492 base_offset = get_delta_base(p, &w_curs, &curpos, type, offset);
493 git_mwindow_close(&w_curs);
494 if ((error = git_packfile_stream_open(&stream, p, curpos)) < 0)
495 return error;
496 error = git_delta_read_header_fromstream(&base_size, size_p, &stream);
497 git_packfile_stream_dispose(&stream);
498 if (error < 0)
499 return error;
500 } else {
501 *size_p = size;
502 base_offset = 0;
503 }
504
505 while (type == GIT_OBJECT_OFS_DELTA || type == GIT_OBJECT_REF_DELTA) {
506 curpos = base_offset;
507 error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
508 if (error < 0)
509 return error;
510 if (type != GIT_OBJECT_OFS_DELTA && type != GIT_OBJECT_REF_DELTA)
511 break;
512 base_offset = get_delta_base(p, &w_curs, &curpos, type, base_offset);
513 git_mwindow_close(&w_curs);
514 }
515 *type_p = type;
516
517 return error;
518 }
519
520 #define SMALL_STACK_SIZE 64
521
522 /**
523 * Generate the chain of dependencies which we need to get to the
524 * object at `off`. `chain` is used a stack, popping gives the right
525 * order to apply deltas on. If an object is found in the pack's base
526 * cache, we stop calculating there.
527 */
528 static int pack_dependency_chain(git_dependency_chain *chain_out,
529 git_pack_cache_entry **cached_out, off64_t *cached_off,
530 struct pack_chain_elem *small_stack, size_t *stack_sz,
531 struct git_pack_file *p, off64_t obj_offset)
532 {
533 git_dependency_chain chain = GIT_ARRAY_INIT;
534 git_mwindow *w_curs = NULL;
535 off64_t curpos = obj_offset, base_offset;
536 int error = 0, use_heap = 0;
537 size_t size, elem_pos;
538 git_object_t type;
539
540 elem_pos = 0;
541 while (true) {
542 struct pack_chain_elem *elem;
543 git_pack_cache_entry *cached = NULL;
544
545 /* if we have a base cached, we can stop here instead */
546 if ((cached = cache_get(&p->bases, obj_offset)) != NULL) {
547 *cached_out = cached;
548 *cached_off = obj_offset;
549 break;
550 }
551
552 /* if we run out of space on the small stack, use the array */
553 if (elem_pos == SMALL_STACK_SIZE) {
554 git_array_init_to_size(chain, elem_pos);
555 GIT_ERROR_CHECK_ARRAY(chain);
556 memcpy(chain.ptr, small_stack, elem_pos * sizeof(struct pack_chain_elem));
557 chain.size = elem_pos;
558 use_heap = 1;
559 }
560
561 curpos = obj_offset;
562 if (!use_heap) {
563 elem = &small_stack[elem_pos];
564 } else {
565 elem = git_array_alloc(chain);
566 if (!elem) {
567 error = -1;
568 goto on_error;
569 }
570 }
571
572 elem->base_key = obj_offset;
573
574 error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
575
576 if (error < 0)
577 goto on_error;
578
579 elem->offset = curpos;
580 elem->size = size;
581 elem->type = type;
582 elem->base_key = obj_offset;
583
584 if (type != GIT_OBJECT_OFS_DELTA && type != GIT_OBJECT_REF_DELTA)
585 break;
586
587 base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
588 git_mwindow_close(&w_curs);
589
590 if (base_offset == 0) {
591 error = packfile_error("delta offset is zero");
592 goto on_error;
593 }
594 if (base_offset < 0) { /* must actually be an error code */
595 error = (int)base_offset;
596 goto on_error;
597 }
598
599 /* we need to pass the pos *after* the delta-base bit */
600 elem->offset = curpos;
601
602 /* go through the loop again, but with the new object */
603 obj_offset = base_offset;
604 elem_pos++;
605 }
606
607
608 *stack_sz = elem_pos + 1;
609 *chain_out = chain;
610 return error;
611
612 on_error:
613 git_array_clear(chain);
614 return error;
615 }
616
617 int git_packfile_unpack(
618 git_rawobj *obj,
619 struct git_pack_file *p,
620 off64_t *obj_offset)
621 {
622 git_mwindow *w_curs = NULL;
623 off64_t curpos = *obj_offset;
624 int error, free_base = 0;
625 git_dependency_chain chain = GIT_ARRAY_INIT;
626 struct pack_chain_elem *elem = NULL, *stack;
627 git_pack_cache_entry *cached = NULL;
628 struct pack_chain_elem small_stack[SMALL_STACK_SIZE];
629 size_t stack_size = 0, elem_pos, alloclen;
630 git_object_t base_type;
631
632 /*
633 * TODO: optionally check the CRC on the packfile
634 */
635
636 error = pack_dependency_chain(&chain, &cached, obj_offset, small_stack, &stack_size, p, *obj_offset);
637 if (error < 0)
638 return error;
639
640 obj->data = NULL;
641 obj->len = 0;
642 obj->type = GIT_OBJECT_INVALID;
643
644 /* let's point to the right stack */
645 stack = chain.ptr ? chain.ptr : small_stack;
646
647 elem_pos = stack_size;
648 if (cached) {
649 memcpy(obj, &cached->raw, sizeof(git_rawobj));
650 base_type = obj->type;
651 elem_pos--; /* stack_size includes the base, which isn't actually there */
652 } else {
653 elem = &stack[--elem_pos];
654 base_type = elem->type;
655 }
656
657 switch (base_type) {
658 case GIT_OBJECT_COMMIT:
659 case GIT_OBJECT_TREE:
660 case GIT_OBJECT_BLOB:
661 case GIT_OBJECT_TAG:
662 if (!cached) {
663 curpos = elem->offset;
664 error = packfile_unpack_compressed(obj, p, &w_curs, &curpos, elem->size, elem->type);
665 git_mwindow_close(&w_curs);
666 base_type = elem->type;
667 }
668 if (error < 0)
669 goto cleanup;
670 break;
671 case GIT_OBJECT_OFS_DELTA:
672 case GIT_OBJECT_REF_DELTA:
673 error = packfile_error("dependency chain ends in a delta");
674 goto cleanup;
675 default:
676 error = packfile_error("invalid packfile type in header");
677 goto cleanup;
678 }
679
680 /*
681 * Finding the object we want a cached base element is
682 * problematic, as we need to make sure we don't accidentally
683 * give the caller the cached object, which it would then feel
684 * free to free, so we need to copy the data.
685 */
686 if (cached && stack_size == 1) {
687 void *data = obj->data;
688
689 GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, obj->len, 1);
690 obj->data = git__malloc(alloclen);
691 GIT_ERROR_CHECK_ALLOC(obj->data);
692
693 memcpy(obj->data, data, obj->len + 1);
694 git_atomic_dec(&cached->refcount);
695 goto cleanup;
696 }
697
698 /* we now apply each consecutive delta until we run out */
699 while (elem_pos > 0 && !error) {
700 git_rawobj base, delta;
701
702 /*
703 * We can now try to add the base to the cache, as
704 * long as it's not already the cached one.
705 */
706 if (!cached)
707 free_base = !!cache_add(&cached, &p->bases, obj, elem->base_key);
708
709 elem = &stack[elem_pos - 1];
710 curpos = elem->offset;
711 error = packfile_unpack_compressed(&delta, p, &w_curs, &curpos, elem->size, elem->type);
712 git_mwindow_close(&w_curs);
713
714 if (error < 0) {
715 /* We have transferred ownership of the data to the cache. */
716 obj->data = NULL;
717 break;
718 }
719
720 /* the current object becomes the new base, on which we apply the delta */
721 base = *obj;
722 obj->data = NULL;
723 obj->len = 0;
724 obj->type = GIT_OBJECT_INVALID;
725
726 error = git_delta_apply(&obj->data, &obj->len, base.data, base.len, delta.data, delta.len);
727 obj->type = base_type;
728
729 /*
730 * We usually don't want to free the base at this
731 * point, as we put it into the cache in the previous
732 * iteration. free_base lets us know that we got the
733 * base object directly from the packfile, so we can free it.
734 */
735 git__free(delta.data);
736 if (free_base) {
737 free_base = 0;
738 git__free(base.data);
739 }
740
741 if (cached) {
742 git_atomic_dec(&cached->refcount);
743 cached = NULL;
744 }
745
746 if (error < 0)
747 break;
748
749 elem_pos--;
750 }
751
752 cleanup:
753 if (error < 0) {
754 git__free(obj->data);
755 if (cached)
756 git_atomic_dec(&cached->refcount);
757 }
758
759 if (elem)
760 *obj_offset = curpos;
761
762 git_array_clear(chain);
763 return error;
764 }
765
766 int git_packfile_stream_open(git_packfile_stream *obj, struct git_pack_file *p, off64_t curpos)
767 {
768 memset(obj, 0, sizeof(git_packfile_stream));
769 obj->curpos = curpos;
770 obj->p = p;
771
772 if (git_zstream_init(&obj->zstream, GIT_ZSTREAM_INFLATE) < 0) {
773 git_error_set(GIT_ERROR_ZLIB, "failed to init packfile stream");
774 return -1;
775 }
776
777 return 0;
778 }
779
780 ssize_t git_packfile_stream_read(git_packfile_stream *obj, void *buffer, size_t len)
781 {
782 unsigned int window_len;
783 unsigned char *in;
784 int error;
785
786 if (obj->done)
787 return 0;
788
789 if ((in = pack_window_open(obj->p, &obj->mw, obj->curpos, &window_len)) == NULL)
790 return GIT_EBUFS;
791
792 if ((error = git_zstream_set_input(&obj->zstream, in, window_len)) < 0 ||
793 (error = git_zstream_get_output_chunk(buffer, &len, &obj->zstream)) < 0) {
794 git_mwindow_close(&obj->mw);
795 git_error_set(GIT_ERROR_ZLIB, "error reading from the zlib stream");
796 return -1;
797 }
798
799 git_mwindow_close(&obj->mw);
800
801 obj->curpos += window_len - obj->zstream.in_len;
802
803 if (git_zstream_eos(&obj->zstream))
804 obj->done = 1;
805
806 /* If we didn't write anything out but we're not done, we need more data */
807 if (!len && !git_zstream_eos(&obj->zstream))
808 return GIT_EBUFS;
809
810 return len;
811
812 }
813
814 void git_packfile_stream_dispose(git_packfile_stream *obj)
815 {
816 git_zstream_free(&obj->zstream);
817 }
818
819 static int packfile_unpack_compressed(
820 git_rawobj *obj,
821 struct git_pack_file *p,
822 git_mwindow **mwindow,
823 off64_t *position,
824 size_t size,
825 git_object_t type)
826 {
827 git_zstream zstream = GIT_ZSTREAM_INIT;
828 size_t buffer_len, total = 0;
829 char *data = NULL;
830 int error;
831
832 GIT_ERROR_CHECK_ALLOC_ADD(&buffer_len, size, 1);
833 data = git__calloc(1, buffer_len);
834 GIT_ERROR_CHECK_ALLOC(data);
835
836 if ((error = git_zstream_init(&zstream, GIT_ZSTREAM_INFLATE)) < 0) {
837 git_error_set(GIT_ERROR_ZLIB, "failed to init zlib stream on unpack");
838 goto out;
839 }
840
841 do {
842 size_t bytes = buffer_len - total;
843 unsigned int window_len;
844 unsigned char *in;
845
846 in = pack_window_open(p, mwindow, *position, &window_len);
847
848 if ((error = git_zstream_set_input(&zstream, in, window_len)) < 0 ||
849 (error = git_zstream_get_output_chunk(data + total, &bytes, &zstream)) < 0) {
850 git_mwindow_close(mwindow);
851 goto out;
852 }
853
854 git_mwindow_close(mwindow);
855
856 *position += window_len - zstream.in_len;
857 total += bytes;
858 } while (total < size);
859
860 if (total != size || !git_zstream_eos(&zstream)) {
861 git_error_set(GIT_ERROR_ZLIB, "error inflating zlib stream");
862 error = -1;
863 goto out;
864 }
865
866 obj->type = type;
867 obj->len = size;
868 obj->data = data;
869
870 out:
871 git_zstream_free(&zstream);
872 if (error)
873 git__free(data);
874
875 return error;
876 }
877
878 /*
879 * curpos is where the data starts, delta_obj_offset is the where the
880 * header starts
881 */
882 off64_t get_delta_base(
883 struct git_pack_file *p,
884 git_mwindow **w_curs,
885 off64_t *curpos,
886 git_object_t type,
887 off64_t delta_obj_offset)
888 {
889 unsigned int left = 0;
890 unsigned char *base_info;
891 off64_t base_offset;
892 git_oid unused;
893
894 base_info = pack_window_open(p, w_curs, *curpos, &left);
895 /* Assumption: the only reason this would fail is because the file is too small */
896 if (base_info == NULL)
897 return GIT_EBUFS;
898 /* pack_window_open() assured us we have [base_info, base_info + 20)
899 * as a range that we can look at without walking off the
900 * end of the mapped window. Its actually the hash size
901 * that is assured. An OFS_DELTA longer than the hash size
902 * is stupid, as then a REF_DELTA would be smaller to store.
903 */
904 if (type == GIT_OBJECT_OFS_DELTA) {
905 unsigned used = 0;
906 unsigned char c = base_info[used++];
907 size_t unsigned_base_offset = c & 127;
908 while (c & 128) {
909 if (left <= used)
910 return GIT_EBUFS;
911 unsigned_base_offset += 1;
912 if (!unsigned_base_offset || MSB(unsigned_base_offset, 7))
913 return 0; /* overflow */
914 c = base_info[used++];
915 unsigned_base_offset = (unsigned_base_offset << 7) + (c & 127);
916 }
917 if (unsigned_base_offset == 0 || (size_t)delta_obj_offset <= unsigned_base_offset)
918 return 0; /* out of bound */
919 base_offset = delta_obj_offset - unsigned_base_offset;
920 *curpos += used;
921 } else if (type == GIT_OBJECT_REF_DELTA) {
922 /* If we have the cooperative cache, search in it first */
923 if (p->has_cache) {
924 struct git_pack_entry *entry;
925 git_oid oid;
926
927 git_oid_fromraw(&oid, base_info);
928 if ((entry = git_oidmap_get(p->idx_cache, &oid)) != NULL) {
929 *curpos += 20;
930 return entry->offset;
931 } else {
932 /* If we're building an index, don't try to find the pack
933 * entry; we just haven't seen it yet. We'll make
934 * progress again in the next loop.
935 */
936 return GIT_PASSTHROUGH;
937 }
938 }
939
940 /* The base entry _must_ be in the same pack */
941 if (pack_entry_find_offset(&base_offset, &unused, p, (git_oid *)base_info, GIT_OID_HEXSZ) < 0)
942 return packfile_error("base entry delta is not in the same pack");
943 *curpos += 20;
944 } else
945 return 0;
946
947 return base_offset;
948 }
949
950 /***********************************************************
951 *
952 * PACKFILE METHODS
953 *
954 ***********************************************************/
955
956 void git_packfile_close(struct git_pack_file *p, bool unlink_packfile)
957 {
958 if (p->mwf.fd >= 0) {
959 git_mwindow_free_all_locked(&p->mwf);
960 p_close(p->mwf.fd);
961 p->mwf.fd = -1;
962 }
963
964 if (unlink_packfile)
965 p_unlink(p->pack_name);
966 }
967
968 void git_packfile_free(struct git_pack_file *p)
969 {
970 if (!p)
971 return;
972
973 cache_free(&p->bases);
974
975 git_packfile_close(p, false);
976
977 pack_index_free(p);
978
979 git__free(p->bad_object_sha1);
980
981 git_mutex_free(&p->lock);
982 git_mutex_free(&p->bases.lock);
983 git__free(p);
984 }
985
986 static int packfile_open(struct git_pack_file *p)
987 {
988 struct stat st;
989 struct git_pack_header hdr;
990 git_oid sha1;
991 unsigned char *idx_sha1;
992
993 if (p->index_version == -1 && pack_index_open(p) < 0)
994 return git_odb__error_notfound("failed to open packfile", NULL, 0);
995
996 /* if mwf opened by another thread, return now */
997 if (git_mutex_lock(&p->lock) < 0)
998 return packfile_error("failed to get lock for open");
999
1000 if (p->mwf.fd >= 0) {
1001 git_mutex_unlock(&p->lock);
1002 return 0;
1003 }
1004
1005 /* TODO: open with noatime */
1006 p->mwf.fd = git_futils_open_ro(p->pack_name);
1007 if (p->mwf.fd < 0)
1008 goto cleanup;
1009
1010 if (p_fstat(p->mwf.fd, &st) < 0 ||
1011 git_mwindow_file_register(&p->mwf) < 0)
1012 goto cleanup;
1013
1014 /* If we created the struct before we had the pack we lack size. */
1015 if (!p->mwf.size) {
1016 if (!S_ISREG(st.st_mode))
1017 goto cleanup;
1018 p->mwf.size = (off64_t)st.st_size;
1019 } else if (p->mwf.size != st.st_size)
1020 goto cleanup;
1021
1022 #if 0
1023 /* We leave these file descriptors open with sliding mmap;
1024 * there is no point keeping them open across exec(), though.
1025 */
1026 fd_flag = fcntl(p->mwf.fd, F_GETFD, 0);
1027 if (fd_flag < 0)
1028 goto cleanup;
1029
1030 fd_flag |= FD_CLOEXEC;
1031 if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
1032 goto cleanup;
1033 #endif
1034
1035 /* Verify we recognize this pack file format. */
1036 if (p_read(p->mwf.fd, &hdr, sizeof(hdr)) < 0 ||
1037 hdr.hdr_signature != htonl(PACK_SIGNATURE) ||
1038 !pack_version_ok(hdr.hdr_version))
1039 goto cleanup;
1040
1041 /* Verify the pack matches its index. */
1042 if (p->num_objects != ntohl(hdr.hdr_entries) ||
1043 p_lseek(p->mwf.fd, p->mwf.size - GIT_OID_RAWSZ, SEEK_SET) == -1 ||
1044 p_read(p->mwf.fd, sha1.id, GIT_OID_RAWSZ) < 0)
1045 goto cleanup;
1046
1047 idx_sha1 = ((unsigned char *)p->index_map.data) + p->index_map.len - 40;
1048
1049 if (git_oid__cmp(&sha1, (git_oid *)idx_sha1) != 0)
1050 goto cleanup;
1051
1052 git_mutex_unlock(&p->lock);
1053 return 0;
1054
1055 cleanup:
1056 git_error_set(GIT_ERROR_OS, "invalid packfile '%s'", p->pack_name);
1057
1058 if (p->mwf.fd >= 0)
1059 p_close(p->mwf.fd);
1060 p->mwf.fd = -1;
1061
1062 git_mutex_unlock(&p->lock);
1063
1064 return -1;
1065 }
1066
1067 int git_packfile__name(char **out, const char *path)
1068 {
1069 size_t path_len;
1070 git_buf buf = GIT_BUF_INIT;
1071
1072 path_len = strlen(path);
1073
1074 if (path_len < strlen(".idx"))
1075 return git_odb__error_notfound("invalid packfile path", NULL, 0);
1076
1077 if (git_buf_printf(&buf, "%.*s.pack", (int)(path_len - strlen(".idx")), path) < 0)
1078 return -1;
1079
1080 *out = git_buf_detach(&buf);
1081 return 0;
1082 }
1083
1084 int git_packfile_alloc(struct git_pack_file **pack_out, const char *path)
1085 {
1086 struct stat st;
1087 struct git_pack_file *p;
1088 size_t path_len = path ? strlen(path) : 0, alloc_len;
1089
1090 *pack_out = NULL;
1091
1092 if (path_len < strlen(".idx"))
1093 return git_odb__error_notfound("invalid packfile path", NULL, 0);
1094
1095 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, sizeof(*p), path_len);
1096 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 2);
1097
1098 p = git__calloc(1, alloc_len);
1099 GIT_ERROR_CHECK_ALLOC(p);
1100
1101 memcpy(p->pack_name, path, path_len + 1);
1102
1103 /*
1104 * Make sure a corresponding .pack file exists and that
1105 * the index looks sane.
1106 */
1107 if (git__suffixcmp(path, ".idx") == 0) {
1108 size_t root_len = path_len - strlen(".idx");
1109
1110 if (!git_disable_pack_keep_file_checks) {
1111 memcpy(p->pack_name + root_len, ".keep", sizeof(".keep"));
1112 if (git_path_exists(p->pack_name) == true)
1113 p->pack_keep = 1;
1114 }
1115
1116 memcpy(p->pack_name + root_len, ".pack", sizeof(".pack"));
1117 }
1118
1119 if (p_stat(p->pack_name, &st) < 0 || !S_ISREG(st.st_mode)) {
1120 git__free(p);
1121 return git_odb__error_notfound("packfile not found", NULL, 0);
1122 }
1123
1124 /* ok, it looks sane as far as we can check without
1125 * actually mapping the pack file.
1126 */
1127 p->mwf.fd = -1;
1128 p->mwf.size = st.st_size;
1129 p->pack_local = 1;
1130 p->mtime = (git_time_t)st.st_mtime;
1131 p->index_version = -1;
1132
1133 if (git_mutex_init(&p->lock)) {
1134 git_error_set(GIT_ERROR_OS, "failed to initialize packfile mutex");
1135 git__free(p);
1136 return -1;
1137 }
1138
1139 if (cache_init(&p->bases) < 0) {
1140 git__free(p);
1141 return -1;
1142 }
1143
1144 *pack_out = p;
1145
1146 return 0;
1147 }
1148
1149 /***********************************************************
1150 *
1151 * PACKFILE ENTRY SEARCH INTERNALS
1152 *
1153 ***********************************************************/
1154
1155 static off64_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n)
1156 {
1157 const unsigned char *index = p->index_map.data;
1158 const unsigned char *end = index + p->index_map.len;
1159 index += 4 * 256;
1160 if (p->index_version == 1) {
1161 return ntohl(*((uint32_t *)(index + 24 * n)));
1162 } else {
1163 uint32_t off;
1164 index += 8 + p->num_objects * (20 + 4);
1165 off = ntohl(*((uint32_t *)(index + 4 * n)));
1166 if (!(off & 0x80000000))
1167 return off;
1168 index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
1169
1170 /* Make sure we're not being sent out of bounds */
1171 if (index >= end - 8)
1172 return -1;
1173
1174 return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
1175 ntohl(*((uint32_t *)(index + 4)));
1176 }
1177 }
1178
/* Comparison callback ordering two records by their first four bytes. */
static int git__memcmp4(const void *a, const void *b)
{
	const size_t key_len = 4;

	return memcmp(a, b, key_len);
}
1182
1183 int git_pack_foreach_entry(
1184 struct git_pack_file *p,
1185 git_odb_foreach_cb cb,
1186 void *data)
1187 {
1188 const unsigned char *index = p->index_map.data, *current;
1189 uint32_t i;
1190 int error = 0;
1191
1192 if (index == NULL) {
1193 if ((error = pack_index_open(p)) < 0)
1194 return error;
1195
1196 assert(p->index_map.data);
1197
1198 index = p->index_map.data;
1199 }
1200
1201 if (p->index_version > 1) {
1202 index += 8;
1203 }
1204
1205 index += 4 * 256;
1206
1207 if (p->oids == NULL) {
1208 git_vector offsets, oids;
1209
1210 if ((error = git_vector_init(&oids, p->num_objects, NULL)))
1211 return error;
1212
1213 if ((error = git_vector_init(&offsets, p->num_objects, git__memcmp4)))
1214 return error;
1215
1216 if (p->index_version > 1) {
1217 const unsigned char *off = index + 24 * p->num_objects;
1218 for (i = 0; i < p->num_objects; i++)
1219 git_vector_insert(&offsets, (void*)&off[4 * i]);
1220 git_vector_sort(&offsets);
1221 git_vector_foreach(&offsets, i, current)
1222 git_vector_insert(&oids, (void*)&index[5 * (current - off)]);
1223 } else {
1224 for (i = 0; i < p->num_objects; i++)
1225 git_vector_insert(&offsets, (void*)&index[24 * i]);
1226 git_vector_sort(&offsets);
1227 git_vector_foreach(&offsets, i, current)
1228 git_vector_insert(&oids, (void*)&current[4]);
1229 }
1230
1231 git_vector_free(&offsets);
1232 p->oids = (git_oid **)git_vector_detach(NULL, NULL, &oids);
1233 }
1234
1235 for (i = 0; i < p->num_objects; i++)
1236 if ((error = cb(p->oids[i], data)) != 0)
1237 return git_error_set_after_callback(error);
1238
1239 return error;
1240 }
1241
1242 static int pack_entry_find_offset(
1243 off64_t *offset_out,
1244 git_oid *found_oid,
1245 struct git_pack_file *p,
1246 const git_oid *short_oid,
1247 size_t len)
1248 {
1249 const uint32_t *level1_ofs;
1250 const unsigned char *index;
1251 unsigned hi, lo, stride;
1252 int pos, found = 0;
1253 off64_t offset;
1254 const unsigned char *current = 0;
1255
1256 *offset_out = 0;
1257
1258 if (p->index_version == -1) {
1259 int error;
1260
1261 if ((error = pack_index_open(p)) < 0)
1262 return error;
1263 assert(p->index_map.data);
1264 }
1265
1266 index = p->index_map.data;
1267 level1_ofs = p->index_map.data;
1268
1269 if (p->index_version > 1) {
1270 level1_ofs += 2;
1271 index += 8;
1272 }
1273
1274 index += 4 * 256;
1275 hi = ntohl(level1_ofs[(int)short_oid->id[0]]);
1276 lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(level1_ofs[(int)short_oid->id[0] - 1]));
1277
1278 if (p->index_version > 1) {
1279 stride = 20;
1280 } else {
1281 stride = 24;
1282 index += 4;
1283 }
1284
1285 #ifdef INDEX_DEBUG_LOOKUP
1286 printf("%02x%02x%02x... lo %u hi %u nr %d\n",
1287 short_oid->id[0], short_oid->id[1], short_oid->id[2], lo, hi, p->num_objects);
1288 #endif
1289
1290 pos = sha1_position(index, stride, lo, hi, short_oid->id);
1291
1292 if (pos >= 0) {
1293 /* An object matching exactly the oid was found */
1294 found = 1;
1295 current = index + pos * stride;
1296 } else {
1297 /* No object was found */
1298 /* pos refers to the object with the "closest" oid to short_oid */
1299 pos = - 1 - pos;
1300 if (pos < (int)p->num_objects) {
1301 current = index + pos * stride;
1302
1303 if (!git_oid_ncmp(short_oid, (const git_oid *)current, len))
1304 found = 1;
1305 }
1306 }
1307
1308 if (found && len != GIT_OID_HEXSZ && pos + 1 < (int)p->num_objects) {
1309 /* Check for ambiguousity */
1310 const unsigned char *next = current + stride;
1311
1312 if (!git_oid_ncmp(short_oid, (const git_oid *)next, len)) {
1313 found = 2;
1314 }
1315 }
1316
1317 if (!found)
1318 return git_odb__error_notfound("failed to find offset for pack entry", short_oid, len);
1319 if (found > 1)
1320 return git_odb__error_ambiguous("found multiple offsets for pack entry");
1321
1322 if ((offset = nth_packed_object_offset(p, pos)) < 0) {
1323 git_error_set(GIT_ERROR_ODB, "packfile index is corrupt");
1324 return -1;
1325 }
1326
1327 *offset_out = offset;
1328 git_oid_fromraw(found_oid, current);
1329
1330 #ifdef INDEX_DEBUG_LOOKUP
1331 {
1332 unsigned char hex_sha1[GIT_OID_HEXSZ + 1];
1333 git_oid_fmt(hex_sha1, found_oid);
1334 hex_sha1[GIT_OID_HEXSZ] = '\0';
1335 printf("found lo=%d %s\n", lo, hex_sha1);
1336 }
1337 #endif
1338
1339 return 0;
1340 }
1341
1342 int git_pack_entry_find(
1343 struct git_pack_entry *e,
1344 struct git_pack_file *p,
1345 const git_oid *short_oid,
1346 size_t len)
1347 {
1348 off64_t offset;
1349 git_oid found_oid;
1350 int error;
1351
1352 assert(p);
1353
1354 if (len == GIT_OID_HEXSZ && p->num_bad_objects) {
1355 unsigned i;
1356 for (i = 0; i < p->num_bad_objects; i++)
1357 if (git_oid__cmp(short_oid, &p->bad_object_sha1[i]) == 0)
1358 return packfile_error("bad object found in packfile");
1359 }
1360
1361 error = pack_entry_find_offset(&offset, &found_oid, p, short_oid, len);
1362 if (error < 0)
1363 return error;
1364
1365 /* we found a unique entry in the index;
1366 * make sure the packfile backing the index
1367 * still exists on disk */
1368 if (p->mwf.fd == -1 && (error = packfile_open(p)) < 0)
1369 return error;
1370
1371 e->offset = offset;
1372 e->p = p;
1373
1374 git_oid_cpy(&e->sha1, &found_oid);
1375 return 0;
1376 }