]> git.proxmox.com Git - libgit2.git/blame - src/pack.c
New upstream version 1.0.0+dfsg.1
[libgit2.git] / src / pack.c
CommitLineData
7d0cdf82 1/*
359fc2d2 2 * Copyright (C) the libgit2 contributors. All rights reserved.
7d0cdf82 3 *
bb742ede
VM
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
7d0cdf82
CMN
6 */
7
7d0cdf82 8#include "pack.h"
eae0bfdc 9
6a2d2f8a 10#include "delta.h"
0c9c969a 11#include "futils.h"
a15c550d 12#include "mwindow.h"
0c9c969a 13#include "odb.h"
b7f167da 14#include "oid.h"
7d0cdf82 15
0c9c969a
UG
16/* Option to bypass checking existence of '.keep' files */
17bool git_disable_pack_keep_file_checks = false;
7d0cdf82 18
a070f152 19static int packfile_open(struct git_pack_file *p);
0c9c969a 20static off64_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n);
b644e223 21static int packfile_unpack_compressed(
a070f152
CMN
22 git_rawobj *obj,
23 struct git_pack_file *p,
24 git_mwindow **w_curs,
0c9c969a 25 off64_t *curpos,
a070f152 26 size_t size,
ac3d33df 27 git_object_t type);
a070f152
CMN
28
29/* Can find the offset of an object given
30 * a prefix of an identifier.
904b67e6 31 * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
a070f152
CMN
32 * is ambiguous within the pack.
33 * This method assumes that len is between
34 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
35 */
36static int pack_entry_find_offset(
0c9c969a 37 off64_t *offset_out,
a070f152
CMN
38 git_oid *found_oid,
39 struct git_pack_file *p,
40 const git_oid *short_oid,
b8457baa 41 size_t len);
a070f152 42
e1de726c
RB
43static int packfile_error(const char *message)
44{
ac3d33df 45 git_error_set(GIT_ERROR_ODB, "invalid pack file - %s", message);
e1de726c
RB
46 return -1;
47}
48
c8f79c2b
CMN
49/********************
50 * Delta base cache
51 ********************/
c0f4a011 52
525d961c 53static git_pack_cache_entry *new_cache_object(git_rawobj *source)
c0f4a011 54{
c8f79c2b 55 git_pack_cache_entry *e = git__calloc(1, sizeof(git_pack_cache_entry));
c0f4a011
CMN
56 if (!e)
57 return NULL;
58
8588cb0c 59 git_atomic_inc(&e->refcount);
c0f4a011
CMN
60 memcpy(&e->raw, source, sizeof(git_rawobj));
61
62 return e;
63}
64
65static void free_cache_object(void *o)
66{
67 git_pack_cache_entry *e = (git_pack_cache_entry *)o;
68
69 if (e != NULL) {
0ed75620 70 assert(e->refcount.val == 0);
c0f4a011
CMN
71 git__free(e->raw.data);
72 git__free(e);
73 }
74}
75
c8f79c2b
CMN
76static void cache_free(git_pack_cache *cache)
77{
9694d9ba 78 git_pack_cache_entry *entry;
c8f79c2b
CMN
79
80 if (cache->entries) {
9694d9ba
PS
81 git_offmap_foreach_value(cache->entries, entry, {
82 free_cache_object(entry);
83 });
c8f79c2b
CMN
84
85 git_offmap_free(cache->entries);
649214be 86 cache->entries = NULL;
c8f79c2b
CMN
87 }
88}
89
90static int cache_init(git_pack_cache *cache)
91{
0c9c969a
UG
92 if (git_offmap_new(&cache->entries) < 0)
93 return -1;
f658dc43 94
c8f79c2b 95 cache->memory_limit = GIT_PACK_CACHE_MEMORY_LIMIT;
1a42dd17
RB
96
97 if (git_mutex_init(&cache->lock)) {
ac3d33df 98 git_error_set(GIT_ERROR_OS, "failed to initialize pack cache mutex");
1a42dd17
RB
99
100 git__free(cache->entries);
101 cache->entries = NULL;
102
103 return -1;
104 }
c8f79c2b
CMN
105
106 return 0;
107}
108
0c9c969a 109static git_pack_cache_entry *cache_get(git_pack_cache *cache, off64_t offset)
c8f79c2b 110{
0c9c969a 111 git_pack_cache_entry *entry;
c8f79c2b 112
09e29e47
CMN
113 if (git_mutex_lock(&cache->lock) < 0)
114 return NULL;
115
0c9c969a 116 if ((entry = git_offmap_get(cache->entries, offset)) != NULL) {
c8f79c2b 117 git_atomic_inc(&entry->refcount);
0ed75620 118 entry->last_usage = cache->use_ctr++;
c8f79c2b
CMN
119 }
120 git_mutex_unlock(&cache->lock);
121
122 return entry;
123}
124
0ed75620
CMN
125/* Run with the cache lock held */
126static void free_lowest_entry(git_pack_cache *cache)
127{
0c9c969a 128 off64_t offset;
ed6648ba 129 git_pack_cache_entry *entry;
ed6648ba 130
685f2251 131 git_offmap_foreach(cache->entries, offset, entry, {
9c62aaab
CMN
132 if (entry && entry->refcount.val == 0) {
133 cache->memory_used -= entry->raw.len;
685f2251 134 git_offmap_delete(cache->entries, offset);
9c62aaab 135 free_cache_object(entry);
ed6648ba 136 }
9694d9ba 137 });
0ed75620
CMN
138}
139
8588cb0c
JH
140static int cache_add(
141 git_pack_cache_entry **cached_out,
142 git_pack_cache *cache,
143 git_rawobj *base,
0c9c969a 144 off64_t offset)
c8f79c2b
CMN
145{
146 git_pack_cache_entry *entry;
0c9c969a 147 int exists;
c8f79c2b 148
0ed75620
CMN
149 if (base->len > GIT_PACK_CACHE_SIZE_LIMIT)
150 return -1;
151
c8f79c2b
CMN
152 entry = new_cache_object(base);
153 if (entry) {
09e29e47 154 if (git_mutex_lock(&cache->lock) < 0) {
ac3d33df 155 git_error_set(GIT_ERROR_OS, "failed to lock cache");
6f73e026 156 git__free(entry);
09e29e47
CMN
157 return -1;
158 }
c8f79c2b 159 /* Add it to the cache if nobody else has */
036daa59 160 exists = git_offmap_exists(cache->entries, offset);
c8f79c2b 161 if (!exists) {
0ed75620
CMN
162 while (cache->memory_used + base->len > cache->memory_limit)
163 free_lowest_entry(cache);
164
0c9c969a 165 git_offmap_set(cache->entries, offset, entry);
0ed75620 166 cache->memory_used += entry->raw.len;
8588cb0c
JH
167
168 *cached_out = entry;
c8f79c2b
CMN
169 }
170 git_mutex_unlock(&cache->lock);
171 /* Somebody beat us to adding it into the cache */
172 if (exists) {
173 git__free(entry);
174 return -1;
175 }
176 }
177
178 return 0;
179}
180
a070f152
CMN
181/***********************************************************
182 *
183 * PACK INDEX METHODS
184 *
185 ***********************************************************/
186
187static void pack_index_free(struct git_pack_file *p)
188{
60ecdf59
DMB
189 if (p->oids) {
190 git__free(p->oids);
191 p->oids = NULL;
192 }
a070f152
CMN
193 if (p->index_map.data) {
194 git_futils_mmap_free(&p->index_map);
195 p->index_map.data = NULL;
196 }
197}
198
87d9869f 199static int pack_index_check(const char *path, struct git_pack_file *p)
a070f152
CMN
200{
201 struct git_pack_idx_header *hdr;
202 uint32_t version, nr, i, *index;
a070f152
CMN
203 void *idx_map;
204 size_t idx_size;
a070f152 205 struct stat st;
a070f152 206 int error;
e1de726c
RB
207 /* TODO: properly open the file without access time using O_NOATIME */
208 git_file fd = git_futils_open_ro(path);
a070f152 209 if (fd < 0)
e1de726c 210 return fd;
a070f152 211
9d2f841a
RB
212 if (p_fstat(fd, &st) < 0) {
213 p_close(fd);
ac3d33df 214 git_error_set(GIT_ERROR_OS, "unable to stat pack index '%s'", path);
9d2f841a
RB
215 return -1;
216 }
217
218 if (!S_ISREG(st.st_mode) ||
e1de726c
RB
219 !git__is_sizet(st.st_size) ||
220 (idx_size = (size_t)st.st_size) < 4 * 256 + 20 + 20)
221 {
a070f152 222 p_close(fd);
ac3d33df 223 git_error_set(GIT_ERROR_ODB, "invalid pack index '%s'", path);
e1de726c 224 return -1;
a070f152
CMN
225 }
226
227 error = git_futils_mmap_ro(&p->index_map, fd, 0, idx_size);
e1de726c 228
a070f152
CMN
229 p_close(fd);
230
e1de726c
RB
231 if (error < 0)
232 return error;
a070f152
CMN
233
234 hdr = idx_map = p->index_map.data;
235
236 if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
237 version = ntohl(hdr->idx_version);
238
239 if (version < 2 || version > 2) {
240 git_futils_mmap_free(&p->index_map);
e1de726c 241 return packfile_error("unsupported index version");
a070f152
CMN
242 }
243
244 } else
245 version = 1;
246
247 nr = 0;
248 index = idx_map;
249
250 if (version > 1)
87d9869f 251 index += 2; /* skip index header */
a070f152
CMN
252
253 for (i = 0; i < 256; i++) {
254 uint32_t n = ntohl(index[i]);
255 if (n < nr) {
256 git_futils_mmap_free(&p->index_map);
e1de726c 257 return packfile_error("index is non-monotonic");
a070f152
CMN
258 }
259 nr = n;
260 }
261
262 if (version == 1) {
263 /*
264 * Total size:
87d9869f
VM
265 * - 256 index entries 4 bytes each
266 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
267 * - 20-byte SHA1 of the packfile
268 * - 20-byte SHA1 file checksum
a070f152
CMN
269 */
270 if (idx_size != 4*256 + nr * 24 + 20 + 20) {
271 git_futils_mmap_free(&p->index_map);
e1de726c 272 return packfile_error("index is corrupted");
a070f152
CMN
273 }
274 } else if (version == 2) {
275 /*
276 * Minimum size:
87d9869f
VM
277 * - 8 bytes of header
278 * - 256 index entries 4 bytes each
279 * - 20-byte sha1 entry * nr
280 * - 4-byte crc entry * nr
281 * - 4-byte offset entry * nr
282 * - 20-byte SHA1 of the packfile
283 * - 20-byte SHA1 file checksum
a070f152
CMN
284 * And after the 4-byte offset table might be a
285 * variable sized table containing 8-byte entries
286 * for offsets larger than 2^31.
287 */
288 unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
289 unsigned long max_size = min_size;
290
291 if (nr)
292 max_size += (nr - 1)*8;
293
294 if (idx_size < min_size || idx_size > max_size) {
295 git_futils_mmap_free(&p->index_map);
e1de726c 296 return packfile_error("wrong index size");
a070f152 297 }
a070f152
CMN
298 }
299
a070f152 300 p->num_objects = nr;
0ddfcb40 301 p->index_version = version;
e1de726c 302 return 0;
a070f152
CMN
303}
304
305static int pack_index_open(struct git_pack_file *p)
306{
24c70804 307 int error = 0;
878293f7 308 size_t name_len;
a693b873 309 git_buf idx_name;
24c70804 310
0ddfcb40 311 if (p->index_version > -1)
53607868 312 return 0;
a070f152 313
24c70804
RB
314 name_len = strlen(p->pack_name);
315 assert(name_len > strlen(".pack")); /* checked by git_pack_file alloc */
e1de726c 316
a693b873
PS
317 if (git_buf_init(&idx_name, name_len) < 0)
318 return -1;
319
878293f7
CMN
320 git_buf_put(&idx_name, p->pack_name, name_len - strlen(".pack"));
321 git_buf_puts(&idx_name, ".idx");
322 if (git_buf_oom(&idx_name)) {
ac3d33df 323 git_buf_dispose(&idx_name);
53607868 324 return -1;
878293f7 325 }
a070f152 326
050af8bb 327 if ((error = git_mutex_lock(&p->lock)) < 0) {
ac3d33df 328 git_buf_dispose(&idx_name);
53607868 329 return error;
050af8bb 330 }
53607868 331
0ddfcb40 332 if (p->index_version == -1)
878293f7 333 error = pack_index_check(idx_name.ptr, p);
24c70804 334
ac3d33df 335 git_buf_dispose(&idx_name);
a070f152 336
24c70804
RB
337 git_mutex_unlock(&p->lock);
338
e1de726c 339 return error;
a070f152
CMN
340}
341
342static unsigned char *pack_window_open(
343 struct git_pack_file *p,
7d0cdf82 344 git_mwindow **w_cursor,
0c9c969a 345 off64_t offset,
7d0cdf82
CMN
346 unsigned int *left)
347{
e1de726c 348 if (p->mwf.fd == -1 && packfile_open(p) < 0)
7d0cdf82
CMN
349 return NULL;
350
351 /* Since packfiles end in a hash of their content and it's
352 * pointless to ask for an offset into the middle of that
353 * hash, and the pack_window_contains function above wouldn't match
354 * don't allow an offset too close to the end of the file.
6d97beb9
CMN
355 *
356 * Don't allow a negative offset, as that means we've wrapped
357 * around.
7d0cdf82
CMN
358 */
359 if (offset > (p->mwf.size - 20))
360 return NULL;
6d97beb9
CMN
361 if (offset < 0)
362 return NULL;
7d0cdf82
CMN
363
364 return git_mwindow_open(&p->mwf, w_cursor, offset, 20, left);
365 }
366
51e82492
CMN
367/*
368 * The per-object header is a pretty dense thing, which is
369 * - first byte: low four bits are "size",
370 * then three bits of "type",
371 * with the high bit being "size continues".
372 * - each byte afterwards: low seven bits are size continuation,
373 * with the high bit being "size continues"
374 */
ac3d33df 375size_t git_packfile__object_header(unsigned char *hdr, size_t size, git_object_t type)
51e82492
CMN
376{
377 unsigned char *hdr_base;
378 unsigned char c;
379
ac3d33df 380 assert(type >= GIT_OBJECT_COMMIT && type <= GIT_OBJECT_REF_DELTA);
51e82492
CMN
381
382 /* TODO: add support for chunked objects; see git.git 6c0d19b1 */
383
384 c = (unsigned char)((type << 4) | (size & 15));
385 size >>= 4;
386 hdr_base = hdr;
387
388 while (size) {
389 *hdr++ = c | 0x80;
390 c = size & 0x7f;
391 size >>= 7;
392 }
393 *hdr++ = c;
394
51a3dfb5 395 return (hdr - hdr_base);
51e82492
CMN
396}
397
398
45d773ef
CMN
399static int packfile_unpack_header1(
400 unsigned long *usedp,
7d0cdf82 401 size_t *sizep,
ac3d33df 402 git_object_t *type,
7d0cdf82
CMN
403 const unsigned char *buf,
404 unsigned long len)
405{
406 unsigned shift;
407 unsigned long size, c;
408 unsigned long used = 0;
2aeadb9c 409
7d0cdf82
CMN
410 c = buf[used++];
411 *type = (c >> 4) & 7;
412 size = c & 15;
413 shift = 4;
414 while (c & 0x80) {
ec7e680c 415 if (len <= used) {
ac3d33df 416 git_error_set(GIT_ERROR_ODB, "buffer too small");
904b67e6 417 return GIT_EBUFS;
ec7e680c 418 }
45d773ef
CMN
419
420 if (bitsizeof(long) <= shift) {
421 *usedp = 0;
ac3d33df 422 git_error_set(GIT_ERROR_ODB, "packfile corrupted");
45d773ef
CMN
423 return -1;
424 }
7d0cdf82
CMN
425
426 c = buf[used++];
427 size += (c & 0x7f) << shift;
428 shift += 7;
429 }
430
431 *sizep = (size_t)size;
45d773ef
CMN
432 *usedp = used;
433 return 0;
7d0cdf82
CMN
434}
435
436int git_packfile_unpack_header(
437 size_t *size_p,
ac3d33df 438 git_object_t *type_p,
7d0cdf82
CMN
439 git_mwindow_file *mwf,
440 git_mwindow **w_curs,
0c9c969a 441 off64_t *curpos)
7d0cdf82
CMN
442{
443 unsigned char *base;
444 unsigned int left;
445 unsigned long used;
45d773ef 446 int ret;
7d0cdf82
CMN
447
448 /* pack_window_open() assures us we have [base, base + 20) available
87d9869f
VM
449 * as a range that we can look at at. (Its actually the hash
450 * size that is assured.) With our object header encoding
7d0cdf82
CMN
451 * the maximum deflated object size is 2^137, which is just
452 * insane, so we know won't exceed what we have been given.
453 */
24c70804 454/* base = pack_window_open(p, w_curs, *curpos, &left); */
7d0cdf82
CMN
455 base = git_mwindow_open(mwf, w_curs, *curpos, 20, &left);
456 if (base == NULL)
904b67e6 457 return GIT_EBUFS;
2aeadb9c 458
9d2f841a 459 ret = packfile_unpack_header1(&used, size_p, type_p, base, left);
45d773ef 460 git_mwindow_close(w_curs);
904b67e6 461 if (ret == GIT_EBUFS)
45d773ef
CMN
462 return ret;
463 else if (ret < 0)
e1de726c 464 return packfile_error("header length is zero");
7d0cdf82
CMN
465
466 *curpos += used;
e1de726c 467 return 0;
7d0cdf82
CMN
468}
469
44f9f547
DMB
470int git_packfile_resolve_header(
471 size_t *size_p,
ac3d33df 472 git_object_t *type_p,
44f9f547 473 struct git_pack_file *p,
0c9c969a 474 off64_t offset)
44f9f547
DMB
475{
476 git_mwindow *w_curs = NULL;
0c9c969a 477 off64_t curpos = offset;
44f9f547 478 size_t size;
ac3d33df 479 git_object_t type;
0c9c969a 480 off64_t base_offset;
44f9f547
DMB
481 int error;
482
483 error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
44f9f547
DMB
484 if (error < 0)
485 return error;
486
ac3d33df 487 if (type == GIT_OBJECT_OFS_DELTA || type == GIT_OBJECT_REF_DELTA) {
44f9f547 488 size_t base_size;
a97b769a
CMN
489 git_packfile_stream stream;
490
44f9f547
DMB
491 base_offset = get_delta_base(p, &w_curs, &curpos, type, offset);
492 git_mwindow_close(&w_curs);
a97b769a 493 if ((error = git_packfile_stream_open(&stream, p, curpos)) < 0)
44f9f547 494 return error;
6a2d2f8a 495 error = git_delta_read_header_fromstream(&base_size, size_p, &stream);
ac3d33df 496 git_packfile_stream_dispose(&stream);
44f9f547
DMB
497 if (error < 0)
498 return error;
34b32053 499 } else {
44f9f547 500 *size_p = size;
34b32053
PS
501 base_offset = 0;
502 }
44f9f547 503
ac3d33df 504 while (type == GIT_OBJECT_OFS_DELTA || type == GIT_OBJECT_REF_DELTA) {
44f9f547
DMB
505 curpos = base_offset;
506 error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
44f9f547
DMB
507 if (error < 0)
508 return error;
ac3d33df 509 if (type != GIT_OBJECT_OFS_DELTA && type != GIT_OBJECT_REF_DELTA)
44f9f547
DMB
510 break;
511 base_offset = get_delta_base(p, &w_curs, &curpos, type, base_offset);
512 git_mwindow_close(&w_curs);
513 }
514 *type_p = type;
515
516 return error;
517}
518
15bcced2
CMN
519#define SMALL_STACK_SIZE 64
520
a3ffbf23
CMN
521/**
522 * Generate the chain of dependencies which we need to get to the
523 * object at `off`. `chain` is used a stack, popping gives the right
524 * order to apply deltas on. If an object is found in the pack's base
525 * cache, we stop calculating there.
526 */
15bcced2 527static int pack_dependency_chain(git_dependency_chain *chain_out,
0c9c969a 528 git_pack_cache_entry **cached_out, off64_t *cached_off,
15bcced2 529 struct pack_chain_elem *small_stack, size_t *stack_sz,
0c9c969a 530 struct git_pack_file *p, off64_t obj_offset)
a3ffbf23
CMN
531{
532 git_dependency_chain chain = GIT_ARRAY_INIT;
533 git_mwindow *w_curs = NULL;
0c9c969a 534 off64_t curpos = obj_offset, base_offset;
15bcced2
CMN
535 int error = 0, use_heap = 0;
536 size_t size, elem_pos;
ac3d33df 537 git_object_t type;
a3ffbf23 538
15bcced2 539 elem_pos = 0;
a3ffbf23
CMN
540 while (true) {
541 struct pack_chain_elem *elem;
542 git_pack_cache_entry *cached = NULL;
543
544 /* if we have a base cached, we can stop here instead */
545 if ((cached = cache_get(&p->bases, obj_offset)) != NULL) {
546 *cached_out = cached;
547 *cached_off = obj_offset;
548 break;
549 }
550
15bcced2
CMN
551 /* if we run out of space on the small stack, use the array */
552 if (elem_pos == SMALL_STACK_SIZE) {
553 git_array_init_to_size(chain, elem_pos);
ac3d33df 554 GIT_ERROR_CHECK_ARRAY(chain);
15bcced2
CMN
555 memcpy(chain.ptr, small_stack, elem_pos * sizeof(struct pack_chain_elem));
556 chain.size = elem_pos;
557 use_heap = 1;
558 }
559
a3ffbf23 560 curpos = obj_offset;
15bcced2
CMN
561 if (!use_heap) {
562 elem = &small_stack[elem_pos];
563 } else {
564 elem = git_array_alloc(chain);
565 if (!elem) {
566 error = -1;
567 goto on_error;
568 }
a3ffbf23
CMN
569 }
570
571 elem->base_key = obj_offset;
572
573 error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
a3ffbf23
CMN
574
575 if (error < 0)
576 goto on_error;
577
578 elem->offset = curpos;
579 elem->size = size;
580 elem->type = type;
581 elem->base_key = obj_offset;
582
ac3d33df 583 if (type != GIT_OBJECT_OFS_DELTA && type != GIT_OBJECT_REF_DELTA)
a3ffbf23
CMN
584 break;
585
586 base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
587 git_mwindow_close(&w_curs);
588
589 if (base_offset == 0) {
590 error = packfile_error("delta offset is zero");
591 goto on_error;
592 }
593 if (base_offset < 0) { /* must actually be an error code */
594 error = (int)base_offset;
595 goto on_error;
596 }
597
598 /* we need to pass the pos *after* the delta-base bit */
599 elem->offset = curpos;
600
601 /* go through the loop again, but with the new object */
602 obj_offset = base_offset;
15bcced2 603 elem_pos++;
a3ffbf23
CMN
604 }
605
ac3d33df 606
15bcced2 607 *stack_sz = elem_pos + 1;
a3ffbf23
CMN
608 *chain_out = chain;
609 return error;
610
611on_error:
612 git_array_clear(chain);
613 return error;
614}
615
a070f152 616int git_packfile_unpack(
e1de726c
RB
617 git_rawobj *obj,
618 struct git_pack_file *p,
0c9c969a 619 off64_t *obj_offset)
7d0cdf82
CMN
620{
621 git_mwindow *w_curs = NULL;
0c9c969a 622 off64_t curpos = *obj_offset;
a332e91c
CMN
623 int error, free_base = 0;
624 git_dependency_chain chain = GIT_ARRAY_INIT;
15bcced2 625 struct pack_chain_elem *elem = NULL, *stack;
a332e91c 626 git_pack_cache_entry *cached = NULL;
15bcced2 627 struct pack_chain_elem small_stack[SMALL_STACK_SIZE];
f1453c59 628 size_t stack_size = 0, elem_pos, alloclen;
ac3d33df 629 git_object_t base_type;
7d0cdf82
CMN
630
631 /*
632 * TODO: optionally check the CRC on the packfile
633 */
634
15bcced2 635 error = pack_dependency_chain(&chain, &cached, obj_offset, small_stack, &stack_size, p, *obj_offset);
2acdf4b8
CMN
636 if (error < 0)
637 return error;
638
7d0cdf82
CMN
639 obj->data = NULL;
640 obj->len = 0;
ac3d33df 641 obj->type = GIT_OBJECT_INVALID;
7d0cdf82 642
15bcced2
CMN
643 /* let's point to the right stack */
644 stack = chain.ptr ? chain.ptr : small_stack;
645
646 elem_pos = stack_size;
a3ffbf23 647 if (cached) {
a332e91c 648 memcpy(obj, &cached->raw, sizeof(git_rawobj));
a3ffbf23 649 base_type = obj->type;
15bcced2 650 elem_pos--; /* stack_size includes the base, which isn't actually there */
a3ffbf23 651 } else {
15bcced2 652 elem = &stack[--elem_pos];
a3ffbf23 653 base_type = elem->type;
a332e91c 654 }
45d773ef 655
9dbd150f 656 switch (base_type) {
ac3d33df
JK
657 case GIT_OBJECT_COMMIT:
658 case GIT_OBJECT_TREE:
659 case GIT_OBJECT_BLOB:
660 case GIT_OBJECT_TAG:
a3ffbf23 661 if (!cached) {
9dbd150f
CMN
662 curpos = elem->offset;
663 error = packfile_unpack_compressed(obj, p, &w_curs, &curpos, elem->size, elem->type);
664 git_mwindow_close(&w_curs);
a3ffbf23 665 base_type = elem->type;
9dbd150f
CMN
666 }
667 if (error < 0)
668 goto cleanup;
669 break;
ac3d33df
JK
670 case GIT_OBJECT_OFS_DELTA:
671 case GIT_OBJECT_REF_DELTA:
9dbd150f
CMN
672 error = packfile_error("dependency chain ends in a delta");
673 goto cleanup;
674 default:
675 error = packfile_error("invalid packfile type in header");
676 goto cleanup;
677 }
678
a332e91c 679 /*
c968ce2c 680 * Finding the object we want a cached base element is
a332e91c
CMN
681 * problematic, as we need to make sure we don't accidentally
682 * give the caller the cached object, which it would then feel
683 * free to free, so we need to copy the data.
684 */
15bcced2 685 if (cached && stack_size == 1) {
a332e91c 686 void *data = obj->data;
392702ee 687
ac3d33df 688 GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, obj->len, 1);
f1453c59 689 obj->data = git__malloc(alloclen);
ac3d33df 690 GIT_ERROR_CHECK_ALLOC(obj->data);
392702ee 691
a332e91c
CMN
692 memcpy(obj->data, data, obj->len + 1);
693 git_atomic_dec(&cached->refcount);
694 goto cleanup;
695 }
696
2acdf4b8 697 /* we now apply each consecutive delta until we run out */
15bcced2 698 while (elem_pos > 0 && !error) {
2acdf4b8
CMN
699 git_rawobj base, delta;
700
c968ce2c
CMN
701 /*
702 * We can now try to add the base to the cache, as
703 * long as it's not already the cached one.
704 */
705 if (!cached)
8588cb0c 706 free_base = !!cache_add(&cached, &p->bases, obj, elem->base_key);
c968ce2c 707
15bcced2 708 elem = &stack[elem_pos - 1];
2acdf4b8
CMN
709 curpos = elem->offset;
710 error = packfile_unpack_compressed(&delta, p, &w_curs, &curpos, elem->size, elem->type);
711 git_mwindow_close(&w_curs);
712
eae0bfdc
PP
713 if (error < 0) {
714 /* We have transferred ownership of the data to the cache. */
715 obj->data = NULL;
2acdf4b8 716 break;
eae0bfdc 717 }
2acdf4b8
CMN
718
719 /* the current object becomes the new base, on which we apply the delta */
720 base = *obj;
721 obj->data = NULL;
722 obj->len = 0;
ac3d33df 723 obj->type = GIT_OBJECT_INVALID;
2acdf4b8 724
6a2d2f8a 725 error = git_delta_apply(&obj->data, &obj->len, base.data, base.len, delta.data, delta.len);
a332e91c 726 obj->type = base_type;
6a2d2f8a 727
a332e91c
CMN
728 /*
729 * We usually don't want to free the base at this
730 * point, as we put it into the cache in the previous
731 * iteration. free_base lets us know that we got the
732 * base object directly from the packfile, so we can free it.
733 */
2acdf4b8 734 git__free(delta.data);
a332e91c
CMN
735 if (free_base) {
736 free_base = 0;
737 git__free(base.data);
738 }
739
740 if (cached) {
741 git_atomic_dec(&cached->refcount);
742 cached = NULL;
743 }
2acdf4b8
CMN
744
745 if (error < 0)
746 break;
7d0cdf82 747
15bcced2 748 elem_pos--;
7d0cdf82
CMN
749 }
750
2acdf4b8 751cleanup:
ff5eea06 752 if (error < 0) {
a332e91c 753 git__free(obj->data);
ff5eea06
PS
754 if (cached)
755 git_atomic_dec(&cached->refcount);
756 }
a332e91c 757
a3ffbf23 758 if (elem)
b3d3459f 759 *obj_offset = curpos;
a332e91c 760
2acdf4b8 761 git_array_clear(chain);
e1de726c 762 return error;
7d0cdf82
CMN
763}
764
0c9c969a 765int git_packfile_stream_open(git_packfile_stream *obj, struct git_pack_file *p, off64_t curpos)
282283ac 766{
46635339
CMN
767 memset(obj, 0, sizeof(git_packfile_stream));
768 obj->curpos = curpos;
769 obj->p = p;
0c9c969a
UG
770
771 if (git_zstream_init(&obj->zstream, GIT_ZSTREAM_INFLATE) < 0) {
ac3d33df 772 git_error_set(GIT_ERROR_ZLIB, "failed to init packfile stream");
46635339
CMN
773 return -1;
774 }
775
776 return 0;
777}
778
779ssize_t git_packfile_stream_read(git_packfile_stream *obj, void *buffer, size_t len)
780{
0c9c969a 781 unsigned int window_len;
46635339 782 unsigned char *in;
0c9c969a 783 int error;
46635339
CMN
784
785 if (obj->done)
786 return 0;
787
0c9c969a 788 if ((in = pack_window_open(obj->p, &obj->mw, obj->curpos, &window_len)) == NULL)
46635339
CMN
789 return GIT_EBUFS;
790
0c9c969a
UG
791 if ((error = git_zstream_set_input(&obj->zstream, in, window_len)) < 0 ||
792 (error = git_zstream_get_output_chunk(buffer, &len, &obj->zstream)) < 0) {
793 git_mwindow_close(&obj->mw);
ac3d33df 794 git_error_set(GIT_ERROR_ZLIB, "error reading from the zlib stream");
46635339
CMN
795 return -1;
796 }
797
0c9c969a 798 git_mwindow_close(&obj->mw);
46635339 799
0c9c969a
UG
800 obj->curpos += window_len - obj->zstream.in_len;
801
802 if (git_zstream_eos(&obj->zstream))
803 obj->done = 1;
46635339
CMN
804
805 /* If we didn't write anything out but we're not done, we need more data */
0c9c969a 806 if (!len && !git_zstream_eos(&obj->zstream))
46635339
CMN
807 return GIT_EBUFS;
808
0c9c969a 809 return len;
46635339
CMN
810
811}
812
ac3d33df 813void git_packfile_stream_dispose(git_packfile_stream *obj)
46635339 814{
0c9c969a 815 git_zstream_free(&obj->zstream);
46635339
CMN
816}
817
b644e223 818static int packfile_unpack_compressed(
e1de726c
RB
819 git_rawobj *obj,
820 struct git_pack_file *p,
0c9c969a
UG
821 git_mwindow **mwindow,
822 off64_t *position,
e1de726c 823 size_t size,
ac3d33df 824 git_object_t type)
7d0cdf82 825{
0c9c969a
UG
826 git_zstream zstream = GIT_ZSTREAM_INIT;
827 size_t buffer_len, total = 0;
828 char *data = NULL;
829 int error;
45d773ef 830
0c9c969a
UG
831 GIT_ERROR_CHECK_ALLOC_ADD(&buffer_len, size, 1);
832 data = git__calloc(1, buffer_len);
833 GIT_ERROR_CHECK_ALLOC(data);
834
835 if ((error = git_zstream_init(&zstream, GIT_ZSTREAM_INFLATE)) < 0) {
836 git_error_set(GIT_ERROR_ZLIB, "failed to init zlib stream on unpack");
837 goto out;
7d0cdf82
CMN
838 }
839
840 do {
0c9c969a
UG
841 size_t bytes = buffer_len - total;
842 unsigned int window_len;
843 unsigned char *in;
7d0cdf82 844
0c9c969a 845 in = pack_window_open(p, mwindow, *position, &window_len);
7d0cdf82 846
0c9c969a
UG
847 if ((error = git_zstream_set_input(&zstream, in, window_len)) < 0 ||
848 (error = git_zstream_get_output_chunk(data + total, &bytes, &zstream)) < 0) {
849 git_mwindow_close(mwindow);
850 goto out;
45d773ef
CMN
851 }
852
0c9c969a 853 git_mwindow_close(mwindow);
7d0cdf82 854
0c9c969a
UG
855 *position += window_len - zstream.in_len;
856 total += bytes;
857 } while (total < size);
7d0cdf82 858
0c9c969a 859 if (total != size || !git_zstream_eos(&zstream)) {
ac3d33df 860 git_error_set(GIT_ERROR_ZLIB, "error inflating zlib stream");
0c9c969a
UG
861 error = -1;
862 goto out;
7d0cdf82
CMN
863 }
864
865 obj->type = type;
866 obj->len = size;
0c9c969a
UG
867 obj->data = data;
868
869out:
870 git_zstream_free(&zstream);
871 if (error)
872 git__free(data);
873
874 return error;
7d0cdf82
CMN
875}
876
b5b474dd
CMN
877/*
878 * curpos is where the data starts, delta_obj_offset is the where the
879 * header starts
880 */
0c9c969a 881off64_t get_delta_base(
e1de726c
RB
882 struct git_pack_file *p,
883 git_mwindow **w_curs,
0c9c969a 884 off64_t *curpos,
ac3d33df 885 git_object_t type,
0c9c969a 886 off64_t delta_obj_offset)
7d0cdf82 887{
45d773ef
CMN
888 unsigned int left = 0;
889 unsigned char *base_info;
0c9c969a 890 off64_t base_offset;
7d0cdf82
CMN
891 git_oid unused;
892
45d773ef
CMN
893 base_info = pack_window_open(p, w_curs, *curpos, &left);
894 /* Assumption: the only reason this would fail is because the file is too small */
895 if (base_info == NULL)
904b67e6 896 return GIT_EBUFS;
7d0cdf82
CMN
897 /* pack_window_open() assured us we have [base_info, base_info + 20)
898 * as a range that we can look at without walking off the
87d9869f
VM
899 * end of the mapped window. Its actually the hash size
900 * that is assured. An OFS_DELTA longer than the hash size
7d0cdf82
CMN
901 * is stupid, as then a REF_DELTA would be smaller to store.
902 */
ac3d33df 903 if (type == GIT_OBJECT_OFS_DELTA) {
7d0cdf82
CMN
904 unsigned used = 0;
905 unsigned char c = base_info[used++];
eae0bfdc 906 size_t unsigned_base_offset = c & 127;
7d0cdf82 907 while (c & 128) {
45d773ef 908 if (left <= used)
904b67e6 909 return GIT_EBUFS;
eae0bfdc
PP
910 unsigned_base_offset += 1;
911 if (!unsigned_base_offset || MSB(unsigned_base_offset, 7))
87d9869f 912 return 0; /* overflow */
7d0cdf82 913 c = base_info[used++];
eae0bfdc 914 unsigned_base_offset = (unsigned_base_offset << 7) + (c & 127);
7d0cdf82 915 }
eae0bfdc 916 if (unsigned_base_offset == 0 || (size_t)delta_obj_offset <= unsigned_base_offset)
87d9869f 917 return 0; /* out of bound */
eae0bfdc 918 base_offset = delta_obj_offset - unsigned_base_offset;
7d0cdf82 919 *curpos += used;
ac3d33df 920 } else if (type == GIT_OBJECT_REF_DELTA) {
c1af5a39
CMN
921 /* If we have the cooperative cache, search in it first */
922 if (p->has_cache) {
0c9c969a 923 struct git_pack_entry *entry;
0e040c03 924 git_oid oid;
c1af5a39 925
0e040c03 926 git_oid_fromraw(&oid, base_info);
0c9c969a 927 if ((entry = git_oidmap_get(p->idx_cache, &oid)) != NULL) {
c1af5a39 928 *curpos += 20;
0c9c969a 929 return entry->offset;
38c10ecd
ET
930 } else {
931 /* If we're building an index, don't try to find the pack
932 * entry; we just haven't seen it yet. We'll make
933 * progress again in the next loop.
934 */
935 return GIT_PASSTHROUGH;
c1af5a39
CMN
936 }
937 }
38c10ecd 938
7d0cdf82 939 /* The base entry _must_ be in the same pack */
e1de726c
RB
940 if (pack_entry_find_offset(&base_offset, &unused, p, (git_oid *)base_info, GIT_OID_HEXSZ) < 0)
941 return packfile_error("base entry delta is not in the same pack");
7d0cdf82
CMN
942 *curpos += 20;
943 } else
944 return 0;
945
946 return base_offset;
947}
a070f152
CMN
948
949/***********************************************************
950 *
951 * PACKFILE METHODS
952 *
953 ***********************************************************/
954
bf339ab0
ET
955void git_packfile_close(struct git_pack_file *p, bool unlink_packfile)
956{
957 if (p->mwf.fd >= 0) {
958 git_mwindow_free_all_locked(&p->mwf);
959 p_close(p->mwf.fd);
960 p->mwf.fd = -1;
961 }
962
963 if (unlink_packfile)
964 p_unlink(p->pack_name);
965}
966
96c9b9f0 967void git_packfile_free(struct git_pack_file *p)
a070f152 968{
24c70804
RB
969 if (!p)
970 return;
971
c8f79c2b 972 cache_free(&p->bases);
c0f4a011 973
bf339ab0 974 git_packfile_close(p, false);
a070f152
CMN
975
976 pack_index_free(p);
977
3286c408 978 git__free(p->bad_object_sha1);
24c70804 979
24c70804 980 git_mutex_free(&p->lock);
649214be 981 git_mutex_free(&p->bases.lock);
3286c408 982 git__free(p);
a070f152
CMN
983}
984
985static int packfile_open(struct git_pack_file *p)
986{
987 struct stat st;
988 struct git_pack_header hdr;
989 git_oid sha1;
990 unsigned char *idx_sha1;
991
0ddfcb40 992 if (p->index_version == -1 && pack_index_open(p) < 0)
e10144ae 993 return git_odb__error_notfound("failed to open packfile", NULL, 0);
a070f152 994
9d2f841a
RB
995 /* if mwf opened by another thread, return now */
996 if (git_mutex_lock(&p->lock) < 0)
997 return packfile_error("failed to get lock for open");
998
999 if (p->mwf.fd >= 0) {
1000 git_mutex_unlock(&p->lock);
1001 return 0;
1002 }
1003
a070f152 1004 /* TODO: open with noatime */
e1de726c 1005 p->mwf.fd = git_futils_open_ro(p->pack_name);
9d2f841a
RB
1006 if (p->mwf.fd < 0)
1007 goto cleanup;
a070f152 1008
e1de726c
RB
1009 if (p_fstat(p->mwf.fd, &st) < 0 ||
1010 git_mwindow_file_register(&p->mwf) < 0)
1011 goto cleanup;
a070f152
CMN
1012
1013 /* If we created the struct before we had the pack we lack size. */
1014 if (!p->mwf.size) {
1015 if (!S_ISREG(st.st_mode))
1016 goto cleanup;
0c9c969a 1017 p->mwf.size = (off64_t)st.st_size;
a070f152
CMN
1018 } else if (p->mwf.size != st.st_size)
1019 goto cleanup;
1020
1021#if 0
1022 /* We leave these file descriptors open with sliding mmap;
1023 * there is no point keeping them open across exec(), though.
1024 */
1025 fd_flag = fcntl(p->mwf.fd, F_GETFD, 0);
1026 if (fd_flag < 0)
e1de726c 1027 goto cleanup;
a070f152
CMN
1028
1029 fd_flag |= FD_CLOEXEC;
1030 if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
e1de726c 1031 goto cleanup;
a070f152
CMN
1032#endif
1033
1034 /* Verify we recognize this pack file format. */
e1de726c
RB
1035 if (p_read(p->mwf.fd, &hdr, sizeof(hdr)) < 0 ||
1036 hdr.hdr_signature != htonl(PACK_SIGNATURE) ||
1037 !pack_version_ok(hdr.hdr_version))
a070f152
CMN
1038 goto cleanup;
1039
1040 /* Verify the pack matches its index. */
e1de726c
RB
1041 if (p->num_objects != ntohl(hdr.hdr_entries) ||
1042 p_lseek(p->mwf.fd, p->mwf.size - GIT_OID_RAWSZ, SEEK_SET) == -1 ||
1043 p_read(p->mwf.fd, sha1.id, GIT_OID_RAWSZ) < 0)
a070f152
CMN
1044 goto cleanup;
1045
1046 idx_sha1 = ((unsigned char *)p->index_map.data) + p->index_map.len - 40;
1047
9d2f841a
RB
1048 if (git_oid__cmp(&sha1, (git_oid *)idx_sha1) != 0)
1049 goto cleanup;
1050
1051 git_mutex_unlock(&p->lock);
1052 return 0;
a070f152
CMN
1053
1054cleanup:
ac3d33df 1055 git_error_set(GIT_ERROR_OS, "invalid packfile '%s'", p->pack_name);
9d2f841a 1056
3a2d48d5
SS
1057 if (p->mwf.fd >= 0)
1058 p_close(p->mwf.fd);
a070f152 1059 p->mwf.fd = -1;
9d2f841a
RB
1060
1061 git_mutex_unlock(&p->lock);
1062
e1de726c 1063 return -1;
a070f152
CMN
1064}
1065
b3b66c57
CMN
1066int git_packfile__name(char **out, const char *path)
1067{
1068 size_t path_len;
1069 git_buf buf = GIT_BUF_INIT;
1070
1071 path_len = strlen(path);
1072
1073 if (path_len < strlen(".idx"))
e10144ae 1074 return git_odb__error_notfound("invalid packfile path", NULL, 0);
b3b66c57
CMN
1075
1076 if (git_buf_printf(&buf, "%.*s.pack", (int)(path_len - strlen(".idx")), path) < 0)
1077 return -1;
1078
1079 *out = git_buf_detach(&buf);
1080 return 0;
1081}
1082
5d2d21e5 1083int git_packfile_alloc(struct git_pack_file **pack_out, const char *path)
a070f152
CMN
1084{
1085 struct stat st;
1086 struct git_pack_file *p;
f1453c59 1087 size_t path_len = path ? strlen(path) : 0, alloc_len;
a070f152
CMN
1088
1089 *pack_out = NULL;
24c70804 1090
38eef611 1091 if (path_len < strlen(".idx"))
e10144ae 1092 return git_odb__error_notfound("invalid packfile path", NULL, 0);
24c70804 1093
ac3d33df
JK
1094 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, sizeof(*p), path_len);
1095 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 2);
392702ee 1096
f1453c59 1097 p = git__calloc(1, alloc_len);
ac3d33df 1098 GIT_ERROR_CHECK_ALLOC(p);
a070f152 1099
38eef611
RB
1100 memcpy(p->pack_name, path, path_len + 1);
1101
a070f152
CMN
1102 /*
1103 * Make sure a corresponding .pack file exists and that
1104 * the index looks sane.
1105 */
38eef611
RB
1106 if (git__suffixcmp(path, ".idx") == 0) {
1107 size_t root_len = path_len - strlen(".idx");
1108
0c9c969a
UG
1109 if (!git_disable_pack_keep_file_checks) {
1110 memcpy(p->pack_name + root_len, ".keep", sizeof(".keep"));
1111 if (git_path_exists(p->pack_name) == true)
1112 p->pack_keep = 1;
1113 }
a070f152 1114
38eef611 1115 memcpy(p->pack_name + root_len, ".pack", sizeof(".pack"));
38eef611 1116 }
a070f152 1117
e1de726c 1118 if (p_stat(p->pack_name, &st) < 0 || !S_ISREG(st.st_mode)) {
3286c408 1119 git__free(p);
e10144ae 1120 return git_odb__error_notfound("packfile not found", NULL, 0);
a070f152
CMN
1121 }
1122
1123 /* ok, it looks sane as far as we can check without
1124 * actually mapping the pack file.
1125 */
38eef611 1126 p->mwf.fd = -1;
1af56d7d 1127 p->mwf.size = st.st_size;
a070f152
CMN
1128 p->pack_local = 1;
1129 p->mtime = (git_time_t)st.st_mtime;
0ddfcb40 1130 p->index_version = -1;
a070f152 1131
1a42dd17 1132 if (git_mutex_init(&p->lock)) {
ac3d33df 1133 git_error_set(GIT_ERROR_OS, "failed to initialize packfile mutex");
1a42dd17
RB
1134 git__free(p);
1135 return -1;
1136 }
38eef611 1137
649214be
CMN
1138 if (cache_init(&p->bases) < 0) {
1139 git__free(p);
1140 return -1;
1141 }
1142
a070f152 1143 *pack_out = p;
e1de726c
RB
1144
1145 return 0;
a070f152
CMN
1146}
1147
1148/***********************************************************
1149 *
1150 * PACKFILE ENTRY SEARCH INTERNALS
1151 *
1152 ***********************************************************/
1153
0c9c969a 1154static off64_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n)
a070f152
CMN
1155{
1156 const unsigned char *index = p->index_map.data;
ea9e00cb 1157 const unsigned char *end = index + p->index_map.len;
a070f152
CMN
1158 index += 4 * 256;
1159 if (p->index_version == 1) {
1160 return ntohl(*((uint32_t *)(index + 24 * n)));
1161 } else {
1162 uint32_t off;
1163 index += 8 + p->num_objects * (20 + 4);
1164 off = ntohl(*((uint32_t *)(index + 4 * n)));
1165 if (!(off & 0x80000000))
1166 return off;
1167 index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
ea9e00cb
CMN
1168
1169 /* Make sure we're not being sent out of bounds */
1170 if (index >= end - 8)
1171 return -1;
1172
a070f152 1173 return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
87d9869f 1174 ntohl(*((uint32_t *)(index + 4)));
a070f152
CMN
1175 }
1176}
1177
60ecdf59
DMB
1178static int git__memcmp4(const void *a, const void *b) {
1179 return memcmp(a, b, 4);
1180}
1181
521aedad 1182int git_pack_foreach_entry(
5dca2010 1183 struct git_pack_file *p,
c3fb7d04 1184 git_odb_foreach_cb cb,
5dca2010 1185 void *data)
521aedad
CMN
1186{
1187 const unsigned char *index = p->index_map.data, *current;
521aedad 1188 uint32_t i;
dab89f9b 1189 int error = 0;
521aedad
CMN
1190
1191 if (index == NULL) {
521aedad
CMN
1192 if ((error = pack_index_open(p)) < 0)
1193 return error;
1194
1195 assert(p->index_map.data);
1196
1197 index = p->index_map.data;
1198 }
1199
1200 if (p->index_version > 1) {
1201 index += 8;
1202 }
1203
1204 index += 4 * 256;
1205
60ecdf59
DMB
1206 if (p->oids == NULL) {
1207 git_vector offsets, oids;
521aedad 1208
60ecdf59
DMB
1209 if ((error = git_vector_init(&oids, p->num_objects, NULL)))
1210 return error;
1211
1212 if ((error = git_vector_init(&offsets, p->num_objects, git__memcmp4)))
1213 return error;
5dca2010 1214
60ecdf59
DMB
1215 if (p->index_version > 1) {
1216 const unsigned char *off = index + 24 * p->num_objects;
1217 for (i = 0; i < p->num_objects; i++)
1218 git_vector_insert(&offsets, (void*)&off[4 * i]);
1219 git_vector_sort(&offsets);
1220 git_vector_foreach(&offsets, i, current)
1221 git_vector_insert(&oids, (void*)&index[5 * (current - off)]);
1222 } else {
1223 for (i = 0; i < p->num_objects; i++)
1224 git_vector_insert(&offsets, (void*)&index[24 * i]);
1225 git_vector_sort(&offsets);
1226 git_vector_foreach(&offsets, i, current)
1227 git_vector_insert(&oids, (void*)&current[4]);
1228 }
25e0b157 1229
60ecdf59 1230 git_vector_free(&offsets);
25e0b157 1231 p->oids = (git_oid **)git_vector_detach(NULL, NULL, &oids);
521aedad
CMN
1232 }
1233
60ecdf59 1234 for (i = 0; i < p->num_objects; i++)
26c1cb91 1235 if ((error = cb(p->oids[i], data)) != 0)
ac3d33df 1236 return git_error_set_after_callback(error);
60ecdf59 1237
25e0b157 1238 return error;
521aedad
CMN
1239}
1240
66a70851
UG
1241static int sha1_position(const void *table, size_t stride, unsigned lo,
1242 unsigned hi, const unsigned char *key)
1243{
1244 const unsigned char *base = table;
1245
1246 while (lo < hi) {
1247 unsigned mi = (lo + hi) / 2;
1248 int cmp = git_oid__hashcmp(base + mi * stride, key);
1249
1250 if (!cmp)
1251 return mi;
1252
1253 if (cmp > 0)
1254 hi = mi;
1255 else
1256 lo = mi+1;
1257 }
1258
1259 return -((int)lo)-1;
1260}
1261
a070f152 1262static int pack_entry_find_offset(
0c9c969a 1263 off64_t *offset_out,
e1de726c
RB
1264 git_oid *found_oid,
1265 struct git_pack_file *p,
1266 const git_oid *short_oid,
b8457baa 1267 size_t len)
a070f152 1268{
0cf15e39
PS
1269 const uint32_t *level1_ofs;
1270 const unsigned char *index;
a070f152
CMN
1271 unsigned hi, lo, stride;
1272 int pos, found = 0;
0c9c969a 1273 off64_t offset;
a070f152
CMN
1274 const unsigned char *current = 0;
1275
1276 *offset_out = 0;
1277
0ddfcb40 1278 if (p->index_version == -1) {
34bd5999 1279 int error;
a070f152 1280
34bd5999
CMN
1281 if ((error = pack_index_open(p)) < 0)
1282 return error;
1283 assert(p->index_map.data);
34bd5999 1284 }
a070f152 1285
0cf15e39
PS
1286 index = p->index_map.data;
1287 level1_ofs = p->index_map.data;
1288
a070f152
CMN
1289 if (p->index_version > 1) {
1290 level1_ofs += 2;
1291 index += 8;
1292 }
1293
1294 index += 4 * 256;
1295 hi = ntohl(level1_ofs[(int)short_oid->id[0]]);
1296 lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(level1_ofs[(int)short_oid->id[0] - 1]));
1297
1298 if (p->index_version > 1) {
1299 stride = 20;
1300 } else {
1301 stride = 24;
1302 index += 4;
1303 }
1304
1305#ifdef INDEX_DEBUG_LOOKUP
1306 printf("%02x%02x%02x... lo %u hi %u nr %d\n",
1307 short_oid->id[0], short_oid->id[1], short_oid->id[2], lo, hi, p->num_objects);
1308#endif
1309
67591c8c 1310 pos = sha1_position(index, stride, lo, hi, short_oid->id);
a070f152
CMN
1311
1312 if (pos >= 0) {
1313 /* An object matching exactly the oid was found */
1314 found = 1;
1315 current = index + pos * stride;
1316 } else {
1317 /* No object was found */
1318 /* pos refers to the object with the "closest" oid to short_oid */
1319 pos = - 1 - pos;
1320 if (pos < (int)p->num_objects) {
1321 current = index + pos * stride;
1322
282283ac 1323 if (!git_oid_ncmp(short_oid, (const git_oid *)current, len))
a070f152 1324 found = 1;
a070f152
CMN
1325 }
1326 }
1327
b2a2702d 1328 if (found && len != GIT_OID_HEXSZ && pos + 1 < (int)p->num_objects) {
a070f152
CMN
1329 /* Check for ambiguousity */
1330 const unsigned char *next = current + stride;
1331
1332 if (!git_oid_ncmp(short_oid, (const git_oid *)next, len)) {
1333 found = 2;
1334 }
1335 }
1336
e1de726c 1337 if (!found)
e10144ae 1338 return git_odb__error_notfound("failed to find offset for pack entry", short_oid, len);
e1de726c
RB
1339 if (found > 1)
1340 return git_odb__error_ambiguous("found multiple offsets for pack entry");
24c70804 1341
ea9e00cb 1342 if ((offset = nth_packed_object_offset(p, pos)) < 0) {
ac3d33df 1343 git_error_set(GIT_ERROR_ODB, "packfile index is corrupt");
ea9e00cb
CMN
1344 return -1;
1345 }
1346
1347 *offset_out = offset;
e1de726c 1348 git_oid_fromraw(found_oid, current);
a070f152
CMN
1349
1350#ifdef INDEX_DEBUG_LOOKUP
e1de726c 1351 {
a070f152
CMN
1352 unsigned char hex_sha1[GIT_OID_HEXSZ + 1];
1353 git_oid_fmt(hex_sha1, found_oid);
1354 hex_sha1[GIT_OID_HEXSZ] = '\0';
1355 printf("found lo=%d %s\n", lo, hex_sha1);
a070f152 1356 }
e1de726c 1357#endif
24c70804 1358
e1de726c 1359 return 0;
a070f152
CMN
1360}
1361
1362int git_pack_entry_find(
1363 struct git_pack_entry *e,
1364 struct git_pack_file *p,
1365 const git_oid *short_oid,
b8457baa 1366 size_t len)
a070f152 1367{
0c9c969a 1368 off64_t offset;
a070f152
CMN
1369 git_oid found_oid;
1370 int error;
1371
1372 assert(p);
1373
1374 if (len == GIT_OID_HEXSZ && p->num_bad_objects) {
1375 unsigned i;
1376 for (i = 0; i < p->num_bad_objects; i++)
b7f167da 1377 if (git_oid__cmp(short_oid, &p->bad_object_sha1[i]) == 0)
e1de726c 1378 return packfile_error("bad object found in packfile");
a070f152
CMN
1379 }
1380
1381 error = pack_entry_find_offset(&offset, &found_oid, p, short_oid, len);
e1de726c
RB
1382 if (error < 0)
1383 return error;
a070f152
CMN
1384
1385 /* we found a unique entry in the index;
1386 * make sure the packfile backing the index
1387 * still exists on disk */
e1de726c
RB
1388 if (p->mwf.fd == -1 && (error = packfile_open(p)) < 0)
1389 return error;
a070f152
CMN
1390
1391 e->offset = offset;
1392 e->p = p;
1393
1394 git_oid_cpy(&e->sha1, &found_oid);
e1de726c 1395 return 0;
a070f152 1396}